0
0
Fork 0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-01-03 06:01:02 +00:00

[utils] Fix InAdvancePagedList.__getitem__

Since InAdvancePagedList did not cache fetched pages, the same page was re-fetched for every video accessed by index.
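* Also generalized the cache code by moving it into the PagedList base class, so that OnDemandPagedList and InAdvancePagedList share it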
This commit is contained in:
pukkandan 2021-08-10 03:40:40 +05:30
parent 245d43cacf
commit 7be9ccff0b

View file

@ -4041,15 +4041,31 @@ def __str__(self):
return repr(self.exhaust()) return repr(self.exhaust())
class PagedList(object): class PagedList:
def __len__(self): def __len__(self):
# This is only useful for tests # This is only useful for tests
return len(self.getslice()) return len(self.getslice())
def getslice(self, start, end): def __init__(self, pagefunc, pagesize, use_cache=True):
self._pagefunc = pagefunc
self._pagesize = pagesize
self._use_cache = use_cache
self._cache = {}
def getpage(self, pagenum):
page_results = self._cache.get(pagenum) or list(self._pagefunc(pagenum))
if self._use_cache:
self._cache[pagenum] = page_results
return page_results
def getslice(self, start=0, end=None):
return list(self._getslice(start, end))
def _getslice(self, start, end):
raise NotImplementedError('This method must be implemented by subclasses') raise NotImplementedError('This method must be implemented by subclasses')
def __getitem__(self, idx): def __getitem__(self, idx):
# NOTE: cache must be enabled if this is used
if not isinstance(idx, int) or idx < 0: if not isinstance(idx, int) or idx < 0:
raise TypeError('indices must be non-negative integers') raise TypeError('indices must be non-negative integers')
entries = self.getslice(idx, idx + 1) entries = self.getslice(idx, idx + 1)
@ -4057,42 +4073,26 @@ def __getitem__(self, idx):
class OnDemandPagedList(PagedList): class OnDemandPagedList(PagedList):
def __init__(self, pagefunc, pagesize, use_cache=True): def _getslice(self, start, end):
self._pagefunc = pagefunc
self._pagesize = pagesize
self._use_cache = use_cache
if use_cache:
self._cache = {}
def getslice(self, start=0, end=None):
res = []
for pagenum in itertools.count(start // self._pagesize): for pagenum in itertools.count(start // self._pagesize):
firstid = pagenum * self._pagesize firstid = pagenum * self._pagesize
nextfirstid = pagenum * self._pagesize + self._pagesize nextfirstid = pagenum * self._pagesize + self._pagesize
if start >= nextfirstid: if start >= nextfirstid:
continue continue
page_results = None
if self._use_cache:
page_results = self._cache.get(pagenum)
if page_results is None:
page_results = list(self._pagefunc(pagenum))
if self._use_cache:
self._cache[pagenum] = page_results
startv = ( startv = (
start % self._pagesize start % self._pagesize
if firstid <= start < nextfirstid if firstid <= start < nextfirstid
else 0) else 0)
endv = ( endv = (
((end - 1) % self._pagesize) + 1 ((end - 1) % self._pagesize) + 1
if (end is not None and firstid <= end <= nextfirstid) if (end is not None and firstid <= end <= nextfirstid)
else None) else None)
page_results = self.getpage(pagenum)
if startv != 0 or endv is not None: if startv != 0 or endv is not None:
page_results = page_results[startv:endv] page_results = page_results[startv:endv]
res.extend(page_results) yield from page_results
# A little optimization - if current page is not "full", ie. does # A little optimization - if current page is not "full", ie. does
# not contain page_size videos then we can assume that this page # not contain page_size videos then we can assume that this page
@ -4105,36 +4105,31 @@ def getslice(self, start=0, end=None):
# break out early as well # break out early as well
if end == nextfirstid: if end == nextfirstid:
break break
return res
class InAdvancePagedList(PagedList): class InAdvancePagedList(PagedList):
def __init__(self, pagefunc, pagecount, pagesize): def __init__(self, pagefunc, pagecount, pagesize):
self._pagefunc = pagefunc
self._pagecount = pagecount self._pagecount = pagecount
self._pagesize = pagesize PagedList.__init__(self, pagefunc, pagesize, True)
def getslice(self, start=0, end=None): def _getslice(self, start, end):
res = []
start_page = start // self._pagesize start_page = start // self._pagesize
end_page = ( end_page = (
self._pagecount if end is None else (end // self._pagesize + 1)) self._pagecount if end is None else (end // self._pagesize + 1))
skip_elems = start - start_page * self._pagesize skip_elems = start - start_page * self._pagesize
only_more = None if end is None else end - start only_more = None if end is None else end - start
for pagenum in range(start_page, end_page): for pagenum in range(start_page, end_page):
page = list(self._pagefunc(pagenum)) page_results = self.getpage(pagenum)
if skip_elems: if skip_elems:
page = page[skip_elems:] page_results = page_results[skip_elems:]
skip_elems = None skip_elems = None
if only_more is not None: if only_more is not None:
if len(page) < only_more: if len(page_results) < only_more:
only_more -= len(page) only_more -= len(page_results)
else: else:
page = page[:only_more] yield from page_results[:only_more]
res.extend(page)
break break
res.extend(page) yield from page_results
return res
def uppercase_escape(s): def uppercase_escape(s):