mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-01-01 02:21:06 +00:00
[utils] Fix InAdvancePagedList.__getitem__
Since it had no cache, the page was re-fetched for each video.
* Also generalized the cache code
This commit is contained in:
parent
245d43cacf
commit
7be9ccff0b
1 changed file with 29 additions and 34 deletions
|
@ -4041,15 +4041,31 @@ def __str__(self):
|
||||||
return repr(self.exhaust())
|
return repr(self.exhaust())
|
||||||
|
|
||||||
|
|
||||||
class PagedList(object):
|
class PagedList:
|
||||||
def __len__(self):
|
def __len__(self):
|
||||||
# This is only useful for tests
|
# This is only useful for tests
|
||||||
return len(self.getslice())
|
return len(self.getslice())
|
||||||
|
|
||||||
def getslice(self, start, end):
|
def __init__(self, pagefunc, pagesize, use_cache=True):
|
||||||
|
self._pagefunc = pagefunc
|
||||||
|
self._pagesize = pagesize
|
||||||
|
self._use_cache = use_cache
|
||||||
|
self._cache = {}
|
||||||
|
|
||||||
|
def getpage(self, pagenum):
|
||||||
|
page_results = self._cache.get(pagenum) or list(self._pagefunc(pagenum))
|
||||||
|
if self._use_cache:
|
||||||
|
self._cache[pagenum] = page_results
|
||||||
|
return page_results
|
||||||
|
|
||||||
|
def getslice(self, start=0, end=None):
|
||||||
|
return list(self._getslice(start, end))
|
||||||
|
|
||||||
|
def _getslice(self, start, end):
|
||||||
raise NotImplementedError('This method must be implemented by subclasses')
|
raise NotImplementedError('This method must be implemented by subclasses')
|
||||||
|
|
||||||
def __getitem__(self, idx):
|
def __getitem__(self, idx):
|
||||||
|
# NOTE: cache must be enabled if this is used
|
||||||
if not isinstance(idx, int) or idx < 0:
|
if not isinstance(idx, int) or idx < 0:
|
||||||
raise TypeError('indices must be non-negative integers')
|
raise TypeError('indices must be non-negative integers')
|
||||||
entries = self.getslice(idx, idx + 1)
|
entries = self.getslice(idx, idx + 1)
|
||||||
|
@ -4057,42 +4073,26 @@ def __getitem__(self, idx):
|
||||||
|
|
||||||
|
|
||||||
class OnDemandPagedList(PagedList):
|
class OnDemandPagedList(PagedList):
|
||||||
def __init__(self, pagefunc, pagesize, use_cache=True):
|
def _getslice(self, start, end):
|
||||||
self._pagefunc = pagefunc
|
|
||||||
self._pagesize = pagesize
|
|
||||||
self._use_cache = use_cache
|
|
||||||
if use_cache:
|
|
||||||
self._cache = {}
|
|
||||||
|
|
||||||
def getslice(self, start=0, end=None):
|
|
||||||
res = []
|
|
||||||
for pagenum in itertools.count(start // self._pagesize):
|
for pagenum in itertools.count(start // self._pagesize):
|
||||||
firstid = pagenum * self._pagesize
|
firstid = pagenum * self._pagesize
|
||||||
nextfirstid = pagenum * self._pagesize + self._pagesize
|
nextfirstid = pagenum * self._pagesize + self._pagesize
|
||||||
if start >= nextfirstid:
|
if start >= nextfirstid:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
page_results = None
|
|
||||||
if self._use_cache:
|
|
||||||
page_results = self._cache.get(pagenum)
|
|
||||||
if page_results is None:
|
|
||||||
page_results = list(self._pagefunc(pagenum))
|
|
||||||
if self._use_cache:
|
|
||||||
self._cache[pagenum] = page_results
|
|
||||||
|
|
||||||
startv = (
|
startv = (
|
||||||
start % self._pagesize
|
start % self._pagesize
|
||||||
if firstid <= start < nextfirstid
|
if firstid <= start < nextfirstid
|
||||||
else 0)
|
else 0)
|
||||||
|
|
||||||
endv = (
|
endv = (
|
||||||
((end - 1) % self._pagesize) + 1
|
((end - 1) % self._pagesize) + 1
|
||||||
if (end is not None and firstid <= end <= nextfirstid)
|
if (end is not None and firstid <= end <= nextfirstid)
|
||||||
else None)
|
else None)
|
||||||
|
|
||||||
|
page_results = self.getpage(pagenum)
|
||||||
if startv != 0 or endv is not None:
|
if startv != 0 or endv is not None:
|
||||||
page_results = page_results[startv:endv]
|
page_results = page_results[startv:endv]
|
||||||
res.extend(page_results)
|
yield from page_results
|
||||||
|
|
||||||
# A little optimization - if current page is not "full", ie. does
|
# A little optimization - if current page is not "full", ie. does
|
||||||
# not contain page_size videos then we can assume that this page
|
# not contain page_size videos then we can assume that this page
|
||||||
|
@ -4105,36 +4105,31 @@ def getslice(self, start=0, end=None):
|
||||||
# break out early as well
|
# break out early as well
|
||||||
if end == nextfirstid:
|
if end == nextfirstid:
|
||||||
break
|
break
|
||||||
return res
|
|
||||||
|
|
||||||
|
|
||||||
class InAdvancePagedList(PagedList):
|
class InAdvancePagedList(PagedList):
|
||||||
def __init__(self, pagefunc, pagecount, pagesize):
|
def __init__(self, pagefunc, pagecount, pagesize):
|
||||||
self._pagefunc = pagefunc
|
|
||||||
self._pagecount = pagecount
|
self._pagecount = pagecount
|
||||||
self._pagesize = pagesize
|
PagedList.__init__(self, pagefunc, pagesize, True)
|
||||||
|
|
||||||
def getslice(self, start=0, end=None):
|
def _getslice(self, start, end):
|
||||||
res = []
|
|
||||||
start_page = start // self._pagesize
|
start_page = start // self._pagesize
|
||||||
end_page = (
|
end_page = (
|
||||||
self._pagecount if end is None else (end // self._pagesize + 1))
|
self._pagecount if end is None else (end // self._pagesize + 1))
|
||||||
skip_elems = start - start_page * self._pagesize
|
skip_elems = start - start_page * self._pagesize
|
||||||
only_more = None if end is None else end - start
|
only_more = None if end is None else end - start
|
||||||
for pagenum in range(start_page, end_page):
|
for pagenum in range(start_page, end_page):
|
||||||
page = list(self._pagefunc(pagenum))
|
page_results = self.getpage(pagenum)
|
||||||
if skip_elems:
|
if skip_elems:
|
||||||
page = page[skip_elems:]
|
page_results = page_results[skip_elems:]
|
||||||
skip_elems = None
|
skip_elems = None
|
||||||
if only_more is not None:
|
if only_more is not None:
|
||||||
if len(page) < only_more:
|
if len(page_results) < only_more:
|
||||||
only_more -= len(page)
|
only_more -= len(page_results)
|
||||||
else:
|
else:
|
||||||
page = page[:only_more]
|
yield from page_results[:only_more]
|
||||||
res.extend(page)
|
|
||||||
break
|
break
|
||||||
res.extend(page)
|
yield from page_results
|
||||||
return res
|
|
||||||
|
|
||||||
|
|
||||||
def uppercase_escape(s):
|
def uppercase_escape(s):
|
||||||
|
|
Loading…
Reference in a new issue