mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-27 03:03:01 +00:00
[youtube:playlist] Remove the link with index 0
That link is not the first video of the playlist; it appears in the 'Play all' button (see the test course for an example).
This commit is contained in:
parent
4a98cdbf3b
commit
6e47b51eef
1 changed file with 5 additions and 3 deletions
|
@ -1528,7 +1528,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
|||
)"""
|
||||
_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s'
|
||||
_MORE_PAGES_INDICATOR = r'data-link-type="next"'
|
||||
_VIDEO_RE = r'href="/watch\?v=([0-9A-Za-z_-]{11})&'
|
||||
_VIDEO_RE = r'href="/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)'
|
||||
IE_NAME = u'youtube:playlist'
|
||||
|
||||
@classmethod
|
||||
|
@ -1562,8 +1562,10 @@ def _real_extract(self, url):
|
|||
for page_num in itertools.count(1):
|
||||
url = self._TEMPLATE_URL % (playlist_id, page_num)
|
||||
page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
|
||||
# The ids are duplicated
|
||||
new_ids = orderedSet(re.findall(self._VIDEO_RE, page))
|
||||
matches = re.finditer(self._VIDEO_RE, page)
|
||||
# We remove the duplicates and the link with index 0
|
||||
# (it's not the first video of the playlist)
|
||||
new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
|
||||
ids.extend(new_ids)
|
||||
|
||||
if re.search(self._MORE_PAGES_INDICATOR, page) is None:
|
||||
|
|
Loading…
Reference in a new issue