mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-01-03 06:01:02 +00:00
Implemented all Youtube Feeds (ytfav, ytwatchlater, ytsubs, ythistory, ytrec) and SearchURL
This commit is contained in:
parent
d3260f40cb
commit
3462ffa892
5 changed files with 202 additions and 149 deletions
|
@ -1146,7 +1146,7 @@ # Supported sites
|
||||||
- **YourPorn**
|
- **YourPorn**
|
||||||
- **YourUpload**
|
- **YourUpload**
|
||||||
- **youtube**: YouTube.com
|
- **youtube**: YouTube.com
|
||||||
- **youtube:favorites**: YouTube.com favourite videos, ":ytfav" for short (requires authentication)
|
- **youtube:favorites**: YouTube.com liked videos, ":ytfav" or "LL" for short (requires authentication)
|
||||||
- **youtube:history**: Youtube watch history, ":ythistory" for short (requires authentication)
|
- **youtube:history**: Youtube watch history, ":ythistory" for short (requires authentication)
|
||||||
- **youtube:live**: YouTube.com live streams
|
- **youtube:live**: YouTube.com live streams
|
||||||
- **youtube:playlist**: YouTube.com playlists
|
- **youtube:playlist**: YouTube.com playlists
|
||||||
|
@ -1154,11 +1154,10 @@ # Supported sites
|
||||||
- **youtube:search**: YouTube.com searches
|
- **youtube:search**: YouTube.com searches
|
||||||
- **youtube:search:date**: YouTube.com searches, newest videos first
|
- **youtube:search:date**: YouTube.com searches, newest videos first
|
||||||
- **youtube:search_url**: YouTube.com search URLs
|
- **youtube:search_url**: YouTube.com search URLs
|
||||||
- **youtube:show**: YouTube.com (multi-season) shows
|
|
||||||
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
|
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
|
||||||
- **YoutubeYtUser**: YouTube.com user videos (URL or "ytuser" keyword)
|
- **YoutubeYtUser**: YouTube.com user videos (URL or "ytuser" keyword)
|
||||||
- **youtube:tab**: YouTube.com tab
|
- **youtube:tab**: YouTube.com tab
|
||||||
- **youtube:watchlater**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
|
- **youtube:watchlater**: Youtube watch later list, ":ytwatchlater" or "WL" for short (requires authentication)
|
||||||
- **Zapiks**
|
- **Zapiks**
|
||||||
- **Zaq1**
|
- **Zaq1**
|
||||||
- **Zattoo**
|
- **Zattoo**
|
||||||
|
|
|
@ -35,6 +35,9 @@ def test_youtube_playlist_matching(self):
|
||||||
assertPlaylist('ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
assertPlaylist('ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||||
assertPlaylist('UUBABnxM4Ar9ten8Mdjj1j0Q') # 585
|
assertPlaylist('UUBABnxM4Ar9ten8Mdjj1j0Q') # 585
|
||||||
assertPlaylist('PL63F0C78739B09958')
|
assertPlaylist('PL63F0C78739B09958')
|
||||||
|
assertTab('https://www.youtube.com/AsapSCIENCE')
|
||||||
|
assertTab('https://www.youtube.com/embedded')
|
||||||
|
assertTab('https://www.youtube.com/feed') # Own channel's home page
|
||||||
assertTab('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
|
assertTab('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
|
||||||
assertPlaylist('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
assertPlaylist('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||||
assertTab('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
|
assertTab('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
|
||||||
|
@ -47,7 +50,7 @@ def test_youtube_matching(self):
|
||||||
self.assertTrue(YoutubeIE.suitable('PLtS2H6bU1M'))
|
self.assertTrue(YoutubeIE.suitable('PLtS2H6bU1M'))
|
||||||
self.assertFalse(YoutubeIE.suitable('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) # 668
|
self.assertFalse(YoutubeIE.suitable('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) # 668
|
||||||
self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])
|
self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])
|
||||||
self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
|
# self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube']) # /v/ is no longer valid
|
||||||
self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube'])
|
self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube'])
|
||||||
self.assertMatch('http://www.cleanvideosearch.com/media/action/yt/watch?videoId=8v_4O44sfjM', ['youtube'])
|
self.assertMatch('http://www.cleanvideosearch.com/media/action/yt/watch?videoId=8v_4O44sfjM', ['youtube'])
|
||||||
|
|
||||||
|
|
|
@ -832,7 +832,7 @@ def extract_info(self, url, download=True, ie_key=None, info_dict=None, extra_in
|
||||||
try:
|
try:
|
||||||
try:
|
try:
|
||||||
temp_id = ie.extract_id(url) if callable(getattr(ie, 'extract_id', None)) else ie._match_id(url)
|
temp_id = ie.extract_id(url) if callable(getattr(ie, 'extract_id', None)) else ie._match_id(url)
|
||||||
except (AssertionError, IndexError):
|
except (AssertionError, IndexError, AttributeError):
|
||||||
temp_id = None
|
temp_id = None
|
||||||
if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
|
if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
|
||||||
self.to_screen("[%s] %s: has already been recorded in archive" % (
|
self.to_screen("[%s] %s: has already been recorded in archive" % (
|
||||||
|
|
|
@ -1514,7 +1514,6 @@
|
||||||
YoutubeSearchDateIE,
|
YoutubeSearchDateIE,
|
||||||
YoutubeSearchIE,
|
YoutubeSearchIE,
|
||||||
YoutubeSearchURLIE,
|
YoutubeSearchURLIE,
|
||||||
YoutubeShowIE,
|
|
||||||
YoutubeSubscriptionsIE,
|
YoutubeSubscriptionsIE,
|
||||||
YoutubeTruncatedIDIE,
|
YoutubeTruncatedIDIE,
|
||||||
YoutubeTruncatedURLIE,
|
YoutubeTruncatedURLIE,
|
||||||
|
|
|
@ -34,7 +34,6 @@
|
||||||
get_element_by_id,
|
get_element_by_id,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
orderedSet,
|
|
||||||
parse_codecs,
|
parse_codecs,
|
||||||
parse_count,
|
parse_count,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
|
@ -64,11 +63,16 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||||
_CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
|
_CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
|
||||||
_TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
|
_TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
|
||||||
|
|
||||||
|
_RESERVED_NAMES = (
|
||||||
|
r'course|embed|watch|w|results|storefront|'
|
||||||
|
r'shared|index|account|reporthistory|t/terms|about|upload|signin|logout|'
|
||||||
|
r'feed/(watch_later|history|subscriptions|library|trending|recommended)')
|
||||||
|
|
||||||
_NETRC_MACHINE = 'youtube'
|
_NETRC_MACHINE = 'youtube'
|
||||||
# If True it will raise an error if no login info is provided
|
# If True it will raise an error if no login info is provided
|
||||||
_LOGIN_REQUIRED = False
|
_LOGIN_REQUIRED = False
|
||||||
|
|
||||||
_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM)'
|
_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|LL|WL)'
|
||||||
|
|
||||||
_YOUTUBE_CLIENT_HEADERS = {
|
_YOUTUBE_CLIENT_HEADERS = {
|
||||||
'x-youtube-client-name': '1',
|
'x-youtube-client-name': '1',
|
||||||
|
@ -2495,7 +2499,13 @@ def decrypt_sig(mobj):
|
||||||
|
|
||||||
class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||||
IE_DESC = 'YouTube.com tab'
|
IE_DESC = 'YouTube.com tab'
|
||||||
_VALID_URL = r'https?://(?:\w+\.)?(?:youtube(?:kids)?\.com|invidio\.us)/(?:(?:channel|c|user)/|(?:playlist|watch)\?.*?\blist=)(?P<id>[^/?#&]+)'
|
# Using (?x)^ here would cause a warning in LiveIE, so this can't be split into multiple lines using '''
|
||||||
|
_VALID_URL = (
|
||||||
|
r'https?://(?:\w+\.)?(?:youtube(?:kids)?\.com|invidio\.us)/'
|
||||||
|
r'(?:(?!(%s)([/#?]|$))|'
|
||||||
|
r'(?:channel|c|user)/|'
|
||||||
|
r'(?:playlist|watch)\?.*?\blist=)'
|
||||||
|
r'(?P<id>[^/?#&]+)') % YoutubeBaseInfoExtractor._RESERVED_NAMES
|
||||||
IE_NAME = 'youtube:tab'
|
IE_NAME = 'youtube:tab'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
@ -2692,8 +2702,10 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def suitable(cls, url):
|
def suitable(cls, url):
|
||||||
return False if YoutubeLiveIE.suitable(url) else super(
|
IGNORE = (YoutubeLiveIE,)
|
||||||
YoutubeTabIE, cls).suitable(url)
|
return (
|
||||||
|
False if any(ie.suitable(url) for ie in IGNORE)
|
||||||
|
else super(YoutubeTabIE, cls).suitable(url))
|
||||||
|
|
||||||
def _extract_channel_id(self, webpage):
|
def _extract_channel_id(self, webpage):
|
||||||
channel_id = self._html_search_meta(
|
channel_id = self._html_search_meta(
|
||||||
|
@ -2808,6 +2820,26 @@ def _playlist_entries(self, video_list_renderer):
|
||||||
continue
|
continue
|
||||||
yield self._extract_video(renderer)
|
yield self._extract_video(renderer)
|
||||||
|
|
||||||
|
def _itemSection_entries(self, item_sect_renderer):
    """Yield one video entry per usable videoRenderer in an item section.

    item_sect_renderer is expected to be an itemSectionRenderer dict. Each
    element of its 'contents' list that is a dict holding a 'videoRenderer'
    dict with a non-empty 'videoId' is handed to self._extract_video and
    the result is yielded; every other element is skipped.

    A missing or non-list 'contents' (or a non-dict argument) yields
    nothing instead of raising, so one malformed continuation item does
    not abort the whole feed.
    """
    contents = (
        item_sect_renderer.get('contents')
        if isinstance(item_sect_renderer, dict) else None)
    if not isinstance(contents, list):
        return
    for content in contents:
        if not isinstance(content, dict):
            continue
        renderer = content.get('videoRenderer')
        if not isinstance(renderer, dict):
            continue
        # Renderers without an id cannot be turned into a video entry
        if not renderer.get('videoId'):
            continue
        yield self._extract_video(renderer)
|
||||||
|
|
||||||
|
def _rich_entries(self, rich_grid_renderer):
    """Yield the video entry wrapped by a rich item renderer, if any.

    Looks up rich_grid_renderer['content']['videoRenderer'] and, when that
    is a dict with a non-empty 'videoId', yields the result of
    self._extract_video for it. Yields nothing otherwise.
    """
    renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict)
    # try_get gives back a falsy value when the path is absent or is not a
    # dict (this file pairs it with "or {}" fallbacks elsewhere), so bail
    # out before calling .get() on it.
    if not renderer:
        return
    if not renderer.get('videoId'):
        return
    yield self._extract_video(renderer)
|
||||||
|
|
||||||
def _video_entry(self, video_renderer):
|
def _video_entry(self, video_renderer):
|
||||||
video_id = video_renderer.get('videoId')
|
video_id = video_renderer.get('videoId')
|
||||||
if video_id:
|
if video_id:
|
||||||
|
@ -2899,49 +2931,67 @@ def _extract_continuation(cls, renderer):
|
||||||
}
|
}
|
||||||
|
|
||||||
def _entries(self, tab, identity_token):
|
def _entries(self, tab, identity_token):
|
||||||
continuation = None
|
|
||||||
slr_contents = try_get(tab, lambda x: x['sectionListRenderer']['contents'], list) or []
|
|
||||||
for slr_content in slr_contents:
|
|
||||||
if not isinstance(slr_content, dict):
|
|
||||||
continue
|
|
||||||
is_renderer = try_get(slr_content, lambda x: x['itemSectionRenderer'], dict)
|
|
||||||
if not is_renderer:
|
|
||||||
continue
|
|
||||||
isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
|
|
||||||
for isr_content in isr_contents:
|
|
||||||
if not isinstance(isr_content, dict):
|
|
||||||
continue
|
|
||||||
renderer = isr_content.get('playlistVideoListRenderer')
|
|
||||||
if renderer:
|
|
||||||
for entry in self._playlist_entries(renderer):
|
|
||||||
yield entry
|
|
||||||
continuation = self._extract_continuation(renderer)
|
|
||||||
continue
|
|
||||||
renderer = isr_content.get('gridRenderer')
|
|
||||||
if renderer:
|
|
||||||
for entry in self._grid_entries(renderer):
|
|
||||||
yield entry
|
|
||||||
continuation = self._extract_continuation(renderer)
|
|
||||||
continue
|
|
||||||
renderer = isr_content.get('shelfRenderer')
|
|
||||||
if renderer:
|
|
||||||
for entry in self._shelf_entries(renderer):
|
|
||||||
yield entry
|
|
||||||
continue
|
|
||||||
renderer = isr_content.get('backstagePostThreadRenderer')
|
|
||||||
if renderer:
|
|
||||||
for entry in self._post_thread_entries(renderer):
|
|
||||||
yield entry
|
|
||||||
continuation = self._extract_continuation(renderer)
|
|
||||||
continue
|
|
||||||
renderer = isr_content.get('videoRenderer')
|
|
||||||
if renderer:
|
|
||||||
entry = self._video_entry(renderer)
|
|
||||||
if entry:
|
|
||||||
yield entry
|
|
||||||
|
|
||||||
if not continuation:
|
def extract_entries(parent_renderer):
|
||||||
continuation = self._extract_continuation(is_renderer)
|
slr_contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
|
||||||
|
for slr_content in slr_contents:
|
||||||
|
if not isinstance(slr_content, dict):
|
||||||
|
continue
|
||||||
|
is_renderer = try_get(slr_content, lambda x: x['itemSectionRenderer'], dict)
|
||||||
|
if not is_renderer:
|
||||||
|
renderer = slr_content.get('richItemRenderer')
|
||||||
|
if renderer:
|
||||||
|
for entry in self._rich_entries(renderer):
|
||||||
|
yield entry
|
||||||
|
continuation_list[0] = self._extract_continuation(parent_renderer)
|
||||||
|
continue
|
||||||
|
isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
|
||||||
|
for isr_content in isr_contents:
|
||||||
|
if not isinstance(isr_content, dict):
|
||||||
|
continue
|
||||||
|
renderer = isr_content.get('playlistVideoListRenderer')
|
||||||
|
if renderer:
|
||||||
|
for entry in self._playlist_entries(renderer):
|
||||||
|
yield entry
|
||||||
|
continuation_list[0] = self._extract_continuation(renderer)
|
||||||
|
continue
|
||||||
|
renderer = isr_content.get('gridRenderer')
|
||||||
|
if renderer:
|
||||||
|
for entry in self._grid_entries(renderer):
|
||||||
|
yield entry
|
||||||
|
continuation_list[0] = self._extract_continuation(renderer)
|
||||||
|
continue
|
||||||
|
renderer = isr_content.get('shelfRenderer')
|
||||||
|
if renderer:
|
||||||
|
for entry in self._shelf_entries(renderer):
|
||||||
|
yield entry
|
||||||
|
continuation_list[0] = self._extract_continuation(parent_renderer)
|
||||||
|
continue
|
||||||
|
renderer = isr_content.get('backstagePostThreadRenderer')
|
||||||
|
if renderer:
|
||||||
|
for entry in self._post_thread_entries(renderer):
|
||||||
|
yield entry
|
||||||
|
continuation_list[0] = self._extract_continuation(renderer)
|
||||||
|
continue
|
||||||
|
renderer = isr_content.get('videoRenderer')
|
||||||
|
if renderer:
|
||||||
|
entry = self._video_entry(renderer)
|
||||||
|
if entry:
|
||||||
|
yield entry
|
||||||
|
if not continuation_list[0]:
|
||||||
|
continuation_list[0] = self._extract_continuation(is_renderer)
|
||||||
|
if not continuation_list[0]:
|
||||||
|
continuation_list[0] = self._extract_continuation(parent_renderer)
|
||||||
|
|
||||||
|
continuation_list = [None] # Python 2 does not support nonlocal
|
||||||
|
parent_renderer = (
|
||||||
|
try_get(tab, lambda x: x['sectionListRenderer'], dict)
|
||||||
|
or try_get(tab, lambda x: x['richGridRenderer'], dict) or {})
|
||||||
|
if parent_renderer:
|
||||||
|
for entry in extract_entries(parent_renderer):
|
||||||
|
yield entry
|
||||||
|
|
||||||
|
continuation = continuation_list[0]
|
||||||
|
|
||||||
headers = {
|
headers = {
|
||||||
'x-youtube-client-name': '1',
|
'x-youtube-client-name': '1',
|
||||||
|
@ -2953,6 +3003,8 @@ def _entries(self, tab, identity_token):
|
||||||
for page_num in itertools.count(1):
|
for page_num in itertools.count(1):
|
||||||
if not continuation:
|
if not continuation:
|
||||||
break
|
break
|
||||||
|
if hasattr(self, '_MAX_PAGES') and page_num > self._MAX_PAGES:
|
||||||
|
break
|
||||||
browse = self._download_json(
|
browse = self._download_json(
|
||||||
'https://www.youtube.com/browse_ajax', None,
|
'https://www.youtube.com/browse_ajax', None,
|
||||||
'Downloading page %d' % page_num,
|
'Downloading page %d' % page_num,
|
||||||
|
@ -2984,6 +3036,13 @@ def _entries(self, tab, identity_token):
|
||||||
yield entry
|
yield entry
|
||||||
continuation = self._extract_continuation(continuation_renderer)
|
continuation = self._extract_continuation(continuation_renderer)
|
||||||
continue
|
continue
|
||||||
|
continuation_renderer = continuation_contents.get('sectionListContinuation')
|
||||||
|
if continuation_renderer:
|
||||||
|
continuation_list = [None]
|
||||||
|
for entry in extract_entries(continuation_renderer):
|
||||||
|
yield entry
|
||||||
|
continuation = continuation_list[0]
|
||||||
|
continue
|
||||||
|
|
||||||
continuation_items = try_get(
|
continuation_items = try_get(
|
||||||
response, lambda x: x['onResponseReceivedActions'][0]['appendContinuationItemsAction']['continuationItems'], list)
|
response, lambda x: x['onResponseReceivedActions'][0]['appendContinuationItemsAction']['continuationItems'], list)
|
||||||
|
@ -2998,7 +3057,12 @@ def _entries(self, tab, identity_token):
|
||||||
yield entry
|
yield entry
|
||||||
continuation = self._extract_continuation(video_list_renderer)
|
continuation = self._extract_continuation(video_list_renderer)
|
||||||
continue
|
continue
|
||||||
|
renderer = continuation_item.get('itemSectionRenderer')
|
||||||
|
if renderer:
|
||||||
|
for entry in self._itemSection_entries(renderer):
|
||||||
|
yield entry
|
||||||
|
continuation = self._extract_continuation({'contents': continuation_items})
|
||||||
|
continue
|
||||||
break
|
break
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
@ -3036,6 +3100,7 @@ def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
|
||||||
selected_tab = self._extract_selected_tab(tabs)
|
selected_tab = self._extract_selected_tab(tabs)
|
||||||
renderer = try_get(
|
renderer = try_get(
|
||||||
data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
|
data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
|
||||||
|
playlist_id = None
|
||||||
if renderer:
|
if renderer:
|
||||||
channel_title = renderer.get('title') or item_id
|
channel_title = renderer.get('title') or item_id
|
||||||
tab_title = selected_tab.get('title')
|
tab_title = selected_tab.get('title')
|
||||||
|
@ -3050,6 +3115,8 @@ def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
|
||||||
title = renderer.get('title')
|
title = renderer.get('title')
|
||||||
description = None
|
description = None
|
||||||
playlist_id = item_id
|
playlist_id = item_id
|
||||||
|
if playlist_id is None:
|
||||||
|
return None
|
||||||
playlist = self.playlist_result(
|
playlist = self.playlist_result(
|
||||||
self._entries(selected_tab['content'], identity_token),
|
self._entries(selected_tab['content'], identity_token),
|
||||||
playlist_id=playlist_id, playlist_title=title,
|
playlist_id=playlist_id, playlist_title=title,
|
||||||
|
@ -3214,7 +3281,7 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
|
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
|
||||||
IE_DESC = 'YouTube.com live streams'
|
IE_DESC = 'YouTube.com live streams'
|
||||||
_VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
|
_VALID_URL = r'(?P<base_url>%s)/live' % YoutubeTabIE._VALID_URL
|
||||||
IE_NAME = 'youtube:live'
|
IE_NAME = 'youtube:live'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
@ -3361,12 +3428,42 @@ class YoutubeSearchDateIE(YoutubeSearchIE):
|
||||||
_SEARCH_PARAMS = 'CAI%3D'
|
_SEARCH_PARAMS = 'CAI%3D'
|
||||||
|
|
||||||
|
|
||||||
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
|
class YoutubeSearchURLIE(InfoExtractor):
    """Extractor for YouTube search-result page URLs (youtube.com/results?...).

    The search query and the optional ``sp`` filter token are read from the
    URL and the actual work is delegated to a YoutubeSearchIE instance.
    """
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = 'youtube:search_url'
    # Not referenced by this class; kept so the attribute stays available.
    _PARAM_REGEX = r''
    # 'param1' captures an ``sp`` that precedes the query parameter,
    # 'param2' one that follows it; 'query' is the still URL-encoded search text.
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results/?(?:\?|\?[^#]*?&)(?:sp=(?P<param1>[^&#]+)&(?:[^#]*&)?)?(?:q|search_query)=(?P<query>[^#&]+)(?:[^#]*?&sp=(?P<param2>[^#&]+))?'
    _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Run the search described by *url* and return its results.

        Returns whatever YoutubeSearchIE._get_n_results produces for the
        decoded query, limited to _MAX_RESULTS.
        """
        mobj = re.match(self._VALID_URL, url)
        query = compat_urllib_parse_unquote_plus(mobj.group('query'))
        search_ie = YoutubeSearchIE(self._downloader)
        # Set on the instance only, so the YoutubeSearchIE class defaults
        # are left untouched. The value is None when the URL has no ``sp``.
        search_ie._SEARCH_PARAMS = mobj.group('param1') or mobj.group('param2')
        search_ie._MAX_RESULTS = self._MAX_RESULTS
        return search_ie._get_n_results(query, self._MAX_RESULTS)
|
||||||
|
|
||||||
|
|
||||||
|
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
|
||||||
"""
|
"""
|
||||||
Base class for feed extractors
|
Base class for feed extractors
|
||||||
Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
|
Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
|
||||||
"""
|
"""
|
||||||
_LOGIN_REQUIRED = True
|
_LOGIN_REQUIRED = True
|
||||||
|
_TESTS = []
|
||||||
|
|
||||||
|
# _MAX_PAGES = 5
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def IE_NAME(self):
|
def IE_NAME(self):
|
||||||
|
@ -3375,50 +3472,39 @@ def IE_NAME(self):
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
self._login()
|
self._login()
|
||||||
|
|
||||||
def _entries(self, page):
|
def _shelf_entries(self, shelf_renderer):
|
||||||
# The extraction process is the same as for playlists, but the regex
|
renderer = try_get(shelf_renderer, lambda x: x['content']['gridRenderer'], dict)
|
||||||
# for the video ids doesn't contain an index
|
if not renderer:
|
||||||
ids = []
|
return
|
||||||
more_widget_html = content_html = page
|
for entry in self._grid_entries(renderer):
|
||||||
for page_num in itertools.count(1):
|
yield entry
|
||||||
matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)
|
|
||||||
|
|
||||||
# 'recommended' feed has infinite 'load more' and each new portion spins
|
def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
|
||||||
# the same videos in (sometimes) slightly different order, so we'll check
|
selected_tab = self._extract_selected_tab(tabs)
|
||||||
# for unicity and break when portion has no new videos
|
return self.playlist_result(
|
||||||
new_ids = list(filter(lambda video_id: video_id not in ids, orderedSet(matches)))
|
self._entries(selected_tab['content'], identity_token),
|
||||||
if not new_ids:
|
playlist_title=self._PLAYLIST_TITLE)
|
||||||
break
|
|
||||||
|
|
||||||
ids.extend(new_ids)
|
|
||||||
|
|
||||||
for entry in self._ids_to_results(new_ids):
|
|
||||||
yield entry
|
|
||||||
|
|
||||||
mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
|
|
||||||
if not mobj:
|
|
||||||
break
|
|
||||||
|
|
||||||
more = self._download_json(
|
|
||||||
'https://www.youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
|
|
||||||
'Downloading page #%s' % page_num,
|
|
||||||
transform_source=uppercase_escape,
|
|
||||||
headers=self._YOUTUBE_CLIENT_HEADERS)
|
|
||||||
content_html = more['content_html']
|
|
||||||
more_widget_html = more['load_more_widget_html']
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
page = self._download_webpage(
|
item_id = self._FEED_NAME
|
||||||
'https://www.youtube.com/feed/%s' % self._FEED_NAME,
|
url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
|
||||||
self._PLAYLIST_TITLE)
|
webpage = self._download_webpage(url, item_id)
|
||||||
return self.playlist_result(
|
identity_token = self._search_regex(
|
||||||
self._entries(page), playlist_title=self._PLAYLIST_TITLE)
|
r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
|
||||||
|
'identity token', default=None)
|
||||||
|
data = self._extract_yt_initial_data(item_id, webpage)
|
||||||
|
tabs = try_get(
|
||||||
|
data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
|
||||||
|
if tabs:
|
||||||
|
return self._extract_from_tabs(item_id, webpage, data, tabs, identity_token)
|
||||||
|
# Failed to recognize
|
||||||
|
raise ExtractorError('Unable to recognize feed page')
|
||||||
|
|
||||||
|
|
||||||
class YoutubeWatchLaterIE(InfoExtractor):
|
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
|
||||||
IE_NAME = 'youtube:watchlater'
|
IE_DESC = 'Youtube watch later list, ":ytwatchlater" or "WL" for short (requires authentication)'
|
||||||
IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/watch_later|:ytwatchlater'
|
_VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/watch_later|:ytwatchlater'
|
||||||
|
_FEED_NAME = 'watchlater'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.youtube.com/feed/watch_later',
|
'url': 'https://www.youtube.com/feed/watch_later',
|
||||||
|
@ -3429,25 +3515,33 @@ class YoutubeWatchLaterIE(InfoExtractor):
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
return self.url_result(
|
return self.url_result('WL', ie=YoutubePlaylistIE.ie_key())
|
||||||
'https://www.youtube.com/playlist?list=WL', ie=YoutubeTabIE.ie_key())
|
|
||||||
_, video = self._check_download_just_video(url, 'WL')
|
|
||||||
if video:
|
class YoutubeFavouritesIE(YoutubeFeedsInfoExtractor):
|
||||||
return video
|
IE_DESC = 'YouTube.com liked videos, ":ytfav" or "LL" for short (requires authentication)'
|
||||||
_, playlist = self._extract_playlist('WL')
|
# Keyword-only extractor: the "o(u)rite" part is optional so that the bare
# ":ytfav" shortcut matches as well as ":ytfavorites" / ":ytfavourites".
_VALID_URL = r':ytfav(?:ou?rite)?s?'
|
||||||
return playlist
|
_FEED_NAME = 'favourites'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
'url': ':ytfav',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
return self.url_result('LL', ie=YoutubePlaylistIE.ie_key())
|
||||||
|
|
||||||
|
|
||||||
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
|
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
|
||||||
IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
|
IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
|
||||||
_VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
|
_VALID_URL = r'https?://(?:www\.)?youtube\.com(?:/feed/recommended|/?[?#]|/?$)|:ytrec(?:ommended)?'
|
||||||
_FEED_NAME = 'recommended'
|
_FEED_NAME = 'recommended'
|
||||||
_PLAYLIST_TITLE = 'Youtube Recommended videos'
|
_PLAYLIST_TITLE = 'Youtube Recommended videos'
|
||||||
|
|
||||||
|
|
||||||
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
|
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
|
||||||
IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
|
IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
|
||||||
_VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
|
_VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsub(?:scription)?s?'
|
||||||
_FEED_NAME = 'subscriptions'
|
_FEED_NAME = 'subscriptions'
|
||||||
_PLAYLIST_TITLE = 'Youtube Subscriptions'
|
_PLAYLIST_TITLE = 'Youtube Subscriptions'
|
||||||
|
|
||||||
|
@ -3525,40 +3619,9 @@ def _real_extract(self, url):
|
||||||
expected=True)
|
expected=True)
|
||||||
|
|
||||||
|
|
||||||
# Old extractors. Are these cases handled elsewhere?
|
# Do Youtube show urls even exist anymore? I couldn't find any
|
||||||
|
r'''
|
||||||
class YoutubeSearchURLIE(YoutubeSearchIE):
|
class YoutubeShowIE(YoutubeTabIE):
|
||||||
IE_DESC = 'YouTube.com search URLs'
|
|
||||||
IE_NAME = 'youtube:search_url'
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
|
|
||||||
'playlist_mincount': 5,
|
|
||||||
'info_dict': {
|
|
||||||
'title': 'youtube-dl test video',
|
|
||||||
}
|
|
||||||
}, {
|
|
||||||
'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
|
|
||||||
'only_matching': True,
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _process_json_dict(self, obj, videos, c):
|
|
||||||
if "videoId" in obj:
|
|
||||||
videos.append(obj)
|
|
||||||
return
|
|
||||||
|
|
||||||
if "nextContinuationData" in obj:
|
|
||||||
c["continuation"] = obj["nextContinuationData"]
|
|
||||||
return
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
mobj = re.match(self._VALID_URL, url)
|
|
||||||
query = compat_urllib_parse_unquote_plus(mobj.group('query'))
|
|
||||||
webpage = self._download_webpage(url, query)
|
|
||||||
return self.playlist_result(self._entries(webpage, query, max_pages=5), playlist_title=query)
|
|
||||||
|
|
||||||
|
|
||||||
class YoutubeShowIE(InfoExtractor):
|
|
||||||
IE_DESC = 'YouTube.com (multi-season) shows'
|
IE_DESC = 'YouTube.com (multi-season) shows'
|
||||||
_VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
|
_VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
|
||||||
IE_NAME = 'youtube:show'
|
IE_NAME = 'youtube:show'
|
||||||
|
@ -3575,15 +3638,4 @@ def _real_extract(self, url):
|
||||||
playlist_id = self._match_id(url)
|
playlist_id = self._match_id(url)
|
||||||
return super(YoutubeShowIE, self)._real_extract(
|
return super(YoutubeShowIE, self)._real_extract(
|
||||||
'https://www.youtube.com/show/%s/playlists' % playlist_id)
|
'https://www.youtube.com/show/%s/playlists' % playlist_id)
|
||||||
|
'''
|
||||||
|
|
||||||
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
|
|
||||||
IE_NAME = 'youtube:favorites'
|
|
||||||
IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
|
|
||||||
_LOGIN_REQUIRED = True
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
webpage = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
|
|
||||||
playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, 'favourites playlist id')
|
|
||||||
return self.url_result(playlist_id, 'YoutubePlaylist')
|
|
||||||
|
|
Loading…
Reference in a new issue