mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-26 02:55:17 +00:00
[extractor/youtube] Support changing extraction language (#4470)
Adds `--extractor-args youtube:lang=<supported lang code>` extractor arg to prefer translated fields (e.g. title and description) of that language, if available, for all YouTube extractors. See README or error message for list of supported language codes. Closes https://github.com/yt-dlp/yt-dlp/issues/387 Authored by: coletdjnz
This commit is contained in:
parent
0c0b78b273
commit
c26f9b991a
2 changed files with 286 additions and 75 deletions
|
@ -1705,6 +1705,8 @@ #### youtube
|
|||
* E.g. `all,all,1000,10` will get a maximum of 1000 replies total, with up to 10 replies per thread. `1000,all,100` will get a maximum of 1000 comments, with a maximum of 100 replies total
|
||||
* `innertube_host`: Innertube API host to use for all API requests; e.g. `studio.youtube.com`, `youtubei.googleapis.com`. Note that cookies exported from one subdomain will not work on others
|
||||
* `innertube_key`: Innertube API key to use for all API requests
|
||||
* `lang`: Language code (case-sensitive) whose translated metadata (e.g. title and description) should be preferred when available. By default, the video's primary-language metadata is preferred, with a fallback to the `en` translation.
|
||||
* See `_SUPPORTED_LANG_CODES` in youtube.py for the list of supported content language codes.
|
||||
|
||||
#### youtubetab (YouTube playlists, channels, feeds, etc.)
|
||||
* `skip`: One or more of `webpage` (skip initial webpage download), `authcheck` (allow the download of playlists requiring authentication when no initial webpage is downloaded. This may cause unwanted behavior, see [#1122](https://github.com/yt-dlp/yt-dlp/pull/1122) for more details)
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
import calendar
|
||||
import copy
|
||||
import datetime
|
||||
import enum
|
||||
import hashlib
|
||||
import itertools
|
||||
import json
|
||||
|
@ -275,6 +276,15 @@ def build_innertube_clients():
|
|||
build_innertube_clients()
|
||||
|
||||
|
||||
class BadgeType(enum.Enum):
    """Kinds of badge that `_extract_badges` reports as the 'type' of each entry."""

    # Availability / privacy related badges
    AVAILABILITY_UNLISTED = enum.auto()
    AVAILABILITY_PRIVATE = enum.auto()
    AVAILABILITY_PUBLIC = enum.auto()
    AVAILABILITY_PREMIUM = enum.auto()
    AVAILABILITY_SUBSCRIPTION = enum.auto()

    # Live-status badge
    LIVE_NOW = enum.auto()
|
||||
|
||||
|
||||
class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
"""Provide base functions for Youtube extractors"""
|
||||
|
||||
|
@ -367,6 +377,36 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||
r'(?:www\.)?piped\.privacy\.com\.de',
|
||||
)
|
||||
|
||||
# Language codes taken from the account/account_menu endpoint.
# XXX: This is the set of languages supported by the YouTube UI and API;
# it differs slightly from the languages that YouTube Studio supports for translation.
_SUPPORTED_LANG_CODES = [
    'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et',
    'en-IN', 'en-GB', 'en', 'es', 'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA',
    'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv', 'lt', 'hu', 'nl',
    'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn',
    'fi', 'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn',
    'ru', 'sr', 'uk', 'el', 'hy', 'iw', 'ur', 'ar', 'fa', 'ne',
    'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn',
    'ml', 'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW',
    'zh-HK', 'ja', 'ko',
]
|
||||
|
||||
@functools.cached_property
def _preferred_lang(self):
    """
    Language code requested through the `lang` extractor argument (ie_key 'Youtube').

    Evaluates to None when the argument is unset or empty.
    Raises an expected ExtractorError listing the accepted codes when the
    requested code is not in `_SUPPORTED_LANG_CODES` (comparison is case-sensitive).
    Reports a warning when a supported code other than 'en' is selected.
    """
    lang_args = self._configuration_arg('lang', ie_key='Youtube', casesense=True, default=[''])
    lang_code = lang_args[0]
    if not lang_code:
        return None

    if lang_code in self._SUPPORTED_LANG_CODES:
        # Nothing to warn about for 'en'
        if lang_code != 'en':
            self.report_warning(
                f'Preferring "{lang_code}" translated fields. Note that some metadata extraction may fail or be incorrect.')
        return lang_code

    raise ExtractorError(
        f'Unsupported language code: {lang_code}. Supported language codes (case-sensitive): {join_nonempty(*self._SUPPORTED_LANG_CODES, delim=", ")}.',
        expected=True)
|
||||
|
||||
def _initialize_consent(self):
|
||||
cookies = self._get_cookies('https://www.youtube.com/')
|
||||
if cookies.get('__Secure-3PSID'):
|
||||
|
@ -391,7 +431,7 @@ def _initialize_pref(self):
|
|||
pref = dict(urllib.parse.parse_qsl(pref_cookie.value))
|
||||
except ValueError:
|
||||
self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
|
||||
pref.update({'hl': 'en', 'tz': 'UTC'})
|
||||
pref.update({'hl': self._preferred_lang or 'en', 'tz': 'UTC'})
|
||||
self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))
|
||||
|
||||
def _real_initialize(self):
|
||||
|
@ -439,7 +479,7 @@ def _extract_context(self, ytcfg=None, default_client='web'):
|
|||
(ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
|
||||
# Enforce language and tz for extraction
|
||||
client_context = traverse_obj(context, 'client', expected_type=dict, default={})
|
||||
client_context.update({'hl': 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0})
|
||||
client_context.update({'hl': self._preferred_lang or 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0})
|
||||
return context
|
||||
|
||||
_SAPISID = None
|
||||
|
@ -678,13 +718,49 @@ def _extract_and_report_alerts(self, data, *args, **kwargs):
|
|||
return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
|
||||
|
||||
def _extract_badges(self, renderer: dict):
    """
    Extract the badges attached to a renderer.

    Every 'metadataBadgeRenderer' found under renderer['badges'] is classified,
    in order of preference, by its privacy icon type, by its badge style and,
    as a last resort, by looking for known English words in its label.

    @param renderer: renderer dict that may contain a 'badges' list
    @returns         list of {'type': BadgeType} dicts, at most one per badge
    """
    privacy_icon_map = {
        'PRIVACY_UNLISTED': BadgeType.AVAILABILITY_UNLISTED,
        'PRIVACY_PRIVATE': BadgeType.AVAILABILITY_PRIVATE,
        'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC
    }

    badge_style_map = {
        'BADGE_STYLE_TYPE_MEMBERS_ONLY': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM,
        'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW
    }

    label_map = {
        'unlisted': BadgeType.AVAILABILITY_UNLISTED,
        'private': BadgeType.AVAILABILITY_PRIVATE,
        'members only': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'live': BadgeType.LIVE_NOW,
        'premium': BadgeType.AVAILABILITY_PREMIUM
    }

    badges = []
    for badge in traverse_obj(renderer, ('badges', ..., 'metadataBadgeRenderer'), default=[]):
        badge_type = (
            privacy_icon_map.get(traverse_obj(badge, ('icon', 'iconType'), expected_type=str))
            # expected_type=str keeps unhashable values away from dict.get()
            or badge_style_map.get(traverse_obj(badge, 'style', expected_type=str))
        )
        if badge_type:
            badges.append({'type': badge_type})
            continue

        # fallback, won't work in some languages
        label = traverse_obj(badge, 'label', expected_type=str, default='').lower()
        for match, label_badge_type in label_map.items():
            if match in label:
                # Record the type found via the label (badge_type is always falsy here)
                # and stop at the first match so a badge is reported only once
                badges.append({'type': label_badge_type})
                break

    return badges
|
||||
|
||||
@staticmethod
def _has_badge(badges, badge_type):
    """@returns True if any entry of badges has the given badge_type as its 'type'"""
    def is_wanted(_, badge):
        return badge['type'] == badge_type

    matching_badges = traverse_obj(badges, is_wanted)
    return bool(matching_badges)
|
||||
|
||||
@staticmethod
|
||||
def _get_text(data, *path_list, max_runs=None):
|
||||
for path in path_list or [None]:
|
||||
|
@ -755,9 +831,9 @@ def extract_relative_time(relative_time_text):
|
|||
except ValueError:
|
||||
return None
|
||||
|
||||
def _extract_time_text(self, renderer, *path_list):
|
||||
"""@returns (timestamp, time_text)"""
|
||||
text = self._get_text(renderer, *path_list) or ''
|
||||
def _parse_time_text(self, text):
|
||||
if not text:
|
||||
return
|
||||
dt = self.extract_relative_time(text)
|
||||
timestamp = None
|
||||
if isinstance(dt, datetime.datetime):
|
||||
|
@ -770,9 +846,10 @@ def _extract_time_text(self, renderer, *path_list):
|
|||
(r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
|
||||
text.lower(), 'time text', default=None)))
|
||||
|
||||
if text and timestamp is None:
|
||||
self.report_warning(f"Cannot parse localized time text '{text}'" + bug_reports_message(), only_once=True)
|
||||
return timestamp, text
|
||||
if text and timestamp is None and self._preferred_lang in (None, 'en'):
|
||||
self.report_warning(
|
||||
f'Cannot parse localized time text "{text}"', only_once=True)
|
||||
return timestamp
|
||||
|
||||
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
|
||||
ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
|
||||
|
@ -848,7 +925,7 @@ def _extract_video(self, renderer):
|
|||
channel_id = traverse_obj(
|
||||
renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
|
||||
expected_type=str, get_all=False)
|
||||
timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
|
||||
time_text = self._get_text(renderer, 'publishedTimeText') or ''
|
||||
scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
|
||||
overlay_style = traverse_obj(
|
||||
renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
|
||||
|
@ -874,15 +951,21 @@ def _extract_video(self, renderer):
|
|||
'uploader': uploader,
|
||||
'channel_id': channel_id,
|
||||
'thumbnails': thumbnails,
|
||||
'upload_date': (strftime_or_none(timestamp, '%Y%m%d')
|
||||
'upload_date': (strftime_or_none(self._parse_time_text(time_text), '%Y%m%d')
|
||||
if self._configuration_arg('approximate_date', ie_key='youtubetab')
|
||||
else None),
|
||||
'live_status': ('is_upcoming' if scheduled_timestamp is not None
|
||||
else 'was_live' if 'streamed' in time_text.lower()
|
||||
else 'is_live' if overlay_style == 'LIVE' or 'live now' in badges
|
||||
else 'is_live' if overlay_style == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW)
|
||||
else None),
|
||||
'release_timestamp': scheduled_timestamp,
|
||||
'availability': self._availability(needs_premium='premium' in badges, needs_subscription='members only' in badges)
|
||||
'availability':
|
||||
'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
|
||||
else self._availability(
|
||||
is_private=self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE) or None,
|
||||
needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
|
||||
needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
|
||||
is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None)
|
||||
}
|
||||
|
||||
|
||||
|
@ -2306,6 +2389,61 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||
'tags': [],
|
||||
'uploader_url': 'http://www.youtube.com/user/nao20010128nao',
|
||||
}
|
||||
}, {
|
||||
# Prefer primary title+description language metadata by default
|
||||
# Do not prefer translated description if primary is empty
|
||||
'url': 'https://www.youtube.com/watch?v=el3E4MbxRqQ',
|
||||
'info_dict': {
|
||||
'id': 'el3E4MbxRqQ',
|
||||
'ext': 'mp4',
|
||||
'title': 'dlp test video 2 - primary sv no desc',
|
||||
'description': '',
|
||||
'channel': 'cole-dlp-test-acc',
|
||||
'tags': [],
|
||||
'view_count': int,
|
||||
'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
|
||||
'like_count': int,
|
||||
'playable_in_embed': True,
|
||||
'availability': 'unlisted',
|
||||
'thumbnail': 'https://i.ytimg.com/vi_webp/el3E4MbxRqQ/maxresdefault.webp',
|
||||
'age_limit': 0,
|
||||
'duration': 5,
|
||||
'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
|
||||
'uploader_url': 'http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
|
||||
'live_status': 'not_live',
|
||||
'upload_date': '20220908',
|
||||
'categories': ['People & Blogs'],
|
||||
'uploader': 'cole-dlp-test-acc',
|
||||
'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
|
||||
},
|
||||
'params': {'skip_download': True}
|
||||
}, {
|
||||
# Extractor argument: prefer translated title+description
|
||||
'url': 'https://www.youtube.com/watch?v=gHKT4uU8Zng',
|
||||
'info_dict': {
|
||||
'id': 'gHKT4uU8Zng',
|
||||
'ext': 'mp4',
|
||||
'channel': 'cole-dlp-test-acc',
|
||||
'tags': [],
|
||||
'duration': 5,
|
||||
'live_status': 'not_live',
|
||||
'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
|
||||
'upload_date': '20220728',
|
||||
'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
|
||||
'view_count': int,
|
||||
'categories': ['People & Blogs'],
|
||||
'thumbnail': 'https://i.ytimg.com/vi_webp/gHKT4uU8Zng/maxresdefault.webp',
|
||||
'title': 'dlp test video title translated (fr)',
|
||||
'availability': 'public',
|
||||
'uploader': 'cole-dlp-test-acc',
|
||||
'age_limit': 0,
|
||||
'description': 'dlp test video description translated (fr)',
|
||||
'playable_in_embed': True,
|
||||
'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
|
||||
'uploader_url': 'http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
|
||||
},
|
||||
'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}},
|
||||
'expected_warnings': [r'Preferring "fr" translated fields'],
|
||||
}, {
|
||||
'note': '6 channel audio',
|
||||
'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',
|
||||
|
@ -2907,8 +3045,10 @@ def _extract_comment(self, comment_renderer, parent=None):
|
|||
|
||||
text = self._get_text(comment_renderer, 'contentText')
|
||||
|
||||
# note: timestamp is an estimate calculated from the current time and time_text
|
||||
timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
|
||||
# Timestamp is an estimate calculated from the current time and time_text
|
||||
time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
|
||||
timestamp = self._parse_time_text(time_text)
|
||||
|
||||
author = self._get_text(comment_renderer, 'authorText')
|
||||
author_id = try_get(comment_renderer,
|
||||
lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], str)
|
||||
|
@ -3554,11 +3694,19 @@ def _real_extract(self, url):
|
|||
microformats = traverse_obj(
|
||||
player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
|
||||
expected_type=dict, default=[])
|
||||
video_title = (
|
||||
get_first(video_details, 'title')
|
||||
or self._get_text(microformats, (..., 'title'))
|
||||
or search_meta(['og:title', 'twitter:title', 'title']))
|
||||
video_description = get_first(video_details, 'shortDescription')
|
||||
|
||||
translated_title = self._get_text(microformats, (..., 'title'))
|
||||
video_title = (self._preferred_lang and translated_title
|
||||
or get_first(video_details, 'title') # primary
|
||||
or translated_title
|
||||
or search_meta(['og:title', 'twitter:title', 'title']))
|
||||
translated_description = self._get_text(microformats, (..., 'description'))
|
||||
original_description = get_first(video_details, 'shortDescription')
|
||||
video_description = (
|
||||
self._preferred_lang and translated_description
|
||||
# If original description is blank, it will be an empty string.
|
||||
# Do not prefer translated description in this case.
|
||||
or original_description if original_description is not None else translated_description)
|
||||
|
||||
multifeed_metadata_list = get_first(
|
||||
player_responses,
|
||||
|
@ -3988,7 +4136,8 @@ def process_language(container, base_url, lang_code, sub_name, query):
|
|||
and info.get('live_status') != 'is_upcoming'
|
||||
and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', [])
|
||||
):
|
||||
upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d') or upload_date
|
||||
upload_date = strftime_or_none(
|
||||
self._parse_time_text(self._get_text(vpir, 'dateText')), '%Y%m%d') or upload_date
|
||||
info['upload_date'] = upload_date
|
||||
|
||||
for to, frm in fallbacks.items():
|
||||
|
@ -4000,33 +4149,25 @@ def process_language(container, base_url, lang_code, sub_name, query):
|
|||
if v:
|
||||
info[d_k] = v
|
||||
|
||||
is_private = get_first(video_details, 'isPrivate', expected_type=bool)
|
||||
is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
|
||||
is_membersonly = None
|
||||
is_premium = None
|
||||
if initial_data and is_private is not None:
|
||||
is_membersonly = False
|
||||
is_premium = False
|
||||
contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
|
||||
badge_labels = set()
|
||||
for content in contents:
|
||||
if not isinstance(content, dict):
|
||||
continue
|
||||
badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
|
||||
for badge_label in badge_labels:
|
||||
if badge_label.lower() == 'members only':
|
||||
is_membersonly = True
|
||||
elif badge_label.lower() == 'premium':
|
||||
is_premium = True
|
||||
elif badge_label.lower() == 'unlisted':
|
||||
is_unlisted = True
|
||||
badges = self._extract_badges(traverse_obj(contents, (..., 'videoPrimaryInfoRenderer'), get_all=False))
|
||||
|
||||
info['availability'] = self._availability(
|
||||
is_private=is_private,
|
||||
needs_premium=is_premium,
|
||||
needs_subscription=is_membersonly,
|
||||
needs_auth=info['age_limit'] >= 18,
|
||||
is_unlisted=None if is_private is None else is_unlisted)
|
||||
is_private = (self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
|
||||
or get_first(video_details, 'isPrivate', expected_type=bool))
|
||||
|
||||
info['availability'] = (
|
||||
'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
|
||||
else self._availability(
|
||||
is_private=is_private,
|
||||
needs_premium=(
|
||||
self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM)
|
||||
or False if initial_data and is_private is not None else None),
|
||||
needs_subscription=(
|
||||
self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION)
|
||||
or False if initial_data and is_private is not None else None),
|
||||
needs_auth=info['age_limit'] >= 18,
|
||||
is_unlisted=None if is_private is None else (
|
||||
self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
|
||||
or get_first(microformats, 'isUnlisted', expected_type=bool))))
|
||||
|
||||
info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)
|
||||
|
||||
|
@ -4472,7 +4613,7 @@ def _get_uncropped(url):
|
|||
playlist_id = item_id
|
||||
|
||||
playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats')
|
||||
last_updated_unix, _ = self._extract_time_text(playlist_stats, 2)
|
||||
last_updated_unix = self._parse_time_text(self._get_text(playlist_stats, 2))
|
||||
if title is None:
|
||||
title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id
|
||||
title += format_field(selected_tab, 'title', ' - %s')
|
||||
|
@ -4566,31 +4707,37 @@ def _extract_availability(self, data):
|
|||
Note: Unless YouTube tells us explicitly, we do not assume it is public
|
||||
@param data: response
|
||||
"""
|
||||
is_private = is_unlisted = None
|
||||
renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
|
||||
badge_labels = self._extract_badges(renderer)
|
||||
|
||||
player_header_privacy = traverse_obj(
|
||||
data, ('header', 'playlistHeaderRenderer', 'privacy'), expected_type=str)
|
||||
|
||||
badges = self._extract_badges(renderer)
|
||||
|
||||
# Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
|
||||
privacy_dropdown_entries = try_get(
|
||||
renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
|
||||
for renderer_dict in privacy_dropdown_entries:
|
||||
is_selected = try_get(
|
||||
renderer_dict, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
|
||||
if not is_selected:
|
||||
continue
|
||||
label = self._get_text(renderer_dict, ('privacyDropdownItemRenderer', 'label'))
|
||||
if label:
|
||||
badge_labels.add(label.lower())
|
||||
break
|
||||
privacy_setting_icon = traverse_obj(
|
||||
renderer, (
|
||||
'privacyForm', 'dropdownFormFieldRenderer', 'dropdown', 'dropdownRenderer', 'entries',
|
||||
lambda _, v: v['privacyDropdownItemRenderer']['isSelected'], 'privacyDropdownItemRenderer', 'icon', 'iconType'),
|
||||
get_all=False, expected_type=str)
|
||||
|
||||
for badge_label in badge_labels:
|
||||
if badge_label == 'unlisted':
|
||||
is_unlisted = True
|
||||
elif badge_label == 'private':
|
||||
is_private = True
|
||||
elif badge_label == 'public':
|
||||
is_unlisted = is_private = False
|
||||
return self._availability(is_private, False, False, False, is_unlisted)
|
||||
return (
|
||||
'public' if (
|
||||
self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
|
||||
or player_header_privacy == 'PUBLIC'
|
||||
or privacy_setting_icon == 'PRIVACY_PUBLIC')
|
||||
else self._availability(
|
||||
is_private=(
|
||||
self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
|
||||
or player_header_privacy == 'PRIVATE' if player_header_privacy is not None
|
||||
else privacy_setting_icon == 'PRIVACY_PRIVATE' if privacy_setting_icon is not None else None),
|
||||
is_unlisted=(
|
||||
self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
|
||||
or player_header_privacy == 'UNLISTED' if player_header_privacy is not None
|
||||
else privacy_setting_icon == 'PRIVACY_UNLISTED' if privacy_setting_icon is not None else None),
|
||||
needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
|
||||
needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
|
||||
needs_auth=False))
|
||||
|
||||
@staticmethod
|
||||
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
|
||||
|
@ -4866,6 +5013,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
|||
'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
|
||||
'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
|
||||
'availability': 'public',
|
||||
},
|
||||
'playlist_count': 1,
|
||||
}, {
|
||||
|
@ -4883,6 +5031,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
|||
'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
|
||||
'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
|
||||
'availability': 'public',
|
||||
},
|
||||
'playlist_count': 0,
|
||||
}, {
|
||||
|
@ -5029,6 +5178,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
|||
'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
|
||||
'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',
|
||||
'channel': 'Christiaan008',
|
||||
'availability': 'public',
|
||||
},
|
||||
'playlist_count': 96,
|
||||
}, {
|
||||
|
@ -5047,6 +5197,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
|||
'view_count': int,
|
||||
'description': '',
|
||||
'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
|
||||
'availability': 'public',
|
||||
},
|
||||
'playlist_mincount': 1123,
|
||||
'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
|
||||
|
@ -5070,6 +5221,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
|||
'channel': 'Interstellar Movie',
|
||||
'description': '',
|
||||
'modified_date': r're:\d{8}',
|
||||
'availability': 'public',
|
||||
},
|
||||
'playlist_mincount': 21,
|
||||
}, {
|
||||
|
@ -5088,6 +5240,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
|||
'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
|
||||
'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
|
||||
'modified_date': r're:\d{8}',
|
||||
'availability': 'public',
|
||||
},
|
||||
'playlist_mincount': 200,
|
||||
'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
|
||||
|
@ -5107,6 +5260,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
|||
'uploader_url': 'https://www.youtube.com/c/blanktv',
|
||||
'modified_date': r're:\d{8}',
|
||||
'description': '',
|
||||
'availability': 'public',
|
||||
},
|
||||
'playlist_mincount': 1000,
|
||||
'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
|
||||
|
@ -5125,6 +5279,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
|||
'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
|
||||
'channel_url': 'https://www.youtube.com/user/Computerphile',
|
||||
'channel': 'Computerphile',
|
||||
'availability': 'public',
|
||||
},
|
||||
'playlist_mincount': 11,
|
||||
}, {
|
||||
|
@ -5290,6 +5445,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
|||
'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
|
||||
'tags': [],
|
||||
'channel': 'NoCopyrightSounds',
|
||||
'availability': 'public',
|
||||
},
|
||||
'playlist_mincount': 166,
|
||||
'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
|
||||
|
@ -5310,6 +5466,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
|||
'modified_date': r're:\d{8}',
|
||||
'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
|
||||
'description': '',
|
||||
'availability': 'public',
|
||||
},
|
||||
'expected_warnings': [
|
||||
'The URL does not have a videos tab',
|
||||
|
@ -5410,6 +5567,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
|||
'channel': 'Royalty Free Music - Topic',
|
||||
'view_count': int,
|
||||
'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
|
||||
'availability': 'public',
|
||||
},
|
||||
'expected_warnings': [
|
||||
'does not have a videos tab',
|
||||
|
@ -5443,6 +5601,45 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
|||
'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
|
||||
},
|
||||
'playlist_mincount': 2
|
||||
}, {
|
||||
'note': 'translated tab name',
|
||||
'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/playlists',
|
||||
'info_dict': {
|
||||
'id': 'UCiu-3thuViMebBjw_5nWYrA',
|
||||
'tags': [],
|
||||
'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
|
||||
'description': '',
|
||||
'title': 'cole-dlp-test-acc - 再生リスト',
|
||||
'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
|
||||
'uploader': 'cole-dlp-test-acc',
|
||||
'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
|
||||
'channel': 'cole-dlp-test-acc',
|
||||
},
|
||||
'playlist_mincount': 1,
|
||||
'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
|
||||
'expected_warnings': ['Preferring "ja"'],
|
||||
}, {
|
||||
# XXX: this should really check flat playlist entries, but the test suite doesn't support that
|
||||
'note': 'preferred lang set with playlist with translated video titles',
|
||||
'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
|
||||
'info_dict': {
|
||||
'id': 'PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
|
||||
'tags': [],
|
||||
'view_count': int,
|
||||
'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
|
||||
'uploader': 'cole-dlp-test-acc',
|
||||
'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
|
||||
'channel': 'cole-dlp-test-acc',
|
||||
'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
|
||||
'description': 'test',
|
||||
'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
|
||||
'title': 'dlp test playlist',
|
||||
'availability': 'public',
|
||||
},
|
||||
'playlist_mincount': 1,
|
||||
'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
|
||||
'expected_warnings': ['Preferring "ja"'],
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
|
@ -5527,10 +5724,20 @@ def get_mobj(url):
|
|||
tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
|
||||
if tabs:
|
||||
selected_tab = self._extract_selected_tab(tabs)
|
||||
selected_tab_name = selected_tab.get('title', '').lower()
|
||||
selected_tab_url = urljoin(
|
||||
url, traverse_obj(selected_tab, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url')))
|
||||
translated_tab_name = selected_tab.get('title', '').lower()
|
||||
|
||||
# Prefer tab name from tab url as it is always in en,
|
||||
# but only when preferred lang is set as it may not extract reliably in all cases.
|
||||
selected_tab_name = (self._preferred_lang in (None, 'en') and translated_tab_name
|
||||
or selected_tab_url and get_mobj(selected_tab_url)['tab'][1:] # primary
|
||||
or translated_tab_name)
|
||||
|
||||
if selected_tab_name == 'home':
|
||||
selected_tab_name = 'featured'
|
||||
requested_tab_name = mobj['tab'][1:]
|
||||
|
||||
if 'no-youtube-channel-redirect' not in compat_opts:
|
||||
if requested_tab_name == 'live': # Live tab should have redirected to the video
|
||||
raise UserNotLive(video_id=mobj['id'])
|
||||
|
@ -5642,6 +5849,7 @@ class YoutubePlaylistIE(InfoExtractor):
|
|||
'channel': 'milan',
|
||||
'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
|
||||
'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
|
||||
'availability': 'public',
|
||||
},
|
||||
'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
|
||||
}, {
|
||||
|
@ -5660,6 +5868,7 @@ class YoutubePlaylistIE(InfoExtractor):
|
|||
'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
|
||||
'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
|
||||
'modified_date': r're:\d{8}',
|
||||
'availability': 'public',
|
||||
},
|
||||
'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
|
||||
}, {
|
||||
|
@ -5848,7 +6057,7 @@ def _extract_notification_renderer(self, notification):
|
|||
title = self._search_regex(
|
||||
rf'{re.escape(channel or "")}[^:]+: (.+)', notification_title,
|
||||
'video title', default=None)
|
||||
upload_date = (strftime_or_none(self._extract_time_text(notification, 'sentTimeText')[0], '%Y%m%d')
|
||||
upload_date = (strftime_or_none(self._parse_time_text(self._get_text(notification, 'sentTimeText')), '%Y%m%d')
|
||||
if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE.ie_key())
|
||||
else None)
|
||||
return {
|
||||
|
|
Loading…
Reference in a new issue