mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-01-03 06:01:02 +00:00
[youtube] Extract more thumbnails
* The thumbnail URLs are hard-coded and their actual existence is tested lazily
* Added option `--no-check-formats` to not test them

Closes #340, Related: #402, #337, https://github.com/ytdl-org/youtube-dl/issues/29049
This commit is contained in:
parent
d9488f69c1
commit
0ba692acc8
5 changed files with 61 additions and 34 deletions
|
@ -638,7 +638,9 @@ ## Video Format Options:
|
||||||
--no-prefer-free-formats Don't give any special preference to free
|
--no-prefer-free-formats Don't give any special preference to free
|
||||||
containers (default)
|
containers (default)
|
||||||
--check-formats Check that the formats selected are
|
--check-formats Check that the formats selected are
|
||||||
actually downloadable (Experimental)
|
actually downloadable
|
||||||
|
--no-check-formats Do not check that the formats selected are
|
||||||
|
actually downloadable
|
||||||
-F, --list-formats List all available formats of requested
|
-F, --list-formats List all available formats of requested
|
||||||
videos
|
videos
|
||||||
--merge-output-format FORMAT If a merge is required (e.g.
|
--merge-output-format FORMAT If a merge is required (e.g.
|
||||||
|
|
|
@ -209,6 +209,9 @@ class YoutubeDL(object):
|
||||||
into a single file
|
into a single file
|
||||||
allow_multiple_audio_streams: Allow multiple audio streams to be merged
|
allow_multiple_audio_streams: Allow multiple audio streams to be merged
|
||||||
into a single file
|
into a single file
|
||||||
|
check_formats Whether to test if the formats are downloadable.
|
||||||
|
Can be True (check all), False (check none)
|
||||||
|
or None (check only if requested by extractor)
|
||||||
paths: Dictionary of output paths. The allowed keys are 'home'
|
paths: Dictionary of output paths. The allowed keys are 'home'
|
||||||
'temp' and the keys of OUTTMPL_TYPES (in utils.py)
|
'temp' and the keys of OUTTMPL_TYPES (in utils.py)
|
||||||
outtmpl: Dictionary of templates for output names. Allowed keys
|
outtmpl: Dictionary of templates for output names. Allowed keys
|
||||||
|
@ -1944,15 +1947,24 @@ def _sanitize_thumbnails(self, info_dict):
|
||||||
t.get('id') if t.get('id') is not None else '',
|
t.get('id') if t.get('id') is not None else '',
|
||||||
t.get('url')))
|
t.get('url')))
|
||||||
|
|
||||||
def test_thumbnail(t):
|
def thumbnail_tester():
|
||||||
self.to_screen('[info] Testing thumbnail %s' % t['id'])
|
if self.params.get('check_formats'):
|
||||||
try:
|
def to_screen(msg):
|
||||||
self.urlopen(HEADRequest(t['url']))
|
return self.to_screen(f'[info] {msg}')
|
||||||
except network_exceptions as err:
|
else:
|
||||||
self.to_screen('[info] Unable to connect to thumbnail %s URL "%s" - %s. Skipping...' % (
|
to_screen = self.write_debug
|
||||||
t['id'], t['url'], error_to_compat_str(err)))
|
|
||||||
return False
|
def test_thumbnail(t):
|
||||||
return True
|
to_screen('Testing thumbnail %s' % t['id'])
|
||||||
|
try:
|
||||||
|
self.urlopen(HEADRequest(t['url']))
|
||||||
|
except network_exceptions as err:
|
||||||
|
to_screen('Unable to connect to thumbnail %s URL "%s" - %s. Skipping...' % (
|
||||||
|
t['id'], t['url'], error_to_compat_str(err)))
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
|
||||||
|
return test_thumbnail
|
||||||
|
|
||||||
for i, t in enumerate(thumbnails):
|
for i, t in enumerate(thumbnails):
|
||||||
if t.get('id') is None:
|
if t.get('id') is None:
|
||||||
|
@ -1960,8 +1972,11 @@ def test_thumbnail(t):
|
||||||
if t.get('width') and t.get('height'):
|
if t.get('width') and t.get('height'):
|
||||||
t['resolution'] = '%dx%d' % (t['width'], t['height'])
|
t['resolution'] = '%dx%d' % (t['width'], t['height'])
|
||||||
t['url'] = sanitize_url(t['url'])
|
t['url'] = sanitize_url(t['url'])
|
||||||
if self.params.get('check_formats'):
|
|
||||||
info_dict['thumbnails'] = LazyList(filter(test_thumbnail, thumbnails[::-1])).reverse()
|
if self.params.get('check_formats') is not False:
|
||||||
|
info_dict['thumbnails'] = LazyList(filter(thumbnail_tester(), thumbnails[::-1])).reverse()
|
||||||
|
else:
|
||||||
|
info_dict['thumbnails'] = thumbnails
|
||||||
|
|
||||||
def process_video_result(self, info_dict, download=True):
|
def process_video_result(self, info_dict, download=True):
|
||||||
assert info_dict.get('_type', 'video') == 'video'
|
assert info_dict.get('_type', 'video') == 'video'
|
||||||
|
|
|
@ -229,6 +229,7 @@ class InfoExtractor(object):
|
||||||
* "resolution" (optional, string "{width}x{height}",
|
* "resolution" (optional, string "{width}x{height}",
|
||||||
deprecated)
|
deprecated)
|
||||||
* "filesize" (optional, int)
|
* "filesize" (optional, int)
|
||||||
|
* "_test_url" (optional, bool) - If true, test the URL
|
||||||
thumbnail: Full URL to a video thumbnail image.
|
thumbnail: Full URL to a video thumbnail image.
|
||||||
description: Full video description.
|
description: Full video description.
|
||||||
uploader: Full name of the video uploader.
|
uploader: Full name of the video uploader.
|
||||||
|
|
|
@ -2645,7 +2645,21 @@ def feed_entry(name):
|
||||||
f['stretched_ratio'] = ratio
|
f['stretched_ratio'] = ratio
|
||||||
break
|
break
|
||||||
|
|
||||||
|
category = microformat.get('category') or search_meta('genre')
|
||||||
|
channel_id = video_details.get('channelId') \
|
||||||
|
or microformat.get('externalChannelId') \
|
||||||
|
or search_meta('channelId')
|
||||||
|
duration = int_or_none(
|
||||||
|
video_details.get('lengthSeconds')
|
||||||
|
or microformat.get('lengthSeconds')) \
|
||||||
|
or parse_duration(search_meta('duration'))
|
||||||
|
is_live = video_details.get('isLive')
|
||||||
|
is_upcoming = video_details.get('isUpcoming')
|
||||||
|
owner_profile_url = microformat.get('ownerProfileUrl')
|
||||||
|
|
||||||
thumbnails = []
|
thumbnails = []
|
||||||
|
thumbnail_types = ['maxresdefault', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default', '1', '2', '3']
|
||||||
|
|
||||||
for container in (video_details, microformat):
|
for container in (video_details, microformat):
|
||||||
for thumbnail in (try_get(
|
for thumbnail in (try_get(
|
||||||
container,
|
container,
|
||||||
|
@ -2662,34 +2676,25 @@ def feed_entry(name):
|
||||||
'url': thumbnail_url,
|
'url': thumbnail_url,
|
||||||
'height': int_or_none(thumbnail.get('height')),
|
'height': int_or_none(thumbnail.get('height')),
|
||||||
'width': int_or_none(thumbnail.get('width')),
|
'width': int_or_none(thumbnail.get('width')),
|
||||||
'preference': 1 if 'maxresdefault' in thumbnail_url else -1
|
|
||||||
})
|
})
|
||||||
thumbnail_url = search_meta(['og:image', 'twitter:image'])
|
thumbnail_url = search_meta(['og:image', 'twitter:image'])
|
||||||
if thumbnail_url:
|
if thumbnail_url:
|
||||||
thumbnails.append({
|
thumbnails.append({
|
||||||
'url': thumbnail_url,
|
'url': thumbnail_url,
|
||||||
'preference': 1 if 'maxresdefault' in thumbnail_url else -1
|
|
||||||
})
|
})
|
||||||
# All videos have a maxresdefault thumbnail, but sometimes it does not appear in the webpage
|
# The best resolution thumbnails sometimes do not appear in the webpage
|
||||||
# See: https://github.com/ytdl-org/youtube-dl/issues/29049
|
# See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
|
||||||
thumbnails.append({
|
thumbnails.extend({
|
||||||
'url': 'https://i.ytimg.com/vi/%s/maxresdefault.jpg' % video_id,
|
'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
|
||||||
'preference': 1,
|
video_id=video_id, name=name, ext=ext,
|
||||||
})
|
webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
|
||||||
|
'_test_url': True,
|
||||||
|
} for name in thumbnail_types for ext in ('webp', 'jpg'))
|
||||||
|
for thumb in thumbnails:
|
||||||
|
i = next((i for i, t in enumerate(thumbnail_types) if f'/{video_id}/{t}' in thumb['url']), 20)
|
||||||
|
thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
|
||||||
self._remove_duplicate_formats(thumbnails)
|
self._remove_duplicate_formats(thumbnails)
|
||||||
|
|
||||||
category = microformat.get('category') or search_meta('genre')
|
|
||||||
channel_id = video_details.get('channelId') \
|
|
||||||
or microformat.get('externalChannelId') \
|
|
||||||
or search_meta('channelId')
|
|
||||||
duration = int_or_none(
|
|
||||||
video_details.get('lengthSeconds')
|
|
||||||
or microformat.get('lengthSeconds')) \
|
|
||||||
or parse_duration(search_meta('duration'))
|
|
||||||
is_live = video_details.get('isLive')
|
|
||||||
is_upcoming = video_details.get('isUpcoming')
|
|
||||||
owner_profile_url = microformat.get('ownerProfileUrl')
|
|
||||||
|
|
||||||
info = {
|
info = {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': self._live_title(video_title) if is_live else video_title,
|
'title': self._live_title(video_title) if is_live else video_title,
|
||||||
|
|
|
@ -524,8 +524,12 @@ def _dict_from_options_callback(
|
||||||
help="Don't give any special preference to free containers (default)")
|
help="Don't give any special preference to free containers (default)")
|
||||||
video_format.add_option(
|
video_format.add_option(
|
||||||
'--check-formats',
|
'--check-formats',
|
||||||
action='store_true', dest='check_formats', default=False,
|
action='store_true', dest='check_formats', default=None,
|
||||||
help="Check that the formats selected are actually downloadable (Experimental)")
|
help='Check that the formats selected are actually downloadable')
|
||||||
|
video_format.add_option(
|
||||||
|
'--no-check-formats',
|
||||||
|
action='store_false', dest='check_formats',
|
||||||
|
help='Do not check that the formats selected are actually downloadable')
|
||||||
video_format.add_option(
|
video_format.add_option(
|
||||||
'-F', '--list-formats',
|
'-F', '--list-formats',
|
||||||
action='store_true', dest='listformats',
|
action='store_true', dest='listformats',
|
||||||
|
|
Loading…
Reference in a new issue