[youtube] Extract more thumbnails

* The thumbnail URLs are hard-coded and their actual existence is tested lazily. * Added option `--no-check-formats` to not test them. Closes #340. Related: #402, #337, https://github.com/ytdl-org/youtube-dl/issues/29049
2025-01-03 06:01:02 +00:00 · 2021-07-15 22:49:59 +05:30 · 2021-07-15 22:49:59 +05:30 · 0ba692acc8
commit 0ba692acc8
parent d9488f69c1
5 changed files with 61 additions and 34 deletions
--- a/README.md
+++ b/README.md
@ -638,7 +638,9 @@ ## Video Format Options:
    --no-prefer-free-formats         Don't give any special preference to free
                                     containers (default)
    --check-formats                  Check that the formats selected are
-                                     actually downloadable (Experimental)
+                                     actually downloadable
    --no-check-formats               Do not check that the formats selected are
                                     actually downloadable
    -F, --list-formats               List all available formats of requested
                                     videos
    --merge-output-format FORMAT     If a merge is required (e.g.
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@ -209,6 +209,9 @@ class YoutubeDL(object):
                       into a single file
    allow_multiple_audio_streams:   Allow multiple audio streams to be merged
                       into a single file
    check_formats:     Whether to test if the formats are downloadable.
                       Can be True (check all), False (check none)
                       or None (check only if requested by extractor)
    paths:             Dictionary of output paths. The allowed keys are 'home',
                       'temp' and the keys of OUTTMPL_TYPES (in utils.py)
    outtmpl:           Dictionary of templates for output names. Allowed keys
@ -1944,15 +1947,24 @@ def _sanitize_thumbnails(self, info_dict):
                t.get('id') if t.get('id') is not None else '',
                t.get('url')))
-            def test_thumbnail(t):
+            def thumbnail_tester():
-                self.to_screen('[info] Testing thumbnail %s' % t['id'])
+                if self.params.get('check_formats'):
-                try:
+                    def to_screen(msg):
-                    self.urlopen(HEADRequest(t['url']))
+                        return self.to_screen(f'[info] {msg}')
-                except network_exceptions as err:
+                else:
-                    self.to_screen('[info] Unable to connect to thumbnail %s URL "%s" - %s. Skipping...' % (
+                    to_screen = self.write_debug
-                        t['id'], t['url'], error_to_compat_str(err)))
+
-                    return False
+                def test_thumbnail(t):
-                return True
+                    to_screen('Testing thumbnail %s' % t['id'])
                    try:
                        self.urlopen(HEADRequest(t['url']))
                    except network_exceptions as err:
                        to_screen('Unable to connect to thumbnail %s URL "%s" - %s. Skipping...' % (
                            t['id'], t['url'], error_to_compat_str(err)))
                        return False
                    return True
                return test_thumbnail
            for i, t in enumerate(thumbnails):
                if t.get('id') is None:
@ -1960,8 +1972,11 @@ def test_thumbnail(t):
                if t.get('width') and t.get('height'):
                    t['resolution'] = '%dx%d' % (t['width'], t['height'])
                t['url'] = sanitize_url(t['url'])
-            if self.params.get('check_formats'):
+
-                info_dict['thumbnails'] = LazyList(filter(test_thumbnail, thumbnails[::-1])).reverse()
+            if self.params.get('check_formats') is not False:
                info_dict['thumbnails'] = LazyList(filter(thumbnail_tester(), thumbnails[::-1])).reverse()
            else:
                info_dict['thumbnails'] = thumbnails
    def process_video_result(self, info_dict, download=True):
        assert info_dict.get('_type', 'video') == 'video'
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@ -229,6 +229,7 @@ class InfoExtractor(object):
                        * "resolution" (optional, string "{width}x{height}",
                                        deprecated)
                        * "filesize" (optional, int)
                        * "_test_url" (optional, bool) - If true, test the URL
    thumbnail:      Full URL to a video thumbnail image.
    description:    Full video description.
    uploader:       Full name of the video uploader.
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@ -2645,7 +2645,21 @@ def feed_entry(name):
                                f['stretched_ratio'] = ratio
                        break
        category = microformat.get('category') or search_meta('genre')
        channel_id = video_details.get('channelId') \
            or microformat.get('externalChannelId') \
            or search_meta('channelId')
        duration = int_or_none(
            video_details.get('lengthSeconds')
            or microformat.get('lengthSeconds')) \
            or parse_duration(search_meta('duration'))
        is_live = video_details.get('isLive')
        is_upcoming = video_details.get('isUpcoming')
        owner_profile_url = microformat.get('ownerProfileUrl')
        thumbnails = []
        thumbnail_types = ['maxresdefault', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default', '1', '2', '3']
        for container in (video_details, microformat):
            for thumbnail in (try_get(
                    container,
@ -2662,34 +2676,25 @@ def feed_entry(name):
                    'url': thumbnail_url,
                    'height': int_or_none(thumbnail.get('height')),
                    'width': int_or_none(thumbnail.get('width')),
                    'preference': 1 if 'maxresdefault' in thumbnail_url else -1
                })
        thumbnail_url = search_meta(['og:image', 'twitter:image'])
        if thumbnail_url:
            thumbnails.append({
                'url': thumbnail_url,
                'preference': 1 if 'maxresdefault' in thumbnail_url else -1
            })
-        # All videos have a maxresdefault thumbnail, but sometimes it does not appear in the webpage
+        # The best resolution thumbnails sometimes do not appear in the webpage
-        # See: https://github.com/ytdl-org/youtube-dl/issues/29049
+        # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
-        thumbnails.append({
+        thumbnails.extend({
-            'url': 'https://i.ytimg.com/vi/%s/maxresdefault.jpg' % video_id,
+            'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
-            'preference': 1,
+                video_id=video_id, name=name, ext=ext,
-        })
+                webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
            '_test_url': True,
        } for name in thumbnail_types for ext in ('webp', 'jpg'))
        for thumb in thumbnails:
            i = next((i for i, t in enumerate(thumbnail_types) if f'/{video_id}/{t}' in thumb['url']), 20)
            thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
        self._remove_duplicate_formats(thumbnails)
        category = microformat.get('category') or search_meta('genre')
        channel_id = video_details.get('channelId') \
            or microformat.get('externalChannelId') \
            or search_meta('channelId')
        duration = int_or_none(
            video_details.get('lengthSeconds')
            or microformat.get('lengthSeconds')) \
            or parse_duration(search_meta('duration'))
        is_live = video_details.get('isLive')
        is_upcoming = video_details.get('isUpcoming')
        owner_profile_url = microformat.get('ownerProfileUrl')
        info = {
            'id': video_id,
            'title': self._live_title(video_title) if is_live else video_title,
--- a/yt_dlp/options.py
+++ b/yt_dlp/options.py
@ -524,8 +524,12 @@ def _dict_from_options_callback(
        help="Don't give any special preference to free containers (default)")
    video_format.add_option(
        '--check-formats',
-        action='store_true', dest='check_formats', default=False,
+        action='store_true', dest='check_formats', default=None,
-        help="Check that the formats selected are actually downloadable (Experimental)")
+        help='Check that the formats selected are actually downloadable')
    video_format.add_option(
        '--no-check-formats',
        action='store_false', dest='check_formats',
        help='Do not check that the formats selected are actually downloadable')
    video_format.add_option(
        '-F', '--list-formats',
        action='store_true', dest='listformats',