Update to ytdl-commit-cf2dbec

cf2dbec630 Except: [kakao] improve info extraction and detect geo restriction d8085580f6
2024-12-22 06:00:00 +00:00 · 2021-02-20 02:14:36 +05:30 · 2021-02-20 02:14:36 +05:30 · bc2ca1bb75
commit bc2ca1bb75
parent 5e41dca334
19 changed files with 1013 additions and 395 deletions
--- a/test/test_youtube_lists.py
+++ b/test/test_youtube_lists.py
@ -12,6 +12,7 @@
 from youtube_dlc.extractor import (
    YoutubePlaylistIE,
    YoutubeTabIE,
    YoutubeIE,
 )
@ -57,14 +58,22 @@ def test_youtube_toptracks(self):
        entries = result['entries']
        self.assertEqual(len(entries), 100)
-    def test_youtube_flat_playlist_titles(self):
+    def test_youtube_flat_playlist_extraction(self):
        dl = FakeYDL()
        dl.params['extract_flat'] = True
-        ie = YoutubePlaylistIE(dl)
+        ie = YoutubeTabIE(dl)
-        result = ie.extract('https://www.youtube.com/playlist?list=PL-KKIb8rvtMSrAO9YFbeM6UQrAqoFTUWv')
+        result = ie.extract('https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc')
        self.assertIsPlaylist(result)
-        for entry in result['entries']:
+        entries = list(result['entries'])
-            self.assertTrue(entry.get('title'))
+        self.assertTrue(len(entries) == 1)
        video = entries[0]
        self.assertEqual(video['_type'], 'url_transparent')
        self.assertEqual(video['ie_key'], 'Youtube')
        self.assertEqual(video['id'], 'BaW_jenozKc')
        self.assertEqual(video['url'], 'BaW_jenozKc')
        self.assertEqual(video['title'], 'youtube-dl test video "\'/\\ä↭𝕐')
        self.assertEqual(video['duration'], 10)
        self.assertEqual(video['uploader'], 'Philipp Hagemeister')
 if __name__ == '__main__':
--- a/youtube_dlc/extractor/ard.py
+++ b/youtube_dlc/extractor/ard.py
@ -324,20 +324,42 @@ def _real_extract(self, url):
        formats = []
        for a in video_node.findall('.//asset'):
            file_name = xpath_text(a, './fileName', default=None)
            if not file_name:
                continue
            format_type = a.attrib.get('type')
            format_url = url_or_none(file_name)
            if format_url:
                ext = determine_ext(file_name)
                if ext == 'm3u8':
                    formats.extend(self._extract_m3u8_formats(
                        format_url, display_id, 'mp4', entry_protocol='m3u8_native',
                        m3u8_id=format_type or 'hls', fatal=False))
                    continue
                elif ext == 'f4m':
                    formats.extend(self._extract_f4m_formats(
                        update_url_query(format_url, {'hdcore': '3.7.0'}),
                        display_id, f4m_id=format_type or 'hds', fatal=False))
                    continue
            f = {
-                'format_id': a.attrib['type'],
+                'format_id': format_type,
-                'width': int_or_none(a.find('./frameWidth').text),
+                'width': int_or_none(xpath_text(a, './frameWidth')),
-                'height': int_or_none(a.find('./frameHeight').text),
+                'height': int_or_none(xpath_text(a, './frameHeight')),
-                'vbr': int_or_none(a.find('./bitrateVideo').text),
+                'vbr': int_or_none(xpath_text(a, './bitrateVideo')),
-                'abr': int_or_none(a.find('./bitrateAudio').text),
+                'abr': int_or_none(xpath_text(a, './bitrateAudio')),
-                'vcodec': a.find('./codecVideo').text,
+                'vcodec': xpath_text(a, './codecVideo'),
-                'tbr': int_or_none(a.find('./totalBitrate').text),
+                'tbr': int_or_none(xpath_text(a, './totalBitrate')),
            }
-            if a.find('./serverPrefix').text:
+            server_prefix = xpath_text(a, './serverPrefix', default=None)
-                f['url'] = a.find('./serverPrefix').text
+            if server_prefix:
-                f['playpath'] = a.find('./fileName').text
+                f.update({
                    'url': server_prefix,
                    'playpath': file_name,
                })
            else:
-                f['url'] = a.find('./fileName').text
+                if not format_url:
                    continue
                f['url'] = format_url
            formats.append(f)
        self._sort_formats(formats)
--- a/youtube_dlc/extractor/canvas.py
+++ b/youtube_dlc/extractor/canvas.py
@ -7,19 +7,21 @@
 from .gigya import GigyaBaseIE
 from ..compat import compat_HTTPError
 from ..utils import (
    extract_attributes,
    ExtractorError,
-    strip_or_none,
+    clean_html,
    extract_attributes,
    float_or_none,
    get_element_by_class,
    int_or_none,
    merge_dicts,
    str_or_none,
    strip_or_none,
    url_or_none,
 )
 class CanvasIE(InfoExtractor):
-    _VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrt(?:video|nieuws)|sporza)/assets/(?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrt(?:video|nieuws)|sporza|dako)/assets/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
        'md5': '68993eda72ef62386a15ea2cf3c93107',
@ -332,3 +334,51 @@ def _real_extract(self, url):
            'display_id': display_id,
            'season_number': int_or_none(page.get('episode_season')),
        })
 class DagelijkseKostIE(InfoExtractor):
    # Recipe pages on dagelijksekost.een.be. The page only supplies metadata
    # and an asset id; the media itself is resolved by CanvasIE through a
    # url_transparent result.
    IE_DESC = 'dagelijksekost.een.be'
    _VALID_URL = r'https?://dagelijksekost\.een\.be/gerechten/(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'https://dagelijksekost.een.be/gerechten/hachis-parmentier-met-witloof',
        'md5': '30bfffc323009a3e5f689bef6efa2365',
        'info_dict': {
            'id': 'md-ast-27a4d1ff-7d7b-425e-b84f-a4d227f592fa',
            'display_id': 'hachis-parmentier-met-witloof',
            'ext': 'mp4',
            'title': 'Hachis parmentier met witloof',
            'description': 'md5:9960478392d87f63567b5b117688cdc5',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 283.02,
        },
        'expected_warnings': ['is not a supported codec'],
    }
    def _real_extract(self, url):
        """Scrape title, description and asset id, then hand over to CanvasIE."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        # Title: on-page heading first, the twitter:title meta tag only when
        # the heading element is missing or empty.
        raw_title = get_element_by_class('dish-metadata__title', webpage)
        if not raw_title:
            raw_title = self._html_search_meta('twitter:title', webpage)
        title = strip_or_none(raw_title)
        # Description: cleaned on-page block first, meta tags as a fallback.
        description = clean_html(
            get_element_by_class('dish-description', webpage))
        if not description:
            description = self._html_search_meta(
                ('description', 'twitter:description', 'og:description'),
                webpage)
        # The asset id is the value of the (single- or double-quoted)
        # data-url attribute.
        video_id = self._html_search_regex(
            r'data-url=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id',
            group='id')
        asset_url = 'https://mediazone.vrt.be/api/v1/dako/assets/%s' % video_id
        return {
            '_type': 'url_transparent',
            'url': asset_url,
            'ie_key': CanvasIE.ie_key(),
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
        }
--- a/youtube_dlc/extractor/ccma.py
+++ b/youtube_dlc/extractor/ccma.py
@ -1,12 +1,14 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import calendar
 import datetime
 import re
 from .common import InfoExtractor
 from ..utils import (
    clean_html,
    extract_timezone,
    int_or_none,
    parse_duration,
    parse_resolution,
@ -97,8 +99,9 @@ def _real_extract(self, url):
        timestamp = None
        data_utc = try_get(informacio, lambda x: x['data_emissio']['utc'])
        try:
-            timestamp = datetime.datetime.strptime(
+            timezone, data_utc = extract_timezone(data_utc)
-                data_utc, '%Y-%d-%mT%H:%M:%S%z').timestamp()
+            timestamp = calendar.timegm((datetime.datetime.strptime(
                data_utc, '%Y-%d-%mT%H:%M:%S') - timezone).timetuple())
        except TypeError:
            pass
--- a/youtube_dlc/extractor/dplay.py
+++ b/youtube_dlc/extractor/dplay.py
@ -1,6 +1,7 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import json
 import re
 from .common import InfoExtractor
@ -10,11 +11,13 @@
    ExtractorError,
    float_or_none,
    int_or_none,
    strip_or_none,
    unified_timestamp,
 )
 class DPlayIE(InfoExtractor):
    _PATH_REGEX = r'/(?P<id>[^/]+/[^/?#]+)'
    _VALID_URL = r'''(?x)https?://
        (?P<domain>
            (?:www\.)?(?P<host>d
@ -24,7 +27,7 @@ class DPlayIE(InfoExtractor):
                )
            )|
            (?P<subdomain_country>es|it)\.dplay\.com
-        )/[^/]+/(?P<id>[^/]+/[^/?#]+)'''
+        )/[^/]+''' + _PATH_REGEX
    _TESTS = [{
        # non geo restricted, via secure api, unsigned download hls URL
@ -151,56 +154,79 @@ class DPlayIE(InfoExtractor):
        'only_matching': True,
    }]
    def _process_errors(self, e, geo_countries):
        """Turn a Disco API error response into a user-facing exception.

        The body of ``e.cause`` is read and parsed as JSON; only the first
        entry of its ``errors`` list is inspected. This method never returns
        normally unless ``raise_geo_restricted`` itself returns, in which case
        the generic error below is raised.
        """
        payload = self._parse_json(e.cause.read().decode('utf-8'), None)
        first_error = payload['errors'][0]
        code = first_error.get('code')
        if code == 'access.denied.geoblocked':
            self.raise_geo_restricted(countries=geo_countries)
        if code in ('access.denied.missingpackage', 'invalid.token'):
            raise ExtractorError(
                'This video is only available for registered users. You may want to use --cookies.', expected=True)
        # Anything else: surface the API's own description of the failure.
        raise ExtractorError(first_error['detail'], expected=True)
    def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
        headers['Authorization'] = 'Bearer ' + self._download_json(
            disco_base + 'token', display_id, 'Downloading token',
            query={
                'realm': realm,
            })['data']['attributes']['token']
    def _download_video_playback_info(self, disco_base, video_id, headers):
        streaming = self._download_json(
            disco_base + 'playback/videoPlaybackInfo/' + video_id,
            video_id, headers=headers)['data']['attributes']['streaming']
        streaming_list = []
        for format_id, format_dict in streaming.items():
            streaming_list.append({
                'type': format_id,
                'url': format_dict.get('url'),
            })
        return streaming_list
    def _get_disco_api_info(self, url, display_id, disco_host, realm, country):
        geo_countries = [country.upper()]
        self._initialize_geo_bypass({
            'countries': geo_countries,
        })
        disco_base = 'https://%s/' % disco_host
        token = self._download_json(
            disco_base + 'token', display_id, 'Downloading token',
            query={
                'realm': realm,
            })['data']['attributes']['token']
        headers = {
            'Referer': url,
            'Authorization': 'Bearer ' + token,
        }
-        video = self._download_json(
+        self._update_disco_api_headers(headers, disco_base, display_id, realm)
-            disco_base + 'content/videos/' + display_id, display_id,
+        try:
-            headers=headers, query={
+            video = self._download_json(
-                'fields[channel]': 'name',
+                disco_base + 'content/videos/' + display_id, display_id,
-                'fields[image]': 'height,src,width',
+                headers=headers, query={
-                'fields[show]': 'name',
+                    'fields[channel]': 'name',
-                'fields[tag]': 'name',
+                    'fields[image]': 'height,src,width',
-                'fields[video]': 'description,episodeNumber,name,publishStart,seasonNumber,videoDuration',
+                    'fields[show]': 'name',
-                'include': 'images,primaryChannel,show,tags'
+                    'fields[tag]': 'name',
-            })
+                    'fields[video]': 'description,episodeNumber,name,publishStart,seasonNumber,videoDuration',
                    'include': 'images,primaryChannel,show,tags'
                })
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
                self._process_errors(e, geo_countries)
            raise
        video_id = video['data']['id']
        info = video['data']['attributes']
        title = info['name'].strip()
        formats = []
        try:
-            streaming = self._download_json(
+            streaming = self._download_video_playback_info(
-                disco_base + 'playback/videoPlaybackInfo/' + video_id,
+                disco_base, video_id, headers)
                display_id, headers=headers)['data']['attributes']['streaming']
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
-                info = self._parse_json(e.cause.read().decode('utf-8'), display_id)
+                self._process_errors(e, geo_countries)
                error = info['errors'][0]
                error_code = error.get('code')
                if error_code == 'access.denied.geoblocked':
                    self.raise_geo_restricted(countries=geo_countries)
                elif error_code == 'access.denied.missingpackage':
                    self.raise_login_required()
                raise ExtractorError(info['errors'][0]['detail'], expected=True)
            raise
-        for format_id, format_dict in streaming.items():
+        for format_dict in streaming:
            if not isinstance(format_dict, dict):
                continue
            format_url = format_dict.get('url')
            if not format_url:
                continue
            format_id = format_dict.get('type')
            ext = determine_ext(format_url)
            if format_id == 'dash' or ext == 'mpd':
                formats.extend(self._extract_mpd_formats(
@ -248,7 +274,7 @@ def _get_disco_api_info(self, url, display_id, disco_host, realm, country):
            'id': video_id,
            'display_id': display_id,
            'title': title,
-            'description': info.get('description'),
+            'description': strip_or_none(info.get('description')),
            'duration': float_or_none(info.get('videoDuration'), 1000),
            'timestamp': unified_timestamp(info.get('publishStart')),
            'series': series,
@ -268,3 +294,75 @@ def _real_extract(self, url):
        host = 'disco-api.' + domain if domain[0] == 'd' else 'eu2-prod.disco-api.com'
        return self._get_disco_api_info(
            url, display_id, host, 'dplay' + country, country)
 class DiscoveryPlusIE(DPlayIE):
    # discoveryplus.com front-end for the Disco API flow inherited from
    # DPlayIE; only the header setup and the playback-info request differ.
    _VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/video' + DPlayIE._PATH_REGEX
    _TESTS = [{
        'url': 'https://www.discoveryplus.com/video/property-brothers-forever-home/food-and-family',
        'info_dict': {
            'id': '1140794',
            'display_id': 'property-brothers-forever-home/food-and-family',
            'ext': 'mp4',
            'title': 'Food and Family',
            'description': 'The brothers help a Richmond family expand their single-level home.',
            'duration': 2583.113,
            'timestamp': 1609304400,
            'upload_date': '20201230',
            'creator': 'HGTV',
            'series': 'Property Brothers: Forever Home',
            'season_number': 1,
            'episode_number': 1,
        },
        'skip': 'Available for Premium users',
    }]
    def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
        """Add the client identifier header in place; no token is requested."""
        client_id = 'WEB:UNKNOWN:dplus_us:15.0.0'
        headers['x-disco-client'] = client_id
    def _download_video_playback_info(self, disco_base, video_id, headers):
        """Send the JSON playback request and return the ``streaming`` value."""
        request_body = {
            'deviceInfo': {
                'adBlocker': False,
            },
            'videoId': video_id,
            'wisteriaProperties': {
                'platform': 'desktop',
            },
        }
        response = self._download_json(
            disco_base + 'playback/v3/videoPlaybackInfo',
            video_id, headers=headers,
            data=json.dumps(request_body).encode('utf-8'))
        return response['data']['attributes']['streaming']
    def _real_extract(self, url):
        """Run the shared Disco API extraction with this site's host/realm/country."""
        disco_host, realm, country = (
            'us1-prod-direct.discoveryplus.com', 'go', 'us')
        return self._get_disco_api_info(
            url, self._match_id(url), disco_host, realm, country)
 class HGTVDeIE(DPlayIE):
    # de.hgtv.com shows, extracted through the Disco API flow inherited from
    # DPlayIE with a fixed host, realm and country.
    _VALID_URL = r'https?://de\.hgtv\.com/sendungen' + DPlayIE._PATH_REGEX
    _TESTS = [{
        'url': 'https://de.hgtv.com/sendungen/tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette/',
        'info_dict': {
            'id': '151205',
            'display_id': 'tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette',
            'ext': 'mp4',
            'title': 'Wer braucht schon eine Toilette',
            'description': 'md5:05b40a27e7aed2c9172de34d459134e2',
            'duration': 1177.024,
            'timestamp': 1595705400,
            'upload_date': '20200725',
            'creator': 'HGTV',
            'series': 'Tiny House - klein, aber oho',
            'season_number': 3,
            'episode_number': 3,
        },
        'params': {
            'format': 'bestvideo',
        },
    }]
    def _real_extract(self, url):
        """Run the shared Disco API extraction for the matched show/episode path."""
        disco_host, realm, country = 'eu1-prod.disco-api.com', 'hgtv', 'de'
        return self._get_disco_api_info(
            url, self._match_id(url), disco_host, realm, country)
--- a/youtube_dlc/extractor/dreisat.py
+++ b/youtube_dlc/extractor/dreisat.py
@ -0,0 +1,193 @@
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import (
    int_or_none,
    unified_strdate,
    xpath_text,
    determine_ext,
    float_or_none,
    ExtractorError,
 )
 class DreiSatIE(InfoExtractor):
    # 3sat Mediathek pages addressed by a numeric ``obj`` id. Metadata and
    # formats are read from the beitragsDetails XML service.
    IE_NAME = '3sat'
    _GEO_COUNTRIES = ['DE']
    _VALID_URL = r'https?://(?:www\.)?3sat\.de/mediathek/(?:(?:index|mediathek)\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)'
    _TESTS = [
        {
            'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918',
            'md5': 'be37228896d30a88f315b638900a026e',
            'info_dict': {
                'id': '45918',
                'ext': 'mp4',
                'title': 'Waidmannsheil',
                'description': 'md5:cce00ca1d70e21425e72c86a98a56817',
                'uploader': 'SCHWEIZWEIT',
                'uploader_id': '100000210',
                'upload_date': '20140913'
            },
            'params': {
                'skip_download': True,  # m3u8 downloads
            }
        },
        {
            'url': 'http://www.3sat.de/mediathek/mediathek.php?mode=play&obj=51066',
            'only_matching': True,
        },
    ]
    def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
        """Build one format per (video, protocol) pair from a SMIL document.

        Connection details (protocols, host, app) come from the ``paramGroup``
        each ``video`` element refers to. Videos without a ``src``, or whose
        paramGroup is undeclared or lacks one of those three values, are
        skipped instead of raising.
        """
        # Collect every declared paramGroup as {xml:id: {param name: value}}.
        param_groups = {}
        for param_group in smil.findall(self._xpath_ns('./head/paramGroup', namespace)):
            group_id = param_group.get(self._xpath_ns(
                'id', 'http://www.w3.org/XML/1998/namespace'))
            params = {}
            for param in param_group:
                params[param.get('name')] = param.get('value')
            param_groups[group_id] = params
        formats = []
        for video in smil.findall(self._xpath_ns('.//video', namespace)):
            src = video.get('src')
            if not src:
                continue
            # Bitrate from the file name ("_<n>k") first, then from the
            # system-bitrate attribute scaled down by 1000; may stay None.
            bitrate = int_or_none(self._search_regex(r'_(\d+)k', src, 'bitrate', None)) or float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
            param_group = param_groups.get(video.get('paramGroup')) or {}
            protocols = param_group.get('protocols')
            host = param_group.get('host')
            app = param_group.get('app')
            if None in (protocols, host, app):
                # No usable connection info for this video.
                continue
            for proto in protocols.split(','):
                # '%d' cannot format None, so fall back to the bare protocol
                # name when no bitrate could be determined.
                if bitrate is None:
                    format_id = proto
                else:
                    format_id = '%s-%d' % (proto, bitrate)
                formats.append({
                    'url': '%s://%s' % (proto, host),
                    'app': app,
                    'play_path': src,
                    'ext': 'flv',
                    'format_id': format_id,
                    'tbr': bitrate,
                })
        self._sort_formats(formats)
        return formats
    def extract_from_xml_url(self, video_id, xml_url):
        """Download the details XML and return the info dict for ``video_id``.

        Raises ExtractorError (expected) when the service reports a status
        other than 'ok', and reports a geo restriction when no format is
        usable and the document names a geolocation other than 'none'.
        """
        doc = self._download_xml(
            xml_url, video_id,
            note='Downloading video info',
            errnote='Failed to download video info')
        status_code = xpath_text(doc, './status/statuscode')
        if status_code and status_code != 'ok':
            if status_code == 'notVisibleAnymore':
                message = 'Video %s is not available' % video_id
            else:
                message = '%s returned error: %s' % (self.IE_NAME, status_code)
            raise ExtractorError(message, expected=True)
        title = xpath_text(doc, './/information/title', 'title', True)
        seen_urls = set()
        formats = []
        for fnode in doc.findall('.//formitaeten/formitaet'):
            video_url = xpath_text(fnode, 'url')
            if not video_url or video_url in seen_urls:
                continue
            seen_urls.add(video_url)
            is_available = 'http://www.metafilegenerator' not in video_url
            geoloced = 'static_geoloced_online' in video_url
            if not is_available or geoloced:
                continue
            format_id = fnode.attrib['basetype']
            # basetype is expected to look like
            # vcodec_acodec_container_proto_index_indexproto; when it does
            # not, the derived fields are simply left unknown.
            format_m = re.match(r'''(?x)
                (?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_
                (?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+)
            ''', format_id)
            basetype = format_m.groupdict() if format_m else {}
            ext = determine_ext(video_url, None) or basetype.get('container')
            if ext == 'meta':
                continue
            elif ext == 'smil':
                formats.extend(self._extract_smil_formats(
                    video_url, video_id, fatal=False))
            elif ext == 'm3u8':
                # the certificates are misconfigured (see
                # https://github.com/ytdl-org/youtube-dl/issues/8665)
                if video_url.startswith('https://'):
                    continue
                formats.extend(self._extract_m3u8_formats(
                    video_url, video_id, 'mp4', 'm3u8_native',
                    m3u8_id=format_id, fatal=False))
            elif ext == 'f4m':
                formats.extend(self._extract_f4m_formats(
                    video_url, video_id, f4m_id=format_id, fatal=False))
            else:
                quality = xpath_text(fnode, './quality')
                if quality:
                    format_id += '-' + quality
                abr = int_or_none(xpath_text(fnode, './audioBitrate'), 1000)
                vbr = int_or_none(xpath_text(fnode, './videoBitrate'), 1000)
                tbr = int_or_none(self._search_regex(
                    r'_(\d+)k', video_url, 'bitrate', None))
                # Derive the audio bitrate when only total and video are known.
                if tbr and vbr and not abr:
                    abr = tbr - vbr
                fmt = {
                    'format_id': format_id,
                    'url': video_url,
                    'ext': ext,
                    'acodec': basetype.get('acodec'),
                    'vcodec': basetype.get('vcodec'),
                    'abr': abr,
                    'vbr': vbr,
                    'tbr': tbr,
                    'width': int_or_none(xpath_text(fnode, './width')),
                    'height': int_or_none(xpath_text(fnode, './height')),
                    'filesize': int_or_none(xpath_text(fnode, './filesize')),
                }
                proto = basetype.get('proto')
                if proto:
                    fmt['protocol'] = proto.lower()
                formats.append(fmt)
        geolocation = xpath_text(doc, './/details/geolocation')
        if not formats and geolocation and geolocation != 'none':
            self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
        self._sort_formats(formats)
        thumbnails = []
        for node in doc.findall('.//teaserimages/teaserimage'):
            thumbnail_url = node.text
            if not thumbnail_url:
                continue
            thumbnail = {
                'url': thumbnail_url,
            }
            # The key attribute, when shaped "<width>x<height>", carries the
            # image dimensions.
            thumbnail_key = node.get('key')
            if thumbnail_key:
                m = re.match(r'^([0-9]+)x([0-9]+)$', thumbnail_key)
                if m:
                    thumbnail['width'] = int(m.group(1))
                    thumbnail['height'] = int(m.group(2))
            thumbnails.append(thumbnail)
        upload_date = unified_strdate(xpath_text(doc, './/details/airtime'))
        return {
            'id': video_id,
            'title': title,
            'description': xpath_text(doc, './/information/detail'),
            'duration': int_or_none(xpath_text(doc, './/details/lengthSec')),
            'thumbnails': thumbnails,
            'uploader': xpath_text(doc, './/details/originChannelTitle'),
            'uploader_id': xpath_text(doc, './/details/originChannelId'),
            'upload_date': upload_date,
            'formats': formats,
        }
    def _real_extract(self, url):
        """Resolve the page URL to its details XML and extract from that."""
        video_id = self._match_id(url)
        details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?id=%s' % video_id
        return self.extract_from_xml_url(video_id, details_url)
--- a/youtube_dlc/extractor/extractors.py
+++ b/youtube_dlc/extractor/extractors.py
@ -182,6 +182,7 @@
    CanvasIE,
    CanvasEenIE,
    VrtNUIE,
    DagelijkseKostIE,
 )
 from .carambatv import (
    CarambaTVIE,
@ -309,7 +310,12 @@
    DouyuShowIE,
    DouyuTVIE,
 )
-from .dplay import DPlayIE
+from .dplay import (
    DPlayIE,
    DiscoveryPlusIE,
    HGTVDeIE,
 )
 from .dreisat import DreiSatIE
 from .drbonanza import DRBonanzaIE
 from .drtuber import DrTuberIE
 from .drtv import (
@ -1107,6 +1113,11 @@
    VivoIE,
 )
 from .showroomlive import ShowRoomLiveIE
 from .simplecast import (
    SimplecastIE,
    SimplecastEpisodeIE,
    SimplecastPodcastIE,
 )
 from .sina import SinaIE
 from .sixplay import SixPlayIE
 from .skyit import (
@ -1165,11 +1176,6 @@
    BellatorIE,
    ParamountNetworkIE,
 )
 from .storyfire import (
    StoryFireIE,
    StoryFireUserIE,
    StoryFireSeriesIE,
 )
 from .stitcher import StitcherIE
 from .sport5 import Sport5IE
 from .sportbox import SportBoxIE
@ -1193,6 +1199,11 @@
 from .srmediathek import SRMediathekIE
 from .stanfordoc import StanfordOpenClassroomIE
 from .steam import SteamIE
 from .storyfire import (
    StoryFireIE,
    StoryFireUserIE,
    StoryFireSeriesIE,
 )
 from .streamable import StreamableIE
 from .streamcloud import StreamcloudIE
 from .streamcz import StreamCZIE
@ -1652,6 +1663,7 @@
    ZattooLiveIE,
 )
 from .zdf import ZDFIE, ZDFChannelIE
 from .zhihu import ZhihuIE
 from .zingmp3 import ZingMp3IE
 from .zoom import ZoomIE
 from .zype import ZypeIE
--- a/youtube_dlc/extractor/generic.py
+++ b/youtube_dlc/extractor/generic.py
@ -133,6 +133,7 @@
 from .rumble import RumbleEmbedIE
 from .arcpublishing import ArcPublishingIE
 from .medialaan import MedialaanIE
 from .simplecast import SimplecastIE
 class GenericIE(InfoExtractor):
@ -2240,6 +2241,15 @@ class GenericIE(InfoExtractor):
                'duration': 159,
            },
        },
        {
            # Simplecast player embed
            'url': 'https://www.bio.org/podcast',
            'info_dict': {
                'id': 'podcast',
                'title': 'I AM BIO Podcast | BIO',
            },
            'playlist_mincount': 52,
        },
    ]
    def report_following_redirect(self, new_url):
@ -2794,6 +2804,12 @@ def _real_extract(self, url):
            return self.playlist_from_matches(
                matches, video_id, video_title, getter=unescapeHTML, ie='FunnyOrDie')
        # Look for Simplecast embeds
        simplecast_urls = SimplecastIE._extract_urls(webpage)
        if simplecast_urls:
            return self.playlist_from_matches(
                simplecast_urls, video_id, video_title)
        # Look for BBC iPlayer embed
        matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
        if matches:
--- a/youtube_dlc/extractor/ninegag.py
+++ b/youtube_dlc/extractor/ninegag.py
@ -2,10 +2,11 @@
 from .common import InfoExtractor
 from ..utils import (
    determine_ext,
    ExtractorError,
    determine_ext,
    int_or_none,
    try_get,
    unescapeHTML,
    url_or_none,
 )
@ -14,7 +15,7 @@ class NineGagIE(InfoExtractor):
    IE_NAME = '9gag'
    _VALID_URL = r'https?://(?:www\.)?9gag\.com/gag/(?P<id>[^/?&#]+)'
-    _TEST = {
+    _TESTS = [{
        'url': 'https://9gag.com/gag/ae5Ag7B',
        'info_dict': {
            'id': 'ae5Ag7B',
@ -29,7 +30,11 @@ class NineGagIE(InfoExtractor):
            'dislike_count': int,
            'comment_count': int,
        }
-    }
+    }, {
        # HTML escaped title
        'url': 'https://9gag.com/gag/av5nvyb',
        'only_matching': True,
    }]
    def _real_extract(self, url):
        post_id = self._match_id(url)
@ -43,7 +48,7 @@ def _real_extract(self, url):
                'The given url does not contain a video',
                expected=True)
-        title = post['title']
+        title = unescapeHTML(post['title'])
        duration = None
        formats = []
--- a/youtube_dlc/extractor/simplecast.py
+++ b/youtube_dlc/extractor/simplecast.py
@ -0,0 +1,160 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import (
    clean_podcast_url,
    int_or_none,
    parse_iso8601,
    strip_or_none,
    try_get,
    urlencode_postdata,
 )
 class SimplecastBaseIE(InfoExtractor):
    """Common API helpers and episode parsing shared by the Simplecast extractors."""

    # Textual form of the UUIDs used as episode/season identifiers.
    _UUID_REGEX = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}'
    _API_BASE = 'https://api.simplecast.com/'

    def _call_api(self, path_tmpl, video_id):
        """Download JSON from ``_API_BASE`` + ``path_tmpl % video_id``."""
        return self._download_json(
            self._API_BASE + path_tmpl % video_id, video_id)

    def _call_search_api(self, resource, resource_id, resource_url):
        """Query the ``<resource>s/search`` endpoint for ``resource_url``.

        ``resource_url`` is submitted as the url-encoded ``url`` form field;
        ``resource_id`` is only handed to ``_download_json`` as the video id.
        Returns the decoded JSON response.
        """
        # Build the URL from _API_BASE, like _call_api, so the host is
        # defined in exactly one place.
        return self._download_json(
            self._API_BASE + '%ss/search' % resource, resource_id,
            data=urlencode_postdata({'url': resource_url}))

    def _parse_episode(self, episode):
        """Turn an episode JSON object into an info dict.

        ``id`` and ``title`` are mandatory keys. The audio URL is taken from
        ``audio_file.url``, then ``audio_file_url``, then ``enclosure_url``;
        the last lookup raises ``KeyError`` when none of them is available.
        Every other field is optional.
        """
        episode_id = episode['id']
        title = episode['title'].strip()
        audio_file = episode.get('audio_file') or {}
        audio_file_url = audio_file.get('url') or episode.get('audio_file_url') or episode['enclosure_url']

        season = episode.get('season') or {}
        season_href = season.get('href')
        season_id = None
        if season_href:
            # Dots are escaped so that only the literal API host matches.
            season_id = self._search_regex(
                r'https?://api\.simplecast\.com/seasons/(%s)' % self._UUID_REGEX,
                season_href, 'season id', default=None)

        webpage_url = episode.get('episode_url')
        channel_url = None
        if webpage_url:
            # The channel URL is the scheme + host part of the episode page URL.
            channel_url = self._search_regex(
                r'(https?://[^/]+\.simplecast\.com)',
                webpage_url, 'channel url', default=None)

        return {
            'id': episode_id,
            'display_id': episode.get('slug'),
            'title': title,
            'url': clean_podcast_url(audio_file_url),
            'webpage_url': webpage_url,
            'channel_url': channel_url,
            'series': try_get(episode, lambda x: x['podcast']['title']),
            'season_number': int_or_none(season.get('number')),
            'season_id': season_id,
            'thumbnail': episode.get('image_url'),
            'episode_id': episode_id,
            'episode_number': int_or_none(episode.get('number')),
            'description': strip_or_none(episode.get('description')),
            'timestamp': parse_iso8601(episode.get('published_at')),
            'duration': int_or_none(episode.get('duration')),
            'filesize': int_or_none(audio_file.get('size') or episode.get('audio_file_size')),
        }
 class SimplecastIE(SimplecastBaseIE):
    """Extractor for api.simplecast.com/episodes/<uuid> and player.simplecast.com/<uuid> URLs."""

    IE_NAME = 'simplecast'
    _VALID_URL = r'https?://(?:api\.simplecast\.com/episodes|player\.simplecast\.com)/(?P<id>%s)' % SimplecastBaseIE._UUID_REGEX
    # Expected metadata of the reference episode; SimplecastEpisodeIE's test
    # reuses this dict.
    _COMMON_TEST_INFO = {
        'display_id': 'errant-signal-chris-franklin-new-wave-video-essays',
        'id': 'b6dc49a2-9404-4853-9aa9-9cfc097be876',
        'ext': 'mp3',
        'title': 'Errant Signal - Chris Franklin & New Wave Video Essays',
        'episode_number': 1,
        'episode_id': 'b6dc49a2-9404-4853-9aa9-9cfc097be876',
        'description': 'md5:34752789d3d2702e2d2c975fbd14f357',
        'season_number': 1,
        'season_id': 'e23df0da-bae4-4531-8bbf-71364a88dc13',
        'series': 'The RE:BIND.io Podcast',
        'duration': 5343,
        'timestamp': 1580979475,
        'upload_date': '20200206',
        'webpage_url': r're:^https?://the-re-bind-io-podcast\.simplecast\.com/episodes/errant-signal-chris-franklin-new-wave-video-essays',
        'channel_url': r're:^https?://the-re-bind-io-podcast\.simplecast\.com$',
    }
    _TESTS = [{
        'url': 'https://api.simplecast.com/episodes/b6dc49a2-9404-4853-9aa9-9cfc097be876',
        'md5': '8c93be7be54251bf29ee97464eabe61c',
        'info_dict': _COMMON_TEST_INFO,
    }, {
        'url': 'https://player.simplecast.com/b6dc49a2-9404-4853-9aa9-9cfc097be876',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return every Simplecast embed/player URL found in an <iframe> src of webpage."""
        iframe_re = r'''(?x)<iframe[^>]+src=["\']
                (
                    https?://(?:embed\.simplecast\.com/[0-9a-f]{8}|
                    player\.simplecast\.com/%s
                ))''' % SimplecastBaseIE._UUID_REGEX
        # The pattern has a single capture group, so findall yields the URLs.
        return re.findall(iframe_re, webpage)

    def _real_extract(self, url):
        """Fetch the episode named by the UUID in url and parse it."""
        uuid = self._match_id(url)
        return self._parse_episode(self._call_api('episodes/%s', uuid))
 class SimplecastEpisodeIE(SimplecastBaseIE):
    """Extractor for episode pages at <subdomain>.simplecast.com/episodes/<slug>."""

    IE_NAME = 'simplecast:episode'
    _VALID_URL = r'https?://(?!api\.)[^/]+\.simplecast\.com/episodes/(?P<id>[^/?&#]+)'
    _TEST = {
        'url': 'https://the-re-bind-io-podcast.simplecast.com/episodes/errant-signal-chris-franklin-new-wave-video-essays',
        'md5': '8c93be7be54251bf29ee97464eabe61c',
        'info_dict': SimplecastIE._COMMON_TEST_INFO,
    }

    def _real_extract(self, url):
        """Resolve the episode page through the search API and parse the result."""
        match = re.match(self._VALID_URL, url)
        # group(1) is the slug; group(0) is only the part of the URL that
        # _VALID_URL matched, i.e. without anything trailing the slug.
        slug, matched_url = match.group(1), match.group(0)
        return self._parse_episode(
            self._call_search_api('episode', slug, matched_url))
 class SimplecastPodcastIE(SimplecastBaseIE):
    """Playlist extractor for a whole podcast hosted at <subdomain>.simplecast.com."""

    IE_NAME = 'simplecast:podcast'
    # Excludes the service subdomains and individual episode pages.
    _VALID_URL = r'https?://(?!(?:api|cdn|embed|feeds|player)\.)(?P<id>[^/]+)\.simplecast\.com(?!/episodes/[^/?&#]+)'
    _TESTS = [{
        'url': 'https://the-re-bind-io-podcast.simplecast.com',
        'playlist_mincount': 33,
        'info_dict': {
            'id': '07d28d26-7522-42eb-8c53-2bdcfc81c43c',
            'title': 'The RE:BIND.io Podcast',
        },
    }, {
        'url': 'https://the-re-bind-io-podcast.simplecast.com/episodes',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Look the site up via the search API and return its episodes as a playlist."""
        subdomain = self._match_id(url)
        podcast = self._call_search_api('site', subdomain, url)['podcast']
        podcast_id, podcast_title = podcast['id'], podcast.get('title')

        def entries():
            # Generator: the episode listing is only requested once the
            # playlist entries are actually iterated.
            listing = self._call_api('podcasts/%s/episodes', podcast_id)
            for raw_episode in listing.get('collection') or []:
                parsed = self._parse_episode(raw_episode)
                # Overwrite the series with the podcast title from the site lookup.
                parsed['series'] = podcast_title
                yield parsed

        return self.playlist_result(entries(), podcast_id, podcast_title)
--- a/youtube_dlc/extractor/storyfire.py
+++ b/youtube_dlc/extractor/storyfire.py
@ -1,255 +1,151 @@
 # coding: utf-8
 from __future__ import unicode_literals
-import itertools
+import functools
 from .common import InfoExtractor
 from ..utils import (
    # HEADRequest,
    int_or_none,
    OnDemandPagedList,
    smuggle_url,
 )
-class StoryFireIE(InfoExtractor):
+class StoryFireBaseIE(InfoExtractor):
-    _VALID_URL = r'(?:(?:https?://(?:www\.)?storyfire\.com/video-details)|(?:https://storyfire.app.link))/(?P<id>[^/\s]+)'
+    _VALID_URL_BASE = r'https?://(?:www\.)?storyfire\.com/'
-    _TESTS = [{
+
    def _call_api(self, path, video_id, resource, query=None):
        """Download JSON from https://storyfire.com/app/<path>/<video_id>.

        resource only labels the progress note; query is forwarded to
        _download_json unchanged.
        """
        api_url = 'https://storyfire.com/app/%s/%s' % (path, video_id)
        note = 'Downloading %s JSON metadata' % resource
        return self._download_json(api_url, video_id, note, query=query)
    def _parse_video(self, video):
        """Build a url_transparent result that delegates a video to the Vimeo player URL.

        video must contain 'title' and 'vimeoVideoURL'; the numeric Vimeo id
        is taken from the latter and becomes the result id. All remaining
        metadata keys are optional.
        """
        title = video['title']
        vimeo_id = self._search_regex(
            r'https?://player\.vimeo\.com/external/(\d+)',
            video['vimeoVideoURL'], 'vimeo id')

        # Disabled direct format extraction, kept as found. Note that it
        # refers to a video_id name that is not defined in this method.
        # video_url = self._request_webpage(
        #    HEADRequest(video['vimeoVideoURL']), video_id).geturl()
        # formats = []
        # for v_url, suffix in [(video_url, '_sep'), (video_url.replace('/sep/video/', '/video/'), '')]:
        #    formats.extend(self._extract_m3u8_formats(
        #        v_url, video_id, 'mp4', 'm3u8_native',
        #        m3u8_id='hls' + suffix, fatal=False))
        #    formats.extend(self._extract_mpd_formats(
        #        v_url.replace('.m3u8', '.mpd'), video_id,
        #        mpd_id='dash' + suffix, fatal=False))
        # self._sort_formats(formats)

        uploader_id = video.get('hostID')
        uploader_url = None
        if uploader_id:
            uploader_url = 'https://storyfire.com/user/%s/video' % uploader_id

        # The Referer header is smuggled along with the player URL.
        player_url = smuggle_url(
            'https://player.vimeo.com/video/' + vimeo_id, {
                'http_headers': {
                    'Referer': 'https://storyfire.com/',
                }
            })
        episode_number = int_or_none(
            video.get('episodeNumber') or video.get('episode_number'))

        return {
            '_type': 'url_transparent',
            'id': vimeo_id,
            'title': title,
            'description': video.get('description'),
            'url': player_url,
            # 'formats': formats,
            'thumbnail': video.get('storyImage'),
            'view_count': int_or_none(video.get('views')),
            'like_count': int_or_none(video.get('likesCount')),
            'comment_count': int_or_none(video.get('commentsCount')),
            'duration': int_or_none(video.get('videoDuration')),
            'timestamp': int_or_none(video.get('publishDate')),
            'uploader': video.get('username'),
            'uploader_id': uploader_id,
            'uploader_url': uploader_url,
            'episode_number': episode_number,
        }
 class StoryFireIE(StoryFireBaseIE):
    _VALID_URL = StoryFireBaseIE._VALID_URL_BASE + r'video-details/(?P<id>[0-9a-f]{24})'
    _TEST = {
        'url': 'https://storyfire.com/video-details/5df1d132b6378700117f9181',
-        'md5': '560953bfca81a69003cfa5e53ac8a920',
+        'md5': 'caec54b9e4621186d6079c7ec100c1eb',
        'info_dict': {
-            'id': '5df1d132b6378700117f9181',
+            'id': '378954662',
            'ext': 'mp4',
            'title': 'Buzzfeed Teaches You About Memes',
            'uploader_id': 'ntZAJFECERSgqHSxzonV5K2E89s1',
            'timestamp': 1576129028,
-            'description': 'Mocking Buzzfeed\'s meme lesson. Reuploaded from YouTube because of their new policies',
+            'description': 'md5:0b4e28021548e144bed69bb7539e62ea',
            'uploader': 'whang!',
            'upload_date': '20191212',
            'duration': 418,
            'view_count': int,
            'like_count': int,
            'comment_count': int,
        },
-        'params': {'format': 'bestvideo'}  # There are no merged formats in the playlist.
+        'params': {
-    }, {
+            'skip_download': True,
        'url': 'https://storyfire.app.link/5GxAvWOQr8',  # Alternate URL format, with unrelated short ID
        'md5': '7a2dc6d60c4889edfed459c620fe690d',
        'info_dict': {
            'id': '5f1e11ecd78a57b6c702001d',
            'ext': 'm4a',
            'title': 'Weird Nintendo Prototype Leaks',
            'description': 'A stream taking a look at some weird Nintendo Prototypes with Luigi in Mario 64 and weird Yoshis',
            'timestamp': 1595808576,
            'upload_date': '20200727',
            'uploader': 'whang!',
            'uploader_id': 'ntZAJFECERSgqHSxzonV5K2E89s1',
        },
-        'params': {'format': 'bestaudio'}  # Verifying audio extraction
+        'expected_warnings': ['Unable to download JSON metadata']
    }]
    _aformats = {
        'audio-medium-audio': {'acodec': 'aac', 'abr': 125, 'preference': -10},
        'audio-high-audio': {'acodec': 'aac', 'abr': 254, 'preference': -1},
    }
    def _real_extract(self, url):
        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
+        video = self._call_api(
-
+            'generic/video-detail', video_id, 'video')['video']
-        # Extracting the json blob is mandatory to proceed with extraction.
+        return self._parse_video(video)
        jsontext = self._html_search_regex(
            r'<script id="__NEXT_DATA__" type="application/json">(.+?)</script>',
            webpage, 'json_data')
        json = self._parse_json(jsontext, video_id)
        # The currentVideo field in the json is mandatory
        # because it contains the only link to the m3u playlist
        video = json['props']['initialState']['video']['currentVideo']
        videourl = video['vimeoVideoURL']  # Video URL is mandatory
        # Extract other fields from the json in an error tolerant fashion
        # ID may be incorrect (on short URL format), correct it.
        parsed_id = video.get('_id')
        if parsed_id:
            video_id = parsed_id
        title = video.get('title')
        description = video.get('description')
        thumbnail = video.get('storyImage')
        views = video.get('views')
        likes = video.get('likesCount')
        comments = video.get('commentsCount')
        duration = video.get('videoDuration')
        publishdate = video.get('publishDate')  # Apparently epoch time, day only
        uploader = video.get('username')
        uploader_id = video.get('hostID')
        # Construct an uploader URL
        uploader_url = None
        if uploader_id:
            uploader_url = "https://storyfire.com/user/%s/video" % uploader_id
        # Collect root playlist to determine formats
        formats = self._extract_m3u8_formats(
            videourl, video_id, 'mp4', 'm3u8_native')
        # Modify formats to fill in missing information about audio codecs
        for format in formats:
            aformat = self._aformats.get(format['format_id'])
            if aformat:
                format['acodec'] = aformat['acodec']
                format['abr'] = aformat['abr']
                format['quality'] = aformat['preference']
                format['ext'] = 'm4a'
        self._sort_formats(formats)
        return {
            'id': video_id,
            'title': title,
            'description': description,
            'ext': "mp4",
            'url': videourl,
            'formats': formats,
            'thumbnail': thumbnail,
            'view_count': views,
            'like_count': likes,
            'comment_count': comments,
            'duration': duration,
            'timestamp': publishdate,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'uploader_url': uploader_url,
        }
-class StoryFireUserIE(InfoExtractor):
+class StoryFireUserIE(StoryFireBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?storyfire\.com/user/(?P<id>[^/\s]+)/video'
+    _VALID_URL = StoryFireBaseIE._VALID_URL_BASE + r'user/(?P<id>[^/]+)/video'
-    _TESTS = [{
+    _TEST = {
        'url': 'https://storyfire.com/user/ntZAJFECERSgqHSxzonV5K2E89s1/video',
        'info_dict': {
            'id': 'ntZAJFECERSgqHSxzonV5K2E89s1',
            'title': 'whang!',
        },
        'playlist_mincount': 18
    }, {
        'url': 'https://storyfire.com/user/UQ986nFxmAWIgnkZQ0ftVhq4nOk2/video',
        'info_dict': {
            'id': 'UQ986nFxmAWIgnkZQ0ftVhq4nOk2',
            'title': 'McJuggerNuggets',
        },
-        'playlist_mincount': 143
+        'playlist_mincount': 151,
    }
    _PAGE_SIZE = 20
-    }]
+    def _fetch_page(self, user_id, page):
-
+        videos = self._call_api(
-    # Generator for fetching playlist items
+            'publicVideos', user_id, 'page %d' % (page + 1), {
-    def _enum_videos(self, baseurl, user_id, firstjson):
+                'skip': page * self._PAGE_SIZE,
-        totalVideos = int(firstjson['videosCount'])
+            })['videos']
-        haveVideos = 0
+        for video in videos:
-        json = firstjson
+            yield self._parse_video(video)
        for page in itertools.count(1):
            for video in json['videos']:
                id = video['_id']
                url = "https://storyfire.com/video-details/%s" % id
                haveVideos += 1
                yield {
                    '_type': 'url',
                    'id': id,
                    'url': url,
                    'ie_key': 'StoryFire',
                    'title': video.get('title'),
                    'description': video.get('description'),
                    'view_count': video.get('views'),
                    'comment_count': video.get('commentsCount'),
                    'duration': video.get('videoDuration'),
                    'timestamp': video.get('publishDate'),
                }
            # Are there more pages we could fetch?
            if haveVideos < totalVideos:
                pageurl = baseurl + ("%i" % haveVideos)
                json = self._download_json(pageurl, user_id,
                                           note='Downloading page %s' % page)
                # Are there any videos in the new json?
                videos = json.get('videos')
                if not videos or len(videos) == 0:
                    break  # no videos
            else:
                break  # We have fetched all the videos, stop
    def _real_extract(self, url):
        user_id = self._match_id(url)
-
+        entries = OnDemandPagedList(functools.partial(
-        baseurl = "https://storyfire.com/app/publicVideos/%s?skip=" % user_id
+            self._fetch_page, user_id), self._PAGE_SIZE)
-
+        return self.playlist_result(entries, user_id)
        # Download first page to ensure it can be downloaded, and get user information if available.
        firstpage = baseurl + "0"
        firstjson = self._download_json(firstpage, user_id)
        title = None
        videos = firstjson.get('videos')
        if videos and len(videos):
            title = videos[1].get('username')
        return {
            '_type': 'playlist',
            'entries': self._enum_videos(baseurl, user_id, firstjson),
            'id': user_id,
            'title': title,
        }
-class StoryFireSeriesIE(InfoExtractor):
+class StoryFireSeriesIE(StoryFireBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?storyfire\.com/write/series/stories/(?P<id>[^/\s]+)'
+    _VALID_URL = StoryFireBaseIE._VALID_URL_BASE + r'write/series/stories/(?P<id>[^/?&#]+)'
    _TESTS = [{
        'url': 'https://storyfire.com/write/series/stories/-Lq6MsuIHLODO6d2dDkr/',
        'info_dict': {
            'id': '-Lq6MsuIHLODO6d2dDkr',
        },
-        'playlist_mincount': 13
+        'playlist_mincount': 13,
    }, {
        'url': 'https://storyfire.com/write/series/stories/the_mortal_one/',
        'info_dict': {
            'id': 'the_mortal_one',
        },
-        'playlist_count': 0  # This playlist has entries, but no videos.
+        'playlist_count': 0,
    }, {
        'url': 'https://storyfire.com/write/series/stories/story_time',
        'info_dict': {
            'id': 'story_time',
        },
        'playlist_mincount': 10
    }]
-    # Generator for returning playlist items
+    def _extract_videos(self, stories):
-    # This object is substantially different than the one in the user videos page above
+        for story in stories.values():
-    def _enum_videos(self, jsonlist):
+            if story.get('hasVideo'):
-        for video in jsonlist:
+                yield self._parse_video(story)
            id = video['_id']
            if video.get('hasVideo'):  # Boolean element
                url = "https://storyfire.com/video-details/%s" % id
                yield {
                    '_type': 'url',
                    'id': id,
                    'url': url,
                    'ie_key': 'StoryFire',
                    'title': video.get('title'),
                    'description': video.get('description'),
                    'view_count': video.get('views'),
                    'likes_count': video.get('likesCount'),
                    'comment_count': video.get('commentsCount'),
                    'duration': video.get('videoDuration'),
                    'timestamp': video.get('publishDate'),
                }
    def _real_extract(self, url):
-        list_id = self._match_id(url)
+        series_id = self._match_id(url)
-
+        stories = self._call_api(
-        listurl = "https://storyfire.com/app/seriesStories/%s/list" % list_id
+            'seriesStories', series_id, 'series stories')
-        json = self._download_json(listurl, list_id)
+        return self.playlist_result(self._extract_videos(stories), series_id)
        return {
            '_type': 'playlist',
            'entries': self._enum_videos(json),
            'id': list_id
        }
--- a/youtube_dlc/extractor/videopress.py
+++ b/youtube_dlc/extractor/videopress.py
@ -4,21 +4,22 @@
 import re
 from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
    determine_ext,
    float_or_none,
    int_or_none,
    parse_age_limit,
    qualities,
    random_birthday,
    try_get,
    unified_timestamp,
    urljoin,
 )
 class VideoPressIE(InfoExtractor):
-    _VALID_URL = r'https?://videopress\.com/embed/(?P<id>[\da-zA-Z]+)'
+    _ID_REGEX = r'[\da-zA-Z]{8}'
    _PATH_REGEX = r'video(?:\.word)?press\.com/embed/'
    _VALID_URL = r'https?://%s(?P<id>%s)' % (_PATH_REGEX, _ID_REGEX)
    _TESTS = [{
        'url': 'https://videopress.com/embed/kUJmAcSf',
        'md5': '706956a6c875873d51010921310e4bc6',
@ -36,35 +37,36 @@ class VideoPressIE(InfoExtractor):
        # 17+, requires birth_* params
        'url': 'https://videopress.com/embed/iH3gstfZ',
        'only_matching': True,
    }, {
        'url': 'https://video.wordpress.com/embed/kUJmAcSf',
        'only_matching': True,
    }]
    @staticmethod
    def _extract_urls(webpage):
        return re.findall(
-            r'<iframe[^>]+src=["\']((?:https?://)?videopress\.com/embed/[\da-zA-Z]+)',
+            r'<iframe[^>]+src=["\']((?:https?://)?%s%s)' % (VideoPressIE._PATH_REGEX, VideoPressIE._ID_REGEX),
            webpage)
    def _real_extract(self, url):
        video_id = self._match_id(url)
        query = random_birthday('birth_year', 'birth_month', 'birth_day')
        query['fields'] = 'description,duration,file_url_base,files,height,original,poster,rating,title,upload_date,width'
        video = self._download_json(
            'https://public-api.wordpress.com/rest/v1.1/videos/%s' % video_id,
            video_id, query=query)
        title = video['title']
-        def base_url(scheme):
+        file_url_base = video.get('file_url_base') or {}
-            return try_get(
+        base_url = file_url_base.get('https') or file_url_base.get('http')
                video, lambda x: x['file_url_base'][scheme], compat_str)
        base_url = base_url('https') or base_url('http')
        QUALITIES = ('std', 'dvd', 'hd')
        quality = qualities(QUALITIES)
        formats = []
-        for format_id, f in video['files'].items():
+        for format_id, f in (video.get('files') or {}).items():
            if not isinstance(f, dict):
                continue
            for ext, path in f.items():
@ -75,12 +77,14 @@ def base_url(scheme):
                        'ext': determine_ext(path, ext),
                        'quality': quality(format_id),
                    })
-        original_url = try_get(video, lambda x: x['original'], compat_str)
+        original_url = video.get('original')
        if original_url:
            formats.append({
                'url': original_url,
                'format_id': 'original',
                'quality': len(QUALITIES),
                'width': int_or_none(video.get('width')),
                'height': int_or_none(video.get('height')),
            })
        self._sort_formats(formats)
--- a/youtube_dlc/extractor/viki.py
+++ b/youtube_dlc/extractor/viki.py
@ -22,6 +22,7 @@
    parse_iso8601,
    sanitized_Request,
    std_headers,
    try_get,
 )
@ -42,7 +43,7 @@ class VikiBaseIE(InfoExtractor):
    _ERRORS = {
        'geo': 'Sorry, this content is not available in your region.',
        'upcoming': 'Sorry, this content is not yet available.',
-        # 'paywall': 'paywall',
+        'paywall': 'Sorry, this content is only available to Viki Pass Plus subscribers',
    }
    def _prepare_call(self, path, timestamp=None, post_data=None):
@ -94,11 +95,13 @@ def _raise_error(self, error):
            expected=True)
    def _check_errors(self, data):
-        for reason, status in data.get('blocking', {}).items():
+        for reason, status in (data.get('blocking') or {}).items():
            if status and reason in self._ERRORS:
                message = self._ERRORS[reason]
                if reason == 'geo':
                    self.raise_geo_restricted(msg=message)
                elif reason == 'paywall':
                    self.raise_login_required(message)
                raise ExtractorError('%s said: %s' % (
                    self.IE_NAME, message), expected=True)
@ -143,13 +146,19 @@ class VikiIE(VikiBaseIE):
        'info_dict': {
            'id': '1023585v',
            'ext': 'mp4',
-            'title': 'Heirs Episode 14',
+            'title': 'Heirs - Episode 14',
-            'uploader': 'SBS',
+            'uploader': 'SBS Contents Hub',
-            'description': 'md5:c4b17b9626dd4b143dcc4d855ba3474e',
+            'timestamp': 1385047627,
            'upload_date': '20131121',
            'age_limit': 13,
            'duration': 3570,
            'episode_number': 14,
        },
        'params': {
            'format': 'bestvideo',
        },
        'skip': 'Blocked in the US',
        'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'],
    }, {
        # clip
        'url': 'http://www.viki.com/videos/1067139v-the-avengers-age-of-ultron-press-conference',
@ -165,7 +174,8 @@ class VikiIE(VikiBaseIE):
            'uploader': 'Arirang TV',
            'like_count': int,
            'age_limit': 0,
-        }
+        },
        'skip': 'Sorry. There was an error loading this video',
    }, {
        'url': 'http://www.viki.com/videos/1048879v-ankhon-dekhi',
        'info_dict': {
@ -183,7 +193,7 @@ class VikiIE(VikiBaseIE):
    }, {
        # episode
        'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1',
-        'md5': '94e0e34fd58f169f40c184f232356cfe',
+        'md5': '0a53dc252e6e690feccd756861495a8c',
        'info_dict': {
            'id': '44699v',
            'ext': 'mp4',
@ -195,6 +205,10 @@ class VikiIE(VikiBaseIE):
            'uploader': 'group8',
            'like_count': int,
            'age_limit': 13,
            'episode_number': 1,
        },
        'params': {
            'format': 'bestvideo',
        },
        'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'],
    }, {
@ -221,7 +235,7 @@ class VikiIE(VikiBaseIE):
    }, {
        # non-English description
        'url': 'http://www.viki.com/videos/158036v-love-in-magic',
-        'md5': 'adf9e321a0ae5d0aace349efaaff7691',
+        'md5': '41faaba0de90483fb4848952af7c7d0d',
        'info_dict': {
            'id': '158036v',
            'ext': 'mp4',
@ -232,6 +246,10 @@ class VikiIE(VikiBaseIE):
            'title': 'Love In Magic',
            'age_limit': 13,
        },
        'params': {
            'format': 'bestvideo',
        },
        'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'],
    }]
    def _real_extract(self, url):
@ -249,22 +267,19 @@ def _real_extract(self, url):
        self._check_errors(video)
        title = self.dict_selection(video.get('titles', {}), 'en', allow_fallback=False)
        episode_number = int_or_none(video.get('number'))
        if not title:
-            title = 'Episode %d' % video.get('number') if video.get('type') == 'episode' else video.get('id') or video_id
+            title = 'Episode %d' % episode_number if video.get('type') == 'episode' else video.get('id') or video_id
-            container_titles = video.get('container', {}).get('titles', {})
+            container_titles = try_get(video, lambda x: x['container']['titles'], dict) or {}
            container_title = self.dict_selection(container_titles, 'en')
            title = '%s - %s' % (container_title, title)
        description = self.dict_selection(video.get('descriptions', {}), 'en')
-        duration = int_or_none(video.get('duration'))
+        like_count = int_or_none(try_get(video, lambda x: x['likes']['count']))
        timestamp = parse_iso8601(video.get('created_at'))
        uploader = video.get('author')
        like_count = int_or_none(video.get('likes', {}).get('count'))
        age_limit = parse_age_limit(video.get('rating'))
        thumbnails = []
-        for thumbnail_id, thumbnail in video.get('images', {}).items():
+        for thumbnail_id, thumbnail in (video.get('images') or {}).items():
            thumbnails.append({
                'id': thumbnail_id,
                'url': thumbnail.get('url'),
@ -289,7 +304,7 @@ def _real_extract(self, url):
                }]
        except AttributeError:
            # fall-back to the old way if there isn't a streamSubtitles attribute
-            for subtitle_lang, _ in video.get('subtitle_completions', {}).items():
+            for subtitle_lang, _ in (video.get('subtitle_completions') or {}).items():
                subtitles[subtitle_lang] = [{
                    'ext': subtitles_format,
                    'url': self._prepare_call(
@ -300,13 +315,15 @@ def _real_extract(self, url):
            'id': video_id,
            'title': title,
            'description': description,
-            'duration': duration,
+            'duration': int_or_none(video.get('duration')),
-            'timestamp': timestamp,
+            'timestamp': parse_iso8601(video.get('created_at')),
-            'uploader': uploader,
+            'uploader': video.get('author'),
            'uploader_url': video.get('author_url'),
            'like_count': like_count,
-            'age_limit': age_limit,
+            'age_limit': parse_age_limit(video.get('rating')),
            'thumbnails': thumbnails,
            'subtitles': subtitles,
            'episode_number': episode_number,
        }
        formats = []
@ -400,7 +417,7 @@ class VikiChannelIE(VikiBaseIE):
        'info_dict': {
            'id': '50c',
            'title': 'Boys Over Flowers',
-            'description': 'md5:ecd3cff47967fe193cff37c0bec52790',
+            'description': 'md5:804ce6e7837e1fd527ad2f25420f4d59',
        },
        'playlist_mincount': 71,
    }, {
@ -411,6 +428,7 @@ class VikiChannelIE(VikiBaseIE):
            'description': 'md5:05bf5471385aa8b21c18ad450e350525',
        },
        'playlist_count': 127,
        'skip': 'Page not found',
    }, {
        'url': 'http://www.viki.com/news/24569c-showbiz-korea',
        'only_matching': True,
--- a/youtube_dlc/extractor/vimeo.py
+++ b/youtube_dlc/extractor/vimeo.py
@ -221,10 +221,12 @@ def _parse_config(self, config, video_id):
            'is_live': is_live,
        }
-    def _extract_original_format(self, url, video_id):
+    def _extract_original_format(self, url, video_id, unlisted_hash=None):
        query = {'action': 'load_download_config'}
        if unlisted_hash:
            query['unlisted_hash'] = unlisted_hash
        download_data = self._download_json(
-            url, video_id, fatal=False,
+            url, video_id, fatal=False, query=query,
            query={'action': 'load_download_config'},
            headers={'X-Requested-With': 'XMLHttpRequest'})
        if download_data:
            source_file = download_data.get('source_file')
@ -504,6 +506,11 @@ class VimeoIE(VimeoBaseInfoExtractor):
        {
            'url': 'https://vimeo.com/160743502/abd0e13fb4',
            'only_matching': True,
        },
        {
            # requires passing unlisted_hash(a52724358e) to load_download_config request
            'url': 'https://vimeo.com/392479337/a52724358e',
            'only_matching': True,
        }
        # https://gettingthingsdone.com/workflowmap/
        # vimeo embed with check-password page protected by Referer header
@ -668,7 +675,8 @@ def _real_extract(self, url):
            if config.get('view') == 4:
                config = self._verify_player_video_password(redirect_url, video_id, headers)
-        vod = config.get('video', {}).get('vod', {})
+        video = config.get('video') or {}
        vod = video.get('vod') or {}
        def is_rented():
            if '>You rented this title.<' in webpage:
@ -728,7 +736,7 @@ def is_rented():
        formats = []
        source_format = self._extract_original_format(
-            'https://vimeo.com/' + video_id, video_id)
+            'https://vimeo.com/' + video_id, video_id, video.get('unlisted_hash'))
        if source_format:
            formats.append(source_format)
--- a/youtube_dlc/extractor/xboxclips.py
+++ b/youtube_dlc/extractor/xboxclips.py
@ -1,40 +1,55 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..compat import (
    compat_parse_qs,
    compat_urllib_parse_urlparse,
 )
 from ..utils import (
    int_or_none,
    month_by_abbreviation,
    parse_filesize,
    unified_strdate,
 )
 class XboxClipsIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?xboxclips\.com/(?:video\.php\?.*vid=|[^/]+/)(?P<id>[\w-]{36})'
+    _VALID_URL = r'https?://(?:www\.)?(?:xboxclips\.com|gameclips\.io)/(?:video\.php\?.*vid=|[^/]+/)(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
-    _TEST = {
+    _TESTS = [{
        'url': 'http://xboxclips.com/video.php?uid=2533274823424419&gamertag=Iabdulelah&vid=074a69a9-5faf-46aa-b93b-9909c1720325',
        'md5': 'fbe1ec805e920aeb8eced3c3e657df5d',
        'info_dict': {
            'id': '074a69a9-5faf-46aa-b93b-9909c1720325',
            'ext': 'mp4',
-            'title': 'Iabdulelah playing Titanfall',
+            'title': 'iAbdulElah playing Titanfall',
            'filesize_approx': 26800000,
            'upload_date': '20140807',
            'duration': 56,
        }
-    }
+    }, {
        'url': 'https://gameclips.io/iAbdulElah/074a69a9-5faf-46aa-b93b-9909c1720325',
        'only_matching': True,
    }]
    def _real_extract(self, url):
        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
+        if '/video.php' in url:
            qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
            url = 'https://gameclips.io/%s/%s' % (qs['gamertag'][0], qs['vid'][0])
-        video_url = self._html_search_regex(
+        webpage = self._download_webpage(url, video_id)
-            r'>(?:Link|Download): <a[^>]+href="([^"]+)"', webpage, 'video URL')
+        info = self._parse_html5_media_entries(url, webpage, video_id)[0]
-        title = self._html_search_regex(
+
-            r'<title>XboxClips \| ([^<]+)</title>', webpage, 'title')
+        title = self._html_search_meta(['og:title', 'twitter:title'], webpage)
-        upload_date = unified_strdate(self._html_search_regex(
+        upload_date = None
-            r'>Recorded: ([^<]+)<', webpage, 'upload date', fatal=False))
+        mobj = re.search(
            r'>Recorded: (\d{2})-(Jan|Feb|Mar|Apr|May|Ju[nl]|Aug|Sep|Oct|Nov|Dec)-(\d{4})',
            webpage)
        if mobj:
            upload_date = '%s%.2d%s' % (mobj.group(3), month_by_abbreviation(mobj.group(2)), mobj.group(1))
        filesize = parse_filesize(self._html_search_regex(
            r'>Size: ([^<]+)<', webpage, 'file size', fatal=False))
        duration = int_or_none(self._html_search_regex(
@ -42,12 +57,12 @@ def _real_extract(self, url):
        view_count = int_or_none(self._html_search_regex(
            r'>Views: (\d+)<', webpage, 'view count', fatal=False))
-        return {
+        info.update({
            'id': video_id,
-            'url': video_url,
            'title': title,
            'upload_date': upload_date,
            'filesize_approx': filesize,
            'duration': duration,
            'view_count': view_count,
-        }
+        })
        return info
--- a/youtube_dlc/extractor/yandexmusic.py
+++ b/youtube_dlc/extractor/yandexmusic.py
@ -1,8 +1,9 @@
 # coding: utf-8
 from __future__ import unicode_literals
-import re
 import hashlib
+import itertools
+import re
 from .common import InfoExtractor
 from ..compat import compat_str
@ -209,17 +210,27 @@ def _extract_tracks(self, source, item_id, url, tld):
            missing_track_ids = [
                track_id for track_id in track_ids
                if track_id not in present_track_ids]
-            missing_tracks = self._call_api(
+            # Request missing tracks in chunks to avoid exceeding max HTTP header size,
-                'track-entries', tld, url, item_id,
+            # see https://github.com/ytdl-org/youtube-dl/issues/27355
-                'Downloading missing tracks JSON', {
+            _TRACKS_PER_CHUNK = 250
-                    'entries': ','.join(missing_track_ids),
+            for chunk_num in itertools.count(0):
-                    'lang': tld,
+                start = chunk_num * _TRACKS_PER_CHUNK
-                    'external-domain': 'music.yandex.%s' % tld,
+                end = start + _TRACKS_PER_CHUNK
-                    'overembed': 'false',
+                missing_track_ids_req = missing_track_ids[start:end]
-                    'strict': 'true',
+                assert missing_track_ids_req
-                })
+                missing_tracks = self._call_api(
-            if missing_tracks:
+                    'track-entries', tld, url, item_id,
-                tracks.extend(missing_tracks)
+                    'Downloading missing tracks JSON chunk %d' % (chunk_num + 1), {
                        'entries': ','.join(missing_track_ids_req),
                        'lang': tld,
                        'external-domain': 'music.yandex.%s' % tld,
                        'overembed': 'false',
                        'strict': 'true',
                    })
                if missing_tracks:
                    tracks.extend(missing_tracks)
                if end >= len(missing_track_ids):
                    break
        return tracks
--- a/youtube_dlc/extractor/youtube.py
+++ b/youtube_dlc/extractor/youtube.py
@ -324,7 +324,9 @@ def _extract_video(self, renderer):
            r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
            'view count', default=None))
        uploader = try_get(
-            renderer, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
+            renderer,
            (lambda x: x['ownerText']['runs'][0]['text'],
             lambda x: x['shortBylineText']['runs'][0]['text']), compat_str)
        return {
            '_type': 'url_transparent',
            'ie_key': YoutubeIE.ie_key(),
@ -340,64 +342,70 @@ def _extract_video(self, renderer):
 class YoutubeIE(YoutubeBaseInfoExtractor):
    IE_DESC = 'YouTube.com'
    _INVIDIOUS_SITES = (
        # invidious-redirect websites
        r'(?:www\.)?redirect\.invidious\.io',
        r'(?:(?:www|dev)\.)?invidio\.us',
        # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
        r'(?:www\.)?invidious\.pussthecat\.org',
        r'(?:www\.)?invidious\.048596\.xyz',
        r'(?:www\.)?invidious\.zee\.li',
        r'(?:www\.)?vid\.puffyan\.us',
        r'(?:(?:www|au)\.)?ytprivate\.com',
        r'(?:www\.)?invidious\.namazso\.eu',
        r'(?:www\.)?invidious\.ethibox\.fr',
        r'(?:www\.)?inv\.skyn3t\.in',
        r'(?:www\.)?invidious\.himiko\.cloud',
        r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
        r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
        r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
        r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
        # youtube-dl invidious instances list
        r'(?:(?:www|no)\.)?invidiou\.sh',
        r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
        r'(?:www\.)?invidious\.kabi\.tk',
        r'(?:www\.)?invidious\.13ad\.de',
        r'(?:www\.)?invidious\.mastodon\.host',
        r'(?:www\.)?invidious\.zapashcanon\.fr',
        r'(?:www\.)?invidious\.kavin\.rocks',
        r'(?:www\.)?invidious\.tube',
        r'(?:www\.)?invidiou\.site',
        r'(?:www\.)?invidious\.site',
        r'(?:www\.)?invidious\.xyz',
        r'(?:www\.)?invidious\.nixnet\.xyz',
        r'(?:www\.)?invidious\.drycat\.fr',
        r'(?:www\.)?tube\.poal\.co',
        r'(?:www\.)?tube\.connect\.cafe',
        r'(?:www\.)?vid\.wxzm\.sx',
        r'(?:www\.)?vid\.mint\.lgbt',
        r'(?:www\.)?yewtu\.be',
        r'(?:www\.)?yt\.elukerio\.org',
        r'(?:www\.)?yt\.lelux\.fi',
        r'(?:www\.)?invidious\.ggc-project\.de',
        r'(?:www\.)?yt\.maisputain\.ovh',
        r'(?:www\.)?invidious\.toot\.koeln',
        r'(?:www\.)?invidious\.fdn\.fr',
        r'(?:www\.)?watch\.nettohikari\.com',
        r'(?:www\.)?kgg2m7yk5aybusll\.onion',
        r'(?:www\.)?qklhadlycap4cnod\.onion',
        r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
        r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
        r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
        r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
        r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
        r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
    )
    _VALID_URL = r"""(?x)^
                     (
                         (?:https?://|//)                                    # http(s):// or protocol-independent URL
-                         (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
+                         (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
-                            (?:www\.)?deturl\.com/www\.youtube\.com/|
+                            (?:www\.)?deturl\.com/www\.youtube\.com|
-                            (?:www\.)?pwnyoutube\.com/|
+                            (?:www\.)?pwnyoutube\.com|
-                            (?:www\.)?hooktube\.com/|
+                            (?:www\.)?hooktube\.com|
-                            (?:www\.)?yourepeat\.com/|
+                            (?:www\.)?yourepeat\.com|
-                            tube\.majestyc\.net/|
+                            tube\.majestyc\.net|
-                            # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
+                            %(invidious)s|
-                            (?:www\.)?invidious\.pussthecat\.org/|
+                            youtube\.googleapis\.com)/                        # the various hostnames, with wildcard subdomains
-                            (?:www\.)?invidious\.048596\.xyz/|
-                            (?:www\.)?invidious\.zee\.li/|
-                            (?:www\.)?vid\.puffyan\.us/|
-                            (?:(?:www|au)\.)?ytprivate\.com/|
-                            (?:www\.)?invidious\.namazso\.eu/|
-                            (?:www\.)?invidious\.ethibox\.fr/|
-                            (?:www\.)?inv\.skyn3t\.in/|
-                            (?:www\.)?invidious\.himiko\.cloud/|
-                            (?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion/|
-                            (?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion/|
-                            (?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion/|
-                            (?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion/|
-                            (?:(?:www|dev)\.)?invidio\.us/|
-                            (?:(?:www|no)\.)?invidiou\.sh/|
-                            (?:(?:www|fi)\.)?invidious\.snopyta\.org/|
-                            (?:www\.)?invidious\.kabi\.tk/|
-                            (?:www\.)?invidious\.13ad\.de/|
-                            (?:www\.)?invidious\.mastodon\.host/|
-                            (?:www\.)?invidious\.zapashcanon\.fr/|
-                            (?:www\.)?invidious\.kavin\.rocks/|
-                            (?:www\.)?invidious\.tube/|
-                            (?:www\.)?invidiou\.site/|
-                            (?:www\.)?invidious\.site/|
-                            (?:www\.)?invidious\.xyz/|
-                            (?:www\.)?invidious\.nixnet\.xyz/|
-                            (?:www\.)?invidious\.drycat\.fr/|
-                            (?:www\.)?tube\.poal\.co/|
-                            (?:www\.)?tube\.connect\.cafe/|
-                            (?:www\.)?vid\.wxzm\.sx/|
-                            (?:www\.)?vid\.mint\.lgbt/|
-                            (?:www\.)?yewtu\.be/|
-                            (?:www\.)?yt\.elukerio\.org/|
-                            (?:www\.)?yt\.lelux\.fi/|
-                            (?:www\.)?invidious\.ggc-project\.de/|
-                            (?:www\.)?yt\.maisputain\.ovh/|
-                            (?:www\.)?invidious\.toot\.koeln/|
-                            (?:www\.)?invidious\.fdn\.fr/|
-                            (?:www\.)?watch\.nettohikari\.com/|
-                            (?:www\.)?kgg2m7yk5aybusll\.onion/|
-                            (?:www\.)?qklhadlycap4cnod\.onion/|
-                            (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
-                            (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
-                            (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
-                            (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
-                            (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
-                            (?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/|
-                            youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                         (?:                                                  # the various things that can precede the ID:
                             (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
@ -412,6 +420,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                            youtu\.be|                                        # just youtu.be/xxxx
                            vid\.plus|                                        # or vid.plus/xxxx
                            zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
                            %(invidious)s
                         )/
                         |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
                         )
@ -424,7 +433,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                        )
                     )
                     (?(1).+)?                                                # if we found the ID, everything can follow
-                     $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
+                     $""" % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(_INVIDIOUS_SITES),
    }
    _PLAYER_INFO_RE = (
        r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
        r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
@ -1031,6 +1043,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
            'url': 'https://invidio.us/watch?v=BaW_jenozKc',
            'only_matching': True,
        },
        {
            'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
            'only_matching': True,
        },
        {
            # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
            'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
            'only_matching': True,
        },
        {
            # DRM protected
            'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
@ -1169,6 +1190,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                'skip_download': True,
            },
        },
        {
            # controversial video, only works with bpctr when authenticated with cookies
            'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
            'only_matching': True,
        },
    ]
    def __init__(self, *args, **kwargs):
@ -1426,7 +1452,7 @@ def _real_extract(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        video_id = self._match_id(url)
        base_url = self.http_scheme() + '//www.youtube.com/'
-        webpage_url = base_url + 'watch?v=' + video_id + '&has_verified=1'
+        webpage_url = base_url + 'watch?v=' + video_id + '&has_verified=1&bpctr=9999999999'
        webpage = self._download_webpage(webpage_url, video_id, fatal=False)
        player_response = None
--- a/youtube_dlc/extractor/zhihu.py
+++ b/youtube_dlc/extractor/zhihu.py
@ -0,0 +1,69 @@
 # coding: utf-8
 from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..utils import float_or_none, int_or_none
 class ZhihuIE(InfoExtractor):
    """Information extractor for zhihu.com "zvideo" pages."""
    # Matches http(s)://[www.]zhihu.com/zvideo/<digits>; the digits are captured as `id`.
    _VALID_URL = r'https?://(?:www\.)?zhihu\.com/zvideo/(?P<id>[0-9]+)'
    # NOTE(review): the bare `int` values below are presumably type-only checks
    # performed by the test harness (counters change over time) — confirm.
    _TEST = {
        'url': 'https://www.zhihu.com/zvideo/1342930761977176064',
        'md5': 'c8d4c9cd72dd58e6f9bc9c2c84266464',
        'info_dict': {
            'id': '1342930761977176064',
            'ext': 'mp4',
            'title': '写春联也太难了吧！',
            'thumbnail': r're:^https?://.*\.jpg',
            'uploader': '桥半舫',
            'timestamp': 1612959715,
            'upload_date': '20210210',
            'uploader_id': '244ecb13b0fd7daf92235288c8ca3365',
            'duration': 146.333,
            'view_count': int,
            'like_count': int,
            'comment_count': int,
        }
    }
    def _real_extract(self, url):
        """Build the info dict for a single zvideo URL.

        Requests JSON from ``https://www.zhihu.com/api/v4/zvideos/<id>`` and
        maps its fields onto the returned dict. Only ``title`` is read with a
        hard subscript, so a response without it raises ``KeyError``; every
        other field is looked up with ``.get`` and its absence does not raise
        here.
        """
        video_id = self._match_id(url)
        zvideo = self._download_json(
            'https://www.zhihu.com/api/v4/zvideos/' + video_id, video_id)
        title = zvideo['title']
        # `or {}` covers an explicit null/falsy value as well as a missing key.
        video = zvideo.get('video') or {}
        formats = []
        # 'playlist' is iterated as a mapping of format id -> per-format dict.
        for format_id, q in (video.get('playlist') or {}).items():
            # Either key may carry the media URL; entries with neither are skipped.
            play_url = q.get('url') or q.get('play_url')
            if not play_url:
                continue
            formats.append({
                'asr': int_or_none(q.get('sample_rate')),
                'filesize': int_or_none(q.get('size')),
                'format_id': format_id,
                'fps': int_or_none(q.get('fps')),
                'height': int_or_none(q.get('height')),
                'tbr': float_or_none(q.get('bitrate')),
                'url': play_url,
                'width': int_or_none(q.get('width')),
            })
        # NOTE(review): _sort_formats is inherited (not visible here);
        # presumably orders `formats` in place by quality — confirm.
        self._sort_formats(formats)
        author = zvideo.get('author') or {}
        url_token = author.get('url_token')
        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            # Prefer the thumbnail nested under 'video'; fall back to the top-level image.
            'thumbnail': video.get('thumbnail') or zvideo.get('image_url'),
            'uploader': author.get('name'),
            'timestamp': int_or_none(zvideo.get('published_at')),
            'uploader_id': author.get('id'),
            # Profile URL is only built when the author has a url_token; otherwise None.
            'uploader_url': 'https://www.zhihu.com/people/' + url_token if url_token else None,
            'duration': float_or_none(video.get('duration')),
            'view_count': int_or_none(zvideo.get('play_count')),
            'like_count': int_or_none(zvideo.get('liked_count')),
            'comment_count': int_or_none(zvideo.get('comment_count')),
        }
--- a/youtube_dlc/postprocessor/embedthumbnail.py
+++ b/youtube_dlc/postprocessor/embedthumbnail.py
@ -127,10 +127,13 @@ def is_webp(path):
            except PostProcessingError as err:
                self.report_warning('unable to embed using ffprobe & ffmpeg; %s' % error_to_compat_str(err))
-                if not check_executable('AtomicParsley', ['-v']):
+                atomicparsley = next((
                    x for x in ['AtomicParsley', 'atomicparsley']
                    if check_executable(x, ['-v'])), None)
                if atomicparsley is None:
                    raise EmbedThumbnailPPError('AtomicParsley was not found. Please install.')
-                cmd = [encodeFilename('AtomicParsley', True),
+                cmd = [encodeFilename(atomicparsley, True),
                       encodeFilename(filename, True),
                       encodeArgument('--artwork'),
                       encodeFilename(thumbnail_filename, True),