Update to ytdl-2021.02.04.1 except youtube

2024-11-22 02:15:12 +00:00 · 2021-02-04 13:26:01 +05:30 · 2021-02-04 13:26:01 +05:30 · 2181983a0c
commit 2181983a0c
parent e29663c644
24 changed files with 663 additions and 434 deletions
--- a/youtube_dlc/extractor/abcnews.py
+++ b/youtube_dlc/extractor/abcnews.py
@ -1,14 +1,15 @@
 # coding: utf-8
 from __future__ import unicode_literals

-import calendar
 import re
-import time

 from .amp import AMPIE
 from .common import InfoExtractor
-from .youtube import YoutubeIE
-from ..compat import compat_urlparse
+from ..utils import (
+    parse_duration,
+    parse_iso8601,
+    try_get,
+)


 class AbcNewsVideoIE(AMPIE):
@ -18,8 +19,8 @@ class AbcNewsVideoIE(AMPIE):
                        (?:
                            abcnews\.go\.com/
                            (?:
-                                [^/]+/video/(?P<display_id>[0-9a-z-]+)-|
-                                video/embed\?.*?\bid=
+                                (?:[^/]+/)*video/(?P<display_id>[0-9a-z-]+)-|
+                                video/(?:embed|itemfeed)\?.*?\bid=
                            )|
                            fivethirtyeight\.abcnews\.go\.com/video/embed/\d+/
                        )
@ -36,6 +37,8 @@ class AbcNewsVideoIE(AMPIE):
            'description': 'George Stephanopoulos goes one-on-one with Iranian Foreign Minister Dr. Javad Zarif.',
            'duration': 180,
            'thumbnail': r're:^https?://.*\.jpg$',
+            'timestamp': 1380454200,
+            'upload_date': '20130929',
        },
        'params': {
            # m3u8 download
@ -47,6 +50,12 @@ class AbcNewsVideoIE(AMPIE):
    }, {
        'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478',
        'only_matching': True,
+    }, {
+        'url': 'http://abcnews.go.com/video/itemfeed?id=46979033',
+        'only_matching': True,
+    }, {
+        'url': 'https://abcnews.go.com/GMA/News/video/history-christmas-story-67894761',
+        'only_matching': True,
    }]

    def _real_extract(self, url):
@ -67,28 +76,23 @@ class AbcNewsIE(InfoExtractor):
    _VALID_URL = r'https?://abcnews\.go\.com/(?:[^/]+/)+(?P<display_id>[0-9a-z-]+)/story\?id=(?P<id>\d+)'

    _TESTS = [{
-        'url': 'http://abcnews.go.com/Blotter/News/dramatic-video-rare-death-job-america/story?id=10498713#.UIhwosWHLjY',
+        # Youtube Embeds
+        'url': 'https://abcnews.go.com/Entertainment/peter-billingsley-child-actor-christmas-story-hollywood-power/story?id=51286501',
        'info_dict': {
-            'id': '10505354',
-            'ext': 'flv',
-            'display_id': 'dramatic-video-rare-death-job-america',
-            'title': 'Occupational Hazards',
-            'description': 'Nightline investigates the dangers that lurk at various jobs.',
-            'thumbnail': r're:^https?://.*\.jpg$',
-            'upload_date': '20100428',
-            'timestamp': 1272412800,
+            'id': '51286501',
+            'title': "Peter Billingsley: From child actor in 'A Christmas Story' to Hollywood power player",
+            'description': 'Billingsley went from a child actor to Hollywood power player.',
        },
-        'add_ie': ['AbcNewsVideo'],
+        'playlist_count': 5,
    }, {
        'url': 'http://abcnews.go.com/Entertainment/justin-timberlake-performs-stop-feeling-eurovision-2016/story?id=39125818',
        'info_dict': {
            'id': '38897857',
            'ext': 'mp4',
-            'display_id': 'justin-timberlake-performs-stop-feeling-eurovision-2016',
            'title': 'Justin Timberlake Drops Hints For Secret Single',
            'description': 'Lara Spencer reports the buzziest stories of the day in "GMA" Pop News.',
-            'upload_date': '20160515',
-            'timestamp': 1463329500,
+            'upload_date': '20160505',
+            'timestamp': 1462442280,
        },
        'params': {
            # m3u8 download
@ -100,49 +104,55 @@ class AbcNewsIE(InfoExtractor):
    }, {
        'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343',
        'only_matching': True,
+    }, {
+        # inline.type == 'video'
+        'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343',
+        'only_matching': True,
    }]

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        display_id = mobj.group('display_id')
-        video_id = mobj.group('id')
+        """Return a playlist of the videos attached to an ABC News story page.
+
+        The page's window['__abcnews__'] JSON is parsed; the story's featured
+        video (when it has a feed URL) is yielded first, followed by every
+        inline iframe or video listed in the article contents. The playlist
+        carries the story id, headline and sub-headline.
+        """
+        story_id = self._match_id(url)
+        webpage = self._download_webpage(url, story_id)
+        # Page state is embedded as JSON; only the first 'everscroll' entry is read.
+        story = self._parse_json(self._search_regex(
+            r"window\['__abcnews__'\]\s*=\s*({.+?});",
+            webpage, 'data'), story_id)['page']['content']['story']['everscroll'][0]
+        article_contents = story.get('articleContents') or {}

-        webpage = self._download_webpage(url, video_id)
-        video_url = self._search_regex(
-            r'window\.abcnvideo\.url\s*=\s*"([^"]+)"', webpage, 'video URL')
-        full_video_url = compat_urlparse.urljoin(url, video_url)
+        def entries():
+            # Featured video: handed to AbcNewsVideoIE through its feed URL.
+            featured_video = story.get('featuredVideo') or {}
+            feed = try_get(featured_video, lambda x: x['video']['feed'])
+            if feed:
+                yield {
+                    '_type': 'url',
+                    'id': featured_video.get('id'),
+                    'title': featured_video.get('name'),
+                    'url': feed,
+                    'thumbnail': featured_video.get('images'),
+                    'description': featured_video.get('description'),
+                    'timestamp': parse_iso8601(featured_video.get('uploadDate')),
+                    'duration': parse_duration(featured_video.get('duration')),
+                    'ie_key': AbcNewsVideoIE.ie_key(),
+                }

-        youtube_url = YoutubeIE._extract_url(webpage)
+            for inline in (article_contents.get('inlines') or []):
+                inline_type = inline.get('type')
+                if inline_type == 'iframe':
+                    # No extractor is forced here; the iframe src is passed on as is.
+                    iframe_url = try_get(inline, lambda x: x['attrs']['src'])
+                    if iframe_url:
+                        yield self.url_result(iframe_url)
+                elif inline_type == 'video':
+                    # Inline videos are addressed by id through the embed URL.
+                    video_id = inline.get('id')
+                    if video_id:
+                        yield {
+                            '_type': 'url',
+                            'id': video_id,
+                            'url': 'http://abcnews.go.com/video/embed?id=' + video_id,
+                            'thumbnail': inline.get('imgSrc') or inline.get('imgDefault'),
+                            'description': inline.get('description'),
+                            'duration': parse_duration(inline.get('duration')),
+                            'ie_key': AbcNewsVideoIE.ie_key(),
+                        }

-        timestamp = None
-        date_str = self._html_search_regex(
-            r'<span[^>]+class="timestamp">([^<]+)</span>',
-            webpage, 'timestamp', fatal=False)
-        if date_str:
-            tz_offset = 0
-            if date_str.endswith(' ET'):  # Eastern Time
-                tz_offset = -5
-                date_str = date_str[:-3]
-            date_formats = ['%b. %d, %Y', '%b %d, %Y, %I:%M %p']
-            for date_format in date_formats:
-                try:
-                    timestamp = calendar.timegm(time.strptime(date_str.strip(), date_format))
-                except ValueError:
-                    continue
-            if timestamp is not None:
-                timestamp -= tz_offset * 3600
-
-        entry = {
-            '_type': 'url_transparent',
-            'ie_key': AbcNewsVideoIE.ie_key(),
-            'url': full_video_url,
-            'id': video_id,
-            'display_id': display_id,
-            'timestamp': timestamp,
-        }
-
-        if youtube_url:
-            entries = [entry, self.url_result(youtube_url, ie=YoutubeIE.ie_key())]
-            return self.playlist_result(entries)
-
-        return entry
+        return self.playlist_result(
+            entries(), story_id, article_contents.get('headline'),
+            article_contents.get('subHead'))
--- a/youtube_dlc/extractor/adn.py
+++ b/youtube_dlc/extractor/adn.py
@ -26,6 +26,7 @@
    strip_or_none,
    try_get,
    unified_strdate,
+    urlencode_postdata,
 )


@ -51,9 +52,12 @@ class ADNIE(InfoExtractor):
        }
    }

+    _NETRC_MACHINE = 'animedigitalnetwork'
    _BASE_URL = 'http://animedigitalnetwork.fr'
    _API_BASE_URL = 'https://gw.api.animedigitalnetwork.fr/'
    _PLAYER_BASE_URL = _API_BASE_URL + 'player/'
+    _HEADERS = {}
+    _LOGIN_ERR_MESSAGE = 'Unable to log in'
    _RSA_KEY = (0x9B42B08905199A5CCE2026274399CA560ECB209EE9878A708B1C0812E1BB8CB5D1FB7441861147C1A1F2F3A0476DD63A9CAC20D3E983613346850AA6CB38F16DC7D720FD7D86FC6E5B3D5BBC72E14CD0BF9E869F2CEA2CCAD648F1DCE38F1FF916CEFB2D339B64AA0264372344BC775E265E8A852F88144AB0BD9AA06C1A4ABB, 65537)
    _POS_ALIGN_MAP = {
        'start': 1,
@ -129,19 +133,42 @@ def _get_subtitles(self, sub_url, video_id):
            }])
        return subtitles

+    def _real_initialize(self):
+        """Log in with the configured credentials and keep the bearer token.
+
+        Does nothing when no username is configured. On success the
+        authorization header is stored in self._HEADERS. A failed login is
+        never fatal: it is reported as a warning, preferring the message (or
+        code) from the API's 401 response over the generic error text.
+        """
+        username, password = self._get_login_info()
+        if not username:
+            return
+        try:
+            # Keep the request fatal (the default): with fatal=False the
+            # download helper reports HTTP errors itself and returns False, so
+            # the handler below, which surfaces the server's own explanation,
+            # would never run.
+            access_token = (self._download_json(
+                self._API_BASE_URL + 'authentication/login', None,
+                'Logging in', self._LOGIN_ERR_MESSAGE,
+                data=urlencode_postdata({
+                    'password': password,
+                    'rememberMe': False,
+                    'source': 'Web',
+                    'username': username,
+                })) or {}).get('accessToken')
+            if access_token:
+                self._HEADERS = {'authorization': 'Bearer ' + access_token}
+        except ExtractorError as e:
+            message = None
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+                # The 401 body is JSON; use its message/code when it parses.
+                resp = self._parse_json(
+                    e.cause.read().decode(), None, fatal=False) or {}
+                message = resp.get('message') or resp.get('code')
+            self.report_warning(message or self._LOGIN_ERR_MESSAGE)
+
    def _real_extract(self, url):
        video_id = self._match_id(url)
        video_base_url = self._PLAYER_BASE_URL + 'video/%s/' % video_id
        player = self._download_json(
            video_base_url + 'configuration', video_id,
-            'Downloading player config JSON metadata')['player']
+            'Downloading player config JSON metadata',
+            headers=self._HEADERS)['player']
        options = player['options']

        user = options['user']
        if not user.get('hasAccess'):
-            raise ExtractorError(
-                'This video is only available for paying users', expected=True)
-            # self.raise_login_required() # FIXME: Login is not implemented
+            self.raise_login_required()

        token = self._download_json(
            user.get('refreshTokenUrl') or (self._PLAYER_BASE_URL + 'refresh/token'),
@ -188,8 +215,7 @@ def _real_extract(self, url):
                message = error.get('message')
                if e.cause.code == 403 and error.get('code') == 'player-bad-geolocation-country':
                    self.raise_geo_restricted(msg=message)
-                else:
-                    raise ExtractorError(message)
+                raise ExtractorError(message)
        else:
            raise ExtractorError('Giving up retrying')

--- a/youtube_dlc/extractor/aenetworks.py
+++ b/youtube_dlc/extractor/aenetworks.py
@ -252,7 +252,7 @@ class AENetworksShowIE(AENetworksListBaseIE):
    _TESTS = [{
        'url': 'http://www.history.com/shows/ancient-aliens',
        'info_dict': {
-            'id': 'SH012427480000',
+            'id': 'SERIES1574',
            'title': 'Ancient Aliens',
            'description': 'md5:3f6d74daf2672ff3ae29ed732e37ea7f',
        },
--- a/youtube_dlc/extractor/amp.py
+++ b/youtube_dlc/extractor/amp.py
@ -8,6 +8,7 @@
    int_or_none,
    mimetype2ext,
    parse_iso8601,
+    unified_timestamp,
    url_or_none,
 )

@ -88,7 +89,7 @@ def get_media_node(name, default=None):

        self._sort_formats(formats)

-        timestamp = parse_iso8601(item.get('pubDate'), ' ') or parse_iso8601(item.get('dc-date'))
+        # unified_timestamp() takes (date_str, day_first); the ' ' delimiter was
+        # a parse_iso8601 argument and only acted here as a truthy day_first,
+        # which is already the default.
+        timestamp = unified_timestamp(item.get('pubDate')) or parse_iso8601(item.get('dc-date'))

        return {
            'id': video_id,
--- a/youtube_dlc/extractor/awaan.py
+++ b/youtube_dlc/extractor/awaan.py
@ -48,6 +48,7 @@ def _parse_video_data(self, video_data, video_id, is_live):
            'duration': int_or_none(video_data.get('duration')),
            'timestamp': parse_iso8601(video_data.get('create_time'), ' '),
            'is_live': is_live,
+            'uploader_id': video_data.get('user_id'),
        }


@ -107,6 +108,7 @@ class AWAANLiveIE(AWAANBaseIE):
            'title': 're:Dubai Al Oula [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'upload_date': '20150107',
            'timestamp': 1420588800,
+            'uploader_id': '71',
        },
        'params': {
            # m3u8 download
--- a/youtube_dlc/extractor/azmedien.py
+++ b/youtube_dlc/extractor/azmedien.py
@ -47,7 +47,7 @@ class AZMedienIE(InfoExtractor):
        'url': 'https://www.telebaern.tv/telebaern-news/montag-1-oktober-2018-ganze-sendung-133531189#video=0_7xjo9lf1',
        'only_matching': True
    }]
-    _API_TEMPL = 'https://www.%s/api/pub/gql/%s/NewsArticleTeaser/cb9f2f81ed22e9b47f4ca64ea3cc5a5d13e88d1d'
+    _API_TEMPL = 'https://www.%s/api/pub/gql/%s/NewsArticleTeaser/a4016f65fe62b81dc6664dd9f4910e4ab40383be'
    _PARTNER_ID = '1719221'

    def _real_extract(self, url):
--- a/youtube_dlc/extractor/bleacherreport.py
+++ b/youtube_dlc/extractor/bleacherreport.py
@ -90,13 +90,19 @@ class BleacherReportCMSIE(AMPIE):
    _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36}|\d{5})'
    _TESTS = [{
        'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1&library=video-cms',
-        'md5': '2e4b0a997f9228ffa31fada5c53d1ed1',
+        'md5': '670b2d73f48549da032861130488c681',
        'info_dict': {
            'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
-            'ext': 'flv',
+            'ext': 'mp4',
            'title': 'Cena vs. Rollins Would Expose the Heavyweight Division',
            'description': 'md5:984afb4ade2f9c0db35f3267ed88b36e',
+            'upload_date': '20150723',
+            'timestamp': 1437679032,
+
        },
+        'expected_warnings': [
+            'Unable to download f4m manifest'
+        ]
    }]

    def _real_extract(self, url):
--- a/youtube_dlc/extractor/bravotv.py
+++ b/youtube_dlc/extractor/bravotv.py
@ -12,7 +12,7 @@


 class BravoTVIE(AdobePassIE):
-    _VALID_URL = r'https?://(?:www\.)?bravotv\.com/(?:[^/]+/)+(?P<id>[^/?#]+)'
+    _VALID_URL = r'https?://(?:www\.)?(?P<req_id>bravotv|oxygen)\.com/(?:[^/]+/)+(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://www.bravotv.com/top-chef/season-16/episode-15/videos/the-top-chef-season-16-winner-is',
        'md5': 'e34684cfea2a96cd2ee1ef3a60909de9',
@ -28,10 +28,13 @@ class BravoTVIE(AdobePassIE):
    }, {
        'url': 'http://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1',
        'only_matching': True,
+    }, {
+        'url': 'https://www.oxygen.com/in-ice-cold-blood/season-2/episode-16/videos/handling-the-horwitz-house-after-the-murder-season-2',
+        'only_matching': True,
    }]

    def _real_extract(self, url):
-        display_id = self._match_id(url)
+        site, display_id = re.match(self._VALID_URL, url).groups()
        webpage = self._download_webpage(url, display_id)
        settings = self._parse_json(self._search_regex(
            r'<script[^>]+data-drupal-selector="drupal-settings-json"[^>]*>({.+?})</script>', webpage, 'drupal settings'),
@ -53,11 +56,14 @@ def _real_extract(self, url):
                tp_path = release_pid = tve['release_pid']
            if tve.get('entitlement') == 'auth':
                adobe_pass = settings.get('tve_adobe_auth', {})
+                if site == 'bravotv':
+                    site = 'bravo'
                resource = self._get_mvpd_resource(
-                    adobe_pass.get('adobePassResourceId', 'bravo'),
+                    adobe_pass.get('adobePassResourceId') or site,
                    tve['title'], release_pid, tve.get('rating'))
                query['auth'] = self._extract_mvpd_auth(
-                    url, release_pid, adobe_pass.get('adobePassRequestorId', 'bravo'), resource)
+                    url, release_pid,
+                    adobe_pass.get('adobePassRequestorId') or site, resource)
        else:
            shared_playlist = settings['ls_playlist']
            account_pid = shared_playlist['account_pid']
--- a/youtube_dlc/extractor/ccma.py
+++ b/youtube_dlc/extractor/ccma.py
@ -1,6 +1,7 @@
 # coding: utf-8
 from __future__ import unicode_literals

+import datetime
 import re

 from .common import InfoExtractor
@ -8,8 +9,8 @@
    clean_html,
    int_or_none,
    parse_duration,
-    parse_iso8601,
    parse_resolution,
+    try_get,
    url_or_none,
 )

@ -24,8 +25,9 @@ class CCMAIE(InfoExtractor):
            'ext': 'mp4',
            'title': 'L\'espot de La Marató de TV3',
            'description': 'md5:f12987f320e2f6e988e9908e4fe97765',
-            'timestamp': 1470918540,
-            'upload_date': '20160811',
+            'timestamp': 1478608140,
+            'upload_date': '20161108',
+            'age_limit': 0,
        }
    }, {
        'url': 'http://www.ccma.cat/catradio/alacarta/programa/el-consell-de-savis-analitza-el-derbi/audio/943685/',
@ -35,8 +37,24 @@ class CCMAIE(InfoExtractor):
            'ext': 'mp3',
            'title': 'El Consell de Savis analitza el derbi',
            'description': 'md5:e2a3648145f3241cb9c6b4b624033e53',
-            'upload_date': '20171205',
-            'timestamp': 1512507300,
+            'upload_date': '20170512',
+            'timestamp': 1494622500,
+            'vcodec': 'none',
+            'categories': ['Esports'],
+        }
+    }, {
+        'url': 'http://www.ccma.cat/tv3/alacarta/crims/crims-josep-tallada-lespereu-me-capitol-1/video/6031387/',
+        'md5': 'b43c3d3486f430f3032b5b160d80cbc3',
+        'info_dict': {
+            'id': '6031387',
+            'ext': 'mp4',
+            'title': 'Crims - Josep Talleda, l\'"Espereu-me" (capítol 1)',
+            'description': 'md5:7cbdafb640da9d0d2c0f62bad1e74e60',
+            'timestamp': 1582577700,
+            'upload_date': '20200224',
+            'subtitles': 'mincount:4',
+            'age_limit': 16,
+            'series': 'Crims',
        }
    }]

@ -72,17 +90,27 @@ def _real_extract(self, url):

        informacio = media['informacio']
        title = informacio['titol']
-        durada = informacio.get('durada', {})
+        durada = informacio.get('durada') or {}
        duration = int_or_none(durada.get('milisegons'), 1000) or parse_duration(durada.get('text'))
-        timestamp = parse_iso8601(informacio.get('data_emissio', {}).get('utc'))
+        tematica = try_get(informacio, lambda x: x['tematica']['text'])
+
+        # Best-effort parse of the broadcast time: leave timestamp as None when
+        # the field is missing (TypeError) or does not match the expected
+        # layout (ValueError) rather than aborting the whole extraction.
+        # NOTE(review): the format reads day before month (%Y-%d-%m); kept
+        # as written - confirm against the API's data_emissio.utc values.
+        timestamp = None
+        data_utc = try_get(informacio, lambda x: x['data_emissio']['utc'])
+        try:
+            timestamp = datetime.datetime.strptime(
+                data_utc, '%Y-%d-%mT%H:%M:%S%z').timestamp()
+        except (TypeError, ValueError):
+            pass

        subtitles = {}
-        subtitols = media.get('subtitols', {})
-        if subtitols:
-            sub_url = subtitols.get('url')
+        subtitols = media.get('subtitols') or []
+        if isinstance(subtitols, dict):
+            subtitols = [subtitols]
+        for st in subtitols:
+            sub_url = st.get('url')
            if sub_url:
                subtitles.setdefault(
-                    subtitols.get('iso') or subtitols.get('text') or 'ca', []).append({
+                    st.get('iso') or st.get('text') or 'ca', []).append({
                        'url': sub_url,
                    })

@ -97,6 +125,16 @@ def _real_extract(self, url):
                    'height': int_or_none(imatges.get('alcada')),
                }]

+        age_limit = None
+        codi_etic = try_get(informacio, lambda x: x['codi_etic']['id'])
+        if codi_etic:
+            codi_etic_s = codi_etic.split('_')
+            if len(codi_etic_s) == 2:
+                if codi_etic_s[1] == 'TP':
+                    age_limit = 0
+                else:
+                    age_limit = int_or_none(codi_etic_s[1])
+
        return {
            'id': media_id,
            'title': title,
@ -106,4 +144,9 @@ def _real_extract(self, url):
            'thumbnails': thumbnails,
            'subtitles': subtitles,
            'formats': formats,
+            'age_limit': age_limit,
+            'alt_title': informacio.get('titol_complet'),
+            'episode_number': int_or_none(informacio.get('capitol')),
+            'categories': [tematica] if tematica else None,
+            'series': informacio.get('programa'),
        }
--- a/youtube_dlc/extractor/cda.py
+++ b/youtube_dlc/extractor/cda.py
@ -96,7 +96,7 @@ def _real_extract(self, url):
            raise ExtractorError('This video is only available for premium users.', expected=True)

        need_confirm_age = False
-        if self._html_search_regex(r'(<form[^>]+action="/a/validatebirth")',
+        if self._html_search_regex(r'(<form[^>]+action="[^"]*/a/validatebirth[^"]*")',
                                   webpage, 'birthday validate form', default=None):
            webpage = self._download_age_confirm_page(
                url, video_id, note='Confirming age')
--- a/youtube_dlc/extractor/egghead.py
+++ b/youtube_dlc/extractor/egghead.py
@ -12,7 +12,14 @@
 )


-class EggheadCourseIE(InfoExtractor):
+class EggheadBaseIE(InfoExtractor):
+    """Shared base for the egghead.io extractors; wraps the JSON API call."""
+
+    def _call_api(self, path, video_id, resource, fatal=True):
+        """Download and return the JSON found at app.egghead.io/api/v1/<path>.
+
+        resource only labels the progress note; fatal is passed through to
+        _download_json.
+        """
+        api_url = 'https://app.egghead.io/api/v1/' + path
+        note = 'Downloading %s JSON' % resource
+        return self._download_json(api_url, video_id, note, fatal=fatal)
+
+
+class EggheadCourseIE(EggheadBaseIE):
    IE_DESC = 'egghead.io course'
    IE_NAME = 'egghead:course'
    _VALID_URL = r'https://egghead\.io/courses/(?P<id>[^/?#&]+)'
@ -28,10 +35,9 @@ class EggheadCourseIE(InfoExtractor):

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
-
-        lessons = self._download_json(
-            'https://egghead.io/api/v1/series/%s/lessons' % playlist_id,
-            playlist_id, 'Downloading course lessons JSON')
+        series_path = 'series/' + playlist_id
+        lessons = self._call_api(
+            series_path + '/lessons', playlist_id, 'course lessons')

        entries = []
        for lesson in lessons:
@ -44,9 +50,8 @@ def _real_extract(self, url):
            entries.append(self.url_result(
                lesson_url, ie=EggheadLessonIE.ie_key(), video_id=lesson_id))

-        course = self._download_json(
-            'https://egghead.io/api/v1/series/%s' % playlist_id,
-            playlist_id, 'Downloading course JSON', fatal=False) or {}
+        course = self._call_api(
+            series_path, playlist_id, 'course', False) or {}

        playlist_id = course.get('id')
        if playlist_id:
@ -57,7 +62,7 @@ def _real_extract(self, url):
            course.get('description'))


-class EggheadLessonIE(InfoExtractor):
+class EggheadLessonIE(EggheadBaseIE):
    IE_DESC = 'egghead.io lesson'
    IE_NAME = 'egghead:lesson'
    _VALID_URL = r'https://egghead\.io/(?:api/v1/)?lessons/(?P<id>[^/?#&]+)'
@ -74,7 +79,7 @@ class EggheadLessonIE(InfoExtractor):
            'upload_date': '20161209',
            'duration': 304,
            'view_count': 0,
-            'tags': ['javascript', 'free'],
+            'tags': 'count:2',
        },
        'params': {
            'skip_download': True,
@ -88,8 +93,8 @@ class EggheadLessonIE(InfoExtractor):
    def _real_extract(self, url):
        display_id = self._match_id(url)

-        lesson = self._download_json(
-            'https://egghead.io/api/v1/lessons/%s' % display_id, display_id)
+        lesson = self._call_api(
+            'lessons/' + display_id, display_id, 'lesson')

        lesson_id = compat_str(lesson['id'])
        title = lesson['title']
--- a/youtube_dlc/extractor/extractors.py
+++ b/youtube_dlc/extractor/extractors.py
@ -1308,6 +1308,7 @@
    TV2IE,
    TV2ArticleIE,
    KatsomoIE,
+    MTVUutisetArticleIE,
 )
 from .tv2dk import (
    TV2DKIE,
@ -1448,7 +1449,6 @@
    VidmeUserIE,
    VidmeUserLikesIE,
 )
-from .vidzi import VidziIE
 from .vier import VierIE, VierVideosIE
 from .viewlift import (
    ViewLiftIE,
@ -1508,6 +1508,7 @@
    VRVSeriesIE,
 )
 from .vshare import VShareIE
+from .vtm import VTMIE
 from .medialaan import MedialaanIE
 from .vube import VubeIE
 from .vuclip import VuClipIE
--- a/youtube_dlc/extractor/generic.py
+++ b/youtube_dlc/extractor/generic.py
@ -131,6 +131,7 @@
 from .rcs import RCSEmbedsIE
 from .bitchute import BitChuteIE
 from .arcpublishing import ArcPublishingIE
+from .medialaan import MedialaanIE


 class GenericIE(InfoExtractor):
@ -2224,6 +2225,20 @@ class GenericIE(InfoExtractor):
                'duration': 1581,
            },
        },
+        {
+            # MyChannels SDK embed
+            # https://www.24kitchen.nl/populair/deskundige-dit-waarom-sommigen-gevoelig-zijn-voor-voedselallergieen
+            'url': 'https://www.demorgen.be/nieuws/burgemeester-rotterdam-richt-zich-in-videoboodschap-tot-relschoppers-voelt-het-goed~b0bcfd741/',
+            'md5': '90c0699c37006ef18e198c032d81739c',
+            'info_dict': {
+                'id': '194165',
+                'ext': 'mp4',
+                'title': 'Burgemeester Aboutaleb spreekt relschoppers toe',
+                'timestamp': 1611740340,
+                'upload_date': '20210127',
+                'duration': 159,
+            },
+        },
    ]

    def report_following_redirect(self, new_url):
@ -2463,6 +2478,9 @@ def _real_extract(self, url):
        webpage = self._webpage_read_content(
            full_response, url, video_id, prefix=first_bytes)

+        if '<title>DPG Media Privacy Gate</title>' in webpage:
+            webpage = self._download_webpage(url, video_id)
+
        self.report_extraction(video_id)

        # Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest?
@ -2594,6 +2612,11 @@ def _real_extract(self, url):
        if arc_urls:
            return self.playlist_from_matches(arc_urls, video_id, video_title, ie=ArcPublishingIE.ie_key())

+        mychannels_urls = MedialaanIE._extract_urls(webpage)
+        if mychannels_urls:
+            return self.playlist_from_matches(
+                mychannels_urls, video_id, video_title, ie=MedialaanIE.ie_key())
+
        # Look for embedded rtl.nl player
        matches = re.findall(
            r'<iframe[^>]+?src="((?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)"',
--- a/youtube_dlc/extractor/googledrive.py
+++ b/youtube_dlc/extractor/googledrive.py
@ -7,6 +7,7 @@
 from ..utils import (
    determine_ext,
    ExtractorError,
+    get_element_by_class,
    int_or_none,
    lowercase_escape,
    try_get,
@ -237,7 +238,7 @@ def add_source_format(urlh):
                if confirmation_webpage:
                    confirm = self._search_regex(
                        r'confirm=([^&"\']+)', confirmation_webpage,
-                        'confirmation code', fatal=False)
+                        'confirmation code', default=None)
                    if confirm:
                        confirmed_source_url = update_url_query(source_url, {
                            'confirm': confirm,
@ -245,6 +246,11 @@ def add_source_format(urlh):
                        urlh = request_source_file(confirmed_source_url, 'confirmed source')
                        if urlh and urlh.headers.get('Content-Disposition'):
                            add_source_format(urlh)
+                    else:
+                        self.report_warning(
+                            get_element_by_class('uc-error-subcaption', confirmation_webpage)
+                            or get_element_by_class('uc-error-caption', confirmation_webpage)
+                            or 'unable to extract confirmation code')

        if not formats and reason:
            raise ExtractorError(reason, expected=True)
--- a/youtube_dlc/extractor/medialaan.py
+++ b/youtube_dlc/extractor/medialaan.py
@ -2,268 +2,113 @@

 import re

-from .gigya import GigyaBaseIE
-
-from ..compat import compat_str
+from .common import InfoExtractor
 from ..utils import (
+    extract_attributes,
    int_or_none,
-    parse_duration,
-    try_get,
-    unified_timestamp,
+    mimetype2ext,
+    parse_iso8601,
 )


-class MedialaanIE(GigyaBaseIE):
+class MedialaanIE(InfoExtractor):
    _VALID_URL = r'''(?x)
                    https?://
-                        (?:www\.|nieuws\.)?
                        (?:
-                            (?P<site_id>vtm|q2|vtmkzoom)\.be/
-                            (?:
-                                video(?:/[^/]+/id/|/?\?.*?\baid=)|
-                                (?:[^/]+/)*
-                            )
+                            (?:embed\.)?mychannels.video/embed/|
+                            embed\.mychannels\.video/(?:s(?:dk|cript)/)?production/|
+                            (?:www\.)?(?:
+                                (?:
+                                    7sur7|
+                                    demorgen|
+                                    hln|
+                                    joe|
+                                    qmusic
+                                )\.be|
+                                (?:
+                                    [abe]d|
+                                    bndestem|
+                                    destentor|
+                                    gelderlander|
+                                    pzc|
+                                    tubantia|
+                                    volkskrant
+                                )\.nl
+                            )/video/(?:[^/]+/)*[^/?&#]+~p
                        )
-                        (?P<id>[^/?#&]+)
+                        (?P<id>\d+)
                    '''
-    _NETRC_MACHINE = 'medialaan'
-    _APIKEY = '3_HZ0FtkMW_gOyKlqQzW5_0FHRC7Nd5XpXJZcDdXY4pk5eES2ZWmejRW5egwVm4ug-'
-    _SITE_TO_APP_ID = {
-        'vtm': 'vtm_watch',
-        'q2': 'q2',
-        'vtmkzoom': 'vtmkzoom',
-    }
    _TESTS = [{
-        # vod
-        'url': 'http://vtm.be/video/volledige-afleveringen/id/vtm_20170219_VM0678361_vtmwatch',
+        'url': 'https://www.bndestem.nl/video/de-terugkeer-van-ally-de-aap-en-wie-vertrekt-er-nog-bij-nac~p193993',
        'info_dict': {
-            'id': 'vtm_20170219_VM0678361_vtmwatch',
+            'id': '193993',
            'ext': 'mp4',
-            'title': 'Allemaal Chris afl. 6',
-            'description': 'md5:4be86427521e7b07e0adb0c9c554ddb2',
-            'timestamp': 1487533280,
-            'upload_date': '20170219',
-            'duration': 2562,
-            'series': 'Allemaal Chris',
-            'season': 'Allemaal Chris',
-            'season_number': 1,
-            'season_id': '256936078124527',
-            'episode': 'Allemaal Chris afl. 6',
-            'episode_number': 6,
-            'episode_id': '256936078591527',
+            'title': 'De terugkeer van Ally de Aap en wie vertrekt er nog bij NAC?',
+            'timestamp': 1611663540,
+            'upload_date': '20210126',
+            'duration': 238,
        },
        'params': {
            'skip_download': True,
        },
-        'skip': 'Requires account credentials',
    }, {
-        # clip
-        'url': 'http://vtm.be/video?aid=168332',
-        'info_dict': {
-            'id': '168332',
-            'ext': 'mp4',
-            'title': '"Veronique liegt!"',
-            'description': 'md5:1385e2b743923afe54ba4adc38476155',
-            'timestamp': 1489002029,
-            'upload_date': '20170308',
-            'duration': 96,
-        },
-    }, {
-        # vod
-        'url': 'http://vtm.be/video/volledige-afleveringen/id/257107153551000',
+        'url': 'https://www.gelderlander.nl/video/kanalen/degelderlander~c320/series/snel-nieuws~s984/noodbevel-in-doetinchem-politie-stuurt-mensen-centrum-uit~p194093',
        'only_matching': True,
    }, {
-        # vod
-        'url': 'http://vtm.be/video?aid=163157',
+        'url': 'https://embed.mychannels.video/sdk/production/193993?options=TFTFF_default',
        'only_matching': True,
    }, {
-        # vod
-        'url': 'http://www.q2.be/video/volledige-afleveringen/id/2be_20170301_VM0684442_q2',
+        'url': 'https://embed.mychannels.video/script/production/193993',
        'only_matching': True,
    }, {
-        # clip
-        'url': 'http://vtmkzoom.be/k3-dansstudio/een-nieuw-seizoen-van-k3-dansstudio',
+        'url': 'https://embed.mychannels.video/production/193993',
        'only_matching': True,
    }, {
-        # http/s redirect
-        'url': 'https://vtmkzoom.be/video?aid=45724',
-        'info_dict': {
-            'id': '257136373657000',
-            'ext': 'mp4',
-            'title': 'K3 Dansstudio Ushuaia afl.6',
-        },
-        'params': {
-            'skip_download': True,
-        },
-        'skip': 'Requires account credentials',
+        'url': 'https://mychannels.video/embed/193993',
+        'only_matching': True,
    }, {
-        # nieuws.vtm.be
-        'url': 'https://nieuws.vtm.be/stadion/stadion/genk-nog-moeilijk-programma',
+        'url': 'https://embed.mychannels.video/embed/193993',
        'only_matching': True,
    }]

-    def _real_initialize(self):
-        self._logged_in = False
-
-    def _login(self):
-        username, password = self._get_login_info()
-        if username is None:
-            self.raise_login_required()
-
-        auth_data = {
-            'APIKey': self._APIKEY,
-            'sdk': 'js_6.1',
-            'format': 'json',
-            'loginID': username,
-            'password': password,
-        }
-
-        auth_info = self._gigya_login(auth_data)
-
-        self._uid = auth_info['UID']
-        self._uid_signature = auth_info['UIDSignature']
-        self._signature_timestamp = auth_info['signatureTimestamp']
-
-        self._logged_in = True
+    @staticmethod
+    def _extract_urls(webpage):
+        # Collect embed URLs for every <div> tag marked as a mychannels video
+        # that also carries a non-empty data-mychannels-id attribute.
+        video_tags = re.findall(
+            r'(<div[^>]+data-mychannels-type="video"[^>]*>)', webpage)
+        found_ids = (
+            extract_attributes(tag).get('data-mychannels-id')
+            for tag in video_tags)
+        return [
+            'https://mychannels.video/embed/' + found_id
+            for found_id in found_ids if found_id]

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id, site_id = mobj.group('id', 'site_id')
+        production_id = self._match_id(url)
+        production = self._download_json(
+            'https://embed.mychannels.video/sdk/production/' + production_id,
+            production_id, query={'options': 'UUUU_default'})['productions'][0]
+        title = production['title']

-        webpage = self._download_webpage(url, video_id)
-
-        config = self._parse_json(
-            self._search_regex(
-                r'videoJSConfig\s*=\s*JSON\.parse\(\'({.+?})\'\);',
-                webpage, 'config', default='{}'), video_id,
-            transform_source=lambda s: s.replace(
-                '\\\\', '\\').replace(r'\"', '"').replace(r"\'", "'"))
-
-        vod_id = config.get('vodId') or self._search_regex(
-            (r'\\"vodId\\"\s*:\s*\\"(.+?)\\"',
-             r'"vodId"\s*:\s*"(.+?)"',
-             r'<[^>]+id=["\']vod-(\d+)'),
-            webpage, 'video_id', default=None)
-
-        # clip, no authentication required
-        if not vod_id:
-            player = self._parse_json(
-                self._search_regex(
-                    r'vmmaplayer\(({.+?})\);', webpage, 'vmma player',
-                    default=''),
-                video_id, transform_source=lambda s: '[%s]' % s, fatal=False)
-            if player:
-                video = player[-1]
-                if video['videoUrl'] in ('http', 'https'):
-                    return self.url_result(video['url'], MedialaanIE.ie_key())
-                info = {
-                    'id': video_id,
-                    'url': video['videoUrl'],
-                    'title': video['title'],
-                    'thumbnail': video.get('imageUrl'),
-                    'timestamp': int_or_none(video.get('createdDate')),
-                    'duration': int_or_none(video.get('duration')),
-                }
+        formats = []
+        for source in (production.get('sources') or []):
+            src = source.get('src')
+            if not src:
+                continue
+            ext = mimetype2ext(source.get('type'))
+            if ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    src, production_id, 'mp4', 'm3u8_native',
+                    m3u8_id='hls', fatal=False))
            else:
-                info = self._parse_html5_media_entries(
-                    url, webpage, video_id, m3u8_id='hls')[0]
-                info.update({
-                    'id': video_id,
-                    'title': self._html_search_meta('description', webpage),
-                    'duration': parse_duration(self._html_search_meta('duration', webpage)),
+                formats.append({
+                    'ext': ext,
+                    'url': src,
                })
-        # vod, authentication required
-        else:
-            if not self._logged_in:
-                self._login()
+        self._sort_formats(formats)

-            settings = self._parse_json(
-                self._search_regex(
-                    r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
-                    webpage, 'drupal settings', default='{}'),
-                video_id)
-
-            def get(container, item):
-                return try_get(
-                    settings, lambda x: x[container][item],
-                    compat_str) or self._search_regex(
-                    r'"%s"\s*:\s*"([^"]+)' % item, webpage, item,
-                    default=None)
-
-            app_id = get('vod', 'app_id') or self._SITE_TO_APP_ID.get(site_id, 'vtm_watch')
-            sso = get('vod', 'gigyaDatabase') or 'vtm-sso'
-
-            data = self._download_json(
-                'http://vod.medialaan.io/api/1.0/item/%s/video' % vod_id,
-                video_id, query={
-                    'app_id': app_id,
-                    'user_network': sso,
-                    'UID': self._uid,
-                    'UIDSignature': self._uid_signature,
-                    'signatureTimestamp': self._signature_timestamp,
-                })
-
-            formats = self._extract_m3u8_formats(
-                data['response']['uri'], video_id, entry_protocol='m3u8_native',
-                ext='mp4', m3u8_id='hls')
-
-            self._sort_formats(formats)
-
-            info = {
-                'id': vod_id,
-                'formats': formats,
-            }
-
-            api_key = get('vod', 'apiKey')
-            channel = get('medialaanGigya', 'channel')
-
-            if api_key:
-                videos = self._download_json(
-                    'http://vod.medialaan.io/vod/v2/videos', video_id, fatal=False,
-                    query={
-                        'channels': channel,
-                        'ids': vod_id,
-                        'limit': 1,
-                        'apikey': api_key,
-                    })
-                if videos:
-                    video = try_get(
-                        videos, lambda x: x['response']['videos'][0], dict)
-                    if video:
-                        def get(container, item, expected_type=None):
-                            return try_get(
-                                video, lambda x: x[container][item], expected_type)
-
-                        def get_string(container, item):
-                            return get(container, item, compat_str)
-
-                        info.update({
-                            'series': get_string('program', 'title'),
-                            'season': get_string('season', 'title'),
-                            'season_number': int_or_none(get('season', 'number')),
-                            'season_id': get_string('season', 'id'),
-                            'episode': get_string('episode', 'title'),
-                            'episode_number': int_or_none(get('episode', 'number')),
-                            'episode_id': get_string('episode', 'id'),
-                            'duration': int_or_none(
-                                video.get('duration')) or int_or_none(
-                                video.get('durationMillis'), scale=1000),
-                            'title': get_string('episode', 'title'),
-                            'description': get_string('episode', 'text'),
-                            'timestamp': unified_timestamp(get_string(
-                                'publication', 'begin')),
-                        })
-
-            if not info.get('title'):
-                info['title'] = try_get(
-                    config, lambda x: x['videoConfig']['title'],
-                    compat_str) or self._html_search_regex(
-                    r'\\"title\\"\s*:\s*\\"(.+?)\\"', webpage, 'title',
-                    default=None) or self._og_search_title(webpage)
-
-        if not info.get('description'):
-            info['description'] = self._html_search_regex(
-                r'<div[^>]+class="field-item\s+even">\s*<p>(.+?)</p>',
-                webpage, 'description', default=None)
-
-        return info
+        return {
+            'id': production_id,
+            'title': title,
+            'formats': formats,
+            'thumbnail': production.get('posterUrl'),
+            'timestamp': parse_iso8601(production.get('publicationDate'), ' '),
+            'duration': int_or_none(production.get('duration')) or None,
+        }
--- a/youtube_dlc/extractor/pornhub.py
+++ b/youtube_dlc/extractor/pornhub.py
@ -22,11 +22,15 @@
    orderedSet,
    remove_quotes,
    str_to_int,
+    update_url_query,
+    urlencode_postdata,
    url_or_none,
 )


 class PornHubBaseIE(InfoExtractor):
+    _NETRC_MACHINE = 'pornhub'
+
    def _download_webpage_handle(self, *args, **kwargs):
        def dl(*args, **kwargs):
            return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs)
@ -52,6 +56,66 @@ def dl(*args, **kwargs):

        return webpage, urlh

+    def _real_initialize(self):
+        # Reset the login flag; _login() returns early while it is set and
+        # flips it to True once a session is confirmed or established.
+        self._logged_in = False
+
+    def _login(self, host):
+        # Sign in to `host` unless a session is already marked as logged in.
+        # Does nothing when no credentials are configured for the site;
+        # raises ExtractorError when the authenticate endpoint reports failure.
+        if self._logged_in:
+            return
+
+        site = host.split('.')[0]
+
+        # Both sites pornhub and pornhubpremium have separate accounts
+        # so there should be an option to provide credentials for both.
+        # At the same time some videos are available under the same video id
+        # on both sites so that we have to identify them as the same video.
+        # For that purpose we have to keep both in the same extractor
+        # but under different netrc machines.
+        username, password = self._get_login_info(netrc_machine=site)
+        if username is None:
+            return
+
+        path_prefix = 'premium/' if 'premium' in host else ''
+        login_url = 'https://www.%s/%slogin' % (host, path_prefix)
+        login_page = self._download_webpage(
+            login_url, None, 'Downloading %s login page' % site)
+
+        # A sign-out control on the login page means the session (e.g. from
+        # cookies) is already authenticated, so no form submission is needed.
+        signed_in_markers = (
+            r'class=["\']signOut',
+            r'>Sign\s+[Oo]ut\s*<',
+        )
+        if any(re.search(marker, login_page) for marker in signed_in_markers):
+            self._logged_in = True
+            return
+
+        # Start from the page's hidden inputs and add the credentials on top.
+        login_form = self._hidden_inputs(login_page)
+        login_form['username'] = username
+        login_form['password'] = password
+
+        request_headers = {
+            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
+            'Referer': login_url,
+            'X-Requested-With': 'XMLHttpRequest',
+        }
+        response = self._download_json(
+            'https://www.%s/front/authenticate' % host, None,
+            'Logging in to %s' % site,
+            data=urlencode_postdata(login_form),
+            headers=request_headers)
+
+        if response.get('success') == '1':
+            self._logged_in = True
+            return
+
+        # Prefer the server-provided reason (reported as an expected error);
+        # otherwise fall back to a generic failure.
+        message = response.get('message')
+        if message is None:
+            raise ExtractorError('Unable to log in')
+        raise ExtractorError(
+            'Unable to login: %s' % message, expected=True)
+

 class PornHubIE(PornHubBaseIE):
    IE_DESC = 'PornHub and Thumbzilla'
@ -163,12 +227,20 @@ class PornHubIE(PornHubBaseIE):
    }, {
        'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5e4acdae54a82',
        'only_matching': True,
+    }, {
+        # Some videos are available with the same id on both premium
+        # and non-premium sites (e.g. this and the following test)
+        'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5f75b0f4b18e3',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5f75b0f4b18e3',
+        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        return re.findall(
-            r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub\.(?:com|net|org)/embed/[\da-z]+)',
+            r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub(?:premium)?\.(?:com|net|org)/embed/[\da-z]+)',
            webpage)

    def _extract_count(self, pattern, webpage, name):
@ -180,12 +252,7 @@ def _real_extract(self, url):
        host = mobj.group('host') or 'pornhub.com'
        video_id = mobj.group('id')

-        if 'premium' in host:
-            if not self._downloader.params.get('cookiefile'):
-                raise ExtractorError(
-                    'PornHub Premium requires authentication.'
-                    ' You may want to use --cookies.',
-                    expected=True)
+        self._login(host)

        self._set_cookie(host, 'age_verified', '1')

@ -405,6 +472,10 @@ def extract_list(meta_key):


 class PornHubPlaylistBaseIE(PornHubBaseIE):
+    def _extract_page(self, url):
+        # Pull the digits that follow "page=" out of the URL (if any) and
+        # hand them to int_or_none for conversion.
+        page_str = self._search_regex(
+            r'\bpage=(\d+)', url, 'page', default=None)
+        return int_or_none(page_str)
+
    def _extract_entries(self, webpage, host):
        # Only process container div with main playlist content skipping
        # drop-down menu that uses similar pattern for videos (see
@ -422,26 +493,6 @@ def _extract_entries(self, webpage, host):
                container))
        ]

-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        host = mobj.group('host')
-        playlist_id = mobj.group('id')
-
-        webpage = self._download_webpage(url, playlist_id)
-
-        entries = self._extract_entries(webpage, host)
-
-        playlist = self._parse_json(
-            self._search_regex(
-                r'(?:playlistObject|PLAYLIST_VIEW)\s*=\s*({.+?});', webpage,
-                'playlist', default='{}'),
-            playlist_id, fatal=False)
-        title = playlist.get('title') or self._search_regex(
-            r'>Videos\s+in\s+(.+?)\s+[Pp]laylist<', webpage, 'title', fatal=False)
-
-        return self.playlist_result(
-            entries, playlist_id, title, playlist.get('description'))
-

 class PornHubUserIE(PornHubPlaylistBaseIE):
    _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)'
@ -463,14 +514,27 @@ class PornHubUserIE(PornHubPlaylistBaseIE):
    }, {
        'url': 'https://www.pornhub.com/model/zoe_ph?abc=1',
        'only_matching': True,
+    }, {
+        # Unavailable via /videos page, but available with direct pagination
+        # on pornstar page (see [1]), requires premium
+        # 1. https://github.com/ytdl-org/youtube-dl/issues/27853
+        'url': 'https://www.pornhubpremium.com/pornstar/sienna-west',
+        'only_matching': True,
+    }, {
+        # Same as before, multi page
+        'url': 'https://www.pornhubpremium.com/pornstar/lily-labeau',
+        'only_matching': True,
    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        user_id = mobj.group('id')
+        videos_url = '%s/videos' % mobj.group('url')
+        page = self._extract_page(url)
+        if page:
+            videos_url = update_url_query(videos_url, {'page': page})
        return self.url_result(
-            '%s/videos' % mobj.group('url'), ie=PornHubPagedVideoListIE.ie_key(),
-            video_id=user_id)
+            videos_url, ie=PornHubPagedVideoListIE.ie_key(), video_id=user_id)


 class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
@ -483,32 +547,55 @@ def _has_more(webpage):
                <button[^>]+\bid=["\']moreDataBtn
            ''', webpage) is not None

-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        host = mobj.group('host')
-        item_id = mobj.group('id')
+    def _entries(self, url, host, item_id):
+        page = self._extract_page(url)

-        page = int_or_none(self._search_regex(
-            r'\bpage=(\d+)', url, 'page', default=None))
+        VIDEOS = '/videos'

-        entries = []
-        for page_num in (page, ) if page is not None else itertools.count(1):
+        def download_page(base_url, num, fallback=False):
+            note = 'Downloading page %d%s' % (num, ' (switch to fallback)' if fallback else '')
+            return self._download_webpage(
+                base_url, item_id, note, query={'page': num})
+
+        def is_404(e):
+            return isinstance(e.cause, compat_HTTPError) and e.cause.code == 404
+
+        base_url = url
+        has_page = page is not None
+        first_page = page if has_page else 1
+        for page_num in (first_page, ) if has_page else itertools.count(first_page):
            try:
-                webpage = self._download_webpage(
-                    url, item_id, 'Downloading page %d' % page_num,
-                    query={'page': page_num})
+                try:
+                    webpage = download_page(base_url, page_num)
+                except ExtractorError as e:
+                    # Some sources may not be available via /videos page,
+                    # trying to fallback to main page pagination (see [1])
+                    # 1. https://github.com/ytdl-org/youtube-dl/issues/27853
+                    if is_404(e) and page_num == first_page and VIDEOS in base_url:
+                        base_url = base_url.replace(VIDEOS, '')
+                        webpage = download_page(base_url, page_num, fallback=True)
+                    else:
+                        raise
            except ExtractorError as e:
-                if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
+                if is_404(e) and page_num != first_page:
                    break
                raise
            page_entries = self._extract_entries(webpage, host)
            if not page_entries:
                break
-            entries.extend(page_entries)
+            for e in page_entries:
+                yield e
            if not self._has_more(webpage):
                break

-        return self.playlist_result(orderedSet(entries), item_id)
+    def _real_extract(self, url):
+        # Resolve host and list id from the URL, attempt login for that host,
+        # then hand the lazily generated entries to playlist_result.
+        host, item_id = re.match(self._VALID_URL, url).group('host', 'id')
+
+        self._login(host)
+
+        entries = self._entries(url, host, item_id)
+        return self.playlist_result(entries, item_id)


 class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
--- a/youtube_dlc/extractor/svt.py
+++ b/youtube_dlc/extractor/svt.py
@ -255,8 +255,10 @@ def _real_extract(self, url):
            svt_id = self._search_regex(
                (r'<video[^>]+data-video-id=["\']([\da-zA-Z-]+)',
                 r'["\']videoSvtId["\']\s*:\s*["\']([\da-zA-Z-]+)',
+                 r'["\']videoSvtId\\?["\']\s*:\s*\\?["\']([\da-zA-Z-]+)',
                 r'"content"\s*:\s*{.*?"id"\s*:\s*"([\da-zA-Z-]+)"',
-                 r'["\']svtId["\']\s*:\s*["\']([\da-zA-Z-]+)'),
+                 r'["\']svtId["\']\s*:\s*["\']([\da-zA-Z-]+)',
+                 r'["\']svtId\\?["\']\s*:\s*\\?["\']([\da-zA-Z-]+)'),
                webpage, 'video id')

        info_dict = self._extract_by_video_id(svt_id, webpage)
--- a/youtube_dlc/extractor/tv2.py
+++ b/youtube_dlc/extractor/tv2.py
@ -20,7 +20,7 @@

 class TV2IE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?tv2\.no/v/(?P<id>\d+)'
-    _TEST = {
+    _TESTS = [{
        'url': 'http://www.tv2.no/v/916509/',
        'info_dict': {
            'id': '916509',
@ -33,7 +33,7 @@ class TV2IE(InfoExtractor):
            'view_count': int,
            'categories': list,
        },
-    }
+    }]
    _API_DOMAIN = 'sumo.tv2.no'
    _PROTOCOLS = ('HDS', 'HLS', 'DASH')
    _GEO_COUNTRIES = ['NO']
@ -42,6 +42,12 @@ def _real_extract(self, url):
        video_id = self._match_id(url)
        api_base = 'http://%s/api/web/asset/%s' % (self._API_DOMAIN, video_id)

+        asset = self._download_json(
+            api_base + '.json', video_id,
+            'Downloading metadata JSON')['asset']
+        title = asset.get('subtitle') or asset['title']
+        is_live = asset.get('live') is True
+
        formats = []
        format_urls = []
        for protocol in self._PROTOCOLS:
@ -81,7 +87,8 @@ def _real_extract(self, url):
                elif ext == 'm3u8':
                    if not data.get('drmProtected'):
                        formats.extend(self._extract_m3u8_formats(
-                            video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                            video_url, video_id, 'mp4',
+                            'm3u8' if is_live else 'm3u8_native',
                            m3u8_id=format_id, fatal=False))
                elif ext == 'mpd':
                    formats.extend(self._extract_mpd_formats(
@ -99,11 +106,6 @@ def _real_extract(self, url):
            raise ExtractorError('This video is DRM protected.', expected=True)
        self._sort_formats(formats)

-        asset = self._download_json(
-            api_base + '.json', video_id,
-            'Downloading metadata JSON')['asset']
-        title = asset['title']
-
        thumbnails = [{
            'id': thumbnail.get('@type'),
            'url': thumbnail.get('url'),
@ -112,7 +114,7 @@ def _real_extract(self, url):
        return {
            'id': video_id,
            'url': video_url,
-            'title': title,
+            'title': self._live_title(title) if is_live else title,
            'description': strip_or_none(asset.get('description')),
            'thumbnails': thumbnails,
            'timestamp': parse_iso8601(asset.get('createTime')),
@ -120,6 +122,7 @@ def _real_extract(self, url):
            'view_count': int_or_none(asset.get('views')),
            'categories': asset.get('keywords', '').split(','),
            'formats': formats,
+            'is_live': is_live,
        }


@ -168,13 +171,13 @@ def _real_extract(self, url):


 class KatsomoIE(TV2IE):
-    _VALID_URL = r'https?://(?:www\.)?(?:katsomo|mtv)\.fi/(?:#!/)?(?:[^/]+/[0-9a-z-]+-\d+/[0-9a-z-]+-|[^/]+/\d+/[^/]+/)(?P<id>\d+)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?(?:katsomo|mtv(uutiset)?)\.fi/(?:sarja/[0-9a-z-]+-\d+/[0-9a-z-]+-|(?:#!/)?jakso/(?:\d+/[^/]+/)?|video/prog)(?P<id>\d+)'
+    _TESTS = [{
        'url': 'https://www.mtv.fi/sarja/mtv-uutiset-live-33001002003/lahden-pelicans-teki-kovan-ratkaisun-ville-nieminen-pihalle-1181321',
        'info_dict': {
            'id': '1181321',
            'ext': 'mp4',
-            'title': 'MTV Uutiset Live',
+            'title': 'Lahden Pelicans teki kovan ratkaisun – Ville Nieminen pihalle',
            'description': 'Päätöksen teki Pelicansin hallitus.',
            'timestamp': 1575116484,
            'upload_date': '20191130',
@ -186,7 +189,60 @@ class KatsomoIE(TV2IE):
            # m3u8 download
            'skip_download': True,
        },
-    }
+    }, {
+        'url': 'http://www.katsomo.fi/#!/jakso/33001005/studio55-fi/658521/jukka-kuoppamaki-tekee-yha-lauluja-vaikka-lentokoneessa',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.mtvuutiset.fi/video/prog1311159',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.katsomo.fi/#!/jakso/1311159',
+        'only_matching': True,
+    }]
    _API_DOMAIN = 'api.katsomo.fi'
    _PROTOCOLS = ('HLS', 'MPD')
    _GEO_COUNTRIES = ['FI']
+
+
+class MTVUutisetArticleIE(InfoExtractor):
+    # Article pages on mtvuutiset.fi. The "www." prefix is optional so that
+    # bare-domain article links are matched as well.
+    _VALID_URL = r'https?://(?:www\.)?mtvuutiset\.fi/artikkeli/[^/]+/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://www.mtvuutiset.fi/artikkeli/tallaisia-vaurioita-viking-amorellassa-on-useamman-osaston-alla-vetta/7931384',
+        'info_dict': {
+            'id': '1311159',
+            'ext': 'mp4',
+            'title': 'Viking Amorellan matkustajien evakuointi on alkanut – tältä operaatio näyttää laivalla',
+            'description': 'Viking Amorellan matkustajien evakuointi on alkanut – tältä operaatio näyttää laivalla',
+            'timestamp': 1600608966,
+            'upload_date': '20200920',
+            'duration': 153.7886666,
+            'view_count': int,
+            'categories': list,
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }, {
+        # multiple Youtube embeds
+        'url': 'https://www.mtvuutiset.fi/artikkeli/50-vuotta-subarun-vastaiskua/6070962',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        # Download the article's JSON description and return a playlist of
+        # the videos it lists, titled and described after the article.
+        article_id = self._match_id(url)
+        article = self._download_json(
+            'http://api.mtvuutiset.fi/mtvuutiset/api/json/' + article_id,
+            article_id)
+
+        def entries():
+            # Lazily yield one url_result per usable video; a missing or
+            # null 'videos' field is treated as an empty list.
+            for video in (article.get('videos') or []):
+                video_type = video.get('videotype')
+                video_url = video.get('url')
+                # Only katsomo and youtube items that carry a URL are handled.
+                if not (video_url and video_type in ('katsomo', 'youtube')):
+                    continue
+                # The capitalized type ('Katsomo' / 'Youtube') is passed as
+                # the name of the extractor that should handle the URL.
+                yield self.url_result(
+                    video_url, video_type.capitalize(), video.get('video_id'))
+
+        return self.playlist_result(
+            entries(), article_id, article.get('title'), article.get('description'))
--- a/youtube_dlc/extractor/tv4.py
+++ b/youtube_dlc/extractor/tv4.py
@ -17,7 +17,7 @@ class TV4IE(InfoExtractor):
            tv4\.se/(?:[^/]+)/klipp/(?:.*)-|
            tv4play\.se/
            (?:
-                (?:program|barn)/(?:[^/]+/|(?:[^\?]+)\?video_id=)|
+                (?:program|barn)/(?:(?:[^/]+/){1,2}|(?:[^\?]+)\?video_id=)|
                iframe/video/|
                film/|
                sport/|
@ -65,6 +65,10 @@ class TV4IE(InfoExtractor):
        {
            'url': 'http://www.tv4play.se/program/farang/3922081',
            'only_matching': True,
+        },
+        {
+            'url': 'https://www.tv4play.se/program/nyheterna/avsnitt/13315940',
+            'only_matching': True,
        }
    ]

--- a/youtube_dlc/extractor/vidio.py
+++ b/youtube_dlc/extractor/vidio.py
@ -4,7 +4,13 @@
 import re

 from .common import InfoExtractor
-from ..utils import int_or_none
+from ..utils import (
+    int_or_none,
+    parse_iso8601,
+    str_or_none,
+    strip_or_none,
+    try_get,
+)


 class VidioIE(InfoExtractor):
@@ -21,57 +27,63 @@ class VidioIE(InfoExtractor):
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 149,
            'like_count': int,
+            'uploader': 'TWELVE Pic',
+            'timestamp': 1444902800,
+            'upload_date': '20151015',
+            'uploader_id': 'twelvepictures',
+            'channel': 'Cover Music Video',
+            'channel_id': '280236',
+            'view_count': int,
+            'dislike_count': int,
+            'comment_count': int,
+            'tags': 'count:4',
        },
    }, {
        'url': 'https://www.vidio.com/watch/77949-south-korea-test-fires-missile-that-can-strike-all-of-the-north',
        'only_matching': True,
    }]

+    def _real_initialize(self):
+        self._api_key = self._download_json(
+            'https://www.vidio.com/auth', None, data=b'')['api_key']
+
    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id, display_id = mobj.group('id', 'display_id')
+        video_id, display_id = re.match(self._VALID_URL, url).groups()
+        data = self._download_json(
+            'https://api.vidio.com/videos/' + video_id, display_id, headers={
+                'Content-Type': 'application/vnd.api+json',
+                'X-API-KEY': self._api_key,
+            })
+        video = data['videos'][0]
+        title = video['title'].strip()

-        webpage = self._download_webpage(url, display_id)
-
-        title = self._og_search_title(webpage)
-
-        m3u8_url, duration, thumbnail = [None] * 3
-
-        clips = self._parse_json(
-            self._html_search_regex(
-                r'data-json-clips\s*=\s*(["\'])(?P<data>\[.+?\])\1',
-                webpage, 'video data', default='[]', group='data'),
-            display_id, fatal=False)
-        if clips:
-            clip = clips[0]
-            m3u8_url = clip.get('sources', [{}])[0].get('file')
-            duration = clip.get('clip_duration')
-            thumbnail = clip.get('image')
-
-        m3u8_url = m3u8_url or self._search_regex(
-            r'data(?:-vjs)?-clip-hls-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
-            webpage, 'hls url', group='url')
        formats = self._extract_m3u8_formats(
-            m3u8_url, display_id, 'mp4', entry_protocol='m3u8_native')
+            data['clips'][0]['hls_url'], display_id, 'mp4', 'm3u8_native')
        self._sort_formats(formats)

-        duration = int_or_none(duration or self._search_regex(
-            r'data-video-duration=(["\'])(?P<duration>\d+)\1', webpage,
-            'duration', fatal=False, group='duration'))
-        thumbnail = thumbnail or self._og_search_thumbnail(webpage)
-
-        like_count = int_or_none(self._search_regex(
-            (r'<span[^>]+data-comment-vote-count=["\'](\d+)',
-             r'<span[^>]+class=["\'].*?\blike(?:__|-)count\b.*?["\'][^>]*>\s*(\d+)'),
-            webpage, 'like count', fatal=False))
+        get_first = lambda x: try_get(data, lambda y: y[x + 's'][0], dict) or {}
+        channel = get_first('channel')
+        user = get_first('user')
+        username = user.get('username')
+        get_count = lambda x: int_or_none(video.get('total_' + x))

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
-            'description': self._og_search_description(webpage),
-            'thumbnail': thumbnail,
-            'duration': duration,
-            'like_count': like_count,
+            'description': strip_or_none(video.get('description')),
+            'thumbnail': video.get('image_url_medium'),
+            'duration': int_or_none(video.get('duration')),
+            'like_count': get_count('likes'),
            'formats': formats,
+            'uploader': user.get('name'),
+            'timestamp': parse_iso8601(video.get('created_at')),
+            'uploader_id': username,
+            'uploader_url': 'https://www.vidio.com/@' + username if username else None,
+            'channel': channel.get('name'),
+            'channel_id': str_or_none(channel.get('id')),
+            'view_count': get_count('view_count'),
+            'dislike_count': get_count('dislikes'),
+            'comment_count': get_count('comments'),
+            'tags': video.get('tag_list'),
        }
--- a/youtube_dlc/extractor/vlive.py
+++ b/youtube_dlc/extractor/vlive.py
@@ -125,7 +125,7 @@ def _call_api(self, path_template, video_id, fields=None, limit=None):
                headers={'Referer': 'https://www.vlive.tv/'}, query=query)
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
-                self.raise_login_required(json.loads(e.cause.read().decode())['message'])
+                self.raise_login_required(json.loads(e.cause.read().decode('utf-8'))['message'])
            raise

    def _real_extract(self, url):
--- a/youtube_dlc/extractor/vtm.py
+++ b/youtube_dlc/extractor/vtm.py
@@ -0,0 +1,62 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    parse_iso8601,
+    try_get,
+)
+
+
+class VTMIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?vtm\.be/([^/?&#]+)~v(?P<id>[0-9a-f]{8}(?:-[0-9a-f]{4}){3}-[0-9a-f]{12})'
+    _TEST = {
+        'url': 'https://vtm.be/gast-vernielt-genkse-hotelkamer~ve7534523-279f-4b4d-a5c9-a33ffdbe23e1',
+        'md5': '37dca85fbc3a33f2de28ceb834b071f8',
+        'info_dict': {
+            'id': '192445',
+            'ext': 'mp4',
+            'title': 'Gast vernielt Genkse hotelkamer',
+            'timestamp': 1611060180,
+            'upload_date': '20210119',
+            'duration': 74,
+            # TODO: fix url _type result processing
+            # 'series': 'Op Interventie',
+        }
+    }
+
+    def _real_extract(self, url):
+        uuid = self._match_id(url)
+        video = self._download_json(
+            'https://omc4vm23offuhaxx6hekxtzspi.appsync-api.eu-west-1.amazonaws.com/graphql',
+            uuid, query={
+                'query': '''{
+  getComponent(type: Video, uuid: "%s") {
+    ... on Video {
+      description
+      duration
+      myChannelsVideo
+      program {
+        title
+      }
+      publishedAt
+      title
+    }
+  }
+}''' % uuid,
+            }, headers={
+                'x-api-key': 'da2-lz2cab4tfnah3mve6wiye4n77e',
+            })['data']['getComponent']
+
+        return {
+            '_type': 'url',
+            'id': uuid,
+            'title': video.get('title'),
+            'url': 'http://mychannels.video/embed/%d' % video['myChannelsVideo'],
+            'description': video.get('description'),
+            'timestamp': parse_iso8601(video.get('publishedAt')),
+            'duration': int_or_none(video.get('duration')),
+            'series': try_get(video, lambda x: x['program']['title']),
+            'ie_key': 'Medialaan',
+        }
--- a/youtube_dlc/extractor/vvvvid.py
+++ b/youtube_dlc/extractor/vvvvid.py
@@ -4,6 +4,7 @@
 import re

 from .common import InfoExtractor
+from .youtube import YoutubeIE
 from ..utils import (
    ExtractorError,
    int_or_none,
@@ -47,6 +48,22 @@ class VVVVIDIE(InfoExtractor):
        'params': {
            'skip_download': True,
        },
+    }, {
+        # video_type == 'video/youtube'
+        'url': 'https://www.vvvvid.it/show/404/one-punch-man/406/486683/trailer',
+        'md5': '33e0edfba720ad73a8782157fdebc648',
+        'info_dict': {
+            'id': 'RzmFKUDOUgw',
+            'ext': 'mp4',
+            'title': 'Trailer',
+            'upload_date': '20150906',
+            'description': 'md5:a5e802558d35247fee285875328c0b80',
+            'uploader_id': 'BandaiVisual',
+            'uploader': 'BANDAI NAMCO Arts Channel',
+        },
+        'params': {
+            'skip_download': True,
+        },
    }, {
        'url': 'https://www.vvvvid.it/show/434/perche-dovrei-guardarlo-di-dario-moccia/437/489048',
        'only_matching': True
@@ -154,12 +171,13 @@ def metadata_from_url(r_url):
                    if season_number:
                        info['season_number'] = int(season_number)

-        for quality in ('_sd', ''):
+        video_type = video_data.get('video_type')
+        is_youtube = False
+        for quality in ('', '_sd'):
            embed_code = video_data.get('embed_info' + quality)
            if not embed_code:
                continue
            embed_code = ds(embed_code)
-            video_type = video_data.get('video_type')
            if video_type in ('video/rcs', 'video/kenc'):
                if video_type == 'video/kenc':
                    kenc = self._download_json(
@@ -172,19 +190,28 @@ def metadata_from_url(r_url):
                    if kenc_message:
                        embed_code += '?' + ds(kenc_message)
                formats.extend(self._extract_akamai_formats(embed_code, video_id))
+            elif video_type == 'video/youtube':
+                info.update({
+                    '_type': 'url_transparent',
+                    'ie_key': YoutubeIE.ie_key(),
+                    'url': embed_code,
+                })
+                is_youtube = True
+                break
            else:
                formats.extend(self._extract_wowza_formats(
                    'http://sb.top-ix.org/videomg/_definst_/mp4:%s/playlist.m3u8' % embed_code, video_id))
            metadata_from_url(embed_code)

-        self._sort_formats(formats)
+        if not is_youtube:
+            self._sort_formats(formats)
+            info['formats'] = formats

        metadata_from_url(video_data.get('thumbnail'))
        info.update(self._extract_common_video_info(video_data))
        info.update({
            'id': video_id,
            'title': title,
-            'formats': formats,
            'duration': int_or_none(video_data.get('length')),
            'series': video_data.get('show_title'),
            'season_id': season_id,
--- a/youtube_dlc/extractor/zype.py
+++ b/youtube_dlc/extractor/zype.py
@@ -87,11 +87,16 @@ def _real_extract(self, url):
                r'(["\'])(?P<url>(?:(?!\1).)+\.m3u8(?:(?!\1).)*)\1',
                body, 'm3u8 url', group='url', default=None)
            if not m3u8_url:
-                source = self._parse_json(self._search_regex(
-                    r'(?s)sources\s*:\s*\[\s*({.+?})\s*\]', body,
-                    'source'), video_id, js_to_json)
-                if source.get('integration') == 'verizon-media':
-                    m3u8_url = 'https://content.uplynk.com/%s.m3u8' % source['id']
+                source = self._search_regex(
+                    r'(?s)sources\s*:\s*\[\s*({.+?})\s*\]', body, 'source')
+
+                def get_attr(key):
+                    return self._search_regex(
+                        r'\b%s\s*:\s*([\'"])(?P<val>(?:(?!\1).)+)\1' % key,
+                        source, key, group='val')
+
+                if get_attr('integration') == 'verizon-media':
+                    m3u8_url = 'https://content.uplynk.com/%s.m3u8' % get_attr('id')
            formats = self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
            text_tracks = self._search_regex(