From c92e4e625e9e6bbbbf8e3b20c3e7ebe57c16072d Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 25 May 2024 18:00:33 -0500 Subject: [PATCH] [ie/tele5] Overhaul extractor (#10024) Closes #3051, Closes #7955, Closes #8501, Closes #9792 Authored by: bashonly --- yt_dlp/extractor/tele5.py | 134 +++++++++++++++++--------------------- 1 file changed, 61 insertions(+), 73 deletions(-) diff --git a/yt_dlp/extractor/tele5.py b/yt_dlp/extractor/tele5.py index 72f67e402..a45537541 100644 --- a/yt_dlp/extractor/tele5.py +++ b/yt_dlp/extractor/tele5.py @@ -1,89 +1,77 @@ -from .dplay import DPlayIE -from ..compat import compat_urlparse -from ..utils import ( - ExtractorError, - extract_attributes, -) +import functools + +from .dplay import DiscoveryPlusBaseIE +from ..utils import join_nonempty +from ..utils.traversal import traverse_obj -class Tele5IE(DPlayIE): # XXX: Do not subclass from concrete IE - _WORKING = False - _VALID_URL = r'https?://(?:www\.)?tele5\.de/(?:[^/]+/)*(?P[^/?#&]+)' - _GEO_COUNTRIES = ['DE'] +class Tele5IE(DiscoveryPlusBaseIE): + _VALID_URL = r'https?://(?:www\.)?tele5\.de/(?P[\w-]+)/(?P[\w-]+)(?:/(?P[\w-]+))?' _TESTS = [{ - 'url': 'https://www.tele5.de/mediathek/filme-online/videos?vid=1549416', + # slug_a and slug_b + 'url': 'https://tele5.de/mediathek/stargate-atlantis/quarantane', 'info_dict': { - 'id': '1549416', + 'id': '6852024', 'ext': 'mp4', - 'upload_date': '20180814', - 'timestamp': 1534290623, - 'title': 'Pandorum', + 'title': 'Quarantäne', + 'description': 'md5:6af0373bd0fcc4f13e5d47701903d675', + 'episode': 'Episode 73', + 'episode_number': 73, + 'season': 'Season 4', + 'season_number': 4, + 'series': 'Stargate Atlantis', + 'upload_date': '20240525', + 'timestamp': 1716643200, + 'duration': 2503.2, + 'thumbnail': 'https://eu1-prod-images.disco-api.com/2024/05/21/c81fcb45-8902-309b-badb-4e6d546b575d.jpeg', + 'creators': ['Tele5'], + 'tags': [], }, - 'params': { - 'skip_download': True, - }, - 'skip': 'No longer available: "404 Seite nicht gefunden"', }, { - # jwplatform, nexx unavailable - 'url': 'https://www.tele5.de/filme/ghoul-das-geheimnis-des-friedhofmonsters/', + # only slug_a + 'url': 'https://tele5.de/mediathek/inside-out', 'info_dict': { - 'id': 'WJuiOlUp', + 'id': '6819502', 'ext': 'mp4', - 'upload_date': '20200603', - 'timestamp': 1591214400, - 'title': 'Ghoul - Das Geheimnis des Friedhofmonsters', - 'description': 'md5:42002af1d887ff3d5b2b3ca1f8137d97', + 'title': 'Inside out', + 'description': 'md5:7e5f32ed0be5ddbd27713a34b9293bfd', + 'series': 'Inside out', + 'upload_date': '20240523', + 'timestamp': 1716494400, + 'duration': 5343.4, + 'thumbnail': 'https://eu1-prod-images.disco-api.com/2024/05/15/181eba3c-f9f0-3faf-b14d-0097050a3aa4.jpeg', + 'creators': ['Tele5'], + 'tags': [], }, - 'params': { - 'skip_download': True, - }, - 'skip': 'No longer available, redirects to Filme page', }, { - 'url': 'https://tele5.de/mediathek/angel-of-mine/', + # playlist + 'url': 'https://tele5.de/mediathek/schlefaz', 'info_dict': { - 'id': '1252360', - 'ext': 'mp4', - 'upload_date': '20220109', - 'timestamp': 1641762000, - 'title': 'Angel of Mine', - 'description': 'md5:a72546a175e1286eb3251843a52d1ad7', + 'id': 'mediathek-schlefaz', }, - 'params': { - 'format': 'bestvideo', - }, - }, { - 'url': 'https://www.tele5.de/kalkofes-mattscheibe/video-clips/politik-und-gesellschaft?ve_id=1551191', - 'only_matching': True, - }, { - 'url': 'https://www.tele5.de/video-clip/?ve_id=1609440', - 'only_matching': True, - }, { - 'url': 'https://www.tele5.de/filme/schlefaz-dragon-crusaders/', - 'only_matching': True, - }, { - 'url': 'https://www.tele5.de/filme/making-of/avengers-endgame/', - 'only_matching': True, - }, { - 'url': 'https://www.tele5.de/star-trek/raumschiff-voyager/ganze-folge/das-vinculum/', - 'only_matching': True, - }, { - 'url': 'https://www.tele5.de/anders-ist-sevda/', - 'only_matching': True, + 'playlist_mincount': 3, }] def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - player_element = self._search_regex(r'(]+?>)', webpage, 'video player') - player_info = extract_attributes(player_element) - asset_id, country, realm = (player_info[x] for x in ('assetid', 'locale', 'realm', )) - endpoint = compat_urlparse.urlparse(player_info['endpoint']).hostname - source_type = player_info.get('sourcetype') - if source_type: - endpoint = '%s-%s' % (source_type, endpoint) - try: - return self._get_disco_api_info(url, asset_id, endpoint, realm, country) - except ExtractorError as e: - if getattr(e, 'message', '') == 'Missing deviceId in context': - self.report_drm(video_id) - raise + parent_slug, slug_a, slug_b = self._match_valid_url(url).group('parent_slug', 'slug_a', 'slug_b') + playlist_id = join_nonempty(parent_slug, slug_a, slug_b, delim='-') + + query = {'environment': 'tele5', 'v': '2'} + if not slug_b: + endpoint = f'page/{slug_a}' + query['parent_slug'] = parent_slug + else: + endpoint = f'videos/{slug_b}' + query['filter[show.slug]'] = slug_a + cms_data = self._download_json(f'https://de-api.loma-cms.com/feloma/{endpoint}/', playlist_id, query=query) + + return self.playlist_result(map( + functools.partial(self._get_disco_api_info, url, disco_host='eu1-prod.disco-api.com', realm='dmaxde', country='DE'), + traverse_obj(cms_data, ('blocks', ..., 'videoId', {str}))), playlist_id) + + def _update_disco_api_headers(self, headers, disco_base, display_id, realm): + headers.update({ + 'x-disco-params': f'realm={realm}', + 'x-disco-client': 'Alps:HyogaPlayer:0.0.0', + 'Authorization': self._get_auth(disco_base, display_id, realm), + })