From b40471282286bd2b09c485bf79afd271d229272c Mon Sep 17 00:00:00 2001
From: Elyse <26639800+elyse0@users.noreply.github.com>
Date: Sat, 4 Mar 2023 13:41:41 -0600
Subject: [PATCH] [extractor/telecaribe] Add extractor (#6311)

Authored by: elyse0
Closes #6001
---
Notes (review):
    Summary of what this patch adds, as read from the diff below.

    * _extractors.py: imports TelecaribePlayIE from the new module.
    * telecaribe.py: new TelecaribePlayIE extractor.
      - _VALID_URL matches play.telecaribe.co/SLUG (optional www.) and
        captures SLUG as the named group "id".
      - _download_player_webpage(webpage, display_id): reads a page id from
        the webpage (window.firstPageId, or an element id starting with
        "pageBackground_"), finds the href of the element whose id is
        "features_PAGEID", downloads that URL as JSON, takes
        props.render.compProps, and downloads the last "url" value found
        in it. Returns that downloaded page.
      - _get_clean_title(title): removes the "| Telecaribe VOD" marker and
        surrounding whitespace; returns None when nothing is left
        (including when title is None or empty).
      - _real_extract(url): for every id except "live", returns a playlist
        built from the .mp4 hrefs found in the player page; for "live",
        extracts HLS formats/subtitles from the URL assigned to a
        let/const/var named "source" and returns an entry with
        is_live=True.

    NOTE(review): this copy of the patch had its line breaks collapsed and
    four angle-bracket fragments stripped from the regexes. The named group
    in _VALID_URL is restored as "id" (required by _match_id). The tag
    names "div", "link" and "a" in the three HTML regexes are
    reconstructed, not copied; confirm against commit b40471282286.

 yt_dlp/extractor/_extractors.py |  1 +
 yt_dlp/extractor/telecaribe.py  | 77 +++++++++++++++++++++++++++++++++
 2 files changed, 78 insertions(+)
 create mode 100644 yt_dlp/extractor/telecaribe.py

diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index ccac634b3..b7bce6a5e 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -1854,6 +1854,7 @@
 from .tele5 import Tele5IE
 from .tele13 import Tele13IE
 from .telebruxelles import TeleBruxellesIE
+from .telecaribe import TelecaribePlayIE
 from .telecinco import TelecincoIE
 from .telegraaf import TelegraafIE
 from .telegram import TelegramEmbedIE
diff --git a/yt_dlp/extractor/telecaribe.py b/yt_dlp/extractor/telecaribe.py
new file mode 100644
index 000000000..b6d88a809
--- /dev/null
+++ b/yt_dlp/extractor/telecaribe.py
@@ -0,0 +1,77 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import traverse_obj
+
+
+class TelecaribePlayIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?play\.telecaribe\.co/(?P<id>[\w-]+)'
+    _TESTS = [{
+        'url': 'https://www.play.telecaribe.co/breicok',
+        'info_dict': {
+            'id': 'breicok',
+            'title': 'Breicok',
+        },
+        'playlist_count': 7,
+    }, {
+        'url': 'https://www.play.telecaribe.co/si-fue-gol-de-yepes',
+        'info_dict': {
+            'id': 'si-fue-gol-de-yepes',
+            'title': 'Sí Fue Gol de Yepes',
+        },
+        'playlist_count': 6,
+    }, {
+        'url': 'https://www.play.telecaribe.co/ciudad-futura',
+        'info_dict': {
+            'id': 'ciudad-futura',
+            'title': 'Ciudad Futura',
+        },
+        'playlist_count': 10,
+    }, {
+        'url': 'https://www.play.telecaribe.co/live',
+        'info_dict': {
+            'id': 'live',
+            'title': r're:^Señal en vivo',
+            'live_status': 'is_live',
+            'ext': 'mp4',
+        },
+        'params': {
+            'skip_download': 'Livestream',
+        }
+    }]
+
+    def _download_player_webpage(self, webpage, display_id):
+        page_id = self._search_regex(
+            (r'window.firstPageId\s*=\s*["\']([^"\']+)', r'<div[^>]+id\s*=\s*"pageBackground_([^"]+)'),
+            webpage, 'page_id')
+
+        props = self._download_json(self._search_regex(
+            rf'<link[^>]+href\s*=\s*"([^"]+)"[^>]+id\s*=\s*"features_{page_id}"',
+            webpage, 'json_props_url'), display_id)['props']['render']['compProps']
+
+        return self._download_webpage(traverse_obj(props, (..., 'url'))[-1], display_id)
+
+    def _get_clean_title(self, title):
+        return re.sub(r'\s*\|\s*Telecaribe\s*VOD', '', title or '').strip() or None
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        player = self._download_player_webpage(webpage, display_id)
+
+        if display_id != 'live':
+            return self.playlist_from_matches(
+                re.findall(r'<a[^>]+href\s*=\s*"([^"]+\.mp4)', player), display_id,
+                self._get_clean_title(self._og_search_title(webpage)))
+
+        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+            self._search_regex(r'(?:let|const|var)\s+source\s*=\s*["\']([^"\']+)', player, 'm3u8 url'),
+            display_id, 'mp4')
+
+        return {
+            'id': display_id,
+            'title': self._get_clean_title(self._og_search_title(webpage)),
+            'formats': formats,
+            'subtitles': subtitles,
+            'is_live': True,
+        }