From d7d861811c15585a4f7ec9d5ae68d2ac28de28a0 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Thu, 13 Jun 2024 16:59:17 -0500 Subject: [PATCH] [ie/tubitv:series] Fix extractor (#10116) Closes #8563 Authored by: bashonly --- yt_dlp/extractor/tubitv.py | 57 +++++++++++++++++++++++++++----------- 1 file changed, 41 insertions(+), 16 deletions(-) diff --git a/yt_dlp/extractor/tubitv.py b/yt_dlp/extractor/tubitv.py index 9d9ddae72..85eb3a211 100644 --- a/yt_dlp/extractor/tubitv.py +++ b/yt_dlp/extractor/tubitv.py @@ -13,6 +13,7 @@ class TubiTvIE(InfoExtractor): + IE_NAME = 'tubitv' _VALID_URL = r'https?://(?:www\.)?tubitv\.com/(?P<type>video|movies|tv-shows)/(?P<id>\d+)' _LOGIN_URL = 'http://tubitv.com/login' _NETRC_MACHINE = 'tubitv' @@ -148,30 +149,54 @@ def _real_extract(self, url): class TubiTvShowIE(InfoExtractor): - _WORKING = False - _VALID_URL = r'https?://(?:www\.)?tubitv\.com/series/[0-9]+/(?P<show_name>[^/?#]+)' + IE_NAME = 'tubitv:series' + _VALID_URL = r'https?://(?:www\.)?tubitv\.com/series/\d+/(?P<show_name>[^/?#]+)(?:/season-(?P<season>\d+))?' 
_TESTS = [{ 'url': 'https://tubitv.com/series/3936/the-joy-of-painting-with-bob-ross?start=true', - 'playlist_mincount': 390, + 'playlist_mincount': 389, 'info_dict': { 'id': 'the-joy-of-painting-with-bob-ross', }, + }, { + 'url': 'https://tubitv.com/series/2311/the-saddle-club/season-1', + 'playlist_count': 26, + 'info_dict': { + 'id': 'the-saddle-club-season-1', + }, + }, { + 'url': 'https://tubitv.com/series/2311/the-saddle-club/season-3', + 'playlist_count': 19, + 'info_dict': { + 'id': 'the-saddle-club-season-3', + }, + }, { + 'url': 'https://tubitv.com/series/2311/the-saddle-club/', + 'playlist_mincount': 71, + 'info_dict': { + 'id': 'the-saddle-club', + }, }] - def _entries(self, show_url, show_name): - show_webpage = self._download_webpage(show_url, show_name) + def _entries(self, show_url, playlist_id, selected_season): + webpage = self._download_webpage(show_url, playlist_id) - show_json = self._parse_json(self._search_regex( - r'window\.__data\s*=\s*({[^<]+});\s*', - show_webpage, 'data'), show_name, transform_source=js_to_json)['video'] + data = self._search_json( + r'window\.__data\s*=', webpage, 'data', playlist_id, + transform_source=js_to_json)['video'] - for episode_id in show_json['fullContentById']: - if traverse_obj(show_json, ('byId', episode_id, 'type')) == 's': - continue - yield self.url_result( - f'https://tubitv.com/tv-shows/{episode_id}/', - ie=TubiTvIE.ie_key(), video_id=episode_id) + # v['number'] is already a decimal string, but stringify to protect against API changes + path = [lambda _, v: str(v['number']) == selected_season] if selected_season else [..., {dict}] + + for season in traverse_obj(data, ('byId', lambda _, v: v['type'] == 's', 'seasons', *path)): + season_number = int_or_none(season.get('number')) + for episode in traverse_obj(season, ('episodes', lambda _, v: v['id'])): + episode_id = episode['id'] + yield self.url_result( + f'https://tubitv.com/tv-shows/{episode_id}/', TubiTvIE, episode_id, + 
season_number=season_number, episode_number=int_or_none(episode.get('num'))) def _real_extract(self, url): - show_name = self._match_valid_url(url).group('show_name') - return self.playlist_result(self._entries(url, show_name), playlist_id=show_name) + playlist_id, selected_season = self._match_valid_url(url).group('show_name', 'season') + if selected_season: + playlist_id = f'{playlist_id}-season-{selected_season}' + return self.playlist_result(self._entries(url, playlist_id, selected_season), playlist_id)