From b58f8d8f2c6389ad07fa31a81a6489cae7d205c9 Mon Sep 17 00:00:00 2001 From: Lesmiscore Date: Wed, 4 May 2022 23:16:56 +0900 Subject: [PATCH] [TVer] Improve extraction (#3634) Authored by: Lesmiscore --- yt_dlp/extractor/tver.py | 43 +++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 16 deletions(-) diff --git a/yt_dlp/extractor/tver.py b/yt_dlp/extractor/tver.py index 19236f8e8..b04575bd5 100644 --- a/yt_dlp/extractor/tver.py +++ b/yt_dlp/extractor/tver.py @@ -1,8 +1,10 @@ from .common import InfoExtractor from ..utils import ( ExtractorError, + join_nonempty, smuggle_url, str_or_none, + strip_or_none, traverse_obj, ) @@ -11,19 +13,16 @@ class TVerIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?tver\.jp/(?:(?P<path>lp|corner|series|episodes?|feature|tokyo2020/video)/)+(?P<id>[a-zA-Z0-9]+)' _TESTS = [{ 'skip': 'videos are only available for 7 days', - 'url': 'https://tver.jp/episodes/ephss8yveb', + 'url': 'https://tver.jp/episodes/ep83nf3w4p', 'info_dict': { - 'title': '#44 料理と値段と店主にびっくり オモてなしすぎウマい店 2時間SP', - 'description': 'md5:66985373a66fed8ad3cd595a3cfebb13', - }, - 'add_ie': ['BrightcoveNew'], - }, { - 'skip': 'videos are only available for 7 days', - 'url': 'https://tver.jp/lp/episodes/ep6f16g26p', - 'info_dict': { - # sorry but this is "correct" - 'title': '4月11日(月)23時06分 ~ 放送予定', - 'description': 'md5:4029cc5f4b1e8090dfc5b7bd2bc5cd0b', + 'title': '家事ヤロウ!!! 売り場席巻のチーズSP&財前直見×森泉親子の脱東京暮らし密着!', + 'description': 'md5:dc2c06b6acc23f1e7c730c513737719b', + 'series': '家事ヤロウ!!!', + 'episode': '売り場席巻のチーズSP&財前直見×森泉親子の脱東京暮らし密着!', + 'alt_title': '売り場席巻のチーズSP&財前直見×森泉親子の脱東京暮らし密着!', + 'channel': 'テレビ朝日', + 'onair_label': '5月3日(火)放送分', + 'ext_title': '家事ヤロウ!!! 売り場席巻のチーズSP&財前直見×森泉親子の脱東京暮らし密着! 
テレビ朝日 5月3日(火)放送分', }, 'add_ie': ['BrightcoveNew'], }, { @@ -78,14 +77,26 @@ def _real_extract(self, url): 'x-tver-platform-type': 'web' }) + additional_content_info = traverse_obj( + additional_info, ('result', 'episode', 'content'), get_all=False) or {} + episode = strip_or_none(additional_content_info.get('title')) + series = str_or_none(additional_content_info.get('seriesTitle')) + title = ( + join_nonempty(series, episode, delim=' ') + or str_or_none(video_info.get('title'))) + provider = str_or_none(additional_content_info.get('productionProviderName')) + onair_label = str_or_none(additional_content_info.get('broadcastDateLabel')) + return { '_type': 'url_transparent', - 'title': str_or_none(video_info.get('title')), + 'title': title, + 'series': series, + 'episode': episode, + # an another title which is considered "full title" for some viewers + 'alt_title': join_nonempty(title, provider, onair_label, delim=' '), + 'channel': provider, 'description': str_or_none(video_info.get('description')), 'url': smuggle_url( self.BRIGHTCOVE_URL_TEMPLATE % (p_id, r_id), {'geo_countries': ['JP']}), - 'series': traverse_obj( - additional_info, ('result', ('episode', 'series'), 'content', ('seriesTitle', 'title')), - get_all=False), 'ie_key': 'BrightcoveNew', }