From 7d337ca977d73a0a6c07ab481ed8faa8f6ff8726 Mon Sep 17 00:00:00 2001 From: HitomaruKonpaku Date: Sat, 11 Nov 2023 08:34:22 +0700 Subject: [PATCH] [ie/twitter:broadcast] Improve metadata extraction (#8383) Authored by: HitomaruKonpaku --- yt_dlp/extractor/periscope.py | 5 +++-- yt_dlp/extractor/twitter.py | 40 ++++++++++++++++++++++++++++++++--- 2 files changed, 40 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/periscope.py b/yt_dlp/extractor/periscope.py index 84bcf1573..dcd021926 100644 --- a/yt_dlp/extractor/periscope.py +++ b/yt_dlp/extractor/periscope.py @@ -24,12 +24,13 @@ def _parse_broadcast_data(self, broadcast, video_id): thumbnails = [{ 'url': broadcast[image], - } for image in ('image_url', 'image_url_small') if broadcast.get(image)] + } for image in ('image_url', 'image_url_medium', 'image_url_small') if broadcast.get(image)] return { 'id': broadcast.get('id') or video_id, 'title': title, - 'timestamp': parse_iso8601(broadcast.get('created_at')), + 'timestamp': parse_iso8601(broadcast.get('created_at')) or int_or_none( + broadcast.get('created_at_ms'), scale=1000), 'uploader': uploader, 'uploader_id': broadcast.get('user_id') or broadcast.get('username'), 'thumbnails': thumbnails, diff --git a/yt_dlp/extractor/twitter.py b/yt_dlp/extractor/twitter.py index b6386214d..7bd78eb48 100644 --- a/yt_dlp/extractor/twitter.py +++ b/yt_dlp/extractor/twitter.py @@ -1563,7 +1563,7 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE): IE_NAME = 'twitter:broadcast' _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})' - _TEST = { + _TESTS = [{ # untitled Periscope video 'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj', 'info_dict': { @@ -1571,11 +1571,42 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE): 'ext': 'mp4', 'title': 'Andrea May Sahouri - Periscope Broadcast', 'uploader': 'Andrea May Sahouri', - 'uploader_id': '1PXEdBZWpGwKe', + 'uploader_id': 'andreamsahouri', + 'uploader_url': 
'https://twitter.com/andreamsahouri', + 'timestamp': 1590973638, + 'upload_date': '20200601', 'thumbnail': r're:^https?://[^?#]+\.jpg\?token=', 'view_count': int, }, - } + }, { + 'url': 'https://twitter.com/i/broadcasts/1ZkKzeyrPbaxv', + 'info_dict': { + 'id': '1ZkKzeyrPbaxv', + 'ext': 'mp4', + 'title': 'Starship | SN10 | High-Altitude Flight Test', + 'uploader': 'SpaceX', + 'uploader_id': 'SpaceX', + 'uploader_url': 'https://twitter.com/SpaceX', + 'timestamp': 1614812942, + 'upload_date': '20210303', + 'thumbnail': r're:^https?://[^?#]+\.jpg\?token=', + 'view_count': int, + }, + }, { + 'url': 'https://twitter.com/i/broadcasts/1OyKAVQrgzwGb', + 'info_dict': { + 'id': '1OyKAVQrgzwGb', + 'ext': 'mp4', + 'title': 'Starship Flight Test', + 'uploader': 'SpaceX', + 'uploader_id': 'SpaceX', + 'uploader_url': 'https://twitter.com/SpaceX', + 'timestamp': 1681993964, + 'upload_date': '20230420', + 'thumbnail': r're:^https?://[^?#]+\.jpg\?token=', + 'view_count': int, + }, + }] def _real_extract(self, url): broadcast_id = self._match_id(url) @@ -1585,6 +1616,9 @@ def _real_extract(self, url): if not broadcast: raise ExtractorError('Broadcast no longer exists', expected=True) info = self._parse_broadcast_data(broadcast, broadcast_id) + info['title'] = broadcast.get('status') or info.get('title') + info['uploader_id'] = broadcast.get('twitter_username') or info.get('uploader_id') + info['uploader_url'] = format_field(broadcast, 'twitter_username', 'https://twitter.com/%s', default=None) media_key = broadcast['media_key'] source = self._call_api( f'live_video_stream/status/{media_key}', media_key)['source']