mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-25 02:45:12 +00:00
[extractor/twitter] Fix --no-playlist and add media view_count when using GraphQL (#6211)
Authored by: Grub4K
This commit is contained in:
parent
2e269bd998
commit
b6795fd310
1 changed file with 75 additions and 22 deletions
|
@ -293,7 +293,7 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
class TwitterIE(TwitterBaseIE):
|
class TwitterIE(TwitterBaseIE):
|
||||||
IE_NAME = 'twitter'
|
IE_NAME = 'twitter'
|
||||||
_VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/video/(?P<index>\d+))?'
|
_VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/(?:video|photo)/(?P<index>\d+))?'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://twitter.com/freethenipple/status/643211948184596480',
|
'url': 'https://twitter.com/freethenipple/status/643211948184596480',
|
||||||
|
@ -336,7 +336,7 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'id': '665052190608723968',
|
'id': '665052190608723968',
|
||||||
'display_id': '665052190608723968',
|
'display_id': '665052190608723968',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'md5:e99588f17b3dd0503814ffb560e64731',
|
'title': r're:Star Wars.*A new beginning is coming December 18.*',
|
||||||
'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
|
'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
|
||||||
'uploader_id': 'starwars',
|
'uploader_id': 'starwars',
|
||||||
'uploader': r're:Star Wars.*',
|
'uploader': r're:Star Wars.*',
|
||||||
|
@ -752,7 +752,7 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1600649511827013632',
|
'id': '1600649511827013632',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'md5:be05989b0722e114103ed3851a0ffae2',
|
'title': 'md5:dac4f4d4c591fcc4e88a253eba472dc3',
|
||||||
'thumbnail': r're:^https?://.+\.jpg',
|
'thumbnail': r're:^https?://.+\.jpg',
|
||||||
'timestamp': 1670459604.0,
|
'timestamp': 1670459604.0,
|
||||||
'uploader_id': 'CTVJLaidlaw',
|
'uploader_id': 'CTVJLaidlaw',
|
||||||
|
@ -792,6 +792,52 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1599108643743473680',
|
||||||
|
'display_id': '1599108751385972737',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '\u06ea - \U0001F48B',
|
||||||
|
'uploader_url': 'https://twitter.com/hlo_again',
|
||||||
|
'like_count': int,
|
||||||
|
'uploader_id': 'hlo_again',
|
||||||
|
'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1599108643743473680/pu/img/UG3xjov4rgg5sbYM.jpg?name=orig',
|
||||||
|
'repost_count': int,
|
||||||
|
'duration': 9.531,
|
||||||
|
'comment_count': int,
|
||||||
|
'upload_date': '20221203',
|
||||||
|
'age_limit': 0,
|
||||||
|
'timestamp': 1670092210.0,
|
||||||
|
'tags': [],
|
||||||
|
'uploader': '\u06ea',
|
||||||
|
'description': '\U0001F48B https://t.co/bTj9Qz7vQP',
|
||||||
|
},
|
||||||
|
'params': {'noplaylist': True},
|
||||||
|
}, {
|
||||||
|
# Media view count is GraphQL only, force in test
|
||||||
|
'url': 'https://twitter.com/MunTheShinobi/status/1600009574919962625',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1600009362759733248',
|
||||||
|
'display_id': '1600009574919962625',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'uploader_url': 'https://twitter.com/MunTheShinobi',
|
||||||
|
'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
|
||||||
|
'view_count': int,
|
||||||
|
'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
|
||||||
|
'age_limit': 0,
|
||||||
|
'uploader': 'Mün The Shinobi | BlaqBoi\'s Therapist',
|
||||||
|
'repost_count': int,
|
||||||
|
'upload_date': '20221206',
|
||||||
|
'title': 'Mün The Shinobi | BlaqBoi\'s Therapist - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
|
||||||
|
'comment_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'tags': [],
|
||||||
|
'uploader_id': 'MunTheShinobi',
|
||||||
|
'duration': 139.987,
|
||||||
|
'timestamp': 1670306984.0,
|
||||||
|
},
|
||||||
|
'params': {'extractor_args': {'twitter': {'force_graphql': ['']}}},
|
||||||
}, {
|
}, {
|
||||||
# onion route
|
# onion route
|
||||||
'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
|
'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
|
||||||
|
@ -920,13 +966,6 @@ def _real_extract(self, url):
|
||||||
title = f'{uploader} - {title}'
|
title = f'{uploader} - {title}'
|
||||||
uploader_id = user.get('screen_name')
|
uploader_id = user.get('screen_name')
|
||||||
|
|
||||||
tags = []
|
|
||||||
for hashtag in (try_get(status, lambda x: x['entities']['hashtags'], list) or []):
|
|
||||||
hashtag_text = hashtag.get('text')
|
|
||||||
if not hashtag_text:
|
|
||||||
continue
|
|
||||||
tags.append(hashtag_text)
|
|
||||||
|
|
||||||
info = {
|
info = {
|
||||||
'id': twid,
|
'id': twid,
|
||||||
'title': title,
|
'title': title,
|
||||||
|
@ -939,7 +978,7 @@ def _real_extract(self, url):
|
||||||
'repost_count': int_or_none(status.get('retweet_count')),
|
'repost_count': int_or_none(status.get('retweet_count')),
|
||||||
'comment_count': int_or_none(status.get('reply_count')),
|
'comment_count': int_or_none(status.get('reply_count')),
|
||||||
'age_limit': 18 if status.get('possibly_sensitive') else 0,
|
'age_limit': 18 if status.get('possibly_sensitive') else 0,
|
||||||
'tags': tags,
|
'tags': traverse_obj(status, ('entities', 'hashtags', ..., 'text')),
|
||||||
}
|
}
|
||||||
|
|
||||||
def extract_from_video_info(media):
|
def extract_from_video_info(media):
|
||||||
|
@ -973,6 +1012,7 @@ def add_thumbnail(name, size):
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
'thumbnails': thumbnails,
|
'thumbnails': thumbnails,
|
||||||
|
'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})),
|
||||||
'duration': float_or_none(video_info.get('duration_millis'), 1000),
|
'duration': float_or_none(video_info.get('duration_millis'), 1000),
|
||||||
# The codec of http formats are unknown
|
# The codec of http formats are unknown
|
||||||
'_format_sort_fields': ('res', 'br', 'size', 'proto'),
|
'_format_sort_fields': ('res', 'br', 'size', 'proto'),
|
||||||
|
@ -1052,11 +1092,31 @@ def get_binding_value(k):
|
||||||
'content_duration_seconds')),
|
'content_duration_seconds')),
|
||||||
}
|
}
|
||||||
|
|
||||||
media_path = ((None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo')
|
videos = traverse_obj(status, (
|
||||||
videos = map(extract_from_video_info, traverse_obj(status, media_path, expected_type=dict))
|
(None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo', {dict}))
|
||||||
cards = extract_from_card_info(status.get('card'))
|
|
||||||
entries = [{**info, **data, 'display_id': twid} for data in (*videos, *cards)]
|
|
||||||
|
|
||||||
|
if self._yes_playlist(twid, selected_index, video_label='URL-specified video number'):
|
||||||
|
selected_entries = (*map(extract_from_video_info, videos), *extract_from_card_info(status.get('card')))
|
||||||
|
else:
|
||||||
|
desired_obj = traverse_obj(status, ('extended_entities', 'media', int(selected_index) - 1, {dict}))
|
||||||
|
if not desired_obj:
|
||||||
|
raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True)
|
||||||
|
elif desired_obj.get('type') != 'video':
|
||||||
|
raise ExtractorError(f'Media #{selected_index} is not a video', expected=True)
|
||||||
|
|
||||||
|
# Restore original archive id and video index in title
|
||||||
|
for index, entry in enumerate(videos, 1):
|
||||||
|
if entry.get('id') != desired_obj.get('id'):
|
||||||
|
continue
|
||||||
|
if index == 1:
|
||||||
|
info['_old_archive_ids'] = [make_archive_id(self, twid)]
|
||||||
|
if len(videos) != 1:
|
||||||
|
info['title'] += f' #{index}'
|
||||||
|
break
|
||||||
|
|
||||||
|
return {**info, **extract_from_video_info(desired_obj), 'display_id': twid}
|
||||||
|
|
||||||
|
entries = [{**info, **data, 'display_id': twid} for data in selected_entries]
|
||||||
if not entries:
|
if not entries:
|
||||||
expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
|
expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
|
||||||
if not expanded_url or expanded_url == url:
|
if not expanded_url or expanded_url == url:
|
||||||
|
@ -1066,13 +1126,6 @@ def get_binding_value(k):
|
||||||
|
|
||||||
entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]
|
entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]
|
||||||
|
|
||||||
if not self._yes_playlist(twid, selected_index, video_label='URL-specified video number'):
|
|
||||||
index = int(selected_index) - 1
|
|
||||||
if index >= len(entries):
|
|
||||||
raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True)
|
|
||||||
|
|
||||||
return entries[index]
|
|
||||||
|
|
||||||
if len(entries) == 1:
|
if len(entries) == 1:
|
||||||
return entries[0]
|
return entries[0]
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue