From 471d0367c76e1413bb35e0be45765a277e469ee2 Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Wed, 22 Jun 2022 02:45:05 +0530
Subject: [PATCH] [youtube:clips] Support downloading clips

Closes #2543
---
 README.md                   |  1 +
 yt_dlp/extractor/youtube.py | 39 ++++++++++++++++++++++++++++++++-----
 2 files changed, 35 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 3261f88c7..bace8a2eb 100644
--- a/README.md
+++ b/README.md
@@ -90,6 +90,7 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t
     * Youtube music Albums, channels etc can be downloaded ([except self-uploaded music](https://github.com/yt-dlp/yt-dlp/issues/723))
     * Download livestreams from the start using `--live-from-start` (experimental)
     * Support for downloading stories (`ytstories:`)
+    * Support for downloading clips
 
 * **Cookies from browser**: Cookies can be automatically extracted from all major web browsers using `--cookies-from-browser BROWSER[+KEYRING][:PROFILE]`
 
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index da249f7a9..ccb41cb2e 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -5978,14 +5978,43 @@ class YoutubeTruncatedURLIE(InfoExtractor):
             expected=True)
 
 
-class YoutubeClipIE(InfoExtractor):
+class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
     IE_NAME = 'youtube:clip'
-    IE_DESC = False  # Do not list
-    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'
+    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
+    _TESTS = [{
+        # FIXME: Other metadata should be extracted from the clip, not from the base video
+        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
+        'info_dict': {
+            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
+            'ext': 'mp4',
+            'section_start': 29.0,
+            'section_end': 39.7,
+            'duration': 10.7,
+        }
+    }]
 
     def _real_extract(self, url):
-        self.report_warning('YouTube clips are not currently supported. The entire video will be downloaded instead')
-        return self.url_result(url, 'Generic')
+        clip_id = self._match_id(url)
+        _, data = self._extract_webpage(url, clip_id)
+
+        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
+        if not video_id:
+            raise ExtractorError('Unable to find video ID')
+
+        clip_data = traverse_obj(data, (
+            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
+            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
+            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
+            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)
+
+        return {
+            '_type': 'url_transparent',
+            'url': f'https://www.youtube.com/watch?v={video_id}',
+            'ie_key': YoutubeIE.ie_key(),
+            'id': clip_id,
+            'section_start': int(clip_data['startTimeMs']) / 1000,
+            'section_end': int(clip_data['endTimeMs']) / 1000,
+        }
 
 
 class YoutubeTruncatedIDIE(InfoExtractor):