From 15e9e578c04f1fa3f408dc3ec99491cc3f0ba839 Mon Sep 17 00:00:00 2001 From: chris <6024426+iw0nderhow@users.noreply.github.com> Date: Tue, 27 Dec 2022 20:52:58 +0100 Subject: [PATCH] [extractor/ArteTV] Extract chapters (#5879) Authored by: iw0nderhow, bashonly --- yt_dlp/extractor/arte.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/arte.py b/yt_dlp/extractor/arte.py index 54e4d2d0c..dfbfe03c3 100644 --- a/yt_dlp/extractor/arte.py +++ b/yt_dlp/extractor/arte.py @@ -65,6 +65,21 @@ class ArteTVIE(ArteTVBaseIE): }, { 'url': 'https://api.arte.tv/api/player/v2/config/de/LIVE', 'only_matching': True, + }, { + 'url': 'https://www.arte.tv/de/videos/110203-006-A/zaz/', + 'info_dict': { + 'id': '110203-006-A', + 'chapters': 'count:16', + 'description': 'md5:cf592f1df52fe52007e3f8eac813c084', + 'alt_title': 'Zaz', + 'title': 'Baloise Session 2022', + 'timestamp': 1668445200, + 'duration': 4054, + 'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/ubQjmVCGyRx3hmBuZEK9QZ/940x530', + 'upload_date': '20221114', + 'ext': 'mp4', + }, + 'expected_warnings': ['geo restricted'] }] _GEO_BYPASS = True @@ -180,9 +195,6 @@ def _real_extract(self, url): else: self.report_warning(f'Skipping stream with unknown protocol {stream["protocol"]}') - # TODO: chapters from stream['segments']? - # The JS also looks for chapters in config['data']['attributes']['chapters'], - # but I am yet to find a video having those formats.extend(secondary_formats) self._remove_duplicate_formats(formats) @@ -205,6 +217,11 @@ def _real_extract(self, url): {'url': image['url'], 'id': image.get('caption')} for image in metadata.get('images') or [] if url_or_none(image.get('url')) ], + # TODO: chapters may also be in stream['segments']? + 'chapters': traverse_obj(config, ('data', 'attributes', 'chapters', 'elements', ..., { + 'start_time': 'startTime', + 'title': 'title', + })) or None, }