[ie/mixch:archive] Fix extractor (#8761)

Closes #2373
Authored by: pzhlkj6612
2024-12-22 06:00:00 +00:00 · 2024-04-01 22:41:09 +00:00 · 2024-04-01 22:41:09 +00:00 · c59de48e2b
commit c59de48e2b
parent 0284f1fee2
1 changed file with 30 additions and 12 deletions
--- a/yt_dlp/extractor/mixch.py
+++ b/yt_dlp/extractor/mixch.py
@@ -1,5 +1,7 @@
 from .common import InfoExtractor
-from ..utils import UserNotLive, traverse_obj
+from ..networking.exceptions import HTTPError
+from ..utils import ExtractorError, UserNotLive, url_or_none
+from ..utils.traversal import traverse_obj
 class MixchIE(InfoExtractor):
@@ -60,22 +62,38 @@ class MixchArchiveIE(InfoExtractor):
        'skip': 'paid video, no DRM. expires at Jan 23',
        'info_dict': {
            'id': '421',
            'ext': 'mp4',
            'title': '96NEKO SHOW TIME',
        }
    }, {
        'url': 'https://mixch.tv/archive/1213',
        'skip': 'paid video, no DRM. expires at Dec 31, 2023',
        'info_dict': {
            'id': '1213',
            'ext': 'mp4',
            'title': '【特別トーク番組アーカイブス】Merm4id×燐舞曲 2nd LIVE「VERSUS」',
            'release_date': '20231201',
            'thumbnail': str,
        }
    }, {
        'url': 'https://mixch.tv/archive/1214',
        'only_matching': True,
    }]
    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
-        html5_videos = self._parse_html5_media_entries(
+        try:
-            url, webpage.replace('video-js', 'video'), video_id, 'hls')
+            info_json = self._download_json(
-        if not html5_videos:
+                f'https://mixch.tv/api-web/archive/{video_id}', video_id)['archive']
-            self.raise_login_required(method='cookies')
+        except ExtractorError as e:
-        infodict = html5_videos[0]
+            if isinstance(e.cause, HTTPError) and e.cause.status == 401:
-        infodict.update({
+                self.raise_login_required()
+            raise
+        return {
            'id': video_id,
-            'title': self._html_search_regex(r'class="archive-title">(.+?)</', webpage, 'title')
+            'title': traverse_obj(info_json, ('title', {str})),
-        })
+            'formats': self._extract_m3u8_formats(info_json['archiveURL'], video_id),
-
+            'thumbnail': traverse_obj(info_json, ('thumbnailURL', {url_or_none})),
-        return infodict
+        }