[niconico] Fix HLS formats

Closes #171

* The structure of the API JSON was changed
* Smile Video seems to be no longer available. So remove the warning
* Move ping to downloader
* Change heartbeat interval to 40sec
* Remove unnecessary API headers

Authored-by: CXwudi, tsukumijima, nao20010128nao, pukkandan
Tested by: tsukumijima
2024-12-28 01:41:06 +00:00 · 2021-04-05 13:11:21 +05:30 · 2021-04-05 13:11:21 +05:30 · 2291dbce2a
commit 2291dbce2a
parent 58f197b76c
2 changed files with 54 additions and 54 deletions
--- a/yt_dlp/downloader/niconico.py
+++ b/yt_dlp/downloader/niconico.py
@ -24,16 +24,14 @@ def real_download(self, filename, info_dict):

        success = download_complete = False
        timer = [None]
-
        heartbeat_lock = threading.Lock()
        heartbeat_url = heartbeat_info_dict['url']
-        heartbeat_data = heartbeat_info_dict['data']
+        heartbeat_data = heartbeat_info_dict['data'].encode()
        heartbeat_interval = heartbeat_info_dict.get('interval', 30)
-        self.to_screen('[%s] Heartbeat with %s second interval ...' % (self.FD_NAME, heartbeat_interval))

        def heartbeat():
            try:
-                compat_urllib_request.urlopen(url=heartbeat_url, data=heartbeat_data.encode())
+                compat_urllib_request.urlopen(url=heartbeat_url, data=heartbeat_data)
            except Exception:
                self.to_screen('[%s] Heartbeat failed' % self.FD_NAME)

@ -42,13 +40,16 @@ def heartbeat():
                    timer[0] = threading.Timer(heartbeat_interval, heartbeat)
                    timer[0].start()

+        heartbeat_info_dict['ping']()
+        self.to_screen('[%s] Heartbeat with %d second interval ...' % (self.FD_NAME, heartbeat_interval))
        try:
            heartbeat()
+            if type(fd).__name__ == 'HlsFD':
+                info_dict.update(ie._extract_m3u8_formats(info_dict['url'], info_dict['id'])[0])
            success = fd.real_download(filename, info_dict)
        finally:
            if heartbeat_lock:
                with heartbeat_lock:
                    timer[0].cancel()
                    download_complete = True
-
            return success
--- a/yt_dlp/extractor/niconico.py
+++ b/yt_dlp/extractor/niconico.py
@ -164,6 +164,11 @@ class NiconicoIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)'
    _NETRC_MACHINE = 'niconico'

+    _API_HEADERS = {
+        'X-Frontend-ID': '6',
+        'X-Frontend-Version': '0'
+    }
+
    def _real_initialize(self):
        self._login()

@ -197,46 +202,48 @@ def _get_heartbeat_info(self, info_dict):

        video_id, video_src_id, audio_src_id = info_dict['url'].split(':')[1].split('/')

-        # Get video webpage for API data.
-        webpage, handle = self._download_webpage_handle(
-            'http://www.nicovideo.jp/watch/' + video_id, video_id)
-
-        api_data = self._parse_json(self._html_search_regex(
-            'data-api-data="([^"]+)"', webpage,
-            'API data', default='{}'), video_id)
+        api_data = (
+            info_dict.get('_api_data')
+            or self._parse_json(
+                self._html_search_regex(
+                    'data-api-data="([^"]+)"',
+                    self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id),
+                    'API data', default='{}'),
+                video_id))

        session_api_data = try_get(api_data, lambda x: x['media']['delivery']['movie']['session'])
        session_api_endpoint = try_get(session_api_data, lambda x: x['urls'][0])

-        # ping
-        self._download_json(
-            'https://nvapi.nicovideo.jp/v1/2ab0cbaa/watch', video_id,
-            query={'t': try_get(api_data, lambda x: x['video']['dmcInfo']['tracking_id'])},
-            headers={
-                'Origin': 'https://www.nicovideo.jp',
-                'Referer': 'https://www.nicovideo.jp/watch/' + video_id,
-                'X-Frontend-Id': '6',
-                'X-Frontend-Version': '0'
-            })
+        def ping():
+            status = try_get(
+                self._download_json(
+                    'https://nvapi.nicovideo.jp/v1/2ab0cbaa/watch', video_id,
+                    query={'t': try_get(api_data, lambda x: x['media']['delivery']['trackingId'])},
+                    note='Acquiring permission for downloading video',
+                    headers=self._API_HEADERS),
+                lambda x: x['meta']['status'])
+            if status != 200:
+                self.report_warning('Failed to acquire permission for playing video. The video may not download.')

        yesno = lambda x: 'yes' if x else 'no'

        # m3u8 (encryption)
-        if 'encryption' in (try_get(api_data, lambda x: x['media']['delivery']['movie']) or {}):
+        if try_get(api_data, lambda x: x['media']['delivery']['encryption']) is not None:
            protocol = 'm3u8'
+            encryption = self._parse_json(session_api_data['token'], video_id)['hls_encryption']
            session_api_http_parameters = {
                'parameters': {
                    'hls_parameters': {
                        'encryption': {
-                            'hls_encryption_v1': {
-                                'encrypted_key': try_get(api_data, lambda x: x['video']['dmcInfo']['encryption']['hls_encryption_v1']['encrypted_key']),
-                                'key_uri': try_get(api_data, lambda x: x['video']['dmcInfo']['encryption']['hls_encryption_v1']['key_uri'])
+                            encryption: {
+                                'encrypted_key': try_get(api_data, lambda x: x['media']['delivery']['encryption']['encryptedKey']),
+                                'key_uri': try_get(api_data, lambda x: x['media']['delivery']['encryption']['keyUri'])
                            }
                        },
                        'transfer_preset': '',
-                        'use_ssl': yesno(session_api_endpoint['is_ssl']),
-                        'use_well_known_port': yesno(session_api_endpoint['is_well_known_port']),
-                        'segment_duration': 6000
+                        'use_ssl': yesno(session_api_endpoint['isSsl']),
+                        'use_well_known_port': yesno(session_api_endpoint['isWellKnownPort']),
+                        'segment_duration': 6000,
                    }
                }
            }
@ -310,7 +317,8 @@ def _get_heartbeat_info(self, info_dict):
            'url': session_api_endpoint['url'] + '/' + session_response['data']['session']['id'] + '?_format=json&_method=PUT',
            'data': json.dumps(session_response['data']),
            # interval, convert milliseconds to seconds, then halve to make a buffer.
-            'interval': float_or_none(session_api_data.get('heartbeatLifetime'), scale=2000),
+            'interval': float_or_none(session_api_data.get('heartbeatLifetime'), scale=3000),
+            'ping': ping
        }

        return info_dict, heartbeat_info_dict
@ -400,7 +408,7 @@ def get_video_info_xml(items):
        # Get HTML5 videos info
        quality_info = try_get(api_data, lambda x: x['media']['delivery']['movie'])
        if not quality_info:
-            raise ExtractorError('The video can\'t downloaded.', expected=True)
+            raise ExtractorError('The video can\'t be downloaded', expected=True)

        for audio_quality in quality_info.get('audios') or {}:
            for video_quality in quality_info.get('videos') or {}:
@ -412,9 +420,7 @@ def get_video_info_xml(items):
        # Get flv/swf info
        timestamp = None
        video_real_url = try_get(api_data, lambda x: x['video']['smileInfo']['url'])
-        if not video_real_url:
-            self.report_warning('Unable to obtain smile video information')
-        else:
+        if video_real_url:
            is_economy = video_real_url.endswith('low')

            if is_economy:
@ -486,9 +492,6 @@ def get_video_info_xml(items):
                    'filesize': filesize
                })

-        if len(formats) == 0:
-            raise ExtractorError('Unable to find video info.')
-
        self._sort_formats(formats)

        # Start extracting information
@ -585,6 +588,7 @@ def get_video_info_xml(items):

        return {
            'id': video_id,
+            '_api_data': api_data,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,
@ -619,24 +623,19 @@ class NiconicoPlaylistIE(InfoExtractor):
        'only_matching': True,
    }]

+    _API_HEADERS = {
+        'X-Frontend-ID': '6',
+        'X-Frontend-Version': '0'
+    }
+
    def _real_extract(self, url):
        list_id = self._match_id(url)
-        webpage = self._download_webpage(url, list_id)
-
-        header = self._parse_json(self._html_search_regex(
-            r'data-common-header="([^"]+)"', webpage,
-            'webpage header'), list_id)
-        frontendId = header.get('initConfig').get('frontendId')
-        frontendVersion = header.get('initConfig').get('frontendVersion')

        def get_page_data(pagenum, pagesize):
            return self._download_json(
                'http://nvapi.nicovideo.jp/v2/mylists/' + list_id, list_id,
                query={'page': 1 + pagenum, 'pageSize': pagesize},
-                headers={
-                    'X-Frontend-Id': frontendId,
-                    'X-Frontend-Version': frontendVersion,
-                }).get('data').get('mylist')
+                headers=self._API_HEADERS).get('data').get('mylist')

        data = get_page_data(0, 1)
        title = data.get('name')
@ -672,20 +671,20 @@ class NiconicoUserIE(InfoExtractor):
        'playlist_mincount': 101,
    }
    _API_URL = "https://nvapi.nicovideo.jp/v1/users/%s/videos?sortKey=registeredAt&sortOrder=desc&pageSize=%s&page=%s"
-    _api_headers = {
-        'X-Frontend-ID': '6',
-        'X-Frontend-Version': '0',
-        'X-Niconico-Language': 'en-us'
-    }
    _PAGE_SIZE = 100

+    _API_HEADERS = {
+        'X-Frontend-ID': '6',
+        'X-Frontend-Version': '0'
+    }
+
    def _entries(self, list_id, ):
        total_count = 1
        count = page_num = 0
        while count < total_count:
            json_parsed = self._download_json(
                self._API_URL % (list_id, self._PAGE_SIZE, page_num + 1), list_id,
-                headers=self._api_headers,
+                headers=self._API_HEADERS,
                note='Downloading JSON metadata%s' % (' page %d' % page_num if page_num else ''))
            if not page_num:
                total_count = int_or_none(json_parsed['data'].get('totalCount'))