mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-01-05 06:21:01 +00:00
[bilibili] Add anthology support
Closes: #118
Co-authored by: animelover1984
This commit is contained in:
parent
beb4b92a66
commit
adc74b3c6d
1 changed files with 49 additions and 5 deletions
|
@ -7,6 +7,7 @@
|
||||||
|
|
||||||
from .common import InfoExtractor, SearchInfoExtractor
|
from .common import InfoExtractor, SearchInfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
|
compat_str,
|
||||||
compat_parse_qs,
|
compat_parse_qs,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
)
|
)
|
||||||
|
@ -15,6 +16,7 @@
|
||||||
int_or_none,
|
int_or_none,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
|
try_get,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
strip_jsonp,
|
strip_jsonp,
|
||||||
|
@ -113,6 +115,13 @@ class BiliBiliIE(InfoExtractor):
|
||||||
# new BV video id format
|
# new BV video id format
|
||||||
'url': 'https://www.bilibili.com/video/BV1JE411F741',
|
'url': 'https://www.bilibili.com/video/BV1JE411F741',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# Anthology
|
||||||
|
'url': 'https://www.bilibili.com/video/BV1bK411W797',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'BV1bK411W797',
|
||||||
|
},
|
||||||
|
'playlist_count': 17,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_APP_KEY = 'iVGUTjsxvpLeuDCf'
|
_APP_KEY = 'iVGUTjsxvpLeuDCf'
|
||||||
|
@ -139,9 +148,19 @@ def _real_extract(self, url):
|
||||||
page_id = mobj.group('page')
|
page_id = mobj.group('page')
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
# Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
|
||||||
|
# If the video has no page argument, check to see if it's an anthology
|
||||||
|
if page_id is None:
|
||||||
|
if not self._downloader.params.get('noplaylist'):
|
||||||
|
r = self._extract_anthology_entries(bv_id, video_id, webpage)
|
||||||
|
if r is not None:
|
||||||
|
self.to_screen('Downloading anthology %s - add --no-playlist to just download video' % video_id)
|
||||||
|
return r
|
||||||
|
self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
|
||||||
|
|
||||||
if 'anime/' not in url:
|
if 'anime/' not in url:
|
||||||
cid = self._search_regex(
|
cid = self._search_regex(
|
||||||
r'\bcid(?:["\']:|=)(\d+),["\']page(?:["\']:|=)' + str(page_id), webpage, 'cid',
|
r'\bcid(?:["\']:|=)(\d+),["\']page(?:["\']:|=)' + compat_str(page_id), webpage, 'cid',
|
||||||
default=None
|
default=None
|
||||||
) or self._search_regex(
|
) or self._search_regex(
|
||||||
r'\bcid(?:["\']:|=)(\d+)', webpage, 'cid',
|
r'\bcid(?:["\']:|=)(\d+)', webpage, 'cid',
|
||||||
|
@ -224,7 +243,18 @@ def _real_extract(self, url):
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
(r'<h1[^>]+\btitle=(["\'])(?P<title>(?:(?!\1).)+)\1',
|
(r'<h1[^>]+\btitle=(["\'])(?P<title>(?:(?!\1).)+)\1',
|
||||||
r'(?s)<h1[^>]*>(?P<title>.+?)</h1>'), webpage, 'title',
|
r'(?s)<h1[^>]*>(?P<title>.+?)</h1>'), webpage, 'title',
|
||||||
group='title') + ('_p' + str(page_id) if page_id is not None else '')
|
group='title')
|
||||||
|
|
||||||
|
# Get part title for anthologies
|
||||||
|
if page_id is not None:
|
||||||
|
# TODO: The json is already downloaded by _extract_anthology_entries. Don't redownload for each video
|
||||||
|
part_title = try_get(
|
||||||
|
self._download_json(
|
||||||
|
"https://api.bilibili.com/x/player/pagelist?bvid=%s&jsonp=jsonp" % bv_id,
|
||||||
|
video_id, note='Extracting videos in anthology'),
|
||||||
|
lambda x: x['data'][int(page_id) - 1]['part'])
|
||||||
|
title = part_title or title
|
||||||
|
|
||||||
description = self._html_search_meta('description', webpage)
|
description = self._html_search_meta('description', webpage)
|
||||||
timestamp = unified_timestamp(self._html_search_regex(
|
timestamp = unified_timestamp(self._html_search_regex(
|
||||||
r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time',
|
r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time',
|
||||||
|
@ -234,7 +264,7 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
# TODO 'view_count' requires deobfuscating Javascript
|
# TODO 'view_count' requires deobfuscating Javascript
|
||||||
info = {
|
info = {
|
||||||
'id': str(video_id) if page_id is None else '%s_p%s' % (video_id, page_id),
|
'id': compat_str(video_id) if page_id is None else '%s_p%s' % (video_id, page_id),
|
||||||
'cid': cid,
|
'cid': cid,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': description,
|
||||||
|
@ -300,7 +330,7 @@ def get_comments():
|
||||||
|
|
||||||
global_info = {
|
global_info = {
|
||||||
'_type': 'multi_video',
|
'_type': 'multi_video',
|
||||||
'id': video_id,
|
'id': compat_str(video_id),
|
||||||
'bv_id': bv_id,
|
'bv_id': bv_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': description,
|
||||||
|
@ -312,6 +342,20 @@ def get_comments():
|
||||||
|
|
||||||
return global_info
|
return global_info
|
||||||
|
|
||||||
|
def _extract_anthology_entries(self, bv_id, video_id, webpage):
    """Return a playlist of the parts of a multi-part ("anthology") video.

    Downloads the pagelist JSON for ``bv_id``. When it lists more than one
    part, returns the result of ``playlist_from_matches`` with one
    ``https://www.bilibili.com/video/<bv_id>?p=<page>`` URL per part, using
    ``bv_id`` as the playlist id and the ``<h1>`` title scraped from
    ``webpage`` as the playlist title.

    Returns None when the pagelist has at most one part, or when the
    response carries no usable ``data`` list, so that the caller can fall
    back to extracting the page as a single video.

    ``video_id`` is only used to label the JSON download.
    """
    json_data = self._download_json(
        "https://api.bilibili.com/x/player/pagelist?bvid=%s&jsonp=jsonp" % bv_id,
        video_id, note='Extracting videos in anthology')

    # Do not index blindly: a response without a 'data' list (presumably an
    # API error payload - TODO confirm the exact shape) must not abort the
    # extraction of what may still be a perfectly downloadable single video.
    pages = json_data.get('data') if isinstance(json_data, dict) else None
    if not isinstance(pages, list) or len(pages) <= 1:
        return None

    # The title is only needed once we know this really is an anthology, so
    # the (fatal) regex search is deferred until after the check above.
    title = self._html_search_regex(
        (r'<h1[^>]+\btitle=(["\'])(?P<title>(?:(?!\1).)+)\1',
         r'(?s)<h1[^>]*>(?P<title>.+?)</h1>'), webpage, 'title',
        group='title')

    return self.playlist_from_matches(
        pages, bv_id, title, ie=BiliBiliIE.ie_key(),
        getter=lambda entry: 'https://www.bilibili.com/video/%s?p=%d' % (bv_id, entry['page']))
|
||||||
|
|
||||||
def _get_video_id_set(self, id, is_bv):
|
def _get_video_id_set(self, id, is_bv):
|
||||||
query = {'bvid': id} if is_bv else {'aid': id}
|
query = {'bvid': id} if is_bv else {'aid': id}
|
||||||
response = self._download_json(
|
response = self._download_json(
|
||||||
|
@ -506,7 +550,7 @@ def _get_n_results(self, query, n):
|
||||||
|
|
||||||
videos = data['result']
|
videos = data['result']
|
||||||
for video in videos:
|
for video in videos:
|
||||||
e = self.url_result(video['arcurl'], 'BiliBili', str(video['aid']))
|
e = self.url_result(video['arcurl'], 'BiliBili', compat_str(video['aid']))
|
||||||
entries.append(e)
|
entries.append(e)
|
||||||
|
|
||||||
if(len(entries) >= n or len(videos) >= BiliBiliSearchIE.MAX_NUMBER_OF_RESULTS):
|
if(len(entries) >= n or len(videos) >= BiliBiliSearchIE.MAX_NUMBER_OF_RESULTS):
|
||||||
|
|
Loading…
Reference in a new issue