mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-25 02:45:12 +00:00
[ie/BiliBiliBangumi] Fix extractors (#7337)
- Overhaul BiliBiliBangumi extractor for the site's new API
- Add BiliBiliBangumiSeason extractor
- Refactor BiliBiliBangumiMedia extractor

Closes #6701, Closes #7400
Authored by: GD-Slime
This commit is contained in:
parent
92315c0377
commit
bdd0b75e3f
2 changed files with 85 additions and 45 deletions
|
@ -214,6 +214,7 @@
|
||||||
from .bilibili import (
|
from .bilibili import (
|
||||||
BiliBiliIE,
|
BiliBiliIE,
|
||||||
BiliBiliBangumiIE,
|
BiliBiliBangumiIE,
|
||||||
|
BiliBiliBangumiSeasonIE,
|
||||||
BiliBiliBangumiMediaIE,
|
BiliBiliBangumiMediaIE,
|
||||||
BiliBiliSearchIE,
|
BiliBiliSearchIE,
|
||||||
BilibiliCategoryIE,
|
BilibiliCategoryIE,
|
||||||
|
|
|
@ -18,6 +18,7 @@
|
||||||
float_or_none,
|
float_or_none,
|
||||||
format_field,
|
format_field,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
join_nonempty,
|
||||||
make_archive_id,
|
make_archive_id,
|
||||||
merge_dicts,
|
merge_dicts,
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
|
@ -135,6 +136,17 @@ def _get_all_children(self, reply):
|
||||||
for children in map(self._get_all_children, traverse_obj(reply, ('replies', ...))):
|
for children in map(self._get_all_children, traverse_obj(reply, ('replies', ...))):
|
||||||
yield from children
|
yield from children
|
||||||
|
|
||||||
|
def _get_episodes_from_season(self, ss_id, url):
|
||||||
|
season_info = self._download_json(
|
||||||
|
'https://api.bilibili.com/pgc/web/season/section', ss_id,
|
||||||
|
note='Downloading season info', query={'season_id': ss_id},
|
||||||
|
headers={'Referer': url, **self.geo_verification_headers()})
|
||||||
|
|
||||||
|
for entry in traverse_obj(season_info, (
|
||||||
|
'result', 'main_section', 'episodes',
|
||||||
|
lambda _, v: url_or_none(v['share_url']) and v['id'])):
|
||||||
|
yield self.url_result(entry['share_url'], BiliBiliBangumiIE, f'ep{entry["id"]}')
|
||||||
|
|
||||||
|
|
||||||
class BiliBiliIE(BilibiliBaseIE):
|
class BiliBiliIE(BilibiliBaseIE):
|
||||||
_VALID_URL = r'https?://www\.bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://www\.bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
|
||||||
|
@ -403,76 +415,93 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
|
|
||||||
class BiliBiliBangumiIE(BilibiliBaseIE):
|
class BiliBiliBangumiIE(BilibiliBaseIE):
|
||||||
_VALID_URL = r'(?x)https?://www\.bilibili\.com/bangumi/play/(?P<id>(?:ss|ep)\d+)'
|
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/play/(?P<id>ep\d+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.bilibili.com/bangumi/play/ss897',
|
'url': 'https://www.bilibili.com/bangumi/play/ep267851',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'ss897',
|
'id': '267851',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'series': '神的记事本',
|
'series': '鬼灭之刃',
|
||||||
'season': '神的记事本',
|
'series_id': '4358',
|
||||||
'season_id': 897,
|
'season': '鬼灭之刃',
|
||||||
|
'season_id': '26801',
|
||||||
'season_number': 1,
|
'season_number': 1,
|
||||||
'episode': '你与旅行包',
|
'episode': '残酷',
|
||||||
'episode_number': 2,
|
'episode_id': '267851',
|
||||||
'title': '神的记事本:第2话 你与旅行包',
|
'episode_number': 1,
|
||||||
'duration': 1428.487,
|
'title': '1 残酷',
|
||||||
'timestamp': 1310809380,
|
'duration': 1425.256,
|
||||||
'upload_date': '20110716',
|
'timestamp': 1554566400,
|
||||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
'upload_date': '20190406',
|
||||||
|
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
|
||||||
},
|
},
|
||||||
}, {
|
'skip': 'According to the copyright owner\'s request, you may only watch the video after you are premium member.'
|
||||||
'url': 'https://www.bilibili.com/bangumi/play/ep508406',
|
|
||||||
'only_matching': True,
|
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
episode_id = video_id[2:]
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
if '您所在的地区无法观看本片' in webpage:
|
if '您所在的地区无法观看本片' in webpage:
|
||||||
raise GeoRestrictedError('This video is restricted')
|
raise GeoRestrictedError('This video is restricted')
|
||||||
elif ('开通大会员观看' in webpage and '__playinfo__' not in webpage
|
elif '正在观看预览,大会员免费看全片' in webpage:
|
||||||
or '正在观看预览,大会员免费看全片' in webpage):
|
|
||||||
self.raise_login_required('This video is for premium members only')
|
self.raise_login_required('This video is for premium members only')
|
||||||
|
|
||||||
play_info = self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id)['data']
|
headers = {'Referer': url, **self.geo_verification_headers()}
|
||||||
|
play_info = self._download_json(
|
||||||
|
'https://api.bilibili.com/pgc/player/web/v2/playurl', video_id,
|
||||||
|
'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id},
|
||||||
|
headers=headers)
|
||||||
|
premium_only = play_info.get('code') == -10403
|
||||||
|
play_info = traverse_obj(play_info, ('result', 'video_info', {dict})) or {}
|
||||||
|
|
||||||
formats = self.extract_formats(play_info)
|
formats = self.extract_formats(play_info)
|
||||||
if (not formats and '成为大会员抢先看' in webpage
|
if not formats and (premium_only or '成为大会员抢先看' in webpage or '开通大会员观看' in webpage):
|
||||||
and play_info.get('durl') and not play_info.get('dash')):
|
|
||||||
self.raise_login_required('This video is for premium members only')
|
self.raise_login_required('This video is for premium members only')
|
||||||
|
|
||||||
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
|
bangumi_info = self._download_json(
|
||||||
|
'https://api.bilibili.com/pgc/view/web/season', video_id, 'Get episode details',
|
||||||
|
query={'ep_id': episode_id}, headers=headers)['result']
|
||||||
|
|
||||||
season_id = traverse_obj(initial_state, ('mediaInfo', 'season_id'))
|
episode_number, episode_info = next((
|
||||||
|
(idx, ep) for idx, ep in enumerate(traverse_obj(
|
||||||
|
bangumi_info, ('episodes', ..., {dict})), 1)
|
||||||
|
if str_or_none(ep.get('id')) == episode_id), (1, {}))
|
||||||
|
|
||||||
|
season_id = bangumi_info.get('season_id')
|
||||||
season_number = season_id and next((
|
season_number = season_id and next((
|
||||||
idx + 1 for idx, e in enumerate(
|
idx + 1 for idx, e in enumerate(
|
||||||
traverse_obj(initial_state, ('mediaInfo', 'seasons', ...)))
|
traverse_obj(bangumi_info, ('seasons', ...)))
|
||||||
if e.get('season_id') == season_id
|
if e.get('season_id') == season_id
|
||||||
), None)
|
), None)
|
||||||
|
|
||||||
|
aid = episode_info.get('aid')
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'title': traverse_obj(initial_state, 'h1Title'),
|
**traverse_obj(bangumi_info, {
|
||||||
'episode': traverse_obj(initial_state, ('epInfo', 'long_title')),
|
'series': ('series', 'series_title', {str}),
|
||||||
'episode_number': int_or_none(traverse_obj(initial_state, ('epInfo', 'title'))),
|
'series_id': ('series', 'series_id', {str_or_none}),
|
||||||
'series': traverse_obj(initial_state, ('mediaInfo', 'series')),
|
'thumbnail': ('square_cover', {url_or_none}),
|
||||||
'season': traverse_obj(initial_state, ('mediaInfo', 'season_title')),
|
}),
|
||||||
'season_id': season_id,
|
'title': join_nonempty('title', 'long_title', delim=' ', from_dict=episode_info),
|
||||||
|
'episode': episode_info.get('long_title'),
|
||||||
|
'episode_id': episode_id,
|
||||||
|
'episode_number': int_or_none(episode_info.get('title')) or episode_number,
|
||||||
|
'season_id': str_or_none(season_id),
|
||||||
'season_number': season_number,
|
'season_number': season_number,
|
||||||
'thumbnail': traverse_obj(initial_state, ('epInfo', 'cover')),
|
'timestamp': int_or_none(episode_info.get('pub_time')),
|
||||||
'timestamp': traverse_obj(initial_state, ('epInfo', 'pub_time')),
|
|
||||||
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
||||||
'subtitles': self.extract_subtitles(
|
'subtitles': self.extract_subtitles(video_id, aid, episode_info.get('cid')),
|
||||||
video_id, initial_state, traverse_obj(initial_state, ('epInfo', 'cid'))),
|
'__post_extractor': self.extract_comments(aid),
|
||||||
'__post_extractor': self.extract_comments(traverse_obj(initial_state, ('epInfo', 'aid'))),
|
'http_headers': headers,
|
||||||
'http_headers': {'Referer': url, **self.geo_verification_headers()},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class BiliBiliBangumiMediaIE(InfoExtractor):
|
class BiliBiliBangumiMediaIE(BilibiliBaseIE):
|
||||||
_VALID_URL = r'https?://www\.bilibili\.com/bangumi/media/md(?P<id>\d+)'
|
_VALID_URL = r'https?://www\.bilibili\.com/bangumi/media/md(?P<id>\d+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.bilibili.com/bangumi/media/md24097891',
|
'url': 'https://www.bilibili.com/bangumi/media/md24097891',
|
||||||
|
@ -485,16 +514,26 @@ class BiliBiliBangumiMediaIE(InfoExtractor):
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
media_id = self._match_id(url)
|
media_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, media_id)
|
webpage = self._download_webpage(url, media_id)
|
||||||
|
ss_id = self._search_json(
|
||||||
|
r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id)['mediaInfo']['season_id']
|
||||||
|
|
||||||
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id)
|
return self.playlist_result(self._get_episodes_from_season(ss_id, url), media_id)
|
||||||
episode_list = self._download_json(
|
|
||||||
'https://api.bilibili.com/pgc/web/season/section', media_id,
|
|
||||||
query={'season_id': initial_state['mediaInfo']['season_id']},
|
|
||||||
note='Downloading season info')['result']['main_section']['episodes']
|
|
||||||
|
|
||||||
return self.playlist_result((
|
|
||||||
self.url_result(entry['share_url'], BiliBiliBangumiIE, entry['aid'])
|
class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
|
||||||
for entry in episode_list), media_id)
|
_VALID_URL = r'(?x)https?://www\.bilibili\.com/bangumi/play/ss(?P<id>\d+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.bilibili.com/bangumi/play/ss26801',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '26801'
|
||||||
|
},
|
||||||
|
'playlist_mincount': 26
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
ss_id = self._match_id(url)
|
||||||
|
|
||||||
|
return self.playlist_result(self._get_episodes_from_season(ss_id, url), ss_id)
|
||||||
|
|
||||||
|
|
||||||
class BilibiliSpaceBaseIE(InfoExtractor):
|
class BilibiliSpaceBaseIE(InfoExtractor):
|
||||||
|
|
Loading…
Reference in a new issue