mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-17 01:25:11 +00:00
[crunchyroll] Convert to new subtitles system
This commit is contained in:
parent
a504ced097
commit
b5857f62e2
1 changed files with 35 additions and 31 deletions
|
@ -9,7 +9,7 @@
|
||||||
|
|
||||||
from hashlib import sha1
|
from hashlib import sha1
|
||||||
from math import pow, sqrt, floor
|
from math import pow, sqrt, floor
|
||||||
from .subtitles import SubtitlesInfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
|
@ -25,10 +25,9 @@
|
||||||
aes_cbc_decrypt,
|
aes_cbc_decrypt,
|
||||||
inc,
|
inc,
|
||||||
)
|
)
|
||||||
from .common import InfoExtractor
|
|
||||||
|
|
||||||
|
|
||||||
class CrunchyrollIE(SubtitlesInfoExtractor):
|
class CrunchyrollIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
|
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
|
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
|
||||||
|
@ -187,6 +186,38 @@ def ass_bool(strvalue):
|
||||||
|
|
||||||
return output
|
return output
|
||||||
|
|
||||||
|
def _get_subtitles(self, video_id, webpage):
|
||||||
|
subtitles = {}
|
||||||
|
for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
|
||||||
|
sub_page = self._download_webpage(
|
||||||
|
'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id,
|
||||||
|
video_id, note='Downloading subtitles for ' + sub_name)
|
||||||
|
id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False)
|
||||||
|
iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False)
|
||||||
|
data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False)
|
||||||
|
if not id or not iv or not data:
|
||||||
|
continue
|
||||||
|
id = int(id)
|
||||||
|
iv = base64.b64decode(iv)
|
||||||
|
data = base64.b64decode(data)
|
||||||
|
|
||||||
|
subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8')
|
||||||
|
lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
|
||||||
|
if not lang_code:
|
||||||
|
continue
|
||||||
|
sub_root = xml.etree.ElementTree.fromstring(subtitle)
|
||||||
|
subtitles[lang_code] = [
|
||||||
|
{
|
||||||
|
'ext': 'srt',
|
||||||
|
'data': self._convert_subtitles_to_srt(sub_root),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'ext': 'ass',
|
||||||
|
'data': self._convert_subtitles_to_ass(sub_root),
|
||||||
|
},
|
||||||
|
]
|
||||||
|
return subtitles
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('video_id')
|
video_id = mobj.group('video_id')
|
||||||
|
@ -249,34 +280,7 @@ def _real_extract(self, url):
|
||||||
'format_id': video_format,
|
'format_id': video_format,
|
||||||
})
|
})
|
||||||
|
|
||||||
subtitles = {}
|
subtitles = self.extract_subtitles(video_id, webpage)
|
||||||
sub_format = self._downloader.params.get('subtitlesformat', 'srt')
|
|
||||||
for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
|
|
||||||
sub_page = self._download_webpage(
|
|
||||||
'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id,
|
|
||||||
video_id, note='Downloading subtitles for ' + sub_name)
|
|
||||||
id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False)
|
|
||||||
iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False)
|
|
||||||
data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False)
|
|
||||||
if not id or not iv or not data:
|
|
||||||
continue
|
|
||||||
id = int(id)
|
|
||||||
iv = base64.b64decode(iv)
|
|
||||||
data = base64.b64decode(data)
|
|
||||||
|
|
||||||
subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8')
|
|
||||||
lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
|
|
||||||
if not lang_code:
|
|
||||||
continue
|
|
||||||
sub_root = xml.etree.ElementTree.fromstring(subtitle)
|
|
||||||
if sub_format == 'ass':
|
|
||||||
subtitles[lang_code] = self._convert_subtitles_to_ass(sub_root)
|
|
||||||
else:
|
|
||||||
subtitles[lang_code] = self._convert_subtitles_to_srt(sub_root)
|
|
||||||
|
|
||||||
if self._downloader.params.get('listsubtitles', False):
|
|
||||||
self._list_available_subtitles(video_id, subtitles)
|
|
||||||
return
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
|
Loading…
Reference in a new issue