mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-27 03:03:01 +00:00
[udemy] Extract asset captions
This commit is contained in:
parent
0ce76801e8
commit
2fbd86352e
1 changed file with 17 additions and 1 deletion
|
@ -18,6 +18,7 @@
|
||||||
int_or_none,
|
int_or_none,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
|
try_get,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
|
@ -105,7 +106,7 @@ def _download_lecture(self, course_id, lecture_id):
|
||||||
% (course_id, lecture_id),
|
% (course_id, lecture_id),
|
||||||
lecture_id, 'Downloading lecture JSON', query={
|
lecture_id, 'Downloading lecture JSON', query={
|
||||||
'fields[lecture]': 'title,description,view_html,asset',
|
'fields[lecture]': 'title,description,view_html,asset',
|
||||||
'fields[asset]': 'asset_type,stream_url,thumbnail_url,download_urls,stream_urls,data',
|
'fields[asset]': 'asset_type,stream_url,thumbnail_url,download_urls,stream_urls,captions,data',
|
||||||
})
|
})
|
||||||
|
|
||||||
def _handle_error(self, response):
|
def _handle_error(self, response):
|
||||||
|
@ -308,6 +309,21 @@ def extract_subtitles(track_list):
|
||||||
if isinstance(urls, dict):
|
if isinstance(urls, dict):
|
||||||
extract_formats(urls.get('Video'))
|
extract_formats(urls.get('Video'))
|
||||||
|
|
||||||
|
captions = asset.get('captions')
|
||||||
|
if isinstance(captions, list):
|
||||||
|
for cc in captions:
|
||||||
|
if not isinstance(cc, dict):
|
||||||
|
continue
|
||||||
|
cc_url = cc.get('url')
|
||||||
|
if not cc_url or not isinstance(cc_url, compat_str):
|
||||||
|
continue
|
||||||
|
lang = try_get(cc, lambda x: x['locale']['locale'], compat_str)
|
||||||
|
sub_dict = (automatic_captions if cc.get('source') == 'auto'
|
||||||
|
else subtitles)
|
||||||
|
sub_dict.setdefault(lang or 'en', []).append({
|
||||||
|
'url': cc_url,
|
||||||
|
})
|
||||||
|
|
||||||
view_html = lecture.get('view_html')
|
view_html = lecture.get('view_html')
|
||||||
if view_html:
|
if view_html:
|
||||||
view_html_urls = set()
|
view_html_urls = set()
|
||||||
|
|
Loading…
Reference in a new issue