mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-03 23:25:06 +00:00
[extractor/common] Recognize src
attribute from HTML5 media elements (#3899)
Authored by: Lesmiscore
This commit is contained in:
parent
ee27297f82
commit
222a230871
2 changed files with 23 additions and 2 deletions
|
@ -502,6 +502,24 @@ def test_parse_html5_media_entries(self):
|
||||||
}],
|
}],
|
||||||
})
|
})
|
||||||
|
|
||||||
|
# from https://0000.studio/
|
||||||
|
# with type attribute but without extension in URL
|
||||||
|
expect_dict(
|
||||||
|
self,
|
||||||
|
self.ie._parse_html5_media_entries(
|
||||||
|
'https://0000.studio',
|
||||||
|
r'''
|
||||||
|
<video src="https://d1ggyt9m8pwf3g.cloudfront.net/protected/ap-northeast-1:1864af40-28d5-492b-b739-b32314b1a527/archive/clip/838db6a7-8973-4cd6-840d-8517e4093c92"
|
||||||
|
controls="controls" type="video/mp4" preload="metadata" autoplay="autoplay" playsinline class="object-contain">
|
||||||
|
</video>
|
||||||
|
''', None)[0],
|
||||||
|
{
|
||||||
|
'formats': [{
|
||||||
|
'url': 'https://d1ggyt9m8pwf3g.cloudfront.net/protected/ap-northeast-1:1864af40-28d5-492b-b739-b32314b1a527/archive/clip/838db6a7-8973-4cd6-840d-8517e4093c92',
|
||||||
|
'ext': 'mp4',
|
||||||
|
}],
|
||||||
|
})
|
||||||
|
|
||||||
def test_extract_jwplayer_data_realworld(self):
|
def test_extract_jwplayer_data_realworld(self):
|
||||||
# from http://www.suffolk.edu/sjc/
|
# from http://www.suffolk.edu/sjc/
|
||||||
expect_dict(
|
expect_dict(
|
||||||
|
|
|
@ -3197,7 +3197,8 @@ def parse_content_type(content_type):
|
||||||
return f
|
return f
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
def _media_formats(src, cur_media_type, type_info={}):
|
def _media_formats(src, cur_media_type, type_info=None):
|
||||||
|
type_info = type_info or {}
|
||||||
full_url = absolute_url(src)
|
full_url = absolute_url(src)
|
||||||
ext = type_info.get('ext') or determine_ext(full_url)
|
ext = type_info.get('ext') or determine_ext(full_url)
|
||||||
if ext == 'm3u8':
|
if ext == 'm3u8':
|
||||||
|
@ -3215,6 +3216,7 @@ def _media_formats(src, cur_media_type, type_info={}):
|
||||||
formats = [{
|
formats = [{
|
||||||
'url': full_url,
|
'url': full_url,
|
||||||
'vcodec': 'none' if cur_media_type == 'audio' else None,
|
'vcodec': 'none' if cur_media_type == 'audio' else None,
|
||||||
|
'ext': ext,
|
||||||
}]
|
}]
|
||||||
return is_plain_url, formats
|
return is_plain_url, formats
|
||||||
|
|
||||||
|
@ -3241,7 +3243,8 @@ def _media_formats(src, cur_media_type, type_info={}):
|
||||||
media_attributes = extract_attributes(media_tag)
|
media_attributes = extract_attributes(media_tag)
|
||||||
src = strip_or_none(media_attributes.get('src'))
|
src = strip_or_none(media_attributes.get('src'))
|
||||||
if src:
|
if src:
|
||||||
_, formats = _media_formats(src, media_type)
|
f = parse_content_type(media_attributes.get('type'))
|
||||||
|
_, formats = _media_formats(src, media_type, f)
|
||||||
media_info['formats'].extend(formats)
|
media_info['formats'].extend(formats)
|
||||||
media_info['thumbnail'] = absolute_url(media_attributes.get('poster'))
|
media_info['thumbnail'] = absolute_url(media_attributes.get('poster'))
|
||||||
if media_content:
|
if media_content:
|
||||||
|
|
Loading…
Reference in a new issue