mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-26 02:55:17 +00:00
parent
28163422a6
commit
0c36dc00d7
2 changed files with 25 additions and 33 deletions
|
@ -1392,27 +1392,25 @@ def _twitter_search_player(self, html):
|
||||||
return self._html_search_meta('twitter:player', html,
|
return self._html_search_meta('twitter:player', html,
|
||||||
'twitter card player')
|
'twitter card player')
|
||||||
|
|
||||||
def _search_json_ld(self, html, video_id, expected_type=None, **kwargs):
|
def _yield_json_ld(self, html, video_id, *, fatal=True, default=NO_DEFAULT):
|
||||||
json_ld_list = list(re.finditer(JSON_LD_RE, html))
|
"""Yield all json ld objects in the html"""
|
||||||
default = kwargs.get('default', NO_DEFAULT)
|
if default is not NO_DEFAULT:
|
||||||
# JSON-LD may be malformed and thus `fatal` should be respected.
|
fatal = False
|
||||||
# At the same time `default` may be passed that assumes `fatal=False`
|
for mobj in re.finditer(JSON_LD_RE, html):
|
||||||
# for _search_regex. Let's simulate the same behavior here as well.
|
json_ld_item = self._parse_json(mobj.group('json_ld'), video_id, fatal=fatal)
|
||||||
fatal = kwargs.get('fatal', True) if default is NO_DEFAULT else False
|
for json_ld in variadic(json_ld_item):
|
||||||
json_ld = []
|
if isinstance(json_ld, dict):
|
||||||
for mobj in json_ld_list:
|
yield json_ld
|
||||||
json_ld_item = self._parse_json(
|
|
||||||
mobj.group('json_ld'), video_id, fatal=fatal)
|
def _search_json_ld(self, html, video_id, expected_type=None, *, fatal=True, default=NO_DEFAULT):
|
||||||
if not json_ld_item:
|
"""Search for a video in any json ld in the html"""
|
||||||
continue
|
if default is not NO_DEFAULT:
|
||||||
if isinstance(json_ld_item, dict):
|
fatal = False
|
||||||
json_ld.append(json_ld_item)
|
info = self._json_ld(
|
||||||
elif isinstance(json_ld_item, (list, tuple)):
|
list(self._yield_json_ld(html, video_id, fatal=fatal, default=default)),
|
||||||
json_ld.extend(json_ld_item)
|
video_id, fatal=fatal, expected_type=expected_type)
|
||||||
if json_ld:
|
if info:
|
||||||
json_ld = self._json_ld(json_ld, video_id, fatal=fatal, expected_type=expected_type)
|
return info
|
||||||
if json_ld:
|
|
||||||
return json_ld
|
|
||||||
if default is not NO_DEFAULT:
|
if default is not NO_DEFAULT:
|
||||||
return default
|
return default
|
||||||
elif fatal:
|
elif fatal:
|
||||||
|
@ -1500,7 +1498,7 @@ def extract_video_object(e):
|
||||||
assert is_type(e, 'VideoObject')
|
assert is_type(e, 'VideoObject')
|
||||||
author = e.get('author')
|
author = e.get('author')
|
||||||
info.update({
|
info.update({
|
||||||
'url': traverse_obj(e, 'contentUrl', 'embedUrl', expected_type=url_or_none),
|
'url': url_or_none(e.get('contentUrl')),
|
||||||
'title': unescapeHTML(e.get('name')),
|
'title': unescapeHTML(e.get('name')),
|
||||||
'description': unescapeHTML(e.get('description')),
|
'description': unescapeHTML(e.get('description')),
|
||||||
'thumbnails': [{'url': url}
|
'thumbnails': [{'url': url}
|
||||||
|
|
|
@ -1,9 +1,5 @@
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import int_or_none, qualities, traverse_obj, url_or_none
|
||||||
int_or_none,
|
|
||||||
qualities,
|
|
||||||
url_or_none,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class NprIE(InfoExtractor):
|
class NprIE(InfoExtractor):
|
||||||
|
@ -74,10 +70,6 @@ def _real_extract(self, url):
|
||||||
})['list']['story'][0]
|
})['list']['story'][0]
|
||||||
playlist_title = story.get('title', {}).get('$text')
|
playlist_title = story.get('title', {}).get('$text')
|
||||||
|
|
||||||
# Fetch the JSON-LD from the npr page.
|
|
||||||
json_ld = self._search_json_ld(
|
|
||||||
self._download_webpage(url, playlist_id), playlist_id, 'NewsArticle', fatal=False)
|
|
||||||
|
|
||||||
KNOWN_FORMATS = ('threegp', 'm3u8', 'smil', 'mp4', 'mp3')
|
KNOWN_FORMATS = ('threegp', 'm3u8', 'smil', 'mp4', 'mp3')
|
||||||
quality = qualities(KNOWN_FORMATS)
|
quality = qualities(KNOWN_FORMATS)
|
||||||
|
|
||||||
|
@ -124,8 +116,10 @@ def _real_extract(self, url):
|
||||||
stream_url, stream_id, 'mp4', 'm3u8_native',
|
stream_url, stream_id, 'mp4', 'm3u8_native',
|
||||||
m3u8_id='hls', fatal=False))
|
m3u8_id='hls', fatal=False))
|
||||||
|
|
||||||
if not formats and json_ld.get('url'):
|
if not formats:
|
||||||
formats.extend(self._extract_m3u8_formats(json_ld['url'], media_id, 'mp4', m3u8_id='hls', fatal=False))
|
raw_json_ld = self._yield_json_ld(self._download_webpage(url, playlist_id), playlist_id, fatal=False)
|
||||||
|
m3u8_url = traverse_obj(list(raw_json_ld), (..., 'subjectOf', ..., 'embedUrl'), get_all=False)
|
||||||
|
formats = self._extract_m3u8_formats(m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False)
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue