mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-26 02:55:17 +00:00
[WDR] extract jsonp-url by parsing data-extension of mediaLink
This commit is contained in:
parent
bec2c14f2c
commit
33a1ff7113
1 changed files with 12 additions and 5 deletions
|
@ -10,6 +10,7 @@
|
|||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
js_to_json,
|
||||
strip_jsonp,
|
||||
unified_strdate,
|
||||
ExtractorError,
|
||||
|
@ -21,8 +22,6 @@ class WDRIE(InfoExtractor):
|
|||
_PAGE_REGEX = r'/mediathek/(?P<media_type>[^/]+)/(?P<type>[^/]+)/(?P<display_id>.+)\.html'
|
||||
_VALID_URL = r'(?P<page_url>https?://(?:www\d\.)?wdr\d?\.de)' + _PAGE_REGEX + '|' + _CURRENT_MAUS_URL
|
||||
|
||||
_JS_URL_REGEX = r'(https?://deviceids-medp.wdr.de/ondemand/\d+/\d+\.js)'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www1.wdr.de/mediathek/video/sendungen/doku-am-freitag/video-geheimnis-aachener-dom-100.html',
|
||||
|
@ -102,9 +101,13 @@ def _real_extract(self, url):
|
|||
display_id = mobj.group('display_id')
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
js_url = self._search_regex(self._JS_URL_REGEX, webpage, 'js_url', default=None)
|
||||
# for wdr.de the data-extension is in a tag with the class "mediaLink"
|
||||
# for wdrmaus it's in a link to the page in a multiline "videoLink"-tag
|
||||
json_metadata = self._html_search_regex(
|
||||
r'class=(?:"mediaLink\b[^"]*"[^>]+|"videoLink\b[^"]*"[\s]*>\n[^\n]*)data-extension="([^"]+)"',
|
||||
webpage, 'media link', default=None, flags=re.MULTILINE)
|
||||
|
||||
if not js_url:
|
||||
if not json_metadata:
|
||||
entries = [
|
||||
self.url_result(page_url + href[0], 'WDR')
|
||||
for href in re.findall(
|
||||
|
@ -117,8 +120,12 @@ def _real_extract(self, url):
|
|||
|
||||
raise ExtractorError('No downloadable streams found', expected=True)
|
||||
|
||||
media_link_obj = self._parse_json(json_metadata, display_id,
|
||||
transform_source=js_to_json)
|
||||
jsonp_url = media_link_obj['mediaObj']['url']
|
||||
|
||||
metadata = self._download_json(
|
||||
js_url, 'metadata', transform_source=strip_jsonp)
|
||||
jsonp_url, 'metadata', transform_source=strip_jsonp)
|
||||
|
||||
metadata_tracker_data = metadata['trackerData']
|
||||
metadata_media_resource = metadata['mediaResource']
|
||||
|
|
Loading…
Reference in a new issue