mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-27 03:03:01 +00:00
[arte] Prefer json URLs that contain the video id from the 'vid' parameter in the URL (fixes #7920)
This commit is contained in:
parent
261b4c23c7
commit
a8f1d167f6
1 changed file with 15 additions and 5 deletions
|
@ -68,6 +68,10 @@ class ArteTVPlus7IE(InfoExtractor):
|
||||||
def _extract_url_info(cls, url):
|
def _extract_url_info(cls, url):
|
||||||
mobj = re.match(cls._VALID_URL, url)
|
mobj = re.match(cls._VALID_URL, url)
|
||||||
lang = mobj.group('lang')
|
lang = mobj.group('lang')
|
||||||
|
query = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
|
||||||
|
if 'vid' in query:
|
||||||
|
video_id = query['vid'][0]
|
||||||
|
else:
|
||||||
# This is not a real id, it can be for example AJT for the news
|
# This is not a real id, it can be for example AJT for the news
|
||||||
# http://www.arte.tv/guide/fr/emissions/AJT/arte-journal
|
# http://www.arte.tv/guide/fr/emissions/AJT/arte-journal
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
|
@ -79,9 +83,15 @@ def _real_extract(self, url):
|
||||||
return self._extract_from_webpage(webpage, video_id, lang)
|
return self._extract_from_webpage(webpage, video_id, lang)
|
||||||
|
|
||||||
def _extract_from_webpage(self, webpage, video_id, lang):
|
def _extract_from_webpage(self, webpage, video_id, lang):
|
||||||
|
patterns_templates = (r'arte_vp_url=["\'](.*?%s.*?)["\']', r'data-url=["\']([^"]+%s[^"]+)["\']')
|
||||||
|
ids = (video_id, '')
|
||||||
|
# some pages contain multiple videos (like
|
||||||
|
# http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D),
|
||||||
|
# so we first try to look for json URLs that contain the video id from
|
||||||
|
# the 'vid' parameter.
|
||||||
|
patterns = [t % re.escape(_id) for _id in ids for t in patterns_templates]
|
||||||
json_url = self._html_search_regex(
|
json_url = self._html_search_regex(
|
||||||
[r'arte_vp_url=["\'](.*?)["\']', r'data-url=["\']([^"]+)["\']'],
|
patterns, webpage, 'json vp url', default=None)
|
||||||
webpage, 'json vp url', default=None)
|
|
||||||
if not json_url:
|
if not json_url:
|
||||||
iframe_url = self._html_search_regex(
|
iframe_url = self._html_search_regex(
|
||||||
r'<iframe[^>]+src=(["\'])(?P<url>.+\bjson_url=.+?)\1',
|
r'<iframe[^>]+src=(["\'])(?P<url>.+\bjson_url=.+?)\1',
|
||||||
|
|
Loading…
Reference in a new issue