From 2a7c6befc16f72df5368cb4adccd1cd84fd432d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 26 May 2018 01:09:44 +0700 Subject: [PATCH] [indavideo] Fix extraction (closes #11221) --- youtube_dl/extractor/indavideo.py | 48 +++++++++++++++++++------------ 1 file changed, 29 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/indavideo.py b/youtube_dl/extractor/indavideo.py index 15b766fb2..2946c7b84 100644 --- a/youtube_dl/extractor/indavideo.py +++ b/youtube_dl/extractor/indavideo.py @@ -15,7 +15,7 @@ class IndavideoEmbedIE(InfoExtractor): _VALID_URL = r'https?://(?:(?:embed\.)?indavideo\.hu/player/video/|assets\.indavideo\.hu/swf/player\.swf\?.*\b(?:v(?:ID|id))=)(?P[\da-f]+)' _TESTS = [{ 'url': 'http://indavideo.hu/player/video/1bdc3c6d80/', - 'md5': 'f79b009c66194acacd40712a6778acfa', + 'md5': 'c8a507a1c7410685f83a06eaeeaafeab', 'info_dict': { 'id': '1837039', 'ext': 'mp4', @@ -47,7 +47,14 @@ def _real_extract(self, url): title = video['title'] - video_urls = video.get('video_files', []) + video_urls = [] + + video_files = video.get('video_files') + if isinstance(video_files, list): + video_urls.extend(video_files) + elif isinstance(video_files, dict): + video_urls.extend(video_files.values()) + video_file = video.get('video_file') if video: video_urls.append(video_file) @@ -61,9 +68,22 @@ def _real_extract(self, url): video_urls.append(flv_url) filesh = video.get('filesh') - formats = [ - self.video_url_to_format(video_url, filesh) - for video_url in video_urls] + + formats = [] + for video_url in video_urls: + height = int_or_none(self._search_regex( + r'\.(\d{3,4})\.mp4(?:\?|$)', video_url, 'height', default=None)) + if filesh: + if not height: + continue + token = filesh.get(compat_str(height)) + if token is None: + continue + video_url = update_url_query(video_url, {'token': token}) + formats.append({ + 'url': video_url, + 'height': height, + }) self._sort_formats(formats) timestamp = video.get('date') @@ -91,18 +111,6 @@ def _real_extract(self, url): 'formats': formats, } - def video_url_to_format(self, video_url, filesh): - height = int_or_none(self._search_regex( - r'\.(\d{3,4})\.mp4(?:\?|$)', video_url, 'height', default=None)) - if height and filesh: - token = filesh.get(compat_str(height)) - if token is not None: - video_url = update_url_query(video_url, {'token': token}) - return { - 'url': video_url, - 'height': height, - } - class IndavideoIE(InfoExtractor): _VALID_URL = r'https?://(?:.+?\.)?indavideo\.hu/video/(?P[^/#?]+)' @@ -122,7 +130,7 @@ class IndavideoIE(InfoExtractor): 'upload_date': '20140127', 'duration': 7, 'age_limit': 0, - 'tags': ['vicces', 'macska', 'cica', 'ügyes', 'nevetés', 'játszik', 'Cukiság', 'Jet_Pack'], + 'tags': list, }, }, { 'url': 'http://index.indavideo.hu/video/2015_0728_beregszasz', @@ -146,7 +154,9 @@ def _real_extract(self, url): webpage = self._download_webpage(url, display_id) embed_url = self._search_regex( - r']+rel="video_src"[^>]+href="(.+?)"', webpage, 'embed url') + (r']+\bsrc=(["\'])(?P(?:https?:)?//embed\.indavideo\.hu/player/video/.+?)\1', + r']+rel="video_src"[^>]+href="(?P.+?)"'), + webpage, 'embed url', group='url') return { '_type': 'url_transparent',