From e746021577f27fbc458daa0797f81a9d57c51a2c Mon Sep 17 00:00:00 2001 From: Bagira Date: Thu, 3 Nov 2016 12:53:45 +0100 Subject: [PATCH] [videa] Add extractor --- youtube_dl/extractor/extractors.py | 4 ++ youtube_dl/extractor/videa.py | 98 ++++++++++++++++++++++++++++++ 2 files changed, 102 insertions(+) create mode 100644 youtube_dl/extractor/videa.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 811db6219..9898b9803 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1064,6 +1064,10 @@ from .viceland import VicelandIE from .vidbit import VidbitIE from .viddler import ViddlerIE +from .videa import ( + VideaIE, + VideaEmbedIE, +) from .videodetective import VideoDetectiveIE from .videofyme import VideofyMeIE from .videomega import VideoMegaIE diff --git a/youtube_dl/extractor/videa.py b/youtube_dl/extractor/videa.py new file mode 100644 index 000000000..3d2e7e31c --- /dev/null +++ b/youtube_dl/extractor/videa.py @@ -0,0 +1,98 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + parse_duration, + xpath_element, + xpath_text, + xpath_attr, + urlencode_postdata, + unescapeHTML, +) + + +class VideaIE(InfoExtractor): + _VALID_URL = r'https?://(?:.+?\.)?videa\.hu/videok/(?P[^#?]+)' + _TESTS = [{ + 'url': 'http://videa.hu/videok/allatok/az-orult-kigyasz-285-kigyot-kigyo-8YfIAjxwWGwT8HVQ', + 'md5': '97a7af41faeaffd9f1fc864a7c7e7603', + 'info_dict': { + 'id': '8YfIAjxwWGwT8HVQ', + 'display_id': '8YfIAjxwWGwT8HVQ', + 'ext': 'mp4', + 'title': 'Az őrült kígyász 285 kígyót enged szabadon', + 'thumbnail': 'http://videa.hu/static/still/1.4.1.1007274.1204470.3', + 'duration': 21, + }, + }, { + 'url': 'http://videa.hu/videok/origo/jarmuvek/supercars-elozes-jAHDWfWSJH5XuFhH', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + video_data = self._download_json("http://videa.hu/oembed/?" + urlencode_postdata({"url": url.split('?')[0], "format": "json"}), video_id) + video_url = self._search_regex( + r'src="(.+?)"', video_data.get('html'), 'embed url') + + return { + '_type': 'url_transparent', + 'url': video_url, + 'ie_key': 'VideaEmbed' + } + +class VideaEmbedIE(InfoExtractor): + _VALID_URL = r'(?Phttps?:)(?P//(?:.+?\.)?videa\.hu)/player(?:\?v=|/v/)(?P[^/#?]+)'; + _TESTS = [{ + 'url': 'http://videa.hu/player?v=8YfIAjxwWGwT8HVQ', + 'md5': '97a7af41faeaffd9f1fc864a7c7e7603', + 'info_dict': { + 'id': '8YfIAjxwWGwT8HVQ', + 'ext': 'mp4', + 'title': 'Az őrült kígyász 285 kígyót enged szabadon', + 'thumbnail': 'http://videa.hu/static/still/1.4.1.1007274.1204470.3', + 'duration': 21 + }, + }, { + 'url': 'http://videa.hu/player?v=jAHDWfWSJH5XuFhH', + 'only_matching': True, + }]; + + @staticmethod + def _extract_url(webpage): + mobj = re.search( + r']+src=(["\'])(?P(?:https?:)?//(?:.+?\.)?videa\.hu/player(?:\?v=|/v/)[^/#?]+)\1', + webpage) + if mobj: + return mobj.group('url') + + def _real_extract(self, url): + protocol, base_url, display_id = re.search(self._VALID_URL, url).groups() + xml = self._download_xml(protocol + base_url + "/flvplayer_get_video_xml.php?v=" + display_id, display_id) + + medias = [] + + for xml_media in xml.findall('video') + xml.findall('audio'): + media_url = protocol + xpath_attr(xml_media, 'versions/version', 'video_url') + media = { + 'id': display_id, + 'ext': 'mp4', + 'title': xpath_text(xml_media, 'title', 'title', True), + 'duration': parse_duration(xpath_text(xml_media, 'duration')), + 'thumbnail': protocol + xpath_text(xml_media, 'still', 'still', True), + 'url': media_url, + } + medias.append(media) + + if len(medias) > 1: + self._downloader.report_warning( + 'found multiple medias; please ' + 'report this with the video URL to http://yt-dl.org/bug') + if not medias: + raise ExtractorError('No media entries found') + return medias[0]