From e186a9ec0394400a6996f98d197d0d14937a60c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 31 Dec 2016 22:04:29 +0700 Subject: [PATCH] [videa] Add support for videa embeds --- youtube_dl/extractor/generic.py | 15 +++++++++++++++ youtube_dl/extractor/videa.py | 8 ++++++++ 2 files changed, 23 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 79d10a1d1..8503966a7 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -76,6 +76,7 @@ from .vbox7 import Vbox7IE from .dbtv import DBTVIE from .piksel import PikselIE +from .videa import VideaIE class GenericIE(InfoExtractor): @@ -1422,6 +1423,15 @@ class GenericIE(InfoExtractor): }, 'playlist_mincount': 3, }, + { + # Videa embeds + 'url': 'http://forum.dvdtalk.com/movie-talk/623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style.html', + 'info_dict': { + 'id': '623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style', + 'title': 'Deleted Magic - Star Wars: OT Deleted / Alt. Scenes Docu. 
Style - DVD Talk Forum', + }, + 'playlist_mincount': 2, + }, # { # # TODO: find another test # # http://schema.org/VideoObject @@ -2358,6 +2368,11 @@ def _playlist_from_matches(matches, getter=None, ie=None): if dbtv_urls: return _playlist_from_matches(dbtv_urls, ie=DBTVIE.ie_key()) + # Look for Videa embeds + videa_urls = VideaIE._extract_urls(webpage) + if videa_urls: + return _playlist_from_matches(videa_urls, ie=VideaIE.ie_key()) + # Looking for http://schema.org/VideoObject json_ld = self._search_json_ld( webpage, video_id, default={}, expected_type='VideoObject') diff --git a/youtube_dl/extractor/videa.py b/youtube_dl/extractor/videa.py index 039add86b..311df58f4 100644 --- a/youtube_dl/extractor/videa.py +++ b/youtube_dl/extractor/videa.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ( int_or_none, @@ -43,6 +45,12 @@ class VideaIE(InfoExtractor): 'only_matching': True, }] + @staticmethod + def _extract_urls(webpage): + return [url for _, url in re.findall( + r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//videa\.hu/player\?.*?\bv=.+?)\1', + webpage)] + def _real_extract(self, url): video_id = self._match_id(url)