From 427cd050a3b64319c19e4596d8885378604e388e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 27 Jun 2016 04:11:53 +0700 Subject: [PATCH] [extractor/generic] Improve kaltura embed detection (Closes #9911) --- youtube_dl/extractor/generic.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 1592a8a3a..26a7d10be 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -920,6 +920,24 @@ class GenericIE(InfoExtractor): }, 'add_ie': ['Kaltura'], }, + { + # Kaltura embedded via quoted entry_id + 'url': 'https://www.oreilly.com/ideas/my-cloud-makes-pretty-pictures', + 'info_dict': { + 'id': '0_utuok90b', + 'ext': 'mp4', + 'title': '06_matthew_brender_raj_dutt', + 'timestamp': 1466638791, + 'upload_date': '20160622', + }, + 'add_ie': ['Kaltura'], + 'expected_warnings': [ + 'Could not send HEAD request' + ], + 'params': { + 'skip_download': True, + } + }, # Eagle.Platform embed (generic URL) { 'url': 'http://lenta.ru/news/2015/03/06/navalny/', @@ -1909,7 +1927,7 @@ def _playlist_from_matches(matches, getter=None, ie=None): # Look for Kaltura embeds mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?(?P<q1>['\"])wid(?P=q1)\s*:\s*(?P<q2>['\"])_?(?P<partner_id>[^'\"]+)(?P=q2),.*?(?P<q3>['\"])entry_?[Ii]d(?P=q3)\s*:\s*(?P<q4>['\"])(?P<id>[^'\"]+)(?P=q4),", webpage) or - re.search(r'(?s)(?P<q1>["\'])(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?(?P=q1).*?entry_?[Ii]d\s*:\s*(?P<q2>["\'])(?P<id>.+?)(?P=q2)', webpage)) + re.search(r'(?s)(?P<q1>["\'])(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?(?P=q1).*?(?P<q2>["\'])?entry_?[Ii]d(?P=q2)\s*:\s*(?P<q3>["\'])(?P<id>.+?)(?P=q3)', webpage)) if mobj is not None: return self.url_result(smuggle_url( 'kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(),