From f0f3a6c99d2834ca8af87be4978c0040c3744628 Mon Sep 17 00:00:00 2001
From: ngld
Date: Wed, 12 Aug 2015 18:07:27 +0200
Subject: [PATCH 1/4] [rtvnhnl] Added new extractor

---
 youtube_dl/extractor/__init__.py |  1 +
 youtube_dl/extractor/rtvnhnl.py  | 40 ++++++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+)
 create mode 100644 youtube_dl/extractor/rtvnhnl.py

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index dad3ec87f..f026a4171 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -491,6 +491,7 @@
 from .rtp import RTPIE
 from .rts import RTSIE
 from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE
+from .rtvnhnl import RtvnhNlIE
 from .ruhd import RUHDIE
 from .rutube import (
     RutubeIE,
diff --git a/youtube_dl/extractor/rtvnhnl.py b/youtube_dl/extractor/rtvnhnl.py
new file mode 100644
index 000000000..ce84900a0
--- /dev/null
+++ b/youtube_dl/extractor/rtvnhnl.py
@@ -0,0 +1,40 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class RtvnhNlIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?rtvnh\.nl/video/(?P<id>[0-9]+)'
+    _TEST = {
+        'params': {
+            'hls_prefer_native': True
+        },
+
+        'url': 'http://www.rtvnh.nl/video/131946',
+        'md5': '6e1d0ab079e2a00b6161442d3ceacfc1',
+        'info_dict': {
+            'id': '131946',
+            'ext': 'mp4',
+            'title': 'Grote zoektocht in zee bij Zandvoort naar vermiste vrouw',
+            'thumbnail': 're:^https?://rtvnh-webfiles\.[^.]+\.amazonaws\.com/data/cache/[0-9]+/basedata/pf_image/[0-9.]+/[0-9\-a-f]+\.jpg$'
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        meta = self._parse_json(self._download_webpage('http://www.rtvnh.nl/video/json?m=' + video_id, video_id), video_id)
+        formats = self._extract_smil_formats('http://www.rtvnh.nl/video/smil?m=' + video_id, video_id)
+
+        for item in meta['source']['fb']:
+            if item.get('type') == 'hls':
+                formats.extend(self._extract_m3u8_formats(item['file'], video_id, ext='mp4'))
+            elif item.get('type') == '':
+                formats.append({'url': item['file']})
+
+        return {
+            'id': video_id,
+            'title': meta['title'].strip(),
+            'thumbnail': meta['image'],
+            'formats': formats
+        }

From fb124e37419668c34b4056575614776b0c64b401 Mon Sep 17 00:00:00 2001
From: ngld
Date: Wed, 12 Aug 2015 20:21:32 +0200
Subject: [PATCH 2/4] [rtvnhnl] Relax the thumbnail check

---
 youtube_dl/extractor/rtvnhnl.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/rtvnhnl.py b/youtube_dl/extractor/rtvnhnl.py
index ce84900a0..0921e2648 100644
--- a/youtube_dl/extractor/rtvnhnl.py
+++ b/youtube_dl/extractor/rtvnhnl.py
@@ -17,7 +17,7 @@ class RtvnhNlIE(InfoExtractor):
             'id': '131946',
             'ext': 'mp4',
             'title': 'Grote zoektocht in zee bij Zandvoort naar vermiste vrouw',
-            'thumbnail': 're:^https?://rtvnh-webfiles\.[^.]+\.amazonaws\.com/data/cache/[0-9]+/basedata/pf_image/[0-9.]+/[0-9\-a-f]+\.jpg$'
+            'thumbnail': 're:^http:.*\.jpg$'
         }
     }
 

From d9ab5262b137962995af1b444f45f7f32dc33a77 Mon Sep 17 00:00:00 2001
From: ngld
Date: Wed, 12 Aug 2015 20:26:13 +0200
Subject: [PATCH 3/4] [rtvnh] Renamed rtvnhnl -> rtvnh

---
 youtube_dl/extractor/__init__.py              | 2 +-
 youtube_dl/extractor/{rtvnhnl.py => rtvnh.py} | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)
 rename youtube_dl/extractor/{rtvnhnl.py => rtvnh.py} (94%)

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index f026a4171..9a6308723 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -491,7 +491,7 @@
 from .rtp import RTPIE
 from .rts import RTSIE
 from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE
-from .rtvnhnl import RtvnhNlIE
+from .rtvnh import RTVNHIE
 from .ruhd import RUHDIE
 from .rutube import (
     RutubeIE,
diff --git a/youtube_dl/extractor/rtvnhnl.py b/youtube_dl/extractor/rtvnh.py
similarity index 94%
rename from youtube_dl/extractor/rtvnhnl.py
rename to youtube_dl/extractor/rtvnh.py
index 0921e2648..f5c0b94a8 100644
--- a/youtube_dl/extractor/rtvnhnl.py
+++ b/youtube_dl/extractor/rtvnh.py
@@ -4,7 +4,7 @@
 from .common import InfoExtractor
 
 
-class RtvnhNlIE(InfoExtractor):
+class RTVNHIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?rtvnh\.nl/video/(?P<id>[0-9]+)'
     _TEST = {
         'params': {
@@ -17,7 +17,7 @@ class RtvnhNlIE(InfoExtractor):
             'id': '131946',
             'ext': 'mp4',
             'title': 'Grote zoektocht in zee bij Zandvoort naar vermiste vrouw',
-            'thumbnail': 're:^http:.*\.jpg$'
+            'thumbnail': 're:^https?:.*\.jpg$'
         }
     }
 

From d7dbfc7cc18c2d54d7e1752def6c4710c58b49fc Mon Sep 17 00:00:00 2001
From: ngld
Date: Wed, 12 Aug 2015 20:51:28 +0200
Subject: [PATCH 4/4] Use native HLS implementation by default.

---
 youtube_dl/extractor/rtvnh.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/youtube_dl/extractor/rtvnh.py b/youtube_dl/extractor/rtvnh.py
index f5c0b94a8..2799f01a6 100644
--- a/youtube_dl/extractor/rtvnh.py
+++ b/youtube_dl/extractor/rtvnh.py
@@ -7,10 +7,6 @@
 class RTVNHIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?rtvnh\.nl/video/(?P<id>[0-9]+)'
     _TEST = {
-        'params': {
-            'hls_prefer_native': True
-        },
-
         'url': 'http://www.rtvnh.nl/video/131946',
         'md5': '6e1d0ab079e2a00b6161442d3ceacfc1',
         'info_dict': {
@@ -28,7 +24,7 @@ def _real_extract(self, url):
 
         for item in meta['source']['fb']:
             if item.get('type') == 'hls':
-                formats.extend(self._extract_m3u8_formats(item['file'], video_id, ext='mp4'))
+                formats.extend(self._extract_m3u8_formats(item['file'], video_id, ext='mp4', entry_protocol='m3u8_native'))
             elif item.get('type') == '':
                 formats.append({'url': item['file']})
 