From 03f32a7eadf9d832aef55673edf38023a8daff95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 17 Jul 2015 00:14:38 +0600 Subject: [PATCH] [wnl] Add extractor for omroepwnl playlists --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/npo.py | 45 +++++++++++++++++++++++++++----- 2 files changed, 40 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 3f4f23521..1d55275dc 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -390,6 +390,7 @@ NPORadioIE, NPORadioFragmentIE, TegenlichtVproIE, + WNLIE ) from .nrk import ( NRKIE, diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index cf6a388e5..c6bf7619d 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -46,12 +46,15 @@ class NPOIE(NPOBaseIE): IE_NAME = 'npo' IE_DESC = 'npo.nl and ntr.nl' _VALID_URL = r'''(?x) - https?:// - (?:www\.)? - (?: - npo\.nl/(?!live|radio)(?:[^/]+/){2}| - ntr\.nl/(?:[^/]+/){2,}| - omroepwnl\.nl/video/fragment/[^/]+__ + (?: + npo:| + https?:// + (?:www\.)? 
+ (?: npo\.nl/(?!live|radio)(?:[^/]+/){2}| ntr\.nl/(?:[^/]+/){2,}| omroepwnl\.nl/video/fragment/[^/]+__ + ) ) (?P<id>[^/?#]+) ''' @@ -426,3 +429,33 @@ def _real_extract(self, url): info_page = self._download_json( 'http://rs.vpro.nl/v2/api/media/%s.json' % urn, name) return self._get_info(info_page['mid']) + + +class WNLIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?omroepwnl\.nl/video/detail/(?P<id>[^/]+)__\d+' + + _TEST = { + 'url': 'http://www.omroepwnl.nl/video/detail/vandaag-de-dag-6-mei__060515', + 'info_dict': { + 'id': 'vandaag-de-dag-6-mei', + 'title': 'Vandaag de Dag 6 mei', + }, + 'playlist_count': 4, + } + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + webpage = self._download_webpage(url, playlist_id) + + entries = [ + self.url_result('npo:%s' % video_id, 'NPO') + for video_id, part in re.findall( + r'<a[^>]+href="([^"]+)"[^>]+class="js-mid"[^>]*>(Deel \d+)', webpage) + ] + + playlist_title = self._html_search_regex( + r'(?s)<h1[^>]+class="subject"[^>]*>(.+?)</h1>', + webpage, 'playlist title') + + return self.playlist_result(entries, playlist_id, playlist_title)