From b0f001a6cbd220c8b10c0ce359f17072d6347a8f Mon Sep 17 00:00:00 2001 From: remitamine Date: Mon, 21 Sep 2015 15:52:36 +0100 Subject: [PATCH] [canalc2] fix info extraction --- youtube_dl/extractor/canalc2.py | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/canalc2.py b/youtube_dl/extractor/canalc2.py index c4fefefe4..66a9ff093 100644 --- a/youtube_dl/extractor/canalc2.py +++ b/youtube_dl/extractor/canalc2.py @@ -8,34 +8,40 @@ class Canalc2IE(InfoExtractor): IE_NAME = 'canalc2.tv' - _VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?.*?idVideo=(?P\d+)' + _VALID_URL = r'https?://(www\.)?canalc2\.tv/video/(?P\d+)' _TEST = { - 'url': 'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui', + 'url': 'http://www.canalc2.tv/video/12163', 'md5': '060158428b650f896c542dfbb3d6487f', 'info_dict': { 'id': '12163', 'ext': 'mp4', 'title': 'Terrasses du Numérique' + }, + 'params': { + 'skip_download': True, # Requires rtmpdump } } def _real_extract(self, url): - video_id = re.match(self._VALID_URL, url).group('id') - # We need to set the voir field for getting the file name - url = 'http://www.canalc2.tv/video.asp?idVideo=%s&voir=oui' % video_id + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - file_name = self._search_regex( - r"so\.addVariable\('file','(.*?)'\);", - webpage, 'file name') - video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file_name + video_url = self._search_regex( + r'jwplayer\("Player"\).setup\({[^}]*file: "([^"]+)"', + webpage, 'video_url') + formats = [{'url': video_url}] + if video_url.startswith('rtmp://'): + rtmp = re.search(r'^(?Prtmp://[^/]+/(?P.+))/(?Pmp4:.+)$', video_url) + formats[0].update({ + 'app': rtmp.group('app'), + 'play_path': rtmp.group('play_path'), + }) title = self._html_search_regex( - r'class="evenement8">(.*?)', webpage, 'title') + r'(?s)class="[^"]*col_description[^"]*">.*?

(.*?)

', webpage, 'title') return { 'id': video_id, - 'ext': 'mp4', - 'url': video_url, + 'formats': formats, 'title': title, }