From fa35cdad02e1c40094f01c9f8e6529da2f021563 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 21 Apr 2014 05:47:52 +0200 Subject: [PATCH] [condenast|generic] Add support for condenast embeds (Fixes #2783) --- youtube_dl/extractor/condenast.py | 30 ++++++++++++++++++------------ youtube_dl/extractor/generic.py | 26 ++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/condenast.py b/youtube_dl/extractor/condenast.py index 91c1c1348..ffbe4903b 100644 --- a/youtube_dl/extractor/condenast.py +++ b/youtube_dl/extractor/condenast.py @@ -28,16 +28,18 @@ class CondeNastIE(InfoExtractor): 'glamour': 'Glamour', 'wmagazine': 'W Magazine', 'vanityfair': 'Vanity Fair', + 'cnevids': 'Condé Nast', } - _VALID_URL = r'http://(video|www)\.(?P%s)\.com/(?Pwatch|series|video)/(?P.+)' % '|'.join(_SITES.keys()) + _VALID_URL = r'http://(video|www|player)\.(?P%s)\.com/(?Pwatch|series|video|embed)/(?P[^/?#]+)' % '|'.join(_SITES.keys()) IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values())) _TEST = { 'url': 'http://video.wired.com/watch/3d-printed-speakers-lit-with-led', - 'file': '5171b343c2b4c00dd0c1ccb3.mp4', 'md5': '1921f713ed48aabd715691f774c451f7', 'info_dict': { + 'id': '5171b343c2b4c00dd0c1ccb3', + 'ext': 'mp4', 'title': '3D Printed Speakers Lit With LED', 'description': 'Check out these beautiful 3D printed LED speakers. You can\'t actually buy them, but LumiGeek is working on a board that will let you make you\'re own.', } @@ -55,12 +57,16 @@ def _extract_series(self, url, webpage): entries = [self.url_result(build_url(path), 'CondeNast') for path in paths] return self.playlist_result(entries, playlist_title=title) - def _extract_video(self, webpage): - description = self._html_search_regex([r'
(.+?)
', - r'
(.+?)
', - ], - webpage, 'description', - fatal=False, flags=re.DOTALL) + def _extract_video(self, webpage, url_type): + if url_type != 'embed': + description = self._html_search_regex( + [ + r'
(.+?)
', + r'
(.+?)
', + ], + webpage, 'description', fatal=False, flags=re.DOTALL) + else: + description = None params = self._search_regex(r'var params = {(.+?)}[;,]', webpage, 'player params', flags=re.DOTALL) video_id = self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, 'video id') @@ -99,12 +105,12 @@ def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) site = mobj.group('site') url_type = mobj.group('type') - id = mobj.group('id') + item_id = mobj.group('id') - self.to_screen(u'Extracting from %s with the Condé Nast extractor' % self._SITES[site]) - webpage = self._download_webpage(url, id) + self.to_screen('Extracting from %s with the Condé Nast extractor' % self._SITES[site]) + webpage = self._download_webpage(url, item_id) if url_type == 'series': return self._extract_series(url, webpage) else: - return self._extract_video(webpage) + return self._extract_video(webpage, url_type) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index f9b9d56d2..4b14cc5bf 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -239,6 +239,16 @@ class GenericIE(InfoExtractor): 'uploader_id': 'rbctv_2012_4', }, }, + # Condé Nast embed + { + 'url': 'http://www.wired.com/2014/04/honda-asimo/', + 'md5': 'ba0dfe966fa007657bd1443ee672db0f', + 'info_dict': { + 'id': '53501be369702d3275860000', + 'ext': 'mp4', + 'title': 'Honda’s New Asimo Robot Is More Human Than Ever', + } + } ] def report_download_webpage(self, video_id): @@ -485,6 +495,22 @@ def _real_extract(self, url): if mobj: return self.url_result(mobj.group(1), 'BlipTV') + # Look for embedded condenast player + matches = re.findall( + r']*?content="(.*?bandcamp\.com.*?)"', webpage) if mobj is not None: