From 6a5af6acb9131d702b0d206242053b202440dbb9 Mon Sep 17 00:00:00 2001 From: Mats Date: Thu, 25 Sep 2014 16:25:53 +0200 Subject: [PATCH 1/2] [golem] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/golem.py | 131 +++++++++++++++++++++++++++++++ 2 files changed, 132 insertions(+) create mode 100644 youtube_dl/extractor/golem.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 1f1fc0eb2..71fe38ca0 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -135,6 +135,7 @@ from .gdcvault import GDCVaultIE from .generic import GenericIE from .godtube import GodTubeIE +from .golem import GolemIE from .googleplus import GooglePlusIE from .googlesearch import GoogleSearchIE from .gorillavid import GorillaVidIE diff --git a/youtube_dl/extractor/golem.py b/youtube_dl/extractor/golem.py new file mode 100644 index 000000000..afb620b1c --- /dev/null +++ b/youtube_dl/extractor/golem.py @@ -0,0 +1,131 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import compat_urlparse + + +class GolemIE(InfoExtractor): + _VALID_URL = r'^https?://video\.golem\.de/.+?/(?P.+?)/' + _TEST = { + 'url': 'http://video.golem.de/handy/14095/iphone-6-und-6-plus-test.html', + 'md5': 'c1a2c0a3c863319651c7c992c5ee29bf', + 'info_dict': { + 'id': '14095', + 'format_id': 'high', + 'ext': 'mp4', + 'title': 'iPhone 6 und 6 Plus - Test', + 'duration': 300, + 'filesize': 65309548, + } + } + + _CONFIG = 'https://video.golem.de/xml/{}.xml' + _PREFIX = 'http://video.golem.de' + + def _warn(self, fmt, *args): + self.report_warning(fmt.format(*args), self._id) + + def _extract_format(self, elem): + format_id = elem.tag + + url = elem.findtext('./url') + if url == '': + self._warn("{}: url: empty, skipping", format_id) + return None + + fmt = { + 'format_id': format_id, + 'url': compat_urlparse.urljoin(self._PREFIX, url) + } + + try: + _, ext = elem.findtext('./filename', '').rsplit('.', 1) + except ValueError: + self._warn('{}: ext: missing extension', format_id) + else: + fmt['ext'] = ext + + filesize = elem.findtext('./filesize') + if filesize is not None: + try: + fmt['filesize'] = int(filesize) + except ValueError as e: + self._warn('{}: filesize: {}', format_id, e) + + width = elem.get('width') + if width is not None: + try: + fmt['width'] = int(width) + except ValueError as e: + self._warn('{}: width: {}', format_id, e) + + height = elem.get('height') + if height is not None: + try: + fmt['height'] = int(height) + except ValueError as e: + self._warn('{}: height: {}', format_id, e) + + return fmt + + def _extract_thumbnail(self, elem): + url = elem.findtext('./url') + if url == '': + return None + thumb = { + 'url': compat_urlparse.urljoin(self._PREFIX, url) + } + + width = elem.get('width') + if width is not None: + try: + thumb['width'] = int(width) + except ValueError as e: + self._warn('thumbnail: width: {}', e) + + height = elem.get('height') + if height is not None: + try: + thumb['height'] = int(height) + except ValueError as e: + self._warn('thumbnail: height: {}', e) + + return thumb + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + self._id = mobj.group('id') + + config = self._download_xml(self._CONFIG.format(self._id), self._id) + + info = { + 'id': self._id, + 'title': config.findtext('./title', 'golem') + } + + formats = [] + for e in config.findall('./*[url]'): + fmt = self._extract_format(e) + if fmt is not None: + formats.append(fmt) + self._sort_formats(formats) + info['formats'] = formats + + thumbnails = [] + for e in config.findall('.//teaser[url]'): + thumb = self._extract_thumbnail(e) + if thumb is not None: + thumbnails.append(thumb) + info['thumbnails'] = thumbnails + + playtime = config.findtext('./playtime') + if playtime is not None: + try: + info['duration'] = round(float(playtime)) + except ValueError as e: + self._warn('duration: {}', e) + + return info From 70752ccefd2dcb54d131644aea38c324c81ff168 Mon Sep 17 00:00:00 2001 From: Mats Date: Sat, 27 Sep 2014 19:35:55 +0200 Subject: [PATCH 2/2] [golem] Don't omit positional argument specifiers Required by Python 2.6. --- youtube_dl/extractor/golem.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/golem.py b/youtube_dl/extractor/golem.py index afb620b1c..6a64b5d95 100644 --- a/youtube_dl/extractor/golem.py +++ b/youtube_dl/extractor/golem.py @@ -22,7 +22,7 @@ class GolemIE(InfoExtractor): } } - _CONFIG = 'https://video.golem.de/xml/{}.xml' + _CONFIG = 'https://video.golem.de/xml/{0}.xml' _PREFIX = 'http://video.golem.de' def _warn(self, fmt, *args): @@ -33,7 +33,7 @@ def _extract_format(self, elem): url = elem.findtext('./url') if url == '': - self._warn("{}: url: empty, skipping", format_id) + self._warn("{0}: url: empty, skipping", format_id) return None fmt = { @@ -44,7 +44,7 @@ def _extract_format(self, elem): try: _, ext = elem.findtext('./filename', '').rsplit('.', 1) except ValueError: - self._warn('{}: ext: missing extension', format_id) + self._warn('{0}: ext: missing extension', format_id) else: fmt['ext'] = ext @@ -53,21 +53,21 @@ def _extract_format(self, elem): try: fmt['filesize'] = int(filesize) except ValueError as e: - self._warn('{}: filesize: {}', format_id, e) + self._warn('{0}: filesize: {1}', format_id, e) width = elem.get('width') if width is not None: try: fmt['width'] = int(width) except ValueError as e: - self._warn('{}: width: {}', format_id, e) + self._warn('{0}: width: {1}', format_id, e) height = elem.get('height') if height is not None: try: fmt['height'] = int(height) except ValueError as e: - self._warn('{}: height: {}', format_id, e) + self._warn('{0}: height: {1}', format_id, e) return fmt @@ -84,14 +84,14 @@ def _extract_thumbnail(self, elem): try: thumb['width'] = int(width) except ValueError as e: - self._warn('thumbnail: width: {}', e) + self._warn('thumbnail: width: {0}', e) height = elem.get('height') if height is not None: try: thumb['height'] = int(height) except ValueError as e: - self._warn('thumbnail: height: {}', e) + self._warn('thumbnail: height: {0}', e) return thumb @@ -126,6 +126,6 @@ def _real_extract(self, url): try: info['duration'] = round(float(playtime)) except ValueError as e: - self._warn('duration: {}', e) + self._warn('duration: {0}', e) return info