yt-dlp/youtube_dl/extractor/discovery.py

from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    parse_duration,
    parse_iso8601,
)
from ..compat import compat_str


class DiscoveryIE(InfoExtractor):
    """Information extractor for video pages on www.discovery.com.

    The page URL is requested again with ``flat=1`` added to its query
    string and the response is parsed as JSON.  Every item of the
    ``playlist`` list in that JSON becomes one entry of the playlist
    returned by :meth:`_real_extract`.
    """

    _VALID_URL = r'http://www\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9_\-]*)(?:\.htm)?'
    _TESTS = [{
        'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm',
        'info_dict': {
            'id': '20769',
            'ext': 'mp4',
            'title': 'Mission Impossible Outtakes',
            'description': ('Watch Jamie Hyneman and Adam Savage practice being'
                            ' each other -- to the point of confusing Jamie\'s dog -- and '
                            'don\'t miss Adam moon-walking as Jamie ... behind Jamie\'s'
                            ' back.'),
            'duration': 156,
            'timestamp': 1303099200,
            'upload_date': '20110418',
        },
        'params': {
            'skip_download': True,  # requires ffmpeg
        }
    }, {
        'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mythbusters-the-simpsons',
        'info_dict': {
            'id': 'mythbusters-the-simpsons',
            'title': 'MythBusters: The Simpsons',
        },
        'playlist_count': 9,
    }]

    def _real_extract(self, url):
        """Build a playlist result from the page's ``flat=1`` JSON.

        ``url`` is the video page URL.  The ``id`` group of ``_VALID_URL``
        is used as the playlist id; the playlist title is the JSON's
        ``playlist_title`` or, failing that, its ``video_title``.

        Subscript access is used for the JSON's ``playlist`` key and for
        each item's ``id``, ``src`` and ``title``, so a response lacking
        any of them raises ``KeyError`` (assuming the JSON objects are
        decoded to dicts).  All other per-item fields are optional.
        """
        video_id = self._match_id(url)

        # The pattern in _VALID_URL has no end anchor, so the URL can
        # presumably still carry a query string or a fragment.  Blindly
        # appending '?flat=1' would then yield a second '?' or bury the
        # parameter inside the fragment; drop the fragment and pick the
        # right separator instead.  Plain URLs are requested as before.
        page_url = url.partition('#')[0]
        separator = '&' if '?' in page_url else '?'
        info = self._download_json(page_url + separator + 'flat=1', video_id)

        video_title = info.get('playlist_title') or info.get('video_title')

        entries = []
        for idx, video_info in enumerate(info['playlist']):
            entries.append({
                'id': compat_str(video_info['id']),
                # idx is zero-based; the progress note counts from 1.
                'formats': self._extract_m3u8_formats(
                    video_info['src'], video_id, ext='mp4',
                    note='Download m3u8 information for video %d' % (idx + 1)),
                'title': video_info['title'],
                'description': video_info.get('description'),
                'duration': parse_duration(video_info.get('video_length')),
                'webpage_url': video_info.get('href'),
                'thumbnail': video_info.get('thumbnailURL'),
                'alt_title': video_info.get('secondary_title'),
                'timestamp': parse_iso8601(video_info.get('publishedDate')),
            })

        return self.playlist_result(entries, video_id, video_title)
Added support for Discovery Issue #2227 2014-01-26 08:35:34 +00:00			`from __future__ import unicode_literals`

			`from .common import InfoExtractor`
[discovery] Fix extractor 2015-01-08 16:36:56 +00:00			`from ..utils import (`
[discovery] Rewrite DiscoveryIE (fixes #5898) Discovery.com now uses a completely different approach for serving videos. At least in both test cases brightcove are involved. However, AMF support is necessary for these brightcove videos. As a result, I try to extract videos from the info page ('?flat=1'). The downloaded file can be different from the one in browsers. 2015-06-07 08:34:19 +00:00			`parse_duration,`
[discovery] Fix extractor 2015-01-08 16:36:56 +00:00			`parse_iso8601,`
			`)`
[discovery] Rewrite DiscoveryIE (fixes #5898) Discovery.com now uses a completely different approach for serving videos. At least in both test cases brightcove are involved. However, AMF support is necessary for these brightcove videos. As a result, I try to extract videos from the info page ('?flat=1'). The downloaded file can be different from the one in browsers. 2015-06-07 08:34:19 +00:00			`from ..compat import compat_str`
Added support for Discovery Issue #2227 2014-01-26 08:35:34 +00:00

			`class DiscoveryIE(InfoExtractor):`
[discovery] Fix extractor 2015-01-08 16:36:56 +00:00			`_VALID_URL = r'http://www\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9_\-]*)(?:\.htm)?'`
[discovery] Rewrite DiscoveryIE (fixes #5898) Discovery.com now uses a completely different approach for serving videos. At least in both test cases brightcove are involved. However, AMF support is necessary for these brightcove videos. As a result, I try to extract videos from the info page ('?flat=1'). The downloaded file can be different from the one in browsers. 2015-06-07 08:34:19 +00:00			`_TESTS = [{`
[discovery] Change default url URL does a redirect from dsc.discovery.com to www.discovery.com This commit fixes the correct URL. 2014-06-24 15:41:53 +00:00			`'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm',`
Added support for Discovery Issue #2227 2014-01-26 08:35:34 +00:00			`'info_dict': {`
[discovery] Rewrite DiscoveryIE (fixes #5898) Discovery.com now uses a completely different approach for serving videos. At least in both test cases brightcove are involved. However, AMF support is necessary for these brightcove videos. As a result, I try to extract videos from the info page ('?flat=1'). The downloaded file can be different from the one in browsers. 2015-06-07 08:34:19 +00:00			`'id': '20769',`
			`'ext': 'mp4',`
[discovery] Fix extractor 2015-01-08 16:36:56 +00:00			`'title': 'Mission Impossible Outtakes',`
[discovery] Extract more info and simplify 2014-01-27 11:41:30 +00:00			`'description': ('Watch Jamie Hyneman and Adam Savage practice being'`
PEP8: applied even more rules 2014-11-23 20:39:15 +00:00			`' each other -- to the point of confusing Jamie\'s dog -- and '`
			`'don\'t miss Adam moon-walking as Jamie ... behind Jamie\'s'`
			`' back.'),`
[discovery] Extract more info and simplify 2014-01-27 11:41:30 +00:00			`'duration': 156,`
[discovery] Fix extractor 2015-01-08 16:36:56 +00:00			`'timestamp': 1303099200,`
			`'upload_date': '20110418',`
[discovery] Extract more info and simplify 2014-01-27 11:41:30 +00:00			`},`
[discovery] Rewrite DiscoveryIE (fixes #5898) Discovery.com now uses a completely different approach for serving videos. At least in both test cases brightcove are involved. However, AMF support is necessary for these brightcove videos. As a result, I try to extract videos from the info page ('?flat=1'). The downloaded file can be different from the one in browsers. 2015-06-07 08:34:19 +00:00			`'params': {`
			`'skip_download': True, # requires ffmpeg`
			`}`
			`}, {`
			`'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mythbusters-the-simpsons',`
			`'info_dict': {`
			`'id': 'mythbusters-the-simpsons',`
			`'title': 'MythBusters: The Simpsons',`
			`},`
			`'playlist_count': 9,`
			`}]`
Added support for Discovery Issue #2227 2014-01-26 08:35:34 +00:00
			`def _real_extract(self, url):`
[discovery] Fix extractor 2015-01-08 16:36:56 +00:00			`video_id = self._match_id(url)`
[discovery] Rewrite DiscoveryIE (fixes #5898) Discovery.com now uses a completely different approach for serving videos. At least in both test cases brightcove are involved. However, AMF support is necessary for these brightcove videos. As a result, I try to extract videos from the info page ('?flat=1'). The downloaded file can be different from the one in browsers. 2015-06-07 08:34:19 +00:00			`info = self._download_json(url + '?flat=1', video_id)`
[discovery] Extract more info and simplify 2014-01-27 11:41:30 +00:00
[discovery] Rewrite DiscoveryIE (fixes #5898) Discovery.com now uses a completely different approach for serving videos. At least in both test cases brightcove are involved. However, AMF support is necessary for these brightcove videos. As a result, I try to extract videos from the info page ('?flat=1'). The downloaded file can be different from the one in browsers. 2015-06-07 08:34:19 +00:00			`video_title = info.get('playlist_title') or info.get('video_title')`
Added support for Discovery Issue #2227 2014-01-26 08:35:34 +00:00
[discovery] Rewrite DiscoveryIE (fixes #5898) Discovery.com now uses a completely different approach for serving videos. At least in both test cases brightcove are involved. However, AMF support is necessary for these brightcove videos. As a result, I try to extract videos from the info page ('?flat=1'). The downloaded file can be different from the one in browsers. 2015-06-07 08:34:19 +00:00			`entries = [{`
			`'id': compat_str(video_info['id']),`
			`'formats': self._extract_m3u8_formats(`
			`video_info['src'], video_id, ext='mp4',`
			`note='Download m3u8 information for video %d' % (idx + 1)),`
			`'title': video_info['title'],`
			`'description': video_info.get('description'),`
			`'duration': parse_duration(video_info.get('video_length')),`
			`'webpage_url': video_info.get('href'),`
			`'thumbnail': video_info.get('thumbnailURL'),`
			`'alt_title': video_info.get('secondary_title'),`
			`'timestamp': parse_iso8601(video_info.get('publishedDate')),`
			`} for idx, video_info in enumerate(info['playlist'])]`

			`return self.playlist_result(entries, video_id, video_title)`