yt-dlp/yt_dlp/extractor/kanalplay.py

# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    float_or_none,
    srt_subtitles_timecode,
)


class KanalPlayIE(InfoExtractor):
    IE_DESC = 'Kanal 5/9/11 Play'
    _VALID_URL = r'https?://(?:www\.)?kanal(?P<channel_id>5|9|11)play\.se/(?:#!/)?(?:play/)?program/\d+/video/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.kanal5play.se/#!/play/program/3060212363/video/3270012277',
        'info_dict': {
            'id': '3270012277',
            'ext': 'flv',
            'title': 'Saknar både dusch och avlopp',
            'description': 'md5:6023a95832a06059832ae93bc3c7efb7',
            'duration': 2636.36,
        },
        'params': {
            # rtmp download
            'skip_download': True,
        }
    }, {
        'url': 'http://www.kanal9play.se/#!/play/program/335032/video/246042',
        'only_matching': True,
    }, {
        'url': 'http://www.kanal11play.se/#!/play/program/232835958/video/367135199',
        'only_matching': True,
    }]

    def _fix_subtitles(self, subs):
        return '\r\n\r\n'.join(
            '%s\r\n%s --> %s\r\n%s'
            % (
                num,
                srt_subtitles_timecode(item['startMillis'] / 1000.0),
                srt_subtitles_timecode(item['endMillis'] / 1000.0),
                item['text'],
            ) for num, item in enumerate(subs, 1))

    def _get_subtitles(self, channel_id, video_id):
        subs = self._download_json(
            'http://www.kanal%splay.se/api/subtitles/%s' % (channel_id, video_id),
            video_id, 'Downloading subtitles JSON', fatal=False)
        return {'sv': [{'ext': 'srt', 'data': self._fix_subtitles(subs)}]} if subs else {}

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        channel_id = mobj.group('channel_id')

        video = self._download_json(
            'http://www.kanal%splay.se/api/getVideo?format=FLASH&videoId=%s' % (channel_id, video_id),
            video_id)

        reasons_for_no_streams = video.get('reasonsForNoStreams')
        if reasons_for_no_streams:
            raise ExtractorError(
                '%s returned error: %s' % (self.IE_NAME, '\n'.join(reasons_for_no_streams)),
                expected=True)

        title = video['title']
        description = video.get('description')
        duration = float_or_none(video.get('length'), 1000)
        thumbnail = video.get('posterUrl')

        stream_base_url = video['streamBaseUrl']

        formats = [{
            'url': stream_base_url,
            'play_path': stream['source'],
            'ext': 'flv',
            'tbr': float_or_none(stream.get('bitrate'), 1000),
            'rtmp_real_time': True,
        } for stream in video['streams']]
        self._sort_formats(formats)

        subtitles = {}
        if video.get('hasSubtitle'):
            subtitles = self.extract_subtitles(channel_id, video_id)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
            'subtitles': subtitles,
        }
[kanalplay] Add extractor (Closes #5188) 2015-03-13 14:51:44 +00:00			`# coding: utf-8`
			`from __future__ import unicode_literals`

			`import re`

			`from .common import InfoExtractor`
			`from ..utils import (`
			`ExtractorError,`
			`float_or_none,`
[utils,common] Merge format_srt_time and _subtitles_timecode format_srt_time uses a comma as the delimiter between seconds and milliseconds while _subtitles_timecode uses a dot. All .srt examples I found on the Internet uses a comma, so I use a comma in the merged version. See http://matroska.org/technical/specs/subtitles/srt.html and http://devel.aegisub.org/wiki/SubtitleFormats/SRT 2015-05-12 05:04:54 +00:00			`srt_subtitles_timecode,`
[kanalplay] Add extractor (Closes #5188) 2015-03-13 14:51:44 +00:00			`)`


			`class KanalPlayIE(InfoExtractor):`
			`IE_DESC = 'Kanal 5/9/11 Play'`
			`_VALID_URL = r'https?://(?:www\.)?kanal(?P<channel_id>5\|9\|11)play\.se/(?:#!/)?(?:play/)?program/\d+/video/(?P<id>\d+)'`
			`_TESTS = [{`
			`'url': 'http://www.kanal5play.se/#!/play/program/3060212363/video/3270012277',`
			`'info_dict': {`
[kanalplay] Fix test 2015-03-13 19:27:21 +00:00			`'id': '3270012277',`
[kanalplay] Add extractor (Closes #5188) 2015-03-13 14:51:44 +00:00			`'ext': 'flv',`
			`'title': 'Saknar både dusch och avlopp',`
[kanalplay] Fix test 2015-03-13 19:27:21 +00:00			`'description': 'md5:6023a95832a06059832ae93bc3c7efb7',`
[kanalplay] Add extractor (Closes #5188) 2015-03-13 14:51:44 +00:00			`'duration': 2636.36,`
			`},`
[kanalplay] Fix test 2015-03-13 19:27:21 +00:00			`'params': {`
			`# rtmp download`
			`'skip_download': True,`
			`}`
[kanalplay] Add extractor (Closes #5188) 2015-03-13 14:51:44 +00:00			`}, {`
			`'url': 'http://www.kanal9play.se/#!/play/program/335032/video/246042',`
			`'only_matching': True,`
			`}, {`
			`'url': 'http://www.kanal11play.se/#!/play/program/232835958/video/367135199',`
			`'only_matching': True,`
			`}]`

[kanalplay] Extract subtitles 2015-03-13 15:39:29 +00:00			`def _fix_subtitles(self, subs):`
			`return '\r\n\r\n'.join(`
			`'%s\r\n%s --> %s\r\n%s'`
			`% (`
			`num,`
[utils,common] Merge format_srt_time and _subtitles_timecode format_srt_time uses a comma as the delimiter between seconds and milliseconds while _subtitles_timecode uses a dot. All .srt examples I found on the Internet uses a comma, so I use a comma in the merged version. See http://matroska.org/technical/specs/subtitles/srt.html and http://devel.aegisub.org/wiki/SubtitleFormats/SRT 2015-05-12 05:04:54 +00:00			`srt_subtitles_timecode(item['startMillis'] / 1000.0),`
			`srt_subtitles_timecode(item['endMillis'] / 1000.0),`
[kanalplay] Extract subtitles 2015-03-13 15:39:29 +00:00			`item['text'],`
			`) for num, item in enumerate(subs, 1))`

			`def _get_subtitles(self, channel_id, video_id):`
			`subs = self._download_json(`
			`'http://www.kanal%splay.se/api/subtitles/%s' % (channel_id, video_id),`
			`video_id, 'Downloading subtitles JSON', fatal=False)`
[kanalplay] Use IETF language tag 2016-01-22 19:51:18 +00:00			`return {'sv': [{'ext': 'srt', 'data': self._fix_subtitles(subs)}]} if subs else {}`
[kanalplay] Extract subtitles 2015-03-13 15:39:29 +00:00
[kanalplay] Add extractor (Closes #5188) 2015-03-13 14:51:44 +00:00			`def _real_extract(self, url):`
			`mobj = re.match(self._VALID_URL, url)`
			`video_id = mobj.group('id')`
			`channel_id = mobj.group('channel_id')`

			`video = self._download_json(`
			`'http://www.kanal%splay.se/api/getVideo?format=FLASH&videoId=%s' % (channel_id, video_id),`
			`video_id)`

			`reasons_for_no_streams = video.get('reasonsForNoStreams')`
			`if reasons_for_no_streams:`
			`raise ExtractorError(`
			`'%s returned error: %s' % (self.IE_NAME, '\n'.join(reasons_for_no_streams)),`
			`expected=True)`

			`title = video['title']`
			`description = video.get('description')`
			`duration = float_or_none(video.get('length'), 1000)`
			`thumbnail = video.get('posterUrl')`

			`stream_base_url = video['streamBaseUrl']`

			`formats = [{`
			`'url': stream_base_url,`
			`'play_path': stream['source'],`
			`'ext': 'flv',`
			`'tbr': float_or_none(stream.get('bitrate'), 1000),`
			`'rtmp_real_time': True,`
			`} for stream in video['streams']]`
			`self._sort_formats(formats)`

[kanalplay] Extract subtitles 2015-03-13 15:39:29 +00:00			`subtitles = {}`
			`if video.get('hasSubtitle'):`
			`subtitles = self.extract_subtitles(channel_id, video_id)`

[kanalplay] Add extractor (Closes #5188) 2015-03-13 14:51:44 +00:00			`return {`
			`'id': video_id,`
			`'title': title,`
			`'description': description,`
			`'thumbnail': thumbnail,`
			`'duration': duration,`
			`'formats': formats,`
[kanalplay] Extract subtitles 2015-03-13 15:39:29 +00:00			`'subtitles': subtitles,`
[kanalplay] Add extractor (Closes #5188) 2015-03-13 14:51:44 +00:00			`}`