yt-dlp/youtube_dl/extractor/zattoo.py

# coding: utf-8
from __future__ import unicode_literals

import re
from uuid import uuid4

from .common import InfoExtractor
from ..compat import (
    compat_HTTPError,
    compat_str,
)
from ..utils import (
    ExtractorError,
    int_or_none,
    try_get,
    urlencode_postdata,
)


class ZattooBaseIE(InfoExtractor):
    _NETRC_MACHINE = 'zattoo'
    _HOST_URL = 'https://zattoo.com'

    _power_guide_hash = None

    def _login(self):
        username, password = self._get_login_info()
        if not username or not password:
            self.raise_login_required(
                'A valid %s account is needed to access this media.'
                % self._NETRC_MACHINE)

        try:
            data = self._download_json(
                '%s/zapi/v2/account/login' % self._HOST_URL, None, 'Logging in',
                data=urlencode_postdata({
                    'login': username,
                    'password': password,
                    'remember': 'true',
                }), headers={
                    'Referer': '%s/login' % self._HOST_URL,
                    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
                })
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
                raise ExtractorError(
                    'Unable to login: incorrect username and/or password',
                    expected=True)
            raise

        self._power_guide_hash = data['session']['power_guide_hash']

    def _real_initialize(self):
        webpage = self._download_webpage(
            self._HOST_URL, None, 'Downloading app token')
        app_token = self._html_search_regex(
            r'appToken\s*=\s*(["\'])(?P<token>(?:(?!\1).)+?)\1',
            webpage, 'app token', group='token')
        app_version = self._html_search_regex(
            r'<!--\w+-(.+?)-', webpage, 'app version', default='2.8.2')

        # Will setup appropriate cookies
        self._request_webpage(
            '%s/zapi/v2/session/hello' % self._HOST_URL, None,
            'Opening session', data=urlencode_postdata({
                'client_app_token': app_token,
                'uuid': compat_str(uuid4()),
                'lang': 'en',
                'app_version': app_version,
                'format': 'json',
            }))

        self._login()

    def _extract_cid(self, video_id, channel_name):
        channel_groups = self._download_json(
            '%s/zapi/v2/cached/channels/%s' % (self._HOST_URL,
                                               self._power_guide_hash),
            video_id, 'Downloading channel list',
            query={'details': False})['channel_groups']
        channel_list = []
        for chgrp in channel_groups:
            channel_list.extend(chgrp['channels'])
        try:
            return next(
                chan['cid'] for chan in channel_list
                if chan.get('cid') and (
                    chan.get('display_alias') == channel_name or
                    chan.get('cid') == channel_name))
        except StopIteration:
            raise ExtractorError('Could not extract channel id')

    def _extract_cid_and_video_info(self, video_id):
        data = self._download_json(
            '%s/zapi/program/details' % self._HOST_URL,
            video_id,
            'Downloading video information',
            query={
                'program_id': video_id,
                'complete': True
            })

        p = data['program']
        cid = p['cid']

        info_dict = {
            'id': video_id,
            'title': p.get('title') or p['episode_title'],
            'description': p.get('description'),
            'thumbnail': p.get('image_url'),
            'creator': p.get('channel_name'),
            'episode': p.get('episode_title'),
            'episode_number': int_or_none(p.get('episode_number')),
            'season_number': int_or_none(p.get('season_number')),
            'release_year': int_or_none(p.get('year')),
            'categories': try_get(p, lambda x: x['categories'], list),
        }

        return cid, info_dict

    def _extract_formats(self, cid, video_id, record_id=None, is_live=False):
        postdata_common = {
            'https_watch_urls': True,
        }

        if is_live:
            postdata_common.update({'timeshift': 10800})
            url = '%s/zapi/watch/live/%s' % (self._HOST_URL, cid)
        elif record_id:
            url = '%s/zapi/watch/recording/%s' % (self._HOST_URL, record_id)
        else:
            url = '%s/zapi/watch/recall/%s/%s' % (self._HOST_URL, cid, video_id)

        formats = []
        for stream_type in ('dash', 'hls', 'hls5', 'hds'):
            postdata = postdata_common.copy()
            postdata['stream_type'] = stream_type

            data = self._download_json(
                url, video_id, 'Downloading %s formats' % stream_type.upper(),
                data=urlencode_postdata(postdata), fatal=False)
            if not data:
                continue

            watch_urls = try_get(
                data, lambda x: x['stream']['watch_urls'], list)
            if not watch_urls:
                continue

            for watch in watch_urls:
                if not isinstance(watch, dict):
                    continue
                watch_url = watch.get('url')
                if not watch_url or not isinstance(watch_url, compat_str):
                    continue
                format_id_list = [stream_type]
                maxrate = watch.get('maxrate')
                if maxrate:
                    format_id_list.append(compat_str(maxrate))
                audio_channel = watch.get('audio_channel')
                if audio_channel:
                    format_id_list.append(compat_str(audio_channel))
                preference = 1 if audio_channel == 'A' else None
                format_id = '-'.join(format_id_list)
                if stream_type in ('dash', 'dash_widevine', 'dash_playready'):
                    this_formats = self._extract_mpd_formats(
                        watch_url, video_id, mpd_id=format_id, fatal=False)
                elif stream_type in ('hls', 'hls5', 'hls5_fairplay'):
                    this_formats = self._extract_m3u8_formats(
                        watch_url, video_id, 'mp4',
                        entry_protocol='m3u8_native', m3u8_id=format_id,
                        fatal=False)
                elif stream_type == 'hds':
                    this_formats = self._extract_f4m_formats(
                        watch_url, video_id, f4m_id=format_id, fatal=False)
                elif stream_type == 'smooth_playready':
                    this_formats = self._extract_ism_formats(
                        watch_url, video_id, ism_id=format_id, fatal=False)
                else:
                    assert False
                for this_format in this_formats:
                    this_format['preference'] = preference
                formats.extend(this_formats)
        self._sort_formats(formats)
        return formats

    def _extract_video(self, channel_name, video_id, record_id=None, is_live=False):
        if is_live:
            cid = self._extract_cid(video_id, channel_name)
            info_dict = {
                'id': channel_name,
                'title': self._live_title(channel_name),
                'is_live': True,
            }
        else:
            cid, info_dict = self._extract_cid_and_video_info(video_id)
        formats = self._extract_formats(
            cid, video_id, record_id=record_id, is_live=is_live)
        info_dict['formats'] = formats
        return info_dict


class QuicklineBaseIE(ZattooBaseIE):
    _NETRC_MACHINE = 'quickline'
    _HOST_URL = 'https://mobiltv.quickline.com'


class QuicklineIE(QuicklineBaseIE):
    _VALID_URL = r'https?://(?:www\.)?mobiltv\.quickline\.com/watch/(?P<channel>[^/]+)/(?P<id>[0-9]+)'

    _TEST = {
        'url': 'https://mobiltv.quickline.com/watch/prosieben/130671867-maze-runner-die-auserwaehlten-in-der-brandwueste',
        'only_matching': True,
    }

    def _real_extract(self, url):
        channel_name, video_id = re.match(self._VALID_URL, url).groups()
        return self._extract_video(channel_name, video_id)


class QuicklineLiveIE(QuicklineBaseIE):
    _VALID_URL = r'https?://(?:www\.)?mobiltv\.quickline\.com/watch/(?P<id>[^/]+)'

    _TEST = {
        'url': 'https://mobiltv.quickline.com/watch/srf1',
        'only_matching': True,
    }

    @classmethod
    def suitable(cls, url):
        return False if QuicklineIE.suitable(url) else super(QuicklineLiveIE, cls).suitable(url)

    def _real_extract(self, url):
        channel_name = video_id = self._match_id(url)
        return self._extract_video(channel_name, video_id, is_live=True)


class ZattooIE(ZattooBaseIE):
    _VALID_URL = r'https?://(?:www\.)?zattoo\.com/watch/(?P<channel>[^/]+?)/(?P<id>[0-9]+)[^/]+(?:/(?P<recid>[0-9]+))?'

    # Since regular videos are only available for 7 days and recorded videos
    # are only available for a specific user, we cannot have detailed tests.
    _TESTS = [{
        'url': 'https://zattoo.com/watch/prosieben/130671867-maze-runner-die-auserwaehlten-in-der-brandwueste',
        'only_matching': True,
    }, {
        'url': 'https://zattoo.com/watch/srf_zwei/132905652-eishockey-spengler-cup/102791477/1512211800000/1514433500000/92000',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        channel_name, video_id, record_id = re.match(self._VALID_URL, url).groups()
        return self._extract_video(channel_name, video_id, record_id)


class ZattooLiveIE(ZattooBaseIE):
    _VALID_URL = r'https?://(?:www\.)?zattoo\.com/watch/(?P<id>[^/]+)'

    _TEST = {
        'url': 'https://zattoo.com/watch/srf1',
        'only_matching': True,
    }

    @classmethod
    def suitable(cls, url):
        return False if ZattooIE.suitable(url) else super(ZattooLiveIE, cls).suitable(url)

    def _real_extract(self, url):
        channel_name = video_id = self._match_id(url)
        return self._extract_video(channel_name, video_id, is_live=True)
[zattoo] Add extractor (closes #14668) 2017-11-05 17:07:35 +00:00			`# coding: utf-8`
			`from __future__ import unicode_literals`

			`import re`
[zattoo] Improve and simplify (closes #14676) 2018-04-30 18:48:21 +00:00			`from uuid import uuid4`
[zattoo] Add extractor (closes #14668) 2017-11-05 17:07:35 +00:00
			`from .common import InfoExtractor`
[zattoo] Improve and simplify (closes #14676) 2018-04-30 18:48:21 +00:00			`from ..compat import (`
			`compat_HTTPError,`
[zattoo] Add extractor (closes #14668) 2017-11-05 17:07:35 +00:00			`compat_str,`
[zattoo] Improve and simplify (closes #14676) 2018-04-30 18:48:21 +00:00			`)`
			`from ..utils import (`
[zattoo] Add extractor (closes #14668) 2017-11-05 17:07:35 +00:00			`ExtractorError,`
[zattoo] Improve and simplify (closes #14676) 2018-04-30 18:48:21 +00:00			`int_or_none,`
			`try_get,`
[zattoo] Add extractor (closes #14668) 2017-11-05 17:07:35 +00:00			`urlencode_postdata,`
			`)`


			`class ZattooBaseIE(InfoExtractor):`
			`_NETRC_MACHINE = 'zattoo'`
			`_HOST_URL = 'https://zattoo.com'`

			`_power_guide_hash = None`

[zattoo] Improve and simplify (closes #14676) 2018-04-30 18:48:21 +00:00			`def _login(self):`
remove unnecessary assignment parenthesis 2018-05-26 15:12:44 +00:00			`username, password = self._get_login_info()`
[zattoo] Add extractor (closes #14668) 2017-11-05 17:07:35 +00:00			`if not username or not password:`
[zattoo] Improve and simplify (closes #14676) 2018-04-30 18:48:21 +00:00			`self.raise_login_required(`
			`'A valid %s account is needed to access this media.'`
			`% self._NETRC_MACHINE)`

			`try:`
			`data = self._download_json(`
			`'%s/zapi/v2/account/login' % self._HOST_URL, None, 'Logging in',`
			`data=urlencode_postdata({`
			`'login': username,`
			`'password': password,`
			`'remember': 'true',`
			`}), headers={`
			`'Referer': '%s/login' % self._HOST_URL,`
			`'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',`
			`})`
			`except ExtractorError as e:`
			`if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:`
			`raise ExtractorError(`
			`'Unable to login: incorrect username and/or password',`
			`expected=True)`
			`raise`

			`self._power_guide_hash = data['session']['power_guide_hash']`

			`def _real_initialize(self):`
			`webpage = self._download_webpage(`
			`self._HOST_URL, None, 'Downloading app token')`
[zattoo] Add extractor (closes #14668) 2017-11-05 17:07:35 +00:00			`app_token = self._html_search_regex(`
[zattoo] Improve and simplify (closes #14676) 2018-04-30 18:48:21 +00:00			`r'appToken\s=\s(["\'])(?P<token>(?:(?!\1).)+?)\1',`
			`webpage, 'app token', group='token')`
[zattoo] Add extractor (closes #14668) 2017-11-05 17:07:35 +00:00			`app_version = self._html_search_regex(`
[zattoo] Improve and simplify (closes #14676) 2018-04-30 18:48:21 +00:00			`r'<!--\w+-(.+?)-', webpage, 'app version', default='2.8.2')`

			`# Will setup appropriate cookies`
			`self._request_webpage(`
			`'%s/zapi/v2/session/hello' % self._HOST_URL, None,`
			`'Opening session', data=urlencode_postdata({`
			`'client_app_token': app_token,`
			`'uuid': compat_str(uuid4()),`
			`'lang': 'en',`
			`'app_version': app_version,`
			`'format': 'json',`
			`}))`
[zattoo] Add extractor (closes #14668) 2017-11-05 17:07:35 +00:00
[zattoo] Improve and simplify (closes #14676) 2018-04-30 18:48:21 +00:00			`self._login()`
[zattoo] Add extractor (closes #14668) 2017-11-05 17:07:35 +00:00
			`def _extract_cid(self, video_id, channel_name):`
			`channel_groups = self._download_json(`
			`'%s/zapi/v2/cached/channels/%s' % (self._HOST_URL,`
			`self._power_guide_hash),`
[zattoo] Improve and simplify (closes #14676) 2018-04-30 18:48:21 +00:00			`video_id, 'Downloading channel list',`
[zattoo] Add extractor (closes #14668) 2017-11-05 17:07:35 +00:00			`query={'details': False})['channel_groups']`
			`channel_list = []`
			`for chgrp in channel_groups:`
			`channel_list.extend(chgrp['channels'])`
			`try:`
			`return next(`
			`chan['cid'] for chan in channel_list`
[zattoo] Improve and simplify (closes #14676) 2018-04-30 18:48:21 +00:00			`if chan.get('cid') and (`
			`chan.get('display_alias') == channel_name or`
			`chan.get('cid') == channel_name))`
[zattoo] Add extractor (closes #14668) 2017-11-05 17:07:35 +00:00			`except StopIteration:`
			`raise ExtractorError('Could not extract channel id')`

			`def _extract_cid_and_video_info(self, video_id):`
			`data = self._download_json(`
			`'%s/zapi/program/details' % self._HOST_URL,`
			`video_id,`
			`'Downloading video information',`
			`query={`
			`'program_id': video_id,`
			`'complete': True`
			`})`

[zattoo] Improve and simplify (closes #14676) 2018-04-30 18:48:21 +00:00			`p = data['program']`
			`cid = p['cid']`

[zattoo] Add extractor (closes #14668) 2017-11-05 17:07:35 +00:00			`info_dict = {`
			`'id': video_id,`
[zattoo] Improve and simplify (closes #14676) 2018-04-30 18:48:21 +00:00			`'title': p.get('title') or p['episode_title'],`
			`'description': p.get('description'),`
			`'thumbnail': p.get('image_url'),`
			`'creator': p.get('channel_name'),`
			`'episode': p.get('episode_title'),`
			`'episode_number': int_or_none(p.get('episode_number')),`
			`'season_number': int_or_none(p.get('season_number')),`
			`'release_year': int_or_none(p.get('year')),`
			`'categories': try_get(p, lambda x: x['categories'], list),`
[zattoo] Add extractor (closes #14668) 2017-11-05 17:07:35 +00:00			`}`
[zattoo] Improve and simplify (closes #14676) 2018-04-30 18:48:21 +00:00
[zattoo] Add extractor (closes #14668) 2017-11-05 17:07:35 +00:00			`return cid, info_dict`

			`def _extract_formats(self, cid, video_id, record_id=None, is_live=False):`
[zattoo] Improve and simplify (closes #14676) 2018-04-30 18:48:21 +00:00			`postdata_common = {`
[zattoo] Add extractor (closes #14668) 2017-11-05 17:07:35 +00:00			`'https_watch_urls': True,`
			`}`

			`if is_live:`
[zattoo] Improve and simplify (closes #14676) 2018-04-30 18:48:21 +00:00			`postdata_common.update({'timeshift': 10800})`
[zattoo] Add extractor (closes #14668) 2017-11-05 17:07:35 +00:00			`url = '%s/zapi/watch/live/%s' % (self._HOST_URL, cid)`
[zattoo] Improve and simplify (closes #14676) 2018-04-30 18:48:21 +00:00			`elif record_id:`
			`url = '%s/zapi/watch/recording/%s' % (self._HOST_URL, record_id)`
			`else:`
			`url = '%s/zapi/watch/recall/%s/%s' % (self._HOST_URL, cid, video_id)`
[zattoo] Add extractor (closes #14668) 2017-11-05 17:07:35 +00:00
			`formats = []`
[zattoo] Improve and simplify (closes #14676) 2018-04-30 18:48:21 +00:00			`for stream_type in ('dash', 'hls', 'hls5', 'hds'):`
			`postdata = postdata_common.copy()`
			`postdata['stream_type'] = stream_type`

			`data = self._download_json(`
			`url, video_id, 'Downloading %s formats' % stream_type.upper(),`
			`data=urlencode_postdata(postdata), fatal=False)`
			`if not data:`
			`continue`

			`watch_urls = try_get(`
			`data, lambda x: x['stream']['watch_urls'], list)`
			`if not watch_urls:`
			`continue`

			`for watch in watch_urls:`
			`if not isinstance(watch, dict):`
			`continue`
			`watch_url = watch.get('url')`
			`if not watch_url or not isinstance(watch_url, compat_str):`
			`continue`
			`format_id_list = [stream_type]`
			`maxrate = watch.get('maxrate')`
			`if maxrate:`
			`format_id_list.append(compat_str(maxrate))`
			`audio_channel = watch.get('audio_channel')`
			`if audio_channel:`
			`format_id_list.append(compat_str(audio_channel))`
			`preference = 1 if audio_channel == 'A' else None`
			`format_id = '-'.join(format_id_list)`
			`if stream_type in ('dash', 'dash_widevine', 'dash_playready'):`
			`this_formats = self._extract_mpd_formats(`
			`watch_url, video_id, mpd_id=format_id, fatal=False)`
			`elif stream_type in ('hls', 'hls5', 'hls5_fairplay'):`
			`this_formats = self._extract_m3u8_formats(`
			`watch_url, video_id, 'mp4',`
			`entry_protocol='m3u8_native', m3u8_id=format_id,`
			`fatal=False)`
			`elif stream_type == 'hds':`
			`this_formats = self._extract_f4m_formats(`
			`watch_url, video_id, f4m_id=format_id, fatal=False)`
			`elif stream_type == 'smooth_playready':`
			`this_formats = self._extract_ism_formats(`
			`watch_url, video_id, ism_id=format_id, fatal=False)`
			`else:`
			`assert False`
			`for this_format in this_formats:`
			`this_format['preference'] = preference`
			`formats.extend(this_formats)`
[zattoo] Add extractor (closes #14668) 2017-11-05 17:07:35 +00:00			`self._sort_formats(formats)`
			`return formats`

			`def _extract_video(self, channel_name, video_id, record_id=None, is_live=False):`
			`if is_live:`
			`cid = self._extract_cid(video_id, channel_name)`
			`info_dict = {`
			`'id': channel_name,`
			`'title': self._live_title(channel_name),`
			`'is_live': True,`
			`}`
			`else:`
			`cid, info_dict = self._extract_cid_and_video_info(video_id)`
			`formats = self._extract_formats(`
			`cid, video_id, record_id=record_id, is_live=is_live)`
			`info_dict['formats'] = formats`
			`return info_dict`


			`class QuicklineBaseIE(ZattooBaseIE):`
			`_NETRC_MACHINE = 'quickline'`
			`_HOST_URL = 'https://mobiltv.quickline.com'`


			`class QuicklineIE(QuicklineBaseIE):`
			`_VALID_URL = r'https?://(?:www\.)?mobiltv\.quickline\.com/watch/(?P<channel>[^/]+)/(?P<id>[0-9]+)'`

[zattoo] Improve and simplify (closes #14676) 2018-04-30 18:48:21 +00:00			`_TEST = {`
			`'url': 'https://mobiltv.quickline.com/watch/prosieben/130671867-maze-runner-die-auserwaehlten-in-der-brandwueste',`
			`'only_matching': True,`
			`}`

[zattoo] Add extractor (closes #14668) 2017-11-05 17:07:35 +00:00			`def _real_extract(self, url):`
			`channel_name, video_id = re.match(self._VALID_URL, url).groups()`
			`return self._extract_video(channel_name, video_id)`


			`class QuicklineLiveIE(QuicklineBaseIE):`
[zattoo] Improve and simplify (closes #14676) 2018-04-30 18:48:21 +00:00			`_VALID_URL = r'https?://(?:www\.)?mobiltv\.quickline\.com/watch/(?P<id>[^/]+)'`

			`_TEST = {`
			`'url': 'https://mobiltv.quickline.com/watch/srf1',`
			`'only_matching': True,`
			`}`

			`@classmethod`
			`def suitable(cls, url):`
			`return False if QuicklineIE.suitable(url) else super(QuicklineLiveIE, cls).suitable(url)`
[zattoo] Add extractor (closes #14668) 2017-11-05 17:07:35 +00:00
			`def _real_extract(self, url):`
			`channel_name = video_id = self._match_id(url)`
			`return self._extract_video(channel_name, video_id, is_live=True)`


			`class ZattooIE(ZattooBaseIE):`
			`_VALID_URL = r'https?://(?:www\.)?zattoo\.com/watch/(?P<channel>[^/]+?)/(?P<id>[0-9]+)[^/]+(?:/(?P<recid>[0-9]+))?'`

			`# Since regular videos are only available for 7 days and recorded videos`
			`# are only available for a specific user, we cannot have detailed tests.`
			`_TESTS = [{`
			`'url': 'https://zattoo.com/watch/prosieben/130671867-maze-runner-die-auserwaehlten-in-der-brandwueste',`
			`'only_matching': True,`
			`}, {`
			`'url': 'https://zattoo.com/watch/srf_zwei/132905652-eishockey-spengler-cup/102791477/1512211800000/1514433500000/92000',`
			`'only_matching': True,`
			`}]`

			`def _real_extract(self, url):`
			`channel_name, video_id, record_id = re.match(self._VALID_URL, url).groups()`
			`return self._extract_video(channel_name, video_id, record_id)`


			`class ZattooLiveIE(ZattooBaseIE):`
[zattoo] Improve and simplify (closes #14676) 2018-04-30 18:48:21 +00:00			`_VALID_URL = r'https?://(?:www\.)?zattoo\.com/watch/(?P<id>[^/]+)'`
[zattoo] Add extractor (closes #14668) 2017-11-05 17:07:35 +00:00
			`_TEST = {`
			`'url': 'https://zattoo.com/watch/srf1',`
			`'only_matching': True,`
			`}`

[zattoo] Improve and simplify (closes #14676) 2018-04-30 18:48:21 +00:00			`@classmethod`
			`def suitable(cls, url):`
			`return False if ZattooIE.suitable(url) else super(ZattooLiveIE, cls).suitable(url)`

[zattoo] Add extractor (closes #14668) 2017-11-05 17:07:35 +00:00			`def _real_extract(self, url):`
			`channel_name = video_id = self._match_id(url)`
			`return self._extract_video(channel_name, video_id, is_live=True)`