yt-dlp/yt_dlp/extractor/zee5.py

# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
    int_or_none,
    parse_age_limit,
    str_or_none,
    try_get,
    unified_strdate,
    unified_timestamp,
    url_or_none,
)


class Zee5IE(InfoExtractor):
    """Extractor for a single Zee5 video (movie or episode).

    Matches regular zee5.com page URLs as well as the internal ``zee5:<id>``
    form, which carries only the video id (no display id).
    """
    _VALID_URL = r'''(?x)
                     (?:
                        zee5:|
                        (?:https?://)(?:www\.)?zee5\.com/(?:[^#?]+/)?
                        (?:
                            (?:tvshows|kids|zee5originals)(?:/[^#/?]+){3}
                            |movies/[^#/?]+
                        )/(?P<display_id>[^#/?]+)/
                     )
                     (?P<id>[^#/?]+)/?(?:$|[?#])
                     '''
    _TESTS = [{
        'url': 'https://www.zee5.com/movies/details/krishna-the-birth/0-0-63098',
        'info_dict': {
            'id': '0-0-63098',
            'ext': 'mp4',
            'display_id': 'krishna-the-birth',
            'title': 'Krishna - The Birth',
            'duration': 4368,
            'average_rating': 4,
            'description': compat_str,
            'alt_title': 'Krishna - The Birth',
            'uploader': 'Zee Entertainment Enterprises Ltd',
            'release_date': '20060101',
            'upload_date': '20060101',
            'timestamp': 1136073600,
            'thumbnail': 'https://akamaividz.zee5.com/resources/0-0-63098/list/270x152/0063098_list_80888170.jpg',
            'tags': list
        },
        'params': {
            'format': 'bv',
        },
    }, {
        'url': 'https://zee5.com/tvshows/details/krishna-balram/0-6-1871/episode-1-the-test-of-bramha/0-1-233402',
        'info_dict': {
            'id': '0-1-233402',
            'ext': 'mp4',
            'display_id': 'episode-1-the-test-of-bramha',
            'title': 'Episode 1 - The Test Of Bramha',
            'duration': 1336,
            'average_rating': 4,
            'description': compat_str,
            'alt_title': 'Episode 1 - The Test Of Bramha',
            'uploader': 'Green Gold',
            'release_date': '20090101',
            'upload_date': '20090101',
            'timestamp': 1230768000,
            'thumbnail': 'https://akamaividz.zee5.com/resources/0-1-233402/list/270x152/01233402_list.jpg',
            'series': 'Krishna Balram',
            'season_number': 1,
            'episode_number': 1,
            'tags': list,
        },
        'params': {
            'format': 'bv',
        },
    }, {
        'url': 'https://www.zee5.com/hi/tvshows/details/kundali-bhagya/0-6-366/kundali-bhagya-march-08-2021/0-1-manual_7g9jv1os7730?country=IN',
        'only_matching': True
    }, {
        'url': 'https://www.zee5.com/global/hi/tvshows/details/kundali-bhagya/0-6-366/kundali-bhagya-march-08-2021/0-1-manual_7g9jv1os7730',
        'only_matching': True
    }]

    def _real_extract(self, url):
        """Download the tokens and content details for ``url`` and build the info dict.

        The HLS and DASH manifests are each optional: a manifest whose URL is
        absent from the details JSON is skipped instead of aborting extraction.
        """
        # display_id is None for the bare ``zee5:<id>`` form of the URL.
        video_id, display_id = re.match(self._VALID_URL, url).group('id', 'display_id')
        access_token_request = self._download_json(
            'https://useraction.zee5.com/token/platform_tokens.php?platform_name=web_app',
            video_id, note='Downloading access token')
        token_request = self._download_json(
            'https://useraction.zee5.com/tokennd',
            video_id, note='Downloading video token')
        json_data = self._download_json(
            'https://gwapi.zee5.com/content/details/{}?translation=en&country=IN'.format(video_id),
            video_id, headers={'X-Access-Token': access_token_request['token']})

        formats = []

        # The manifest path is looked up in two places: the top-level list
        # first, then the 'video_details' object.
        m3u8_url = try_get(
            json_data,
            (lambda x: x['hls'][0], lambda x: x['video_details']['hls_url']),
            compat_str)
        if m3u8_url:
            # The first '/drm' in the path is swapped for '/hls' and the video
            # token is appended verbatim to form the playable manifest URL.
            formats.extend(self._extract_m3u8_formats(
                'https://zee5vodnd.akamaized.net' + m3u8_url.replace('/drm', '/hls', 1) + token_request['video_token'],
                video_id, 'mp4', fatal=False))

        mpd_url = try_get(
            json_data,
            (lambda x: x['video'][0], lambda x: x['video_details']['url']),
            compat_str)
        if mpd_url:
            formats.extend(self._extract_mpd_formats(
                'https://zee5vod.akamaized.net' + mpd_url,
                video_id, fatal=False))

        self._sort_formats(formats)
        return {
            'id': video_id,
            'display_id': display_id,
            'title': json_data['title'],
            'formats': formats,
            'duration': int_or_none(json_data.get('duration')),
            'average_rating': int_or_none(json_data.get('rating')),
            'description': str_or_none(json_data.get('description')),
            'alt_title': str_or_none(json_data.get('original_title')),
            'uploader': str_or_none(json_data.get('content_owner')),
            'age_limit': parse_age_limit(json_data.get('age_rating')),
            'release_date': unified_strdate(json_data.get('release_date')),
            'timestamp': unified_timestamp(json_data.get('release_date')),
            'thumbnail': url_or_none(json_data.get('image_url')),
            # compat_str (not str) so the type check agrees with the other
            # try_get calls in this extractor.
            'series': try_get(json_data, lambda x: x['tvshow_details']['title'], compat_str),
            'season': try_get(json_data, lambda x: x['season_details']['title'], compat_str),
            'season_number': int_or_none(try_get(json_data, lambda x: x['season_details']['index'])),
            'episode_number': int_or_none(try_get(json_data, lambda x: x['index'])),
            'tags': try_get(json_data, lambda x: x['tags'], list)
        }


class Zee5SeriesIE(InfoExtractor):
    """Extractor for a Zee5 show: yields every episode as a ``zee5:<id>`` entry."""
    IE_NAME = 'zee5:series'
    _VALID_URL = r'''(?x)
                     (?:
                        zee5:series:|
                        (?:https?://)(?:www\.)?zee5\.com/(?:[^#?]+/)?
                        (?:tvshows|kids|zee5originals)(?:/[^#/?]+){2}/
                     )
                     (?P<id>[^#/?]+)/?(?:$|[?#])
                     '''
    _TESTS = [{
        'url': 'https://www.zee5.com/kids/kids-shows/krishna-balram/0-6-1871',
        'playlist_mincount': 43,
        'info_dict': {
            'id': '0-6-1871',
        },
    }, {
        'url': 'https://www.zee5.com/tvshows/details/bhabi-ji-ghar-par-hai/0-6-199',
        'playlist_mincount': 1500,
        'info_dict': {
            'id': '0-6-199',
        },
    }, {
        'url': 'https://www.zee5.com/tvshows/details/agent-raghav-crime-branch/0-6-965',
        'playlist_mincount': 25,
        'info_dict': {
            'id': '0-6-965',
        },
    }, {
        'url': 'https://www.zee5.com/ta/tvshows/details/nagabhairavi/0-6-3201',
        'playlist_mincount': 3,
        'info_dict': {
            'id': '0-6-3201',
        },
    }, {
        'url': 'https://www.zee5.com/global/hi/tvshows/details/khwaabon-ki-zamin-par/0-6-270',
        'playlist_mincount': 150,
        'info_dict': {
            'id': '0-6-270',
        },
    }
    ]

    def _entries(self, show_id):
        """Yield a url_result for each episode of every season of ``show_id``.

        Seasons without a string id and episodes without an id are skipped,
        so no request or playlist entry is ever built around ``None``.
        """
        access_token_request = self._download_json(
            'https://useraction.zee5.com/token/platform_tokens.php?platform_name=web_app',
            show_id, note='Downloading access token')
        headers = {
            'X-Access-Token': access_token_request['token'],
            'Referer': 'https://www.zee5.com/',
        }
        show_url = 'https://gwapi.zee5.com/content/tvshow/{}?translation=en&country=IN'.format(show_id)

        # page_num is only used for the progress note; it keeps counting
        # across seasons rather than restarting for each one.
        page_num = 0
        show_json = self._download_json(show_url, video_id=show_id, headers=headers)
        for season in show_json.get('seasons') or []:
            season_id = try_get(season, lambda x: x['id'], compat_str)
            if not season_id:
                # Without an id the episode listing URL cannot be built.
                continue
            next_url = 'https://gwapi.zee5.com/content/tvshow/?season_id={}&type=episode&translation=en&country=IN&on_air=false&asset_subtype=tvshow&page=1&limit=100'.format(season_id)
            # Follow the 'next_episode_api' link until the response stops
            # providing a valid URL.
            while next_url:
                page_num += 1
                episodes_json = self._download_json(
                    next_url, video_id=show_id, headers=headers,
                    note='Downloading JSON metadata page %d' % page_num)
                for episode in try_get(episodes_json, lambda x: x['episode'], list) or []:
                    # try_get also tolerates list items that are not dicts.
                    video_id = try_get(episode, lambda x: x['id'])
                    if not video_id:
                        continue
                    yield self.url_result(
                        'zee5:%s' % video_id,
                        ie=Zee5IE.ie_key(), video_id=video_id)
                next_url = url_or_none(episodes_json.get('next_episode_api'))

    def _real_extract(self, url):
        """Return a playlist of all episodes for the show id matched in ``url``."""
        show_id = self._match_id(url)
        return self.playlist_result(self._entries(show_id), playlist_id=show_id)
[Zee5] Add new extractor (#100) Co-authored-by: Ashish <ashish@pop-os.localdomain> Co-authored-by: pukkandan <pukkandan@gmail.com> 2021-02-22 17:20:43 +00:00			`# coding: utf-8`
			`from __future__ import unicode_literals`

			`import re`

			`from .common import InfoExtractor`
[Zee5] Add Show Extractor (#160) Co-authored-by: Ashish <ashish@pop-os.localdomain> Co-authored-by: pukkandan <pukkandan.ytdlp@gmail.com> 2021-03-11 07:48:09 +00:00			`from ..compat import compat_str`
[Zee5] Add new extractor (#100) Co-authored-by: Ashish <ashish@pop-os.localdomain> Co-authored-by: pukkandan <pukkandan@gmail.com> 2021-02-22 17:20:43 +00:00			`from ..utils import (`
			`int_or_none,`
			`parse_age_limit,`
			`str_or_none,`
			`try_get,`
			`unified_strdate,`
			`unified_timestamp,`
			`url_or_none,`
			`)`


			`class Zee5IE(InfoExtractor):`
[Zee5] Add Show Extractor (#160) Co-authored-by: Ashish <ashish@pop-os.localdomain> Co-authored-by: pukkandan <pukkandan.ytdlp@gmail.com> 2021-03-11 07:48:09 +00:00			`_VALID_URL = r'''(?x)`
			`(?:`
			`zee5:\|`
			`(?:https?://)(?:www\.)?zee5\.com/(?:[^#?]+/)?`
			`(?:`
[zee5] Support zee5originals 2021-03-13 04:16:10 +00:00			`(?:tvshows\|kids\|zee5originals)(?:/[^#/?]+){3}`
[Zee5] Add Show Extractor (#160) Co-authored-by: Ashish <ashish@pop-os.localdomain> Co-authored-by: pukkandan <pukkandan.ytdlp@gmail.com> 2021-03-11 07:48:09 +00:00			`\|movies/[^#/?]+`
			`)/(?P<display_id>[^#/?]+)/`
			`)`
			`(?P<id>[^#/?]+)/?(?:$\|[?#])`
			`'''`
[Zee5] Add new extractor (#100) Co-authored-by: Ashish <ashish@pop-os.localdomain> Co-authored-by: pukkandan <pukkandan@gmail.com> 2021-02-22 17:20:43 +00:00			`_TESTS = [{`
			`'url': 'https://www.zee5.com/movies/details/krishna-the-birth/0-0-63098',`
			`'info_dict': {`
[Zee5] Add Show Extractor (#160) Co-authored-by: Ashish <ashish@pop-os.localdomain> Co-authored-by: pukkandan <pukkandan.ytdlp@gmail.com> 2021-03-11 07:48:09 +00:00			`'id': '0-0-63098',`
			`'ext': 'mp4',`
			`'display_id': 'krishna-the-birth',`
			`'title': 'Krishna - The Birth',`
			`'duration': 4368,`
			`'average_rating': 4,`
[zee5] Fix py2 compatibility 2021-05-02 18:18:35 +00:00			`'description': compat_str,`
[Zee5] Add Show Extractor (#160) Co-authored-by: Ashish <ashish@pop-os.localdomain> Co-authored-by: pukkandan <pukkandan.ytdlp@gmail.com> 2021-03-11 07:48:09 +00:00			`'alt_title': 'Krishna - The Birth',`
			`'uploader': 'Zee Entertainment Enterprises Ltd',`
			`'release_date': '20060101',`
			`'upload_date': '20060101',`
			`'timestamp': 1136073600,`
			`'thumbnail': 'https://akamaividz.zee5.com/resources/0-0-63098/list/270x152/0063098_list_80888170.jpg',`
			`'tags': list`
[Zee5] Add new extractor (#100) Co-authored-by: Ashish <ashish@pop-os.localdomain> Co-authored-by: pukkandan <pukkandan@gmail.com> 2021-02-22 17:20:43 +00:00			`},`
			`'params': {`
			`'format': 'bv',`
			`},`
			`}, {`
			`'url': 'https://zee5.com/tvshows/details/krishna-balram/0-6-1871/episode-1-the-test-of-bramha/0-1-233402',`
			`'info_dict': {`
[Zee5] Add Show Extractor (#160) Co-authored-by: Ashish <ashish@pop-os.localdomain> Co-authored-by: pukkandan <pukkandan.ytdlp@gmail.com> 2021-03-11 07:48:09 +00:00			`'id': '0-1-233402',`
			`'ext': 'mp4',`
			`'display_id': 'episode-1-the-test-of-bramha',`
			`'title': 'Episode 1 - The Test Of Bramha',`
			`'duration': 1336,`
			`'average_rating': 4,`
[zee5] Fix py2 compatibility 2021-05-02 18:18:35 +00:00			`'description': compat_str,`
[Zee5] Add Show Extractor (#160) Co-authored-by: Ashish <ashish@pop-os.localdomain> Co-authored-by: pukkandan <pukkandan.ytdlp@gmail.com> 2021-03-11 07:48:09 +00:00			`'alt_title': 'Episode 1 - The Test Of Bramha',`
			`'uploader': 'Green Gold',`
			`'release_date': '20090101',`
			`'upload_date': '20090101',`
			`'timestamp': 1230768000,`
			`'thumbnail': 'https://akamaividz.zee5.com/resources/0-1-233402/list/270x152/01233402_list.jpg',`
			`'series': 'Krishna Balram',`
			`'season_number': 1,`
			`'episode_number': 1,`
			`'tags': list,`
[Zee5] Add new extractor (#100) Co-authored-by: Ashish <ashish@pop-os.localdomain> Co-authored-by: pukkandan <pukkandan@gmail.com> 2021-02-22 17:20:43 +00:00			`},`
			`'params': {`
			`'format': 'bv',`
			`},`
[zee5] Improve regex 2021-03-09 09:41:33 +00:00			`}, {`
			`'url': 'https://www.zee5.com/hi/tvshows/details/kundali-bhagya/0-6-366/kundali-bhagya-march-08-2021/0-1-manual_7g9jv1os7730?country=IN',`
			`'only_matching': True`
[Zee5] Add Show Extractor (#160) Co-authored-by: Ashish <ashish@pop-os.localdomain> Co-authored-by: pukkandan <pukkandan.ytdlp@gmail.com> 2021-03-11 07:48:09 +00:00			`}, {`
			`'url': 'https://www.zee5.com/global/hi/tvshows/details/kundali-bhagya/0-6-366/kundali-bhagya-march-08-2021/0-1-manual_7g9jv1os7730',`
			`'only_matching': True`
[Zee5] Add new extractor (#100) Co-authored-by: Ashish <ashish@pop-os.localdomain> Co-authored-by: pukkandan <pukkandan@gmail.com> 2021-02-22 17:20:43 +00:00			`}]`

			`def _real_extract(self, url):`
			`video_id, display_id = re.match(self._VALID_URL, url).group('id', 'display_id')`
			`access_token_request = self._download_json(`
			`'https://useraction.zee5.com/token/platform_tokens.php?platform_name=web_app',`
[Zee5] Add Show Extractor (#160) Co-authored-by: Ashish <ashish@pop-os.localdomain> Co-authored-by: pukkandan <pukkandan.ytdlp@gmail.com> 2021-03-11 07:48:09 +00:00			`video_id, note='Downloading access token')`
[Zee5] Add new extractor (#100) Co-authored-by: Ashish <ashish@pop-os.localdomain> Co-authored-by: pukkandan <pukkandan@gmail.com> 2021-02-22 17:20:43 +00:00			`token_request = self._download_json(`
			`'https://useraction.zee5.com/tokennd',`
[Zee5] Add Show Extractor (#160) Co-authored-by: Ashish <ashish@pop-os.localdomain> Co-authored-by: pukkandan <pukkandan.ytdlp@gmail.com> 2021-03-11 07:48:09 +00:00			`video_id, note='Downloading video token')`
[Zee5] Add new extractor (#100) Co-authored-by: Ashish <ashish@pop-os.localdomain> Co-authored-by: pukkandan <pukkandan@gmail.com> 2021-02-22 17:20:43 +00:00			`json_data = self._download_json(`
			`'https://gwapi.zee5.com/content/details/{}?translation=en&country=IN'.format(video_id),`
			`video_id, headers={'X-Access-Token': access_token_request['token']})`
			`m3u8_url = try_get(`
			`json_data,`
			`(lambda x: x['hls'][0], lambda x: x['video_details']['hls_url']),`
[zee5] Fix py2 compatibility 2021-05-02 18:18:35 +00:00			`compat_str)`
[Zee5] Add new extractor (#100) Co-authored-by: Ashish <ashish@pop-os.localdomain> Co-authored-by: pukkandan <pukkandan@gmail.com> 2021-02-22 17:20:43 +00:00			`formats = self._extract_m3u8_formats(`
[zee5] Fix extraction for some URLs (#279) Closes: #278 2021-04-28 09:21:54 +00:00			`'https://zee5vodnd.akamaized.net' + m3u8_url.replace('/drm', '/hls', 1) + token_request['video_token'],`
[zee5] Fix m3u8 formats extension 2021-05-28 14:04:22 +00:00			`video_id, 'mp4', fatal=False)`
[Zee5] Add new extractor (#100) Co-authored-by: Ashish <ashish@pop-os.localdomain> Co-authored-by: pukkandan <pukkandan@gmail.com> 2021-02-22 17:20:43 +00:00			`mpd_url = try_get(`
			`json_data,`
			`(lambda x: x['video'][0], lambda x: x['video_details']['url']),`
[zee5] Fix py2 compatibility 2021-05-02 18:18:35 +00:00			`compat_str)`
[Zee5] Add new extractor (#100) Co-authored-by: Ashish <ashish@pop-os.localdomain> Co-authored-by: pukkandan <pukkandan@gmail.com> 2021-02-22 17:20:43 +00:00			`formats += self._extract_mpd_formats(`
[zee5] Fix extraction for some URLs (#279) Closes: #278 2021-04-28 09:21:54 +00:00			`'https://zee5vod.akamaized.net' + mpd_url,`
[Zee5] Add new extractor (#100) Co-authored-by: Ashish <ashish@pop-os.localdomain> Co-authored-by: pukkandan <pukkandan@gmail.com> 2021-02-22 17:20:43 +00:00			`video_id, fatal=False)`

			`self._sort_formats(formats)`
			`return {`
			`'id': video_id,`
			`'display_id': display_id,`
			`'title': json_data['title'],`
			`'formats': formats,`
			`'duration': int_or_none(json_data.get('duration')),`
			`'average_rating': int_or_none(json_data.get('rating')),`
			`'description': str_or_none(json_data.get('description')),`
			`'alt_title': str_or_none(json_data.get('original_title')),`
			`'uploader': str_or_none(json_data.get('content_owner')),`
			`'age_limit': parse_age_limit(json_data.get('age_rating')),`
			`'release_date': unified_strdate(json_data.get('release_date')),`
			`'timestamp': unified_timestamp(json_data.get('release_date')),`
			`'thumbnail': url_or_none(json_data.get('image_url')),`
			`'series': try_get(json_data, lambda x: x['tvshow_details']['title'], str),`
			`'season': try_get(json_data, lambda x: x['season_details']['title'], str),`
			`'season_number': int_or_none(try_get(json_data, lambda x: x['season_details']['index'])),`
			`'episode_number': int_or_none(try_get(json_data, lambda x: x['index'])),`
			`'tags': try_get(json_data, lambda x: x['tags'], list)`
			`}`
[Zee5] Add Show Extractor (#160) Co-authored-by: Ashish <ashish@pop-os.localdomain> Co-authored-by: pukkandan <pukkandan.ytdlp@gmail.com> 2021-03-11 07:48:09 +00:00

			`class Zee5SeriesIE(InfoExtractor):`
			`IE_NAME = 'zee5:series'`
			`_VALID_URL = r'''(?x)`
			`(?:`
			`zee5:series:\|`
			`(?:https?://)(?:www\.)?zee5\.com/(?:[^#?]+/)?`
[zee5] Support zee5originals 2021-03-13 04:16:10 +00:00			`(?:tvshows\|kids\|zee5originals)(?:/[^#/?]+){2}/`
[Zee5] Add Show Extractor (#160) Co-authored-by: Ashish <ashish@pop-os.localdomain> Co-authored-by: pukkandan <pukkandan.ytdlp@gmail.com> 2021-03-11 07:48:09 +00:00			`)`
			`(?P<id>[^#/?]+)/?(?:$\|[?#])`
			`'''`
			`_TESTS = [{`
			`'url': 'https://www.zee5.com/kids/kids-shows/krishna-balram/0-6-1871',`
			`'playlist_mincount': 43,`
			`'info_dict': {`
			`'id': '0-6-1871',`
			`},`
			`}, {`
			`'url': 'https://www.zee5.com/tvshows/details/bhabi-ji-ghar-par-hai/0-6-199',`
			`'playlist_mincount': 1500,`
			`'info_dict': {`
			`'id': '0-6-199',`
			`},`
			`}, {`
			`'url': 'https://www.zee5.com/tvshows/details/agent-raghav-crime-branch/0-6-965',`
			`'playlist_mincount': 25,`
			`'info_dict': {`
			`'id': '0-6-965',`
			`},`
			`}, {`
			`'url': 'https://www.zee5.com/ta/tvshows/details/nagabhairavi/0-6-3201',`
			`'playlist_mincount': 3,`
			`'info_dict': {`
			`'id': '0-6-3201',`
			`},`
			`}, {`
			`'url': 'https://www.zee5.com/global/hi/tvshows/details/khwaabon-ki-zamin-par/0-6-270',`
			`'playlist_mincount': 150,`
			`'info_dict': {`
			`'id': '0-6-270',`
			`},`
			`}`
			`]`

			`def _entries(self, show_id):`
			`access_token_request = self._download_json(`
			`'https://useraction.zee5.com/token/platform_tokens.php?platform_name=web_app',`
			`show_id, note='Downloading access token')`
			`headers = {`
			`'X-Access-Token': access_token_request['token'],`
			`'Referer': 'https://www.zee5.com/',`
			`}`
			`show_url = 'https://gwapi.zee5.com/content/tvshow/{}?translation=en&country=IN'.format(show_id)`

			`page_num = 0`
			`show_json = self._download_json(show_url, video_id=show_id, headers=headers)`
			`for season in show_json.get('seasons') or []:`
			`season_id = try_get(season, lambda x: x['id'], compat_str)`
			`next_url = 'https://gwapi.zee5.com/content/tvshow/?season_id={}&type=episode&translation=en&country=IN&on_air=false&asset_subtype=tvshow&page=1&limit=100'.format(season_id)`
			`while next_url:`
			`page_num += 1`
			`episodes_json = self._download_json(`
			`next_url, video_id=show_id, headers=headers,`
			`note='Downloading JSON metadata page %d' % page_num)`
			`for episode in try_get(episodes_json, lambda x: x['episode'], list) or []:`
			`video_id = episode.get('id')`
			`yield self.url_result(`
			`'zee5:%s' % video_id,`
			`ie=Zee5IE.ie_key(), video_id=video_id)`
			`next_url = url_or_none(episodes_json.get('next_episode_api'))`

			`def _real_extract(self, url):`
			`show_id = self._match_id(url)`
			`return self.playlist_result(self._entries(show_id), playlist_id=show_id)`