yt-dlp/yt_dlp/extractor/teamcoco.py

import json

from .turner import TurnerBaseIE
from ..utils import (
    determine_ext,
    ExtractorError,
    int_or_none,
    mimetype2ext,
    parse_duration,
    parse_iso8601,
    qualities,
)


class TeamcocoIE(TurnerBaseIE):
    """Extractor for video pages on teamcoco.com and its subdomains.

    Record metadata is fetched from the site's GraphQL endpoint.  Formats come
    from the inherited ``_extract_ngtv_info`` helper when the record carries a
    ``turnerMediaId``, and from the ``_truman`` JSON endpoint otherwise.
    """

    _VALID_URL = r'https?://(?:\w+\.)?teamcoco\.com/(?P<id>([^/]+/)*[^/?#]+)'
    _TESTS = [
        {
            'url': 'http://teamcoco.com/video/mary-kay-remote',
            'md5': '55d532f81992f5c92046ad02fec34d7d',
            'info_dict': {
                'id': '80187',
                'ext': 'mp4',
                'title': 'Conan Becomes A Mary Kay Beauty Consultant',
                'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.',
                'duration': 495.0,
                'upload_date': '20140402',
                'timestamp': 1396407600,
            }
        }, {
            'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
            'md5': 'cde9ba0fa3506f5f017ce11ead928f9a',
            'info_dict': {
                'id': '19705',
                'ext': 'mp4',
                'description': 'Louis C.K. got starstruck by George W. Bush, so what? Part one.',
                'title': 'Louis C.K. Interview Pt. 1 11/3/11',
                'duration': 288,
                'upload_date': '20111104',
                'timestamp': 1320405840,
            }
        }, {
            'url': 'http://teamcoco.com/video/timothy-olyphant-drinking-whiskey',
            'info_dict': {
                'id': '88748',
                'ext': 'mp4',
                'title': 'Timothy Olyphant Raises A Toast To “Justified”',
                'description': 'md5:15501f23f020e793aeca761205e42c24',
                'upload_date': '20150415',
                'timestamp': 1429088400,
            },
            'params': {
                'skip_download': True,  # m3u8 downloads
            }
        }, {
            'url': 'http://teamcoco.com/video/full-episode-mon-6-1-joel-mchale-jake-tapper-and-musical-guest-courtney-barnett?playlist=x;eyJ0eXBlIjoidGFnIiwiaWQiOjl9',
            'info_dict': {
                'id': '89341',
                'ext': 'mp4',
                'title': 'Full Episode - Mon. 6/1 - Joel McHale, Jake Tapper, And Musical Guest Courtney Barnett',
                'description': 'Guests: Joel McHale, Jake Tapper, And Musical Guest Courtney Barnett',
            },
            'params': {
                'skip_download': True,  # m3u8 downloads
            },
            'skip': 'This video is no longer available.',
        }, {
            'url': 'http://teamcoco.com/video/the-conan-audiencey-awards-for-04/25/18',
            'only_matching': True,
        }, {
            'url': 'http://teamcoco.com/italy/conan-jordan-schlansky-hit-the-streets-of-florence',
            'only_matching': True,
        }, {
            'url': 'http://teamcoco.com/haiti/conan-s-haitian-history-lesson',
            'only_matching': True,
        }, {
            'url': 'http://teamcoco.com/israel/conan-hits-the-streets-beaches-of-tel-aviv',
            'only_matching': True,
        }, {
            'url': 'https://conan25.teamcoco.com/video/ice-cube-kevin-hart-conan-share-lyft',
            'only_matching': True,
        }
    ]
    # GraphQL field selection shared by both queries issued in _real_extract.
    _RECORD_TEMPL = '''id
        title
        teaser
        publishOn
        thumb {
          preview
        }
        tags {
          name
        }
        duration
        turnerMediaId
        turnerMediaAuthToken'''

    def _graphql_call(self, query_template, object_type, object_id):
        """POST a GraphQL query to teamcoco.com and return the looked-up object.

        ``query_template`` must contain exactly two ``%s`` placeholders; they
        are filled, in order, with the query field name
        (``'find' + object_type``) and ``object_id``.  ``object_id`` is also
        passed to ``_download_json`` as the video id for that request.

        Returns the value stored under ``['data']['find<object_type>']`` in
        the JSON response.
        """
        find_object = 'find' + object_type
        return self._download_json(
            'https://teamcoco.com/graphql', object_id, data=json.dumps({
                'query': query_template % (find_object, object_id)
            }).encode(), headers={
                'Content-Type': 'application/json',
            })['data'][find_object]

    def _real_extract(self, url):
        """Resolve the URL slug to a record and build its info dict.

        Raises ``ExtractorError`` (expected) when the slug lookup reports a
        ``status``, which the query only selects for ``NotFoundSlug``.
        """
        display_id = self._match_id(url)

        # The doubled ``%%s`` survive this first interpolation (which inserts
        # the field list) and are filled in later by _graphql_call.
        response = self._graphql_call('''{
  %%s(slug: "%%s") {
    ... on RecordSlug {
      record {
        %s
      }
    }
    ... on PageSlug {
      child {
        id
      }
    }
    ... on NotFoundSlug {
      status
    }
  }
}''' % self._RECORD_TEMPL, 'Slug', display_id)
        if response.get('status'):
            raise ExtractorError('This video is no longer available.', expected=True)

        # A PageSlug only exposes the id of its child, so the record itself
        # has to be fetched with a second query.
        child = response.get('child')
        if child:
            record = self._graphql_call('''{
  %%s(id: "%%s") {
    ... on Video {
      %s
    }
  }
}''' % self._RECORD_TEMPL, 'Record', child['id'])
        else:
            record = response['record']
        video_id = record['id']

        info = {
            'id': video_id,
            'display_id': display_id,
            'title': record['title'],
            # `thumb` is explicitly requested in the query, so a record without
            # a thumbnail can carry the key with a null value; a
            # `.get('thumb', {})` default would not cover that case and the
            # chained `.get` would fail on None.
            'thumbnail': (record.get('thumb') or {}).get('preview'),
            'description': record.get('teaser'),
            'duration': parse_duration(record.get('duration')),
            'timestamp': parse_iso8601(record.get('publishOn')),
        }

        media_id = record.get('turnerMediaId')
        if media_id:
            # Turner-hosted media: delegate to the inherited helper, passing
            # the record's auth token as a JWS access token.
            self._initialize_geo_bypass({
                'countries': ['US'],
            })
            info.update(self._extract_ngtv_info(media_id, {
                'accessToken': record['turnerMediaAuthToken'],
                'accessTokenType': 'jws',
            }))
        else:
            video_sources = self._download_json(
                'https://teamcoco.com/_truman/d/' + video_id,
                video_id)['meta']['src']
            # `src` is handled both as a list of sources and as a dict whose
            # values are the sources.
            if isinstance(video_sources, dict):
                video_sources = video_sources.values()

            formats = []
            get_quality = qualities(['low', 'sd', 'hd', 'uhd'])
            for src in video_sources:
                if not isinstance(src, dict):
                    continue
                src_url = src.get('src')
                if not src_url:
                    continue
                format_id = src.get('label')
                ext = determine_ext(src_url, mimetype2ext(src.get('type')))
                if format_id == 'hls' or ext == 'm3u8':
                    # compat_urllib_parse.urljoin does not work here
                    if src_url.startswith('/'):
                        src_url = 'http://ht.cdn.turner.com/tbs/big/teamcoco' + src_url
                    formats.extend(self._extract_m3u8_formats(
                        src_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
                else:
                    if src_url.startswith('/mp4:protected/'):
                        # TODO Correct extraction for these files
                        continue
                    # Bitrate is taken from a "<digits>k.mp4" suffix in the
                    # URL when one is present.
                    tbr = int_or_none(self._search_regex(
                        r'(\d+)k\.mp4', src_url, 'tbr', default=None))

                    formats.append({
                        'url': src_url,
                        'ext': ext,
                        'tbr': tbr,
                        'format_id': format_id,
                        'quality': get_quality(format_id),
                    })
            self._sort_formats(formats)
            info['formats'] = formats

        return info
[teamcoco] Rewrite preload data extraction Idea: "puncture" some consecutive fragments and check whether the b64decode result of a punctured string is a valid JSON or not. It's a O(N^3) algorithm, but should be fast for a small N (less than 30 fragments in all test cases) 2015-05-14 18:17:22 +00:00			`import json`
[Teamcoco] Move into own file 2013-06-23 20:31:50 +00:00
[teamcoco] Fix extraction for full episodes(closes #16573) 2018-05-30 12:21:07 +00:00			`from .turner import TurnerBaseIE`
[teamcoco] Fix extraction 2015-04-09 20:54:53 +00:00			`from ..utils import (`
[teamcoco] fix extraction(closes #16374) 2018-05-10 07:19:32 +00:00			`determine_ext,`
[teamcoco] Fix extraction 2015-04-09 20:54:53 +00:00			`ExtractorError,`
[teamcoco] fix extraction(closes #16374) 2018-05-10 07:19:32 +00:00			`int_or_none,`
			`mimetype2ext,`
			`parse_duration,`
			`parse_iso8601,`
[teamcoco] Fix extraction 2015-04-09 20:54:53 +00:00			`qualities,`
			`)`
[Teamcoco] Move into own file 2013-06-23 20:31:50 +00:00

[teamcoco] Fix extraction for full episodes(closes #16573) 2018-05-30 12:21:07 +00:00			`class TeamcocoIE(TurnerBaseIE):`
[teamcoco] fix extraction and add suport for subdomains(closes #17099)(closes #20339) 2019-04-05 07:26:04 +00:00			`_VALID_URL = r'https?://(?:\w+\.)?teamcoco\.com/(?P<id>([^/]+/)*[^/?#]+)'`
Add a test for the new URL pages Add a test for the pages with the video_id in the URL. 2014-04-04 17:52:35 +00:00			`_TESTS = [`
PEP8: applied even more rules 2014-11-23 20:39:15 +00:00			`{`
[teamcoco] fix extraction(closes #16374) 2018-05-10 07:19:32 +00:00			`'url': 'http://teamcoco.com/video/mary-kay-remote',`
			`'md5': '55d532f81992f5c92046ad02fec34d7d',`
PEP8: applied even more rules 2014-11-23 20:39:15 +00:00			`'info_dict': {`
[teamcoco] Modernize and fix extraction 2015-02-01 14:00:54 +00:00			`'id': '80187',`
			`'ext': 'mp4',`
PEP8: applied even more rules 2014-11-23 20:39:15 +00:00			`'title': 'Conan Becomes A Mary Kay Beauty Consultant',`
Use _family_friendly_search for determining age_limit 2015-02-08 15:45:38 +00:00			`'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.',`
[teamcoco] fix extraction(closes #16374) 2018-05-10 07:19:32 +00:00			`'duration': 495.0,`
			`'upload_date': '20140402',`
			`'timestamp': 1396407600,`
PEP8: applied even more rules 2014-11-23 20:39:15 +00:00			`}`
			`}, {`
			`'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',`
			`'md5': 'cde9ba0fa3506f5f017ce11ead928f9a',`
			`'info_dict': {`
[teamcoco] Modernize and fix extraction 2015-02-01 14:00:54 +00:00			`'id': '19705',`
			`'ext': 'mp4',`
[teamcoco] Fix extraction Also, use a single style of quotes 2015-02-21 20:19:39 +00:00			`'description': 'Louis C.K. got starstruck by George W. Bush, so what? Part one.',`
			`'title': 'Louis C.K. Interview Pt. 1 11/3/11',`
[teamcoco] Extract duration 2015-04-09 23:03:38 +00:00			`'duration': 288,`
[teamcoco] fix extraction(closes #16374) 2018-05-10 07:19:32 +00:00			`'upload_date': '20111104',`
			`'timestamp': 1320405840,`
PEP8: applied even more rules 2014-11-23 20:39:15 +00:00			`}`
[teamcoco] Fix "preload" data extraction (fixes #5179) 2015-04-15 11:56:21 +00:00			`}, {`
			`'url': 'http://teamcoco.com/video/timothy-olyphant-drinking-whiskey',`
			`'info_dict': {`
			`'id': '88748',`
			`'ext': 'mp4',`
			`'title': 'Timothy Olyphant Raises A Toast To “Justified”',`
			`'description': 'md5:15501f23f020e793aeca761205e42c24',`
[teamcoco] fix extraction(closes #16374) 2018-05-10 07:19:32 +00:00			`'upload_date': '20150415',`
			`'timestamp': 1429088400,`
[teamcoco] Fix "preload" data extraction (fixes #5179) 2015-04-15 11:56:21 +00:00			`},`
			`'params': {`
			`'skip_download': True, # m3u8 downloads`
			`}`
[teamcoco] Handle incomplete m3u8 URLs (fixes #5798) There are 2 TODOs. I don't know how to handle these cases correctly. 2015-06-05 14:55:29 +00:00			`}, {`
			`'url': 'http://teamcoco.com/video/full-episode-mon-6-1-joel-mchale-jake-tapper-and-musical-guest-courtney-barnett?playlist=x;eyJ0eXBlIjoidGFnIiwiaWQiOjl9',`
			`'info_dict': {`
			`'id': '89341',`
			`'ext': 'mp4',`
			`'title': 'Full Episode - Mon. 6/1 - Joel McHale, Jake Tapper, And Musical Guest Courtney Barnett',`
			`'description': 'Guests: Joel McHale, Jake Tapper, And Musical Guest Courtney Barnett',`
			`},`
			`'params': {`
			`'skip_download': True, # m3u8 downloads`
[teamcoco] fix extraction(closes #16374) 2018-05-10 07:19:32 +00:00			`},`
			`'skip': 'This video is no longer available.',`
[teamcoco] improve _VALID_URL regex(#16484) 2018-05-19 11:19:05 +00:00			`}, {`
			`'url': 'http://teamcoco.com/video/the-conan-audiencey-awards-for-04/25/18',`
			`'only_matching': True,`
[teamcoco] relax _VALID_URL regex and add a fallback for format extraction(fixes #16484) 2018-05-19 12:05:51 +00:00			`}, {`
			`'url': 'http://teamcoco.com/italy/conan-jordan-schlansky-hit-the-streets-of-florence',`
			`'only_matching': True,`
			`}, {`
			`'url': 'http://teamcoco.com/haiti/conan-s-haitian-history-lesson',`
			`'only_matching': True,`
			`}, {`
			`'url': 'http://teamcoco.com/israel/conan-hits-the-streets-beaches-of-tel-aviv',`
			`'only_matching': True,`
[teamcoco] fix extraction and add suport for subdomains(closes #17099)(closes #20339) 2019-04-05 07:26:04 +00:00			`}, {`
			`'url': 'https://conan25.teamcoco.com/video/ice-cube-kevin-hart-conan-share-lyft',`
			`'only_matching': True,`
Add a test for the new URL pages Add a test for the pages with the video_id in the URL. 2014-04-04 17:52:35 +00:00			`}`
			`]`
[teamcoco] add support for new videos(closes #23054) 2019-11-12 09:51:54 +00:00			`_RECORD_TEMPL = '''id`
			`title`
			`teaser`
			`publishOn`
			`thumb {`
			`preview`
			`}`
			`tags {`
			`name`
			`}`
			`duration`
			`turnerMediaId`
			`turnerMediaAuthToken'''`
[Teamcoco] Move into own file 2013-06-23 20:31:50 +00:00
[teamcoco] fix extraction(closes #16374) 2018-05-10 07:19:32 +00:00			`def _graphql_call(self, query_template, object_type, object_id):`
			`find_object = 'find' + object_type`
			`return self._download_json(`
[teamcoco] fix extraction and add suport for subdomains(closes #17099)(closes #20339) 2019-04-05 07:26:04 +00:00			`'https://teamcoco.com/graphql', object_id, data=json.dumps({`
[teamcoco] fix extraction(closes #16374) 2018-05-10 07:19:32 +00:00			`'query': query_template % (find_object, object_id)`
[teamcoco] fix extraction and add suport for subdomains(closes #17099)(closes #20339) 2019-04-05 07:26:04 +00:00			`}).encode(), headers={`
			`'Content-Type': 'application/json',`
			`})['data'][find_object]`
[teamcoco] Simplify ID management (Closes #2715) 2014-04-07 13:24:12 +00:00
[teamcoco] fix extraction(closes #16374) 2018-05-10 07:19:32 +00:00			`def _real_extract(self, url):`
			`display_id = self._match_id(url)`

			`response = self._graphql_call('''{`
[teamcoco] add support for new videos(closes #23054) 2019-11-12 09:51:54 +00:00			`%%s(slug: "%%s") {`
[teamcoco] fix extraction(closes #16374) 2018-05-10 07:19:32 +00:00			`... on RecordSlug {`
			`record {`
[teamcoco] add support for new videos(closes #23054) 2019-11-12 09:51:54 +00:00			`%s`
			`}`
			`}`
			`... on PageSlug {`
			`child {`
[teamcoco] fix extraction(closes #16374) 2018-05-10 07:19:32 +00:00			`id`
			`}`
			`}`
			`... on NotFoundSlug {`
			`status`
			`}`
			`}`
[teamcoco] add support for new videos(closes #23054) 2019-11-12 09:51:54 +00:00			`}''' % self._RECORD_TEMPL, 'Slug', display_id)`
[teamcoco] fix extraction(closes #16374) 2018-05-10 07:19:32 +00:00			`if response.get('status'):`
			`raise ExtractorError('This video is no longer available.', expected=True)`

[teamcoco] add support for new videos(closes #23054) 2019-11-12 09:51:54 +00:00			`child = response.get('child')`
			`if child:`
			`record = self._graphql_call('''{`
			`%%s(id: "%%s") {`
			`... on Video {`
			`%s`
			`}`
			`}`
			`}''' % self._RECORD_TEMPL, 'Record', child['id'])`
			`else:`
			`record = response['record']`
[teamcoco] fix extraction(closes #16374) 2018-05-10 07:19:32 +00:00			`video_id = record['id']`

[teamcoco] Fix extraction for full episodes(closes #16573) 2018-05-30 12:21:07 +00:00			`info = {`
[teamcoco] Use unicode_literals 2014-01-17 02:15:09 +00:00			`'id': video_id,`
[teamcoco] Simplify ID management (Closes #2715) 2014-04-07 13:24:12 +00:00			`'display_id': display_id,`
[teamcoco] fix extraction(closes #16374) 2018-05-10 07:19:32 +00:00			`'title': record['title'],`
			`'thumbnail': record.get('thumb', {}).get('preview'),`
			`'description': record.get('teaser'),`
			`'duration': parse_duration(record.get('duration')),`
			`'timestamp': parse_iso8601(record.get('publishOn')),`
[teamcoco] Parse the xml file and extract all the formats 2013-11-03 16:48:12 +00:00			`}`
[teamcoco] Fix extraction for full episodes(closes #16573) 2018-05-30 12:21:07 +00:00
			`media_id = record.get('turnerMediaId')`
			`if media_id:`
			`self._initialize_geo_bypass({`
			`'countries': ['US'],`
			`})`
			`info.update(self._extract_ngtv_info(media_id, {`
			`'accessToken': record['turnerMediaAuthToken'],`
			`'accessTokenType': 'jws',`
			`}))`
			`else:`
[teamcoco] add support for new videos(closes #23054) 2019-11-12 09:51:54 +00:00			`video_sources = self._download_json(`
[teamcoco] fix extraction and add suport for subdomains(closes #17099)(closes #20339) 2019-04-05 07:26:04 +00:00			`'https://teamcoco.com/_truman/d/' + video_id,`
[teamcoco] add support for new videos(closes #23054) 2019-11-12 09:51:54 +00:00			`video_id)['meta']['src']`
			`if isinstance(video_sources, dict):`
			`video_sources = video_sources.values()`
[teamcoco] Fix extraction for full episodes(closes #16573) 2018-05-30 12:21:07 +00:00
			`formats = []`
			`get_quality = qualities(['low', 'sd', 'hd', 'uhd'])`
[teamcoco] add support for new videos(closes #23054) 2019-11-12 09:51:54 +00:00			`for src in video_sources:`
[teamcoco] Fix extraction for full episodes(closes #16573) 2018-05-30 12:21:07 +00:00			`if not isinstance(src, dict):`
			`continue`
			`src_url = src.get('src')`
			`if not src_url:`
			`continue`
[teamcoco] add support for new videos(closes #23054) 2019-11-12 09:51:54 +00:00			`format_id = src.get('label')`
[teamcoco] Fix extraction for full episodes(closes #16573) 2018-05-30 12:21:07 +00:00			`ext = determine_ext(src_url, mimetype2ext(src.get('type')))`
			`if format_id == 'hls' or ext == 'm3u8':`
			`# compat_urllib_parse.urljoin does not work here`
			`if src_url.startswith('/'):`
			`src_url = 'http://ht.cdn.turner.com/tbs/big/teamcoco' + src_url`
			`formats.extend(self._extract_m3u8_formats(`
			`src_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))`
			`else:`
			`if src_url.startswith('/mp4:protected/'):`
			`# TODO Correct extraction for these files`
			`continue`
			`tbr = int_or_none(self._search_regex(`
			`r'(\d+)k\.mp4', src_url, 'tbr', default=None))`

			`formats.append({`
			`'url': src_url,`
			`'ext': ext,`
			`'tbr': tbr,`
			`'format_id': format_id,`
			`'quality': get_quality(format_id),`
			`})`
			`self._sort_formats(formats)`
			`info['formats'] = formats`

			`return info`