yt-dlp/yt_dlp/extractor/beeg.py

from .common import InfoExtractor
from ..utils import (
    int_or_none,
    str_or_none,
    traverse_obj,
    try_get,
    unified_timestamp,
)


class BeegIE(InfoExtractor):
    """Extractor for beeg.com video pages.

    Metadata and HLS resources are read from the JSON returned by
    ``store.externulls.com/facts/file/<id>``; the HTML page itself is only
    used for the RTA age-limit lookup.
    """

    _VALID_URL = r'https?://(?:www\.)?beeg\.(?:com(?:/video)?)/-?(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://beeg.com/-0983946056129650',
        'md5': '51d235147c4627cfce884f844293ff88',
        'info_dict': {
            'id': '0983946056129650',
            'ext': 'mp4',
            'title': 'sucked cock and fucked in a private plane',
            'duration': 927,
            'tags': list,
            'age_limit': 18,
            'upload_date': '20220131',
            'timestamp': 1643656455,
            'display_id': '2540839',
        },
    }, {
        'url': 'https://beeg.com/-0599050563103750?t=4-861',
        'md5': 'bd8b5ea75134f7f07fad63008db2060e',
        'info_dict': {
            'id': '0599050563103750',
            'ext': 'mp4',
            'title': 'Bad Relatives',
            'duration': 2060,
            'tags': list,
            'age_limit': 18,
            'description': 'md5:b4fc879a58ae6c604f8f259155b7e3b9',
            'timestamp': 1643623200,
            'display_id': '2569965',
            'upload_date': '20220131',
        },
    }, {
        # api/v6 v2
        'url': 'https://beeg.com/1941093077?t=911-1391',
        'only_matching': True,
    }, {
        # api/v6 v2 w/o t
        'url': 'https://beeg.com/1277207756',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the video addressed by ``url``.

        Downloads the web page (for the age limit) and the facts JSON, picks
        the fact with the lowest ``id`` as the source of ``display_id``,
        ``timestamp`` and fallback HLS resources, and extracts one set of
        m3u8 formats per non-empty HLS resource entry.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        video = self._download_json(
            f'https://store.externulls.com/facts/file/{video_id}',
            video_id, f'Downloading JSON for {video_id}')

        # Select the fact with the smallest 'id'. A missing 'fc_facts' key is
        # treated as an empty list instead of crashing on iteration over None,
        # and non-dict entries are skipped so first_fact always supports .get().
        first_fact = {}
        for fact in video.get('fc_facts') or []:
            if not isinstance(fact, dict):
                continue
            # try_get swallows the lookup/comparison error when 'id' is absent
            if not first_fact or try_get(fact, lambda x: x['id'] < first_fact['id']):
                first_fact = fact

        # Prefer the file-level resources, fall back to the selected fact, and
        # finally to an empty mapping so that a response without any resources
        # yields an empty format list rather than an AttributeError.
        resources = (
            traverse_obj(video, ('file', 'hls_resources'))
            or first_fact.get('hls_resources')
            or {})

        formats = []
        for format_id, video_uri in resources.items():
            if not video_uri:
                continue
            # Resource keys look like 'fl_cdn_<height>'; height stays None otherwise
            height = int_or_none(self._search_regex(
                r'fl_cdn_(\d+)', format_id, 'height', default=None))
            # str_or_none avoids a literal 'None' format-id prefix when the
            # height is unknown; fatal=False keeps one broken manifest from
            # discarding the renditions that did load.
            current_formats = self._extract_m3u8_formats(
                f'https://video.beeg.com/{video_uri}', video_id, ext='mp4',
                m3u8_id=str_or_none(height), fatal=False)
            if height is not None:
                # Only override when known, so a height parsed from the
                # playlist is not replaced by None.
                for f in current_formats:
                    f['height'] = height
            formats.extend(current_formats)

        return {
            'id': video_id,
            'display_id': str_or_none(first_fact.get('id')),
            'title': traverse_obj(video, ('file', 'stuff', 'sf_name')),
            'description': traverse_obj(video, ('file', 'stuff', 'sf_story')),
            'timestamp': unified_timestamp(first_fact.get('fc_created')),
            'duration': int_or_none(traverse_obj(video, ('file', 'fl_duration'))),
            'tags': traverse_obj(video, ('tags', ..., 'tg_name')),
            'formats': formats,
            'age_limit': self._rta_search(webpage),
        }
Add support for beeg.com 2014-08-31 09:57:10 +00:00			`from .common import InfoExtractor`
[beeg] Fix extraction (Closes #7155) 2015-10-13 15:04:39 +00:00			`from ..utils import (`
			`int_or_none,`
[cleanup] Fix infodict returned fields (#8906) Authored by: seproDev 2024-03-08 22:36:41 +00:00			`str_or_none,`
[beeg] Fix extractor (#2616) Closes #2592 Authored by: Bricio 2022-02-11 18:05:23 +00:00			`traverse_obj,`
			`try_get,`
[beeg] Fix extraction (closes #18610, closes #18626) 2018-12-31 13:59:53 +00:00			`unified_timestamp,`
[beeg] Fix extraction (Closes #7155) 2015-10-13 15:04:39 +00:00			`)`
Add support for beeg.com 2014-08-31 09:57:10 +00:00

			`class BeegIE(InfoExtractor):`
[beeg] Fix extractor (#2616) Closes #2592 Authored by: Bricio 2022-02-11 18:05:23 +00:00			`_VALID_URL = r'https?://(?:www\.)?beeg\.(?:com(?:/video)?)/-?(?P<id>\d+)'`
[beeg] Add support for beeg.porn (closes #20306) 2019-03-10 21:33:27 +00:00			`_TESTS = [{`
[beeg] Fix extractor (#2616) Closes #2592 Authored by: Bricio 2022-02-11 18:05:23 +00:00			`'url': 'https://beeg.com/-0983946056129650',`
			`'md5': '51d235147c4627cfce884f844293ff88',`
Add support for beeg.com 2014-08-31 09:57:10 +00:00			`'info_dict': {`
[beeg] Fix extractor (#2616) Closes #2592 Authored by: Bricio 2022-02-11 18:05:23 +00:00			`'id': '0983946056129650',`
Add support for beeg.com 2014-08-31 09:57:10 +00:00			`'ext': 'mp4',`
[beeg] Fix extractor (#2616) Closes #2592 Authored by: Bricio 2022-02-11 18:05:23 +00:00			`'title': 'sucked cock and fucked in a private plane',`
			`'duration': 927,`
[beeg] Fix extraction (Closes #7155) 2015-10-13 15:04:39 +00:00			`'tags': list,`
[beeg] Add age_limit 2014-09-01 21:13:04 +00:00			`'age_limit': 18,`
[beeg] Fix extractor (#2616) Closes #2592 Authored by: Bricio 2022-02-11 18:05:23 +00:00			`'upload_date': '20220131',`
			`'timestamp': 1643656455,`
[cleanup] Fix infodict returned fields (#8906) Authored by: seproDev 2024-03-08 22:36:41 +00:00			`'display_id': '2540839',`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-11 23:09:58 +00:00			`},`
[beeg] Fix extractor (#2616) Closes #2592 Authored by: Bricio 2022-02-11 18:05:23 +00:00			`}, {`
			`'url': 'https://beeg.com/-0599050563103750?t=4-861',`
			`'md5': 'bd8b5ea75134f7f07fad63008db2060e',`
			`'info_dict': {`
			`'id': '0599050563103750',`
			`'ext': 'mp4',`
			`'title': 'Bad Relatives',`
			`'duration': 2060,`
			`'tags': list,`
			`'age_limit': 18,`
			`'description': 'md5:b4fc879a58ae6c604f8f259155b7e3b9',`
			`'timestamp': 1643623200,`
[cleanup] Fix infodict returned fields (#8906) Authored by: seproDev 2024-03-08 22:36:41 +00:00			`'display_id': '2569965',`
[beeg] Fix extractor (#2616) Closes #2592 Authored by: Bricio 2022-02-11 18:05:23 +00:00			`'upload_date': '20220131',`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-11 23:09:58 +00:00			`},`
[beeg] Add support for api/v6 v2 URLs (closes #21511) 2019-06-24 16:01:43 +00:00			`}, {`
			`# api/v6 v2`
			`'url': 'https://beeg.com/1941093077?t=911-1391',`
			`'only_matching': True,`
[beeg] Add support for api/v6 v2 URLs without t argument (closes #21701) 2019-07-11 16:37:09 +00:00			`}, {`
			`# api/v6 v2 w/o t`
			`'url': 'https://beeg.com/1277207756',`
			`'only_matching': True,`
[beeg] Add support for beeg.porn (closes #20306) 2019-03-10 21:33:27 +00:00			`}]`
Add support for beeg.com 2014-08-31 09:57:10 +00:00
			`def _real_extract(self, url):`
[beeg] Fix extraction (Closes #7155) 2015-10-13 15:04:39 +00:00			`video_id = self._match_id(url)`
[beeg] Extract all formats 2014-09-02 13:54:00 +00:00
[beeg] Improve extraction 2016-04-07 16:40:35 +00:00			`webpage = self._download_webpage(url, video_id)`

[beeg] Fix extractor (#2616) Closes #2592 Authored by: Bricio 2022-02-11 18:05:23 +00:00			`video = self._download_json(`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-11 23:09:58 +00:00			`f'https://store.externulls.com/facts/file/{video_id}',`
			`video_id, f'Downloading JSON for {video_id}')`
[beeg] Improve extraction 2016-04-07 16:40:35 +00:00
[beeg] Fix extractor (#2616) Closes #2592 Authored by: Bricio 2022-02-11 18:05:23 +00:00			`fc_facts = video.get('fc_facts')`
			`first_fact = {}`
			`for fact in fc_facts:`
			`if not first_fact or try_get(fact, lambda x: x['id'] < first_fact['id']):`
			`first_fact = fact`
[beeg] Add support for api/v6 v2 URLs (closes #21511) 2019-06-24 16:01:43 +00:00
[beeg] Fix extractor (#2616) Closes #2592 Authored by: Bricio 2022-02-11 18:05:23 +00:00			`resources = traverse_obj(video, ('file', 'hls_resources')) or first_fact.get('hls_resources')`
[beeg] API v5 (Closes #7846) 2015-12-11 20:52:20 +00:00
[beeg] Fix extraction (Closes #7155) 2015-10-13 15:04:39 +00:00			`formats = []`
[beeg] Fix extractor (#2616) Closes #2592 Authored by: Bricio 2022-02-11 18:05:23 +00:00			`for format_id, video_uri in resources.items():`
			`if not video_uri:`
[beeg] Fix extraction (Closes #7155) 2015-10-13 15:04:39 +00:00			`continue`
[beeg] Fix extractor (#2616) Closes #2592 Authored by: Bricio 2022-02-11 18:05:23 +00:00			`height = int_or_none(self._search_regex(r'fl_cdn_(\d+)', format_id, 'height', default=None))`
			`current_formats = self._extract_m3u8_formats(f'https://video.beeg.com/{video_uri}', video_id, ext='mp4', m3u8_id=str(height))`
			`for f in current_formats:`
			`f['height'] = height`
			`formats.extend(current_formats)`
PEP8 applied 2014-11-23 19:41:03 +00:00
Add support for beeg.com 2014-08-31 09:57:10 +00:00			`return {`
			`'id': video_id,`
[cleanup] Fix infodict returned fields (#8906) Authored by: seproDev 2024-03-08 22:36:41 +00:00			`'display_id': str_or_none(first_fact.get('id')),`
[beeg] Fix extractor (#2616) Closes #2592 Authored by: Bricio 2022-02-11 18:05:23 +00:00			`'title': traverse_obj(video, ('file', 'stuff', 'sf_name')),`
			`'description': traverse_obj(video, ('file', 'stuff', 'sf_story')),`
			`'timestamp': unified_timestamp(first_fact.get('fc_created')),`
			`'duration': int_or_none(traverse_obj(video, ('file', 'fl_duration'))),`
			`'tags': traverse_obj(video, ('tags', ..., 'tg_name')),`
[beeg] Extract all formats 2014-09-02 13:54:00 +00:00			`'formats': formats,`
[beeg] Improve extraction 2016-04-07 16:40:35 +00:00			`'age_limit': self._rta_search(webpage),`
Add support for beeg.com 2014-08-31 09:57:10 +00:00			`}`