yt-dlp/youtube_dl/extractor/beeg.py

from __future__ import unicode_literals

from .common import InfoExtractor
from ..compat import (
    compat_str,
    compat_urlparse,
)
from ..utils import (
    int_or_none,
    unified_timestamp,
)


class BeegIE(InfoExtractor):
    """Extractor for video pages on beeg.com and beeg.porn.

    Metadata and format URLs are read from the site's ``api/v6`` JSON
    endpoint; the HTML page itself is only used to discover the current
    ``beeg_version`` token and the RTA age-limit marker.
    """

    _VALID_URL = r'https?://(?:www\.)?beeg\.(?:com|porn(?:/video)?)/(?P<id>\d+)'
    _TESTS = [{
        # api/v6 v1
        'url': 'http://beeg.com/5416503',
        'md5': 'a1a1b1a8bc70a89e49ccfd113aed0820',
        'info_dict': {
            'id': '5416503',
            'ext': 'mp4',
            'title': 'Sultry Striptease',
            'description': 'md5:d22219c09da287c14bed3d6c37ce4bc2',
            'timestamp': 1391813355,
            'upload_date': '20140207',
            'duration': 383,
            'tags': list,
            'age_limit': 18,
        }
    }, {
        # api/v6 v2
        'url': 'https://beeg.com/1941093077?t=911-1391',
        'only_matching': True,
    }, {
        # api/v6 v2 w/o t
        'url': 'https://beeg.com/1277207756',
        'only_matching': True,
    }, {
        'url': 'https://beeg.porn/video/5416503',
        'only_matching': True,
    }, {
        'url': 'https://beeg.porn/5416503',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the video addressed by *url*.

        Downloads the web page, queries the api/v6 video endpoint (first on
        the bare domain, then on the ``api.`` subdomain) and turns every
        ``<height>p`` entry of the JSON response into a format.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Falls back to a fixed version token when the page does not expose one.
        beeg_version = self._search_regex(
            r'beeg_version\s*=\s*([\da-zA-Z_-]+)', webpage, 'beeg version',
            default='1546225636701')

        # Ids with ten or more digits are requested with v=2, shorter ones
        # with v=1.
        query = {'v': 1}
        if len(video_id) >= 10:
            query = {'v': 2}
            url_params = compat_urlparse.parse_qs(
                compat_urlparse.urlparse(url).query)
            # An optional "t=<start>-<end>" URL argument is forwarded to the
            # API as the "s" and "e" parameters.
            time_range = url_params.get('t', [''])[0].split('-')
            if len(time_range) > 1:
                query['s'] = time_range[0]
                query['e'] = time_range[1]

        # Only the last attempt (the "api." subdomain) is allowed to be fatal.
        for api_path in ('', 'api.'):
            api_url = 'https://%sbeeg.com/api/v6/%s/video/%s' % (
                api_path, beeg_version, video_id)
            video = self._download_json(
                api_url, video_id,
                fatal=api_path == 'api.', query=query)
            if video:
                break

        # Same replacement text for every format URL, so build it once.
        data_markers = 'data=pc_XX__%s_0' % beeg_version

        formats = []
        for format_id, video_url in video.items():
            # Entries with an empty value are skipped before the key is even
            # inspected; keys not shaped like "<digits>p" are not formats.
            height = self._search_regex(
                r'^(\d+)[pP]$', format_id, 'height',
                default=None) if video_url else None
            if height:
                formats.append({
                    'url': self._proto_relative_url(
                        video_url.replace('{DATA_MARKERS}', data_markers),
                        'https:'),
                    'format_id': format_id,
                    'height': int(height),
                })
        self._sort_formats(formats)

        # The title is the only metadata field accessed without a fallback.
        title = video['title']

        raw_tags = video.get('tags')
        if raw_tags:
            tags = [tag.strip() for tag in raw_tags.split(',')]
        else:
            tags = None

        return {
            'id': compat_str(video.get('id') or video_id),
            'display_id': video.get('code'),
            'title': title,
            'description': video.get('desc'),
            'series': video.get('ps_name'),
            'timestamp': unified_timestamp(video.get('date')),
            'duration': int_or_none(video.get('duration')),
            'tags': tags,
            'formats': formats,
            'age_limit': self._rta_search(webpage),
        }
Add support for beeg.com 2014-08-31 09:57:10 +00:00			`from __future__ import unicode_literals`

			`from .common import InfoExtractor`
[beeg] Add support for api/v6 v2 URLs (closes #21511) 2019-06-24 16:01:43 +00:00			`from ..compat import (`
			`compat_str,`
			`compat_urlparse,`
			`)`
[beeg] Fix extraction (Closes #7155) 2015-10-13 15:04:39 +00:00			`from ..utils import (`
			`int_or_none,`
[beeg] Fix extraction (closes #18610, closes #18626) 2018-12-31 13:59:53 +00:00			`unified_timestamp,`
[beeg] Fix extraction (Closes #7155) 2015-10-13 15:04:39 +00:00			`)`
Add support for beeg.com 2014-08-31 09:57:10 +00:00

			`class BeegIE(InfoExtractor):`
[beeg] Add support for beeg.porn (closes #20306) 2019-03-10 21:33:27 +00:00			`_VALID_URL = r'https?://(?:www\.)?beeg\.(?:com|porn(?:/video)?)/(?P<id>\d+)'`
			`_TESTS = [{`
[beeg] Add support for api/v6 v2 URLs (closes #21511) 2019-06-24 16:01:43 +00:00			`# api/v6 v1`
Add support for beeg.com 2014-08-31 09:57:10 +00:00			`'url': 'http://beeg.com/5416503',`
[beeg] Update test 2017-04-27 20:14:11 +00:00			`'md5': 'a1a1b1a8bc70a89e49ccfd113aed0820',`
Add support for beeg.com 2014-08-31 09:57:10 +00:00			`'info_dict': {`
			`'id': '5416503',`
			`'ext': 'mp4',`
			`'title': 'Sultry Striptease',`
[beeg] Fix extraction (Closes #7155) 2015-10-13 15:04:39 +00:00			`'description': 'md5:d22219c09da287c14bed3d6c37ce4bc2',`
			`'timestamp': 1391813355,`
			`'upload_date': '20140207',`
			`'duration': 383,`
			`'tags': list,`
[beeg] Add age_limit 2014-09-01 21:13:04 +00:00			`'age_limit': 18,`
Add support for beeg.com 2014-08-31 09:57:10 +00:00			`}`
[beeg] Add support for api/v6 v2 URLs (closes #21511) 2019-06-24 16:01:43 +00:00			`}, {`
			`# api/v6 v2`
			`'url': 'https://beeg.com/1941093077?t=911-1391',`
			`'only_matching': True,`
[beeg] Add support for api/v6 v2 URLs without t argument (closes #21701) 2019-07-11 16:37:09 +00:00			`}, {`
			`# api/v6 v2 w/o t`
			`'url': 'https://beeg.com/1277207756',`
			`'only_matching': True,`
[beeg] Add support for beeg.porn (closes #20306) 2019-03-10 21:33:27 +00:00			`}, {`
			`'url': 'https://beeg.porn/video/5416503',`
			`'only_matching': True,`
			`}, {`
			`'url': 'https://beeg.porn/5416503',`
			`'only_matching': True,`
			`}]`
Add support for beeg.com 2014-08-31 09:57:10 +00:00
			`def _real_extract(self, url):`
[beeg] Fix extraction (Closes #7155) 2015-10-13 15:04:39 +00:00			`video_id = self._match_id(url)`
[beeg] Extract all formats 2014-09-02 13:54:00 +00:00
[beeg] Improve extraction 2016-04-07 16:40:35 +00:00			`webpage = self._download_webpage(url, video_id)`

[beeg] Fix extraction (closes #18610, closes #18626) 2018-12-31 13:59:53 +00:00			`beeg_version = self._search_regex(`
			`r'beeg_version\s*=\s*([\da-zA-Z_-]+)', webpage, 'beeg version',`
			`default='1546225636701')`
[beeg] Improve extraction 2016-04-07 16:40:35 +00:00
[beeg] Add support for api/v6 v2 URLs without t argument (closes #21701) 2019-07-11 16:37:09 +00:00			`if len(video_id) >= 10:`
[beeg] Add support for api/v6 v2 URLs (closes #21511) 2019-06-24 16:01:43 +00:00			`query = {`
			`'v': 2,`
			`}`
[beeg] Add support for api/v6 v2 URLs without t argument (closes #21701) 2019-07-11 16:37:09 +00:00			`qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)`
			`t = qs.get('t', [''])[0].split('-')`
			`if len(t) > 1:`
			`query.update({`
			`'s': t[0],`
			`'e': t[1],`
			`})`
[beeg] Add support for api/v6 v2 URLs (closes #21511) 2019-06-24 16:01:43 +00:00			`else:`
			`query = {'v': 1}`

[beeg] Fix extraction (closes #14403) 2017-10-03 21:27:42 +00:00			`for api_path in ('', 'api.'):`
			`video = self._download_json(`
			`'https://%sbeeg.com/api/v6/%s/video/%s'`
			`% (api_path, beeg_version, video_id), video_id,`
[beeg] Add support for api/v6 v2 URLs (closes #21511) 2019-06-24 16:01:43 +00:00			`fatal=api_path == 'api.', query=query)`
[beeg] Fix extraction (closes #14403) 2017-10-03 21:27:42 +00:00			`if video:`
			`break`
[beeg] API v5 (Closes #7846) 2015-12-11 20:52:20 +00:00
[beeg] Fix extraction (Closes #7155) 2015-10-13 15:04:39 +00:00			`formats = []`
			`for format_id, video_url in video.items():`
[beeg] Skip empty URLs (Closes #7392) 2015-11-07 00:23:00 +00:00			`if not video_url:`
			`continue`
[beeg] Fix extraction (Closes #7155) 2015-10-13 15:04:39 +00:00			`height = self._search_regex(`
			`r'^(\d+)[pP]$', format_id, 'height', default=None)`
			`if not height:`
			`continue`
			`formats.append({`
[beeg] Fix extraction (closes #18610, closes #18626) 2018-12-31 13:59:53 +00:00			`'url': self._proto_relative_url(`
			`video_url.replace('{DATA_MARKERS}', 'data=pc_XX__%s_0' % beeg_version), 'https:'),`
[beeg] Fix extraction (Closes #7155) 2015-10-13 15:04:39 +00:00			`'format_id': format_id,`
			`'height': int(height),`
			`})`
[beeg] Extract all formats 2014-09-02 13:54:00 +00:00			`self._sort_formats(formats)`
Add support for beeg.com 2014-08-31 09:57:10 +00:00
[beeg] Fix extraction (Closes #7155) 2015-10-13 15:04:39 +00:00			`title = video['title']`
[beeg] Fix extraction (closes #18610, closes #18626) 2018-12-31 13:59:53 +00:00			`video_id = compat_str(video.get('id') or video_id)`
[beeg] Fix extraction (Closes #7155) 2015-10-13 15:04:39 +00:00			`display_id = video.get('code')`
			`description = video.get('desc')`
[beeg] Fix extraction (closes #18610, closes #18626) 2018-12-31 13:59:53 +00:00			`series = video.get('ps_name')`
PEP8 applied 2014-11-23 19:41:03 +00:00
[beeg] Fix extraction (closes #18610, closes #18626) 2018-12-31 13:59:53 +00:00			`timestamp = unified_timestamp(video.get('date'))`
[beeg] Fix extraction (Closes #7155) 2015-10-13 15:04:39 +00:00			`duration = int_or_none(video.get('duration'))`
Add support for beeg.com 2014-08-31 09:57:10 +00:00
[beeg] Fix extraction (Closes #7155) 2015-10-13 15:04:39 +00:00			`tags = [tag.strip() for tag in video['tags'].split(',')] if video.get('tags') else None`
Add support for beeg.com 2014-08-31 09:57:10 +00:00
			`return {`
			`'id': video_id,`
[beeg] Fix extraction (Closes #7155) 2015-10-13 15:04:39 +00:00			`'display_id': display_id,`
Add support for beeg.com 2014-08-31 09:57:10 +00:00			`'title': title,`
			`'description': description,`
[beeg] Fix extraction (closes #18610, closes #18626) 2018-12-31 13:59:53 +00:00			`'series': series,`
[beeg] Fix extraction (Closes #7155) 2015-10-13 15:04:39 +00:00			`'timestamp': timestamp,`
			`'duration': duration,`
			`'tags': tags,`
[beeg] Extract all formats 2014-09-02 13:54:00 +00:00			`'formats': formats,`
[beeg] Improve extraction 2016-04-07 16:40:35 +00:00			`'age_limit': self._rta_search(webpage),`
Add support for beeg.com 2014-08-31 09:57:10 +00:00			`}`