yt-dlp/yt_dlp/extractor/cloudy.py

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    str_to_int,
    unified_strdate,
)


class CloudyIE(InfoExtractor):
    """Extractor for videos hosted on cloudy.ec.

    Matches both watch-page URLs (``/v/<id>``) and embed URLs
    (``/embed.php?...id=<id>``); see ``_VALID_URL``.
    """

    # NOTE(review): the description attribute is usually spelled ``IE_DESC``
    # on extractors; confirm whether the leading underscore is intentional.
    _IE_DESC = 'cloudy.ec'
    _VALID_URL = r'https?://(?:www\.)?cloudy\.ec/(?:v/|embed\.php\?.*?\bid=)(?P<id>[A-Za-z0-9]+)'
    _TESTS = [{
        'url': 'https://www.cloudy.ec/v/af511e2527aac',
        'md5': '29832b05028ead1b58be86bf319397ca',
        'info_dict': {
            'id': 'af511e2527aac',
            'ext': 'mp4',
            'title': 'Funny Cats and Animals Compilation june 2013',
            'upload_date': '20130913',
            'view_count': int,
        }
    }, {
        'url': 'http://www.cloudy.ec/embed.php?autoplay=1&id=af511e2527aac',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for a single cloudy.ec video.

        The media formats come from the embed player page; title, upload
        date and view count are scraped from the ``/v/<id>`` page, which is
        treated as optional.

        Raises:
            ExtractorError: if the embed page cannot be downloaded or does
                not contain any HTML5 media entry.
        """
        video_id = self._match_id(url)

        # The embed page is requested for every URL form; its HTML5 media
        # markup is what the formats are parsed from.
        webpage = self._download_webpage(
            'https://www.cloudy.ec/embed.php', video_id, query={
                'id': video_id,
                'playerPage': 1,
                'autoplay': 1,
            })

        entries = self._parse_html5_media_entries(url, webpage, video_id)
        if not entries:
            # Report a proper extraction failure rather than letting an
            # IndexError escape from indexing an empty list.
            raise ExtractorError(
                'Unable to find any media entry in the embed page',
                video_id=video_id)
        info = entries[0]

        # The metadata page is optional: with fatal=False a failed download
        # yields a falsy value and only the metadata is skipped.
        webpage = self._download_webpage(
            'https://www.cloudy.ec/v/%s' % video_id, video_id, fatal=False)

        if webpage:
            info.update({
                # Non-fatal like the other fields, so that a layout change
                # on this optional page falls through to the video_id title
                # fallback below instead of aborting the extraction.
                'title': self._search_regex(
                    r'<h\d[^>]*>([^<]+)<', webpage, 'title', fatal=False),
                'upload_date': unified_strdate(self._search_regex(
                    r'>Published at (\d{4}-\d{1,2}-\d{1,2})', webpage,
                    'upload date', fatal=False)),
                'view_count': str_to_int(self._search_regex(
                    r'([\d,.]+) views<', webpage, 'view count', fatal=False)),
            })

        # Fall back to the video id when no title could be scraped.
        if not info.get('title'):
            info['title'] = video_id

        info['id'] = video_id

        return info
[cloudy] Add new extractor. Closes #3743 2014-09-13 21:12:36 +00:00			`from .common import InfoExtractor`
[util] Move compatibility functions out of util utils is large enough without these compatibility functions. Everything that is present in newer versions of Python (i.e. with dev Python it's just an import) goes into compat.py . Everything else (i.e. youtube-dl-specific helpers) goes into utils.py . 2014-11-02 10:23:40 +00:00			`from ..utils import (`
[cloudy] Fix extraction (closes #12525) 2017-03-22 16:48:06 +00:00			`str_to_int,`
			`unified_strdate,`
[util] Move compatibility functions out of util utils is large enough without these compatibility functions. Everything that is present in newer versions of Python (i.e. with dev Python it's just an import) goes into compat.py . Everything else (i.e. youtube-dl-specific helpers) goes into utils.py . 2014-11-02 10:23:40 +00:00			`)`
[cloudy] Add new extractor. Closes #3743 2014-09-13 21:12:36 +00:00

			`class CloudyIE(InfoExtractor):`
[cloudy] Drop videoraj.to videoraj.ch is now a shoe-selling website, and videoraj.to domain name is gone. 2016-07-15 17:21:20 +00:00			`_IE_DESC = 'cloudy.ec'`
[cloudy] Fix extraction (closes #12525) 2017-03-22 16:48:06 +00:00			`_VALID_URL = r'https?://(?:www\.)?cloudy\.ec/(?:v/\|embed\.php\?.*?\bid=)(?P<id>[A-Za-z0-9]+)'`
			`_TESTS = [{`
[cloudy] Drop videoraj.to videoraj.ch is now a shoe-selling website, and videoraj.to domain name is gone. 2016-07-15 17:21:20 +00:00			`'url': 'https://www.cloudy.ec/v/af511e2527aac',`
[cloudy] Fix extraction (closes #12525) 2017-03-22 16:48:06 +00:00			`'md5': '29832b05028ead1b58be86bf319397ca',`
[cloudy] Drop videoraj.to videoraj.ch is now a shoe-selling website, and videoraj.to domain name is gone. 2016-07-15 17:21:20 +00:00			`'info_dict': {`
			`'id': 'af511e2527aac',`
[cloudy] Fix extraction (closes #12525) 2017-03-22 16:48:06 +00:00			`'ext': 'mp4',`
[cloudy] Drop videoraj.to videoraj.ch is now a shoe-selling website, and videoraj.to domain name is gone. 2016-07-15 17:21:20 +00:00			`'title': 'Funny Cats and Animals Compilation june 2013',`
[cloudy] Fix extraction (closes #12525) 2017-03-22 16:48:06 +00:00			`'upload_date': '20130913',`
			`'view_count': int,`
[cloudy] Add new extractor. Closes #3743 2014-09-13 21:12:36 +00:00			`}`
[cloudy] Fix extraction (closes #12525) 2017-03-22 16:48:06 +00:00			`}, {`
			`'url': 'http://www.cloudy.ec/embed.php?autoplay=1&id=af511e2527aac',`
			`'only_matching': True,`
			`}]`
[cloudy] Retry extraction on 410 status code (#3743 #3744) 2014-09-14 12:04:16 +00:00
[cloudy] Fix extraction (closes #12525) 2017-03-22 16:48:06 +00:00			`def _real_extract(self, url):`
			`video_id = self._match_id(url)`
[cloudy] Add new extractor. Closes #3743 2014-09-13 21:12:36 +00:00
[cloudy] Fix extraction (closes #12525) 2017-03-22 16:48:06 +00:00			`webpage = self._download_webpage(`
[cloudy] Fix extraction (closes #13737) 2017-07-26 16:12:43 +00:00			`'https://www.cloudy.ec/embed.php', video_id, query={`
			`'id': video_id,`
			`'playerPage': 1,`
			`'autoplay': 1,`
			`})`
[cloudy] Add new extractor. Closes #3743 2014-09-13 21:12:36 +00:00
[cloudy] Fix extraction (closes #12525) 2017-03-22 16:48:06 +00:00			`info = self._parse_html5_media_entries(url, webpage, video_id)[0]`
[cloudy] Retry extraction on 410 status code (#3743 #3744) 2014-09-14 12:04:16 +00:00
[cloudy] Fix extraction (closes #12525) 2017-03-22 16:48:06 +00:00			`webpage = self._download_webpage(`
			`'https://www.cloudy.ec/v/%s' % video_id, video_id, fatal=False)`
[cloudy] Add new extractor. Closes #3743 2014-09-13 21:12:36 +00:00
[cloudy] Fix extraction (closes #12525) 2017-03-22 16:48:06 +00:00			`if webpage:`
			`info.update({`
			`'title': self._search_regex(`
			`r'<h\d[^>]*>([^<]+)<', webpage, 'title'),`
			`'upload_date': unified_strdate(self._search_regex(`
			`r'>Published at (\d{4}-\d{1,2}-\d{1,2})', webpage,`
			`'upload date', fatal=False)),`
			`'view_count': str_to_int(self._search_regex(`
			`r'([\d,.]+) views<', webpage, 'view count', fatal=False)),`
			`})`
[cloudy] Retry extraction on 410 status code (#3743 #3744) 2014-09-14 12:04:16 +00:00
[cloudy] Fix extraction (closes #12525) 2017-03-22 16:48:06 +00:00			`if not info.get('title'):`
			`info['title'] = video_id`
[cloudy] Retry extraction on 410 status code (#3743 #3744) 2014-09-14 12:04:16 +00:00
[cloudy] Fix extraction (closes #12525) 2017-03-22 16:48:06 +00:00			`info['id'] = video_id`
[cloudy] Retry extraction on 410 status code (#3743 #3744) 2014-09-14 12:04:16 +00:00
[cloudy] Fix extraction (closes #12525) 2017-03-22 16:48:06 +00:00			`return info`