yt-dlp/yt_dlp/extractor/vshare.py

from .common import InfoExtractor
from ..utils import ExtractorError, decode_packed_codes


class VShareIE(InfoExtractor):
    """Extractor for vshare.io pages addressed as ``/d/<id>`` or ``/v/<id>``."""

    _VALID_URL = r'https?://(?:www\.)?vshare\.io/[dv]/(?P<id>[^/?#&]+)'
    _EMBED_REGEX = [r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?vshare\.io/v/[^/?#&]+)']
    _TESTS = [{
        'url': 'https://vshare.io/d/0f64ce6',
        'md5': '17b39f55b5497ae8b59f5fbce8e35886',
        'info_dict': {
            'id': '0f64ce6',
            'title': 'vl14062007715967',
            'ext': 'mp4',
        },
    }, {
        'url': 'https://vshare.io/v/0f64ce6/width-650/height-430/1',
        'only_matching': True,
    }]

    def _extract_packed(self, webpage):
        """Decode the obfuscated payload embedded in the player page.

        The page carries a packed ``eval(function...`` script. Once unpacked,
        it contains a bracketed list of comma-separated integers and a
        ``fromCharCode(...)`` call ending in a numeric key. Each integer minus
        that key is one character code of the decoded text.

        Returns the decoded text as a string (the caller wraps it in a
        ``<video>`` element). The regex lookups are made without a default,
        so a page lacking the expected script is reported by the search
        helper rather than handled here.
        """
        packed = self._search_regex(
            r'(eval\(function.+)', webpage, 'packed code')
        unpacked = decode_packed_codes(packed)
        digits = self._search_regex(r'\[([\d,]+)\]', unpacked, 'digits')
        # Convert the key once instead of once per decoded character.
        key_digit = int(self._search_regex(
            r'fromCharCode\(.+?(\d+)\)}', unpacked, 'key digit'))
        return ''.join(
            chr(int(digit) - key_digit) for digit in digits.split(','))

    def _real_extract(self, url):
        """Build the info dict for a vshare.io URL.

        Always downloads the ``/v/<id>/width-650/height-430/1`` player page
        (sending the original URL as Referer), whichever URL form was given.

        Raises ExtractorError with the site's own message when the page shows
        an ``xxx-error`` block, or when no media entry could be parsed.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(
            'https://vshare.io/v/%s/width-650/height-430/1' % video_id,
            video_id, headers={'Referer': url})

        # Check for a site-reported error before touching anything else, so
        # an error page yields its readable message instead of a failure in
        # title or format parsing.
        error = self._html_search_regex(
            r'(?s)<div[^>]+\bclass=["\']xxx-error[^>]+>(.+?)</div', webpage,
            'error', default=None)
        if error:
            raise ExtractorError(error, expected=True)

        title = self._html_extract_title(webpage)
        # NOTE(review): the title helper appears to be non-fatal and may
        # return None; guard the split so a missing <title> does not raise
        # AttributeError.
        if title:
            # Keep only the part before the first ' - ' separator.
            title = title.split(' - ')[0]

        entries = self._parse_html5_media_entries(
            url, '<video>%s</video>' % self._extract_packed(webpage),
            video_id)
        if not entries:
            # Previously surfaced as a bare IndexError from ``[0]``.
            raise ExtractorError('Unable to find any media entries')
        info = entries[0]

        info.update({
            'id': video_id,
            'title': title,
        })

        return info
[vshare] Add extractor (closes #12278) 2017-04-03 20:05:18 +00:00			`from .common import InfoExtractor`
[compat] Remove more functions Removing any more will require changes to a large number of extractors 2022-06-24 08:10:17 +00:00			`from ..utils import ExtractorError, decode_packed_codes`
[vshare] Add extractor (closes #12278) 2017-04-03 20:05:18 +00:00

			`class VShareIE(InfoExtractor):`
			`_VALID_URL = r'https?://(?:www\.)?vshare\.io/[dv]/(?P<id>[^/?#&]+)'`
[extractors] Use new framework for existing embeds (#4307) `Brightcove` is difficult to migrate because it's subclasses may depend on the signature of the current functions. So it is left as-is for now Note: Tests have not been migrated 2022-08-01 01:23:25 +00:00			`_EMBED_REGEX = [r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?vshare\.io/v/[^/?#&]+)']`
[vshare] Add extractor (closes #12278) 2017-04-03 20:05:18 +00:00			`_TESTS = [{`
			`'url': 'https://vshare.io/d/0f64ce6',`
[vshare] Fix extraction (closes #14473) 2017-11-14 15:34:45 +00:00			`'md5': '17b39f55b5497ae8b59f5fbce8e35886',`
[vshare] Add extractor (closes #12278) 2017-04-03 20:05:18 +00:00			`'info_dict': {`
			`'id': '0f64ce6',`
			`'title': 'vl14062007715967',`
			`'ext': 'mp4',`
			`}`
			`}, {`
			`'url': 'https://vshare.io/v/0f64ce6/width-650/height-430/1',`
			`'only_matching': True,`
			`}]`

[vshare] Fix extraction (closes #14473) 2017-11-14 15:34:45 +00:00			`def _extract_packed(self, webpage):`
[vshare] Improve extraction, fix formats sorting and carry long lines 2017-11-14 15:49:25 +00:00			`packed = self._search_regex(`
			`r'(eval\(function.+)', webpage, 'packed code')`
[vshare] Fix extraction (closes #14473) 2017-11-14 15:34:45 +00:00			`unpacked = decode_packed_codes(packed)`
[cleanup] Misc 2023-07-06 14:39:42 +00:00			`digits = self._search_regex(r'\[([\d,]+)\]', unpacked, 'digits')`
[vshare] Improve extraction, fix formats sorting and carry long lines 2017-11-14 15:49:25 +00:00			`digits = [int(digit) for digit in digits.split(',')]`
			`key_digit = self._search_regex(`
			`r'fromCharCode\(.+?(\d+)\)}', unpacked, 'key digit')`
[compat] Remove more functions Removing any more will require changes to a large number of extractors 2022-06-24 08:10:17 +00:00			`chars = [chr(d - int(key_digit)) for d in digits]`
[vshare] Fix extraction (closes #14473) 2017-11-14 15:34:45 +00:00			`return ''.join(chars)`

[vshare] Add extractor (closes #12278) 2017-04-03 20:05:18 +00:00			`def _real_extract(self, url):`
			`video_id = self._match_id(url)`

			`webpage = self._download_webpage(`
[vshare] Improve extraction, fix formats sorting and carry long lines 2017-11-14 15:49:25 +00:00			`'https://vshare.io/v/%s/width-650/height-430/1' % video_id,`
[vshare] Pass Referer to download request (closes #19205, closes #19221) 2019-02-14 16:40:46 +00:00			`video_id, headers={'Referer': url})`
[vshare] Add extractor (closes #12278) 2017-04-03 20:05:18 +00:00
[cleanup] Use `_html_extract_title` 2022-04-04 08:27:35 +00:00			`title = self._html_extract_title(webpage)`
[vshare] Fix extraction (closes #14473) 2017-11-14 15:34:45 +00:00			`title = title.split(' - ')[0]`
[vshare] Add extractor (closes #12278) 2017-04-03 20:05:18 +00:00
[vshare] Capture and output error message 2017-11-14 15:39:54 +00:00			`error = self._html_search_regex(`
			`r'(?s)<div[^>]+\bclass=["\']xxx-error[^>]+>(.+?)</div', webpage,`
			`'error', default=None)`
			`if error:`
			`raise ExtractorError(error, expected=True)`

[vshare] Improve extraction, fix formats sorting and carry long lines 2017-11-14 15:49:25 +00:00			`info = self._parse_html5_media_entries(`
			`url, '<video>%s</video>' % self._extract_packed(webpage),`
			`video_id)[0]`

			`info.update({`
[vshare] Add extractor (closes #12278) 2017-04-03 20:05:18 +00:00			`'id': video_id,`
			`'title': title,`
[vshare] Improve extraction, fix formats sorting and carry long lines 2017-11-14 15:49:25 +00:00			`})`
[vshare] Fix extraction (closes #14473) 2017-11-14 15:34:45 +00:00
[vshare] Improve extraction, fix formats sorting and carry long lines 2017-11-14 15:49:25 +00:00			`return info`