yt-dlp/yt_dlp/extractor/xfileshare.py

import re

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    decode_packed_codes,
    determine_ext,
    int_or_none,
    js_to_json,
    urlencode_postdata,
)


# based on openload_decode from 2bfeee69b976fe049761dd3012e30b637ee05a58
def aa_decode(aa_code):
    """Decode an aa-encoded (emoticon-obfuscated) string.

    The input is split on the ``(ﾟДﾟ)[ﾟεﾟ]+`` delimiter. Within each piece the
    emoticon groups are replaced by the digits they stand for and the
    remaining ``'+ '`` joiners are removed. A piece that then starts with
    digits is read as an octal code point; one that starts with ``u``
    followed by hex digits is read as a hexadecimal code point; any other
    piece contributes nothing.

    Returns the concatenation of the decoded characters.
    """
    # Order matters: the compound expressions have to be substituted before
    # the single emoticons they are built from.
    digit_emoticons = (
        ('7', '((ﾟｰﾟ) + (o^_^o))'),
        ('6', '((o^_^o) +(o^_^o))'),
        ('5', '((ﾟｰﾟ) + (ﾟΘﾟ))'),
        ('2', '((o^_^o) - (ﾟΘﾟ))'),
        ('4', '(ﾟｰﾟ)'),
        ('3', '(o^_^o)'),
        ('1', '(ﾟΘﾟ)'),
        ('0', '(c^_^o)'),
    )
    decoded = []
    for piece in aa_code.split('(ﾟДﾟ)[ﾟεﾟ]+'):
        for digit, emoticon in digit_emoticons:
            piece = piece.replace(emoticon, digit)
        piece = piece.replace('+ ', '')

        octal = re.match(r'^\d+', piece)
        if octal:
            decoded.append(chr(int(octal.group(0), 8)))
            continue

        hexadecimal = re.match(r'^u([\da-f]+)', piece)
        if hexadecimal:
            decoded.append(chr(int(hexadecimal.group(1), 16)))
    return ''.join(decoded)


class XFileShareIE(InfoExtractor):
    """Extractor for the XFileShare based video hosts listed in ``_SITES``.

    The video page is downloaded (re-submitting the ``download1`` form when
    one is present), obfuscated script blocks are decoded in place, and the
    formats are read from the jwplayer configuration or, failing that, from
    media URLs scraped out of the page.
    """

    # (host regex fragment, display name) for every supported site
    _SITES = (
        (r'aparat\.cam', 'Aparat'),
        (r'clipwatching\.com', 'ClipWatching'),
        (r'gounlimited\.to', 'GoUnlimited'),
        (r'govid\.me', 'GoVid'),
        (r'holavid\.com', 'HolaVid'),
        (r'streamty\.com', 'Streamty'),
        (r'thevideobee\.to', 'TheVideoBee'),
        (r'uqload\.com', 'Uqload'),
        (r'vidbom\.com', 'VidBom'),
        (r'vidlo\.us', 'vidlo'),
        (r'vidlocker\.xyz', 'VidLocker'),
        (r'vidshare\.tv', 'VidShare'),
        (r'vup\.to', 'VUp'),
        (r'wolfstream\.tv', 'WolfStream'),
        (r'xvideosharing\.com', 'XVideoSharing'),
    )

    IE_DESC = 'XFileShare based sites: %s' % ', '.join(name for _, name in _SITES)
    _VALID_URL = (r'https?://(?:www\.)?(?P<host>%s)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
                  % '|'.join(host for host, _ in _SITES))
    _EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:%s)/embed-[0-9a-zA-Z]+.*?)\1' % '|'.join(host for host, _ in _SITES)]

    # Page fragments that indicate the video is gone
    _FILE_NOT_FOUND_REGEXES = (
        r'>(?:404 - )?File Not Found<',
        r'>The file was removed by administrator<',
    )

    _TESTS = [{
        'url': 'https://uqload.com/dltx1wztngdz',
        'md5': '3cfbb65e4c90e93d7b37bcb65a595557',
        'info_dict': {
            'id': 'dltx1wztngdz',
            'ext': 'mp4',
            'title': 'Rick Astley Never Gonna Give You mp4',
            'thumbnail': r're:https://.*\.jpg',
        },
    }, {
        'url': 'http://xvideosharing.com/fq65f94nd2ve',
        'md5': '4181f63957e8fe90ac836fa58dc3c8a6',
        'info_dict': {
            'id': 'fq65f94nd2ve',
            'ext': 'mp4',
            'title': 'sample',
            'thumbnail': r're:http://.*\.jpg',
        },
    }, {
        'url': 'https://aparat.cam/n4d6dh0wvlpr',
        'only_matching': True,
    }, {
        'url': 'https://wolfstream.tv/nthme29v9u2x',
        'only_matching': True,
    }]

    def _extract_fallback_formats(self, webpage, video_id):
        """Build formats from media URLs scraped directly out of ``webpage``.

        URLs are collected from file/src assignments, ``file_link``,
        ``addVariable`` calls, ``<embed>`` tags and a plain ``sources: [...]``
        array, keeping the first occurrence of each URL. m3u8 URLs are
        expanded into HLS formats; every other URL becomes a single ``sd``
        format.
        """
        urls = []
        for regex in (
                r'(?:file|src)\s*:\s*(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1',
                r'file_link\s*=\s*(["\'])(?P<url>http(?:(?!\1).)+)\1',
                r'addVariable\((\\?["\'])file\1\s*,\s*(\\?["\'])(?P<url>http(?:(?!\2).)+)\2\)',
                r'<embed[^>]+src=(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1'):
            for mobj in re.finditer(regex, webpage):
                video_url = mobj.group('url')
                if video_url not in urls:
                    urls.append(video_url)

        sources = self._search_regex(
            r'sources\s*:\s*(\[(?!{)[^\]]+\])', webpage, 'sources', default=None)
        if sources:
            # Apply the same duplicate check as above so that a URL matched
            # by a regex and also listed in `sources` yields one format only
            for video_url in self._parse_json(sources, video_id):
                if video_url not in urls:
                    urls.append(video_url)

        formats = []
        for video_url in urls:
            if determine_ext(video_url) == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    video_url, video_id, 'mp4',
                    entry_protocol='m3u8_native', m3u8_id='hls',
                    fatal=False))
            else:
                formats.append({
                    'url': video_url,
                    'format_id': 'sd',
                })
        return formats

    def _real_extract(self, url):
        """Return the info dict (id, title, thumbnail, formats) for ``url``.

        Raises ExtractorError (expected) when the page matches one of
        ``_FILE_NOT_FOUND_REGEXES``.
        """
        host, video_id = self._match_valid_url(url).groups()

        # govid.me and vidlo.us are requested through their embed page
        page_path = 'embed-%s.html' % video_id if host in ('govid.me', 'vidlo.us') else video_id
        url = 'https://%s/%s' % (host, page_path)
        webpage = self._download_webpage(url, video_id)

        if any(re.search(p, webpage) for p in self._FILE_NOT_FOUND_REGEXES):
            raise ExtractorError('Video %s does not exist' % video_id, expected=True)

        fields = self._hidden_inputs(webpage)

        if fields.get('op') == 'download1':
            # Wait out the advertised countdown (if any), then re-submit
            # the hidden form fields to obtain the actual video page
            countdown = int_or_none(self._search_regex(
                r'<span id="countdown_str">(?:[Ww]ait)?\s*<span id="cxc">(\d+)</span>\s*(?:seconds?)?</span>',
                webpage, 'countdown', default=None))
            if countdown:
                self._sleep(countdown, video_id)

            webpage = self._download_webpage(
                url, video_id, 'Downloading video page',
                data=urlencode_postdata(fields), headers={
                    'Referer': url,
                    'Content-type': 'application/x-www-form-urlencoded',
                })

        # Title: site-specific markup first, then og:title, then the video id
        title = (self._search_regex(
            (r'style="z-index: [0-9]+;">([^<]+)</span>',
             r'<td nowrap>([^<]+)</td>',
             r'h4-fine[^>]*>([^<]+)<',
             r'>Watch (.+)[ <]',
             r'<h2 class="video-page-head">([^<]+)</h2>',
             r'<h2 style="[^"]*color:#403f3d[^"]*"[^>]*>([^<]+)<',  # streamin.to
             r'title\s*:\s*"([^"]+)"'),  # govid.me
            webpage, 'title', default=None) or self._og_search_title(
            webpage, default=None) or video_id).strip()

        # Replace packed / aa-encoded script code with its decoded form so
        # that the regexes below can see the player setup
        for regex, func in (
                (r'(eval\(function\(p,a,c,k,e,d\){.+)', decode_packed_codes),
                (r'(ﾟ.+)', aa_decode)):
            obf_code = self._search_regex(regex, webpage, 'obfuscated code', default=None)
            if obf_code:
                webpage = webpage.replace(obf_code, func(obf_code))

        formats = []

        jwplayer_data = self._search_regex(
            [
                r'jwplayer\("[^"]+"\)\.load\(\[({.+?})\]\);',
                r'jwplayer\("[^"]+"\)\.setup\(({.+?})\);',
            ], webpage,
            'jwplayer data', default=None)
        if jwplayer_data:
            jwplayer_data = self._parse_json(
                jwplayer_data.replace(r"\'", "'"), video_id, js_to_json)
            if jwplayer_data:
                formats = self._parse_jwplayer_data(
                    jwplayer_data, video_id, False,
                    m3u8_id='hls', mpd_id='dash')['formats']

        if not formats:
            formats = self._extract_fallback_formats(webpage, video_id)

        thumbnail = self._search_regex(
            [
                r'<video[^>]+poster="([^"]+)"',
                r'(?:image|poster)\s*:\s*["\'](http[^"\']+)["\'],',
            ], webpage, 'thumbnail', default=None)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'formats': formats,
            'http_headers': {'Referer': url}
        }
[GorillaVid] Added GorillaVid extractor 2014-06-08 02:09:45 +00:00			`import re`

			`from .common import InfoExtractor`
Fix imports and general cleanup · Import from compat what comes from compat. Yes, some names are available in utils too, but that's an implementation detail. · Use _match_id consistently whenever possible · Fix some outdated tests · Use consistent valid URL (always match the whole protocol, no ^ at start required) · Use modern test definitions 2014-12-13 11:24:42 +00:00			`from ..utils import (`
[compat] Remove more functions Removing any more will require changes to a large number of extractors 2022-06-24 08:10:17 +00:00			`ExtractorError,`
[xfileshare:xvidstage] Add support for videos with packed codes (Closes #4335) 2016-06-12 18:11:04 +00:00			`decode_packed_codes,`
[xfileshare] Improve extraction and extract hls formats 2017-04-01 11:55:48 +00:00			`determine_ext,`
[gorillavid] Generalize extraction with countdown timeout and support faststream.in (Closes #4297) 2014-11-26 14:02:40 +00:00			`int_or_none,`
[xfileshare] clean extractor - update the list of domains - add support for aa-encoded video data - improve jwplayer format extraction - add support for Clappr sources closes #17032 closes #17906 closes #18237 closes #18239 2019-10-17 12:26:45 +00:00			`js_to_json,`
Use urlencode_postdata across the codebase 2016-03-25 20:19:24 +00:00			`urlencode_postdata,`
[GorillaVid] improve extractor 2014-06-17 13:18:46 +00:00			`)`

[GorillaVid] Added GorillaVid extractor 2014-06-08 02:09:45 +00:00
[xfileshare] clean extractor - update the list of domains - add support for aa-encoded video data - improve jwplayer format extraction - add support for Clappr sources closes #17032 closes #17906 closes #18237 closes #18239 2019-10-17 12:26:45 +00:00			`# based on openload_decode from 2bfeee69b976fe049761dd3012e30b637ee05a58`
			`def aa_decode(aa_code):`
			`symbol_table = [`
			`('7', '((ﾟｰﾟ) + (o^_^o))'),`
			`('6', '((o^_^o) +(o^_^o))'),`
			`('5', '((ﾟｰﾟ) + (ﾟΘﾟ))'),`
			`('2', '((o^_^o) - (ﾟΘﾟ))'),`
			`('4', '(ﾟｰﾟ)'),`
			`('3', '(o^_^o)'),`
			`('1', '(ﾟΘﾟ)'),`
			`('0', '(c^_^o)'),`
			`]`
			`delim = '(ﾟДﾟ)[ﾟεﾟ]+'`
			`ret = ''`
			`for aa_char in aa_code.split(delim):`
			`for val, pat in symbol_table:`
			`aa_char = aa_char.replace(pat, val)`
			`aa_char = aa_char.replace('+ ', '')`
			`m = re.match(r'^\d+', aa_char)`
			`if m:`
[compat] Remove more functions Removing any more will require changes to a large number of extractors 2022-06-24 08:10:17 +00:00			`ret += chr(int(m.group(0), 8))`
[xfileshare] clean extractor - update the list of domains - add support for aa-encoded video data - improve jwplayer format extraction - add support for Clappr sources closes #17032 closes #17906 closes #18237 closes #18239 2019-10-17 12:26:45 +00:00			`else:`
			`m = re.match(r'^u([\da-f]+)', aa_char)`
			`if m:`
[compat] Remove more functions Removing any more will require changes to a large number of extractors 2022-06-24 08:10:17 +00:00			`ret += chr(int(m.group(1), 16))`
[xfileshare] clean extractor - update the list of domains - add support for aa-encoded video data - improve jwplayer format extraction - add support for Clappr sources closes #17032 closes #17906 closes #18237 closes #18239 2019-10-17 12:26:45 +00:00			`return ret`


[gorillavid] Rename to xfileshare 2015-11-11 16:58:39 +00:00			`class XFileShareIE(InfoExtractor):`
[xfileshare] Refactor _VALID_URL and remove ded sites 2016-05-03 09:35:32 +00:00			`_SITES = (`
Update to ytdl-2021.01.08 2021-01-08 16:14:50 +00:00			`(r'aparat\.cam', 'Aparat'),`
[xfileshare] clean extractor - update the list of domains - add support for aa-encoded video data - improve jwplayer format extraction - add support for Clappr sources closes #17032 closes #17906 closes #18237 closes #18239 2019-10-17 12:26:45 +00:00			`(r'clipwatching\.com', 'ClipWatching'),`
			`(r'gounlimited\.to', 'GoUnlimited'),`
			`(r'govid\.me', 'GoVid'),`
			`(r'holavid\.com', 'HolaVid'),`
			`(r'streamty\.com', 'Streamty'),`
[xfileshare] Add support for gorillavid.com and daclips.com (closes #12776) 2017-04-18 16:54:36 +00:00			`(r'thevideobee\.to', 'TheVideoBee'),`
[xfileshare] clean extractor - update the list of domains - add support for aa-encoded video data - improve jwplayer format extraction - add support for Clappr sources closes #17032 closes #17906 closes #18237 closes #18239 2019-10-17 12:26:45 +00:00			`(r'uqload\.com', 'Uqload'),`
[xfileshare] Add support for gorillavid.com and daclips.com (closes #12776) 2017-04-18 16:54:36 +00:00			`(r'vidbom\.com', 'VidBom'),`
			`(r'vidlo\.us', 'vidlo'),`
[xfileshare] clean extractor - update the list of domains - add support for aa-encoded video data - improve jwplayer format extraction - add support for Clappr sources closes #17032 closes #17906 closes #18237 closes #18239 2019-10-17 12:26:45 +00:00			`(r'vidlocker\.xyz', 'VidLocker'),`
			`(r'vidshare\.tv', 'VidShare'),`
			`(r'vup\.to', 'VUp'),`
Update to ytdl-commit-a726009 [blinkx] Remove extractor https://github.com/ytdl-org/youtube-dl/commit/a7260099873acc6dc7d76cafad2f6b139087afd0 2021-05-06 16:01:20 +00:00			`(r'wolfstream\.tv', 'WolfStream'),`
[xfileshare] clean extractor - update the list of domains - add support for aa-encoded video data - improve jwplayer format extraction - add support for Clappr sources closes #17032 closes #17906 closes #18237 closes #18239 2019-10-17 12:26:45 +00:00			`(r'xvideosharing\.com', 'XVideoSharing'),`
[xfileshare] Refactor _VALID_URL and remove ded sites 2016-05-03 09:35:32 +00:00			`)`

			`IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1])`
[xfileshare] clean extractor - update the list of domains - add support for aa-encoded video data - improve jwplayer format extraction - add support for Clappr sources closes #17032 closes #17906 closes #18237 closes #18239 2019-10-17 12:26:45 +00:00			`_VALID_URL = (r'https?://(?:www\.)?(?P<host>%s)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'`
[xfileshare] Add support for gorillavid.com and daclips.com (closes #12776) 2017-04-18 16:54:36 +00:00			`% '\|'.join(site for site in list(zip(*_SITES))[0]))`
[extractors] Use new framework for existing embeds (#4307) `Brightcove` is difficult to migrate because it's subclasses may depend on the signature of the current functions. So it is left as-is for now Note: Tests have not been migrated 2022-08-01 01:23:25 +00:00			`_EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:%s)/embed-[0-9a-zA-Z]+.?)\1' % '\|'.join(site for site in list(zip(_SITES))[0])]`
[GorillaVid] improve extractor 2014-06-17 13:18:46 +00:00
[xfileshare] Improve removed videos detection 2016-06-12 18:19:54 +00:00			`_FILE_NOT_FOUND_REGEXES = (`
			`r'>(?:404 - )?File Not Found<',`
			`r'>The file was removed by administrator<',`
			`)`
[gorillavid] Add check for non existing videos 2014-10-05 18:48:01 +00:00
[GorillaVid] improve extractor 2014-06-17 13:18:46 +00:00			`_TESTS = [{`
[extractor/xfileshare] Add Referer (#4494) Authored by: Galiley 2022-08-01 22:31:51 +00:00			`'url': 'https://uqload.com/dltx1wztngdz',`
			`'md5': '3cfbb65e4c90e93d7b37bcb65a595557',`
			`'info_dict': {`
			`'id': 'dltx1wztngdz',`
			`'ext': 'mp4',`
			`'title': 'Rick Astley Never Gonna Give You mp4',`
			`'thumbnail': r're:https://.*\.jpg'`
			`}`
			`}, {`
[xfileshare] clean extractor - update the list of domains - add support for aa-encoded video data - improve jwplayer format extraction - add support for Clappr sources closes #17032 closes #17906 closes #18237 closes #18239 2019-10-17 12:26:45 +00:00			`'url': 'http://xvideosharing.com/fq65f94nd2ve',`
			`'md5': '4181f63957e8fe90ac836fa58dc3c8a6',`
[gorillavid] Add support for daclips.in (Closes #3213) 2014-07-11 09:05:16 +00:00			`'info_dict': {`
[xfileshare] clean extractor - update the list of domains - add support for aa-encoded video data - improve jwplayer format extraction - add support for Clappr sources closes #17032 closes #17906 closes #18237 closes #18239 2019-10-17 12:26:45 +00:00			`'id': 'fq65f94nd2ve',`
[gorillavid] Add support for daclips.in (Closes #3213) 2014-07-11 09:05:16 +00:00			`'ext': 'mp4',`
[xfileshare] clean extractor - update the list of domains - add support for aa-encoded video data - improve jwplayer format extraction - add support for Clappr sources closes #17032 closes #17906 closes #18237 closes #18239 2019-10-17 12:26:45 +00:00			`'title': 'sample',`
Fix "invalid escape sequences" error on Python 3.6 2017-01-02 12:08:07 +00:00			`'thumbnail': r're:http://.*\.jpg',`
[xfileshare] Improve extraction and extract hls formats 2017-04-01 11:55:48 +00:00			`},`
Update to ytdl-2021.01.08 2021-01-08 16:14:50 +00:00			`}, {`
			`'url': 'https://aparat.cam/n4d6dh0wvlpr',`
			`'only_matching': True,`
Update to ytdl-commit-a726009 [blinkx] Remove extractor https://github.com/ytdl-org/youtube-dl/commit/a7260099873acc6dc7d76cafad2f6b139087afd0 2021-05-06 16:01:20 +00:00			`}, {`
			`'url': 'https://wolfstream.tv/nthme29v9u2x',`
			`'only_matching': True,`
[GorillaVid] improve extractor 2014-06-17 13:18:46 +00:00			`}]`
[GorillaVid] Added GorillaVid extractor 2014-06-08 02:09:45 +00:00
			`def _real_extract(self, url):`
[extractor] Common function `_match_valid_url` 2021-08-19 01:41:24 +00:00			`host, video_id = self._match_valid_url(url).groups()`
[GorillaVid] Added GorillaVid extractor 2014-06-08 02:09:45 +00:00
[xfileshare] clean extractor - update the list of domains - add support for aa-encoded video data - improve jwplayer format extraction - add support for Clappr sources closes #17032 closes #17906 closes #18237 closes #18239 2019-10-17 12:26:45 +00:00			`url = 'https://%s/' % host + ('embed-%s.html' % video_id if host in ('govid.me', 'vidlo.us') else video_id)`
[gorillavid] Build correct URL 2015-09-06 01:23:53 +00:00			`webpage = self._download_webpage(url, video_id)`
[GorillaVid] Added GorillaVid extractor 2014-06-08 02:09:45 +00:00
[xfileshare] Improve removed videos detection 2016-06-12 18:19:54 +00:00			`if any(re.search(p, webpage) for p in self._FILE_NOT_FOUND_REGEXES):`
[gorillavid] Add check for non existing videos 2014-10-05 18:48:01 +00:00			`raise ExtractorError('Video %s does not exist' % video_id, expected=True)`

[extractor/common] Improve _form_hidden_inputs and rename to _hidden_inputs 2015-07-14 16:36:30 +00:00			`fields = self._hidden_inputs(webpage)`
PEP8 applied 2014-11-23 19:41:03 +00:00
[xfileshare] clean extractor - update the list of domains - add support for aa-encoded video data - improve jwplayer format extraction - add support for Clappr sources closes #17032 closes #17906 closes #18237 closes #18239 2019-10-17 12:26:45 +00:00			`if fields.get('op') == 'download1':`
[gorillavid] Generalize extraction with countdown timeout and support faststream.in (Closes #4297) 2014-11-26 14:02:40 +00:00			`countdown = int_or_none(self._search_regex(`
			`r'<span id="countdown_str">(?:[Ww]ait)?\s<span id="cxc">(\d+)</span>\s(?:seconds?)?</span>',`
			`webpage, 'countdown', default=None))`
			`if countdown:`
			`self._sleep(countdown, video_id)`

[xfileshare] Modernize and pass referrer 2017-06-11 17:14:04 +00:00			`webpage = self._download_webpage(`
			`url, video_id, 'Downloading video page',`
			`data=urlencode_postdata(fields), headers={`
			`'Referer': url,`
			`'Content-type': 'application/x-www-form-urlencoded',`
			`})`
[GorillaVid] improve extractor 2014-06-17 13:18:46 +00:00
[gorillavid] Add test for vidto.me and strip title 2015-11-11 16:47:28 +00:00			`title = (self._search_regex(`
[xfileshare] Add title regex for streamin.to and fallback to video id (Closes #10646) 2016-09-18 00:22:06 +00:00			`(r'style="z-index: [0-9]+;">([^<]+)</span>',`
[gorillavid] Add support for vidto.me 2015-11-11 16:44:03 +00:00			`r'<td nowrap>([^<]+)</td>',`
[xfileshare] Add support for powerwatch (Closes #8628) 2016-02-22 11:37:00 +00:00			`r'h4-fine[^>]*>([^<]+)<',`
[xfileshare] clean extractor - update the list of domains - add support for aa-encoded video data - improve jwplayer format extraction - add support for Clappr sources closes #17032 closes #17906 closes #18237 closes #18239 2019-10-17 12:26:45 +00:00			`r'>Watch (.+)[ <]',`
[xfileshare] Add title regex for streamin.to and fallback to video id (Closes #10646) 2016-09-18 00:22:06 +00:00			`r'<h2 class="video-page-head">([^<]+)</h2>',`
[xfileshare] clean extractor - update the list of domains - add support for aa-encoded video data - improve jwplayer format extraction - add support for Clappr sources closes #17032 closes #17906 closes #18237 closes #18239 2019-10-17 12:26:45 +00:00			`r'<h2 style="[^"]color:#403f3d[^"]"[^>]*>([^<]+)<', # streamin.to`
			`r'title\s:\s"([^"]+)"'), # govid.me`
[xfileshare] Add title regex for streamin.to and fallback to video id (Closes #10646) 2016-09-18 00:22:06 +00:00			`webpage, 'title', default=None) or self._og_search_title(`
			`webpage, default=None) or video_id).strip()`
[xfileshare:xvidstage] Add support for videos with packed codes (Closes #4335) 2016-06-12 18:11:04 +00:00
[xfileshare] clean extractor - update the list of domains - add support for aa-encoded video data - improve jwplayer format extraction - add support for Clappr sources closes #17032 closes #17906 closes #18237 closes #18239 2019-10-17 12:26:45 +00:00			`for regex, func in (`
			`(r'(eval\(function\(p,a,c,k,e,d\){.+)', decode_packed_codes),`
			`(r'(ﾟ.+)', aa_decode)):`
			`obf_code = self._search_regex(regex, webpage, 'obfuscated code', default=None)`
			`if obf_code:`
			`webpage = webpage.replace(obf_code, func(obf_code))`

			`formats = []`

			`jwplayer_data = self._search_regex(`
			`[`
			`r'jwplayer\("[^"]+"\)\.load\(\[({.+?})\]\);',`
			`r'jwplayer\("[^"]+"\)\.setup\(({.+?})\);',`
			`], webpage,`
			`'jwplayer data', default=None)`
			`if jwplayer_data:`
			`jwplayer_data = self._parse_json(`
			`jwplayer_data.replace(r"\'", "'"), video_id, js_to_json)`
			`if jwplayer_data:`
			`formats = self._parse_jwplayer_data(`
			`jwplayer_data, video_id, False,`
			`m3u8_id='hls', mpd_id='dash')['formats']`

			`if not formats:`
[xfileshare] Improve extraction and extract hls formats 2017-04-01 11:55:48 +00:00			`urls = []`
			`for regex in (`
[xfileshare] Extend format regex (closes #13536) 2017-07-02 01:00:22 +00:00			`r'(?:file\|src)\s:\s(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8\|mp4\|flv)(?:(?!\1).)*)\1',`
[xfileshare] Improve extraction and extract hls formats 2017-04-01 11:55:48 +00:00			`r'file_link\s=\s(["\'])(?P<url>http(?:(?!\1).)+)\1',`
			`r'addVariable\((\\?["\'])file\1\s,\s(\\?["\'])(?P<url>http(?:(?!\2).)+)\2\)',`
			`r'<embed[^>]+src=(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8\|mp4\|flv)(?:(?!\1).)*)\1'):`
			`for mobj in re.finditer(regex, webpage):`
			`video_url = mobj.group('url')`
			`if video_url not in urls:`
			`urls.append(video_url)`
[xfileshare] clean extractor - update the list of domains - add support for aa-encoded video data - improve jwplayer format extraction - add support for Clappr sources closes #17032 closes #17906 closes #18237 closes #18239 2019-10-17 12:26:45 +00:00
			`sources = self._search_regex(`
			`r'sources\s:\s(\[(?!{)[^\]]+\])', webpage, 'sources', default=None)`
			`if sources:`
			`urls.extend(self._parse_json(sources, video_id))`

[xfileshare] Improve extraction and extract hls formats 2017-04-01 11:55:48 +00:00			`formats = []`
			`for video_url in urls:`
			`if determine_ext(video_url) == 'm3u8':`
			`formats.extend(self._extract_m3u8_formats(`
			`video_url, video_id, 'mp4',`
			`entry_protocol='m3u8_native', m3u8_id='hls',`
			`fatal=False))`
			`else:`
			`formats.append({`
			`'url': video_url,`
			`'format_id': 'sd',`
			`})`
[xfileshare:xvidstage] Add support for videos with packed codes (Closes #4335) 2016-06-12 18:11:04 +00:00
[gorillavid] Generalize extraction with countdown timeout and support faststream.in (Closes #4297) 2014-11-26 14:02:40 +00:00			`thumbnail = self._search_regex(`
[xfileshare] clean extractor - update the list of domains - add support for aa-encoded video data - improve jwplayer format extraction - add support for Clappr sources closes #17032 closes #17906 closes #18237 closes #18239 2019-10-17 12:26:45 +00:00			`[`
			`r'<video[^>]+poster="([^"]+)"',`
			`r'(?:image\|poster)\s:\s["\'](http[^"\']+)["\'],',`
			`], webpage, 'thumbnail', default=None)`
[GorillaVid] improve extractor 2014-06-17 13:18:46 +00:00
			`return {`
[GorillaVid] Added GorillaVid extractor 2014-06-08 02:09:45 +00:00			`'id': video_id,`
			`'title': title,`
[GorillaVid] improve extractor 2014-06-17 13:18:46 +00:00			`'thumbnail': thumbnail,`
			`'formats': formats,`
[extractor/xfileshare] Add Referer (#4494) Authored by: Galiley 2022-08-01 22:31:51 +00:00			`'http_headers': {'Referer': url}`
[GorillaVid] Added GorillaVid extractor 2014-06-08 02:09:45 +00:00			`}`