yt-dlp/yt_dlp/extractor/bigflix.py

# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..compat import (
    compat_b64decode,
    compat_urllib_parse_unquote,
)


class BigflixIE(InfoExtractor):
    """Extractor for bigflix.com video pages.

    The page embeds its stream URLs as percent-quoted, base64-encoded
    values: one ``ContentURL_<height>p...=<value>`` entry per quality, plus
    an optional generic ``file=<value>`` entry.
    """

    _VALID_URL = r'https?://(?:www\.)?bigflix\.com/.+/(?P<id>[0-9]+)'
    _TESTS = [{
        # 2 formats
        'url': 'http://www.bigflix.com/Tamil-movies/Drama-movies/Madarasapatinam/16070',
        'info_dict': {
            'id': '16070',
            'ext': 'mp4',
            'title': 'Madarasapatinam',
            'description': 'md5:9f0470b26a4ba8e824c823b5d95c2f6b',
            'formats': 'mincount:2',
        },
        'params': {
            'skip_download': True,
        }
    }, {
        # multiple formats
        'url': 'http://www.bigflix.com/Malayalam-movies/Drama-movies/Indian-Rupee/15967',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the page at *url* and build its info dict.

        Returns a dict with the keys ``id``, ``title``, ``description`` and
        ``formats``.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        title = self._html_search_regex(
            r'<div[^>]+class=["\']pagetitle["\'][^>]*>(.+?)</div>',
            webpage, 'title')

        def decode_url(quoted_b64_url):
            # Values are percent-quoted first, then base64-encoded UTF-8 text.
            return compat_b64decode(compat_urllib_parse_unquote(
                quoted_b64_url)).decode('utf-8')

        def build_format(video_url, height=None):
            # Single place that turns a decoded URL into a format dict, so
            # the per-quality entries and the generic fallback are treated
            # the same way (notably the rtmp -> flv extension hint).
            fmt = {'url': video_url}
            if height is not None:
                fmt['format_id'] = '%sp' % height
                fmt['height'] = int(height)
            if video_url.startswith('rtmp'):
                fmt['ext'] = 'flv'
            return fmt

        # One format per "ContentURL_<height>p...=<value>" entry on the page.
        formats = [
            build_format(decode_url(encoded_url), height)
            for height, encoded_url in re.findall(
                r'ContentURL_(\d{3,4})[pP][^=]+=([^&]+)', webpage)]

        # The generic "file=" entry may repeat one of the URLs collected
        # above; only add it when it contributes a new URL.
        file_url = self._search_regex(
            r'file=([^&]+)', webpage, 'video url', default=None)
        if file_url:
            video_url = decode_url(file_url)
            if all(f['url'] != video_url for f in formats):
                # Reuse the decoded value instead of decoding a second time.
                formats.append(build_format(video_url))

        self._sort_formats(formats)

        description = self._html_search_meta('description', webpage)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'formats': formats
        }
[Bigflix] Add new extractor for bigflix.com Add an IE to support bigflix.com. It uses some sort of silverlight plugin whose video url is being populated using base64 encoded flashvars. So it is quite straightforward to extract. 2016-01-09 23:31:50 +00:00			`# coding: utf-8`
			`from __future__ import unicode_literals`

[bigflix] Extract all formats 2016-01-10 04:31:36 +00:00			`import re`
[Bigflix] Add new extractor for bigflix.com Add an IE to support bigflix.com. It uses some sort of silverlight plugin whose video url is being populated using base64 encoded flashvars. So it is quite straightforward to extract. 2016-01-09 23:31:50 +00:00
			`from .common import InfoExtractor`
Switch codebase to use compat_b64decode 2018-01-23 15:23:12 +00:00			`from ..compat import (`
			`compat_b64decode,`
			`compat_urllib_parse_unquote,`
			`)`
[Bigflix] Add new extractor for bigflix.com Add an IE to support bigflix.com. It uses some sort of silverlight plugin whose video url is being populated using base64 encoded flashvars. So it is quite straightforward to extract. 2016-01-09 23:31:50 +00:00

			`class BigflixIE(InfoExtractor):`
[bigflix] Extract all formats 2016-01-10 04:31:36 +00:00			`_VALID_URL = r'https?://(?:www\.)?bigflix\.com/.+/(?P<id>[0-9]+)'`
			`_TESTS = [{`
[bigflix] Improve formats extraction 2016-01-10 04:49:27 +00:00			`# 2 formats`
[bigflix] Extract all formats 2016-01-10 04:31:36 +00:00			`'url': 'http://www.bigflix.com/Tamil-movies/Drama-movies/Madarasapatinam/16070',`
			`'info_dict': {`
			`'id': '16070',`
			`'ext': 'mp4',`
			`'title': 'Madarasapatinam',`
[bigflix] Update tests 2016-07-26 14:44:53 +00:00			`'description': 'md5:9f0470b26a4ba8e824c823b5d95c2f6b',`
[bigflix] Extract all formats 2016-01-10 04:31:36 +00:00			`'formats': 'mincount:2',`
			`},`
			`'params': {`
			`'skip_download': True,`
			`}`
[bigflix] Improve formats extraction 2016-01-10 04:49:27 +00:00			`}, {`
			`# multiple formats`
			`'url': 'http://www.bigflix.com/Malayalam-movies/Drama-movies/Indian-Rupee/15967',`
			`'only_matching': True,`
[bigflix] Extract all formats 2016-01-10 04:31:36 +00:00			`}]`
[Bigflix] Add new extractor for bigflix.com Add an IE to support bigflix.com. It uses some sort of silverlight plugin whose video url is being populated using base64 encoded flashvars. So it is quite straightforward to extract. 2016-01-09 23:31:50 +00:00
			`def _real_extract(self, url):`
			`video_id = self._match_id(url)`

			`webpage = self._download_webpage(url, video_id)`

			`title = self._html_search_regex(`
			`r'<div[^>]+class=["\']pagetitle["\'][^>]*>(.+?)</div>',`
			`webpage, 'title')`

[bigflix] Extract all formats 2016-01-10 04:31:36 +00:00			`def decode_url(quoted_b64_url):`
Switch codebase to use compat_b64decode 2018-01-23 15:23:12 +00:00			`return compat_b64decode(compat_urllib_parse_unquote(`
			`quoted_b64_url)).decode('utf-8')`
[bigflix] Improve formats extraction 2016-01-10 04:49:27 +00:00
			`formats = []`
			`for height, encoded_url in re.findall(`
[bigflix] Use correct indentation to make flake8 happy 2016-01-10 13:26:27 +00:00			`r'ContentURL_(\d{3,4})[pP][^=]+=([^&]+)', webpage):`
[bigflix] Improve formats extraction 2016-01-10 04:49:27 +00:00			`video_url = decode_url(encoded_url)`
			`f = {`
			`'url': video_url,`
			`'format_id': '%sp' % height,`
			`'height': int(height),`
			`}`
			`if video_url.startswith('rtmp'):`
			`f['ext'] = 'flv'`
			`formats.append(f)`
[bigflix] Extract all formats 2016-01-10 04:31:36 +00:00
[bigflix] Improve formats extraction 2016-01-10 04:49:27 +00:00			`file_url = self._search_regex(`
			`r'file=([^&]+)', webpage, 'video url', default=None)`
			`if file_url:`
			`video_url = decode_url(file_url)`
			`if all(f['url'] != video_url for f in formats):`
			`formats.append({`
			`'url': decode_url(file_url),`
			`})`
[bigflix] Extract all formats 2016-01-10 04:31:36 +00:00
[bigflix] Improve formats extraction 2016-01-10 04:49:27 +00:00			`self._sort_formats(formats)`
[Bigflix] Add new extractor for bigflix.com Add an IE to support bigflix.com. It uses some sort of silverlight plugin whose video url is being populated using base64 encoded flashvars. So it is quite straightforward to extract. 2016-01-09 23:31:50 +00:00
			`description = self._html_search_meta('description', webpage)`

			`return {`
			`'id': video_id,`
			`'title': title,`
			`'description': description,`
[bigflix] Extract all formats 2016-01-10 04:31:36 +00:00			`'formats': formats`
[Bigflix] Add new extractor for bigflix.com Add an IE to support bigflix.com. It uses some sort of silverlight plugin whose video url is being populated using base64 encoded flashvars. So it is quite straightforward to extract. 2016-01-09 23:31:50 +00:00			`}`