yt-dlp/yt_dlp/extractor/dropbox.py

import os.path
import re
import urllib.parse

from .common import InfoExtractor
from ..compat import compat_urllib_parse_unquote
from ..utils import (
    ExtractorError,
    traverse_obj,
    try_get,
    url_basename,
)


class DropboxIE(InfoExtractor):
    """Extractor for Dropbox shared links (``/s/<id>/...`` and ``/sh/<id>/...``).

    The title is derived from the last path component of the URL. Password
    protected links are unlocked with the ``videopassword`` parameter
    (``--video-password``) or an existing ``sm_auth`` cookie.
    """
    _VALID_URL = r'https?://(?:www\.)?dropbox[.]com/sh?/(?P<id>[a-zA-Z0-9]{15})/.*'
    _TESTS = [
        {
            'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4?dl=0',
            'info_dict': {
                'id': 'nelirfsxnmcfbfh',
                'ext': 'mp4',
                'title': 'youtube-dl test video \'ä"BaW_jenozKc'
            }
        }, {
            'url': 'https://www.dropbox.com/sh/662glsejgzoj9sr/AAByil3FGH9KFNZ13e08eSa1a/Pregame%20Ceremony%20Program%20PA%2020140518.m4v',
            'only_matching': True,
        },
    ]

    @staticmethod
    def _direct_download_url(url):
        """Return *url* with its ``dl`` query parameter forced to ``1``.

        Any existing ``dl`` parameter is replaced (wherever it sits in the
        query string); all other parameters and the fragment are preserved.
        """
        parsed = urllib.parse.urlparse(url)
        query = [
            (key, value)
            for key, value in urllib.parse.parse_qsl(parsed.query, keep_blank_values=True)
            if key != 'dl'
        ]
        query.append(('dl', '1'))
        return urllib.parse.urlunparse(parsed._replace(query=urllib.parse.urlencode(query)))

    def _download_unlocked_webpage(self, url, video_id, webpage):
        """Authenticate against a password protected link and re-download its page.

        @param webpage  The "password required" page; the ``content_id`` needed
                        for the auth request is scraped from it.
        @returns        The webpage downloaded again after authentication.
        @raises ExtractorError  If no password/``sm_auth`` cookie is available,
                                the session cookie is missing, or the password
                                is rejected.
        """
        password = self.get_param('videopassword')
        if password:
            content_id = self._search_regex(r'content_id=(.*?)["\']', webpage, 'content_id')
            token = self._get_cookies('https://www.dropbox.com').get('t')
            if not token:
                raise ExtractorError('Unable to find the Dropbox "t" cookie required for authentication')

            # Properly form-encode the body: the password and URL are arbitrary
            # strings and may contain "&", "=", "+", "%" or non-ASCII characters
            payload = urllib.parse.urlencode({
                'is_xhr': 'true',
                't': token.value,
                'content_id': content_id,
                'password': password,
                'url': url,
            })
            response = self._download_json(
                'https://www.dropbox.com/sm/auth', video_id, 'POSTing video password',
                data=payload.encode('UTF-8'),
                headers={'content-type': 'application/x-www-form-urlencoded; charset=UTF-8'})

            if response.get('status') != 'authed':
                raise ExtractorError('Authentication failed!', expected=True)
        elif not self._get_cookies('https://dropbox.com').get('sm_auth'):
            raise ExtractorError('Password protected video, use --video-password <password>', expected=True)

        return self._download_webpage(url, video_id)

    def _real_extract(self, url):
        """Build the info dict (id, title, formats, subtitles) for a shared link."""
        video_id = self._match_valid_url(url).group('id')
        webpage = self._download_webpage(url, video_id)
        # The file name is the last URL path component; the title is that name without extension
        filename = compat_urllib_parse_unquote(url_basename(url))
        title = os.path.splitext(filename)[0]

        if (self._og_search_title(webpage) == 'Dropbox - Password Required'
                or 'Enter the password for this link' in webpage):
            webpage = self._download_unlocked_webpage(url, video_id, webpage)

        info_json = self._search_json(r'InitReact\.mountComponent\(.*?,', webpage, 'mountComponent', video_id,
                                      contains_pattern=r'{.+?"preview".+?}', end_pattern=r'\)')['props']
        transcode_url = traverse_obj(info_json, ((None, 'preview'), 'file', 'preview', 'content', 'transcode_url'), get_all=False)

        # Without a transcode URL there is no HLS stream to query, but the
        # original file may still be downloadable below
        formats, subtitles = [], {}
        if transcode_url:
            formats, subtitles = self._extract_m3u8_formats_and_subtitles(transcode_url, video_id)

        # downloads enabled we can get the original file
        if 'anonymous' in (try_get(info_json, lambda x: x['sharePermission']['canDownloadRoles']) or []):
            formats.append({
                'url': self._direct_download_url(url),
                'format_id': 'original',
                'format_note': 'Original',
                'quality': 1,
            })

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'subtitles': subtitles,
        }
[dropbox] Correct test case (#2171) 2014-01-19 05:16:40 +00:00			`import os.path`
Added dropbox support. issue #2055 2014-01-18 15:15:53 +00:00			`import re`

			`from .common import InfoExtractor`
[util] Move compatibility functions out of util utils is large enough without these compatibility functions. Everything that is present in newer versions of Python (i.e. with dev Python it's just an import) goes into compat.py . Everything else (i.e. youtube-dl-specific helpers) goes into utils.py . 2014-11-02 10:23:40 +00:00			`from ..compat import compat_urllib_parse_unquote`
[Dropbox] Support password protected files and more formats (#2201) Authored by: zenerdi0de 2022-01-02 10:44:10 +00:00			`from ..utils import (`
			`ExtractorError,`
			`traverse_obj,`
			`try_get,`
			`url_basename,`
			`)`
Added dropbox support. issue #2055 2014-01-18 15:15:53 +00:00
[dropbox] PEP8 and simplify (#2171) 2014-01-19 05:14:24 +00:00
Added support for Dropbox 2014-01-19 04:50:26 +00:00			`class DropboxIE(InfoExtractor):`
[dropbox] Recognize 'https://www.dropbox.com/sh/*' urls (fixes #3795) And extract the title from the url last path component. 2014-09-21 11:40:22 +00:00			`_VALID_URL = r'https?://(?:www\.)?dropbox[.]com/sh?/(?P<id>[a-zA-Z0-9]{15})/.*'`
PEP8: applied even more rules 2014-11-23 20:39:15 +00:00			`_TESTS = [`
			`{`
Completely change project name to yt-dlp (#85) * All modules and binary names are changed * All documentation references changed * yt-dlp no longer loads youtube-dlc config files * All URLs changed to point to organization account Co-authored-by: Pccode66 Co-authored-by: pukkandan 2021-02-24 18:45:56 +00:00			`'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4?dl=0',`
PEP8: applied even more rules 2014-11-23 20:39:15 +00:00			`'info_dict': {`
			`'id': 'nelirfsxnmcfbfh',`
			`'ext': 'mp4',`
Completely change project name to yt-dlp (#85) * All modules and binary names are changed * All documentation references changed * yt-dlp no longer loads youtube-dlc config files * All URLs changed to point to organization account Co-authored-by: Pccode66 Co-authored-by: pukkandan 2021-02-24 18:45:56 +00:00			`'title': 'youtube-dl test video \'ä"BaW_jenozKc'`
PEP8: applied even more rules 2014-11-23 20:39:15 +00:00			`}`
			`}, {`
			`'url': 'https://www.dropbox.com/sh/662glsejgzoj9sr/AAByil3FGH9KFNZ13e08eSa1a/Pregame%20Ceremony%20Program%20PA%2020140518.m4v',`
			`'only_matching': True,`
			`},`
[dropbox] Recognize 'https://www.dropbox.com/sh/*' urls (fixes #3795) And extract the title from the url last path component. 2014-09-21 11:40:22 +00:00			`]`
[dropbox] PEP8 and simplify (#2171) 2014-01-19 05:14:24 +00:00
			`def _real_extract(self, url):`
[extractor] Common function `_match_valid_url` 2021-08-19 01:41:24 +00:00			`mobj = self._match_valid_url(url)`
[dropbox] PEP8 and simplify (#2171) 2014-01-19 05:14:24 +00:00			`video_id = mobj.group('id')`
[Dropbox] Support password protected files and more formats (#2201) Authored by: zenerdi0de 2022-01-02 10:44:10 +00:00			`webpage = self._download_webpage(url, video_id)`
[dropbox] Recognize 'https://www.dropbox.com/sh/*' urls (fixes #3795) And extract the title from the url last path component. 2014-09-21 11:40:22 +00:00			`fn = compat_urllib_parse_unquote(url_basename(url))`
[dropbox] Fix test and add support for spaces in filenames 2014-07-21 10:57:40 +00:00			`title = os.path.splitext(fn)[0]`
[Dropbox] Support password protected files and more formats (#2201) Authored by: zenerdi0de 2022-01-02 10:44:10 +00:00
			`password = self.get_param('videopassword')`
			`if (self._og_search_title(webpage) == 'Dropbox - Password Required'`
			`or 'Enter the password for this link' in webpage):`

			`if password:`
			`content_id = self._search_regex(r'content_id=(.*?)["\']', webpage, 'content_id')`
			`payload = f'is_xhr=true&t={self._get_cookies("https://www.dropbox.com").get("t").value}&content_id={content_id}&password={password}&url={url}'`
			`response = self._download_json(`
			`'https://www.dropbox.com/sm/auth', video_id, 'POSTing video password', data=payload.encode('UTF-8'),`
			`headers={'content-type': 'application/x-www-form-urlencoded; charset=UTF-8'})`

			`if response.get('status') != 'authed':`
			`raise ExtractorError('Authentication failed!', expected=True)`
			`webpage = self._download_webpage(url, video_id)`
			`elif self._get_cookies('https://dropbox.com').get('sm_auth'):`
			`webpage = self._download_webpage(url, video_id)`
			`else:`
			`raise ExtractorError('Password protected video, use --video-password <password>', expected=True)`

[extractor/dropbox] Extract the correct `mountComponent` 2022-06-19 01:15:35 +00:00			`info_json = self._search_json(r'InitReact\.mountComponent\(.*?,', webpage, 'mountComponent', video_id,`
[extractor] Make search_json able to parse lists Now `contains_pattern` can be set to `\[.+\]` 2022-10-03 11:20:27 +00:00			`contains_pattern=r'{.+?"preview".+?}', end_pattern=r'\)')['props']`
[Dropbox] Support password protected files and more formats (#2201) Authored by: zenerdi0de 2022-01-02 10:44:10 +00:00			`transcode_url = traverse_obj(info_json, ((None, 'preview'), 'file', 'preview', 'content', 'transcode_url'), get_all=False)`
			`formats, subtitles = self._extract_m3u8_formats_and_subtitles(transcode_url, video_id)`

			`# downloads enabled we can get the original file`
			`if 'anonymous' in (try_get(info_json, lambda x: x['sharePermission']['canDownloadRoles']) or []):`
			`video_url = re.sub(r'[?&]dl=0', '', url)`
			`video_url += ('?' if '?' not in video_url else '&') + 'dl=1'`
			`formats.append({'url': video_url, 'format_id': 'original', 'format_note': 'Original', 'quality': 1})`
[dropbox] PEP8 and simplify (#2171) 2014-01-19 05:14:24 +00:00
			`return {`
			`'id': video_id,`
			`'title': title,`
[Dropbox] Support password protected files and more formats (#2201) Authored by: zenerdi0de 2022-01-02 10:44:10 +00:00			`'formats': formats,`
			`'subtitles': subtitles`
[dropbox] PEP8 and simplify (#2171) 2014-01-19 05:14:24 +00:00			`}`