[ie/vimeo] Fix password-protected video extraction (#10341)

Closes #6603
Authored by: bashonly
2024-12-22 06:00:00 +00:00 · 2024-07-05 13:32:53 -05:00 · 2024-07-05 13:32:53 -05:00 · c1c9bb4adb
commit c1c9bb4adb
parent 6075a029db
1 changed file with 44 additions and 43 deletions
--- a/yt_dlp/extractor/vimeo.py
+++ b/yt_dlp/extractor/vimeo.py
@@ -1,6 +1,7 @@
 import base64
 import functools
 import itertools
+import json
 import re
 import urllib.parse

@@ -14,6 +15,7 @@
    determine_ext,
    get_element_by_class,
    int_or_none,
+    join_nonempty,
    js_to_json,
    merge_dicts,
    parse_filesize,
@@ -84,29 +86,23 @@ def _get_video_password(self):
                expected=True)
        return password

-    def _verify_video_password(self, url, video_id, password, token, vuid):
-        if url.startswith('http://'):
-            # vimeo only supports https now, but the user can give an http url
-            url = url.replace('http://', 'https://')
-        self._set_vimeo_cookie('vuid', vuid)
-        return self._download_webpage(
-            url + '/password', video_id, 'Verifying the password',
-            'Wrong password', data=urlencode_postdata({
-                'password': password,
-                'token': token,
-            }), headers={
-                'Content-Type': 'application/x-www-form-urlencoded',
-                'Referer': url,
-            })
-
-    def _extract_xsrft_and_vuid(self, webpage):
-        xsrft = self._search_regex(
-            r'(?:(?P<q1>["\'])xsrft(?P=q1)\s*:|xsrft\s*[=:])\s*(?P<q>["\'])(?P<xsrft>.+?)(?P=q)',
-            webpage, 'login token', group='xsrft')
-        vuid = self._search_regex(
-            r'["\']vuid["\']\s*:\s*(["\'])(?P<vuid>.+?)\1',
-            webpage, 'vuid', group='vuid')
-        return xsrft, vuid
+    def _verify_video_password(self, video_id, password, token):
+        url = f'https://vimeo.com/{video_id}'
+        try:
+            return self._download_webpage(
+                f'{url}/password', video_id,
+                'Submitting video password', data=json.dumps({
+                    'password': password,
+                    'token': token,
+                }, separators=(',', ':')).encode(), headers={
+                    'Accept': '*/*',
+                    'Content-Type': 'application/json',
+                    'Referer': url,
+                }, impersonate=True)
+        except ExtractorError as error:
+            if isinstance(error.cause, HTTPError) and error.cause.status == 418:
+                raise ExtractorError('Wrong password', expected=True)
+            raise

    def _extract_vimeo_config(self, webpage, video_id, *args, **kwargs):
        vimeo_config = self._search_regex(
@@ -745,21 +741,34 @@ def _verify_player_video_password(self, url, video_id, headers):
            raise ExtractorError('Wrong video password', expected=True)
        return checked

-    def _extract_from_api(self, video_id, unlisted_hash=None):
-        token = self._download_json(
-            'https://vimeo.com/_rv/jwt', video_id, headers={
-                'X-Requested-With': 'XMLHttpRequest',
-            })['token']
-        api_url = 'https://api.vimeo.com/videos/' + video_id
-        if unlisted_hash:
-            api_url += ':' + unlisted_hash
-        video = self._download_json(
-            api_url, video_id, headers={
-                'Authorization': 'jwt ' + token,
+    def _call_videos_api(self, video_id, jwt_token, unlisted_hash=None):
+        return self._download_json(
+            join_nonempty(f'https://api.vimeo.com/videos/{video_id}', unlisted_hash, delim=':'),
+            video_id, 'Downloading API JSON', headers={
+                'Authorization': f'jwt {jwt_token}',
                'Accept': 'application/json',
            }, query={
                'fields': 'config_url,created_time,description,license,metadata.connections.comments.total,metadata.connections.likes.total,release_time,stats.plays',
            })
+
+    def _extract_from_api(self, video_id, unlisted_hash=None):
+        viewer = self._download_json(
+            'https://vimeo.com/_next/viewer', video_id, 'Downloading viewer info')
+
+        for retry in (False, True):
+            try:
+                video = self._call_videos_api(video_id, viewer['jwt'], unlisted_hash)
+            except ExtractorError as e:
+                if (not retry and isinstance(e.cause, HTTPError) and e.cause.status == 400
+                    and 'password' in traverse_obj(
+                        e.cause.response.read(),
+                        ({bytes.decode}, {json.loads}, 'invalid_parameters', ..., 'field'),
+                )):
+                    self._verify_video_password(
+                        video_id, self._get_video_password(), viewer['xsrft'])
+                    continue
+                raise
+
        info = self._parse_config(self._download_json(
            video['config_url'], video_id), video_id)
        get_timestamp = lambda x: parse_iso8601(video.get(x + '_time'))
@@ -865,12 +874,6 @@ def _real_extract(self, url):
                    redirect_url, video_id, headers)
            return self._parse_config(config, video_id)

-        if re.search(r'<form[^>]+?id="pw_form"', webpage):
-            video_password = self._get_video_password()
-            token, vuid = self._extract_xsrft_and_vuid(webpage)
-            webpage = self._verify_video_password(
-                redirect_url, video_id, video_password, token, vuid)
-
        vimeo_config = self._extract_vimeo_config(webpage, video_id, default=None)
        if vimeo_config:
            seed_status = vimeo_config.get('seed_status') or {}
@@ -1290,9 +1293,7 @@ def _real_extract(self, url):
            video_password = self._get_video_password()
            viewer = self._download_json(
                'https://vimeo.com/_rv/viewer', video_id)
-            webpage = self._verify_video_password(
-                'https://vimeo.com/' + video_id, video_id,
-                video_password, viewer['xsrft'], viewer['vuid'])
+            webpage = self._verify_video_password(video_id, video_password, viewer['xsrft'])
            clip_page_config = self._parse_json(self._search_regex(
                r'window\.vimeo\.clip_page_config\s*=\s*({.+?});',
                webpage, 'clip page config'), video_id)