[extractor/cda] Support premium and misc improvements (#5529)

* Fix cache for non-ASCII key
* Improve error messages
* Better UA for fingerprint bypass

Authored by: selfisekai
2024-12-22 06:00:00 +00:00 · 2022-12-27 20:57:26 +01:00 · 2022-12-27 20:57:26 +01:00 · da8d2de208
commit da8d2de208
parent 15e9e578c0
2 changed files with 44 additions and 12 deletions
--- a/yt_dlp/cache.py
+++ b/yt_dlp/cache.py
@@ -5,6 +5,7 @@
 import re
 import shutil
 import traceback
+import urllib.parse

 from .utils import expand_path, traverse_obj, version_tuple, write_json_file
 from .version import __version__
@@ -22,11 +23,9 @@ def _get_root_dir(self):
        return expand_path(res)

    def _get_cache_fn(self, section, key, dtype):
-        assert re.match(r'^[a-zA-Z0-9_.-]+$', section), \
-            'invalid section %r' % section
-        assert re.match(r'^[a-zA-Z0-9_.-]+$', key), 'invalid key %r' % key
-        return os.path.join(
-            self._get_root_dir(), section, f'{key}.{dtype}')
+        assert re.match(r'^[\w.-]+$', section), f'invalid section {section!r}'
+        key = urllib.parse.quote(key, safe='').replace('%', ',')  # encode non-ascii characters
+        return os.path.join(self._get_root_dir(), section, f'{key}.{dtype}')

    @property
    def enabled(self):
--- a/yt_dlp/extractor/cda.py
+++ b/yt_dlp/extractor/cda.py
@@ -4,6 +4,7 @@
 import hashlib
 import hmac
 import json
+import random
 import re

 from .common import InfoExtractor
@@ -27,11 +28,10 @@ class CDAIE(InfoExtractor):
    _VALID_URL = r'https?://(?:(?:www\.)?cda\.pl/video|ebd\.cda\.pl/[0-9]+x[0-9]+)/(?P<id>[0-9a-z]+)'
    _NETRC_MACHINE = 'cdapl'

-    _BASE_URL = 'http://www.cda.pl/'
+    _BASE_URL = 'https://www.cda.pl'
    _BASE_API_URL = 'https://api.cda.pl'
    _API_HEADERS = {
        'Accept': 'application/vnd.cda.public+json',
-        'User-Agent': 'pl.cda 1.0 (version 1.2.88 build 15306; Android 9; Xiaomi Redmi 3S)',
    }
    # hardcoded in the app
    _LOGIN_REQUEST_AUTH = 'Basic YzU3YzBlZDUtYTIzOC00MWQwLWI2NjQtNmZmMWMxY2Y2YzVlOklBTm95QlhRRVR6U09MV1hnV3MwMW0xT2VyNWJNZzV4clRNTXhpNGZJUGVGZ0lWUlo5UGVYTDhtUGZaR1U1U3Q'
@@ -101,6 +101,38 @@ def _download_age_confirm_page(self, url, video_id, *args, **kwargs):
            }, **kwargs)

    def _perform_login(self, username, password):
+        app_version = random.choice((
+            '1.2.88 build 15306',
+            '1.2.174 build 18469',
+        ))
+        android_version = random.randrange(8, 14)
+        phone_model = random.choice((
+            # x-kom.pl top selling Android smartphones, as of 2022-12-26
+            # https://www.x-kom.pl/g-4/c/1590-smartfony-i-telefony.html?f201-system-operacyjny=61322-android
+            'ASUS ZenFone 8',
+            'Motorola edge 20 5G',
+            'Motorola edge 30 neo 5G',
+            'Motorola moto g22',
+            'OnePlus Nord 2T 5G',
+            'Samsung Galaxy A32 SM‑A325F',
+            'Samsung Galaxy M13',
+            'Samsung Galaxy S20 FE 5G',
+            'Xiaomi 11T',
+            'Xiaomi POCO M4 Pro',
+            'Xiaomi Redmi 10',
+            'Xiaomi Redmi 10C',
+            'Xiaomi Redmi 9C NFC',
+            'Xiaomi Redmi Note 10 Pro',
+            'Xiaomi Redmi Note 11 Pro',
+            'Xiaomi Redmi Note 11',
+            'Xiaomi Redmi Note 11S 5G',
+            'Xiaomi Redmi Note 11S',
+            'realme 10',
+            'realme 9 Pro+',
+            'vivo Y33s',
+        ))
+        self._API_HEADERS['User-Agent'] = f'pl.cda 1.0 (version {app_version}; Android {android_version}; {phone_model})'
+
        cached_bearer = self.cache.load(self._BEARER_CACHE, username) or {}
        if cached_bearer.get('valid_until', 0) > datetime.datetime.now().timestamp() + 5:
            self._API_HEADERS['Authorization'] = f'Bearer {cached_bearer["token"]}'
@@ -138,9 +170,6 @@ def _api_extract(self, video_id):
        meta = self._download_json(
            f'{self._BASE_API_URL}/video/{video_id}', video_id, headers=self._API_HEADERS)['video']

-        if meta.get('premium') and not meta.get('premium_free'):
-            self.report_drm(video_id)
-
        uploader = traverse_obj(meta, 'author', 'login')

        formats = [{
@@ -151,6 +180,10 @@ def _api_extract(self, video_id):
            'filesize': quality.get('length'),
        } for quality in meta['qualities'] if quality.get('file')]

+        if meta.get('premium') and not meta.get('premium_free') and not formats:
+            raise ExtractorError(
+                'Video requires CDA Premium - subscription needed', expected=True)
+
        return {
            'id': video_id,
            'title': meta.get('title'),
@@ -167,10 +200,10 @@ def _api_extract(self, video_id):
    def _web_extract(self, video_id, url):
        self._set_cookie('cda.pl', 'cda.player', 'html5')
        webpage = self._download_webpage(
-            self._BASE_URL + '/video/' + video_id, video_id)
+            f'{self._BASE_URL}/video/{video_id}/vfilm', video_id)

        if 'Ten film jest dostępny dla użytkowników premium' in webpage:
-            raise ExtractorError('This video is only available for premium users.', expected=True)
+            self.raise_login_required('This video is only available for premium users')

        if re.search(r'niedostępn[ey] w(?:&nbsp;|\s+)Twoim kraju\s*<', webpage):
            self.raise_geo_restricted()