[ie/youtube] Remove `android` from default clients (#9553)

Closes #9554
Authored by: coletdjnz, bashonly
Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
[ie/youtube] Fix comments extraction (#9775)
2024-05-17 16:03:02 +00:00 · 2024-05-17 14:37:30 +00:00 · 2024-05-17 14:33:12 +00:00 · 2024-05-17 14:28:36 +00:00 · 2024-05-17 06:20:13 +00:00 · 2024-05-16 22:41:34 +00:00
65 changed files with 2091 additions and 1543 deletions
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@ -12,6 +12,9 @@ on:
      unix:
        default: true
        type: boolean
+      linux_static:
+        default: true
+        type: boolean
      linux_arm:
        default: true
        type: boolean
@ -27,9 +30,6 @@ on:
      windows32:
        default: true
        type: boolean
-      meta_files:
-        default: true
-        type: boolean
      origin:
        required: false
        default: ''
@ -52,7 +52,11 @@ on:
        default: stable
        type: string
      unix:
-        description: yt-dlp, yt-dlp.tar.gz, yt-dlp_linux, yt-dlp_linux.zip
+        description: yt-dlp, yt-dlp.tar.gz
+        default: true
+        type: boolean
+      linux_static:
+        description: yt-dlp_linux
        default: true
        type: boolean
      linux_arm:
@ -75,10 +79,6 @@ on:
        description: yt-dlp_x86.exe
        default: true
        type: boolean
-      meta_files:
-        description: SHA2-256SUMS, SHA2-512SUMS, _update_spec
-        default: true
-        type: boolean
      origin:
        description: Origin
        required: false
@ -112,27 +112,9 @@ jobs:
      - uses: actions/setup-python@v5
        with:
          python-version: "3.10"
-      - uses: conda-incubator/setup-miniconda@v3
-        with:
-          miniforge-variant: Mambaforge
-          use-mamba: true
-          channels: conda-forge
-          auto-update-conda: true
-          activate-environment: ""
-          auto-activate-base: false
      - name: Install Requirements
        run: |
          sudo apt -y install zip pandoc man sed
-          cat > ./requirements.txt << EOF
-          python=3.10.*
-          pyinstaller
-          brotli-python
-          EOF
-          python devscripts/install_deps.py --print \
-            --exclude brotli --exclude brotlicffi \
-            --include secretstorage >> ./requirements.txt
-          mamba create -n build --file ./requirements.txt
-
      - name: Prepare
        run: |
          python devscripts/update-version.py -c "${{ inputs.channel }}" -r "${{ needs.process.outputs.origin }}" "${{ inputs.version }}"
@ -141,30 +123,15 @@ jobs:
      - name: Build Unix platform-independent binary
        run: |
          make all tar
-      - name: Build Unix standalone binary
-        shell: bash -l {0}
-        run: |
-          unset LD_LIBRARY_PATH  # Harmful; set by setup-python
-          conda activate build
-          python -m bundle.pyinstaller --onedir
-          (cd ./dist/yt-dlp_linux && zip -r ../yt-dlp_linux.zip .)
-          python -m bundle.pyinstaller
-          mv ./dist/yt-dlp_linux ./yt-dlp_linux
-          mv ./dist/yt-dlp_linux.zip ./yt-dlp_linux.zip
-
      - name: Verify --update-to
        if: vars.UPDATE_TO_VERIFICATION
        run: |
-          binaries=("yt-dlp" "yt-dlp_linux")
-          for binary in "${binaries[@]}"; do
-            chmod +x ./${binary}
-            cp ./${binary} ./${binary}_downgraded
-            version="$(./${binary} --version)"
-            ./${binary}_downgraded -v --update-to yt-dlp/yt-dlp@2023.03.04
-            downgraded_version="$(./${binary}_downgraded --version)"
-            [[ "$version" != "$downgraded_version" ]]
-          done
-
+          chmod +x ./yt-dlp
+          cp ./yt-dlp ./yt-dlp_downgraded
+          version="$(./yt-dlp --version)"
+          ./yt-dlp_downgraded -v --update-to yt-dlp/yt-dlp@2023.03.04
+          downgraded_version="$(./yt-dlp_downgraded --version)"
+          [[ "$version" != "$downgraded_version" ]]
      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
@ -172,8 +139,39 @@ jobs:
          path: |
            yt-dlp
            yt-dlp.tar.gz
-            yt-dlp_linux
-            yt-dlp_linux.zip
+          compression-level: 0
+
+  # Builds the `yt-dlp_linux` executable through the `static` service of
+  # bundle/docker/compose.yml; runs only when the `linux_static` input is true.
+  linux_static:
+    needs: process
+    if: inputs.linux_static
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Build static executable
+        # Exported so compose.yml can interpolate ${channel}, ${origin} and ${version}
+        env:
+          channel: ${{ inputs.channel }}
+          origin: ${{ needs.process.outputs.origin }}
+          version: ${{ inputs.version }}
+        run: |
+          mkdir ~/build
+          cd bundle/docker
+          docker compose up --build static
+          sudo chown "${USER}:docker" ~/build/yt-dlp_linux
+      - name: Verify --update-to
+        # Downgrades a copy of the binary and requires its reported version to differ
+        if: vars.UPDATE_TO_VERIFICATION
+        run: |
+          chmod +x ~/build/yt-dlp_linux
+          cp ~/build/yt-dlp_linux ~/build/yt-dlp_linux_downgraded
+          version="$(~/build/yt-dlp_linux --version)"
+          ~/build/yt-dlp_linux_downgraded -v --update-to yt-dlp/yt-dlp@2023.03.04
+          downgraded_version="$(~/build/yt-dlp_linux_downgraded --version)"
+          [[ "$version" != "$downgraded_version" ]]
+      - name: Upload artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: build-bin-${{ github.job }}
+          path: |
+            ~/build/yt-dlp_linux
          compression-level: 0

  linux_arm:
@ -254,7 +252,7 @@ jobs:
          # We need to fuse our own universal2 wheels for curl_cffi
          python3 -m pip install -U --user delocate
          mkdir curl_cffi_whls curl_cffi_universal2
-          python3 devscripts/install_deps.py --print -o --include curl_cffi > requirements.txt
+          python3 devscripts/install_deps.py --print -o --include curl-cffi > requirements.txt
          for platform in "macosx_11_0_arm64" "macosx_11_0_x86_64"; do
            python3 -m pip download \
              --only-binary=:all: \
@ -300,7 +298,7 @@ jobs:
  macos_legacy:
    needs: process
    if: inputs.macos_legacy
-    runs-on: macos-latest
+    runs-on: macos-12

    steps:
      - uses: actions/checkout@v4
@ -362,7 +360,7 @@ jobs:
      - name: Install Requirements
        run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds
          python devscripts/install_deps.py -o --include build
-          python devscripts/install_deps.py --include py2exe --include curl_cffi
+          python devscripts/install_deps.py --include py2exe --include curl-cffi
          python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-5.8.0-py3-none-any.whl"

      - name: Prepare
@ -447,10 +445,11 @@ jobs:
          compression-level: 0

  meta_files:
-    if: inputs.meta_files && always() && !cancelled()
+    if: always() && !cancelled()
    needs:
      - process
      - unix
+      - linux_static
      - linux_arm
      - macos
      - macos_legacy
--- a/.github/workflows/core.yml
+++ b/.github/workflows/core.yml
@ -53,7 +53,7 @@ jobs:
      with:
        python-version: ${{ matrix.python-version }}
    - name: Install test requirements
-      run: python3 ./devscripts/install_deps.py --include dev --include curl_cffi
+      run: python3 ./devscripts/install_deps.py --include dev --include curl-cffi
    - name: Run tests
      continue-on-error: False
      run: |
--- a/README.md
+++ b/README.md
@ -202,7 +202,7 @@ While all the other dependencies are optional, `ffmpeg` and `ffprobe` are highly
 The following provide support for impersonating browser requests. This may be required for some sites that employ TLS fingerprinting. 

 * [**curl_cffi**](https://github.com/yifeikong/curl_cffi) (recommended) - Python binding for [curl-impersonate](https://github.com/lwthiker/curl-impersonate). Provides impersonation targets for Chrome, Edge and Safari. Licensed under [MIT](https://github.com/yifeikong/curl_cffi/blob/main/LICENSE)
-  * Can be installed with the `curl_cffi` group, e.g. `pip install yt-dlp[default,curl_cffi]`
+  * Can be installed with the `curl-cffi` group, e.g. `pip install yt-dlp[default,curl-cffi]`
  * Currently only included in `yt-dlp.exe` and `yt-dlp_macos` builds


@ -666,7 +666,7 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git
                                    The name of the browser to load cookies
                                    from. Currently supported browsers are:
                                    brave, chrome, chromium, edge, firefox,
-                                    opera, safari, vivaldi. Optionally, the
+                                    opera, safari, vivaldi, whale. Optionally, the
                                    KEYRING used for decrypting Chromium cookies
                                    on Linux, the name/path of the PROFILE to
                                    load cookies from, and the CONTAINER name
@ -1760,7 +1760,7 @@ The following extractors use this feature:
 #### youtube
 * `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes
 * `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
-* `player_client`: Clients to extract video data from. The main clients are `web`, `android` and `ios` with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb`, `mweb_embedscreen` and `tv_embedded` (agegate bypass) with no variants. By default, `ios,android,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. You can use `all` to use all the clients, and `default` for the default clients.
+* `player_client`: Clients to extract video data from. The main clients are `web`, `ios` and `android`, with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb`, `mweb_embedscreen` and `tv_embedded` (agegate bypass) with no variants. By default, `ios,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. The `android` clients will always be given lowest priority since their formats are broken. You can use `all` to use all the clients, and `default` for the default clients.
 * `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details
 * `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
 * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
@ -1813,8 +1813,8 @@ The following extractors use this feature:
 * `app_name`: Default app name to use with mobile API calls, e.g. `trill`
 * `app_version`: Default app version to use with mobile API calls - should be set along with `manifest_app_version`, e.g. `34.1.2`
 * `manifest_app_version`: Default numeric app version to use with mobile API calls, e.g. `2023401020`
-* `aid`: Default app ID to use with API calls, e.g. `1180`
-* `app_info`: One or more app info strings in the format of `<iid>/[app_name]/[app_version]/[manifest_app_version]/[aid]`, where `iid` is the unique app install ID. `iid` is the only required value; all other values and their `/` separators can be omitted, e.g. `tiktok:app_info=1234567890123456789` or `tiktok:app_info=123,456/trill///1180,789//34.0.1/340001`
+* `aid`: Default app ID to use with mobile API calls, e.g. `1180`
+* `app_info`: Enable mobile API extraction with one or more app info strings in the format of `<iid>/[app_name]/[app_version]/[manifest_app_version]/[aid]`, where `iid` is the unique app install ID. `iid` is the only required value; all other values and their `/` separators can be omitted, e.g. `tiktok:app_info=1234567890123456789` or `tiktok:app_info=123,456/trill///1180,789//34.0.1/340001`

 #### rokfinchannel
 * `tab`: Which tab to download - one of `new`, `top`, `videos`, `podcasts`, `streams`, `stacks`
@ -1837,6 +1837,9 @@ The following extractors use this feature:
 #### jiosaavn
 * `bitrate`: Audio bitrates to request. One or more of `16`, `32`, `64`, `128`, `320`. Default is `128,320`

+#### afreecatvlive
+* `cdn`: One or more CDN IDs to use with the API call for stream URLs, e.g. `gcp_cdn`, `gs_cdn_pc_app`, `gs_cdn_mobile_web`, `gs_cdn_pc_web`
+
 **Note**: These options may be changed/removed in the future without concern for backward compatibility

 <!-- MANPAGE: MOVE "INSTALLATION" SECTION HERE -->
--- a/bundle/docker/compose.yml
+++ b/bundle/docker/compose.yml
@ -0,0 +1,10 @@
+# Build environment for the static Linux executable (yt-dlp_linux).
+services:
+  static:
+    # Image is built from the Dockerfile in ./static
+    build: static
+    # Taken from the caller's environment; read by the image's entrypoint.sh
+    environment:
+      channel: ${channel}
+      origin: ${origin}
+      version: ${version}
+    volumes:
+      # Output directory that receives the finished executable
+      - ~/build:/build
+      # Repository root, mounted at the image's working directory
+      - ../..:/yt-dlp
--- a/bundle/docker/static/Dockerfile
+++ b/bundle/docker/static/Dockerfile
@ -0,0 +1,21 @@
+# Alpine image carrying the tools entrypoint.sh needs: PyInstaller and staticx,
+# each installed into its own pipx-managed venv.
+FROM alpine:3.19 AS base
+
+RUN apk --update add --no-cache \
+        build-base \
+        python3 \
+        pipx \
+    ;
+
+RUN pipx install pyinstaller
+# Requires above step to prepare the shared venv
+RUN ~/.local/share/pipx/shared/bin/python -m pip install -U wheel
+RUN apk --update add --no-cache \
+        scons \
+        patchelf \
+        binutils \
+    ;
+RUN pipx install staticx
+
+WORKDIR /yt-dlp
+COPY entrypoint.sh /entrypoint.sh
+# Exec form: the script runs as PID 1 and receives signals directly,
+# instead of being wrapped in `/bin/sh -c`.
+ENTRYPOINT ["/entrypoint.sh"]
--- a/bundle/docker/static/entrypoint.sh
+++ b/bundle/docker/static/entrypoint.sh
@ -0,0 +1,13 @@
+#!/bin/ash
+# Builds yt-dlp_linux with PyInstaller, then repacks it with staticx into /build.
+# Reads ${channel}, ${origin} and ${version} from the environment.
+set -e
+
+# Stage 1: PyInstaller build, inside the pipx-managed pyinstaller venv
+source ~/.local/share/pipx/venvs/pyinstaller/bin/activate
+python -m devscripts.install_deps --include secretstorage
+python -m devscripts.make_lazy_extractors
+python devscripts/update-version.py -c "${channel}" -r "${origin}" "${version}"
+python -m bundle.pyinstaller
+deactivate
+
+# Stage 2: staticx, from its own pipx venv, writes the final binary to /build
+source ~/.local/share/pipx/venvs/staticx/bin/activate
+staticx /yt-dlp/dist/yt-dlp_linux /build/yt-dlp_linux
+deactivate
--- a/pyproject.toml
+++ b/pyproject.toml
@ -53,7 +53,7 @@ dependencies = [

 [project.optional-dependencies]
 default = []
-curl_cffi = ["curl-cffi==0.5.10; implementation_name=='cpython'"]
+curl-cffi = ["curl-cffi==0.5.10; implementation_name=='cpython'"]
 secretstorage = [
    "cffi",
    "secretstorage",
--- a/test/conftest.py
+++ b/test/conftest.py
@ -1,4 +1,3 @@
-import functools
 import inspect

 import pytest
@ -10,7 +9,9 @@ from yt_dlp.utils._utils import _YDLLogger as FakeLogger

@pytest.fixture
 def handler(request):
-    RH_KEY = request.param
+    RH_KEY = getattr(request, 'param', None)
+    if not RH_KEY:
+        return
    if inspect.isclass(RH_KEY) and issubclass(RH_KEY, RequestHandler):
        handler = RH_KEY
    elif RH_KEY in _REQUEST_HANDLERS:
@ -18,9 +19,46 @@ def handler(request):
    else:
        pytest.skip(f'{RH_KEY} request handler is not available')

-    return functools.partial(handler, logger=FakeLogger)
+    class HandlerWrapper(handler):
+        RH_KEY = handler.RH_KEY
+
+        def __init__(self, *args, **kwargs):
+            super().__init__(logger=FakeLogger, *args, **kwargs)
+
+    return HandlerWrapper


-def validate_and_send(rh, req):
-    rh.validate(req)
-    return rh.send(req)
+@pytest.fixture(autouse=True)
+def skip_handler(request, handler):
+    """Skip the current test when it is marked for the handler under test.
+
+    usage: pytest.mark.skip_handler('my_handler', 'reason')
+    """
+    # The `handler` fixture yields None for tests that are not parametrized
+    # with a request handler; there is nothing to compare against in that case.
+    if not handler:
+        return
+    for marker in request.node.iter_markers('skip_handler'):
+        if marker.args[0] == handler.RH_KEY:
+            pytest.skip(marker.args[1] if len(marker.args) > 1 else '')
+
+
+@pytest.fixture(autouse=True)
+def skip_handler_if(request, handler):
+    """Skip the current test for a given handler when a condition holds.
+
+    usage: pytest.mark.skip_handler_if('my_handler', lambda request: True, 'reason')
+    """
+    # The `handler` fixture yields None for tests that are not parametrized
+    # with a request handler; there is nothing to compare against in that case.
+    if not handler:
+        return
+    for marker in request.node.iter_markers('skip_handler_if'):
+        if marker.args[0] == handler.RH_KEY and marker.args[1](request):
+            pytest.skip(marker.args[2] if len(marker.args) > 2 else '')
+
+
+@pytest.fixture(autouse=True)
+def skip_handlers_if(request, handler):
+    """Skip the current test for any handler that satisfies a condition.
+
+    usage: pytest.mark.skip_handlers_if(lambda request, handler: True, 'reason')
+    """
+    if not handler:
+        return
+    for marker in request.node.iter_markers('skip_handlers_if'):
+        predicate = marker.args[0]
+        if not predicate(request, handler):
+            continue
+        reason = marker.args[1] if len(marker.args) > 1 else ''
+        pytest.skip(reason)
+
+
+def pytest_configure(config):
+    """Register the handler-skipping markers with pytest.
+
+    The text before the colon is the signature shown by `pytest --markers`,
+    so it lists the arguments each marker actually takes.
+    """
+    config.addinivalue_line(
+        'markers', 'skip_handler(handler, reason): skip test for the given handler',
+    )
+    config.addinivalue_line(
+        'markers', 'skip_handler_if(handler, condition, reason): skip test for the given handler if condition is true',
+    )
+    config.addinivalue_line(
+        'markers', 'skip_handlers_if(condition, reason): skip test for handlers when the condition is true',
+    )
--- a/test/helper.py
+++ b/test/helper.py
@ -338,3 +338,8 @@ def http_server_port(httpd):
 def verify_address_availability(address):
    if find_available_port(address) is None:
        pytest.skip(f'Unable to bind to source address {address} (address may not exist)')
+
+
+def validate_and_send(rh, req):
+    """Validate `req` against request handler `rh`, then send it and return the response."""
+    rh.validate(req)
+    response = rh.send(req)
+    return response
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@ -1906,6 +1906,15 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
            expected_status=TEAPOT_RESPONSE_STATUS)
        self.assertEqual(content, TEAPOT_RESPONSE_BODY)

+    def test_search_nextjs_data(self):
+        # Covers the parsed result plus the fallbacks for a page without __NEXT_DATA__
+        data = '<script id="__NEXT_DATA__" type="application/json">{"props":{}}</script>'
+        self.assertEqual(self.ie._search_nextjs_data(data, None), {'props': {}})
+        # Missing data: non-fatal lookups yield {} and an explicit default is returned as-is
+        self.assertEqual(self.ie._search_nextjs_data('', None, fatal=False), {})
+        self.assertEqual(self.ie._search_nextjs_data('', None, default=None), None)
+        self.assertEqual(self.ie._search_nextjs_data('', None, default={}), {})
+        # A string default is expected to surface as a DeprecationWarning exception
+        # NOTE(review): presumably relies on warnings being raised as errors here - confirm
+        with self.assertRaises(DeprecationWarning):
+            self.assertEqual(self.ie._search_nextjs_data('', None, default='{}'), {})
+

 if __name__ == '__main__':
    unittest.main()
--- a/test/test_http_proxy.py
+++ b/test/test_http_proxy.py
@ -0,0 +1,379 @@
+import abc
+import base64
+import contextlib
+import functools
+import json
+import os
+import random
+import ssl
+import threading
+from http.server import BaseHTTPRequestHandler
+from socketserver import ThreadingTCPServer
+
+import pytest
+
+from test.helper import http_server_port, verify_address_availability
+from test.test_networking import TEST_DIR
+from test.test_socks import IPv6ThreadingTCPServer
+from yt_dlp.dependencies import urllib3
+from yt_dlp.networking import Request
+from yt_dlp.networking.exceptions import HTTPError, ProxyError, SSLError
+
+
+class HTTPProxyAuthMixin:
+    """Basic proxy-authentication helpers shared by the test proxy handlers."""
+
+    def proxy_auth_error(self):
+        """Answer with 407 and a Basic challenge; always returns False."""
+        self.send_response(407)
+        self.send_header('Proxy-Authenticate', 'Basic realm="test http proxy"')
+        self.end_headers()
+        return False
+
+    def do_proxy_auth(self, username, password):
+        """Check the request's Proxy-Authorization header against the expected credentials.
+
+        Returns True when no credentials are required or the supplied ones match;
+        otherwise sends the 407 challenge and returns False.
+        """
+        if username is None and password is None:
+            return True
+
+        header = self.headers.get('Proxy-Authorization', None)
+        if header is None or not header.startswith('Basic '):
+            return self.proxy_auth_error()
+
+        try:
+            # Everything after the 'Basic ' prefix is base64("user:password")
+            given_user, given_pass = base64.b64decode(header[6:]).decode().split(':', 1)
+        except Exception:
+            return self.proxy_auth_error()
+
+        expected = (username or '', password or '')
+        if (given_user, given_pass) == expected:
+            return True
+        return self.proxy_auth_error()
+
+
+class HTTPProxyHandler(BaseHTTPRequestHandler, HTTPProxyAuthMixin):
+    """Test proxy that answers GET .../proxy_info with a JSON description of the request.
+
+    `request_handler` is accepted so all proxy classes share one constructor
+    signature; it is not used here.
+    """
+
+    def __init__(self, *args, proxy_info=None, username=None, password=None, request_handler=None, **kwargs):
+        # Attributes must exist before the base constructor runs, since it handles the request
+        self.proxy_info = proxy_info
+        self.username = username
+        self.password = password
+        super().__init__(*args, **kwargs)
+
+    def do_GET(self):
+        if self.do_proxy_auth(self.username, self.password):
+            if self.path.endswith('/proxy_info'):
+                # Prefer the info handed over by a CONNECT proxy; otherwise describe this request
+                info = self.proxy_info or {
+                    'client_address': self.client_address,
+                    'connect': False,
+                    'connect_host': None,
+                    'connect_port': None,
+                    'headers': dict(self.headers),
+                    'path': self.path,
+                    'proxy': ':'.join(str(y) for y in self.connection.getsockname()),
+                }
+                payload = json.dumps(info)
+                self.send_response(200)
+                self.send_header('Content-Type', 'application/json; charset=utf-8')
+                self.send_header('Content-Length', str(len(payload)))
+                self.end_headers()
+                self.wfile.write(payload.encode())
+            else:
+                self.send_response(404)
+                self.end_headers()
+
+        # The connection is closed after every request, authenticated or not
+        self.server.close_request(self.request)
+
+
+# SSLTransport is only defined when urllib3 is importable; otherwise it is None
+if urllib3:
+    import urllib3.util.ssltransport
+
+    class SSLTransport(urllib3.util.ssltransport.SSLTransport):
+        """
+        Modified version of urllib3 SSLTransport to support server side SSL
+
+        This allows us to chain multiple TLS connections.
+        """
+        def __init__(self, socket, ssl_context, server_hostname=None, suppress_ragged_eofs=True, server_side=False):
+            # The parent constructor is not called; the state is set up here instead
+            self.incoming = ssl.MemoryBIO()
+            self.outgoing = ssl.MemoryBIO()
+
+            self.suppress_ragged_eofs = suppress_ragged_eofs
+            self.socket = socket
+
+            self.sslobj = ssl_context.wrap_bio(
+                self.incoming,
+                self.outgoing,
+                server_hostname=server_hostname,
+                server_side=server_side
+            )
+            # Perform the TLS handshake immediately on construction
+            self._ssl_io_loop(self.sslobj.do_handshake)
+
+        # _io_refs is forwarded to the wrapped socket rather than stored on this object
+        @property
+        def _io_refs(self):
+            return self.socket._io_refs
+
+        @_io_refs.setter
+        def _io_refs(self, value):
+            self.socket._io_refs = value
+
+        def shutdown(self, *args, **kwargs):
+            # Delegates straight to the wrapped socket
+            self.socket.shutdown(*args, **kwargs)
+else:
+    SSLTransport = None
+
+
+class HTTPSProxyHandler(HTTPProxyHandler):
+    """HTTPProxyHandler served over TLS using the test certificate."""
+
+    def __init__(self, request, *args, **kwargs):
+        certfn = os.path.join(TEST_DIR, 'testcert.pem')
+        sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
+        sslctx.load_cert_chain(certfn, None)
+        # An incoming socket that is already TLS gets a second TLS layer via SSLTransport
+        # NOTE(review): SSLTransport is None without urllib3 - callers appear to guard with skipif
+        if isinstance(request, ssl.SSLSocket):
+            request = SSLTransport(request, ssl_context=sslctx, server_side=True)
+        else:
+            request = sslctx.wrap_socket(request, server_side=True)
+        super().__init__(request, *args, **kwargs)
+
+
+class HTTPConnectProxyHandler(BaseHTTPRequestHandler, HTTPProxyAuthMixin):
+    """Test proxy that accepts CONNECT and hands the tunnelled socket to `request_handler`.
+
+    `request_handler` is called like a socketserver handler class, with an extra
+    `proxy_info` keyword describing the CONNECT request.
+    """
+    protocol_version = 'HTTP/1.1'
+    default_request_version = 'HTTP/1.1'
+
+    def __init__(self, *args, username=None, password=None, request_handler=None, **kwargs):
+        # Attributes must exist before the base constructor runs, since it handles the request
+        self.username = username
+        self.password = password
+        self.request_handler = request_handler
+        super().__init__(*args, **kwargs)
+
+    def do_CONNECT(self):
+        if not self.do_proxy_auth(self.username, self.password):
+            self.server.close_request(self.request)
+            return
+        self.send_response(200)
+        self.end_headers()
+        # Split on the last colon so bracketed IPv6 targets such as `[::1]:443` parse too
+        connect_host, _, connect_port = self.path.rpartition(':')
+        proxy_info = {
+            'client_address': self.client_address,
+            'connect': True,
+            'connect_host': connect_host,
+            'connect_port': int(connect_port),
+            'headers': dict(self.headers),
+            'path': self.path,
+            'proxy': ':'.join(str(y) for y in self.connection.getsockname()),
+        }
+        # Serve the tunnelled request on the same socket, then drop the connection
+        self.request_handler(self.request, self.client_address, self.server, proxy_info=proxy_info)
+        self.server.close_request(self.request)
+
+
+class HTTPSConnectProxyHandler(HTTPConnectProxyHandler):
+    """HTTPConnectProxyHandler whose own listening side is wrapped in TLS."""
+
+    def __init__(self, request, *args, **kwargs):
+        certfn = os.path.join(TEST_DIR, 'testcert.pem')
+        sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
+        sslctx.load_cert_chain(certfn, None)
+        request = sslctx.wrap_socket(request, server_side=True)
+        # NOTE(review): assigned after wrap_socket, so this holds the TLS socket,
+        # not the raw one the name suggests - confirm intent
+        self._original_request = request
+        super().__init__(request, *args, **kwargs)
+
+    def do_CONNECT(self):
+        super().do_CONNECT()
+        # Closed again here in addition to the close done by the parent's do_CONNECT
+        self.server.close_request(self._original_request)
+
+
+@contextlib.contextmanager
+def proxy_server(proxy_server_class, request_handler, bind_ip=None, **proxy_server_kwargs):
+    """Run a proxy of `proxy_server_class` on a background thread for the `with` body.
+
+    Binds to `bind_ip` (default 127.0.0.1) on an ephemeral port and yields the
+    proxy's `host:port` address, with IPv6 hosts wrapped in brackets.
+    `request_handler` and `proxy_server_kwargs` are passed on to the handler class.
+    """
+    server = server_thread = None
+    try:
+        bind_address = bind_ip or '127.0.0.1'
+        # An address without a dot is treated as IPv6
+        is_ipv4 = '.' in bind_address
+        server_type = ThreadingTCPServer if is_ipv4 else IPv6ThreadingTCPServer
+        server = server_type(
+            (bind_address, 0), functools.partial(proxy_server_class, request_handler=request_handler, **proxy_server_kwargs))
+        server_port = http_server_port(server)
+        server_thread = threading.Thread(target=server.serve_forever)
+        server_thread.daemon = True
+        server_thread.start()
+        if is_ipv4:
+            yield f'{bind_address}:{server_port}'
+        else:
+            yield f'[{bind_address}]:{server_port}'
+    finally:
+        # Setup can fail part-way (e.g. the bind is refused). Tear down only what
+        # exists so the original exception is not masked by an AttributeError on None.
+        thread_running = server_thread is not None and server_thread.is_alive()
+        if thread_running:
+            # shutdown() waits for serve_forever(), so it is only safe once the thread runs
+            server.shutdown()
+        if server is not None:
+            server.server_close()
+        if thread_running:
+            server_thread.join(2.0)
+
+
+class HTTPProxyTestContext(abc.ABC):
+    """Pairs a request scheme with the handler class that serves the proxied request."""
+    # Handler class that ultimately answers the request (passed as `request_handler`)
+    REQUEST_HANDLER_CLASS = None
+    # Scheme used as the key of the `proxies` mapping in the tests
+    REQUEST_PROTO = None
+
+    def http_server(self, server_class, *args, **kwargs):
+        """Start `server_class` as a proxy via proxy_server(); returns its context manager."""
+        return proxy_server(server_class, self.REQUEST_HANDLER_CLASS, *args, **kwargs)
+
+    @abc.abstractmethod
+    def proxy_info_request(self, handler, target_domain=None, target_port=None, **req_kwargs) -> dict:
+        """return a dict of proxy_info"""
+
+
+class HTTPProxyHTTPTestContext(HTTPProxyTestContext):
+    # Standard HTTP Proxy for http requests
+    REQUEST_HANDLER_CLASS = HTTPProxyHandler
+    REQUEST_PROTO = 'http'
+
+    def proxy_info_request(self, handler, target_domain=None, target_port=None, **req_kwargs):
+        """Fetch /proxy_info over http through `handler` and return the decoded JSON."""
+        host = target_domain or "127.0.0.1"
+        port = target_port or "40000"
+        request = Request(f'http://{host}:{port}/proxy_info', **req_kwargs)
+        handler.validate(request)
+        response = handler.send(request)
+        return json.loads(response.read().decode())
+
+
+class HTTPProxyHTTPSTestContext(HTTPProxyTestContext):
+    # HTTP Connect proxy, for https requests
+    REQUEST_HANDLER_CLASS = HTTPSProxyHandler
+    REQUEST_PROTO = 'https'
+
+    def proxy_info_request(self, handler, target_domain=None, target_port=None, **req_kwargs):
+        """Fetch /proxy_info over https through `handler` and return the decoded JSON."""
+        host = target_domain or "127.0.0.1"
+        port = target_port or "40000"
+        request = Request(f'https://{host}:{port}/proxy_info', **req_kwargs)
+        handler.validate(request)
+        response = handler.send(request)
+        return json.loads(response.read().decode())
+
+
+# Maps the `ctx` parametrize id to the test context class to instantiate
+CTX_MAP = {
+    'http': HTTPProxyHTTPTestContext,
+    'https': HTTPProxyHTTPSTestContext,
+}
+
+
+@pytest.fixture(scope='module')
+def ctx(request):
+    """Indirect fixture: build the test context named by `request.param`."""
+    return CTX_MAP[request.param]()
+
+
+@pytest.mark.parametrize(
+    'handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
+@pytest.mark.parametrize('ctx', ['http'], indirect=True)  # pure http proxy can only support http
+class TestHTTPProxy:
+    """http:// requests sent through plain (non-CONNECT) HTTP and HTTPS proxies."""
+
+    def test_http_no_auth(self, handler, ctx):
+        with ctx.http_server(HTTPProxyHandler) as server_address:
+            with handler(proxies={ctx.REQUEST_PROTO: f'http://{server_address}'}) as rh:
+                proxy_info = ctx.proxy_info_request(rh)
+                assert proxy_info['proxy'] == server_address
+                assert proxy_info['connect'] is False
+                assert 'Proxy-Authorization' not in proxy_info['headers']
+
+    def test_http_auth(self, handler, ctx):
+        with ctx.http_server(HTTPProxyHandler, username='test', password='test') as server_address:
+            with handler(proxies={ctx.REQUEST_PROTO: f'http://test:test@{server_address}'}) as rh:
+                proxy_info = ctx.proxy_info_request(rh)
+                assert proxy_info['proxy'] == server_address
+                assert 'Proxy-Authorization' in proxy_info['headers']
+
+    def test_http_bad_auth(self, handler, ctx):
+        with ctx.http_server(HTTPProxyHandler, username='test', password='test') as server_address:
+            with handler(proxies={ctx.REQUEST_PROTO: f'http://test:bad@{server_address}'}) as rh:
+                with pytest.raises(HTTPError) as exc_info:
+                    ctx.proxy_info_request(rh)
+                assert exc_info.value.response.status == 407
+                exc_info.value.response.close()
+
+    def test_http_source_address(self, handler, ctx):
+        with ctx.http_server(HTTPProxyHandler) as server_address:
+            # Random loopback address; the test is skipped if it cannot be bound
+            source_address = f'127.0.0.{random.randint(5, 255)}'
+            verify_address_availability(source_address)
+            with handler(proxies={ctx.REQUEST_PROTO: f'http://{server_address}'},
+                         source_address=source_address) as rh:
+                proxy_info = ctx.proxy_info_request(rh)
+                assert proxy_info['proxy'] == server_address
+                assert proxy_info['client_address'][0] == source_address
+
+    @pytest.mark.skip_handler('Urllib', 'urllib does not support https proxies')
+    def test_https(self, handler, ctx):
+        with ctx.http_server(HTTPSProxyHandler) as server_address:
+            with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'https://{server_address}'}) as rh:
+                proxy_info = ctx.proxy_info_request(rh)
+                assert proxy_info['proxy'] == server_address
+                assert proxy_info['connect'] is False
+                assert 'Proxy-Authorization' not in proxy_info['headers']
+
+    @pytest.mark.skip_handler('Urllib', 'urllib does not support https proxies')
+    def test_https_verify_failed(self, handler, ctx):
+        with ctx.http_server(HTTPSProxyHandler) as server_address:
+            with handler(verify=True, proxies={ctx.REQUEST_PROTO: f'https://{server_address}'}) as rh:
+                # Accept SSLError as may not be feasible to tell if it is proxy or request error.
+                # note: if request proto also does ssl verification, this may also be the error of the request.
+                # Until we can support passing custom cacerts to handlers, we cannot properly test this for all cases.
+                with pytest.raises((ProxyError, SSLError)):
+                    ctx.proxy_info_request(rh)
+
+    def test_http_with_idn(self, handler, ctx):
+        with ctx.http_server(HTTPProxyHandler) as server_address:
+            with handler(proxies={ctx.REQUEST_PROTO: f'http://{server_address}'}) as rh:
+                # The internationalized host must reach the proxy in its punycode form
+                proxy_info = ctx.proxy_info_request(rh, target_domain='中文.tw')
+                assert proxy_info['proxy'] == server_address
+                assert proxy_info['path'].startswith('http://xn--fiq228c.tw')
+                assert proxy_info['headers']['Host'].split(':', 1)[0] == 'xn--fiq228c.tw'
+
+
+@pytest.mark.parametrize(
+    'handler,ctx', [
+        ('Requests', 'https'),
+        ('CurlCFFI', 'https'),
+    ], indirect=True)
+class TestHTTPConnectProxy:
+    """https:// requests tunnelled with CONNECT through HTTP and HTTPS proxies."""
+
+    def test_http_connect_no_auth(self, handler, ctx):
+        with ctx.http_server(HTTPConnectProxyHandler) as server_address:
+            with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'http://{server_address}'}) as rh:
+                proxy_info = ctx.proxy_info_request(rh)
+                assert proxy_info['proxy'] == server_address
+                assert proxy_info['connect'] is True
+                assert 'Proxy-Authorization' not in proxy_info['headers']
+
+    def test_http_connect_auth(self, handler, ctx):
+        with ctx.http_server(HTTPConnectProxyHandler, username='test', password='test') as server_address:
+            with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'http://test:test@{server_address}'}) as rh:
+                proxy_info = ctx.proxy_info_request(rh)
+                assert proxy_info['proxy'] == server_address
+                assert 'Proxy-Authorization' in proxy_info['headers']
+
+    @pytest.mark.skip_handler(
+        'Requests',
+        'bug in urllib3 causes unclosed socket: https://github.com/urllib3/urllib3/issues/3374'
+    )
+    def test_http_connect_bad_auth(self, handler, ctx):
+        with ctx.http_server(HTTPConnectProxyHandler, username='test', password='test') as server_address:
+            with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'http://test:bad@{server_address}'}) as rh:
+                with pytest.raises(ProxyError):
+                    ctx.proxy_info_request(rh)
+
+    def test_http_connect_source_address(self, handler, ctx):
+        with ctx.http_server(HTTPConnectProxyHandler) as server_address:
+            # Random loopback address; the test is skipped if it cannot be bound
+            source_address = f'127.0.0.{random.randint(5, 255)}'
+            verify_address_availability(source_address)
+            with handler(proxies={ctx.REQUEST_PROTO: f'http://{server_address}'},
+                         source_address=source_address,
+                         verify=False) as rh:
+                proxy_info = ctx.proxy_info_request(rh)
+                assert proxy_info['proxy'] == server_address
+                assert proxy_info['client_address'][0] == source_address
+
+    @pytest.mark.skipif(urllib3 is None, reason='requires urllib3 to test')
+    def test_https_connect_proxy(self, handler, ctx):
+        with ctx.http_server(HTTPSConnectProxyHandler) as server_address:
+            with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'https://{server_address}'}) as rh:
+                proxy_info = ctx.proxy_info_request(rh)
+                assert proxy_info['proxy'] == server_address
+                assert proxy_info['connect'] is True
+                assert 'Proxy-Authorization' not in proxy_info['headers']
+
+    @pytest.mark.skipif(urllib3 is None, reason='requires urllib3 to test')
+    def test_https_connect_verify_failed(self, handler, ctx):
+        with ctx.http_server(HTTPSConnectProxyHandler) as server_address:
+            with handler(verify=True, proxies={ctx.REQUEST_PROTO: f'https://{server_address}'}) as rh:
+                # Accept SSLError as may not be feasible to tell if it is proxy or request error.
+                # note: if request proto also does ssl verification, this may also be the error of the request.
+                # Until we can support passing custom cacerts to handlers, we cannot properly test this for all cases.
+                with pytest.raises((ProxyError, SSLError)):
+                    ctx.proxy_info_request(rh)
+
+    @pytest.mark.skipif(urllib3 is None, reason='requires urllib3 to test')
+    def test_https_connect_proxy_auth(self, handler, ctx):
+        with ctx.http_server(HTTPSConnectProxyHandler, username='test', password='test') as server_address:
+            with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'https://test:test@{server_address}'}) as rh:
+                proxy_info = ctx.proxy_info_request(rh)
+                assert proxy_info['proxy'] == server_address
+                assert 'Proxy-Authorization' in proxy_info['headers']
--- a/test/test_networking.py
+++ b/test/test_networking.py
@ -6,6 +6,8 @@ import sys

 import pytest

+from yt_dlp.networking.common import Features
+
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

 import gzip
@ -27,8 +29,12 @@ import zlib
 from email.message import Message
 from http.cookiejar import CookieJar

-from test.conftest import validate_and_send
-from test.helper import FakeYDL, http_server_port, verify_address_availability
+from test.helper import (
+    FakeYDL,
+    http_server_port,
+    validate_and_send,
+    verify_address_availability,
+)
 from yt_dlp.cookies import YoutubeDLCookieJar
 from yt_dlp.dependencies import brotli, curl_cffi, requests, urllib3
 from yt_dlp.networking import (
@ -62,21 +68,6 @@ from yt_dlp.utils.networking import HTTPHeaderDict, std_headers
 TEST_DIR = os.path.dirname(os.path.abspath(__file__))


-def _build_proxy_handler(name):
-    class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
-        proxy_name = name
-
-        def log_message(self, format, *args):
-            pass
-
-        def do_GET(self):
-            self.send_response(200)
-            self.send_header('Content-Type', 'text/plain; charset=utf-8')
-            self.end_headers()
-            self.wfile.write(f'{self.proxy_name}: {self.path}'.encode())
-    return HTTPTestRequestHandler
-
-
 class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
    protocol_version = 'HTTP/1.1'
    default_request_version = 'HTTP/1.1'
@ -317,8 +308,9 @@ class TestRequestHandlerBase:
        cls.https_server_thread.start()


+@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
 class TestHTTPRequestHandler(TestRequestHandlerBase):
-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
+
    def test_verify_cert(self, handler):
        with handler() as rh:
            with pytest.raises(CertificateVerifyError):
@ -329,7 +321,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
            assert r.status == 200
            r.close()

-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
    def test_ssl_error(self, handler):
        # HTTPS server with too old TLS version
        # XXX: is there a better way to test this than to create a new server?
@ -347,7 +338,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
                validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers'))
            assert not issubclass(exc_info.type, CertificateVerifyError)

-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
    def test_percent_encode(self, handler):
        with handler() as rh:
            # Unicode characters should be encoded with uppercase percent-encoding
@ -359,7 +349,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
            assert res.status == 200
            res.close()

-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
    @pytest.mark.parametrize('path', [
        '/a/b/./../../headers',
        '/redirect_dotsegments',
@ -375,15 +364,13 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
            assert res.url == f'http://127.0.0.1:{self.http_port}/headers'
            res.close()

-    # Not supported by CurlCFFI (non-standard)
-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
+    @pytest.mark.skip_handler('CurlCFFI', 'not supported by curl-cffi (non-standard)')
    def test_unicode_path_redirection(self, handler):
        with handler() as rh:
            r = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/302-non-ascii-redirect'))
            assert r.url == f'http://127.0.0.1:{self.http_port}/%E4%B8%AD%E6%96%87.html'
            r.close()

-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
    def test_raise_http_error(self, handler):
        with handler() as rh:
            for bad_status in (400, 500, 599, 302):
@ -393,7 +380,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
            # Should not raise an error
            validate_and_send(rh, Request('http://127.0.0.1:%d/gen_200' % self.http_port)).close()

-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
    def test_response_url(self, handler):
        with handler() as rh:
            # Response url should be that of the last url in redirect chain
@ -405,7 +391,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
            res2.close()

    # Covers some basic cases we expect some level of consistency between request handlers for
-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
    @pytest.mark.parametrize('redirect_status,method,expected', [
        # A 303 must either use GET or HEAD for subsequent request
        (303, 'POST', ('', 'GET', False)),
@ -447,7 +432,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
            assert expected[1] == res.headers.get('method')
            assert expected[2] == ('content-length' in headers.decode().lower())

-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
    def test_request_cookie_header(self, handler):
        # We should accept a Cookie header being passed as in normal headers and handle it appropriately.
        with handler() as rh:
@ -480,19 +464,16 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
            assert b'cookie: test=ytdlp' not in data.lower()
            assert b'cookie: test=test3' in data.lower()

-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
    def test_redirect_loop(self, handler):
        with handler() as rh:
            with pytest.raises(HTTPError, match='redirect loop'):
                validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_loop'))

-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
    def test_incompleteread(self, handler):
        with handler(timeout=2) as rh:
            with pytest.raises(IncompleteRead, match='13 bytes read, 234221 more expected'):
                validate_and_send(rh, Request('http://127.0.0.1:%d/incompleteread' % self.http_port)).read()

-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
    def test_cookies(self, handler):
        cookiejar = YoutubeDLCookieJar()
        cookiejar.set_cookie(http.cookiejar.Cookie(
@ -509,7 +490,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
                rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions={'cookiejar': cookiejar})).read()
            assert b'cookie: test=ytdlp' in data.lower()

-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
    def test_headers(self, handler):

        with handler(headers=HTTPHeaderDict({'test1': 'test', 'test2': 'test2'})) as rh:
@ -525,7 +505,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
            assert b'test2: test2' not in data
            assert b'test3: test3' in data

-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
    def test_read_timeout(self, handler):
        with handler() as rh:
            # Default timeout is 20 seconds, so this should go through
@ -541,7 +520,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
            validate_and_send(
                rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1', extensions={'timeout': 4}))

-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
    def test_connect_timeout(self, handler):
        # nothing should be listening on this port
        connect_timeout_url = 'http://10.255.255.255'
@ -560,7 +538,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
                    rh, Request(connect_timeout_url, extensions={'timeout': 0.01}))
                assert 0.01 <= time.time() - now < 20

-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
    def test_source_address(self, handler):
        source_address = f'127.0.0.{random.randint(5, 255)}'
        # on some systems these loopback addresses we need for testing may not be available
@ -572,13 +549,13 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
            assert source_address == data

    # Not supported by CurlCFFI
-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
+    @pytest.mark.skip_handler('CurlCFFI', 'not supported by curl-cffi')
    def test_gzip_trailing_garbage(self, handler):
        with handler() as rh:
            data = validate_and_send(rh, Request(f'http://localhost:{self.http_port}/trailing_garbage')).read().decode()
            assert data == '<html><video src="/vid.mp4" /></html>'

-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
+    @pytest.mark.skip_handler('CurlCFFI', 'not applicable to curl-cffi')
    @pytest.mark.skipif(not brotli, reason='brotli support is not installed')
    def test_brotli(self, handler):
        with handler() as rh:
@ -589,7 +566,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
            assert res.headers.get('Content-Encoding') == 'br'
            assert res.read() == b'<html><video src="/vid.mp4" /></html>'

-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
    def test_deflate(self, handler):
        with handler() as rh:
            res = validate_and_send(
@ -599,7 +575,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
            assert res.headers.get('Content-Encoding') == 'deflate'
            assert res.read() == b'<html><video src="/vid.mp4" /></html>'

-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
    def test_gzip(self, handler):
        with handler() as rh:
            res = validate_and_send(
@ -609,7 +584,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
            assert res.headers.get('Content-Encoding') == 'gzip'
            assert res.read() == b'<html><video src="/vid.mp4" /></html>'

-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
    def test_multiple_encodings(self, handler):
        with handler() as rh:
            for pair in ('gzip,deflate', 'deflate, gzip', 'gzip, gzip', 'deflate, deflate'):
@ -620,8 +594,7 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
                assert res.headers.get('Content-Encoding') == pair
                assert res.read() == b'<html><video src="/vid.mp4" /></html>'

-    # Not supported by curl_cffi
-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
+    @pytest.mark.skip_handler('CurlCFFI', 'not supported by curl-cffi')
    def test_unsupported_encoding(self, handler):
        with handler() as rh:
            res = validate_and_send(
@ -631,7 +604,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
            assert res.headers.get('Content-Encoding') == 'unsupported'
            assert res.read() == b'raw'

-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
    def test_read(self, handler):
        with handler() as rh:
            res = validate_and_send(
@ -642,83 +614,48 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
            assert res.read().decode().endswith('\n\n')
            assert res.read() == b''

+    def test_request_disable_proxy(self, handler):
+        """A per-request proxy of None must override the proxy configured on the handler.
+
+        Runs once per scheme in handler._SUPPORTED_PROXY_SCHEMES, falling back to
+        plain 'http' when that attribute is falsy.
+        """
+        proxy_schemes = handler._SUPPORTED_PROXY_SCHEMES or ['http']
+        for proxy_scheme in proxy_schemes:
+            # Given the handler is configured with a proxy
+            handler_proxies = {'http': f'{proxy_scheme}://10.255.255.255'}
+            with handler(proxies=handler_proxies, timeout=5) as rh:
+                # When a proxy is explicitly set to None for the request
+                request = Request(f'http://127.0.0.1:{self.http_port}/headers', proxies={'http': None})
+                response = validate_and_send(rh, request)
+                # Then no proxy should be used
+                response.close()
+                assert response.status == 200

-class TestHTTPProxy(TestRequestHandlerBase):
-    # Note: this only tests http urls over non-CONNECT proxy
-    @classmethod
-    def setup_class(cls):
-        super().setup_class()
-        # HTTP Proxy server
-        cls.proxy = http.server.ThreadingHTTPServer(
-            ('127.0.0.1', 0), _build_proxy_handler('normal'))
-        cls.proxy_port = http_server_port(cls.proxy)
-        cls.proxy_thread = threading.Thread(target=cls.proxy.serve_forever)
-        cls.proxy_thread.daemon = True
-        cls.proxy_thread.start()
-
-        # Geo proxy server
-        cls.geo_proxy = http.server.ThreadingHTTPServer(
-            ('127.0.0.1', 0), _build_proxy_handler('geo'))
-        cls.geo_port = http_server_port(cls.geo_proxy)
-        cls.geo_proxy_thread = threading.Thread(target=cls.geo_proxy.serve_forever)
-        cls.geo_proxy_thread.daemon = True
-        cls.geo_proxy_thread.start()
-
-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
-    def test_http_proxy(self, handler):
-        http_proxy = f'http://127.0.0.1:{self.proxy_port}'
-        geo_proxy = f'http://127.0.0.1:{self.geo_port}'
-
-        # Test global http proxy
-        # Test per request http proxy
-        # Test per request http proxy disables proxy
-        url = 'http://foo.com/bar'
-
-        # Global HTTP proxy
-        with handler(proxies={'http': http_proxy}) as rh:
-            res = validate_and_send(rh, Request(url)).read().decode()
-            assert res == f'normal: {url}'
-
-            # Per request proxy overrides global
-            res = validate_and_send(rh, Request(url, proxies={'http': geo_proxy})).read().decode()
-            assert res == f'geo: {url}'
-
-            # and setting to None disables all proxies for that request
-            real_url = f'http://127.0.0.1:{self.http_port}/headers'
-            res = validate_and_send(
-                rh, Request(real_url, proxies={'http': None})).read().decode()
-            assert res != f'normal: {real_url}'
-            assert 'Accept' in res
-
-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
+    @pytest.mark.skip_handlers_if(
+        lambda _, handler: Features.NO_PROXY not in handler._SUPPORTED_FEATURES, 'handler does not support NO_PROXY')
     def test_noproxy(self, handler):
-        with handler(proxies={'proxy': f'http://127.0.0.1:{self.proxy_port}'}) as rh:
-            # NO_PROXY
-            for no_proxy in (f'127.0.0.1:{self.http_port}', '127.0.0.1', 'localhost'):
-                nop_response = validate_and_send(
-                    rh, Request(f'http://127.0.0.1:{self.http_port}/headers', proxies={'no': no_proxy})).read().decode(
-                    'utf-8')
-                assert 'Accept' in nop_response
+        """A request whose 'no' proxy entry is one of the listed values must still get a 200 response.
+
+        Runs once per scheme in handler._SUPPORTED_PROXY_SCHEMES (or plain 'http'
+        when that attribute is falsy), with the handler's 'http' proxy pointing at
+        10.255.255.255.
+        """
+        for proxy_proto in handler._SUPPORTED_PROXY_SCHEMES or ['http']:
+            # Given the handler is configured with a proxy
+            with handler(proxies={'http': f'{proxy_proto}://10.255.255.255'}, timeout=5) as rh:
+                for no_proxy in (f'127.0.0.1:{self.http_port}', '127.0.0.1', 'localhost'):
+                    # When request no proxy includes the request url host
+                    nop_response = validate_and_send(
+                        rh, Request(f'http://127.0.0.1:{self.http_port}/headers', proxies={'no': no_proxy}))
+                    # Then the proxy should not be used
+                    assert nop_response.status == 200
+                    nop_response.close()

-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
+    @pytest.mark.skip_handlers_if(
+        lambda _, handler: Features.ALL_PROXY not in handler._SUPPORTED_FEATURES, 'handler does not support ALL_PROXY')
     def test_allproxy(self, handler):
-        url = 'http://foo.com/bar'
-        with handler() as rh:
-            response = validate_and_send(rh, Request(url, proxies={'all': f'http://127.0.0.1:{self.proxy_port}'})).read().decode(
-                'utf-8')
-            assert response == f'normal: {url}'
+        """Expect TransportError when the 'all' proxy key points at http://10.255.255.255.
+
+        Covers the proxy being set on the handler and the proxy being set on the
+        individual request.
+        """
+        # This is a bit of a hacky test, but it should be enough to check whether the handler is using the proxy.
+        # 0.1s might not be enough of a timeout if proxy is not used in all cases, but should still get failures.
+        with handler(proxies={'all': 'http://10.255.255.255'}, timeout=0.1) as rh:
+            with pytest.raises(TransportError):
+                validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).close()

-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
-    def test_http_proxy_with_idn(self, handler):
-        with handler(proxies={
-            'http': f'http://127.0.0.1:{self.proxy_port}',
-        }) as rh:
-            url = 'http://中文.tw/'
-            response = rh.send(Request(url)).read().decode()
-            # b'xn--fiq228c' is '中文'.encode('idna')
-            assert response == 'normal: http://xn--fiq228c.tw/'
+        with handler(timeout=0.1) as rh:
+            with pytest.raises(TransportError):
+                validate_and_send(
+                    rh, Request(
+                        f'http://127.0.0.1:{self.http_port}/headers', proxies={'all': 'http://10.255.255.255'})).close()


+@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
 class TestClientCertificate:
    @classmethod
    def setup_class(cls):
@ -745,27 +682,23 @@ class TestClientCertificate:
        ) as rh:
            validate_and_send(rh, Request(f'https://127.0.0.1:{self.port}/video.html')).read().decode()

-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
    def test_certificate_combined_nopass(self, handler):
        self._run_test(handler, client_cert={
            'client_certificate': os.path.join(self.certdir, 'clientwithkey.crt'),
        })

-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
    def test_certificate_nocombined_nopass(self, handler):
        self._run_test(handler, client_cert={
            'client_certificate': os.path.join(self.certdir, 'client.crt'),
            'client_certificate_key': os.path.join(self.certdir, 'client.key'),
        })

-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
    def test_certificate_combined_pass(self, handler):
        self._run_test(handler, client_cert={
            'client_certificate': os.path.join(self.certdir, 'clientwithencryptedkey.crt'),
            'client_certificate_password': 'foobar',
        })

-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
    def test_certificate_nocombined_pass(self, handler):
        self._run_test(handler, client_cert={
            'client_certificate': os.path.join(self.certdir, 'client.crt'),
@ -785,6 +718,25 @@ class TestHTTPImpersonateRequestHandler(TestRequestHandlerBase):
                assert res.status == 200
                assert std_headers['user-agent'].lower() not in res.read().decode().lower()

+    def test_response_extensions(self, handler):
+        """Each supported impersonate target must be reported in the response extensions.
+
+        For every target in rh.supported_targets, the response's
+        extensions['impersonate'] must equal rh._get_request_target(request).
+        """
+        with handler() as rh:
+            for target in rh.supported_targets:
+                request = Request(
+                    f'http://127.0.0.1:{self.http_port}/gen_200', extensions={'impersonate': target})
+                res = validate_and_send(rh, request)
+                try:
+                    assert res.extensions['impersonate'] == rh._get_request_target(request)
+                finally:
+                    # Release the response like the other tests in this file do,
+                    # even when the assertion above fails.
+                    res.close()
+
+    def test_http_error_response_extensions(self, handler):
+        """The response attached to an HTTPError must report the impersonate target used.
+
+        For every target in rh.supported_targets, a request to /gen_404 must raise
+        HTTPError, and the error response's extensions['impersonate'] must equal
+        rh._get_request_target(request).
+        """
+        with handler() as rh:
+            for target in rh.supported_targets:
+                request = Request(
+                    f'http://127.0.0.1:{self.http_port}/gen_404', extensions={'impersonate': target})
+                # pytest.raises fails the test if no HTTPError is raised. A bare
+                # try/except would leave `res` unbound on the first target, or
+                # silently reuse the previous target's response on later ones.
+                with pytest.raises(HTTPError) as exc_info:
+                    validate_and_send(rh, request)
+                res = exc_info.value.response
+                assert res.extensions['impersonate'] == rh._get_request_target(request)
+

 class TestRequestHandlerMisc:
    """Misc generic tests for request handlers, not related to request or validation testing"""
@ -805,8 +757,8 @@ class TestRequestHandlerMisc:
        assert len(logging_handlers) == before_count


+@pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
 class TestUrllibRequestHandler(TestRequestHandlerBase):
-    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_file_urls(self, handler):
        # See https://github.com/ytdl-org/youtube-dl/issues/8227
        tf = tempfile.NamedTemporaryFile(delete=False)
@ -828,7 +780,6 @@ class TestUrllibRequestHandler(TestRequestHandlerBase):

        os.unlink(tf.name)

-    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_http_error_returns_content(self, handler):
        # urllib HTTPError will try close the underlying response if reference to the HTTPError object is lost
        def get_response():
@ -841,7 +792,6 @@ class TestUrllibRequestHandler(TestRequestHandlerBase):

        assert get_response().read() == b'<html></html>'

-    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_verify_cert_error_text(self, handler):
        # Check the output of the error message
        with handler() as rh:
@ -851,7 +801,6 @@ class TestUrllibRequestHandler(TestRequestHandlerBase):
            ):
                validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers'))

-    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    @pytest.mark.parametrize('req,match,version_check', [
        # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1256
        # bpo-39603: Check implemented in 3.7.9+, 3.8.5+
@ -1183,7 +1132,7 @@ class TestRequestHandlerValidation:
    ]

    PROXY_SCHEME_TESTS = [
-        # scheme, expected to fail
+        # proxy scheme, expected to fail
        ('Urllib', 'http', [
            ('http', False),
            ('https', UnsupportedRequest),
@ -1209,30 +1158,41 @@ class TestRequestHandlerValidation:
            ('socks5', False),
            ('socks5h', False),
        ]),
+        ('Websockets', 'ws', [
+            ('http', UnsupportedRequest),
+            ('https', UnsupportedRequest),
+            ('socks4', False),
+            ('socks4a', False),
+            ('socks5', False),
+            ('socks5h', False),
+        ]),
        (NoCheckRH, 'http', [('http', False)]),
        (HTTPSupportedRH, 'http', [('http', UnsupportedRequest)]),
-        ('Websockets', 'ws', [('http', UnsupportedRequest)]),
        (NoCheckRH, 'http', [('http', False)]),
        (HTTPSupportedRH, 'http', [('http', UnsupportedRequest)]),
    ]

    PROXY_KEY_TESTS = [
-        # key, expected to fail
-        ('Urllib', [
-            ('all', False),
-            ('unrelated', False),
+        # proxy key, proxy scheme, expected to fail
+        ('Urllib', 'http', [
+            ('all', 'http', False),
+            ('unrelated', 'http', False),
        ]),
-        ('Requests', [
-            ('all', False),
-            ('unrelated', False),
+        ('Requests', 'http', [
+            ('all', 'http', False),
+            ('unrelated', 'http', False),
        ]),
-        ('CurlCFFI', [
-            ('all', False),
-            ('unrelated', False),
+        ('CurlCFFI', 'http', [
+            ('all', 'http', False),
+            ('unrelated', 'http', False),
        ]),
-        (NoCheckRH, [('all', False)]),
-        (HTTPSupportedRH, [('all', UnsupportedRequest)]),
-        (HTTPSupportedRH, [('no', UnsupportedRequest)]),
+        ('Websockets', 'ws', [
+            ('all', 'socks5', False),
+            ('unrelated', 'socks5', False),
+        ]),
+        (NoCheckRH, 'http', [('all', 'http', False)]),
+        (HTTPSupportedRH, 'http', [('all', 'http', UnsupportedRequest)]),
+        (HTTPSupportedRH, 'http', [('no', 'http', UnsupportedRequest)]),
    ]

    EXTENSION_TESTS = [
@ -1274,28 +1234,54 @@ class TestRequestHandlerValidation:
        ]),
    ]

+    @pytest.mark.parametrize('handler,fail,scheme', [
+        ('Urllib', False, 'http'),
+        ('Requests', False, 'http'),
+        ('CurlCFFI', False, 'http'),
+        ('Websockets', False, 'ws'),
+    ], indirect=['handler'])
+    def test_no_proxy(self, handler, fail, scheme):
+        """Validate a 'no' proxy entry for a bare `{scheme}://` request, supplied in two ways."""
+        url = f'{scheme}://'
+        no_proxy_hosts = '127.0.0.1,github.com'
+        # 'no' proxy set on the request itself
+        run_validation(handler, fail, Request(url, proxies={'no': no_proxy_hosts}))
+        # 'no' proxy passed as a keyword argument to run_validation
+        # (presumably forwarded to the handler - confirm against run_validation)
+        run_validation(handler, fail, Request(url), proxies={'no': no_proxy_hosts})
+
+    @pytest.mark.parametrize('handler,scheme', [
+        ('Urllib', 'http'),
+        (HTTPSupportedRH, 'http'),
+        ('Requests', 'http'),
+        ('CurlCFFI', 'http'),
+        ('Websockets', 'ws'),
+    ], indirect=['handler'])
+    def test_empty_proxy(self, handler, scheme):
+        """A proxy value of None for the request scheme must pass validation (fail=False)."""
+        url = f'{scheme}://'
+        # None proxy set on the request itself
+        run_validation(handler, False, Request(url, proxies={scheme: None}))
+        # None proxy passed as a keyword argument to run_validation
+        run_validation(handler, False, Request(url), proxies={scheme: None})
+
+    @pytest.mark.parametrize('proxy_url', ['//example.com', 'example.com', '127.0.0.1', '/a/b/c'])
+    @pytest.mark.parametrize('handler,scheme', [
+        ('Urllib', 'http'),
+        (HTTPSupportedRH, 'http'),
+        ('Requests', 'http'),
+        ('CurlCFFI', 'http'),
+        ('Websockets', 'ws'),
+    ], indirect=['handler'])
+    def test_invalid_proxy_url(self, handler, scheme, proxy_url):
+        """Each listed proxy_url, set on the request for its own scheme, must yield UnsupportedRequest."""
+        request = Request(f'{scheme}://', proxies={scheme: proxy_url})
+        run_validation(handler, UnsupportedRequest, request)
+
     @pytest.mark.parametrize('handler,scheme,fail,handler_kwargs', [
         (handler_tests[0], scheme, fail, handler_kwargs)
         for handler_tests in URL_SCHEME_TESTS
         for scheme, fail, handler_kwargs in handler_tests[1]
-
     ], indirect=['handler'])
     def test_url_scheme(self, handler, scheme, fail, handler_kwargs):
+        """Run validation on a bare `{scheme}://` request for every case listed in URL_SCHEME_TESTS."""
         run_validation(handler, fail, Request(f'{scheme}://'), **(handler_kwargs or {}))

-    @pytest.mark.parametrize('handler,fail', [('Urllib', False), ('Requests', False), ('CurlCFFI', False)], indirect=['handler'])
-    def test_no_proxy(self, handler, fail):
-        run_validation(handler, fail, Request('http://', proxies={'no': '127.0.0.1,github.com'}))
-        run_validation(handler, fail, Request('http://'), proxies={'no': '127.0.0.1,github.com'})
-
-    @pytest.mark.parametrize('handler,proxy_key,fail', [
-        (handler_tests[0], proxy_key, fail)
+    @pytest.mark.parametrize('handler,scheme,proxy_key,proxy_scheme,fail', [
+        (handler_tests[0], handler_tests[1], proxy_key, proxy_scheme, fail)
         for handler_tests in PROXY_KEY_TESTS
-        for proxy_key, fail in handler_tests[1]
+        for proxy_key, proxy_scheme, fail in handler_tests[2]
     ], indirect=['handler'])
-    def test_proxy_key(self, handler, proxy_key, fail):
-        run_validation(handler, fail, Request('http://', proxies={proxy_key: 'http://example.com'}))
-        run_validation(handler, fail, Request('http://'), proxies={proxy_key: 'http://example.com'})
+    def test_proxy_key(self, handler, scheme, proxy_key, proxy_scheme, fail):
+        """Validate a `{proxy_scheme}://example.com` proxy stored under `proxy_key`.
+
+        The proxy mapping is supplied once on the request and once as a keyword
+        argument to run_validation; `fail` is passed through to run_validation.
+        """
+        run_validation(handler, fail, Request(f'{scheme}://', proxies={proxy_key: f'{proxy_scheme}://example.com'}))
+        run_validation(handler, fail, Request(f'{scheme}://'), proxies={proxy_key: f'{proxy_scheme}://example.com'})

    @pytest.mark.parametrize('handler,req_scheme,scheme,fail', [
        (handler_tests[0], handler_tests[1], scheme, fail)
@ -1306,16 +1292,6 @@ class TestRequestHandlerValidation:
        run_validation(handler, fail, Request(f'{req_scheme}://', proxies={req_scheme: f'{scheme}://example.com'}))
        run_validation(handler, fail, Request(f'{req_scheme}://'), proxies={req_scheme: f'{scheme}://example.com'})

-    @pytest.mark.parametrize('handler', ['Urllib', HTTPSupportedRH, 'Requests', 'CurlCFFI'], indirect=True)
-    def test_empty_proxy(self, handler):
-        run_validation(handler, False, Request('http://', proxies={'http': None}))
-        run_validation(handler, False, Request('http://'), proxies={'http': None})
-
-    @pytest.mark.parametrize('proxy_url', ['//example.com', 'example.com', '127.0.0.1', '/a/b/c'])
-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
-    def test_invalid_proxy_url(self, handler, proxy_url):
-        run_validation(handler, UnsupportedRequest, Request('http://', proxies={'http': proxy_url}))
-
    @pytest.mark.parametrize('handler,scheme,extensions,fail', [
        (handler_tests[0], handler_tests[1], extensions, fail)
        for handler_tests in EXTENSION_TESTS
--- a/test/test_utils.py
+++ b/test/test_utils.py
@ -2059,7 +2059,22 @@ Line 1
        assert extract_basic_auth('http://user:pass@foo.bar') == ('http://foo.bar', 'Basic dXNlcjpwYXNz')

    @unittest.skipUnless(compat_os_name == 'nt', 'Only relevant on Windows')
-    def test_Popen_windows_escaping(self):
+    def test_windows_escaping(self):
+        tests = [
+            'test"&',
+            '%CMDCMDLINE:~-1%&',
+            'a\nb',
+            '"',
+            '\\',
+            '!',
+            '^!',
+            'a \\ b',
+            'a \\" b',
+            'a \\ b\\',
+            # We replace \r with \n
+            ('a\r\ra', 'a\n\na'),
+        ]
+
        def run_shell(args):
            stdout, stderr, error = Popen.run(
                args, text=True, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
@ -2067,15 +2082,15 @@ Line 1
            assert not error
            return stdout

-        # Test escaping
-        assert run_shell(['echo', 'test"&']) == '"test""&"\n'
-        assert run_shell(['echo', '%CMDCMDLINE:~-1%&']) == '"%CMDCMDLINE:~-1%&"\n'
-        assert run_shell(['echo', 'a\nb']) == '"a"\n"b"\n'
-        assert run_shell(['echo', '"']) == '""""\n'
-        assert run_shell(['echo', '\\']) == '\\\n'
-        # Test if delayed expansion is disabled
-        assert run_shell(['echo', '^!']) == '"^!"\n'
-        assert run_shell('echo "^!"') == '"^!"\n'
+        for argument in tests:
+            if isinstance(argument, str):
+                expected = argument
+            else:
+                argument, expected = argument
+
+            args = [sys.executable, '-c', 'import sys; print(end=sys.argv[1])', argument, 'end']
+            assert run_shell(args) == expected
+            assert run_shell(shell_quote(args, shell=True)) == expected


 if __name__ == '__main__':
--- a/test/test_websockets.py
+++ b/test/test_websockets.py
@ -7,6 +7,7 @@ import sys
 import pytest

 from test.helper import verify_address_availability
+from yt_dlp.networking.common import Features

 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

@ -18,7 +19,7 @@ import random
 import ssl
 import threading

-from yt_dlp import socks
+from yt_dlp import socks, traverse_obj
 from yt_dlp.cookies import YoutubeDLCookieJar
 from yt_dlp.dependencies import websockets
 from yt_dlp.networking import Request
@ -114,6 +115,7 @@ def ws_validate_and_send(rh, req):


@pytest.mark.skipif(not websockets, reason='websockets must be installed to test websocket request handlers')
+@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
 class TestWebsSocketRequestHandlerConformance:
    @classmethod
    def setup_class(cls):
@ -129,7 +131,6 @@ class TestWebsSocketRequestHandlerConformance:
        cls.mtls_wss_thread, cls.mtls_wss_port = create_mtls_wss_websocket_server()
        cls.mtls_wss_base_url = f'wss://127.0.0.1:{cls.mtls_wss_port}'

-    @pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
    def test_basic_websockets(self, handler):
        with handler() as rh:
            ws = ws_validate_and_send(rh, Request(self.ws_base_url))
@ -141,7 +142,6 @@ class TestWebsSocketRequestHandlerConformance:

    # https://www.rfc-editor.org/rfc/rfc6455.html#section-5.6
    @pytest.mark.parametrize('msg,opcode', [('str', 1), (b'bytes', 2)])
-    @pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
    def test_send_types(self, handler, msg, opcode):
        with handler() as rh:
            ws = ws_validate_and_send(rh, Request(self.ws_base_url))
@ -149,7 +149,6 @@ class TestWebsSocketRequestHandlerConformance:
            assert int(ws.recv()) == opcode
            ws.close()

-    @pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
    def test_verify_cert(self, handler):
        with handler() as rh:
            with pytest.raises(CertificateVerifyError):
@ -160,14 +159,12 @@ class TestWebsSocketRequestHandlerConformance:
            assert ws.status == 101
            ws.close()

-    @pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
    def test_ssl_error(self, handler):
        with handler(verify=False) as rh:
            with pytest.raises(SSLError, match=r'ssl(?:v3|/tls) alert handshake failure') as exc_info:
                ws_validate_and_send(rh, Request(self.bad_wss_host))
            assert not issubclass(exc_info.type, CertificateVerifyError)

-    @pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
    @pytest.mark.parametrize('path,expected', [
        # Unicode characters should be encoded with uppercase percent-encoding
        ('/中文', '/%E4%B8%AD%E6%96%87'),
@ -182,7 +179,6 @@ class TestWebsSocketRequestHandlerConformance:
            assert ws.status == 101
            ws.close()

-    @pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
    def test_remove_dot_segments(self, handler):
        with handler() as rh:
            # This isn't a comprehensive test,
@ -195,7 +191,6 @@ class TestWebsSocketRequestHandlerConformance:

    # We are restricted to known HTTP status codes in http.HTTPStatus
    # Redirects are not supported for websockets
-    @pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
    @pytest.mark.parametrize('status', (200, 204, 301, 302, 303, 400, 500, 511))
    def test_raise_http_error(self, handler, status):
        with handler() as rh:
@ -203,7 +198,6 @@ class TestWebsSocketRequestHandlerConformance:
                ws_validate_and_send(rh, Request(f'{self.ws_base_url}/gen_{status}'))
            assert exc_info.value.status == status

-    @pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
    @pytest.mark.parametrize('params,extensions', [
        ({'timeout': sys.float_info.min}, {}),
        ({}, {'timeout': sys.float_info.min}),
@ -213,7 +207,6 @@ class TestWebsSocketRequestHandlerConformance:
            with pytest.raises(TransportError):
                ws_validate_and_send(rh, Request(self.ws_base_url, extensions=extensions))

-    @pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
    def test_cookies(self, handler):
        cookiejar = YoutubeDLCookieJar()
        cookiejar.set_cookie(http.cookiejar.Cookie(
@ -239,7 +232,6 @@ class TestWebsSocketRequestHandlerConformance:
            assert json.loads(ws.recv())['cookie'] == 'test=ytdlp'
            ws.close()

-    @pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
    def test_source_address(self, handler):
        source_address = f'127.0.0.{random.randint(5, 255)}'
        verify_address_availability(source_address)
@ -249,7 +241,6 @@ class TestWebsSocketRequestHandlerConformance:
            assert source_address == ws.recv()
            ws.close()

-    @pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
    def test_response_url(self, handler):
        with handler() as rh:
            url = f'{self.ws_base_url}/something'
@ -257,7 +248,6 @@ class TestWebsSocketRequestHandlerConformance:
            assert ws.url == url
            ws.close()

-    @pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
    def test_request_headers(self, handler):
        with handler(headers=HTTPHeaderDict({'test1': 'test', 'test2': 'test2'})) as rh:
            # Global Headers
@ -293,7 +283,6 @@ class TestWebsSocketRequestHandlerConformance:
            'client_certificate_password': 'foobar',
        }
    ))
-    @pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
    def test_mtls(self, handler, client_cert):
        with handler(
            # Disable client-side validation of unacceptable self-signed testcert.pem
@ -303,6 +292,44 @@ class TestWebsSocketRequestHandlerConformance:
        ) as rh:
            ws_validate_and_send(rh, Request(self.mtls_wss_base_url)).close()

+    def test_request_disable_proxy(self, handler):
+        for proxy_proto in handler._SUPPORTED_PROXY_SCHEMES or ['ws']:
+            # Given handler is configured with a proxy
+            with handler(proxies={'ws': f'{proxy_proto}://10.255.255.255'}, timeout=5) as rh:
+                # When a proxy is explicitly set to None for the request
+                ws = ws_validate_and_send(rh, Request(self.ws_base_url, proxies={'http': None}))
+                # Then no proxy should be used
+                assert ws.status == 101
+                ws.close()
+
+    @pytest.mark.skip_handlers_if(
+        lambda _, handler: Features.NO_PROXY not in handler._SUPPORTED_FEATURES, 'handler does not support NO_PROXY')
+    def test_noproxy(self, handler):
+        for proxy_proto in handler._SUPPORTED_PROXY_SCHEMES or ['ws']:
+            # Given the handler is configured with a proxy
+            with handler(proxies={'ws': f'{proxy_proto}://10.255.255.255'}, timeout=5) as rh:
+                for no_proxy in (f'127.0.0.1:{self.ws_port}', '127.0.0.1', 'localhost'):
+                    # When request no proxy includes the request url host
+                    ws = ws_validate_and_send(rh, Request(self.ws_base_url, proxies={'no': no_proxy}))
+                    # Then the proxy should not be used
+                    assert ws.status == 101
+                    ws.close()
+
+    @pytest.mark.skip_handlers_if(
+        lambda _, handler: Features.ALL_PROXY not in handler._SUPPORTED_FEATURES, 'handler does not support ALL_PROXY')
+    def test_allproxy(self, handler):
+        supported_proto = traverse_obj(handler._SUPPORTED_PROXY_SCHEMES, 0, default='ws')
+        # This is a bit of a hacky test, but it should be enough to check whether the handler is using the proxy.
+        # 0.1s might not be enough of a timeout if proxy is not used in all cases, but should still get failures.
+        with handler(proxies={'all': f'{supported_proto}://10.255.255.255'}, timeout=0.1) as rh:
+            with pytest.raises(TransportError):
+                ws_validate_and_send(rh, Request(self.ws_base_url)).close()
+
+        with handler(timeout=0.1) as rh:
+            with pytest.raises(TransportError):
+                ws_validate_and_send(
+                    rh, Request(self.ws_base_url, proxies={'all': f'{supported_proto}://10.255.255.255'})).close()
+

 def create_fake_ws_connection(raised):
    import websockets.sync.client
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@ -2136,6 +2136,11 @@ class YoutubeDL:

    def _check_formats(self, formats):
        for f in formats:
+            working = f.get('__working')
+            if working is not None:
+                if working:
+                    yield f
+                continue
            self.to_screen('[info] Testing format %s' % f['format_id'])
            path = self.get_output_path('temp')
            if not self._ensure_dir_exists(f'{path}/'):
@ -2152,33 +2157,44 @@ class YoutubeDL:
                        os.remove(temp_file.name)
                    except OSError:
                        self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
+            f['__working'] = success
            if success:
                yield f
            else:
                self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])

+    def _select_formats(self, formats, selector):
+        return list(selector({
+            'formats': formats,
+            'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
+            'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats)  # No formats with video
+                                   or all(f.get('acodec') == 'none' for f in formats)),  # OR, No formats with audio
+        }))
+
    def _default_format_spec(self, info_dict, download=True):
+        download = download and not self.params.get('simulate')
+        prefer_best = download and (
+            self.params['outtmpl']['default'] == '-'
+            or info_dict.get('is_live') and not self.params.get('live_from_start'))

        def can_merge():
            merger = FFmpegMergerPP(self)
            return merger.available and merger.can_merge()

-        prefer_best = (
-            not self.params.get('simulate')
-            and download
-            and (
-                not can_merge()
-                or info_dict.get('is_live') and not self.params.get('live_from_start')
-                or self.params['outtmpl']['default'] == '-'))
-        compat = (
-            prefer_best
-            or self.params.get('allow_multiple_audio_streams', False)
-            or 'format-spec' in self.params['compat_opts'])
+        if not prefer_best and download and not can_merge():
+            prefer_best = True
+            formats = self._get_formats(info_dict)
+            evaluate_formats = lambda spec: self._select_formats(formats, self.build_format_selector(spec))
+            if evaluate_formats('b/bv+ba') != evaluate_formats('bv*+ba/b'):
+                self.report_warning('ffmpeg not found. The downloaded format may not be the best available. '
+                                    'Installing ffmpeg is strongly recommended: https://github.com/yt-dlp/yt-dlp#dependencies')

-        return (
-            'best/bestvideo+bestaudio' if prefer_best
-            else 'bestvideo*+bestaudio/best' if not compat
-            else 'bestvideo+bestaudio/best')
+        compat = (self.params.get('allow_multiple_audio_streams')
+                  or 'format-spec' in self.params['compat_opts'])
+
+        return ('best/bestvideo+bestaudio' if prefer_best
+                else 'bestvideo+bestaudio/best' if compat
+                else 'bestvideo*+bestaudio/best')

    def build_format_selector(self, format_spec):
        def syntax_error(note, start):
@ -2928,12 +2944,7 @@ class YoutubeDL:
                self.write_debug(f'Default format spec: {req_format}')
                format_selector = self.build_format_selector(req_format)

-            formats_to_download = list(format_selector({
-                'formats': formats,
-                'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
-                'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats)  # No formats with video
-                                       or all(f.get('acodec') == 'none' for f in formats)),  # OR, No formats with audio
-            }))
+            formats_to_download = self._select_formats(formats, format_selector)
            if interactive_format_selection and not formats_to_download:
                self.report_error('Requested format is not available', tb=False, is_error=False)
                continue
@ -3060,7 +3071,7 @@ class YoutubeDL:
                f = formats[-1]
                self.report_warning(
                    'No subtitle format found matching "%s" for language %s, '
-                    'using %s' % (formats_query, lang, f['ext']))
+                    'using %s. Use --list-subs for a list of available subtitles' % (formats_query, lang, f['ext']))
            subs[lang] = f
        return subs

--- a/yt_dlp/cookies.py
+++ b/yt_dlp/cookies.py
@ -46,7 +46,7 @@ from .utils import (
 from .utils._utils import _YDLLogger
 from .utils.networking import normalize_url

-CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
+CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi', 'whale'}
 SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'}


@ -219,6 +219,7 @@ def _get_chromium_based_browser_settings(browser_name):
            'edge': os.path.join(appdata_local, R'Microsoft\Edge\User Data'),
            'opera': os.path.join(appdata_roaming, R'Opera Software\Opera Stable'),
            'vivaldi': os.path.join(appdata_local, R'Vivaldi\User Data'),
+            'whale': os.path.join(appdata_local, R'Naver\Naver Whale\User Data'),
        }[browser_name]

    elif sys.platform == 'darwin':
@ -230,6 +231,7 @@ def _get_chromium_based_browser_settings(browser_name):
            'edge': os.path.join(appdata, 'Microsoft Edge'),
            'opera': os.path.join(appdata, 'com.operasoftware.Opera'),
            'vivaldi': os.path.join(appdata, 'Vivaldi'),
+            'whale': os.path.join(appdata, 'Naver/Whale'),
        }[browser_name]

    else:
@ -241,6 +243,7 @@ def _get_chromium_based_browser_settings(browser_name):
            'edge': os.path.join(config, 'microsoft-edge'),
            'opera': os.path.join(config, 'opera'),
            'vivaldi': os.path.join(config, 'vivaldi'),
+            'whale': os.path.join(config, 'naver-whale'),
        }[browser_name]

    # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
@ -252,6 +255,7 @@ def _get_chromium_based_browser_settings(browser_name):
        'edge': 'Microsoft Edge' if sys.platform == 'darwin' else 'Chromium',
        'opera': 'Opera' if sys.platform == 'darwin' else 'Chromium',
        'vivaldi': 'Vivaldi' if sys.platform == 'darwin' else 'Chrome',
+        'whale': 'Whale',
    }[browser_name]

    browsers_without_profiles = {'opera'}
@ -347,6 +351,11 @@ def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, pa
        if value is None:
            return is_encrypted, None

+    # In chrome, session cookies have expires_utc set to 0
+    # In our cookie-store, cookies that do not expire should have expires set to None
+    if not expires_utc:
+        expires_utc = None
+
    return is_encrypted, http.cookiejar.Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@ -288,7 +288,6 @@ from .bundestag import BundestagIE
 from .buzzfeed import BuzzFeedIE
 from .byutv import BYUtvIE
 from .c56 import C56IE
-from .cableav import CableAVIE
 from .callin import CallinIE
 from .caltrans import CaltransIE
 from .cam4 import CAM4IE
@ -387,7 +386,11 @@ from .comedycentral import (
    ComedyCentralIE,
    ComedyCentralTVIE,
 )
-from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
+from .commonmistakes import (
+    BlobIE,
+    CommonMistakesIE,
+    UnicodeBOMIE,
+)
 from .commonprotocols import (
    MmsIE,
    RtmpIE,
@ -544,7 +547,6 @@ from .egghead import (
    EggheadLessonIE,
 )
 from .eighttracks import EightTracksIE
-from .einthusan import EinthusanIE
 from .eitb import EitbIE
 from .elementorembed import ElementorEmbedIE
 from .elonet import ElonetIE
@ -857,10 +859,6 @@ from .iwara import (
 )
 from .ixigua import IxiguaIE
 from .izlesene import IzleseneIE
-from .jable import (
-    JableIE,
-    JablePlaylistIE,
-)
 from .jamendo import (
    JamendoIE,
    JamendoAlbumIE,
@ -1495,7 +1493,6 @@ from .polskieradio import (
 )
 from .popcorntimes import PopcorntimesIE
 from .popcorntv import PopcornTVIE
-from .porn91 import Porn91IE
 from .pornbox import PornboxIE
 from .pornflip import PornFlipIE
 from .pornhub import (
@ -2373,7 +2370,6 @@ from .wykop import (
 )
 from .xanimu import XanimuIE
 from .xboxclips import XboxClipsIE
-from .xfileshare import XFileShareIE
 from .xhamster import (
    XHamsterIE,
    XHamsterEmbedIE,
@ -2428,8 +2424,6 @@ from .younow import (
    YouNowMomentIE,
 )
 from .youporn import YouPornIE
-from .yourporn import YourPornIE
-from .yourupload import YourUploadIE
 from .zaiko import (
    ZaikoIE,
    ZaikoETicketIE,
--- a/yt_dlp/extractor/afreecatv.py
+++ b/yt_dlp/extractor/afreecatv.py
@ -8,9 +8,11 @@ from ..utils import (
    determine_ext,
    filter_dict,
    int_or_none,
+    orderedSet,
    unified_timestamp,
    url_or_none,
    urlencode_postdata,
+    urljoin,
 )
 from ..utils.traversal import traverse_obj

@ -276,6 +278,47 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE):
    }]

    _LIVE_API_URL = 'https://live.afreecatv.com/afreeca/player_live_api.php'
+    _WORKING_CDNS = [
+        'gcp_cdn',  # live-global-cdn-v02.afreecatv.com
+        'gs_cdn_pc_app',  # pc-app.stream.afreecatv.com
+        'gs_cdn_mobile_web',  # mobile-web.stream.afreecatv.com
+        'gs_cdn_pc_web',  # pc-web.stream.afreecatv.com
+    ]
+    _BAD_CDNS = [
+        'gs_cdn',  # chromecast.afreeca.gscdn.com (cannot resolve)
+        'gs_cdn_chromecast',  # chromecast.stream.afreecatv.com (HTTP Error 400)
+        'azure_cdn',  # live-global-cdn-v01.afreecatv.com (cannot resolve)
+        'aws_cf',  # live-global-cdn-v03.afreecatv.com (cannot resolve)
+        'kt_cdn',  # kt.stream.afreecatv.com (HTTP Error 400)
+    ]
+
+    def _extract_formats(self, channel_info, broadcast_no, aid):
+        stream_base_url = channel_info.get('RMD') or 'https://livestream-manager.afreecatv.com'
+
+        # If user has not passed CDN IDs, try API-provided CDN ID followed by other working CDN IDs
+        default_cdn_ids = orderedSet([
+            *traverse_obj(channel_info, ('CDN', {str}, all, lambda _, v: v not in self._BAD_CDNS)),
+            *self._WORKING_CDNS,
+        ])
+        cdn_ids = self._configuration_arg('cdn', default_cdn_ids)
+
+        for attempt, cdn_id in enumerate(cdn_ids, start=1):
+            m3u8_url = traverse_obj(self._download_json(
+                urljoin(stream_base_url, 'broad_stream_assign.html'), broadcast_no,
+                f'Downloading {cdn_id} stream info', f'Unable to download {cdn_id} stream info',
+                fatal=False, query={
+                    'return_type': cdn_id,
+                    'broad_key': f'{broadcast_no}-common-master-hls',
+                }), ('view_url', {url_or_none}))
+            try:
+                return self._extract_m3u8_formats(
+                    m3u8_url, broadcast_no, 'mp4', m3u8_id='hls', query={'aid': aid},
+                    headers={'Referer': 'https://play.afreecatv.com/'})
+            except ExtractorError as e:
+                if attempt == len(cdn_ids):
+                    raise
+                self.report_warning(
+                    f'{e.cause or e.msg}. Retrying... (attempt {attempt} of {len(cdn_ids)})')

    def _real_extract(self, url):
        broadcaster_id, broadcast_no = self._match_valid_url(url).group('id', 'bno')
@ -294,7 +337,7 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE):
                'This livestream is protected by a password, use the --video-password option',
                expected=True)

-        aid = self._download_json(
+        token_info = traverse_obj(self._download_json(
            self._LIVE_API_URL, broadcast_no, 'Downloading access token for stream',
            'Unable to download access token for stream', data=urlencode_postdata(filter_dict({
                'bno': broadcast_no,
@ -302,18 +345,17 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE):
                'type': 'aid',
                'quality': 'master',
                'pwd': password,
-            })))['CHANNEL']['AID']
+            }))), ('CHANNEL', {dict})) or {}
+        aid = token_info.get('AID')
+        if not aid:
+            result = token_info.get('RESULT')
+            if result == 0:
+                raise ExtractorError('This livestream has ended', expected=True)
+            elif result == -6:
+                self.raise_login_required('This livestream is for subscribers only', method='password')
+            raise ExtractorError('Unable to extract access token')

-        stream_base_url = channel_info.get('RMD') or 'https://livestream-manager.afreecatv.com'
-        stream_info = self._download_json(f'{stream_base_url}/broad_stream_assign.html', broadcast_no, query={
-            # works: gs_cdn_pc_app, gs_cdn_mobile_web, gs_cdn_pc_web
-            'return_type': 'gs_cdn_pc_app',
-            'broad_key': f'{broadcast_no}-common-master-hls',
-        }, note='Downloading metadata for stream', errnote='Unable to download metadata for stream')
-
-        formats = self._extract_m3u8_formats(
-            stream_info['view_url'], broadcast_no, 'mp4', m3u8_id='hls',
-            query={'aid': aid}, headers={'Referer': url})
+        formats = self._extract_formats(channel_info, broadcast_no, aid)

        station_info = traverse_obj(self._download_json(
            'https://st.afreecatv.com/api/get_station_status.php', broadcast_no,
--- a/yt_dlp/extractor/alura.py
+++ b/yt_dlp/extractor/alura.py
@ -39,7 +39,7 @@ class AluraIE(InfoExtractor):

    def _real_extract(self, url):

-        course, video_id = self._match_valid_url(url)
+        course, video_id = self._match_valid_url(url).group('course_name', 'id')
        video_url = self._VIDEO_URL % (course, video_id)

        video_dict = self._download_json(video_url, video_id, 'Searching for videos')
@ -52,7 +52,7 @@ class AluraIE(InfoExtractor):

            formats = []
            for video_obj in video_dict:
-                video_url_m3u8 = video_obj.get('link')
+                video_url_m3u8 = video_obj.get('mp4')
                video_format = self._extract_m3u8_formats(
                    video_url_m3u8, None, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False)
--- a/yt_dlp/extractor/asobistage.py
+++ b/yt_dlp/extractor/asobistage.py
@ -105,7 +105,7 @@ class AsobiStageIE(InfoExtractor):
        video_type = {'archive': 'archives', 'player': 'broadcasts'}[type_]
        webpage = self._download_webpage(url, video_id)
        event_data = traverse_obj(
-            self._search_nextjs_data(webpage, video_id, default='{}'),
+            self._search_nextjs_data(webpage, video_id, default={}),
            ('props', 'pageProps', 'eventCMSData', {
                'title': ('event_name', {str}),
                'thumbnail': ('event_thumbnail_image', {url_or_none}),
--- a/yt_dlp/extractor/bbc.py
+++ b/yt_dlp/extractor/bbc.py
@ -602,7 +602,7 @@ class BBCIE(BBCCoUkIE):  # XXX: Do not subclass from concrete IE
        'url': 'http://www.bbc.com/news/world-europe-32668511',
        'info_dict': {
            'id': 'world-europe-32668511',
-            'title': 'Russia stages massive WW2 parade',
+            'title': 'Russia stages massive WW2 parade despite Western boycott',
            'description': 'md5:00ff61976f6081841f759a08bf78cc9c',
        },
        'playlist_count': 2,
@ -623,6 +623,7 @@ class BBCIE(BBCCoUkIE):  # XXX: Do not subclass from concrete IE
        'info_dict': {
            'id': '3662a707-0af9-3149-963f-47bea720b460',
            'title': 'BUGGER',
+            'description': r're:BUGGER  The recent revelations by the whistleblower Edward Snowden were fascinating. .{211}\.{3}$',
        },
        'playlist_count': 18,
    }, {
@ -631,14 +632,14 @@ class BBCIE(BBCCoUkIE):  # XXX: Do not subclass from concrete IE
        'info_dict': {
            'id': 'p02mprgb',
            'ext': 'mp4',
-            'title': 'Aerial footage showed the site of the crash in the Alps - courtesy BFM TV',
-            'description': 'md5:2868290467291b37feda7863f7a83f54',
+            'title': 'Germanwings crash site aerial video',
+            'description': r're:(?s)Aerial video showed the site where the Germanwings flight 4U 9525, .{156} BFM TV\.$',
            'duration': 47,
            'timestamp': 1427219242,
            'upload_date': '20150324',
+            'thumbnail': 'https://ichef.bbci.co.uk/news/1024/media/images/81879000/jpg/_81879090_81879089.jpg',
        },
        'params': {
-            # rtmp download
            'skip_download': True,
        }
    }, {
@ -656,21 +657,24 @@ class BBCIE(BBCCoUkIE):  # XXX: Do not subclass from concrete IE
        },
        'params': {
            'skip_download': True,
-        }
+        },
+        'skip': 'now SIMORGH_DATA with no video',
    }, {
        # single video embedded with data-playable containing XML playlists (regional section)
        'url': 'http://www.bbc.com/mundo/video_fotos/2015/06/150619_video_honduras_militares_hospitales_corrupcion_aw',
        'info_dict': {
-            'id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
+            'id': '39275083',
+            'display_id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
            'ext': 'mp4',
            'title': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
-            'description': 'md5:1525f17448c4ee262b64b8f0c9ce66c8',
+            'description': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
            'timestamp': 1434713142,
            'upload_date': '20150619',
+            'thumbnail': 'https://a.files.bbci.co.uk/worldservice/live/assets/images/2015/06/19/150619132146_honduras_hsopitales_militares_640x360_aptn_nocredit.jpg',
        },
        'params': {
            'skip_download': True,
-        }
+        },
    }, {
        # single video from video playlist embedded with vxp-playlist-data JSON
        'url': 'http://www.bbc.com/news/video_and_audio/must_see/33376376',
@ -683,22 +687,21 @@ class BBCIE(BBCCoUkIE):  # XXX: Do not subclass from concrete IE
        },
        'params': {
            'skip_download': True,
-        }
+        },
+        'skip': '404 Not Found',
    }, {
-        # single video story with digitalData
+        # single video story with __PWA_PRELOADED_STATE__
        'url': 'http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret',
        'info_dict': {
            'id': 'p02q6gc4',
-            'ext': 'flv',
-            'title': 'Sri Lanka’s spicy secret',
-            'description': 'As a new train line to Jaffna opens up the country’s north, travellers can experience a truly distinct slice of Tamil culture.',
-            'timestamp': 1437674293,
-            'upload_date': '20150723',
+            'ext': 'mp4',
+            'title': 'Tasting the spice of life in Jaffna',
+            'description': r're:(?s)BBC Travel Show’s Henry Golding explores the city of Jaffna .{151} aftertaste\.$',
+            'timestamp': 1646058397,
+            'upload_date': '20220228',
+            'duration': 255,
+            'thumbnail': 'https://ichef.bbci.co.uk/images/ic/1920xn/p02vxvkn.jpg',
        },
-        'params': {
-            # rtmp download
-            'skip_download': True,
-        }
    }, {
        # single video story without digitalData
        'url': 'http://www.bbc.com/autos/story/20130513-hyundais-rock-star',
@ -710,12 +713,10 @@ class BBCIE(BBCCoUkIE):  # XXX: Do not subclass from concrete IE
            'timestamp': 1415867444,
            'upload_date': '20141113',
        },
-        'params': {
-            # rtmp download
-            'skip_download': True,
-        }
+        'skip': 'redirects to TopGear home page',
    }, {
        # single video embedded with Morph
+        # TODO: replacement test page
        'url': 'http://www.bbc.co.uk/sport/live/olympics/36895975',
        'info_dict': {
            'id': 'p041vhd0',
@ -726,27 +727,22 @@ class BBCIE(BBCCoUkIE):  # XXX: Do not subclass from concrete IE
            'uploader': 'BBC Sport',
            'uploader_id': 'bbc_sport',
        },
-        'params': {
-            # m3u8 download
-            'skip_download': True,
-        },
-        'skip': 'Georestricted to UK',
+        'skip': 'Video no longer in page',
    }, {
-        # single video with playlist.sxml URL in playlist param
+        # single video in __INITIAL_DATA__
        'url': 'http://www.bbc.com/sport/0/football/33653409',
        'info_dict': {
            'id': 'p02xycnp',
            'ext': 'mp4',
-            'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
-            'description': 'BBC Sport\'s David Ornstein has the latest transfer gossip, including rumours of a Manchester United return for Cristiano Ronaldo.',
+            'title': 'Ronaldo to Man Utd, Arsenal to spend?',
+            'description': r're:(?s)BBC Sport\'s David Ornstein rounds up the latest transfer reports, .{359} here\.$',
+            'timestamp': 1437750175,
+            'upload_date': '20150724',
+            'thumbnail': r're:https?://.+/.+media/images/69320000/png/_69320754_mmgossipcolumnextraaugust18.png',
            'duration': 140,
        },
-        'params': {
-            # rtmp download
-            'skip_download': True,
-        }
    }, {
-        # article with multiple videos embedded with playlist.sxml in playlist param
+        # article with multiple videos embedded with Morph.setPayload
        'url': 'http://www.bbc.com/sport/0/football/34475836',
        'info_dict': {
            'id': '34475836',
@ -754,6 +750,21 @@ class BBCIE(BBCCoUkIE):  # XXX: Do not subclass from concrete IE
            'description': 'Fast-paced football, wit, wisdom and a ready smile - why Liverpool fans should come to love new boss Jurgen Klopp.',
        },
        'playlist_count': 3,
+    }, {
+        # Testing noplaylist
+        'url': 'http://www.bbc.com/sport/0/football/34475836',
+        'info_dict': {
+            'id': 'p034ppnv',
+            'ext': 'mp4',
+            'title': 'All you need to know about Jurgen Klopp',
+            'timestamp': 1444335081,
+            'upload_date': '20151008',
+            'duration': 122.0,
+            'thumbnail': 'https://ichef.bbci.co.uk/onesport/cps/976/cpsprodpb/7542/production/_85981003_klopp.jpg',
+        },
+        'params': {
+            'noplaylist': True,
+        },
    }, {
        # school report article with single video
        'url': 'http://www.bbc.co.uk/schoolreport/35744779',
@ -762,6 +773,7 @@ class BBCIE(BBCCoUkIE):  # XXX: Do not subclass from concrete IE
            'title': 'School which breaks down barriers in Jerusalem',
        },
        'playlist_count': 1,
+        'skip': 'redirects to Young Reporter home page https://www.bbc.co.uk/news/topics/cg41ylwv43pt',
    }, {
        # single video with playlist URL from weather section
        'url': 'http://www.bbc.com/weather/features/33601775',
@ -778,18 +790,33 @@ class BBCIE(BBCCoUkIE):  # XXX: Do not subclass from concrete IE
            'thumbnail': r're:https?://.+/.+\.jpg',
            'timestamp': 1437785037,
            'upload_date': '20150725',
+            'duration': 105,
        },
    }, {
        # video with window.__INITIAL_DATA__ and value as JSON string
        'url': 'https://www.bbc.com/news/av/world-europe-59468682',
        'info_dict': {
-            'id': 'p0b71qth',
+            'id': 'p0b779gc',
            'ext': 'mp4',
            'title': 'Why France is making this woman a national hero',
-            'description': 'md5:7affdfab80e9c3a1f976230a1ff4d5e4',
+            'description': r're:(?s)France is honouring the US-born 20th Century singer and activist Josephine .{208} Second World War.',
            'thumbnail': r're:https?://.+/.+\.jpg',
-            'timestamp': 1638230731,
-            'upload_date': '20211130',
+            'timestamp': 1638215626,
+            'upload_date': '20211129',
+            'duration': 125,
+        },
+    }, {
+        # video with script id __NEXT_DATA__ and value as JSON string
+        'url': 'https://www.bbc.com/news/uk-68546268',
+        'info_dict': {
+            'id': 'p0hj0lq7',
+            'ext': 'mp4',
+            'title': 'Nasser Hospital doctor describes his treatment by IDF',
+            'description': r're:(?s)Doctor Abu Sabha said he was detained by Israeli forces after .{276} hostages\."$',
+            'thumbnail': r're:https?://.+/.+\.jpg',
+            'timestamp': 1710188248,
+            'upload_date': '20240311',
+            'duration': 104,
        },
    }, {
        # single video article embedded with data-media-vpid
@ -817,6 +844,7 @@ class BBCIE(BBCCoUkIE):  # XXX: Do not subclass from concrete IE
            'uploader': 'Radio 3',
            'uploader_id': 'bbc_radio_three',
        },
+        'skip': '404 Not Found',
    }, {
        'url': 'http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227',
        'info_dict': {
@ -824,6 +852,7 @@ class BBCIE(BBCCoUkIE):  # XXX: Do not subclass from concrete IE
            'ext': 'mp4',
            'title': 'md5:2fabf12a726603193a2879a055f72514',
            'description': 'Learn English words and phrases from this story',
+            'thumbnail': 'https://ichef.bbci.co.uk/images/ic/1200x675/p06pq9gk.jpg',
        },
        'add_ie': [BBCCoUkIE.ie_key()],
    }, {
@ -832,28 +861,30 @@ class BBCIE(BBCCoUkIE):  # XXX: Do not subclass from concrete IE
        'info_dict': {
            'id': 'p07c6sb9',
            'ext': 'mp4',
-            'title': 'How positive thinking is harming your happiness',
-            'alt_title': 'The downsides of positive thinking',
-            'description': 'md5:fad74b31da60d83b8265954ee42d85b4',
+            'title': 'The downsides of positive thinking',
+            'description': 'The downsides of positive thinking',
            'duration': 235,
-            'thumbnail': r're:https?://.+/p07c9dsr.jpg',
-            'upload_date': '20190604',
-            'categories': ['Psychology'],
+            'thumbnail': r're:https?://.+/p07c9dsr\.(?:jpg|webp|png)',
+            'upload_date': '20220223',
+            'timestamp': 1645632746,
        },
    }, {
        # BBC Sounds
-        'url': 'https://www.bbc.co.uk/sounds/play/m001q78b',
+        'url': 'https://www.bbc.co.uk/sounds/play/w3ct5rgx',
        'info_dict': {
-            'id': 'm001q789',
+            'id': 'p0hrw4nr',
            'ext': 'mp4',
-            'title': 'The Night Tracks Mix - Music for the darkling hour',
-            'thumbnail': 'https://ichef.bbci.co.uk/images/ic/raw/p0c00hym.jpg',
-            'chapters': 'count:8',
-            'description': 'md5:815fb51cbdaa270040aab8145b3f1d67',
-            'uploader': 'Radio 3',
-            'duration': 1800,
-            'uploader_id': 'bbc_radio_three',
-        },
+            'title': 'Are our coastlines being washed away?',
+            'description': r're:(?s)Around the world, coastlines are constantly changing .{2000,} Images\)$',
+            'timestamp': 1713556800,
+            'upload_date': '20240419',
+            'duration': 1588,
+            'thumbnail': 'https://ichef.bbci.co.uk/images/ic/raw/p0hrnxbl.jpg',
+            'uploader': 'World Service',
+            'uploader_id': 'bbc_world_service',
+            'series': 'CrowdScience',
+            'chapters': [],
+        }
    }, {  # onion routes
        'url': 'https://www.bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd.onion/news/av/world-europe-63208576',
        'only_matching': True,
@ -1008,8 +1039,7 @@ class BBCIE(BBCCoUkIE):  # XXX: Do not subclass from concrete IE
            webpage, 'group id', default=None)
        if group_id:
            return self.url_result(
-                'https://www.bbc.co.uk/programmes/%s' % group_id,
-                ie=BBCCoUkIE.ie_key())
+                f'https://www.bbc.co.uk/programmes/{group_id}', BBCCoUkIE)

        # single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
        programme_id = self._search_regex(
@ -1069,83 +1099,133 @@ class BBCIE(BBCCoUkIE):  # XXX: Do not subclass from concrete IE
                }

        # Morph based embed (e.g. http://www.bbc.co.uk/sport/live/olympics/36895975)
-        # There are several setPayload calls may be present but the video
-        # seems to be always related to the first one
-        morph_payload = self._parse_json(
-            self._search_regex(
-                r'Morph\.setPayload\([^,]+,\s*({.+?})\);',
-                webpage, 'morph payload', default='{}'),
-            playlist_id, fatal=False)
+        # Several setPayload calls may be present but the video(s)
+        # should be in one that mentions leadMedia or videoData
+        morph_payload = self._search_json(
+            r'\bMorph\s*\.\s*setPayload\s*\([^,]+,', webpage, 'morph payload', playlist_id,
+            contains_pattern=r'{(?s:(?:(?!</script>).)+(?:"leadMedia"|\\"videoData\\")\s*:.+)}',
+            default={})
        if morph_payload:
-            components = try_get(morph_payload, lambda x: x['body']['components'], list) or []
-            for component in components:
-                if not isinstance(component, dict):
-                    continue
-                lead_media = try_get(component, lambda x: x['props']['leadMedia'], dict)
-                if not lead_media:
-                    continue
-                identifiers = lead_media.get('identifiers')
-                if not identifiers or not isinstance(identifiers, dict):
-                    continue
-                programme_id = identifiers.get('vpid') or identifiers.get('playablePid')
+            for lead_media in traverse_obj(morph_payload, (
+                    'body', 'components', ..., 'props', 'leadMedia', {dict})):
+                programme_id = traverse_obj(lead_media, ('identifiers', ('vpid', 'playablePid'), {str}, any))
                if not programme_id:
                    continue
-                title = lead_media.get('title') or self._og_search_title(webpage)
                formats, subtitles = self._download_media_selector(programme_id)
-                description = lead_media.get('summary')
-                uploader = lead_media.get('masterBrand')
-                uploader_id = lead_media.get('mid')
-                duration = None
-                duration_d = lead_media.get('duration')
-                if isinstance(duration_d, dict):
-                    duration = parse_duration(dict_get(
-                        duration_d, ('rawDuration', 'formattedDuration', 'spokenDuration')))
                return {
                    'id': programme_id,
-                    'title': title,
-                    'description': description,
-                    'duration': duration,
-                    'uploader': uploader,
-                    'uploader_id': uploader_id,
+                    'title': lead_media.get('title') or self._og_search_title(webpage),
+                    **traverse_obj(lead_media, {
+                        'description': ('summary', {str}),
+                        'duration': ('duration', ('rawDuration', 'formattedDuration', 'spokenDuration'), {parse_duration}),
+                        'uploader': ('masterBrand', {str}),
+                        'uploader_id': ('mid', {str}),
+                    }),
                    'formats': formats,
                    'subtitles': subtitles,
                }
+            body = self._parse_json(traverse_obj(morph_payload, (
+                'body', 'content', 'article', 'body')), playlist_id, fatal=False)
+            for video_data in traverse_obj(body, (lambda _, v: v['videoData']['pid'], 'videoData')):
+                if video_data.get('vpid'):
+                    video_id = video_data['vpid']
+                    formats, subtitles = self._download_media_selector(video_id)
+                    entry = {
+                        'id': video_id,
+                        'formats': formats,
+                        'subtitles': subtitles,
+                    }
+                else:
+                    video_id = video_data['pid']
+                    entry = self.url_result(
+                        f'https://www.bbc.co.uk/programmes/{video_id}', BBCCoUkIE,
+                        video_id, url_transparent=True)
+                entry.update({
+                    'timestamp': traverse_obj(morph_payload, (
+                        'body', 'content', 'article', 'dateTimeInfo', 'dateTime', {parse_iso8601})
+                    ),
+                    **traverse_obj(video_data, {
+                        'thumbnail': (('iChefImage', 'image'), {url_or_none}, any),
+                        'title': (('title', 'caption'), {str}, any),
+                        'duration': ('duration', {parse_duration}),
+                    }),
+                })
+                if video_data.get('isLead') and not self._yes_playlist(playlist_id, video_id):
+                    return entry
+                entries.append(entry)
+            if entries:
+                playlist_title = traverse_obj(morph_payload, (
+                    'body', 'content', 'article', 'headline', {str})) or playlist_title
+                return self.playlist_result(
+                    entries, playlist_id, playlist_title, playlist_description)

-        preload_state = self._parse_json(self._search_regex(
-            r'window\.__PRELOADED_STATE__\s*=\s*({.+?});', webpage,
-            'preload state', default='{}'), playlist_id, fatal=False)
-        if preload_state:
-            current_programme = preload_state.get('programmes', {}).get('current') or {}
-            programme_id = current_programme.get('id')
-            if current_programme and programme_id and current_programme.get('type') == 'playable_item':
-                title = current_programme.get('titles', {}).get('tertiary') or playlist_title
-                formats, subtitles = self._download_media_selector(programme_id)
-                synopses = current_programme.get('synopses') or {}
-                network = current_programme.get('network') or {}
-                duration = int_or_none(
-                    current_programme.get('duration', {}).get('value'))
-                thumbnail = None
-                image_url = current_programme.get('image_url')
-                if image_url:
-                    thumbnail = image_url.replace('{recipe}', 'raw')
+        # various PRELOADED_STATE JSON
+        preload_state = self._search_json(
+            r'window\.__(?:PWA_)?PRELOADED_STATE__\s*=', webpage,
+            'preload state', playlist_id, transform_source=js_to_json, default={})
+        # PRELOADED_STATE with current programme
+        current_programme = traverse_obj(preload_state, ('programmes', 'current', {dict}))
+        programme_id = traverse_obj(current_programme, ('id', {str}))
+        if programme_id and current_programme.get('type') == 'playable_item':
+            title = traverse_obj(current_programme, ('titles', ('tertiary', 'secondary'), {str}, any)) or playlist_title
+            formats, subtitles = self._download_media_selector(programme_id)
+            return {
+                'id': programme_id,
+                'title': title,
+                'formats': formats,
+                **traverse_obj(current_programme, {
+                    'description': ('synopses', ('long', 'medium', 'short'), {str}, any),
+                    'thumbnail': ('image_url', {lambda u: url_or_none(u.replace('{recipe}', 'raw'))}),
+                    'duration': ('duration', 'value', {int_or_none}),
+                    'uploader': ('network', 'short_title', {str}),
+                    'uploader_id': ('network', 'id', {str}),
+                    'timestamp': ((('availability', 'from'), ('release', 'date')), {parse_iso8601}, any),
+                    'series': ('titles', 'primary', {str}),
+                }),
+                'subtitles': subtitles,
+                'chapters': traverse_obj(preload_state, (
+                    'tracklist', 'tracks', lambda _, v: float(v['offset']['start']), {
+                        'title': ('titles', {lambda x: join_nonempty(
+                            'primary', 'secondary', 'tertiary', delim=' - ', from_dict=x)}),
+                        'start_time': ('offset', 'start', {float_or_none}),
+                        'end_time': ('offset', 'end', {float_or_none}),
+                    })
+                ),
+            }
+
+        # PWA_PRELOADED_STATE with article video asset
+        asset_id = traverse_obj(preload_state, (
+            'entities', 'articles', lambda k, _: k.rsplit('/', 1)[-1] == playlist_id,
+            'assetVideo', 0, {str}, any))
+        if asset_id:
+            video_id = traverse_obj(preload_state, ('entities', 'videos', asset_id, 'vpid', {str}))
+            if video_id:
+                article = traverse_obj(preload_state, (
+                    'entities', 'articles', lambda _, v: v['assetVideo'][0] == asset_id, any))
+
+                def image_url(image_id):  # URL of entities.images[image_id] in preload_state, with '$recipe' replaced by 'raw'
+                    return traverse_obj(preload_state, (
+                        'entities', 'images', image_id, 'url',
+                        {lambda u: url_or_none(u.replace('$recipe', 'raw'))}))
+
+                formats, subtitles = self._download_media_selector(video_id)
                return {
-                    'id': programme_id,
-                    'title': title,
-                    'description': dict_get(synopses, ('long', 'medium', 'short')),
-                    'thumbnail': thumbnail,
-                    'duration': duration,
-                    'uploader': network.get('short_title'),
-                    'uploader_id': network.get('id'),
+                    'id': video_id,
+                    **traverse_obj(preload_state, ('entities', 'videos', asset_id, {
+                        'title': ('title', {str}),
+                        'description': (('synopsisLong', 'synopsisMedium', 'synopsisShort'), {str}, any),
+                        'thumbnail': (0, {image_url}),
+                        'duration': ('duration', {int_or_none}),
+                    })),
                    'formats': formats,
                    'subtitles': subtitles,
-                    'chapters': traverse_obj(preload_state, (
-                        'tracklist', 'tracks', lambda _, v: float_or_none(v['offset']['start']), {
-                            'title': ('titles', {lambda x: join_nonempty(
-                                'primary', 'secondary', 'tertiary', delim=' - ', from_dict=x)}),
-                            'start_time': ('offset', 'start', {float_or_none}),
-                            'end_time': ('offset', 'end', {float_or_none}),
-                        })) or None,
+                    'timestamp': traverse_obj(article, ('displayDate', {parse_iso8601})),
                }
+            else:
+                return self.url_result(
+                    f'https://www.bbc.co.uk/programmes/{asset_id}', BBCCoUkIE,
+                    asset_id, playlist_title, display_id=playlist_id,
+                    description=playlist_description)

        bbc3_config = self._parse_json(
            self._search_regex(
@ -1191,6 +1271,28 @@ class BBCIE(BBCCoUkIE):  # XXX: Do not subclass from concrete IE
                return self.playlist_result(
                    entries, playlist_id, playlist_title, playlist_description)

+        def parse_model(model):
+            """Build a single-video info dict from the first entry of model['versions']; returns None if it has no str versionId"""
+            item_id = traverse_obj(model, ('versions', 0, 'versionId', {str}))
+            if not item_id:
+                return
+            formats, subtitles = self._download_media_selector(item_id)  # the versionId is what the media selector is queried with
+            return {
+                'id': item_id,
+                'formats': formats,
+                'subtitles': subtitles,
+                **traverse_obj(model, {
+                    'title': ('title', {str}),
+                    'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}),  # '$recipe' placeholder -> 'raw', then joined with the enclosing `url`
+                    'description': ('synopses', ('long', 'medium', 'short'), {str}, {lambda x: x or None}, any),  # first non-empty synopsis, tried in the order long, medium, short
+                    'duration': ('versions', 0, 'duration', {int}),
+                    'timestamp': ('versions', 0, 'availableFrom', {functools.partial(int_or_none, scale=1000)}),  # availableFrom divided by 1000 (presumably ms -> s; confirm)
+                })
+            }
+
+        def is_type(*types):  # builds a (key, value) predicate used as a filter step in traverse_obj paths
+            return lambda _, v: v['type'] in types  # the key is ignored; matches when the value's 'type' is one of `types`
+
        initial_data = self._search_regex(
            r'window\.__INITIAL_DATA__\s*=\s*("{.+?}")\s*;', webpage,
            'quoted preload state', default=None)
@ -1202,6 +1304,19 @@ class BBCIE(BBCCoUkIE):  # XXX: Do not subclass from concrete IE
            initial_data = self._parse_json(initial_data or '"{}"', playlist_id, fatal=False)
        initial_data = self._parse_json(initial_data, playlist_id, fatal=False)
        if initial_data:
+            for video_data in traverse_obj(initial_data, (
+                    'stores', 'article', 'articleBodyContent', is_type('video'))):
+                model = traverse_obj(video_data, (
+                    'model', 'blocks', is_type('aresMedia'),
+                    'model', 'blocks', is_type('aresMediaMetadata'),
+                    'model', {dict}, any))
+                entry = parse_model(model)
+                if entry:
+                    entries.append(entry)
+            if entries:
+                return self.playlist_result(
+                    entries, playlist_id, playlist_title, playlist_description)
+
            def parse_media(media):
                if not media:
                    return
@ -1234,27 +1349,90 @@ class BBCIE(BBCCoUkIE):  # XXX: Do not subclass from concrete IE
                        'subtitles': subtitles,
                        'timestamp': item_time,
                        'description': strip_or_none(item_desc),
+                        'duration': int_or_none(item.get('duration')),
                    })
-            for resp in (initial_data.get('data') or {}).values():
-                name = resp.get('name')
+
+            for resp in traverse_obj(initial_data, ('data', lambda _, v: v['name'])):
+                name = resp['name']
                if name == 'media-experience':
                    parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
                elif name == 'article':
-                    for block in (try_get(resp,
-                                          (lambda x: x['data']['blocks'],
-                                           lambda x: x['data']['content']['model']['blocks'],),
-                                          list) or []):
-                        if block.get('type') not in ['media', 'video']:
-                            continue
-                        parse_media(block.get('model'))
+                    for block in traverse_obj(resp, (
+                            'data', (None, ('content', 'model')), 'blocks',
+                            is_type('media', 'video'), 'model', {dict})):
+                        parse_media(block)
            return self.playlist_result(
                entries, playlist_id, playlist_title, playlist_description)

+        # extract from SIMORGH_DATA hydration JSON
+        simorgh_data = self._search_json(
+            r'window\s*\.\s*SIMORGH_DATA\s*=', webpage,
+            'simorgh data', playlist_id, default={})
+        if simorgh_data:
+            done = False
+            for video_data in traverse_obj(simorgh_data, (
+                    'pageData', 'content', 'model', 'blocks', is_type('video', 'legacyMedia'))):
+                model = traverse_obj(video_data, (
+                    'model', 'blocks', is_type('aresMedia'),
+                    'model', 'blocks', is_type('aresMediaMetadata'),
+                    'model', {dict}, any))
+                if video_data['type'] == 'video':
+                    entry = parse_model(model)
+                else:  # legacyMedia: no duration, subtitles
+                    block_id, entry = traverse_obj(model, ('blockId', {str})), None
+                    media_data = traverse_obj(simorgh_data, (
+                        'pageData', 'promo', 'media',
+                        {lambda x: x if x['id'] == block_id else None}))
+                    formats = traverse_obj(media_data, ('playlist', lambda _, v: url_or_none(v['url']), {
+                        'url': ('url', {url_or_none}),
+                        'ext': ('format', {str}),
+                        'tbr': ('bitrate', {functools.partial(int_or_none, scale=1000)}),
+                    }))
+                    if formats:
+                        entry = {
+                            'id': block_id,
+                            'display_id': playlist_id,
+                            'formats': formats,
+                            'description': traverse_obj(simorgh_data, ('pageData', 'promo', 'summary', {str})),
+                            **traverse_obj(model, {
+                                'title': ('title', {str}),
+                                'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}),
+                                'description': ('synopses', ('long', 'medium', 'short'), {str}, any),
+                                'timestamp': ('firstPublished', {functools.partial(int_or_none, scale=1000)}),
+                            }),
+                        }
+                        done = True
+                if entry:
+                    entries.append(entry)
+                if done:
+                    break
+            if entries:
+                return self.playlist_result(
+                    entries, playlist_id, playlist_title, playlist_description)
+
        def extract_all(pattern):
            return list(filter(None, map(
                lambda s: self._parse_json(s, playlist_id, fatal=False),
                re.findall(pattern, webpage))))

+        # Article with a single embedded video, as served when accessed from the US (e.g.
+        # https://www.bbc.com/news/uk-68546268)
+        next_data = traverse_obj(self._search_nextjs_data(webpage, playlist_id, default={}),
+                                 ('props', 'pageProps', 'page'))
+        model = traverse_obj(next_data, (
+            ..., 'contents', is_type('video'),
+            'model', 'blocks', is_type('media'),
+            'model', 'blocks', is_type('mediaMetadata'),
+            'model', {dict}, any))
+        if model and (entry := parse_model(model)):
+            if not entry.get('timestamp'):
+                entry['timestamp'] = traverse_obj(next_data, (
+                    ..., 'contents', is_type('timestamp'), 'model',
+                    'timestamp', {functools.partial(int_or_none, scale=1000)}, any))
+            entries.append(entry)
+            return self.playlist_result(
+                entries, playlist_id, playlist_title, playlist_description)
+
        # Multiple video article (e.g.
        # http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460)
        EMBED_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+%s(?:\b[^"]+)?' % self._ID_REGEX
--- a/yt_dlp/extractor/bilibili.py
+++ b/yt_dlp/extractor/bilibili.py
@ -93,11 +93,11 @@ class BilibiliBaseIE(InfoExtractor):

        return formats

-    def _download_playinfo(self, video_id, cid):
+    def _download_playinfo(self, video_id, cid, headers=None):  # headers: optional HTTP headers forwarded to the playurl request
         return self._download_json(
             'https://api.bilibili.com/x/player/playurl', video_id,
             query={'bvid': video_id, 'cid': cid, 'fnval': 4048},
-            note=f'Downloading video formats for cid {cid}')['data']
+            note=f'Downloading video formats for cid {cid}', headers=headers)['data']

    def json2srt(self, json_data):
        srt_data = ''
@ -493,7 +493,8 @@ class BiliBiliIE(BilibiliBaseIE):

    def _real_extract(self, url):
        video_id = self._match_id(url)
-        webpage, urlh = self._download_webpage_handle(url, video_id)
+        headers = self.geo_verification_headers()
+        webpage, urlh = self._download_webpage_handle(url, video_id, headers=headers)
        if not self._match_valid_url(urlh.url):
            return self.url_result(urlh.url)

@ -531,7 +532,7 @@ class BiliBiliIE(BilibiliBaseIE):
            self._download_json(
                'https://api.bilibili.com/x/player/pagelist', video_id,
                fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'},
-                note='Extracting videos in anthology'),
+                note='Extracting videos in anthology', headers=headers),
            'data', expected_type=list) or []
        is_anthology = len(page_list_json) > 1

@ -552,7 +553,7 @@ class BiliBiliIE(BilibiliBaseIE):

        festival_info = {}
        if is_festival:
-            play_info = self._download_playinfo(video_id, cid)
+            play_info = self._download_playinfo(video_id, cid, headers=headers)

            festival_info = traverse_obj(initial_state, {
                'uploader': ('videoInfo', 'upName'),
@ -666,14 +667,15 @@ class BiliBiliBangumiIE(BilibiliBaseIE):

    def _real_extract(self, url):
        episode_id = self._match_id(url)
-        webpage = self._download_webpage(url, episode_id)
+        headers = self.geo_verification_headers()
+        webpage = self._download_webpage(url, episode_id, headers=headers)

        if '您所在的地区无法观看本片' in webpage:
            raise GeoRestrictedError('This video is restricted')
        elif '正在观看预览，大会员免费看全片' in webpage:
            self.raise_login_required('This video is for premium members only')

-        headers = {'Referer': url, **self.geo_verification_headers()}
+        headers['Referer'] = url
        play_info = self._download_json(
            'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id,
            'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id},
@ -724,7 +726,7 @@ class BiliBiliBangumiIE(BilibiliBaseIE):
            'duration': float_or_none(play_info.get('timelength'), scale=1000),
            'subtitles': self.extract_subtitles(episode_id, episode_info.get('cid'), aid=aid),
            '__post_extractor': self.extract_comments(aid),
-            'http_headers': headers,
+            'http_headers': {'Referer': url},
        }


@ -1043,15 +1045,17 @@ class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):

            try:
                response = self._download_json('https://api.bilibili.com/x/space/wbi/arc/search',
-                                               playlist_id, note=f'Downloading page {page_idx}', query=query)
+                                               playlist_id, note=f'Downloading page {page_idx}', query=query,
+                                               headers={'referer': url})
            except ExtractorError as e:
                if isinstance(e.cause, HTTPError) and e.cause.status == 412:
                    raise ExtractorError(
                        'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
                raise
-            if response['code'] == -401:
+            if response['code'] in (-352, -401):
                raise ExtractorError(
-                    'Request is blocked by server (401), please add cookies, wait and try later.', expected=True)
+                    f'Request is blocked by server ({-response["code"]}), '
+                    'please add cookies, wait and try later.', expected=True)
            return response['data']

        def get_metadata(page_data):
--- a/yt_dlp/extractor/boosty.py
+++ b/yt_dlp/extractor/boosty.py
@ -1,7 +1,11 @@
+import json
+import urllib.parse
+
 from .common import InfoExtractor
 from .youtube import YoutubeIE
 from ..utils import (
    ExtractorError,
+    bug_reports_message,
    int_or_none,
    qualities,
    str_or_none,
@ -162,9 +166,19 @@ class BoostyIE(InfoExtractor):

    def _real_extract(self, url):
        user, post_id = self._match_valid_url(url).group('user', 'post_id')
+
+        auth_headers = {}
+        auth_cookie = self._get_cookies('https://boosty.to/').get('auth')
+        if auth_cookie is not None:
+            try:
+                auth_data = json.loads(urllib.parse.unquote(auth_cookie.value))
+                auth_headers['Authorization'] = f'Bearer {auth_data["accessToken"]}'
+            except (json.JSONDecodeError, KeyError):
+                self.report_warning(f'Failed to extract token from auth cookie{bug_reports_message()}')
+
        post = self._download_json(
            f'https://api.boosty.to/v1/blog/{user}/post/{post_id}', post_id,
-            note='Downloading post data', errnote='Unable to download post data')
+            note='Downloading post data', errnote='Unable to download post data', headers=auth_headers)

        post_title = post.get('title')
        if not post_title:
@ -202,7 +216,9 @@ class BoostyIE(InfoExtractor):
                        'thumbnail': (('previewUrl', 'defaultPreview'), {url_or_none}),
                    }, get_all=False)})

-        if not entries:
+        if not entries and not post.get('hasAccess'):
+            self.raise_login_required('This post requires a subscription', metadata_available=True)
+        elif not entries:
            raise ExtractorError('No videos found', expected=True)
        if len(entries) == 1:
            return entries[0]
--- a/yt_dlp/extractor/cableav.py
+++ b/yt_dlp/extractor/cableav.py
@ -1,32 +0,0 @@
-from .common import InfoExtractor
-
-
-class CableAVIE(InfoExtractor):
-    _VALID_URL = r'https?://cableav\.tv/(?P<id>[a-zA-Z0-9]+)'
-    _TESTS = [{
-        'url': 'https://cableav.tv/lS4iR9lWjN8/',
-        'md5': '7e3fe5e49d61c4233b7f5b0f69b15e18',
-        'info_dict': {
-            'id': 'lS4iR9lWjN8',
-            'ext': 'mp4',
-            'title': '國產麻豆AV 叮叮映畫 DDF001 情欲小說家 - CableAV',
-            'description': '國產AV 480p, 720p 国产麻豆AV 叮叮映画 DDF001 情欲小说家',
-            'thumbnail': r're:^https?://.*\.jpg$',
-        }
-    }]
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-
-        video_url = self._og_search_video_url(webpage, secure=False)
-
-        formats = self._extract_m3u8_formats(video_url, video_id, 'mp4')
-
-        return {
-            'id': video_id,
-            'title': self._og_search_title(webpage),
-            'description': self._og_search_description(webpage),
-            'thumbnail': self._og_search_thumbnail(webpage),
-            'formats': formats,
-        }
--- a/yt_dlp/extractor/canalalpha.py
+++ b/yt_dlp/extractor/canalalpha.py
@ -40,7 +40,7 @@ class CanalAlphaIE(InfoExtractor):
            'id': '24484',
            'ext': 'mp4',
            'title': 'Ces innovations qui veulent rendre l’agriculture plus durable',
-            'description': 'md5:3de3f151180684621e85be7c10e4e613',
+            'description': 'md5:85d594a3b5dc6ccfc4a85aba6e73b129',
            'thumbnail': 'https://static.canalalpha.ch/poster/magazine/magazine_10236.jpg',
            'upload_date': '20211026',
            'duration': 360,
@ -58,14 +58,25 @@ class CanalAlphaIE(InfoExtractor):
            'duration': 360,
        },
        'params': {'skip_download': True}
+    }, {
+        'url': 'https://www.canalalpha.ch/play/le-journal/topic/33500/encore-des-mesures-deconomie-dans-le-jura',
+        'info_dict': {
+            'id': '33500',
+            'ext': 'mp4',
+            'title': 'Encore des mesures d\'économie dans le Jura',
+            'description': 'md5:938b5b556592f2d1b9ab150268082a80',
+            'thumbnail': 'https://static.canalalpha.ch/poster/news/news_46665.jpg',
+            'upload_date': '20240411',
+            'duration': 105,
+        },
    }]

    def _real_extract(self, url):
-        id = self._match_id(url)
-        webpage = self._download_webpage(url, id)
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
        data_json = self._parse_json(self._search_regex(
            r'window\.__SERVER_STATE__\s?=\s?({(?:(?!};)[^"]|"([^"]|\\")*")+})\s?;',
-            webpage, 'data_json'), id)['1']['data']['data']
+            webpage, 'data_json'), video_id)['1']['data']['data']
        manifests = try_get(data_json, lambda x: x['video']['manifests'], expected_type=dict) or {}
        subtitles = {}
        formats = [{
@ -75,15 +86,17 @@ class CanalAlphaIE(InfoExtractor):
            'height': try_get(video, lambda x: x['res']['height'], expected_type=int),
        } for video in try_get(data_json, lambda x: x['video']['mp4'], expected_type=list) or [] if video.get('$url')]
        if manifests.get('hls'):
-            m3u8_frmts, m3u8_subs = self._parse_m3u8_formats_and_subtitles(manifests['hls'], video_id=id)
-            formats.extend(m3u8_frmts)
-            subtitles = self._merge_subtitles(subtitles, m3u8_subs)
+            fmts, subs = self._extract_m3u8_formats_and_subtitles(
+                manifests['hls'], video_id, m3u8_id='hls', fatal=False)
+            formats.extend(fmts)
+            self._merge_subtitles(subs, target=subtitles)
        if manifests.get('dash'):
-            dash_frmts, dash_subs = self._parse_mpd_formats_and_subtitles(manifests['dash'])
-            formats.extend(dash_frmts)
-            subtitles = self._merge_subtitles(subtitles, dash_subs)
+            fmts, subs = self._extract_mpd_formats_and_subtitles(
+                manifests['dash'], video_id, mpd_id='dash', fatal=False)
+            formats.extend(fmts)
+            self._merge_subtitles(subs, target=subtitles)
        return {
-            'id': id,
+            'id': video_id,
            'title': data_json.get('title').strip(),
            'description': clean_html(dict_get(data_json, ('longDesc', 'shortDesc'))),
            'thumbnail': data_json.get('poster'),
--- a/yt_dlp/extractor/cbc.py
+++ b/yt_dlp/extractor/cbc.py
@ -151,7 +151,7 @@ class CBCIE(InfoExtractor):

 class CBCPlayerIE(InfoExtractor):
    IE_NAME = 'cbc.ca:player'
-    _VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P<id>(?:\d\.)?\d+)'
+    _VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/(?:video/)?|i/caffeine/syndicate/\?mediaId=))(?P<id>(?:\d\.)?\d+)'
    _TESTS = [{
        'url': 'http://www.cbc.ca/player/play/2683190193',
        'md5': '64d25f841ddf4ddb28a235338af32e2c',
@ -277,6 +277,28 @@ class CBCPlayerIE(InfoExtractor):
            'location': 'Canada',
            'media_type': 'Full Program',
        },
+    }, {
+        'url': 'https://www.cbc.ca/player/play/video/1.7194274',
+        'md5': '188b96cf6bdcb2540e178a6caa957128',
+        'info_dict': {
+            'id': '2334524995812',
+            'ext': 'mp4',
+            'title': '#TheMoment a rare white spirit moose was spotted in Alberta',
+            'description': 'md5:18ae269a2d0265c5b0bbe4b2e1ac61a3',
+            'timestamp': 1714788791,
+            'duration': 77.678,
+            'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]},
+            'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/201/543/THE_MOMENT.jpg',
+            'uploader': 'CBCC-NEW',
+            'chapters': 'count:0',
+            'upload_date': '20240504',
+            'categories': 'count:3',
+            'series': 'The National',
+            'tags': 'count:15',
+            'creators': ['encoder'],
+            'location': 'Canada',
+            'media_type': 'Excerpt',
+        },
    }, {
        'url': 'cbcplayer:1.7159484',
        'only_matching': True,
--- a/yt_dlp/extractor/cda.py
+++ b/yt_dlp/extractor/cda.py
@ -16,7 +16,6 @@ from ..utils import (
    merge_dicts,
    multipart_encode,
    parse_duration,
-    random_birthday,
    traverse_obj,
    try_call,
    try_get,
@ -63,38 +62,57 @@ class CDAIE(InfoExtractor):
            'description': 'md5:60d76b71186dcce4e0ba6d4bbdb13e1a',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'crash404',
-            'view_count': int,
            'average_rating': float,
            'duration': 137,
            'age_limit': 0,
+            'upload_date': '20160220',
+            'timestamp': 1455968218,
        }
    }, {
-        # Age-restricted
-        'url': 'http://www.cda.pl/video/1273454c4',
+        # Age-restricted with vfilm redirection
+        'url': 'https://www.cda.pl/video/8753244c4',
+        'md5': 'd8eeb83d63611289507010d3df3bb8b3',
        'info_dict': {
-            'id': '1273454c4',
+            'id': '8753244c4',
            'ext': 'mp4',
-            'title': 'Bronson (2008) napisy HD 1080p',
-            'description': 'md5:1b6cb18508daf2dc4e0fa4db77fec24c',
+            'title': '[18+] Bez Filtra: Rezerwowe Psy czyli...  najwulgarniejsza polska gra?',
+            'description': 'md5:ae80bac31bd6a9f077a6cce03c7c077e',
            'height': 1080,
-            'uploader': 'boniek61',
+            'uploader': 'arhn eu',
            'thumbnail': r're:^https?://.*\.jpg$',
-            'duration': 5554,
+            'duration': 991,
            'age_limit': 18,
-            'view_count': int,
            'average_rating': float,
-        },
+            'timestamp': 1633888264,
+            'upload_date': '20211010',
+        }
+    }, {
+        # Age-restricted without vfilm redirection
+        'url': 'https://www.cda.pl/video/17028157b8',
+        'md5': 'c1fe5ff4582bace95d4f0ce0fbd0f992',
+        'info_dict': {
+            'id': '17028157b8',
+            'ext': 'mp4',
+            'title': 'STENDUPY MICHAŁ OGIŃSKI',
+            'description': 'md5:5851f3272bfc31f762d616040a1d609a',
+            'height': 480,
+            'uploader': 'oginski',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 18855,
+            'age_limit': 18,
+            'average_rating': float,
+            'timestamp': 1699705901,
+            'upload_date': '20231111',
+        }
    }, {
        'url': 'http://ebd.cda.pl/0x0/5749950c',
        'only_matching': True,
    }]

    def _download_age_confirm_page(self, url, video_id, *args, **kwargs):
-        form_data = random_birthday('rok', 'miesiac', 'dzien')
-        form_data.update({'return': url, 'module': 'video', 'module_id': video_id})
-        data, content_type = multipart_encode(form_data)
+        data, content_type = multipart_encode({'age_confirm': ''})
        return self._download_webpage(
-            urljoin(url, '/a/validatebirth'), video_id, *args,
+            url, video_id, *args,
            data=data, headers={
                'Referer': url,
                'Content-Type': content_type,
@ -164,7 +182,7 @@ class CDAIE(InfoExtractor):
        if 'Authorization' in self._API_HEADERS:
            return self._api_extract(video_id)
        else:
-            return self._web_extract(video_id, url)
+            return self._web_extract(video_id)

    def _api_extract(self, video_id):
        meta = self._download_json(
@ -197,9 +215,9 @@ class CDAIE(InfoExtractor):
            'view_count': meta.get('views'),
        }

-    def _web_extract(self, video_id, url):
+    def _web_extract(self, video_id):
        self._set_cookie('cda.pl', 'cda.player', 'html5')
-        webpage = self._download_webpage(
+        webpage, urlh = self._download_webpage_handle(
            f'{self._BASE_URL}/video/{video_id}/vfilm', video_id)

        if 'Ten film jest dostępny dla użytkowników premium' in webpage:
@ -209,10 +227,10 @@ class CDAIE(InfoExtractor):
            self.raise_geo_restricted()

        need_confirm_age = False
-        if self._html_search_regex(r'(<form[^>]+action="[^"]*/a/validatebirth[^"]*")',
+        if self._html_search_regex(r'(<button[^>]+name="[^"]*age_confirm[^"]*")',
                                   webpage, 'birthday validate form', default=None):
            webpage = self._download_age_confirm_page(
-                url, video_id, note='Confirming age')
+                urlh.url, video_id, note='Confirming age')
            need_confirm_age = True

        formats = []
@ -222,9 +240,6 @@ class CDAIE(InfoExtractor):
            (?:<\1[^>]*>[^<]*</\1>|(?!</\1>)(?:.|\n))*?
            <(span|meta)[^>]+itemprop=(["\'])name\4[^>]*>(?P<uploader>[^<]+)</\3>
        ''', webpage, 'uploader', default=None, group='uploader')
-        view_count = self._search_regex(
-            r'Odsłony:(?:\s|&nbsp;)*([0-9]+)', webpage,
-            'view_count', default=None)
        average_rating = self._search_regex(
            (r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)',
             r'<span[^>]+\bclass=["\']rating["\'][^>]*>(?P<rating_value>[0-9.]+)'), webpage, 'rating', fatal=False,
@ -235,7 +250,6 @@ class CDAIE(InfoExtractor):
            'title': self._og_search_title(webpage),
            'description': self._og_search_description(webpage),
            'uploader': uploader,
-            'view_count': int_or_none(view_count),
            'average_rating': float_or_none(average_rating),
            'thumbnail': self._og_search_thumbnail(webpage),
            'formats': formats,
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@ -957,7 +957,8 @@ class InfoExtractor:
        if urlh is False:
            assert not fatal
            return False
-        content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal, encoding=encoding)
+        content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal,
+                                             encoding=encoding, data=data)
        return (content, urlh)

    @staticmethod
@ -1005,8 +1006,10 @@ class InfoExtractor:
                'Visit http://blocklist.rkn.gov.ru/ for a block reason.',
                expected=True)

-    def _request_dump_filename(self, url, video_id):
-        basen = f'{video_id}_{url}'
+    def _request_dump_filename(self, url, video_id, data=None):
+        if data is not None:
+            data = hashlib.md5(data).hexdigest()
+        basen = join_nonempty(video_id, data, url, delim='_')
        trim_length = self.get_param('trim_file_name') or 240
        if len(basen) > trim_length:
            h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
@ -1028,16 +1031,18 @@ class InfoExtractor:
        except LookupError:
            return webpage_bytes.decode('utf-8', 'replace')

-    def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None, encoding=None):
+    def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True,
+                              prefix=None, encoding=None, data=None):
        webpage_bytes = urlh.read()
        if prefix is not None:
            webpage_bytes = prefix + webpage_bytes
+        url_or_request = self._create_request(url_or_request, data)
        if self.get_param('dump_intermediate_pages', False):
            self.to_screen('Dumping request to ' + urlh.url)
            dump = base64.b64encode(webpage_bytes).decode('ascii')
            self._downloader.to_screen(dump)
        if self.get_param('write_pages'):
-            filename = self._request_dump_filename(urlh.url, video_id)
+            filename = self._request_dump_filename(urlh.url, video_id, url_or_request.data)
            self.to_screen(f'Saving request to {filename}')
            with open(filename, 'wb') as outf:
                outf.write(webpage_bytes)
@ -1098,7 +1103,7 @@ class InfoExtractor:
                             impersonate=None, require_impersonation=False):
            if self.get_param('load_pages'):
                url_or_request = self._create_request(url_or_request, data, headers, query)
-                filename = self._request_dump_filename(url_or_request.url, video_id)
+                filename = self._request_dump_filename(url_or_request.url, video_id, url_or_request.data)
                self.to_screen(f'Loading request from {filename}')
                try:
                    with open(filename, 'rb') as dumpf:
@ -1738,12 +1743,16 @@ class InfoExtractor:
        traverse_json_ld(json_ld)
        return filter_dict(info)

-    def _search_nextjs_data(self, webpage, video_id, *, transform_source=None, fatal=True, **kw):
-        return self._parse_json(
-            self._search_regex(
-                r'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>',
-                webpage, 'next.js data', fatal=fatal, **kw),
-            video_id, transform_source=transform_source, fatal=fatal)
+    def _search_nextjs_data(self, webpage, video_id, *, fatal=True, default=NO_DEFAULT, **kw):
+        if default == '{}':
+            self._downloader.deprecation_warning('using `default=\'{}\'` is deprecated, use `default={}` instead')
+            default = {}
+        if default is not NO_DEFAULT:
+            fatal = False
+
+        return self._search_json(
+            r'<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>', webpage, 'next.js data',
+            video_id, end_pattern='</script>', fatal=fatal, default=default, **kw)

    def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal=True, traverse=('data', 0)):
        """Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function"""
--- a/yt_dlp/extractor/commonmistakes.py
+++ b/yt_dlp/extractor/commonmistakes.py
@ -40,3 +40,19 @@ class UnicodeBOMIE(InfoExtractor):
            'Your URL starts with a Byte Order Mark (BOM). '
            'Removing the BOM and looking for "%s" ...' % real_url)
        return self.url_result(real_url)
+
+
+class BlobIE(InfoExtractor):
+    IE_DESC = False
+    _VALID_URL = r'blob:'
+
+    _TESTS = [{
+        'url': 'blob:https://www.youtube.com/4eb3d090-a761-46e6-8083-c32016a36e3b',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        raise ExtractorError(
+            'You\'ve asked yt-dlp to download a blob URL. '
+            'A blob URL exists only locally in your browser. '
+            'It is not possible for yt-dlp to access it.', expected=True)
--- a/yt_dlp/extractor/crunchyroll.py
+++ b/yt_dlp/extractor/crunchyroll.py
@ -24,11 +24,15 @@ class CrunchyrollBaseIE(InfoExtractor):
    _BASE_URL = 'https://www.crunchyroll.com'
    _API_BASE = 'https://api.crunchyroll.com'
    _NETRC_MACHINE = 'crunchyroll'
+    _REFRESH_TOKEN = None
    _AUTH_HEADERS = None
+    _AUTH_EXPIRY = None
    _API_ENDPOINT = None
-    _BASIC_AUTH = None
+    _BASIC_AUTH = 'Basic ' + base64.b64encode(':'.join((
+        't-kdgp2h8c3jub8fn0fq',
+        'yfLDfMfrYvKXh4JXS1LEI2cCqu1v5Wan',
+    )).encode()).decode()
    _IS_PREMIUM = None
-    _CLIENT_ID = ('cr_web', 'noaihdevm_6iyg0a8l0q')
    _LOCALE_LOOKUP = {
        'ar': 'ar-SA',
        'de': 'de-DE',
@ -43,69 +47,78 @@ class CrunchyrollBaseIE(InfoExtractor):
        'hi': 'hi-IN',
    }

-    @property
-    def is_logged_in(self):
-        return bool(self._get_cookies(self._BASE_URL).get('etp_rt'))
+    def _set_auth_info(self, response):
+        CrunchyrollBaseIE._IS_PREMIUM = 'cr_premium' in traverse_obj(response, ('access_token', {jwt_decode_hs256}, 'benefits', ...))
+        CrunchyrollBaseIE._AUTH_HEADERS = {'Authorization': response['token_type'] + ' ' + response['access_token']}
+        CrunchyrollBaseIE._AUTH_EXPIRY = time_seconds(seconds=traverse_obj(response, ('expires_in', {float_or_none}), default=300) - 10)
+
+    def _request_token(self, headers, data, note='Requesting token', errnote='Failed to request token'):
+        try:
+            return self._download_json(
+                f'{self._BASE_URL}/auth/v1/token', None, note=note, errnote=errnote,
+                headers=headers, data=urlencode_postdata(data), impersonate=True)
+        except ExtractorError as error:
+            if not isinstance(error.cause, HTTPError) or error.cause.status != 403:
+                raise
+            if target := error.cause.response.extensions.get('impersonate'):
+                raise ExtractorError(f'Got HTTP Error 403 when using impersonate target "{target}"')
+            raise ExtractorError(
+                'Request blocked by Cloudflare. '
+                'Install the required impersonation dependency if possible, '
+                'or else navigate to Crunchyroll in your browser, '
+                'then pass the fresh cookies (with --cookies-from-browser or --cookies) '
+                'and your browser\'s User-Agent (with --user-agent)', expected=True)

    def _perform_login(self, username, password):
-        if self.is_logged_in:
+        if not CrunchyrollBaseIE._REFRESH_TOKEN:
+            CrunchyrollBaseIE._REFRESH_TOKEN = self.cache.load(self._NETRC_MACHINE, username)
+        if CrunchyrollBaseIE._REFRESH_TOKEN:
            return

-        upsell_response = self._download_json(
-            f'{self._API_BASE}/get_upsell_data.0.json', None, 'Getting session id',
-            query={
-                'sess_id': 1,
-                'device_id': 'whatvalueshouldbeforweb',
-                'device_type': 'com.crunchyroll.static',
-                'access_token': 'giKq5eY27ny3cqz',
-                'referer': f'{self._BASE_URL}/welcome/login'
-            })
-        if upsell_response['code'] != 'ok':
-            raise ExtractorError('Could not get session id')
-        session_id = upsell_response['data']['session_id']
-
-        login_response = self._download_json(
-            f'{self._API_BASE}/login.1.json', None, 'Logging in',
-            data=urlencode_postdata({
-                'account': username,
-                'password': password,
-                'session_id': session_id
-            }))
-        if login_response['code'] != 'ok':
-            raise ExtractorError('Login failed. Server message: %s' % login_response['message'], expected=True)
-        if not self.is_logged_in:
-            raise ExtractorError('Login succeeded but did not set etp_rt cookie')
-
-    def _update_auth(self):
-        if CrunchyrollBaseIE._AUTH_HEADERS and CrunchyrollBaseIE._AUTH_REFRESH > time_seconds():
-            return
-
-        if not CrunchyrollBaseIE._BASIC_AUTH:
-            cx_api_param = self._CLIENT_ID[self.is_logged_in]
-            self.write_debug(f'Using cxApiParam={cx_api_param}')
-            CrunchyrollBaseIE._BASIC_AUTH = 'Basic ' + base64.b64encode(f'{cx_api_param}:'.encode()).decode()
-
-        auth_headers = {'Authorization': CrunchyrollBaseIE._BASIC_AUTH}
-        if self.is_logged_in:
-            grant_type = 'etp_rt_cookie'
-        else:
-            grant_type = 'client_id'
-            auth_headers['ETP-Anonymous-ID'] = uuid.uuid4()
        try:
-            auth_response = self._download_json(
-                f'{self._BASE_URL}/auth/v1/token', None, note=f'Authenticating with grant_type={grant_type}',
-                headers=auth_headers, data=f'grant_type={grant_type}'.encode())
+            login_response = self._request_token(
+                headers={'Authorization': self._BASIC_AUTH}, data={
+                    'username': username,
+                    'password': password,
+                    'grant_type': 'password',
+                    'scope': 'offline_access',
+                }, note='Logging in', errnote='Failed to log in')
        except ExtractorError as error:
-            if isinstance(error.cause, HTTPError) and error.cause.status == 403:
-                raise ExtractorError(
-                    'Request blocked by Cloudflare; navigate to Crunchyroll in your browser, '
-                    'then pass the fresh cookies (with --cookies-from-browser or --cookies) '
-                    'and your browser\'s User-Agent (with --user-agent)', expected=True)
+            if isinstance(error.cause, HTTPError) and error.cause.status == 401:
+                raise ExtractorError('Invalid username and/or password', expected=True)
            raise

-        CrunchyrollBaseIE._IS_PREMIUM = 'cr_premium' in traverse_obj(auth_response, ('access_token', {jwt_decode_hs256}, 'benefits', ...))
-        CrunchyrollBaseIE._AUTH_HEADERS = {'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token']}
-        CrunchyrollBaseIE._AUTH_REFRESH = time_seconds(seconds=traverse_obj(auth_response, ('expires_in', {float_or_none}), default=300) - 10)
+        CrunchyrollBaseIE._REFRESH_TOKEN = login_response['refresh_token']
+        self.cache.store(self._NETRC_MACHINE, username, CrunchyrollBaseIE._REFRESH_TOKEN)
+        self._set_auth_info(login_response)
+
+    def _update_auth(self):
+        if CrunchyrollBaseIE._AUTH_HEADERS and CrunchyrollBaseIE._AUTH_EXPIRY > time_seconds():
+            return
+
+        auth_headers = {'Authorization': self._BASIC_AUTH}
+        if CrunchyrollBaseIE._REFRESH_TOKEN:
+            data = {
+                'refresh_token': CrunchyrollBaseIE._REFRESH_TOKEN,
+                'grant_type': 'refresh_token',
+                'scope': 'offline_access',
+            }
+        else:
+            data = {'grant_type': 'client_id'}
+            auth_headers['ETP-Anonymous-ID'] = uuid.uuid4()
+        try:
+            auth_response = self._request_token(auth_headers, data)
+        except ExtractorError as error:
+            username, password = self._get_login_info()
+            if not username or not isinstance(error.cause, HTTPError) or error.cause.status != 400:
+                raise
+            self.to_screen('Refresh token has expired. Re-logging in')
+            CrunchyrollBaseIE._REFRESH_TOKEN = None
+            self.cache.store(self._NETRC_MACHINE, username, None)
+            self._perform_login(username, password)
+            return
+
+        self._set_auth_info(auth_response)

    def _locale_from_language(self, language):
        config_locale = self._configuration_arg('metadata', ie_key=CrunchyrollBetaIE, casesense=True)
@ -168,7 +181,8 @@ class CrunchyrollBaseIE(InfoExtractor):
        self._update_auth()
        stream_response = self._download_json(
            f'https://cr-play-service.prd.crunchyrollsvc.com/v1/{identifier}/console/switch/play',
-            display_id, note='Downloading stream info', headers=CrunchyrollBaseIE._AUTH_HEADERS)
+            display_id, note='Downloading stream info', errnote='Failed to download stream info',
+            headers=CrunchyrollBaseIE._AUTH_HEADERS)

        available_formats = {'': ('', '', stream_response['url'])}
        for hardsub_lang, stream in traverse_obj(stream_response, ('hardSubs', {dict.items}, lambda _, v: v[1]['url'])):
@ -383,11 +397,12 @@ class CrunchyrollBetaIE(CrunchyrollCmsBaseIE):

        if not self._IS_PREMIUM and traverse_obj(response, (f'{object_type}_metadata', 'is_premium_only')):
            message = f'This {object_type} is for premium members only'
-            if self.is_logged_in:
-                raise ExtractorError(message, expected=True)
-            self.raise_login_required(message)
-
-        result['formats'], result['subtitles'] = self._extract_stream(internal_id)
+            if CrunchyrollBaseIE._REFRESH_TOKEN:
+                self.raise_no_formats(message, expected=True, video_id=internal_id)
+            else:
+                self.raise_login_required(message, method='password', metadata_available=True)
+        else:
+            result['formats'], result['subtitles'] = self._extract_stream(internal_id)

        result['chapters'] = self._extract_chapters(internal_id)

@ -573,14 +588,16 @@ class CrunchyrollMusicIE(CrunchyrollBaseIE):
        if not response:
            raise ExtractorError(f'No video with id {internal_id} could be found (possibly region locked?)', expected=True)

+        result = self._transform_music_response(response)
+
        if not self._IS_PREMIUM and response.get('isPremiumOnly'):
            message = f'This {response.get("type") or "media"} is for premium members only'
-            if self.is_logged_in:
-                raise ExtractorError(message, expected=True)
-            self.raise_login_required(message)
-
-        result = self._transform_music_response(response)
-        result['formats'], _ = self._extract_stream(f'music/{internal_id}', internal_id)
+            if CrunchyrollBaseIE._REFRESH_TOKEN:
+                self.raise_no_formats(message, expected=True, video_id=internal_id)
+            else:
+                self.raise_login_required(message, method='password', metadata_available=True)
+        else:
+            result['formats'], _ = self._extract_stream(f'music/{internal_id}', internal_id)

        return result

--- a/yt_dlp/extractor/einthusan.py
+++ b/yt_dlp/extractor/einthusan.py
@ -1,105 +0,0 @@
-import json
-
-from .common import InfoExtractor
-from ..compat import (
-    compat_b64decode,
-    compat_str,
-    compat_urlparse,
-)
-from ..utils import (
-    extract_attributes,
-    ExtractorError,
-    get_elements_by_class,
-    urlencode_postdata,
-)
-
-
-class EinthusanIE(InfoExtractor):
-    _VALID_URL = r'https?://(?P<host>einthusan\.(?:tv|com|ca))/movie/watch/(?P<id>[^/?#&]+)'
-    _TESTS = [{
-        'url': 'https://einthusan.tv/movie/watch/9097/',
-        'md5': 'ff0f7f2065031b8a2cf13a933731c035',
-        'info_dict': {
-            'id': '9097',
-            'ext': 'mp4',
-            'title': 'Ae Dil Hai Mushkil',
-            'description': 'md5:33ef934c82a671a94652a9b4e54d931b',
-            'thumbnail': r're:^https?://.*\.jpg$',
-        }
-    }, {
-        'url': 'https://einthusan.tv/movie/watch/51MZ/?lang=hindi',
-        'only_matching': True,
-    }, {
-        'url': 'https://einthusan.com/movie/watch/9097/',
-        'only_matching': True,
-    }, {
-        'url': 'https://einthusan.ca/movie/watch/4E9n/?lang=hindi',
-        'only_matching': True,
-    }]
-
-    # reversed from jsoncrypto.prototype.decrypt() in einthusan-PGMovieWatcher.js
-    def _decrypt(self, encrypted_data, video_id):
-        return self._parse_json(compat_b64decode((
-            encrypted_data[:10] + encrypted_data[-1] + encrypted_data[12:-1]
-        )).decode('utf-8'), video_id)
-
-    def _real_extract(self, url):
-        mobj = self._match_valid_url(url)
-        host = mobj.group('host')
-        video_id = mobj.group('id')
-
-        webpage = self._download_webpage(url, video_id)
-
-        title = self._html_search_regex(r'<h3>([^<]+)</h3>', webpage, 'title')
-
-        player_params = extract_attributes(self._search_regex(
-            r'(<section[^>]+id="UIVideoPlayer"[^>]+>)', webpage, 'player parameters'))
-
-        page_id = self._html_search_regex(
-            '<html[^>]+data-pageid="([^"]+)"', webpage, 'page ID')
-        video_data = self._download_json(
-            'https://%s/ajax/movie/watch/%s/' % (host, video_id), video_id,
-            data=urlencode_postdata({
-                'xEvent': 'UIVideoPlayer.PingOutcome',
-                'xJson': json.dumps({
-                    'EJOutcomes': player_params['data-ejpingables'],
-                    'NativeHLS': False
-                }),
-                'arcVersion': 3,
-                'appVersion': 59,
-                'gorilla.csrf.Token': page_id,
-            }))['Data']
-
-        if isinstance(video_data, compat_str) and video_data.startswith('/ratelimited/'):
-            raise ExtractorError(
-                'Download rate reached. Please try again later.', expected=True)
-
-        ej_links = self._decrypt(video_data['EJLinks'], video_id)
-
-        formats = []
-
-        m3u8_url = ej_links.get('HLSLink')
-        if m3u8_url:
-            formats.extend(self._extract_m3u8_formats(
-                m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native'))
-
-        mp4_url = ej_links.get('MP4Link')
-        if mp4_url:
-            formats.append({
-                'url': mp4_url,
-            })
-
-        description = get_elements_by_class('synopsis', webpage)[0]
-        thumbnail = self._html_search_regex(
-            r'''<img[^>]+src=(["'])(?P<url>(?!\1).+?/moviecovers/(?!\1).+?)\1''',
-            webpage, 'thumbnail url', fatal=False, group='url')
-        if thumbnail is not None:
-            thumbnail = compat_urlparse.urljoin(url, thumbnail)
-
-        return {
-            'id': video_id,
-            'title': title,
-            'formats': formats,
-            'thumbnail': thumbnail,
-            'description': description,
-        }
--- a/yt_dlp/extractor/eplus.py
+++ b/yt_dlp/extractor/eplus.py
@ -16,13 +16,31 @@ class EplusIbIE(InfoExtractor):
    _VALID_URL = [r'https?://live\.eplus\.jp/ex/player\?ib=(?P<id>(?:\w|%2B|%2F){86}%3D%3D)',
                  r'https?://live\.eplus\.jp/(?P<id>sample|\d+)']
    _TESTS = [{
-        'url': 'https://live.eplus.jp/ex/player?ib=YEFxb3Vyc2Dombnjg7blkrLlrablnJLjgrnjgq%2Fjg7zjg6vjgqLjgqTjg4njg6vlkIzlpb3kvJpgTGllbGxhIQ%3D%3D',
+        'url': 'https://live.eplus.jp/ex/player?ib=41K6Wzbr3PlcMD%2FOKHFlC%2FcZCe2Eaw7FK%2BpJS1ooUHki8d0vGSy2mYqxillQBe1dSnOxU%2B8%2FzXKls4XPBSb3vw%3D%3D',
        'info_dict': {
-            'id': '354502-0001-002',
-            'title': 'LoveLive!Series Presents COUNTDOWN LoveLive! 2021→2022～LIVE with a smile!～【Streaming+(配信)】',
+            'id': '335699-0001-006',
+            'title': '少女☆歌劇 レヴュースタァライト -The LIVE 青嵐- BLUE GLITTER <定点映像配信>【Streaming+(配信)】',
            'live_status': 'was_live',
-            'release_date': '20211231',
-            'release_timestamp': 1640952000,
+            'release_date': '20201221',
+            'release_timestamp': 1608544800,
+        },
+        'params': {
+            'skip_download': True,
+            'ignore_no_formats_error': True,
+        },
+        'expected_warnings': [
+            'This event may not be accessible',
+            'No video formats found',
+            'Requested format is not available',
+        ],
+    }, {
+        'url': 'https://live.eplus.jp/ex/player?ib=6QSsQdyRAwOFZrEHWlhRm7vocgV%2FO0YzBZ%2BaBEBg1XR%2FmbLn0R%2F048dUoAY038%2F%2F92MJ73BsoAtvUpbV6RLtDQ%3D%3D&show_id=2371511',
+        'info_dict': {
+            'id': '348021-0054-001',
+            'title': 'ラブライブ!スーパースター!! Liella! First LoveLive! Tour ～Starlines～【東京/DAY.1】',
+            'live_status': 'was_live',
+            'release_date': '20220115',
+            'release_timestamp': 1642233600,
            'description': str,
        },
        'params': {
@ -124,6 +142,10 @@ class EplusIbIE(InfoExtractor):
        if data_json.get('drm_mode') == 'ON':
            self.report_drm(video_id)

+        if data_json.get('is_pass_ticket') == 'YES':
+            raise ExtractorError(
+                'This URL is for a pass ticket instead of a player page', expected=True)
+
        delivery_status = data_json.get('delivery_status')
        archive_mode = data_json.get('archive_mode')
        release_timestamp = try_call(lambda: unified_timestamp(data_json['event_datetime']) - 32400)
--- a/yt_dlp/extractor/europa.py
+++ b/yt_dlp/extractor/europa.py
@ -94,13 +94,14 @@ class EuropaIE(InfoExtractor):

 class EuroParlWebstreamIE(InfoExtractor):
    _VALID_URL = r'''(?x)
-        https?://multimedia\.europarl\.europa\.eu/[^/#?]+/
-        (?:(?!video)[^/#?]+/[\w-]+_)(?P<id>[\w-]+)
+        https?://multimedia\.europarl\.europa\.eu/
+        (?:\w+/)?webstreaming/(?:[\w-]+_)?(?P<id>[\w-]+)
    '''
    _TESTS = [{
        'url': 'https://multimedia.europarl.europa.eu/pl/webstreaming/plenary-session_20220914-0900-PLENARY',
        'info_dict': {
            'id': '62388b15-d85b-4add-99aa-ba12ccf64f0d',
+            'display_id': '20220914-0900-PLENARY',
            'ext': 'mp4',
            'title': 'Plenary session',
            'release_timestamp': 1663139069,
@ -125,6 +126,7 @@ class EuroParlWebstreamIE(InfoExtractor):
        'url': 'https://multimedia.europarl.europa.eu/en/webstreaming/committee-on-culture-and-education_20230301-1130-COMMITTEE-CULT',
        'info_dict': {
            'id': '7355662c-8eac-445e-4bb9-08db14b0ddd7',
+            'display_id': '20230301-1130-COMMITTEE-CULT',
            'ext': 'mp4',
            'release_date': '20230301',
            'title': 'Committee on Culture and Education',
@ -142,6 +144,19 @@ class EuroParlWebstreamIE(InfoExtractor):
            'live_status': 'is_live',
        },
        'skip': 'Not live anymore'
+    }, {
+        'url': 'https://multimedia.europarl.europa.eu/en/webstreaming/20240320-1345-SPECIAL-PRESSER',
+        'info_dict': {
+            'id': 'c1f11567-5b52-470a-f3e1-08dc3c216ace',
+            'display_id': '20240320-1345-SPECIAL-PRESSER',
+            'ext': 'mp4',
+            'release_date': '20240320',
+            'title': 'md5:7c6c814cac55dea5e2d87bf8d3db2234',
+            'release_timestamp': 1710939767,
+        }
+    }, {
+        'url': 'https://multimedia.europarl.europa.eu/webstreaming/briefing-for-media-on-2024-european-elections_20240429-1000-SPECIAL-OTHER',
+        'only_matching': True,
    }]

    def _real_extract(self, url):
@ -166,6 +181,7 @@ class EuroParlWebstreamIE(InfoExtractor):

        return {
            'id': json_info['id'],
+            'display_id': display_id,
            'title': traverse_obj(webpage_nextjs, (('mediaItem', 'title'), ('title', )), get_all=False),
            'formats': formats,
            'subtitles': subtitles,
--- a/yt_dlp/extractor/facebook.py
+++ b/yt_dlp/extractor/facebook.py
@ -560,7 +560,7 @@ class FacebookIE(InfoExtractor):
                    js_data, lambda x: x['jsmods']['instances'], list) or [])

        def extract_dash_manifest(video, formats):
-            dash_manifest = video.get('dash_manifest')
+            dash_manifest = traverse_obj(video, 'dash_manifest', 'playlist', expected_type=str)
            if dash_manifest:
                formats.extend(self._parse_mpd_formats(
                    compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest)),
--- a/yt_dlp/extractor/googledrive.py
+++ b/yt_dlp/extractor/googledrive.py
@ -1,9 +1,11 @@
 import re

 from .common import InfoExtractor
+from .youtube import YoutubeIE
 from ..compat import compat_parse_qs
 from ..utils import (
    ExtractorError,
+    bug_reports_message,
    determine_ext,
    extract_attributes,
    get_element_by_class,
@ -38,6 +40,17 @@ class GoogleDriveIE(InfoExtractor):
            'duration': 45,
            'thumbnail': 'https://drive.google.com/thumbnail?id=0ByeS4oOUV-49Zzh4R1J6R09zazQ',
        }
+    }, {
+        # has itag 50, which is not in YoutubeIE._formats (royalty-free music from 1922)
+        'url': 'https://drive.google.com/uc?id=1IP0o8dHcQrIHGgVyp0Ofvx2cGfLzyO1x',
+        'md5': '322db8d63dd19788c04050a4bba67073',
+        'info_dict': {
+            'id': '1IP0o8dHcQrIHGgVyp0Ofvx2cGfLzyO1x',
+            'ext': 'mp3',
+            'title': 'My Buddy - Henry Burr - Gus Kahn - Walter Donaldson.mp3',
+            'duration': 184,
+            'thumbnail': 'https://drive.google.com/thumbnail?id=1IP0o8dHcQrIHGgVyp0Ofvx2cGfLzyO1x',
+        },
    }, {
        # video can't be watched anonymously due to view count limit reached,
        # but can be downloaded (see https://github.com/ytdl-org/youtube-dl/issues/14046)
@ -58,22 +71,8 @@ class GoogleDriveIE(InfoExtractor):
        'only_matching': True,
    }]
    _FORMATS_EXT = {
-        '5': 'flv',
-        '6': 'flv',
-        '13': '3gp',
-        '17': '3gp',
-        '18': 'mp4',
-        '22': 'mp4',
-        '34': 'flv',
-        '35': 'flv',
-        '36': '3gp',
-        '37': 'mp4',
-        '38': 'mp4',
-        '43': 'webm',
-        '44': 'webm',
-        '45': 'webm',
-        '46': 'webm',
-        '59': 'mp4',
+        **{k: v['ext'] for k, v in YoutubeIE._formats.items() if v.get('ext')},
+        '50': 'm4a',
    }
    _BASE_URL_CAPTIONS = 'https://drive.google.com/timedtext'
    _CAPTIONS_ENTRY_TAG = {
@ -194,10 +193,13 @@ class GoogleDriveIE(InfoExtractor):
                if len(fmt_stream_split) < 2:
                    continue
                format_id, format_url = fmt_stream_split[:2]
+                ext = self._FORMATS_EXT.get(format_id)
+                if not ext:
+                    self.report_warning(f'Unknown format {format_id}{bug_reports_message()}')
                f = {
                    'url': lowercase_escape(format_url),
                    'format_id': format_id,
-                    'ext': self._FORMATS_EXT[format_id],
+                    'ext': ext,
                }
                resolution = resolutions.get(format_id)
                if resolution:
--- a/yt_dlp/extractor/hytale.py
+++ b/yt_dlp/extractor/hytale.py
@ -1,7 +1,8 @@
 import re

+from .cloudflarestream import CloudflareStreamIE
 from .common import InfoExtractor
-from ..utils import traverse_obj
+from ..utils.traversal import traverse_obj


 class HytaleIE(InfoExtractor):
@ -49,7 +50,7 @@ class HytaleIE(InfoExtractor):
        entries = [
            self.url_result(
                f'https://cloudflarestream.com/{video_hash}/manifest/video.mpd?parentOrigin=https%3A%2F%2Fhytale.com',
-                title=self._titles.get(video_hash), url_transparent=True)
+                CloudflareStreamIE, title=self._titles.get(video_hash), url_transparent=True)
            for video_hash in re.findall(
                r'<stream\s+class\s*=\s*"ql-video\s+cf-stream"\s+src\s*=\s*"([a-f0-9]{32})"',
                webpage)
--- a/yt_dlp/extractor/jable.py
+++ b/yt_dlp/extractor/jable.py
@ -1,103 +0,0 @@
-import re
-
-from .common import InfoExtractor
-from ..utils import (
-    InAdvancePagedList,
-    int_or_none,
-    orderedSet,
-    unified_strdate,
-)
-
-
-class JableIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?jable\.tv/videos/(?P<id>[\w-]+)'
-    _TESTS = [{
-        'url': 'https://jable.tv/videos/pppd-812/',
-        'md5': 'f1537283a9bc073c31ff86ca35d9b2a6',
-        'info_dict': {
-            'id': 'pppd-812',
-            'ext': 'mp4',
-            'title': 'PPPD-812 只要表現好巨乳女教師吉根柚莉愛就獎勵學生們在白虎穴內射出精液',
-            'description': 'md5:5b6d4199a854f62c5e56e26ccad19967',
-            'thumbnail': r're:^https?://.*\.jpg$',
-            'age_limit': 18,
-            'like_count': int,
-            'view_count': int,
-        },
-    }, {
-        'url': 'https://jable.tv/videos/apak-220/',
-        'md5': '71f9239d69ced58ab74a816908847cc1',
-        'info_dict': {
-            'id': 'apak-220',
-            'ext': 'mp4',
-            'title': 'md5:5c3861b7cf80112a6e2b70bccf170824',
-            'description': '',
-            'thumbnail': r're:^https?://.*\.jpg$',
-            'age_limit': 18,
-            'like_count': int,
-            'view_count': int,
-            'upload_date': '20220319',
-        },
-    }]
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-        formats = self._extract_m3u8_formats(
-            self._search_regex(r'var\s+hlsUrl\s*=\s*\'([^\']+)', webpage, 'hls_url'), video_id, 'mp4', m3u8_id='hls')
-
-        return {
-            'id': video_id,
-            'title': self._og_search_title(webpage),
-            'description': self._og_search_description(webpage, default=''),
-            'thumbnail': self._og_search_thumbnail(webpage, default=None),
-            'formats': formats,
-            'age_limit': 18,
-            'upload_date': unified_strdate(self._search_regex(
-                r'class="inactive-color">\D+\s+(\d{4}-\d+-\d+)', webpage, 'upload_date', default=None)),
-            'view_count': int_or_none(self._search_regex(
-                r'#icon-eye"></use></svg>\n*<span class="mr-3">([\d ]+)',
-                webpage, 'view_count', default='').replace(' ', '')),
-            'like_count': int_or_none(self._search_regex(
-                r'#icon-heart"></use></svg><span class="count">(\d+)', webpage, 'link_count', default=None)),
-        }
-
-
-class JablePlaylistIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?jable\.tv/(?:categories|models|tags)/(?P<id>[\w-]+)'
-    _TESTS = [{
-        'url': 'https://jable.tv/models/kaede-karen/',
-        'info_dict': {
-            'id': 'kaede-karen',
-            'title': '楓カレン',
-        },
-        'playlist_count': 34,
-    }, {
-        'url': 'https://jable.tv/categories/roleplay/',
-        'only_matching': True,
-    }, {
-        'url': 'https://jable.tv/tags/girl/',
-        'only_matching': True,
-    }]
-
-    def _real_extract(self, url):
-        playlist_id = self._match_id(url)
-        webpage = self._download_webpage(url, playlist_id)
-
-        def page_func(page_num):
-            return [
-                self.url_result(player_url, JableIE)
-                for player_url in orderedSet(re.findall(
-                    r'href="(https://jable.tv/videos/[\w-]+/?)"',
-                    self._download_webpage(url, playlist_id, query={
-                        'mode': 'async',
-                        'from': page_num + 1,
-                        'function': 'get_block',
-                        'block_id': 'list_videos_common_videos_list',
-                    }, note=f'Downloading page {page_num + 1}')))]
-
-        return self.playlist_result(
-            InAdvancePagedList(page_func, int_or_none(self._search_regex(
-                r'from:(\d+)">[^<]+\s*&raquo;', webpage, 'last page number', default=1)), 24),
-            playlist_id, self._search_regex(
-                r'<h2 class="h3-md mb-1">([^<]+)', webpage, 'playlist title', default=None))
--- a/yt_dlp/extractor/jiosaavn.py
+++ b/yt_dlp/extractor/jiosaavn.py
@ -1,10 +1,12 @@
 import functools
+import math
+import re

 from .common import InfoExtractor
 from ..utils import (
-    format_field,
+    InAdvancePagedList,
+    clean_html,
    int_or_none,
-    js_to_json,
    make_archive_id,
    smuggle_url,
    unsmuggle_url,
@ -16,6 +18,7 @@ from ..utils.traversal import traverse_obj


 class JioSaavnBaseIE(InfoExtractor):
+    _API_URL = 'https://www.jiosaavn.com/api.php'
    _VALID_BITRATES = {'16', '32', '64', '128', '320'}

    @functools.cached_property
@ -30,7 +33,7 @@ class JioSaavnBaseIE(InfoExtractor):
    def _extract_formats(self, song_data):
        for bitrate in self.requested_bitrates:
            media_data = self._download_json(
-                'https://www.jiosaavn.com/api.php', song_data['id'],
+                self._API_URL, song_data['id'],
                f'Downloading format info for {bitrate}',
                fatal=False, data=urlencode_postdata({
                    '__call': 'song.generateAuthToken',
@ -50,31 +53,45 @@ class JioSaavnBaseIE(InfoExtractor):
                'vcodec': 'none',
            }

-    def _extract_song(self, song_data):
+    def _extract_song(self, song_data, url=None):
        info = traverse_obj(song_data, {
            'id': ('id', {str}),
-            'title': ('title', 'text', {str}),
-            'album': ('album', 'text', {str}),
-            'thumbnail': ('image', 0, {url_or_none}),
+            'title': ('song', {clean_html}),
+            'album': ('album', {clean_html}),
+            'thumbnail': ('image', {url_or_none}, {lambda x: re.sub(r'-\d+x\d+\.', '-500x500.', x)}),
            'duration': ('duration', {int_or_none}),
            'view_count': ('play_count', {int_or_none}),
            'release_year': ('year', {int_or_none}),
-            'artists': ('artists', lambda _, v: v['role'] == 'singer', 'name', {str}),
-            'webpage_url': ('perma_url', {url_or_none}),  # for song, playlist extraction
+            'artists': ('primary_artists', {lambda x: x.split(', ') if x else None}),
+            'webpage_url': ('perma_url', {url_or_none}),
        })
-        if not info.get('webpage_url'):  # for album extraction / fallback
-            info['webpage_url'] = format_field(
-                song_data, [('title', 'action')], 'https://www.jiosaavn.com%s') or None
-        if webpage_url := info['webpage_url']:
-            info['_old_archive_ids'] = [make_archive_id(JioSaavnSongIE, url_basename(webpage_url))]
+        if webpage_url := info.get('webpage_url') or url:
+            info['display_id'] = url_basename(webpage_url)
+            info['_old_archive_ids'] = [make_archive_id(JioSaavnSongIE, info['display_id'])]

        return info

-    def _extract_initial_data(self, url, display_id):
-        webpage = self._download_webpage(url, display_id)
-        return self._search_json(
-            r'window\.__INITIAL_DATA__\s*=', webpage,
-            'initial data', display_id, transform_source=js_to_json)
+    def _call_api(self, type_, token, note='API', params=None):
+        """Query the `webapi.get` call of `_API_URL` and return the decoded JSON.
+
+        @param type_   value sent as the `type` query parameter
+        @param token   value sent as the `token` query parameter; also passed to
+                       `_download_json` as the id used in its messages
+        @param note    label interpolated into the download/error messages
+        @param params  optional mapping of extra query parameters; its entries
+                       take precedence over the defaults below on key collision
+        """
+        # `None` rather than `{}` as the default: a dict default is created once
+        # and shared by every call that omits the argument
+        return self._download_json(
+            self._API_URL, token, f'Downloading {note} JSON', f'Unable to download {note} JSON',
+            query={
+                '__call': 'webapi.get',
+                '_format': 'json',
+                '_marker': '0',
+                'ctx': 'web6dot0',
+                'token': token,
+                'type': type_,
+                **(params or {}),
+            })
+
+    def _yield_songs(self, playlist_data):
+        """Yield a url_transparent `JioSaavnSongIE` result for each usable song.
+
+        Songs are read from `playlist_data['songs']`. Items lacking an `id` or a
+        valid `perma_url` are filtered out. The song `id` and its
+        `encrypted_media_url` are smuggled into the result URL.
+        """
+        # `_extract_song` only sets `webpage_url` when `perma_url` passes
+        # `url_or_none`, so apply the same validation in the filter; a merely
+        # truthy value would make the `song_info['webpage_url']` lookup below
+        # raise KeyError and abort the whole playlist
+        for song_data in traverse_obj(playlist_data, (
+                'songs', lambda _, v: v['id'] and url_or_none(v['perma_url']))):
+            song_info = self._extract_song(song_data)
+            url = smuggle_url(song_info['webpage_url'], {
+                'id': song_data['id'],
+                'encrypted_media_url': song_data['encrypted_media_url'],
+            })
+            yield self.url_result(url, JioSaavnSongIE, url_transparent=True, **song_info)


 class JioSaavnSongIE(JioSaavnBaseIE):
@ -85,10 +102,11 @@ class JioSaavnSongIE(JioSaavnBaseIE):
        'md5': '3b84396d15ed9e083c3106f1fa589c04',
        'info_dict': {
            'id': 'IcoLuefJ',
+            'display_id': 'OQsEfQFVUXk',
            'ext': 'm4a',
            'title': 'Leja Re',
            'album': 'Leja Re',
-            'thumbnail': 'https://c.saavncdn.com/258/Leja-Re-Hindi-2018-20181124024539-500x500.jpg',
+            'thumbnail': r're:https?://c.saavncdn.com/258/Leja-Re-Hindi-2018-20181124024539-500x500.jpg',
            'duration': 205,
            'view_count': int,
            'release_year': 2018,
@ -111,8 +129,8 @@ class JioSaavnSongIE(JioSaavnBaseIE):
            result = {'id': song_data['id']}
        else:
            # only extract metadata if this is not a url_transparent result
-            song_data = self._extract_initial_data(url, self._match_id(url))['song']['song']
-            result = self._extract_song(song_data)
+            song_data = self._call_api('song', self._match_id(url))['songs'][0]
+            result = self._extract_song(song_data, url)

        result['formats'] = list(self._extract_formats(song_data))
        return result
@ -130,19 +148,12 @@ class JioSaavnAlbumIE(JioSaavnBaseIE):
        'playlist_count': 10,
    }]

-    def _entries(self, playlist_data):
-        for song_data in traverse_obj(playlist_data, (
-                'modules', lambda _, x: x['key'] == 'list', 'data', lambda _, v: v['title']['action'])):
-            song_info = self._extract_song(song_data)
-            # album song data is missing artists and release_year, need to re-extract metadata
-            yield self.url_result(song_info['webpage_url'], JioSaavnSongIE, **song_info)
-
    def _real_extract(self, url):
        display_id = self._match_id(url)
-        album_data = self._extract_initial_data(url, display_id)['albumView']
+        album_data = self._call_api('album', display_id)

        return self.playlist_result(
-            self._entries(album_data), display_id, traverse_obj(album_data, ('album', 'title', 'text', {str})))
+            self._yield_songs(album_data), display_id, traverse_obj(album_data, ('title', {str})))


 class JioSaavnPlaylistIE(JioSaavnBaseIE):
@ -154,21 +165,30 @@ class JioSaavnPlaylistIE(JioSaavnBaseIE):
            'id': 'LlJ8ZWT1ibN5084vKHRj2Q__',
            'title': 'Mood English',
        },
-        'playlist_mincount': 50,
+        'playlist_mincount': 301,
+    }, {
+        'url': 'https://www.jiosaavn.com/s/playlist/2279fbe391defa793ad7076929a2f5c9/mood-hindi/DVR,pFUOwyXqIp77B1JF,A__',
+        'info_dict': {
+            'id': 'DVR,pFUOwyXqIp77B1JF,A__',
+            'title': 'Mood Hindi',
+        },
+        'playlist_mincount': 801,
    }]
+    _PAGE_SIZE = 50

-    def _entries(self, playlist_data):
-        for song_data in traverse_obj(playlist_data, ('list', lambda _, v: v['perma_url'])):
-            song_info = self._extract_song(song_data)
-            url = smuggle_url(song_info['webpage_url'], {
-                'id': song_data['id'],
-                'encrypted_media_url': song_data['encrypted_media_url'],
-            })
-            yield self.url_result(url, JioSaavnSongIE, url_transparent=True, **song_info)
+    def _fetch_page(self, token, page):
+        """Request playlist page number `page` for `token`, `_PAGE_SIZE` items per page."""
+        paging = {'p': page, 'n': self._PAGE_SIZE}
+        return self._call_api('playlist', token, f'playlist page {page}', paging)
+
+    def _entries(self, token, first_page_data, page):
+        """Yield the song results for page index `page`.
+
+        A falsy `page` (index 0) reuses the already downloaded `first_page_data`;
+        any other index is fetched as API page `page + 1`.
+        """
+        if page:
+            page_data = self._fetch_page(token, page + 1)
+        else:
+            page_data = first_page_data
+        yield from self._yield_songs(page_data)

    def _real_extract(self, url):
        display_id = self._match_id(url)
-        playlist_data = self._extract_initial_data(url, display_id)['playlist']['playlist']
+        playlist_data = self._fetch_page(display_id, 1)
+        total_pages = math.ceil(int(playlist_data['list_count']) / self._PAGE_SIZE)

-        return self.playlist_result(
-            self._entries(playlist_data), display_id, traverse_obj(playlist_data, ('title', 'text', {str})))
+        return self.playlist_result(InAdvancePagedList(
+            functools.partial(self._entries, display_id, playlist_data),
+            total_pages, self._PAGE_SIZE), display_id, traverse_obj(playlist_data, ('listname', {str})))
--- a/yt_dlp/extractor/mixch.py
+++ b/yt_dlp/extractor/mixch.py
@ -1,6 +1,12 @@
 from .common import InfoExtractor
 from ..networking.exceptions import HTTPError
-from ..utils import ExtractorError, UserNotLive, int_or_none, url_or_none
+from ..utils import (
+    ExtractorError,
+    UserNotLive,
+    int_or_none,
+    str_or_none,
+    url_or_none,
+)
 from ..utils.traversal import traverse_obj


@ -9,17 +15,20 @@ class MixchIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?mixch\.tv/u/(?P<id>\d+)'

    _TESTS = [{
-        'url': 'https://mixch.tv/u/16236849/live',
+        'url': 'https://mixch.tv/u/16943797/live',
        'skip': 'don\'t know if this live persists',
        'info_dict': {
-            'id': '16236849',
-            'title': '24配信シェア⭕️投票🙏💦',
-            'comment_count': 13145,
-            'view_count': 28348,
-            'timestamp': 1636189377,
-            'uploader': '🦥伊咲👶🏻#フレアワ',
-            'uploader_id': '16236849',
-        }
+            'id': '16943797',
+            'ext': 'mp4',
+            'title': '#EntView #カリナ #セブチ 2024-05-05 06:58',
+            'comment_count': int,
+            'view_count': int,
+            'timestamp': 1714726805,
+            'uploader': 'Ent.View K-news🎶💕',
+            'uploader_id': '16943797',
+            'live_status': 'is_live',
+            'upload_date': '20240503',
+        },
    }, {
        'url': 'https://mixch.tv/u/16137876/live',
        'only_matching': True,
@ -48,8 +57,20 @@ class MixchIE(InfoExtractor):
                'protocol': 'm3u8',
            }],
            'is_live': True,
+            '__post_extractor': self.extract_comments(video_id),
        }

+    def _get_comments(self, video_id):
+        """Yield one comment dict per item of the live's `messages` API response."""
+        messages = self._download_json(
+            f'https://mixch.tv/api-web/lives/{video_id}/messages', video_id,
+            note='Downloading comments', errnote='Failed to download comments')
+        # Mapping from comment fields to the response keys they are read from
+        comment_fields = {
+            'author': ('name', {str}),
+            'author_id': ('user_id', {str_or_none}),
+            # an empty message id is turned into None
+            'id': ('message_id', {str}, {lambda x: x or None}),
+            'text': ('body', {str}),
+            'timestamp': ('created', {int}),
+        }
+        yield from traverse_obj(messages, (..., comment_fields))
+

 class MixchArchiveIE(InfoExtractor):
    IE_NAME = 'mixch:archive'
--- a/yt_dlp/extractor/neteasemusic.py
+++ b/yt_dlp/extractor/neteasemusic.py
@ -561,7 +561,8 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
            'timestamp': ('createTime', {self.kilo_or_none}),
        })

-        if not self._yes_playlist(info['songs'] and program_id, info['mainSong']['id']):
+        if not self._yes_playlist(
+                info['songs'] and program_id, info['mainSong']['id'], playlist_label='program', video_label='song'):
            formats = self.extract_formats(info['mainSong'])

            return {
--- a/yt_dlp/extractor/nfb.py
+++ b/yt_dlp/extractor/nfb.py
@ -5,7 +5,6 @@ from ..utils import (
    merge_dicts,
    parse_count,
    url_or_none,
-    urljoin,
 )
 from ..utils.traversal import traverse_obj

@ -16,8 +15,7 @@ class NFBBaseIE(InfoExtractor):

    def _extract_ep_data(self, webpage, video_id, fatal=False):
        return self._search_json(
-            r'const\s+episodesData\s*=', webpage, 'episode data', video_id,
-            contains_pattern=r'\[\s*{(?s:.+)}\s*\]', fatal=fatal) or []
+            r'episodesData\s*:', webpage, 'episode data', video_id, fatal=fatal) or {}

    def _extract_ep_info(self, data, video_id, slug=None):
        info = traverse_obj(data, (lambda _, v: video_id in v['embed_url'], {
@ -224,18 +222,14 @@ class NFBIE(NFBBaseIE):
        # type_ can change from film to serie(s) after redirect; new slug may have episode number
        type_, slug = self._match_valid_url(urlh.url).group('type', 'id')

-        embed_url = urljoin(f'https://www.{site}.ca', self._html_search_regex(
-            r'<[^>]+\bid=["\']player-iframe["\'][^>]*\bsrc=["\']([^"\']+)', webpage, 'embed url'))
-        video_id = self._match_id(embed_url)  # embed url has unique slug
-        player = self._download_webpage(embed_url, video_id, 'Downloading player page')
-        if 'MESSAGE_GEOBLOCKED' in player:
-            self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
+        player_data = self._search_json(
+            r'window\.PLAYER_OPTIONS\[[^\]]+\]\s*=', webpage, 'player data', slug)
+        video_id = self._match_id(player_data['overlay']['url'])  # overlay url always has unique slug

        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
-            self._html_search_regex(r'source:\s*\'([^\']+)', player, 'm3u8 url'),
-            video_id, 'mp4', m3u8_id='hls')
+            player_data['source'], video_id, 'mp4', m3u8_id='hls')

-        if dv_source := self._html_search_regex(r'dvSource:\s*\'([^\']+)', player, 'dv', default=None):
+        if dv_source := url_or_none(player_data.get('dvSource')):
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                dv_source, video_id, 'mp4', m3u8_id='dv', preference=-2, fatal=False)
            for fmt in fmts:
@ -246,17 +240,16 @@ class NFBIE(NFBBaseIE):
        info = {
            'id': video_id,
            'title': self._html_search_regex(
-                r'<[^>]+\bid=["\']titleHeader["\'][^>]*>\s*<h1[^>]*>\s*([^<]+?)\s*</h1>',
+                r'["\']nfb_version_title["\']\s*:\s*["\']([^"\']+)',
                webpage, 'title', default=None),
            'description': self._html_search_regex(
                r'<[^>]+\bid=["\']tabSynopsis["\'][^>]*>\s*<p[^>]*>\s*([^<]+)',
                webpage, 'description', default=None),
-            'thumbnail': self._html_search_regex(
-                r'poster:\s*\'([^\']+)', player, 'thumbnail', default=None),
+            'thumbnail': url_or_none(player_data.get('poster')),
            'uploader': self._html_search_regex(
-                r'<[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)', webpage, 'uploader', default=None),
+                r'<[^>]+\bitemprop=["\']director["\'][^>]*>([^<]+)', webpage, 'uploader', default=None),
            'release_year': int_or_none(self._html_search_regex(
-                r'<[^>]+\bitemprop=["\']datePublished["\'][^>]*>([^<]+)',
+                r'["\']nfb_version_year["\']\s*:\s*["\']([^"\']+)',
                webpage, 'release_year', default=None)),
        } if type_ == 'film' else self._extract_ep_info(self._extract_ep_data(webpage, video_id, slug), video_id)

--- a/yt_dlp/extractor/patreon.py
+++ b/yt_dlp/extractor/patreon.py
@ -1,8 +1,8 @@
 import itertools
+import urllib.parse

 from .common import InfoExtractor
 from .vimeo import VimeoIE
-from ..compat import compat_urllib_parse_unquote
 from ..networking.exceptions import HTTPError
 from ..utils import (
    KNOWN_EXTENSIONS,
@ -14,7 +14,6 @@ from ..utils import (
    parse_iso8601,
    str_or_none,
    traverse_obj,
-    try_get,
    url_or_none,
    urljoin,
 )
@ -199,7 +198,50 @@ class PatreonIE(PatreonBaseIE):
            'channel_id': '2147162',
            'uploader_url': 'https://www.patreon.com/yaboyroshi',
        },
+    }, {
+        # NSFW vimeo embed URL
+        'url': 'https://www.patreon.com/posts/4k-spiderman-4k-96414599',
+        'info_dict': {
+            'id': '902250943',
+            'ext': 'mp4',
+            'title': '❤️(4K) Spiderman Girl Yeonhwa’s Gift ❤️(4K) 스파이더맨걸 연화의 선물',
+            'description': '❤️(4K) Spiderman Girl Yeonhwa’s Gift \n❤️(4K) 스파이더맨걸 연화의 선물',
+            'uploader': 'Npickyeonhwa',
+            'uploader_id': '90574422',
+            'uploader_url': 'https://www.patreon.com/Yeonhwa726',
+            'channel_id': '10237902',
+            'channel_url': 'https://www.patreon.com/Yeonhwa726',
+            'duration': 70,
+            'timestamp': 1705150153,
+            'upload_date': '20240113',
+            'comment_count': int,
+            'like_count': int,
+            'thumbnail': r're:^https?://.+',
+        },
+        'params': {'skip_download': 'm3u8'},
+    }, {
+        # multiple attachments/embeds
+        'url': 'https://www.patreon.com/posts/holy-wars-solos-100601977',
+        'playlist_count': 3,
+        'info_dict': {
+            'id': '100601977',
+            'title': '"Holy Wars" (Megadeth) Solos Transcription & Lesson/Analysis',
+            'description': 'md5:d099ab976edfce6de2a65c2b169a88d3',
+            'uploader': 'Bradley Hall',
+            'uploader_id': '24401883',
+            'uploader_url': 'https://www.patreon.com/bradleyhallguitar',
+            'channel_id': '3193932',
+            'channel_url': 'https://www.patreon.com/bradleyhallguitar',
+            'channel_follower_count': int,
+            'timestamp': 1710777855,
+            'upload_date': '20240318',
+            'like_count': int,
+            'comment_count': int,
+            'thumbnail': r're:^https?://.+',
+        },
+        'skip': 'Patron-only content',
    }]
+    _RETURN_TYPE = 'video'

    def _real_extract(self, url):
        video_id = self._match_id(url)
@ -214,95 +256,108 @@ class PatreonIE(PatreonBaseIE):
                'include': 'audio,user,user_defined_tags,campaign,attachments_media',
            })
        attributes = post['data']['attributes']
-        title = attributes['title'].strip()
-        image = attributes.get('image') or {}
-        info = {
-            'id': video_id,
-            'title': title,
-            'description': clean_html(attributes.get('content')),
-            'thumbnail': image.get('large_url') or image.get('url'),
-            'timestamp': parse_iso8601(attributes.get('published_at')),
-            'like_count': int_or_none(attributes.get('like_count')),
-            'comment_count': int_or_none(attributes.get('comment_count')),
-        }
-        can_view_post = traverse_obj(attributes, 'current_user_can_view')
-        if can_view_post and info['comment_count']:
-            info['__post_extractor'] = self.extract_comments(video_id)
+        info = traverse_obj(attributes, {
+            'title': ('title', {str.strip}),
+            'description': ('content', {clean_html}),
+            'thumbnail': ('image', ('large_url', 'url'), {url_or_none}, any),
+            'timestamp': ('published_at', {parse_iso8601}),
+            'like_count': ('like_count', {int_or_none}),
+            'comment_count': ('comment_count', {int_or_none}),
+        })

-        for i in post.get('included', []):
-            i_type = i.get('type')
-            if i_type == 'media':
-                media_attributes = i.get('attributes') or {}
-                download_url = media_attributes.get('download_url')
+        entries = []
+        idx = 0
+        for include in traverse_obj(post, ('included', lambda _, v: v['type'])):
+            include_type = include['type']
+            if include_type == 'media':
+                media_attributes = traverse_obj(include, ('attributes', {dict})) or {}
+                download_url = url_or_none(media_attributes.get('download_url'))
                ext = mimetype2ext(media_attributes.get('mimetype'))

                # if size_bytes is None, this media file is likely unavailable
                # See: https://github.com/yt-dlp/yt-dlp/issues/4608
                size_bytes = int_or_none(media_attributes.get('size_bytes'))
                if download_url and ext in KNOWN_EXTENSIONS and size_bytes is not None:
-                    # XXX: what happens if there are multiple attachments?
-                    return {
-                        **info,
+                    idx += 1
+                    entries.append({
+                        'id': f'{video_id}-{idx}',
                        'ext': ext,
                        'filesize': size_bytes,
                        'url': download_url,
-                    }
-            elif i_type == 'user':
-                user_attributes = i.get('attributes')
-                if user_attributes:
-                    info.update({
-                        'uploader': user_attributes.get('full_name'),
-                        'uploader_id': str_or_none(i.get('id')),
-                        'uploader_url': user_attributes.get('url'),
                    })

-            elif i_type == 'post_tag':
-                info.setdefault('tags', []).append(traverse_obj(i, ('attributes', 'value')))
+            elif include_type == 'user':
+                info.update(traverse_obj(include, {
+                    'uploader': ('attributes', 'full_name', {str}),
+                    'uploader_id': ('id', {str_or_none}),
+                    'uploader_url': ('attributes', 'url', {url_or_none}),
+                }))

-            elif i_type == 'campaign':
-                info.update({
-                    'channel': traverse_obj(i, ('attributes', 'title')),
-                    'channel_id': str_or_none(i.get('id')),
-                    'channel_url': traverse_obj(i, ('attributes', 'url')),
-                    'channel_follower_count': int_or_none(traverse_obj(i, ('attributes', 'patron_count'))),
-                })
+            elif include_type == 'post_tag':
+                if post_tag := traverse_obj(include, ('attributes', 'value', {str})):
+                    info.setdefault('tags', []).append(post_tag)
+
+            elif include_type == 'campaign':
+                info.update(traverse_obj(include, {
+                    'channel': ('attributes', 'title', {str}),
+                    'channel_id': ('id', {str_or_none}),
+                    'channel_url': ('attributes', 'url', {url_or_none}),
+                    'channel_follower_count': ('attributes', 'patron_count', {int_or_none}),
+                }))

        # handle Vimeo embeds
-        if try_get(attributes, lambda x: x['embed']['provider']) == 'Vimeo':
-            embed_html = try_get(attributes, lambda x: x['embed']['html'])
-            v_url = url_or_none(compat_urllib_parse_unquote(
-                self._search_regex(r'(https(?:%3A%2F%2F|://)player\.vimeo\.com.+app_id(?:=|%3D)+\d+)', embed_html, 'vimeo url', fatal=False)))
-            if v_url:
-                v_url = VimeoIE._smuggle_referrer(v_url, 'https://patreon.com')
-                if self._request_webpage(v_url, video_id, 'Checking Vimeo embed URL', fatal=False, errnote=False):
-                    return self.url_result(v_url, VimeoIE, url_transparent=True, **info)
+        if traverse_obj(attributes, ('embed', 'provider')) == 'Vimeo':
+            v_url = urllib.parse.unquote(self._html_search_regex(
+                r'(https(?:%3A%2F%2F|://)player\.vimeo\.com.+app_id(?:=|%3D)+\d+)',
+                traverse_obj(attributes, ('embed', 'html', {str})), 'vimeo url', fatal=False) or '')
+            if url_or_none(v_url) and self._request_webpage(
+                    v_url, video_id, 'Checking Vimeo embed URL',
+                    headers={'Referer': 'https://patreon.com/'},
+                    fatal=False, errnote=False):
+                entries.append(self.url_result(
+                    VimeoIE._smuggle_referrer(v_url, 'https://patreon.com/'),
+                    VimeoIE, url_transparent=True))

-        embed_url = try_get(attributes, lambda x: x['embed']['url'])
+        embed_url = traverse_obj(attributes, ('embed', 'url', {url_or_none}))
        if embed_url and self._request_webpage(embed_url, video_id, 'Checking embed URL', fatal=False, errnote=False):
-            return self.url_result(embed_url, **info)
+            entries.append(self.url_result(embed_url))

-        post_file = traverse_obj(attributes, 'post_file')
+        post_file = traverse_obj(attributes, ('post_file', {dict}))
        if post_file:
            name = post_file.get('name')
            ext = determine_ext(name)
            if ext in KNOWN_EXTENSIONS:
-                return {
-                    **info,
+                entries.append({
+                    'id': video_id,
                    'ext': ext,
                    'url': post_file['url'],
-                }
+                })
            elif name == 'video' or determine_ext(post_file.get('url')) == 'm3u8':
                formats, subtitles = self._extract_m3u8_formats_and_subtitles(post_file['url'], video_id)
-                return {
-                    **info,
+                entries.append({
+                    'id': video_id,
                    'formats': formats,
                    'subtitles': subtitles,
-                }
+                })

-        if can_view_post is False:
+        can_view_post = traverse_obj(attributes, 'current_user_can_view')
+        comments = None
+        if can_view_post and info.get('comment_count'):
+            comments = self.extract_comments(video_id)
+
+        if not entries and can_view_post is False:
            self.raise_no_formats('You do not have access to this post', video_id=video_id, expected=True)
-        else:
+        elif not entries:
            self.raise_no_formats('No supported media found in this post', video_id=video_id, expected=True)
+        elif len(entries) == 1:
+            info.update(entries[0])
+        else:
+            for entry in entries:
+                entry.update(info)
+            return self.playlist_result(entries, video_id, **info, __post_extractor=comments)
+
+        info['id'] = video_id
+        info['__post_extractor'] = comments
        return info

    def _get_comments(self, post_id):
--- a/yt_dlp/extractor/porn91.py
+++ b/yt_dlp/extractor/porn91.py
@ -1,95 +0,0 @@
-import urllib.parse
-from .common import InfoExtractor
-from ..utils import (
-    determine_ext,
-    int_or_none,
-    parse_duration,
-    remove_end,
-    unified_strdate,
-    ExtractorError,
-)
-
-
-class Porn91IE(InfoExtractor):
-    IE_NAME = '91porn'
-    _VALID_URL = r'(?:https?://)(?:www\.|)91porn\.com/view_video.php\?([^#]+&)?viewkey=(?P<id>\w+)'
-
-    _TESTS = [{
-        'url': 'http://91porn.com/view_video.php?viewkey=7e42283b4f5ab36da134',
-        'md5': 'd869db281402e0ef4ddef3c38b866f86',
-        'info_dict': {
-            'id': '7e42283b4f5ab36da134',
-            'title': '18岁大一漂亮学妹，水嫩性感，再爽一次！',
-            'description': 'md5:1ff241f579b07ae936a54e810ad2e891',
-            'ext': 'mp4',
-            'duration': 431,
-            'upload_date': '20150520',
-            'comment_count': int,
-            'view_count': int,
-            'age_limit': 18,
-        }
-    }, {
-        'url': 'https://91porn.com/view_video.php?viewkey=7ef0cf3d362c699ab91c',
-        'md5': 'f8fd50540468a6d795378cd778b40226',
-        'info_dict': {
-            'id': '7ef0cf3d362c699ab91c',
-            'title': '真实空乘，冲上云霄第二部',
-            'description': 'md5:618bf9652cafcc66cd277bd96789baea',
-            'ext': 'mp4',
-            'duration': 248,
-            'upload_date': '20221119',
-            'comment_count': int,
-            'view_count': int,
-            'age_limit': 18,
-        }
-    }]
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        self._set_cookie('91porn.com', 'language', 'cn_CN')
-
-        webpage = self._download_webpage(
-            'http://91porn.com/view_video.php?viewkey=%s' % video_id, video_id)
-
-        if '视频不存在,可能已经被删除或者被举报为不良内容!' in webpage:
-            raise ExtractorError('91 Porn says: Video does not exist', expected=True)
-
-        daily_limit = self._search_regex(
-            r'作为游客，你每天只可观看([\d]+)个视频', webpage, 'exceeded daily limit', default=None, fatal=False)
-        if daily_limit:
-            raise ExtractorError(f'91 Porn says: Daily limit {daily_limit} videos exceeded', expected=True)
-
-        video_link_url = self._search_regex(
-            r'document\.write\(\s*strencode2\s*\(\s*((?:"[^"]+")|(?:\'[^\']+\'))', webpage, 'video link')
-        video_link_url = self._search_regex(
-            r'src=["\']([^"\']+)["\']', urllib.parse.unquote(video_link_url), 'unquoted video link')
-
-        formats, subtitles = self._get_formats_and_subtitle(video_link_url, video_id)
-
-        return {
-            'id': video_id,
-            'title': remove_end(self._html_extract_title(webpage).replace('\n', ''), 'Chinese homemade video').strip(),
-            'formats': formats,
-            'subtitles': subtitles,
-            'upload_date': unified_strdate(self._search_regex(
-                r'<span\s+class=["\']title-yakov["\']>(\d{4}-\d{2}-\d{2})</span>', webpage, 'upload_date', fatal=False)),
-            'description': self._html_search_regex(
-                r'<span\s+class=["\']more title["\']>\s*([^<]+)', webpage, 'description', fatal=False),
-            'duration': parse_duration(self._search_regex(
-                r'时长:\s*<span[^>]*>\s*(\d+(?::\d+){1,2})', webpage, 'duration', fatal=False)),
-            'comment_count': int_or_none(self._search_regex(
-                r'留言:\s*<span[^>]*>\s*(\d+)\s*</span>', webpage, 'comment count', fatal=False)),
-            'view_count': int_or_none(self._search_regex(
-                r'热度:\s*<span[^>]*>\s*(\d+)\s*</span>', webpage, 'view count', fatal=False)),
-            'age_limit': 18,
-        }
-
-    def _get_formats_and_subtitle(self, video_link_url, video_id):
-        ext = determine_ext(video_link_url)
-        if ext == 'm3u8':
-            formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_link_url, video_id, ext='mp4')
-        else:
-            formats = [{'url': video_link_url, 'ext': ext}]
-            subtitles = {}
-
-        return formats, subtitles
--- a/yt_dlp/extractor/pornhub.py
+++ b/yt_dlp/extractor/pornhub.py
@ -97,7 +97,7 @@ class PornHubBaseIE(InfoExtractor):
        login_form = self._hidden_inputs(login_page)

        login_form.update({
-            'username': username,
+            'email': username,
            'password': password,
        })

--- a/yt_dlp/extractor/soundcloud.py
+++ b/yt_dlp/extractor/soundcloud.py
@ -361,7 +361,7 @@ class SoundcloudBaseIE(InfoExtractor):
            'like_count': extract_count('favoritings') or extract_count('likes'),
            'comment_count': extract_count('comment'),
            'repost_count': extract_count('reposts'),
-            'genre': info.get('genre'),
+            'genres': traverse_obj(info, ('genre', {str}, {lambda x: x or None}, all)),
            'formats': formats if not extract_flat else None
        }

@ -395,10 +395,10 @@ class SoundcloudIE(SoundcloudBaseIE):
    _TESTS = [
        {
            'url': 'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
-            'md5': 'ebef0a451b909710ed1d7787dddbf0d7',
+            'md5': 'de9bac153e7427a7333b4b0c1b6a18d2',
            'info_dict': {
                'id': '62986583',
-                'ext': 'mp3',
+                'ext': 'opus',
                'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
                'description': 'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o\'d',
                'uploader': 'E.T. ExTerrestrial Music',
@ -411,6 +411,9 @@ class SoundcloudIE(SoundcloudBaseIE):
                'like_count': int,
                'comment_count': int,
                'repost_count': int,
+                'thumbnail': 'https://i1.sndcdn.com/artworks-000031955188-rwb18x-original.jpg',
+                'uploader_url': 'https://soundcloud.com/ethmusic',
+                'genres': [],
            }
        },
        # geo-restricted
@ -418,7 +421,7 @@ class SoundcloudIE(SoundcloudBaseIE):
            'url': 'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep',
            'info_dict': {
                'id': '47127627',
-                'ext': 'mp3',
+                'ext': 'opus',
                'title': 'Goldrushed',
                'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com',
                'uploader': 'The Royal Concept',
@ -431,6 +434,9 @@ class SoundcloudIE(SoundcloudBaseIE):
                'like_count': int,
                'comment_count': int,
                'repost_count': int,
+                'uploader_url': 'https://soundcloud.com/the-concept-band',
+                'thumbnail': 'https://i1.sndcdn.com/artworks-v8bFHhXm7Au6-0-original.jpg',
+                'genres': ['Alternative'],
            },
        },
        # private link
@ -452,6 +458,9 @@ class SoundcloudIE(SoundcloudBaseIE):
                'like_count': int,
                'comment_count': int,
                'repost_count': int,
+                'uploader_url': 'https://soundcloud.com/jaimemf',
+                'thumbnail': 'https://a1.sndcdn.com/images/default_avatar_large.png',
+                'genres': ['youtubedl'],
            },
        },
        # private link (alt format)
@ -473,6 +482,9 @@ class SoundcloudIE(SoundcloudBaseIE):
                'like_count': int,
                'comment_count': int,
                'repost_count': int,
+                'uploader_url': 'https://soundcloud.com/jaimemf',
+                'thumbnail': 'https://a1.sndcdn.com/images/default_avatar_large.png',
+                'genres': ['youtubedl'],
            },
        },
        # downloadable song
@ -482,6 +494,21 @@ class SoundcloudIE(SoundcloudBaseIE):
            'info_dict': {
                'id': '343609555',
                'ext': 'wav',
+                'title': 'The Following',
+                'description': '',
+                'uploader': '80M',
+                'uploader_id': '312384765',
+                'uploader_url': 'https://soundcloud.com/the80m',
+                'upload_date': '20170922',
+                'timestamp': 1506120436,
+                'duration': 397.228,
+                'thumbnail': 'https://i1.sndcdn.com/artworks-000243916348-ktoo7d-original.jpg',
+                'license': 'all-rights-reserved',
+                'like_count': int,
+                'comment_count': int,
+                'repost_count': int,
+                'view_count': int,
+                'genres': ['Dance & EDM'],
            },
        },
        # private link, downloadable format
@ -503,6 +530,9 @@ class SoundcloudIE(SoundcloudBaseIE):
                'like_count': int,
                'comment_count': int,
                'repost_count': int,
+                'thumbnail': 'https://i1.sndcdn.com/artworks-000240712245-kedn4p-original.jpg',
+                'uploader_url': 'https://soundcloud.com/oriuplift',
+                'genres': ['Trance'],
            },
        },
        # no album art, use avatar pic for thumbnail
@ -525,6 +555,8 @@ class SoundcloudIE(SoundcloudBaseIE):
                'like_count': int,
                'comment_count': int,
                'repost_count': int,
+                'uploader_url': 'https://soundcloud.com/garyvee',
+                'genres': [],
            },
            'params': {
                'skip_download': True,
@ -532,13 +564,13 @@ class SoundcloudIE(SoundcloudBaseIE):
        },
        {
            'url': 'https://soundcloud.com/giovannisarani/mezzo-valzer',
-            'md5': 'e22aecd2bc88e0e4e432d7dcc0a1abf7',
+            'md5': '8227c3473a4264df6b02ad7e5b7527ac',
            'info_dict': {
                'id': '583011102',
-                'ext': 'mp3',
+                'ext': 'opus',
                'title': 'Mezzo Valzer',
-                'description': 'md5:4138d582f81866a530317bae316e8b61',
-                'uploader': 'Micronie',
+                'description': 'md5:f4d5f39d52e0ccc2b4f665326428901a',
+                'uploader': 'Giovanni Sarani',
                'uploader_id': '3352531',
                'timestamp': 1551394171,
                'upload_date': '20190228',
@ -549,6 +581,8 @@ class SoundcloudIE(SoundcloudBaseIE):
                'like_count': int,
                'comment_count': int,
                'repost_count': int,
+                'genres': ['Piano'],
+                'uploader_url': 'https://soundcloud.com/giovannisarani',
            },
        },
        {
--- a/yt_dlp/extractor/stacommu.py
+++ b/yt_dlp/extractor/stacommu.py
@ -174,7 +174,7 @@ class TheaterComplexTownBaseIE(StacommuBaseIE):


 class TheaterComplexTownVODIE(TheaterComplexTownBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?theater-complex\.town/(?:en/)?videos/episodes/(?P<id>\w+)'
+    _VALID_URL = r'https?://(?:www\.)?theater-complex\.town/(?:(?:en|ja)/)?videos/episodes/(?P<id>\w+)'
    IE_NAME = 'theatercomplextown:vod'
    _TESTS = [{
        'url': 'https://www.theater-complex.town/videos/episodes/hoxqidYNoAn7bP92DN6p78',
@ -195,6 +195,9 @@ class TheaterComplexTownVODIE(TheaterComplexTownBaseIE):
    }, {
        'url': 'https://www.theater-complex.town/en/videos/episodes/6QT7XYwM9dJz5Gf9VB6K5y',
        'only_matching': True,
+    }, {
+        'url': 'https://www.theater-complex.town/ja/videos/episodes/hoxqidYNoAn7bP92DN6p78',
+        'only_matching': True,
    }]

    _API_PATH = 'videoEpisodes'
@ -204,7 +207,7 @@ class TheaterComplexTownVODIE(TheaterComplexTownBaseIE):


 class TheaterComplexTownPPVIE(TheaterComplexTownBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?theater-complex\.town/(?:en/)?ppv/(?P<id>\w+)'
+    _VALID_URL = r'https?://(?:www\.)?theater-complex\.town/(?:(?:en|ja)/)?ppv/(?P<id>\w+)'
    IE_NAME = 'theatercomplextown:ppv'
    _TESTS = [{
        'url': 'https://www.theater-complex.town/ppv/wytW3X7khrjJBUpKuV3jen',
@ -223,6 +226,9 @@ class TheaterComplexTownPPVIE(TheaterComplexTownBaseIE):
    }, {
        'url': 'https://www.theater-complex.town/en/ppv/wytW3X7khrjJBUpKuV3jen',
        'only_matching': True,
+    }, {
+        'url': 'https://www.theater-complex.town/ja/ppv/qwUVmLmGEiZ3ZW6it9uGys',
+        'only_matching': True,
    }]

    _API_PATH = 'events'
--- a/yt_dlp/extractor/stv.py
+++ b/yt_dlp/extractor/stv.py
@ -41,7 +41,7 @@ class STVPlayerIE(InfoExtractor):
        ptype, video_id = self._match_valid_url(url).groups()

        webpage = self._download_webpage(url, video_id, fatal=False) or ''
-        props = self._search_nextjs_data(webpage, video_id, default='{}').get('props') or {}
+        props = self._search_nextjs_data(webpage, video_id, default={}).get('props') or {}
        player_api_cache = try_get(
            props, lambda x: x['initialReduxState']['playerApiCache']) or {}

--- a/yt_dlp/extractor/tiktok.py
+++ b/yt_dlp/extractor/tiktok.py
@ -45,19 +45,18 @@ class TikTokBaseIE(InfoExtractor):
        # "app id": aweme = 1128, trill = 1180, musical_ly = 1233, universal = 0
        'aid': '0',
    }
-    _KNOWN_APP_INFO = [
-        '7351144126450059040',
-        '7351149742343391009',
-        '7351153174894626592',
-    ]
    _APP_INFO_POOL = None
    _APP_INFO = None
    _APP_USER_AGENT = None

+    @property
+    def _KNOWN_APP_INFO(self):
+        return self._configuration_arg('app_info', ie_key=TikTokIE)
+
    @property
    def _API_HOSTNAME(self):
        return self._configuration_arg(
-            'api_hostname', ['api22-normal-c-useast2a.tiktokv.com'], ie_key=TikTokIE)[0]
+            'api_hostname', ['api16-normal-c-useast1a.tiktokv.com'], ie_key=TikTokIE)[0]

    def _get_next_app_info(self):
        if self._APP_INFO_POOL is None:
@ -66,13 +65,10 @@ class TikTokBaseIE(InfoExtractor):
                for key, default in self._APP_INFO_DEFAULTS.items()
                if key != 'iid'
            }
-            app_info_list = (
-                self._configuration_arg('app_info', ie_key=TikTokIE)
-                or random.sample(self._KNOWN_APP_INFO, len(self._KNOWN_APP_INFO)))
            self._APP_INFO_POOL = [
                {**defaults, **dict(
                    (k, v) for k, v in zip(self._APP_INFO_DEFAULTS, app_info.split('/')) if v
-                )} for app_info in app_info_list
+                )} for app_info in self._KNOWN_APP_INFO
            ]

        if not self._APP_INFO_POOL:
@ -757,11 +753,13 @@ class TikTokIE(TikTokBaseIE):

    def _real_extract(self, url):
        video_id, user_id = self._match_valid_url(url).group('id', 'user_id')
-        try:
-            return self._extract_aweme_app(video_id)
-        except ExtractorError as e:
-            e.expected = True
-            self.report_warning(f'{e}; trying with webpage')
+
+        if self._KNOWN_APP_INFO:
+            try:
+                return self._extract_aweme_app(video_id)
+            except ExtractorError as e:
+                e.expected = True
+                self.report_warning(f'{e}; trying with webpage')

        url = self._create_url(user_id, video_id)
        webpage = self._download_webpage(url, video_id, headers={'User-Agent': 'Mozilla/5.0'})
@ -776,7 +774,7 @@ class TikTokIE(TikTokBaseIE):
            status = traverse_obj(sigi_data, ('VideoPage', 'statusCode', {int})) or 0
            video_data = traverse_obj(sigi_data, ('ItemModule', video_id, {dict}))

-        elif next_data := self._search_nextjs_data(webpage, video_id, default='{}'):
+        elif next_data := self._search_nextjs_data(webpage, video_id, default={}):
            self.write_debug('Found next.js data')
            status = traverse_obj(next_data, ('props', 'pageProps', 'statusCode', {int})) or 0
            video_data = traverse_obj(next_data, ('props', 'pageProps', 'itemInfo', 'itemStruct', {dict}))
--- a/yt_dlp/extractor/tv5mondeplus.py
+++ b/yt_dlp/extractor/tv5mondeplus.py
@ -2,85 +2,88 @@ import urllib.parse

 from .common import InfoExtractor
 from ..utils import (
+    clean_html,
    determine_ext,
    extract_attributes,
+    get_element_by_class,
+    get_element_html_by_class,
    int_or_none,
-    parse_duration,
-    traverse_obj,
-    try_get,
    url_or_none,
 )
+from ..utils.traversal import traverse_obj


 class TV5MondePlusIE(InfoExtractor):
-    IE_DESC = 'TV5MONDE+'
-    _VALID_URL = r'https?://(?:www\.)?(?:tv5mondeplus|revoir\.tv5monde)\.com/toutes-les-videos/[^/]+/(?P<id>[^/?#]+)'
+    IE_NAME = 'TV5MONDE'
+    _VALID_URL = r'https?://(?:www\.)?tv5monde\.com/tv/video/(?P<id>[^/?#]+)'
    _TESTS = [{
-        # movie
-        'url': 'https://revoir.tv5monde.com/toutes-les-videos/cinema/les-novices',
-        'md5': 'c86f60bf8b75436455b1b205f9745955',
+        # documentary
+        'url': 'https://www.tv5monde.com/tv/video/65931-baudouin-l-heritage-d-un-roi-baudouin-l-heritage-d-un-roi',
+        'md5': 'd2a708902d3df230a357c99701aece05',
        'info_dict': {
-            'id': 'ZX0ipMyFQq_6D4BA7b',
-            'display_id': 'les-novices',
+            'id': '3FPa7JMu21_6D4BA7b',
+            'display_id': '65931-baudouin-l-heritage-d-un-roi-baudouin-l-heritage-d-un-roi',
            'ext': 'mp4',
-            'title': 'Les novices',
-            'description': 'md5:2e7c33ba3ad48dabfcc2a956b88bde2b',
-            'upload_date': '20230821',
-            'thumbnail': 'https://revoir.tv5monde.com/uploads/media/video_thumbnail/0738/60/01e952b7ccf36b7c6007ec9131588954ab651de9.jpeg',
-            'duration': 5177,
-            'episode': 'Les novices',
+            'title': "Baudouin, l'héritage d'un roi",
+            'thumbnail': 'https://psi.tv5monde.com/upsilon-images/960x540/6f/baudouin-f49c6b0e.jpg',
+            'duration': 4842,
+            'upload_date': '20240130',
+            'timestamp': 1706641242,
+            'episode': "BAUDOUIN, L'HERITAGE D'UN ROI",
+            'description': 'md5:78125c74a5cac06d7743a2d09126edad',
+            'series': "Baudouin, l'héritage d'un roi",
        },
    }, {
        # series episode
-        'url': 'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/opj-les-dents-de-la-terre-2',
+        'url': 'https://www.tv5monde.com/tv/video/52952-toute-la-vie-mardi-23-mars-2021',
+        'md5': 'f5e09637cadd55639c05874e22eb56bf',
        'info_dict': {
-            'id': 'wJ0eeEPozr_6D4BA7b',
-            'display_id': 'opj-les-dents-de-la-terre-2',
+            'id': 'obRRZ8m6g9_6D4BA7b',
+            'display_id': '52952-toute-la-vie-mardi-23-mars-2021',
            'ext': 'mp4',
-            'title': "OPJ - Les dents de la Terre (2)",
-            'description': 'md5:288f87fd68d993f814e66e60e5302d9d',
-            'upload_date': '20230823',
-            'series': 'OPJ',
-            'episode': 'Les dents de la Terre (2)',
-            'duration': 2877,
-            'thumbnail': 'https://dl-revoir.tv5monde.com/images/1a/5753448.jpg'
+            'title': 'Toute la vie',
+            'description': 'md5:a824a2e1dfd94cf45fa379a1fb43ce65',
+            'thumbnail': 'https://psi.tv5monde.com/media/image/960px/5880553.jpg',
+            'duration': 2526,
+            'upload_date': '20230721',
+            'timestamp': 1689971646,
+            'series': 'Toute la vie',
+            'episode': 'Mardi 23 mars 2021',
        },
    }, {
        # movie
-        'url': 'https://revoir.tv5monde.com/toutes-les-videos/cinema/ceux-qui-travaillent',
-        'md5': '32fa0cde16a4480d1251502a66856d5f',
+        'url': 'https://www.tv5monde.com/tv/video/8771-ce-fleuve-qui-nous-charrie-ce-fleuve-qui-nous-charrie-p001-ce-fleuve-qui-nous-charrie',
+        'md5': '87cefc34e10a6bf4f7823cccd7b36eb2',
        'info_dict': {
-            'id': 'dc57a011-ec4b-4648-2a9a-4f03f8352ed3',
-            'display_id': 'ceux-qui-travaillent',
+            'id': 'DOcfvdLKXL_6D4BA7b',
+            'display_id': '8771-ce-fleuve-qui-nous-charrie-ce-fleuve-qui-nous-charrie-p001-ce-fleuve-qui-nous-charrie',
            'ext': 'mp4',
-            'title': 'Ceux qui travaillent',
-            'description': 'md5:570e8bb688036ace873b2d50d24c026d',
-            'upload_date': '20210819',
+            'title': 'Ce fleuve qui nous charrie',
+            'description': 'md5:62ba3f875343c7fc4082bdfbbc1be992',
+            'thumbnail': 'https://psi.tv5monde.com/media/image/960px/5476617.jpg',
+            'duration': 5300,
+            'upload_date': '20210822',
+            'timestamp': 1629594105,
+            'episode': 'CE FLEUVE QUI NOUS CHARRIE-P001-CE FLEUVE QUI NOUS CHARRIE',
+            'series': 'Ce fleuve qui nous charrie',
        },
-        'skip': 'no longer available',
    }, {
-        # series episode
-        'url': 'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/vestiaires-caro-actrice',
+        # news
+        'url': 'https://www.tv5monde.com/tv/video/70402-tv5monde-le-journal-edition-du-08-05-24-11h',
+        'md5': 'c62977d6d10754a2ecebba70ad370479',
        'info_dict': {
-            'id': '9e9d599e-23af-6915-843e-ecbf62e97925',
-            'display_id': 'vestiaires-caro-actrice',
+            'id': 'LgQFrOCNsc_6D4BA7b',
+            'display_id': '70402-tv5monde-le-journal-edition-du-08-05-24-11h',
            'ext': 'mp4',
-            'title': "Vestiaires - Caro actrice",
-            'description': 'md5:db15d2e1976641e08377f942778058ea',
-            'upload_date': '20210819',
-            'series': "Vestiaires",
-            'episode': 'Caro actrice',
+            'title': 'TV5MONDE, le journal',
+            'description': 'md5:777dc209eaa4423b678477c36b0b04a8',
+            'thumbnail': 'https://psi.tv5monde.com/media/image/960px/6184105.jpg',
+            'duration': 854,
+            'upload_date': '20240508',
+            'timestamp': 1715159640,
+            'series': 'TV5MONDE, le journal',
+            'episode': 'EDITION DU 08/05/24 - 11H',
        },
-        'params': {
-            'skip_download': True,
-        },
-        'skip': 'no longer available',
-    }, {
-        'url': 'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/neuf-jours-en-hiver-neuf-jours-en-hiver',
-        'only_matching': True,
-    }, {
-        'url': 'https://revoir.tv5monde.com/toutes-les-videos/info-societe/le-journal-de-la-rts-edition-du-30-01-20-19h30',
-        'only_matching': True,
    }]
    _GEO_BYPASS = False

@ -98,7 +101,6 @@ class TV5MondePlusIE(InfoExtractor):
        if ">Ce programme n'est malheureusement pas disponible pour votre zone géographique.<" in webpage:
            self.raise_geo_restricted(countries=['FR'])

-        title = episode = self._html_search_regex(r'<h1>([^<]+)', webpage, 'title')
        vpl_data = extract_attributes(self._search_regex(
            r'(<[^>]+class="video_player_loader"[^>]+>)',
            webpage, 'video player loader'))
@ -147,26 +149,7 @@ class TV5MondePlusIE(InfoExtractor):
        process_video_files(video_files)

        metadata = self._parse_json(
-            vpl_data['data-metadata'], display_id)
-        duration = (int_or_none(try_get(metadata, lambda x: x['content']['duration']))
-                    or parse_duration(self._html_search_meta('duration', webpage)))
-
-        description = self._html_search_regex(
-            r'(?s)<div[^>]+class=["\']episode-texte[^>]+>(.+?)</div>', webpage,
-            'description', fatal=False)
-
-        series = self._html_search_regex(
-            r'<p[^>]+class=["\']episode-emission[^>]+>([^<]+)', webpage,
-            'series', default=None)
-
-        if series and series != title:
-            title = '%s - %s' % (series, title)
-
-        upload_date = self._search_regex(
-            r'(?:date_publication|publish_date)["\']\s*:\s*["\'](\d{4}_\d{2}_\d{2})',
-            webpage, 'upload date', default=None)
-        if upload_date:
-            upload_date = upload_date.replace('_', '')
+            vpl_data.get('data-metadata') or '{}', display_id, fatal=False)

        if not video_id:
            video_id = self._search_regex(
@ -175,16 +158,20 @@ class TV5MondePlusIE(InfoExtractor):
                default=display_id)

        return {
+            **traverse_obj(metadata, ('content', {
+                'id': ('id', {str}),
+                'title': ('title', {str}),
+                'episode': ('title', {str}),
+                'series': ('series', {str}),
+                'timestamp': ('publishDate_ts', {int_or_none}),
+                'duration': ('duration', {int_or_none}),
+            })),
            'id': video_id,
            'display_id': display_id,
-            'title': title,
-            'description': description,
-            'thumbnail': vpl_data.get('data-image'),
-            'duration': duration,
-            'upload_date': upload_date,
+            'title': clean_html(get_element_by_class('main-title', webpage)),
+            'description': clean_html(get_element_by_class('text', get_element_html_by_class('ep-summary', webpage) or '')),
+            'thumbnail': url_or_none(vpl_data.get('data-image')),
            'formats': formats,
            'subtitles': self._extract_subtitles(self._parse_json(
                traverse_obj(vpl_data, ('data-captions', {str}), default='{}'), display_id, fatal=False)),
-            'series': series,
-            'episode': episode,
        }
--- a/yt_dlp/extractor/tva.py
+++ b/yt_dlp/extractor/tva.py
@ -1,10 +1,9 @@
+import functools
+import re
+
 from .common import InfoExtractor
-from ..utils import (
-    float_or_none,
-    int_or_none,
-    smuggle_url,
-    strip_or_none,
-)
+from ..utils import float_or_none, int_or_none, smuggle_url, strip_or_none
+from ..utils.traversal import traverse_obj


 class TVAIE(InfoExtractor):
@ -49,11 +48,20 @@ class QubIE(InfoExtractor):
        'info_dict': {
            'id': '6084352463001',
            'ext': 'mp4',
-            'title': 'Épisode 01',
+            'title': 'Ép 01. Mon dernier jour',
            'uploader_id': '5481942443001',
            'upload_date': '20190907',
            'timestamp': 1567899756,
            'description': 'md5:9c0d7fbb90939420c651fd977df90145',
+            'thumbnail': r're:https://.+\.jpg',
+            'episode': 'Ép 01. Mon dernier jour',
+            'episode_number': 1,
+            'tags': ['alerte amber', 'alerte amber saison 1', 'surdemande'],
+            'duration': 2625.963,
+            'season': 'Season 1',
+            'season_number': 1,
+            'series': 'Alerte Amber',
+            'channel': 'TVA',
        },
    }, {
        'url': 'https://www.qub.ca/tele/video/lcn-ca-vous-regarde-rev-30s-ap369664-1009357943',
@ -64,22 +72,24 @@ class QubIE(InfoExtractor):

    def _real_extract(self, url):
        entity_id = self._match_id(url)
-        entity = self._download_json(
-            'https://www.qub.ca/proxy/pfu/content-delivery-service/v1/entities',
-            entity_id, query={'id': entity_id})
+        webpage = self._download_webpage(url, entity_id)
+        entity = self._search_nextjs_data(webpage, entity_id)['props']['initialProps']['pageProps']['fallbackData']
        video_id = entity['videoId']
        episode = strip_or_none(entity.get('name'))

        return {
            '_type': 'url_transparent',
+            'url': f'https://videos.tva.ca/details/_{video_id}',
+            'ie_key': TVAIE.ie_key(),
            'id': video_id,
            'title': episode,
-            # 'url': self.BRIGHTCOVE_URL_TEMPLATE % entity['referenceId'],
-            'url': 'https://videos.tva.ca/details/_' + video_id,
-            'description': entity.get('longDescription'),
-            'duration': float_or_none(entity.get('durationMillis'), 1000),
            'episode': episode,
-            'episode_number': int_or_none(entity.get('episodeNumber')),
-            # 'ie_key': 'BrightcoveNew',
-            'ie_key': TVAIE.ie_key(),
+            **traverse_obj(entity, {
+                'description': ('longDescription', {str}),
+                'duration': ('durationMillis', {functools.partial(float_or_none, scale=1000)}),
+                'channel': ('knownEntities', 'channel', 'name', {str}),
+                'series': ('knownEntities', 'videoShow', 'name', {str}),
+                'season_number': ('slug', {lambda x: re.search(r'/s(?:ai|ea)son-(\d+)/', x)}, 1, {int_or_none}),
+                'episode_number': ('episodeNumber', {int_or_none}),
+            }),
        }
--- a/yt_dlp/extractor/twitter.py
+++ b/yt_dlp/extractor/twitter.py
@ -36,7 +36,7 @@ class TwitterBaseIE(InfoExtractor):
    _NETRC_MACHINE = 'twitter'
    _API_BASE = 'https://api.twitter.com/1.1/'
    _GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
-    _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
+    _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:(?:twitter|x)\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
    _AUTH = 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'
    _LEGACY_AUTH = 'AAAAAAAAAAAAAAAAAAAAAIK1zgAAAAAA2tUWuhGZ2JceoId5GwYWU5GspY4%3DUq7gzFoCZs1QfwGoVdvSac3IniczZEYXIcDyumCauIXpcAPorE'
    _flow_token = None
@ -1191,6 +1191,31 @@ class TwitterIE(TwitterBaseIE):
            'age_limit': 0,
            '_old_archive_ids': ['twitter 1724884212803834154'],
        },
+    }, {
+        # x.com
+        'url': 'https://x.com/historyinmemes/status/1790637656616943991',
+        'md5': 'daca3952ba0defe2cfafb1276d4c1ea5',
+        'info_dict': {
+            'id': '1790637589910654976',
+            'ext': 'mp4',
+            'title': 'Historic Vids - One of the most intense moments in history',
+            'description': 'One of the most intense moments in history https://t.co/Zgzhvix8ES',
+            'display_id': '1790637656616943991',
+            'uploader': 'Historic Vids',
+            'uploader_id': 'historyinmemes',
+            'uploader_url': 'https://twitter.com/historyinmemes',
+            'channel_id': '855481986290524160',
+            'upload_date': '20240515',
+            'timestamp': 1715756260.0,
+            'duration': 15.488,
+            'tags': [],
+            'comment_count': int,
+            'repost_count': int,
+            'like_count': int,
+            'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
+            'age_limit': 0,
+            '_old_archive_ids': ['twitter 1790637656616943991'],
+        }
    }, {
        # onion route
        'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
--- a/yt_dlp/extractor/unsupported.py
+++ b/yt_dlp/extractor/unsupported.py
@ -173,6 +173,20 @@ class KnownPiracyIE(UnsupportedInfoExtractor):
        r'filemoon\.sx',
        r'hentai\.animestigma\.com',
        r'thisav\.com',
+        r'gounlimited\.to',
+        r'highstream\.tv',
+        r'uqload\.com',
+        r'vedbam\.xyz',
+        r'vadbam\.net',
+        r'vidlo\.us',
+        r'wolfstream\.tv',
+        r'xvideosharing\.com',
+        r'(?:\w+\.)?viidshar\.com',
+        r'sxyprn\.com',
+        r'jable\.tv',
+        r'91porn\.com',
+        r'einthusan\.(?:tv|com|ca)',
+        r'yourupload\.com',
    )

    _TESTS = [{
--- a/yt_dlp/extractor/vk.py
+++ b/yt_dlp/extractor/vk.py
@ -451,6 +451,7 @@ class VKIE(VKBaseIE):
            info_page, 'view count', default=None))

        formats = []
+        subtitles = {}
        for format_id, format_url in data.items():
            format_url = url_or_none(format_url)
            if not format_url or not format_url.startswith(('http', '//', 'rtmp')):
@ -462,12 +463,21 @@ class VKIE(VKBaseIE):
                formats.append({
                    'format_id': format_id,
                    'url': format_url,
+                    'ext': 'mp4',
+                    'source_preference': 1,
                    'height': height,
                })
            elif format_id == 'hls':
-                formats.extend(self._extract_m3u8_formats(
+                fmts, subs = self._extract_m3u8_formats_and_subtitles(
                    format_url, video_id, 'mp4', 'm3u8_native',
-                    m3u8_id=format_id, fatal=False, live=is_live))
+                    m3u8_id=format_id, fatal=False, live=is_live)
+                formats.extend(fmts)
+                self._merge_subtitles(subs, target=subtitles)
+            elif format_id.startswith('dash_'):
+                fmts, subs = self._extract_mpd_formats_and_subtitles(
+                    format_url, video_id, mpd_id=format_id, fatal=False)
+                formats.extend(fmts)
+                self._merge_subtitles(subs, target=subtitles)
            elif format_id == 'rtmp':
                formats.append({
                    'format_id': format_id,
@ -475,7 +485,6 @@ class VKIE(VKBaseIE):
                    'ext': 'flv',
                })

-        subtitles = {}
        for sub in data.get('subs') or {}:
            subtitles.setdefault(sub.get('lang', 'en'), []).append({
                'ext': sub.get('title', '.srt').split('.')[-1],
@ -496,6 +505,7 @@ class VKIE(VKBaseIE):
            'comment_count': int_or_none(mv_data.get('commcount')),
            'is_live': is_live,
            'subtitles': subtitles,
+            '_format_sort_fields': ('res', 'source'),
        }


--- a/yt_dlp/extractor/wrestleuniverse.py
+++ b/yt_dlp/extractor/wrestleuniverse.py
@ -12,6 +12,7 @@ from ..utils import (
    jwt_decode_hs256,
    traverse_obj,
    try_call,
+    url_basename,
    url_or_none,
    urlencode_postdata,
    variadic,
@ -147,7 +148,7 @@ class WrestleUniverseBaseIE(InfoExtractor):
        metadata = self._call_api(video_id, msg='metadata', query={'al': lang or 'ja'}, auth=False, fatal=False)
        if not metadata:
            webpage = self._download_webpage(url, video_id)
-            nextjs_data = self._search_nextjs_data(webpage, video_id)
+            nextjs_data = self._search_nextjs_data(webpage, video_id, fatal=False)
            metadata = traverse_obj(nextjs_data, (
                'props', 'pageProps', *variadic(props_keys, (str, bytes, dict, set)), {dict})) or {}
        return metadata
@ -194,8 +195,7 @@ class WrestleUniverseVODIE(WrestleUniverseBaseIE):

        return {
            'id': video_id,
-            'formats': self._get_formats(video_data, (
-                (('protocolHls', 'url'), ('chromecastUrls', ...)), {url_or_none}), video_id),
+            'formats': self._get_formats(video_data, ('protocolHls', 'url', {url_or_none}), video_id),
            **traverse_obj(metadata, {
                'title': ('displayName', {str}),
                'description': ('description', {str}),
@ -259,6 +259,10 @@ class WrestleUniversePPVIE(WrestleUniverseBaseIE):
        'params': {
            'skip_download': 'm3u8',
        },
+    }, {
+        'note': 'manifest provides live-a (partial) and live-b (full) streams',
+        'url': 'https://www.wrestle-universe.com/en/lives/umc99R9XsexXrxr9VjTo9g',
+        'only_matching': True,
    }]

    _API_PATH = 'events'
@ -285,12 +289,16 @@ class WrestleUniversePPVIE(WrestleUniverseBaseIE):

        video_data, decrypt = self._call_encrypted_api(
            video_id, ':watchArchive', 'watch archive', data={'method': 1})
-        info['formats'] = self._get_formats(video_data, (
-            ('hls', None), ('urls', 'chromecastUrls'), ..., {url_or_none}), video_id)
+        # 'chromecastUrls' can be only partial videos, avoid
+        info['formats'] = self._get_formats(video_data, ('hls', (('urls', ...), 'url'), {url_or_none}), video_id)
        for f in info['formats']:
            # bitrates are exaggerated in PPV playlists, so avoid wrong/huge filesize_approx values
            if f.get('tbr'):
                f['tbr'] = int(f['tbr'] / 2.5)
+            # prefer variants with the same basename as the master playlist to avoid partial streams
+            f['format_id'] = url_basename(f['url']).partition('.')[0]
+            if not f['format_id'].startswith(url_basename(f['manifest_url']).partition('.')[0]):
+                f['preference'] = -10

        hls_aes_key = traverse_obj(video_data, ('hls', 'key', {decrypt}))
        if hls_aes_key:
--- a/yt_dlp/extractor/xfileshare.py
+++ b/yt_dlp/extractor/xfileshare.py
@ -1,198 +0,0 @@
-import re
-
-from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
-    decode_packed_codes,
-    determine_ext,
-    int_or_none,
-    js_to_json,
-    urlencode_postdata,
-)
-
-
-# based on openload_decode from 2bfeee69b976fe049761dd3012e30b637ee05a58
-def aa_decode(aa_code):
-    symbol_table = [
-        ('7', '((ﾟｰﾟ) + (o^_^o))'),
-        ('6', '((o^_^o) +(o^_^o))'),
-        ('5', '((ﾟｰﾟ) + (ﾟΘﾟ))'),
-        ('2', '((o^_^o) - (ﾟΘﾟ))'),
-        ('4', '(ﾟｰﾟ)'),
-        ('3', '(o^_^o)'),
-        ('1', '(ﾟΘﾟ)'),
-        ('0', '(c^_^o)'),
-    ]
-    delim = '(ﾟДﾟ)[ﾟεﾟ]+'
-    ret = ''
-    for aa_char in aa_code.split(delim):
-        for val, pat in symbol_table:
-            aa_char = aa_char.replace(pat, val)
-        aa_char = aa_char.replace('+ ', '')
-        m = re.match(r'^\d+', aa_char)
-        if m:
-            ret += chr(int(m.group(0), 8))
-        else:
-            m = re.match(r'^u([\da-f]+)', aa_char)
-            if m:
-                ret += chr(int(m.group(1), 16))
-    return ret
-
-
-class XFileShareIE(InfoExtractor):
-    _SITES = (
-        (r'aparat\.cam', 'Aparat'),
-        (r'clipwatching\.com', 'ClipWatching'),
-        (r'gounlimited\.to', 'GoUnlimited'),
-        (r'govid\.me', 'GoVid'),
-        (r'holavid\.com', 'HolaVid'),
-        (r'streamty\.com', 'Streamty'),
-        (r'thevideobee\.to', 'TheVideoBee'),
-        (r'uqload\.com', 'Uqload'),
-        (r'vidbom\.com', 'VidBom'),
-        (r'vidlo\.us', 'vidlo'),
-        (r'vidlocker\.xyz', 'VidLocker'),
-        (r'vidshare\.tv', 'VidShare'),
-        (r'vup\.to', 'VUp'),
-        (r'wolfstream\.tv', 'WolfStream'),
-        (r'xvideosharing\.com', 'XVideoSharing'),
-    )
-
-    IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1])
-    _VALID_URL = (r'https?://(?:www\.)?(?P<host>%s)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
-                  % '|'.join(site for site in list(zip(*_SITES))[0]))
-    _EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:%s)/embed-[0-9a-zA-Z]+.*?)\1' % '|'.join(site for site in list(zip(*_SITES))[0])]
-
-    _FILE_NOT_FOUND_REGEXES = (
-        r'>(?:404 - )?File Not Found<',
-        r'>The file was removed by administrator<',
-    )
-
-    _TESTS = [{
-        'url': 'https://uqload.com/dltx1wztngdz',
-        'md5': '3cfbb65e4c90e93d7b37bcb65a595557',
-        'info_dict': {
-            'id': 'dltx1wztngdz',
-            'ext': 'mp4',
-            'title': 'Rick Astley Never Gonna Give You mp4',
-            'thumbnail': r're:https://.*\.jpg'
-        }
-    }, {
-        'url': 'http://xvideosharing.com/fq65f94nd2ve',
-        'md5': '4181f63957e8fe90ac836fa58dc3c8a6',
-        'info_dict': {
-            'id': 'fq65f94nd2ve',
-            'ext': 'mp4',
-            'title': 'sample',
-            'thumbnail': r're:http://.*\.jpg',
-        },
-    }, {
-        'url': 'https://aparat.cam/n4d6dh0wvlpr',
-        'only_matching': True,
-    }, {
-        'url': 'https://wolfstream.tv/nthme29v9u2x',
-        'only_matching': True,
-    }]
-
-    def _real_extract(self, url):
-        host, video_id = self._match_valid_url(url).groups()
-
-        url = 'https://%s/' % host + ('embed-%s.html' % video_id if host in ('govid.me', 'vidlo.us') else video_id)
-        webpage = self._download_webpage(url, video_id)
-
-        if any(re.search(p, webpage) for p in self._FILE_NOT_FOUND_REGEXES):
-            raise ExtractorError('Video %s does not exist' % video_id, expected=True)
-
-        fields = self._hidden_inputs(webpage)
-
-        if fields.get('op') == 'download1':
-            countdown = int_or_none(self._search_regex(
-                r'<span id="countdown_str">(?:[Ww]ait)?\s*<span id="cxc">(\d+)</span>\s*(?:seconds?)?</span>',
-                webpage, 'countdown', default=None))
-            if countdown:
-                self._sleep(countdown, video_id)
-
-            webpage = self._download_webpage(
-                url, video_id, 'Downloading video page',
-                data=urlencode_postdata(fields), headers={
-                    'Referer': url,
-                    'Content-type': 'application/x-www-form-urlencoded',
-                })
-
-        title = (self._search_regex(
-            (r'style="z-index: [0-9]+;">([^<]+)</span>',
-             r'<td nowrap>([^<]+)</td>',
-             r'h4-fine[^>]*>([^<]+)<',
-             r'>Watch (.+)[ <]',
-             r'<h2 class="video-page-head">([^<]+)</h2>',
-             r'<h2 style="[^"]*color:#403f3d[^"]*"[^>]*>([^<]+)<',  # streamin.to
-             r'title\s*:\s*"([^"]+)"'),  # govid.me
-            webpage, 'title', default=None) or self._og_search_title(
-            webpage, default=None) or video_id).strip()
-
-        for regex, func in (
-                (r'(eval\(function\(p,a,c,k,e,d\){.+)', decode_packed_codes),
-                (r'(ﾟ.+)', aa_decode)):
-            obf_code = self._search_regex(regex, webpage, 'obfuscated code', default=None)
-            if obf_code:
-                webpage = webpage.replace(obf_code, func(obf_code))
-
-        formats = []
-
-        jwplayer_data = self._search_regex(
-            [
-                r'jwplayer\("[^"]+"\)\.load\(\[({.+?})\]\);',
-                r'jwplayer\("[^"]+"\)\.setup\(({.+?})\);',
-            ], webpage,
-            'jwplayer data', default=None)
-        if jwplayer_data:
-            jwplayer_data = self._parse_json(
-                jwplayer_data.replace(r"\'", "'"), video_id, js_to_json)
-            if jwplayer_data:
-                formats = self._parse_jwplayer_data(
-                    jwplayer_data, video_id, False,
-                    m3u8_id='hls', mpd_id='dash')['formats']
-
-        if not formats:
-            urls = []
-            for regex in (
-                    r'(?:file|src)\s*:\s*(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1',
-                    r'file_link\s*=\s*(["\'])(?P<url>http(?:(?!\1).)+)\1',
-                    r'addVariable\((\\?["\'])file\1\s*,\s*(\\?["\'])(?P<url>http(?:(?!\2).)+)\2\)',
-                    r'<embed[^>]+src=(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1'):
-                for mobj in re.finditer(regex, webpage):
-                    video_url = mobj.group('url')
-                    if video_url not in urls:
-                        urls.append(video_url)
-
-            sources = self._search_regex(
-                r'sources\s*:\s*(\[(?!{)[^\]]+\])', webpage, 'sources', default=None)
-            if sources:
-                urls.extend(self._parse_json(sources, video_id))
-
-            formats = []
-            for video_url in urls:
-                if determine_ext(video_url) == 'm3u8':
-                    formats.extend(self._extract_m3u8_formats(
-                        video_url, video_id, 'mp4',
-                        entry_protocol='m3u8_native', m3u8_id='hls',
-                        fatal=False))
-                else:
-                    formats.append({
-                        'url': video_url,
-                        'format_id': 'sd',
-                    })
-
-        thumbnail = self._search_regex(
-            [
-                r'<video[^>]+poster="([^"]+)"',
-                r'(?:image|poster)\s*:\s*["\'](http[^"\']+)["\'],',
-            ], webpage, 'thumbnail', default=None)
-
-        return {
-            'id': video_id,
-            'title': title,
-            'thumbnail': thumbnail,
-            'formats': formats,
-            'http_headers': {'Referer': url}
-        }
--- a/yt_dlp/extractor/xvideos.py
+++ b/yt_dlp/extractor/xvideos.py
@ -173,8 +173,41 @@ class XVideosIE(InfoExtractor):

 class XVideosQuickiesIE(InfoExtractor):
    IE_NAME = 'xvideos:quickies'
-    _VALID_URL = r'https?://(?P<domain>(?:[^/]+\.)?xvideos2?\.com)/amateur-channels/[^#]+#quickies/a/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?P<domain>(?:[^/?#]+\.)?xvideos2?\.com)/(?:profiles/|amateur-channels/)?[^/?#]+#quickies/a/(?P<id>\w+)'
    _TESTS = [{
+        'url': 'https://www.xvideos.com/lili_love#quickies/a/ipdtikh1a4c',
+        'md5': 'f9e4f518ff1de14b99a400bbd0fc5ee0',
+        'info_dict': {
+            'id': 'ipdtikh1a4c',
+            'ext': 'mp4',
+            'title': 'Mexican chichóna putisima',
+            'age_limit': 18,
+            'duration': 81,
+            'thumbnail': r're:^https://cdn.*-pic.xvideos-cdn.com/.+\.jpg',
+        }
+    }, {
+        'url': 'https://www.xvideos.com/profiles/lili_love#quickies/a/ipphaob6fd1',
+        'md5': '5340938aac6b46e19ebdd1d84535862e',
+        'info_dict': {
+            'id': 'ipphaob6fd1',
+            'ext': 'mp4',
+            'title': 'Puta chichona mexicana squirting',
+            'age_limit': 18,
+            'duration': 56,
+            'thumbnail': r're:^https://cdn.*-pic.xvideos-cdn.com/.+\.jpg',
+        }
+    }, {
+        'url': 'https://www.xvideos.com/amateur-channels/lili_love#quickies/a/hfmffmd7661',
+        'md5': '92428518bbabcb4c513e55922e022491',
+        'info_dict': {
+            'id': 'hfmffmd7661',
+            'ext': 'mp4',
+            'title': 'Chichona mexican slut',
+            'age_limit': 18,
+            'duration': 9,
+            'thumbnail': r're:^https://cdn.*-pic.xvideos-cdn.com/.+\.jpg',
+        }
+    }, {
        'url': 'https://www.xvideos.com/amateur-channels/wifeluna#quickies/a/47258683',
        'md5': '16e322a93282667f1963915568f782c1',
        'info_dict': {
@ -189,4 +222,4 @@ class XVideosQuickiesIE(InfoExtractor):

    def _real_extract(self, url):
        domain, id_ = self._match_valid_url(url).group('domain', 'id')
-        return self.url_result(f'https://{domain}/video{id_}/_', XVideosIE, id_)
+        return self.url_result(f'https://{domain}/video{"" if id_.isdecimal() else "."}{id_}/_', XVideosIE, id_)
--- a/yt_dlp/extractor/yandexvideo.py
+++ b/yt_dlp/extractor/yandexvideo.py
@ -259,15 +259,15 @@ class ZenYandexIE(InfoExtractor):
            webpage = self._download_webpage(redirect, video_id, note='Redirecting')
        data_json = self._search_json(
            r'("data"\s*:|data\s*=)', webpage, 'metadata', video_id, contains_pattern=r'{["\']_*serverState_*video.+}')
-        serverstate = self._search_regex(r'(_+serverState_+video-site_[^_]+_+)',
-                                         webpage, 'server state').replace('State', 'Settings')
+        serverstate = self._search_regex(r'(_+serverState_+video-site_[^_]+_+)', webpage, 'server state')
        uploader = self._search_regex(r'(<a\s*class=["\']card-channel-link[^"\']+["\'][^>]+>)',
                                      webpage, 'uploader', default='<a>')
        uploader_name = extract_attributes(uploader).get('aria-label')
-        video_json = try_get(data_json, lambda x: x[serverstate]['exportData']['video'], dict)
-        stream_urls = try_get(video_json, lambda x: x['video']['streams'])
+        item_id = traverse_obj(data_json, (serverstate, 'videoViewer', 'openedItemId', {str}))
+        video_json = traverse_obj(data_json, (serverstate, 'videoViewer', 'items', item_id, {dict})) or {}
+
        formats, subtitles = [], {}
-        for s_url in stream_urls:
+        for s_url in traverse_obj(video_json, ('video', 'streams', ..., {url_or_none})):
            ext = determine_ext(s_url)
            if ext == 'mpd':
                fmts, subs = self._extract_mpd_formats_and_subtitles(s_url, video_id, mpd_id='dash')
--- a/yt_dlp/extractor/youporn.py
+++ b/yt_dlp/extractor/youporn.py
@ -72,15 +72,15 @@ class YouPornIE(InfoExtractor):
            'id': '16290308',
            'age_limit': 18,
            'categories': [],
-            'description': 'md5:00ea70f642f431c379763c17c2f396bc',
+            'description': str,  # TODO: detect/remove SEO spam description in ytdl backport
            'display_id': 'tinderspecial-trailer1',
            'duration': 298.0,
            'ext': 'mp4',
            'upload_date': '20201123',
            'uploader': 'Ersties',
            'tags': [],
-            'thumbnail': 'https://fi1.ypncdn.com/202011/23/16290308/original/8/tinderspecial-trailer1-8(m=eaAaaEPbaaaa).jpg',
-            'timestamp': 1606089600,
+            'thumbnail': r're:https://.+\.jpg',
+            'timestamp': 1606147564,
            'title': 'Tinder In Real Life',
            'view_count': int,
        }
@ -88,11 +88,17 @@ class YouPornIE(InfoExtractor):

    def _real_extract(self, url):
        video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
-        definitions = self._download_json(
-            f'https://www.youporn.com/api/video/media_definitions/{video_id}/', display_id or video_id)
+        self._set_cookie('.youporn.com', 'age_verified', '1')
+        webpage = self._download_webpage(f'https://www.youporn.com/watch/{video_id}', video_id)
+        definitions = self._search_json(r'\bplayervars\s*:', webpage, 'player vars', video_id)['mediaDefinitions']

-        def get_format_data(data, f):
-            return traverse_obj(data, lambda _, v: v['format'] == f and url_or_none(v['videoUrl']))
+        def get_format_data(data, stream_type):
+            info_url = traverse_obj(data, (lambda _, v: v['format'] == stream_type, 'videoUrl', {url_or_none}, any))
+            if not info_url:
+                return []
+            return traverse_obj(
+                self._download_json(info_url, video_id, f'Downloading {stream_type} info JSON', fatal=False),
+                lambda _, v: v['format'] == stream_type and url_or_none(v['videoUrl']))

        formats = []
        # Try to extract only the actual master m3u8 first, avoiding the duplicate single resolution "master" m3u8s
@ -123,10 +129,6 @@ class YouPornIE(InfoExtractor):
            f['height'] = height
            formats.append(f)

-        webpage = self._download_webpage(
-            'http://www.youporn.com/watch/%s' % video_id, display_id,
-            headers={'Cookie': 'age_verified=1'})
-
        title = self._html_search_regex(
            r'(?s)<div[^>]+class=["\']watchVideoTitle[^>]+>(.+?)</div>',
            webpage, 'title', default=None) or self._og_search_title(
--- a/yt_dlp/extractor/yourporn.py
+++ b/yt_dlp/extractor/yourporn.py
@ -1,65 +0,0 @@
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
-    parse_duration,
-    urljoin,
-)
-
-
-class YourPornIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?sxyprn\.com/post/(?P<id>[^/?#&.]+)'
-    _TESTS = [{
-        'url': 'https://sxyprn.com/post/57ffcb2e1179b.html',
-        'md5': '6f8682b6464033d87acaa7a8ff0c092e',
-        'info_dict': {
-            'id': '57ffcb2e1179b',
-            'ext': 'mp4',
-            'title': 'md5:c9f43630bd968267672651ba905a7d35',
-            'thumbnail': r're:^https?://.*\.jpg$',
-            'duration': 165,
-            'age_limit': 18,
-        },
-        'params': {
-            'skip_download': True,
-        },
-    }, {
-        'url': 'https://sxyprn.com/post/57ffcb2e1179b.html',
-        'only_matching': True,
-    }]
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-
-        webpage = self._download_webpage(url, video_id)
-
-        parts = self._parse_json(
-            self._search_regex(
-                r'data-vnfo=(["\'])(?P<data>{.+?})\1', webpage, 'data info',
-                group='data'),
-            video_id)[video_id].split('/')
-
-        num = 0
-        for c in parts[6] + parts[7]:
-            if c.isnumeric():
-                num += int(c)
-        parts[5] = compat_str(int(parts[5]) - num)
-        parts[1] += '8'
-        video_url = urljoin(url, '/'.join(parts))
-
-        title = (self._search_regex(
-            r'<[^>]+\bclass=["\']PostEditTA[^>]+>([^<]+)', webpage, 'title',
-            default=None) or self._og_search_description(webpage)).strip()
-        thumbnail = self._og_search_thumbnail(webpage)
-        duration = parse_duration(self._search_regex(
-            r'duration\s*:\s*<[^>]+>([\d:]+)', webpage, 'duration',
-            default=None))
-
-        return {
-            'id': video_id,
-            'url': video_url,
-            'title': title,
-            'thumbnail': thumbnail,
-            'duration': duration,
-            'age_limit': 18,
-            'ext': 'mp4',
-        }
--- a/yt_dlp/extractor/yourupload.py
+++ b/yt_dlp/extractor/yourupload.py
@ -1,43 +0,0 @@
-from .common import InfoExtractor
-from ..utils import urljoin
-
-
-class YourUploadIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?(?:yourupload\.com/(?:watch|embed)|embed\.yourupload\.com)/(?P<id>[A-Za-z0-9]+)'
-    _TESTS = [{
-        'url': 'http://yourupload.com/watch/14i14h',
-        'md5': '5e2c63385454c557f97c4c4131a393cd',
-        'info_dict': {
-            'id': '14i14h',
-            'ext': 'mp4',
-            'title': 'BigBuckBunny_320x180.mp4',
-            'thumbnail': r're:^https?://.*\.jpe?g',
-        }
-    }, {
-        'url': 'http://www.yourupload.com/embed/14i14h',
-        'only_matching': True,
-    }, {
-        'url': 'http://embed.yourupload.com/14i14h',
-        'only_matching': True,
-    }]
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-
-        embed_url = 'http://www.yourupload.com/embed/%s' % video_id
-
-        webpage = self._download_webpage(embed_url, video_id)
-
-        title = self._og_search_title(webpage)
-        video_url = urljoin(embed_url, self._og_search_video_url(webpage))
-        thumbnail = self._og_search_thumbnail(webpage, default=None)
-
-        return {
-            'id': video_id,
-            'title': title,
-            'url': video_url,
-            'thumbnail': thumbnail,
-            'http_headers': {
-                'Referer': embed_url,
-            },
-        }
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@ -240,6 +240,16 @@ INNERTUBE_CLIENTS = {
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 85
    },
+    # This client has pre-merged video+audio 720p/1080p streams
+    'mediaconnect': {
+        'INNERTUBE_CONTEXT': {
+            'client': {
+                'clientName': 'MEDIA_CONNECT_FRONTEND',
+                'clientVersion': '0.1',
+            },
+        },
+        'INNERTUBE_CONTEXT_CLIENT_NAME': 95
+    },
 }


@ -1171,7 +1181,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
        r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
    )
-    _formats = {
+    _formats = {  # NB: Used in YoutubeWebArchiveIE and GoogleDriveIE
        '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
@ -2343,6 +2353,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                'format': '17',  # 3gp format available on android
                'extractor_args': {'youtube': {'player_client': ['android']}},
            },
+            'skip': 'android client broken',
        },
        {
            # Skip download of additional client configs (remix client config in this case)
@ -2720,7 +2731,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                'heatmap': 'count:100',
            },
            'params': {
-                'extractor_args': {'youtube': {'player_client': ['android'], 'player_skip': ['webpage']}},
+                'extractor_args': {'youtube': {'player_client': ['ios'], 'player_skip': ['webpage']}},
            },
        },
    ]
@ -3307,7 +3318,36 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                'value': ('intensityScoreNormalized', {float_or_none}),
            })) or None

-    def _extract_comment(self, comment_renderer, parent=None):
+    def _extract_comment(self, entities, parent=None):
+        comment_entity_payload = get_first(entities, ('payload', 'commentEntityPayload', {dict}))
+        if not (comment_id := traverse_obj(comment_entity_payload, ('properties', 'commentId', {str}))):
+            return
+
+        toolbar_entity_payload = get_first(entities, ('payload', 'engagementToolbarStateEntityPayload', {dict}))
+        time_text = traverse_obj(comment_entity_payload, ('properties', 'publishedTime', {str})) or ''
+
+        return {
+            'id': comment_id,
+            'parent': parent or 'root',
+            **traverse_obj(comment_entity_payload, {
+                'text': ('properties', 'content', 'content', {str}),
+                'like_count': ('toolbar', 'likeCountA11y', {parse_count}),
+                'author_id': ('author', 'channelId', {self.ucid_or_none}),
+                'author': ('author', 'displayName', {str}),
+                'author_thumbnail': ('author', 'avatarThumbnailUrl', {url_or_none}),
+                'author_is_uploader': ('author', 'isCreator', {bool}),
+                'author_is_verified': ('author', 'isVerified', {bool}),
+                'author_url': ('author', 'channelCommand', 'innertubeCommand', (
+                    ('browseEndpoint', 'canonicalBaseUrl'), ('commandMetadata', 'webCommandMetadata', 'url')
+                ), {lambda x: urljoin('https://www.youtube.com', x)}),
+            }, get_all=False),
+            'is_favorited': (None if toolbar_entity_payload is None else
+                             toolbar_entity_payload.get('heartState') == 'TOOLBAR_HEART_STATE_HEARTED'),
+            '_time_text': time_text,  # FIXME: non-standard, but we need a way of showing that it is an estimate.
+            'timestamp': self._parse_time_text(time_text),
+        }
+
+    def _extract_comment_old(self, comment_renderer, parent=None):
        comment_id = comment_renderer.get('commentId')
        if not comment_id:
            return
@ -3388,21 +3428,39 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                break
            return _continuation

-        def extract_thread(contents):
+        def extract_thread(contents, entity_payloads):
            if not parent:
                tracker['current_page_thread'] = 0
            for content in contents:
                if not parent and tracker['total_parent_comments'] >= max_parents:
                    yield
                comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
-                comment_renderer = get_first(
-                    (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
-                    expected_type=dict, default={})

-                comment = self._extract_comment(comment_renderer, parent)
+                # old comment format
+                if not entity_payloads:
+                    comment_renderer = get_first(
+                        (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
+                        expected_type=dict, default={})
+
+                    comment = self._extract_comment_old(comment_renderer, parent)
+
+                # new comment format
+                else:
+                    view_model = (
+                        traverse_obj(comment_thread_renderer, ('commentViewModel', 'commentViewModel', {dict}))
+                        or traverse_obj(content, ('commentViewModel', {dict})))
+                    comment_keys = traverse_obj(view_model, (('commentKey', 'toolbarStateKey'), {str}))
+                    if not comment_keys:
+                        continue
+                    entities = traverse_obj(entity_payloads, lambda _, v: v['entityKey'] in comment_keys)
+                    comment = self._extract_comment(entities, parent)
+                    if comment:
+                        comment['is_pinned'] = traverse_obj(view_model, ('pinnedText', {str})) is not None
+
                if not comment:
                    continue
                comment_id = comment['id']
+
                if comment.get('is_pinned'):
                    tracker['pinned_comment_ids'].add(comment_id)
                # Sometimes YouTube may break and give us infinite looping comments.
@ -3495,7 +3553,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
            check_get_keys = None
            if not is_forced_continuation and not (tracker['est_total'] == 0 and tracker['running_total'] == 0):
                check_get_keys = [[*continuation_items_path, ..., (
-                    'commentsHeaderRenderer' if is_first_continuation else ('commentThreadRenderer', 'commentRenderer'))]]
+                    'commentsHeaderRenderer' if is_first_continuation else ('commentThreadRenderer', 'commentViewModel', 'commentRenderer'))]]
            try:
                response = self._extract_response(
                    item_id=None, query=continuation,
@ -3519,6 +3577,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                raise
            is_forced_continuation = False
            continuation = None
+            mutations = traverse_obj(response, ('frameworkUpdates', 'entityBatchUpdate', 'mutations', ..., {dict}))
            for continuation_items in traverse_obj(response, continuation_items_path, expected_type=list, default=[]):
                if is_first_continuation:
                    continuation = extract_header(continuation_items)
@ -3527,7 +3586,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                        break
                    continue

-                for entry in extract_thread(continuation_items):
+                for entry in extract_thread(continuation_items, mutations):
                    if not entry:
                        return
                    yield entry
@ -3604,8 +3663,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        yt_query = {
            'videoId': video_id,
        }
-        if _split_innertube_client(client)[0] in ('android', 'android_embedscreen'):
-            yt_query['params'] = 'CgIIAQ=='

        pp_arg = self._configuration_arg('player_params', [None], casesense=True)[0]
        if pp_arg:
@ -3621,19 +3678,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor):

    def _get_requested_clients(self, url, smuggled_data):
        requested_clients = []
-        default = ['ios', 'android', 'web']
+        android_clients = []
+        default = ['ios', 'web']
        allowed_clients = sorted(
            (client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'),
            key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
        for client in self._configuration_arg('player_client'):
-            if client in allowed_clients:
-                requested_clients.append(client)
-            elif client == 'default':
+            if client == 'default':
                requested_clients.extend(default)
            elif client == 'all':
                requested_clients.extend(allowed_clients)
-            else:
+            elif client not in allowed_clients:
                self.report_warning(f'Skipping unsupported client {client}')
+            elif client.startswith('android'):
+                android_clients.append(client)
+            else:
+                requested_clients.append(client)
+        # Force deprioritization of broken Android clients for format de-duplication
+        requested_clients.extend(android_clients)
        if not requested_clients:
            requested_clients = default

@ -3852,6 +3914,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                    f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)

            client_name = fmt.get(STREAMING_DATA_CLIENT_NAME)
+            # Android client formats are broken due to integrity check enforcement
+            # Ref: https://github.com/yt-dlp/yt-dlp/issues/9554
+            is_broken = client_name and client_name.startswith(short_client_name('android'))
+            if is_broken:
+                self.report_warning(
+                    f'{video_id}: Android client formats are broken and may yield HTTP Error 403. '
+                    'They will be deprioritized', only_once=True)
+
            name = fmt.get('qualityLabel') or quality.replace('audio_quality_', '') or ''
            fps = int_or_none(fmt.get('fps')) or 0
            dct = {
@ -3864,7 +3934,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                    name, fmt.get('isDrc') and 'DRC',
                    try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
                    try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
-                    throttled and 'THROTTLED', is_damaged and 'DAMAGED',
+                    throttled and 'THROTTLED', is_damaged and 'DAMAGED', is_broken and 'BROKEN',
                    (self.get_param('verbose') or all_formats) and client_name,
                    delim=', '),
                # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
@ -3882,8 +3952,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                'language': join_nonempty(audio_track.get('id', '').split('.')[0],
                                          'desc' if language_preference < -1 else '') or None,
                'language_preference': language_preference,
-                # Strictly de-prioritize damaged and 3gp formats
-                'preference': -10 if is_damaged else -2 if itag == '17' else None,
+                # Strictly de-prioritize broken, damaged and 3gp formats
+                'preference': -20 if is_broken else -10 if is_damaged else -2 if itag == '17' else None,
            }
            mime_mobj = re.match(
                r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
--- a/yt_dlp/networking/_curlcffi.py
+++ b/yt_dlp/networking/_curlcffi.py
@ -21,7 +21,7 @@ from .exceptions import (
    TransportError,
 )
 from .impersonate import ImpersonateRequestHandler, ImpersonateTarget
-from ..dependencies import curl_cffi
+from ..dependencies import curl_cffi, certifi
 from ..utils import int_or_none

 if curl_cffi is None:
@ -132,6 +132,16 @@ class CurlCFFIRH(ImpersonateRequestHandler, InstanceStoreMixin):
        extensions.pop('cookiejar', None)
        extensions.pop('timeout', None)

+    def send(self, request: Request) -> Response:
+        target = self._get_request_target(request)
+        try:
+            response = super().send(request)
+        except HTTPError as e:
+            e.response.extensions['impersonate'] = target
+            raise
+        response.extensions['impersonate'] = target
+        return response
+
    def _send(self, request: Request):
        max_redirects_exceeded = False
        session: curl_cffi.requests.Session = self._get_instance(
@ -156,6 +166,13 @@ class CurlCFFIRH(ImpersonateRequestHandler, InstanceStoreMixin):
                # See: https://curl.se/libcurl/c/CURLOPT_HTTPPROXYTUNNEL.html
                session.curl.setopt(CurlOpt.HTTPPROXYTUNNEL, 1)

+            # curl_cffi does not currently set these for proxies
+            session.curl.setopt(CurlOpt.PROXY_CAINFO, certifi.where())
+
+            if not self.verify:
+                session.curl.setopt(CurlOpt.PROXY_SSL_VERIFYPEER, 0)
+                session.curl.setopt(CurlOpt.PROXY_SSL_VERIFYHOST, 0)
+
        headers = self._get_impersonate_headers(request)

        if self._client_cert:
@ -203,7 +220,10 @@ class CurlCFFIRH(ImpersonateRequestHandler, InstanceStoreMixin):
                max_redirects_exceeded = True
                curl_response = e.response

-            elif e.code == CurlECode.PROXY:
+            elif (
+                e.code == CurlECode.PROXY
+                or (e.code == CurlECode.RECV_ERROR and 'Received HTTP code 407 from proxy after CONNECT' in str(e))
+            ):
                raise ProxyError(cause=e) from e
            else:
                raise TransportError(cause=e) from e
--- a/yt_dlp/networking/common.py
+++ b/yt_dlp/networking/common.py
@ -497,6 +497,7 @@ class Response(io.IOBase):
    @param headers: response headers.
    @param status: Response HTTP status code. Default is 200 OK.
    @param reason: HTTP status reason. Will use built-in reasons based on status code if not provided.
+    @param extensions: Dictionary of handler-specific response extensions.
    """

    def __init__(
@ -505,7 +506,9 @@ class Response(io.IOBase):
            url: str,
            headers: Mapping[str, str],
            status: int = 200,
-            reason: str = None):
+            reason: str = None,
+            extensions: dict = None
+    ):

        self.fp = fp
        self.headers = Message()
@ -517,6 +520,7 @@ class Response(io.IOBase):
            self.reason = reason or HTTPStatus(status).phrase
        except ValueError:
            self.reason = None
+        self.extensions = extensions or {}

    def readable(self):
        return self.fp.readable()
--- a/yt_dlp/update.py
+++ b/yt_dlp/update.py
@ -69,6 +69,10 @@ def _get_variant_and_executable_path():
            # Ref: https://en.wikipedia.org/wiki/Uname#Examples
            if machine[1:] in ('x86', 'x86_64', 'amd64', 'i386', 'i686'):
                machine = '_x86' if platform.architecture()[0][:2] == '32' else ''
+            # sys.executable returns a /tmp/ path for staticx builds (linux_static)
+            # Ref: https://staticx.readthedocs.io/en/latest/usage.html#run-time-information
+            if static_exe_path := os.getenv('STATICX_PROG_PATH'):
+                path = static_exe_path
        return f'{remove_end(sys.platform, "32")}{machine}_exe', path

    path = os.path.dirname(__file__)
--- a/yt_dlp/utils/_utils.py
+++ b/yt_dlp/utils/_utils.py
@ -1638,16 +1638,14 @@ def get_filesystem_encoding():
    return encoding if encoding is not None else 'utf-8'


-_WINDOWS_QUOTE_TRANS = str.maketrans({'"': '\\"', '\\': '\\\\'})
+_WINDOWS_QUOTE_TRANS = str.maketrans({'"': R'\"'})
 _CMD_QUOTE_TRANS = str.maketrans({
    # Keep quotes balanced by replacing them with `""` instead of `\\"`
    '"': '""',
-    # Requires a variable `=` containing `"^\n\n"` (set in `utils.Popen`)
+    # These require an env-variable `=` containing `"^\n\n"` (set in `utils.Popen`)
    # `=` should be unique since variables containing `=` cannot be set using cmd
    '\n': '%=%',
-    # While we are only required to escape backslashes immediately before quotes,
-    # we instead escape all of 'em anyways to be consistent
-    '\\': '\\\\',
+    '\r': '%=%',
    # Use zero length variable replacement so `%` doesn't get expanded
    # `cd` is always set as long as extensions are enabled (`/E:ON` in `utils.Popen`)
    '%': '%%cd:~,%',
@ -1656,19 +1654,14 @@ _CMD_QUOTE_TRANS = str.maketrans({

 def shell_quote(args, *, shell=False):
    args = list(variadic(args))
-    if any(isinstance(item, bytes) for item in args):
-        deprecation_warning('Passing bytes to utils.shell_quote is deprecated')
-        encoding = get_filesystem_encoding()
-        for index, item in enumerate(args):
-            if isinstance(item, bytes):
-                args[index] = item.decode(encoding)

    if compat_os_name != 'nt':
        return shlex.join(args)

    trans = _CMD_QUOTE_TRANS if shell else _WINDOWS_QUOTE_TRANS
    return ' '.join(
-        s if re.fullmatch(r'[\w#$*\-+./:?@\\]+', s, re.ASCII) else s.translate(trans).join('""')
+        s if re.fullmatch(r'[\w#$*\-+./:?@\\]+', s, re.ASCII)
+        else re.sub(r'(\\+)("|$)', r'\1\1\2', s).translate(trans).join('""')
        for s in args)
Author	SHA1	Message	Date
coletdjnz	12d8ea8246	[ie/youtube] Remove `android` from default clients (#9553 ) Closes #9554 Authored by: coletdjnz, bashonly Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>	2024-05-17 16:03:02 +00:00
Justin Keogh	8e15177b41	[ie/youtube] Fix comments extraction (#9775 ) Closes #9358 Authored by: jakeogh, minamotorin, shoxie007, bbilly1 Co-authored-by: minamotorin <76122224+minamotorin@users.noreply.github.com> Co-authored-by: shoxie007 <74592022+shoxie007@users.noreply.github.com> Co-authored-by: Simon <35427372+bbilly1@users.noreply.github.com>	2024-05-17 14:37:30 +00:00
Roeniss Moon	dd9ad97b1f	[cookies] Add `--cookies-from-browser` support for Whale (#9649 ) Closes #9307 Authored by: roeniss	2024-05-17 14:33:12 +00:00
minamotorin	61b17437dc	[ie] Add POST data hash to `--write-pages` filenames (#9879 ) Closes #9773 Authored by: minamotorin	2024-05-17 14:28:36 +00:00
kylegustavo	7975ddf245	[ie/bbc] Fix and extend extraction (#9705 ) Closes #9701 Authored by: kylegustavo, dirkf, pukkandan	2024-05-17 06:20:13 +00:00
Podiumnoche	6d8a53d870	[ie/cda] Fix age-gated web extraction (#9939 ) Closes #5980, Closes #6638 Authored by: Podiumnoche, Szpachlarz, dirkf, emqi	2024-05-16 22:41:34 +00:00
bashonly	4813173e45	[ie/twitter] Support x.com URLs (#9926 ) Closes #9923 Authored by: bashonly	2024-05-16 22:36:56 +00:00
bashonly	41ba4a808b	[ie/tiktok] Extract via mobile API only if `app_info` is passed (#9938 ) Partially addresses #9506 Authored by: bashonly	2024-05-16 22:27:09 +00:00
Mozi	351dc0bc33	[ie/eplus] Handle URLs without videos (#9855 ) Authored by: pzhlkj6612	2024-05-13 23:21:11 +00:00
feederbox826	518c1afc15	[ie/pornhub] Fix login by email address (#9914 ) Closes #9717 Authored by: feederbox826	2024-05-13 23:18:14 +00:00
WyohKnott	85ec2a337a	[ie/googledrive] Fix formats extraction (#9908 ) Closes #8281 Authored by: WyohKnott	2024-05-12 23:05:47 +00:00
Jake Finley	b207d26f83	[ie/xvideos:quickies] Fix extractor (#9834 ) Closes #6356 Authored by: JakeFinley96	2024-05-12 20:42:33 +00:00
sepro	01395a3434	[cleanup] Remove questionable extractors (#9911 ) Closes #6279, Closes #6799 Authored by: seproDev	2024-05-12 22:12:11 +02:00
Haxy	cf212d0a33	[ie/youtube] Add `mediaconnect` client (#9546 ) Authored by: clienthax	2024-05-12 16:03:36 +00:00
alard	6db96268c5	[ie/TV5Monde] Fix extractor (#9143 ) Closes #9118 Authored by: alard, seproDev Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com>	2024-05-11 23:58:15 +02:00
Eric Lam	800a43983e	[ie/EuroParlWebstream] Support new URL format (#9647 ) Authored by: voidful, seproDev Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com>	2024-05-11 23:50:59 +02:00
DaPotato69	7e4259dff0	Better warning when requested subs format not found (#9873 ) Closes #9760 Authored by: DaPotato69	2024-05-11 21:11:40 +00:00
Stefan Lobbenmeier	f1f158976e	[cookies] Get chrome session cookies with `--cookies-from-browser` (#9747 ) Partially addresses #5534 Authored by: StefanLobbenmeier	2024-05-11 17:25:39 +00:00
llamasblade	31b417e1d1	[ie/hytale] Use `CloudflareStreamIE` explicitly (#9672 ) Authored by: llamasblade	2024-05-11 17:01:56 +00:00
Hugo Azevedo	fc2879ecb0	[ie/alura] Fix extractor (#9658 ) Authored by: hugohaa	2024-05-11 16:54:29 +00:00
rrgomes	0a1a8e3005	[ie/nfb] Fix extractors (#9650 ) Authored by: rrgomes	2024-05-11 16:38:41 +00:00
c-basalt	4cc99d7b6c	[ie/BilibiliSpaceVideo] Fix extraction (#9905 ) Closes #9892 Authored by: c-basalt	2024-05-10 22:34:53 +00:00
coletdjnz	3c7a287e28	[test] Add HTTP proxy tests (#9578 ) Also fixes HTTPS proxies for curl_cffi Authored by: coletdjnz	2024-05-11 10:06:58 +12:00
sepro	98d71d8c5e	[ie/commonmistakes] Raise error on blob URLs (#9897 ) Authored by: seproDev	2024-05-10 19:20:55 +02:00
kclauhk	00a9f2e1f7	[ie/canalalpha] Fix extractor (#9675 ) Authored by: kclauhk	2024-05-10 19:19:57 +02:00
Mozi	73f12119b5	[ie/netease:program] Improve `--no-playlist` message (#9488 ) Authored by: pzhlkj6612	2024-05-10 19:13:35 +02:00
Alexandre Huot	6b54cccdcb	[ie/Qub] Fix extractor (#7019 ) Closes #4989 Authored by: alexhuot1, dirkf	2024-05-08 22:10:06 +00:00
src-tinkerer	c4b87dd885	[ie/ZenYandex] Fix extractor (#9813 ) Closes #9803 Authored by: src-tinkerer	2024-05-08 21:27:30 +00:00
fireattack	2338827072	[ie/bilibili] Fix `--geo-verification-proxy` support (#9817 ) Closes #9797 Authored by: fireattack	2024-05-08 21:24:44 +00:00
fireattack	06d52c8731	[ie/BilibiliSpaceVideo] Better error message (#9839 ) Closes #9528 Authored by: fireattack	2024-05-08 21:09:38 +00:00
sepro	df5c9e733a	[ie/vk] Improve format extraction (#9885 ) Closes #5675 Authored by: seproDev	2024-05-08 23:02:22 +02:00
Mozi	b38018b781	[ie/mixch] Extract comments (#9860 ) Authored by: pzhlkj6612	2024-05-08 20:51:16 +00:00
Rasmus Antons	145dc6f656	[ie/boosty] Add cookies support (#9522 ) Closes #9401 Authored by: RasmusAntons	2024-05-08 20:16:32 +00:00
bashonly	5904853ae5	[ie/crunchyroll] Support browser impersonation (#9857 ) Closes #7442 Authored by: bashonly	2024-05-05 23:15:32 +00:00
Chris Caruso	c8bf48f3a8	[ie/cbc.ca:player] Improve `_VALID_URL` (#9866 ) Closes #9825 Authored by: carusocr	2024-05-05 23:02:24 +00:00
The-MAGI	351368cb9a	[ie/youporn] Fix extractor (#8827 ) Closes #7967 Authored by: The-MAGI	2024-05-05 22:57:38 +00:00
sepro	96da952504	[core] Warn if lack of ffmpeg alters format selection (#9805 ) Authored by: seproDev, pukkandan	2024-05-05 00:44:08 +02:00
bashonly	bec9a59e8e	[networking] Add `extensions` attribute to `Response` (#9756 ) CurlCFFIRH now provides an `impersonate` field in its responses' extensions Authored by: bashonly	2024-05-04 22:19:42 +00:00
bashonly	036e0d92c6	[ie/patreon] Extract multiple embeds (#9850 ) Closes #9848 Authored by: bashonly	2024-05-04 22:11:11 +00:00
bashonly	cb2fb4a643	[ie/crunchyroll] Always make metadata available (#9772 ) Closes #9750 Authored by: bashonly	2024-05-04 16:15:44 +00:00
bashonly	231c2eacc4	[ie/soundcloud] Extract `genres` (#9821 ) Authored by: bashonly	2024-05-04 16:14:36 +00:00
bashonly	c4853655cb	[ie/wrestleuniverse] Avoid partial stream formats (#9800 ) Authored by: bashonly	2024-05-04 16:07:15 +00:00
Simon Sawicki	ac817bc83e	[build] Migrate `linux_exe` to static musl builds (#9811 ) Authored by: Grub4K, bashonly Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>	2024-04-28 22:19:25 +00:00
bashonly	1a366403d9	[build] Run `macos_legacy` job on `macos-12` (#9804 ) `macos-latest` has been bumped to `macos-14-arm64` which breaks the builds Authored by: bashonly	2024-04-28 15:35:17 +00:00
Simon Sawicki	7e26bd53f9	[core/windows] Fix tests for `sys.executable` with spaces (Fix for `64766459e3`) Authored by: Grub4K	2024-04-28 15:47:55 +02:00
Simon Sawicki	64766459e3	[core/windows] Improve shell quoting and tests (#9802 ) Authored by: Grub4K	2024-04-27 10:37:26 +02:00
bashonly	89f535e265	[ci] Fix `curl-cffi` installation (Bugfix for `02483bea1c`) Authored by: bashonly	2024-04-22 20:36:01 +00:00
bashonly	ff38a011d5	[ie/crunchyroll] Fix auth and remove cookies support (#9749 ) Closes #9745 Authored by: bashonly	2024-04-21 22:41:40 +00:00
bashonly	8056a3026e	[ie/theatercomplextown] Fix extractors (#9754 ) Authored by: bashonly	2024-04-21 16:05:42 +00:00
Simon Sawicki	3ee1194288	[ie] Make `_search_nextjs_data` non fatal (#8937 ) Authored by: Grub4K	2024-04-21 13:40:38 +02:00
bashonly	e3b42d8b1b	[ie/facebook] Fix DASH formats extraction (#9734 ) Closes #9720 Authored by: bashonly	2024-04-20 10:23:12 +00:00
bashonly	c9ce57d9bf	[ie/patreon] Fix Vimeo embed extraction (#9712 ) Fixes regression in `36b240f9a7` Closes #9709 Authored by: bashonly	2024-04-18 23:18:56 +00:00
bashonly	02483bea1c	[build] Normalize `curl_cffi` group to `curl-cffi` (#9698 ) Closes #9682 Authored by: bashonly	2024-04-18 23:11:12 +00:00
bashonly	315b354429	[ie/afreecatv:live] Add `cdn` extractor-arg (#9666 ) Closes #6497 Authored by: bashonly	2024-04-13 16:40:53 +00:00
bashonly	0c21c53885	[ie/jiosaavn] Extract via API and fix playlists (#9656 ) Closes #9648 Authored by: bashonly	2024-04-13 16:08:25 +00:00