mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-25 02:45:12 +00:00
[networking] Add support for zstandard content-encoding
Supported by urllib/requests/curl_cffi.
Authored-by: coletdjnz
This commit is contained in:
parent
8531d2b03b
commit
aec3cc3218
6 changed files with 97 additions and 14 deletions
|
@@ -57,6 +57,9 @@ curl-cffi = [
|
|||
"curl-cffi==0.5.10; os_name=='nt' and implementation_name=='cpython'",
|
||||
"curl-cffi>=0.5.10,!=0.6.*,<0.8; os_name!='nt' and implementation_name=='cpython'",
|
||||
]
|
||||
zstd = [
|
||||
"zstandard>=0.22.0",
|
||||
]
|
||||
secretstorage = [
|
||||
"cffi",
|
||||
"secretstorage",
|
||||
|
|
|
@@ -2,6 +2,7 @@
|
|||
|
||||
# Allow direct execution
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
|
||||
import pytest
|
||||
|
@@ -36,7 +37,7 @@
|
|||
verify_address_availability,
|
||||
)
|
||||
from yt_dlp.cookies import YoutubeDLCookieJar
|
||||
from yt_dlp.dependencies import brotli, curl_cffi, requests, urllib3
|
||||
from yt_dlp.dependencies import brotli, curl_cffi, requests, urllib3, zstandard
|
||||
from yt_dlp.networking import (
|
||||
HEADRequest,
|
||||
PUTRequest,
|
||||
|
@@ -62,7 +63,7 @@
|
|||
ImpersonateTarget,
|
||||
)
|
||||
from yt_dlp.utils import YoutubeDLError
|
||||
from yt_dlp.utils._utils import _YDLLogger as FakeLogger
|
||||
from yt_dlp.utils._utils import _YDLLogger as FakeLogger, int_or_none
|
||||
from yt_dlp.utils.networking import HTTPHeaderDict, std_headers
|
||||
|
||||
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
|
@@ -217,6 +218,7 @@ def do_GET(self):
|
|||
self.end_headers()
|
||||
elif self.path == '/content-encoding':
|
||||
encodings = self.headers.get('ytdl-encoding', '')
|
||||
content_encoding_header = self.headers.get('ytdl-encoding-header', encodings)
|
||||
payload = b'<html><video src="/vid.mp4" /></html>'
|
||||
for encoding in filter(None, (e.strip() for e in encodings.split(','))):
|
||||
if encoding == 'br' and brotli:
|
||||
|
@@ -228,6 +230,8 @@ def do_GET(self):
|
|||
payload = buf.getvalue()
|
||||
elif encoding == 'deflate':
|
||||
payload = zlib.compress(payload)
|
||||
elif encoding == 'zstd':
|
||||
payload = zstandard.compress(payload)
|
||||
elif encoding == 'unsupported':
|
||||
payload = b'raw'
|
||||
break
|
||||
|
@@ -235,7 +239,7 @@ def do_GET(self):
|
|||
self._status(415)
|
||||
return
|
||||
self.send_response(200)
|
||||
self.send_header('Content-Encoding', encodings)
|
||||
self.send_header('Content-Encoding', content_encoding_header)
|
||||
self.send_header('Content-Length', str(len(payload)))
|
||||
self.end_headers()
|
||||
self.wfile.write(payload)
|
||||
|
@@ -622,7 +626,7 @@ def test_gzip_trailing_garbage(self, handler):
|
|||
assert data == '<html><video src="/vid.mp4" /></html>'
|
||||
|
||||
@pytest.mark.skip_handler('CurlCFFI', 'not applicable to curl-cffi')
|
||||
@pytest.mark.skipif(not brotli, reason='brotli support is not installed')
|
||||
@pytest.mark.skipif(not brotli, reason='brotli not available')
|
||||
def test_brotli(self, handler):
|
||||
with handler() as rh:
|
||||
res = validate_and_send(
|
||||
|
@@ -632,6 +636,52 @@ def test_brotli(self, handler):
|
|||
assert res.headers.get('Content-Encoding') == 'br'
|
||||
assert res.read() == b'<html><video src="/vid.mp4" /></html>'
|
||||
|
||||
@pytest.mark.skipif(not brotli, reason='brotli not available')
|
||||
def test_brotli_error(self, handler):
|
||||
with handler() as rh:
|
||||
with pytest.raises(TransportError):
|
||||
# depending on implementation, error may be raised at request time or read time
|
||||
res = validate_and_send(
|
||||
rh, Request(
|
||||
f'http://127.0.0.1:{self.http_port}/content-encoding',
|
||||
headers={'ytdl-encoding': 'deflate', 'ytdl-encoding-header': 'br'}))
|
||||
res.read()
|
||||
|
||||
# TODO: implement centralised version parser
|
||||
@pytest.mark.skip_handler_if(
|
||||
'CurlCFFI',
|
||||
lambda _: tuple(map(int, re.split(r'\D+', curl_cffi.__version__)[:3])) < (0, 7, 0),
|
||||
'zstd not supported by curl_cffi < 0.7.0')
|
||||
@pytest.mark.skip_handler_if(
|
||||
'Requests',
|
||||
lambda _: tuple(int_or_none(x, default=0) for x in urllib3.__version__.split('.')) < (2, 0, 0),
|
||||
'zstd not supported by urllib3 < 2.0.0')
|
||||
@pytest.mark.skipif(not zstandard, reason='zstandard not available')
|
||||
def test_zstd(self, handler):
|
||||
with handler() as rh:
|
||||
res = validate_and_send(
|
||||
rh, Request(
|
||||
f'http://127.0.0.1:{self.http_port}/content-encoding',
|
||||
headers={'ytdl-encoding': 'zstd'}))
|
||||
assert res.headers.get('Content-Encoding') == 'zstd'
|
||||
assert res.read() == b'<html><video src="/vid.mp4" /></html>'
|
||||
|
||||
# TODO: implement centralised version parser
|
||||
@pytest.mark.skip_handler_if(
|
||||
'Requests',
|
||||
lambda _: tuple(int_or_none(x, default=0) for x in urllib3.__version__.split('.')) < (2, 0, 0),
|
||||
'zstd not supported by urllib3 < 2.0.0')
|
||||
@pytest.mark.skipif(not zstandard, reason='zstandard not available')
|
||||
def test_zstd_error(self, handler):
|
||||
with handler() as rh:
|
||||
with pytest.raises(TransportError):
|
||||
# depending on implementation, error may be raised at request time or read time
|
||||
res = validate_and_send(
|
||||
rh, Request(
|
||||
f'http://127.0.0.1:{self.http_port}/content-encoding',
|
||||
headers={'ytdl-encoding': 'unsupported', 'ytdl-encoding-header': 'zstd'}))
|
||||
res.read()
|
||||
|
||||
def test_deflate(self, handler):
|
||||
with handler() as rh:
|
||||
res = validate_and_send(
|
||||
|
@@ -641,6 +691,16 @@ def test_deflate(self, handler):
|
|||
assert res.headers.get('Content-Encoding') == 'deflate'
|
||||
assert res.read() == b'<html><video src="/vid.mp4" /></html>'
|
||||
|
||||
def test_deflate_error(self, handler):
|
||||
with handler() as rh:
|
||||
with pytest.raises(TransportError):
|
||||
# depending on implementation, error may be raised at request time or read time
|
||||
res = validate_and_send(
|
||||
rh, Request(
|
||||
f'http://127.0.0.1:{self.http_port}/content-encoding',
|
||||
headers={'ytdl-encoding': 'gzip', 'ytdl-encoding-header': 'deflate'}))
|
||||
res.read()
|
||||
|
||||
def test_gzip(self, handler):
|
||||
with handler() as rh:
|
||||
res = validate_and_send(
|
||||
|
@@ -650,6 +710,16 @@ def test_gzip(self, handler):
|
|||
assert res.headers.get('Content-Encoding') == 'gzip'
|
||||
assert res.read() == b'<html><video src="/vid.mp4" /></html>'
|
||||
|
||||
def test_gzip_error(self, handler):
|
||||
with handler() as rh:
|
||||
with pytest.raises(TransportError):
|
||||
# depending on implementation, error may be raised at request time or read time
|
||||
res = validate_and_send(
|
||||
rh, Request(
|
||||
f'http://127.0.0.1:{self.http_port}/content-encoding',
|
||||
headers={'ytdl-encoding': 'unsupported', 'ytdl-encoding-header': 'gzip'}))
|
||||
res.read()
|
||||
|
||||
def test_multiple_encodings(self, handler):
|
||||
with handler() as rh:
|
||||
for pair in ('gzip,deflate', 'deflate, gzip', 'gzip, gzip', 'deflate, deflate'):
|
||||
|
|
|
@@ -22,6 +22,10 @@
|
|||
if not _path_exists(certifi.where()):
|
||||
certifi = None
|
||||
|
||||
try:
|
||||
import zstandard
|
||||
except ImportError:
|
||||
zstandard = None
|
||||
|
||||
try:
|
||||
import mutagen
|
||||
|
|
|
@@ -29,7 +29,7 @@
|
|||
raise ImportError('curl_cffi is not installed')
|
||||
|
||||
|
||||
curl_cffi_version = tuple(map(int, re.split(r'[^\d]+', curl_cffi.__version__)[:3]))
|
||||
curl_cffi_version = tuple(map(int, re.split(r'\D+', curl_cffi.__version__)[:3]))
|
||||
|
||||
if curl_cffi_version != (0, 5, 10) and not ((0, 7, 0) <= curl_cffi_version < (0, 8, 0)):
|
||||
curl_cffi._yt_dlp__version = f'{curl_cffi.__version__} (unsupported)'
|
||||
|
|
|
@@ -8,7 +8,7 @@
|
|||
import socket
|
||||
import warnings
|
||||
|
||||
from ..dependencies import brotli, requests, urllib3
|
||||
from ..dependencies import requests, urllib3
|
||||
from ..utils import bug_reports_message, int_or_none, variadic
|
||||
from ..utils.networking import normalize_url
|
||||
|
||||
|
@@ -59,12 +59,7 @@
|
|||
)
|
||||
from ..socks import ProxyError as SocksProxyError
|
||||
|
||||
SUPPORTED_ENCODINGS = [
|
||||
'gzip', 'deflate',
|
||||
]
|
||||
|
||||
if brotli is not None:
|
||||
SUPPORTED_ENCODINGS.append('br')
|
||||
SUPPORTED_ENCODINGS = urllib3.util.request.ACCEPT_ENCODING.split(',')
|
||||
|
||||
'''
|
||||
Override urllib3's behavior to not convert lower-case percent-encoded characters
|
||||
|
@@ -259,7 +254,6 @@ class RequestsRH(RequestHandler, InstanceStoreMixin):
|
|||
https://github.com/psf/requests
|
||||
"""
|
||||
_SUPPORTED_URL_SCHEMES = ('http', 'https')
|
||||
_SUPPORTED_ENCODINGS = tuple(SUPPORTED_ENCODINGS)
|
||||
_SUPPORTED_PROXY_SCHEMES = ('http', 'https', 'socks4', 'socks4a', 'socks5', 'socks5h')
|
||||
_SUPPORTED_FEATURES = (Features.NO_PROXY, Features.ALL_PROXY)
|
||||
RH_NAME = 'requests'
|
||||
|
|
|
@@ -38,7 +38,7 @@
|
|||
SSLError,
|
||||
TransportError,
|
||||
)
|
||||
from ..dependencies import brotli
|
||||
from ..dependencies import brotli, zstandard
|
||||
from ..socks import ProxyError as SocksProxyError
|
||||
from ..utils import update_url_query
|
||||
from ..utils.networking import normalize_url
|
||||
|
@@ -50,6 +50,10 @@
|
|||
SUPPORTED_ENCODINGS.append('br')
|
||||
CONTENT_DECODE_ERRORS.append(brotli.error)
|
||||
|
||||
if zstandard:
|
||||
SUPPORTED_ENCODINGS.append('zstd')
|
||||
CONTENT_DECODE_ERRORS.append(zstandard.ZstdError)
|
||||
|
||||
|
||||
def _create_http_connection(http_class, source_address, *args, **kwargs):
|
||||
hc = http_class(*args, **kwargs)
|
||||
|
@@ -118,6 +122,12 @@ def brotli(data):
|
|||
return data
|
||||
return brotli.decompress(data)
|
||||
|
||||
@staticmethod
|
||||
def zstd(data):
|
||||
if not data:
|
||||
return data
|
||||
return zstandard.ZstdDecompressor().decompress(data)
|
||||
|
||||
@staticmethod
|
||||
def gz(data):
|
||||
# There may be junk added the end of the file
|
||||
|
@@ -158,6 +168,8 @@ def http_response(self, req, resp):
|
|||
decoded_response = self.deflate(decoded_response or resp.read())
|
||||
elif encoding == 'br' and brotli:
|
||||
decoded_response = self.brotli(decoded_response or resp.read())
|
||||
elif encoding == 'zstd' and zstandard:
|
||||
decoded_response = self.zstd(decoded_response or resp.read())
|
||||
|
||||
if decoded_response is not None:
|
||||
resp = urllib.request.addinfourl(io.BytesIO(decoded_response), old_resp.headers, old_resp.url, old_resp.code)
|
||||
|
|
Loading…
Reference in a new issue