0
0
Fork 0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-01-03 06:01:02 +00:00

[networking] Ignore invalid proxies in env (#7704)

Authored by: coletdjnz
This commit is contained in:
coletdjnz 2023-07-28 02:56:02 +12:00 committed by GitHub
parent dae349da97
commit bbeacff7fc
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 19 additions and 7 deletions

View file

@ -930,10 +930,10 @@ def test_empty_proxy(self, handler):
run_validation(handler, False, Request('http://', proxies={'http': None}))
run_validation(handler, False, Request('http://'), proxies={'http': None})
@pytest.mark.parametrize('proxy_url', ['//example.com', 'example.com', '127.0.0.1'])
@pytest.mark.parametrize('proxy_url', ['//example.com', 'example.com', '127.0.0.1', '/a/b/c'])
@pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
def test_missing_proxy_scheme(self, handler, proxy_url):
run_validation(handler, UnsupportedRequest, Request('http://', proxies={'http': 'example.com'}))
def test_invalid_proxy_url(self, handler, proxy_url):
run_validation(handler, UnsupportedRequest, Request('http://', proxies={'http': proxy_url}))
@pytest.mark.parametrize('handler,extensions,fail', [
(handler_tests[0], extensions, fail)
@ -1126,9 +1126,11 @@ def test_legacy_server_connect_error(self):
('http', '__noproxy__', None),
('no', '127.0.0.1,foo.bar', '127.0.0.1,foo.bar'),
('https', 'example.com', 'http://example.com'),
('https', '//example.com', 'http://example.com'),
('https', 'socks5://example.com', 'socks5h://example.com'),
('http', 'socks://example.com', 'socks4://example.com'),
('http', 'socks4://example.com', 'socks4://example.com'),
('unrelated', '/bad/proxy', '/bad/proxy'), # clean_proxies should ignore bad proxies
])
def test_clean_proxy(self, proxy_key, proxy_url, expected):
# proxies should be cleaned in urlopen()

View file

@ -262,9 +262,13 @@ def _check_proxies(self, proxies):
# Skip proxy scheme checks
continue
# Scheme-less proxies are not supported
try:
if urllib.request._parse_proxy(proxy_url)[0] is None:
# Scheme-less proxies are not supported
raise UnsupportedRequest(f'Proxy "{proxy_url}" missing scheme')
except ValueError as e:
# parse_proxy may raise on some invalid proxy urls such as "/a/b/c"
raise UnsupportedRequest(f'Invalid proxy url "{proxy_url}": {e}')
scheme = urllib.parse.urlparse(proxy_url).scheme.lower()
if scheme not in self._SUPPORTED_PROXY_SCHEMES:

View file

@ -98,7 +98,13 @@ def clean_proxies(proxies: dict, headers: HTTPHeaderDict):
continue
if proxy_url is not None:
# Ensure proxies without a scheme are http.
try:
proxy_scheme = urllib.request._parse_proxy(proxy_url)[0]
except ValueError:
# Ignore invalid proxy URLs. Sometimes these may be introduced through environment
# variables unrelated to proxy settings - e.g. Colab `COLAB_LANGUAGE_SERVER_PROXY`.
# If the proxy is going to be used, the Request Handler proxy validation will handle it.
continue
if proxy_scheme is None:
proxies[proxy_key] = 'http://' + remove_start(proxy_url, '//')