mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-25 02:45:12 +00:00
[ie] Do not smuggle http_headers
See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-3ch3-jhc6-5r8x
Authored by: coletdjnz
This commit is contained in:
parent
d4f14a72dc
commit
f04b5bedad
9 changed files with 19 additions and 15 deletions
|
@ -1293,6 +1293,10 @@ def test_clean_header(self):
|
||||||
assert 'Youtubedl-no-compression' not in rh.headers
|
assert 'Youtubedl-no-compression' not in rh.headers
|
||||||
assert rh.headers.get('Accept-Encoding') == 'identity'
|
assert rh.headers.get('Accept-Encoding') == 'identity'
|
||||||
|
|
||||||
|
with FakeYDL({'http_headers': {'Ytdl-socks-proxy': 'socks://localhost:1080'}}) as ydl:
|
||||||
|
rh = self.build_handler(ydl)
|
||||||
|
assert 'Ytdl-socks-proxy' not in rh.headers
|
||||||
|
|
||||||
def test_build_handler_params(self):
|
def test_build_handler_params(self):
|
||||||
with FakeYDL({
|
with FakeYDL({
|
||||||
'http_headers': {'test': 'testtest'},
|
'http_headers': {'test': 'testtest'},
|
||||||
|
|
|
@ -105,7 +105,7 @@ def _real_extract(self, url):
|
||||||
'chapter': module.get('title'),
|
'chapter': module.get('title'),
|
||||||
'chapter_id': str_or_none(module.get('id')),
|
'chapter_id': str_or_none(module.get('id')),
|
||||||
'title': activity.get('title'),
|
'title': activity.get('title'),
|
||||||
'url': smuggle_url(f'https://player.vimeo.com/video/{vimeo_id}', {'http_headers': {'Referer': 'https://api.cybrary.it'}})
|
'url': smuggle_url(f'https://player.vimeo.com/video/{vimeo_id}', {'referer': 'https://api.cybrary.it'})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -138,7 +138,7 @@ def _real_extract(self, url):
|
||||||
# of the video.
|
# of the video.
|
||||||
return {
|
return {
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'url': smuggle_url(data_url, {'http_headers': headers}),
|
'url': smuggle_url(data_url, {'referer': webpage_url}),
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'series': series_title,
|
'series': series_title,
|
||||||
|
|
|
@ -106,4 +106,4 @@ def _real_extract(self, url):
|
||||||
return self.url_result(src, YoutubeTabIE)
|
return self.url_result(src, YoutubeTabIE)
|
||||||
return self.url_result(smuggle_url(
|
return self.url_result(smuggle_url(
|
||||||
urllib.parse.unquote(traverse_obj(qs, ('src', 0), ('url', 0))),
|
urllib.parse.unquote(traverse_obj(qs, ('src', 0), ('url', 0))),
|
||||||
{'http_headers': {'Referer': url}}))
|
{'referer': url}))
|
||||||
|
|
|
@ -17,6 +17,7 @@
|
||||||
determine_protocol,
|
determine_protocol,
|
||||||
dict_get,
|
dict_get,
|
||||||
extract_basic_auth,
|
extract_basic_auth,
|
||||||
|
filter_dict,
|
||||||
format_field,
|
format_field,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
is_html,
|
is_html,
|
||||||
|
@ -2435,10 +2436,10 @@ def _real_extract(self, url):
|
||||||
# to accept raw bytes and being able to download only a chunk.
|
# to accept raw bytes and being able to download only a chunk.
|
||||||
# It may probably better to solve this by checking Content-Type for application/octet-stream
|
# It may probably better to solve this by checking Content-Type for application/octet-stream
|
||||||
# after a HEAD request, but not sure if we can rely on this.
|
# after a HEAD request, but not sure if we can rely on this.
|
||||||
full_response = self._request_webpage(url, video_id, headers={
|
full_response = self._request_webpage(url, video_id, headers=filter_dict({
|
||||||
'Accept-Encoding': 'identity',
|
'Accept-Encoding': 'identity',
|
||||||
**smuggled_data.get('http_headers', {})
|
'Referer': smuggled_data.get('referer'),
|
||||||
})
|
}))
|
||||||
new_url = full_response.url
|
new_url = full_response.url
|
||||||
url = urllib.parse.urlparse(url)._replace(scheme=urllib.parse.urlparse(new_url).scheme).geturl()
|
url = urllib.parse.urlparse(url)._replace(scheme=urllib.parse.urlparse(new_url).scheme).geturl()
|
||||||
if new_url != extract_basic_auth(url)[0]:
|
if new_url != extract_basic_auth(url)[0]:
|
||||||
|
@ -2458,7 +2459,7 @@ def _real_extract(self, url):
|
||||||
m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
|
m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
|
||||||
if m:
|
if m:
|
||||||
self.report_detected('direct video link')
|
self.report_detected('direct video link')
|
||||||
headers = smuggled_data.get('http_headers', {})
|
headers = filter_dict({'Referer': smuggled_data.get('referer')})
|
||||||
format_id = str(m.group('format_id'))
|
format_id = str(m.group('format_id'))
|
||||||
ext = determine_ext(url, default_ext=None) or urlhandle_detect_ext(full_response)
|
ext = determine_ext(url, default_ext=None) or urlhandle_detect_ext(full_response)
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
|
@ -2710,7 +2711,7 @@ def _extract_embeds(self, url, webpage, *, urlh=None, info_dict={}):
|
||||||
'url': smuggle_url(json_ld['url'], {
|
'url': smuggle_url(json_ld['url'], {
|
||||||
'force_videoid': video_id,
|
'force_videoid': video_id,
|
||||||
'to_generic': True,
|
'to_generic': True,
|
||||||
'http_headers': {'Referer': url},
|
'referer': url,
|
||||||
}),
|
}),
|
||||||
}, json_ld)]
|
}, json_ld)]
|
||||||
|
|
||||||
|
|
|
@ -530,7 +530,7 @@ def _real_extract(self, url):
|
||||||
if service_name == 'vimeo':
|
if service_name == 'vimeo':
|
||||||
info['url'] = smuggle_url(
|
info['url'] = smuggle_url(
|
||||||
f'https://player.vimeo.com/video/{service_id}',
|
f'https://player.vimeo.com/video/{service_id}',
|
||||||
{'http_headers': {'Referer': url}})
|
{'referer': url})
|
||||||
|
|
||||||
video_slides = traverse_obj(slides, ('slides', ..., 'video', 'id'))
|
video_slides = traverse_obj(slides, ('slides', ..., 'video', 'id'))
|
||||||
if not video_slides:
|
if not video_slides:
|
||||||
|
|
|
@ -32,9 +32,7 @@ def _parse_video(self, video):
|
||||||
'description': video.get('description'),
|
'description': video.get('description'),
|
||||||
'url': smuggle_url(
|
'url': smuggle_url(
|
||||||
'https://player.vimeo.com/video/' + vimeo_id, {
|
'https://player.vimeo.com/video/' + vimeo_id, {
|
||||||
'http_headers': {
|
'referer': 'https://storyfire.com/',
|
||||||
'Referer': 'https://storyfire.com/',
|
|
||||||
}
|
|
||||||
}),
|
}),
|
||||||
'thumbnail': video.get('storyImage'),
|
'thumbnail': video.get('storyImage'),
|
||||||
'view_count': int_or_none(video.get('views')),
|
'view_count': int_or_none(video.get('views')),
|
||||||
|
|
|
@ -37,14 +37,14 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _smuggle_referrer(url, referrer_url):
|
def _smuggle_referrer(url, referrer_url):
|
||||||
return smuggle_url(url, {'http_headers': {'Referer': referrer_url}})
|
return smuggle_url(url, {'referer': referrer_url})
|
||||||
|
|
||||||
def _unsmuggle_headers(self, url):
|
def _unsmuggle_headers(self, url):
|
||||||
"""@returns (url, smuggled_data, headers)"""
|
"""@returns (url, smuggled_data, headers)"""
|
||||||
url, data = unsmuggle_url(url, {})
|
url, data = unsmuggle_url(url, {})
|
||||||
headers = self.get_param('http_headers').copy()
|
headers = self.get_param('http_headers').copy()
|
||||||
if 'http_headers' in data:
|
if 'referer' in data:
|
||||||
headers.update(data['http_headers'])
|
headers['Referer'] = data['referer']
|
||||||
return url, data, headers
|
return url, data, headers
|
||||||
|
|
||||||
def _perform_login(self, username, password):
|
def _perform_login(self, username, password):
|
||||||
|
|
|
@ -123,6 +123,7 @@ def clean_headers(headers: HTTPHeaderDict):
|
||||||
if 'Youtubedl-No-Compression' in headers: # compat
|
if 'Youtubedl-No-Compression' in headers: # compat
|
||||||
del headers['Youtubedl-No-Compression']
|
del headers['Youtubedl-No-Compression']
|
||||||
headers['Accept-Encoding'] = 'identity'
|
headers['Accept-Encoding'] = 'identity'
|
||||||
|
headers.pop('Ytdl-socks-proxy', None)
|
||||||
|
|
||||||
|
|
||||||
def remove_dot_segments(path):
|
def remove_dot_segments(path):
|
||||||
|
|
Loading…
Reference in a new issue