Mirror of https://github.com/yt-dlp/yt-dlp.git (synced 2024-11-26 02:55:17 +00:00)
Implement --add-header without modifying std_headers

Closes #2526, #1614

commit 8b7539d27c (parent e48b3875ec)
10 changed files with 28 additions and 29 deletions
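The user-visible effect, as a minimal sketch (an illustration, not part of the commit; assumes yt_dlp is importable at this revision): custom headers are merged into ydl.params['http_headers'] instead of being written into the global std_headers.

    # Hypothetical usage sketch of the new behavior
    from yt_dlp import YoutubeDL
    from yt_dlp.utils import std_headers

    with YoutubeDL({'http_headers': {'Referer': 'https://example.com/'}}) as ydl:
        # Global defaults are no longer mutated
        assert 'Referer' not in std_headers
        print(ydl.params['http_headers']['Referer'])  # https://example.com/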
README.md
@@ -737,9 +737,6 @@ ## Workarounds:
     --prefer-insecure               Use an unencrypted connection to retrieve
                                     information about the video (Currently
                                     supported only for YouTube)
-    --user-agent UA                 Specify a custom user agent
-    --referer URL                   Specify a custom referer, use if the video
-                                    access is restricted to one domain
     --add-header FIELD:VALUE        Specify a custom HTTP header and its value,
                                     separated by a colon ":". You can use this
                                     option multiple times
@@ -1866,6 +1863,8 @@ #### Redundant options
     --reject-title REGEX             --match-filter "title !~= (?i)REGEX"
     --min-views COUNT                --match-filter "view_count >=? COUNT"
     --max-views COUNT                --match-filter "view_count <=? COUNT"
+    --user-agent UA                  --add-header "User-Agent:UA"
+    --referer URL                    --add-header "Referer:URL"
 
 
 #### Not recommended
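Per this table, the old flags are now shorthand for --add-header; for example, these two hypothetical invocations request the same headers:

    yt-dlp --user-agent "Mozilla/5.0" --referer "https://example.com/" "URL"
    yt-dlp --add-header "User-Agent:Mozilla/5.0" --add-header "Referer:https://example.com/" "URL"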
yt_dlp/YoutubeDL.py
@@ -83,6 +83,7 @@
     make_dir,
     make_HTTPS_handler,
     MaxDownloadsReached,
+    merge_headers,
     network_exceptions,
     number_of_digits,
     orderedSet,
@@ -332,6 +333,7 @@ class YoutubeDL(object):
     nocheckcertificate:  Do not verify SSL certificates
     prefer_insecure:     Use HTTP instead of HTTPS to retrieve information.
                          At the moment, this is only supported by YouTube.
+    http_headers:        A dictionary of custom headers to be used for all requests
     proxy:               URL of the proxy server to use
     geo_verification_proxy:  URL of the proxy to use for IP address verification
                          on geo-restricted sites.
@@ -647,6 +649,9 @@ def check_deprecated(param, option, suggestion):
             else self.params['format'] if callable(self.params['format'])
             else self.build_format_selector(self.params['format']))
 
+        # Set http_headers defaults according to std_headers
+        self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))
+
         self._setup_opener()
 
         if auto_init:
@@ -2250,8 +2255,7 @@ def restore_last_token(self):
         return _build_selector_function(parsed_selector)
 
     def _calc_headers(self, info_dict):
-        res = std_headers.copy()
-        res.update(info_dict.get('http_headers') or {})
+        res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})
 
         cookies = self._calc_cookies(info_dict)
         if cookies:
yt_dlp/__init__.py
@@ -41,6 +41,7 @@
     SameFileError,
     setproctitle,
     std_headers,
+    traverse_obj,
     write_string,
 )
 from .update import run_update
@@ -75,20 +76,15 @@ def _real_main(argv=None):
     parser, opts, args = parseOpts(argv)
     warnings, deprecation_warnings = [], []
 
-    # Set user agent
     if opts.user_agent is not None:
-        std_headers['User-Agent'] = opts.user_agent
-
-    # Set referer
+        opts.headers.setdefault('User-Agent', opts.user_agent)
     if opts.referer is not None:
-        std_headers['Referer'] = opts.referer
-
-    # Custom HTTP headers
-    std_headers.update(opts.headers)
+        opts.headers.setdefault('Referer', opts.referer)
 
     # Dump user agent
     if opts.dump_user_agent:
-        write_string(std_headers['User-Agent'] + '\n', out=sys.stdout)
+        ua = traverse_obj(opts.headers, 'User-Agent', casesense=False, default=std_headers['User-Agent'])
+        write_string(f'{ua}\n', out=sys.stdout)
         sys.exit(0)
 
     # Batch file verification
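A side effect of using setdefault here: an explicit --add-header value now takes precedence over --user-agent/--referer. A minimal sketch of why (plain dict semantics, not code from the commit):

    # opts.headers as built by a hypothetical `--add-header "User-Agent:custom"`
    headers = {'User-Agent': 'custom'}
    # What `--user-agent legacy` now does:
    headers.setdefault('User-Agent', 'legacy')
    print(headers['User-Agent'])  # 'custom' -- the --add-header value is kept

This is also why the dump_user_agent path above reads the value with traverse_obj(..., casesense=False): the key supplied via --add-header may be cased arbitrarily.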
@@ -767,6 +763,7 @@ def report_deprecation(val, old, new=None):
         'legacyserverconnect': opts.legacy_server_connect,
         'nocheckcertificate': opts.no_check_certificate,
         'prefer_insecure': opts.prefer_insecure,
+        'http_headers': opts.headers,
         'proxy': opts.proxy,
         'socket_timeout': opts.socket_timeout,
         'bidi_workaround': opts.bidi_workaround,
yt_dlp/extractor/instagram.py
@@ -17,7 +17,6 @@
     get_element_by_attribute,
     int_or_none,
     lowercase_escape,
-    std_headers,
     str_or_none,
     str_to_int,
     traverse_obj,
@@ -503,7 +502,7 @@ def _extract_graphql(self, data, url):
                 '%s' % rhx_gis,
                 '',
                 '%s:%s' % (rhx_gis, csrf_token),
-                '%s:%s:%s' % (rhx_gis, csrf_token, std_headers['User-Agent']),
+                '%s:%s:%s' % (rhx_gis, csrf_token, self.get_param('http_headers')['User-Agent']),
             ]
 
             # try all of the ways to generate a GIS query, and not only use the
yt_dlp/extractor/mildom.py
@@ -8,7 +8,6 @@
 
 from .common import InfoExtractor
 from ..utils import (
-    std_headers,
     update_url_query,
     random_uuidv4,
     try_get,
@@ -70,7 +69,7 @@ def _fetch_dispatcher_config(self):
                         'clu': '',
                         'wh': '1919*810',
                         'rtm': self.iso_timestamp(),
-                        'ua': std_headers['User-Agent'],
+                        'ua': self.get_param('http_headers')['User-Agent'],
                     }).encode('utf8')).decode('utf8').replace('\n', ''),
                 }).encode('utf8'))
             self._DISPATCHER_CONFIG = self._parse_json(base64.b64decode(tmp['data']), 'initialization')
yt_dlp/extractor/openload.py
@@ -16,7 +16,6 @@
     ExtractorError,
     get_exe_version,
     is_outdated_version,
-    std_headers,
     Popen,
 )
 
@@ -208,7 +207,7 @@ def get(self, url, html=None, video_id=None, note=None, note2='Executing JS on w
 
         replaces = self.options
         replaces['url'] = url
-        user_agent = headers.get('User-Agent') or std_headers['User-Agent']
+        user_agent = headers.get('User-Agent') or self.get_param('http_headers')['User-Agent']
         replaces['ua'] = user_agent.replace('"', '\\"')
         replaces['jscode'] = jscode
 
yt_dlp/extractor/rtve.py
@@ -17,7 +17,6 @@
     qualities,
     remove_end,
     remove_start,
-    std_headers,
     try_get,
 )
 
@@ -71,7 +70,7 @@ class RTVEALaCartaIE(InfoExtractor):
     }]
 
     def _real_initialize(self):
-        user_agent_b64 = base64.b64encode(std_headers['User-Agent'].encode('utf-8')).decode('utf-8')
+        user_agent_b64 = base64.b64encode(self.get_param('http_headers')['User-Agent'].encode('utf-8')).decode('utf-8')
         self._manager = self._download_json(
             'http://www.rtve.es/odin/loki/' + user_agent_b64,
             None, 'Fetching manager info')['manager']
yt_dlp/extractor/vimeo.py
@@ -28,7 +28,6 @@
     parse_qs,
     sanitized_Request,
     smuggle_url,
-    std_headers,
     str_or_none,
     try_get,
     unified_timestamp,
@@ -758,7 +757,7 @@ def _try_album_password(self, url):
 
     def _real_extract(self, url):
         url, data = unsmuggle_url(url, {})
-        headers = std_headers.copy()
+        headers = self.get_param('http_headers').copy()
         if 'http_headers' in data:
             headers.update(data['http_headers'])
         if 'Referer' not in headers:
yt_dlp/options.py
@@ -860,17 +860,16 @@ def _dict_from_options_callback(
     workarounds.add_option(
         '--user-agent',
         metavar='UA', dest='user_agent',
-        help='Specify a custom user agent')
+        help=optparse.SUPPRESS_HELP)
     workarounds.add_option(
         '--referer',
         metavar='URL', dest='referer', default=None,
-        help='Specify a custom referer, use if the video access is restricted to one domain',
-    )
+        help=optparse.SUPPRESS_HELP)
     workarounds.add_option(
         '--add-header',
         metavar='FIELD:VALUE', dest='headers', default={}, type='str',
         action='callback', callback=_dict_from_options_callback,
-        callback_kwargs={'multiple_keys': False, 'process_key': None},
+        callback_kwargs={'multiple_keys': False},
         help='Specify a custom HTTP header and its value, separated by a colon ":". You can use this option multiple times',
     )
     workarounds.add_option(
yt_dlp/utils.py
@@ -1372,7 +1372,7 @@ def http_request(self, req):
         if url != url_escaped:
             req = update_Request(req, url=url_escaped)
 
-        for h, v in std_headers.items():
+        for h, v in self._params.get('http_headers', std_headers).items():
             # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
             # The dict keys are capitalized because of this bug by urllib
             if h.capitalize() not in req.headers:
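The .capitalize() comparison matters because urllib stores header keys via str.capitalize() (the Python bug 2275 the comment cites). A quick standalone check of that stdlib behavior, separate from the commit:

    import urllib.request

    req = urllib.request.Request('https://example.com', headers={'User-Agent': 'test'})
    # urllib normalized the key with str.capitalize():
    print(req.headers)                               # {'User-agent': 'test'}
    print('User-Agent' in req.headers)               # False
    print('User-Agent'.capitalize() in req.headers)  # True ('User-agent')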
@@ -5436,3 +5436,8 @@ def _cancel_all_tasks(loop):
 
 
 has_websockets = bool(compat_websockets)
+
+
+def merge_headers(*dicts):
+    """Merge dicts of network headers case insensitively, prioritizing the latter ones"""
+    return {k.capitalize(): v for k, v in itertools.chain.from_iterable(map(dict.items, dicts))}
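merge_headers normalizes keys with str.capitalize(), so differently-cased duplicates collapse into one entry and later dicts win. A standalone sketch of those semantics (the one-liner is copied from the hunk above so the snippet runs on its own):

    import itertools

    def merge_headers(*dicts):
        """Merge dicts of network headers case insensitively, prioritizing the latter ones"""
        return {k.capitalize(): v for k, v in itertools.chain.from_iterable(map(dict.items, dicts))}

    print(merge_headers({'user-agent': 'default-UA'}, {'User-Agent': 'custom-UA'}))
    # {'User-agent': 'custom-UA'} -- one key, last value wins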