mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-28 03:13:01 +00:00
[cleanup] Misc cleanup
This commit is contained in:
parent
47046464fa
commit
ae61d108dd
10 changed files with 65 additions and 64 deletions
21
README.md
21
README.md
|
@ -71,7 +71,7 @@
|
||||||
|
|
||||||
# NEW FEATURES
|
# NEW FEATURES
|
||||||
|
|
||||||
* Based on **youtube-dl 2021.12.17 [commit/8a158a9](https://github.com/ytdl-org/youtube-dl/commit/8a158a936c8b002ef536e9e2b778ded02c09c0fa)**<!--([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21))--> and **youtube-dlc 2020.11.11-3 [commit/f9401f2](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee)**: You get all the features and patches of [youtube-dlc](https://github.com/blackjack4494/yt-dlc) in addition to the latest [youtube-dl](https://github.com/ytdl-org/youtube-dl)
|
* Merged with **youtube-dl v2021.12.17 [commit/8a158a9](https://github.com/ytdl-org/youtube-dl/commit/8a158a936c8b002ef536e9e2b778ded02c09c0fa)**<!--([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21))--> and **youtube-dlc v2020.11.11-3 [commit/f9401f2](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee)**: You get all the features and patches of [youtube-dlc](https://github.com/blackjack4494/yt-dlc) in addition to the latest [youtube-dl](https://github.com/ytdl-org/youtube-dl)
|
||||||
|
|
||||||
* **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in YouTube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API
|
* **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in YouTube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API
|
||||||
|
|
||||||
|
@ -79,18 +79,13 @@ # NEW FEATURES
|
||||||
|
|
||||||
* **Merged with animelover1984/youtube-dl**: You get most of the features and improvements from [animelover1984/youtube-dl](https://github.com/animelover1984/youtube-dl) including `--write-comments`, `BiliBiliSearch`, `BilibiliChannel`, Embedding thumbnail in mp4/ogg/opus, playlist infojson etc. Note that the NicoNico livestreams are not available. See [#31](https://github.com/yt-dlp/yt-dlp/pull/31) for details.
|
* **Merged with animelover1984/youtube-dl**: You get most of the features and improvements from [animelover1984/youtube-dl](https://github.com/animelover1984/youtube-dl) including `--write-comments`, `BiliBiliSearch`, `BilibiliChannel`, Embedding thumbnail in mp4/ogg/opus, playlist infojson etc. Note that the NicoNico livestreams are not available. See [#31](https://github.com/yt-dlp/yt-dlp/pull/31) for details.
|
||||||
|
|
||||||
* **Youtube improvements**:
|
* **YouTube improvements**:
|
||||||
* All Feeds (`:ytfav`, `:ytwatchlater`, `:ytsubs`, `:ythistory`, `:ytrec`, `:ytnotif`) and private playlists supports downloading multiple pages of content
|
* Supports Clips, Stories (`ytstories:<channel UCID>`), Search (including filters)**\***, YouTube Music Search, Channel-specific search, Search prefixes (`ytsearch:`, `ytsearchdate:`)**\***, Mixes, YouTube Music Albums/Channels ([except self-uploaded music](https://github.com/yt-dlp/yt-dlp/issues/723)), and Feeds (`:ytfav`, `:ytwatchlater`, `:ytsubs`, `:ythistory`, `:ytrec`, `:ytnotif`)
|
||||||
* Search (`ytsearch:`, `ytsearchdate:`), search URLs and in-channel search works
|
* Fix for [n-sig based throttling](https://github.com/ytdl-org/youtube-dl/issues/29326) **\***
|
||||||
* Mixes supports downloading multiple pages of content
|
* Supports some (but not all) age-gated content without cookies
|
||||||
* Some (but not all) age-gated content can be downloaded without cookies
|
* Download livestreams from the start using `--live-from-start` (*experimental*)
|
||||||
* Fix for [n-sig based throttling](https://github.com/ytdl-org/youtube-dl/issues/29326)
|
* `255kbps` audio is extracted (if available) from YouTube Music when premium cookies are given
|
||||||
* Redirect channel's home URL automatically to `/video` to preserve the old behaviour
|
* Redirect channel's home URL automatically to `/video` to preserve the old behaviour
|
||||||
* `255kbps` audio is extracted (if available) from youtube music when premium cookies are given
|
|
||||||
* Youtube music Albums, channels etc can be downloaded ([except self-uploaded music](https://github.com/yt-dlp/yt-dlp/issues/723))
|
|
||||||
* Download livestreams from the start using `--live-from-start` (experimental)
|
|
||||||
* Support for downloading stories (`ytstories:<channel UCID>`)
|
|
||||||
* Support for downloading clips
|
|
||||||
|
|
||||||
* **Cookies from browser**: Cookies can be automatically extracted from all major web browsers using `--cookies-from-browser BROWSER[+KEYRING][:PROFILE]`
|
* **Cookies from browser**: Cookies can be automatically extracted from all major web browsers using `--cookies-from-browser BROWSER[+KEYRING][:PROFILE]`
|
||||||
|
|
||||||
|
@ -124,6 +119,8 @@ # NEW FEATURES
|
||||||
|
|
||||||
See [changelog](Changelog.md) or [commits](https://github.com/yt-dlp/yt-dlp/commits) for the full list of changes
|
See [changelog](Changelog.md) or [commits](https://github.com/yt-dlp/yt-dlp/commits) for the full list of changes
|
||||||
|
|
||||||
|
Features marked with a **\*** have been back-ported to youtube-dl
|
||||||
|
|
||||||
### Differences in default behavior
|
### Differences in default behavior
|
||||||
|
|
||||||
Some of yt-dlp's default options are different from those of youtube-dl and youtube-dlc:
|
Some of yt-dlp's default options are different from those of youtube-dl and youtube-dlc:
|
||||||
|
|
|
@ -273,7 +273,11 @@ def batch_generator(name, num_tests):
|
||||||
|
|
||||||
def test_template(self):
|
def test_template(self):
|
||||||
for i in range(num_tests):
|
for i in range(num_tests):
|
||||||
getattr(self, f'test_{name}_{i}' if i else f'test_{name}')()
|
test_name = f'test_{name}_{i}' if i else f'test_{name}'
|
||||||
|
try:
|
||||||
|
getattr(self, test_name)()
|
||||||
|
except unittest.SkipTest:
|
||||||
|
print(f'Skipped {test_name}')
|
||||||
|
|
||||||
return test_template
|
return test_template
|
||||||
|
|
||||||
|
|
|
@ -3531,7 +3531,7 @@ def render_formats_table(self, info_dict):
|
||||||
'none', '' if f.get('vcodec') == 'none'
|
'none', '' if f.get('vcodec') == 'none'
|
||||||
else self._format_out('video only', self.Styles.SUPPRESS)),
|
else self._format_out('video only', self.Styles.SUPPRESS)),
|
||||||
format_field(f, 'abr', '\t%dk'),
|
format_field(f, 'abr', '\t%dk'),
|
||||||
format_field(f, 'asr', '\t%dHz'),
|
format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
|
||||||
join_nonempty(
|
join_nonempty(
|
||||||
self._format_out('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
|
self._format_out('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
|
||||||
format_field(f, 'language', '[%s]'),
|
format_field(f, 'language', '[%s]'),
|
||||||
|
|
|
@ -44,14 +44,26 @@ def compat_setenv(key, value, env=os.environ):
|
||||||
|
|
||||||
|
|
||||||
compat_basestring = str
|
compat_basestring = str
|
||||||
|
compat_chr = chr
|
||||||
compat_collections_abc = collections.abc
|
compat_collections_abc = collections.abc
|
||||||
|
compat_cookiejar = http.cookiejar
|
||||||
|
compat_cookiejar_Cookie = http.cookiejar.Cookie
|
||||||
compat_cookies = http.cookies
|
compat_cookies = http.cookies
|
||||||
|
compat_cookies_SimpleCookie = http.cookies.SimpleCookie
|
||||||
compat_etree_Element = etree.Element
|
compat_etree_Element = etree.Element
|
||||||
compat_etree_register_namespace = etree.register_namespace
|
compat_etree_register_namespace = etree.register_namespace
|
||||||
compat_filter = filter
|
compat_filter = filter
|
||||||
|
compat_get_terminal_size = shutil.get_terminal_size
|
||||||
compat_getenv = os.getenv
|
compat_getenv = os.getenv
|
||||||
|
compat_getpass = getpass.getpass
|
||||||
|
compat_html_entities = html.entities
|
||||||
|
compat_html_entities_html5 = html.entities.html5
|
||||||
|
compat_HTMLParser = html.parser.HTMLParser
|
||||||
|
compat_http_client = http.client
|
||||||
|
compat_http_server = http.server
|
||||||
compat_input = input
|
compat_input = input
|
||||||
compat_integer_types = (int, )
|
compat_integer_types = (int, )
|
||||||
|
compat_itertools_count = itertools.count
|
||||||
compat_kwargs = lambda kwargs: kwargs
|
compat_kwargs = lambda kwargs: kwargs
|
||||||
compat_map = map
|
compat_map = map
|
||||||
compat_numeric_types = (int, float, complex)
|
compat_numeric_types = (int, float, complex)
|
||||||
|
@ -59,34 +71,22 @@ def compat_setenv(key, value, env=os.environ):
|
||||||
compat_shlex_split = shlex.split
|
compat_shlex_split = shlex.split
|
||||||
compat_socket_create_connection = socket.create_connection
|
compat_socket_create_connection = socket.create_connection
|
||||||
compat_Struct = struct.Struct
|
compat_Struct = struct.Struct
|
||||||
|
compat_struct_pack = struct.pack
|
||||||
|
compat_struct_unpack = struct.unpack
|
||||||
compat_subprocess_get_DEVNULL = lambda: DEVNULL
|
compat_subprocess_get_DEVNULL = lambda: DEVNULL
|
||||||
|
compat_tokenize_tokenize = tokenize.tokenize
|
||||||
|
compat_urllib_error = urllib.error
|
||||||
|
compat_urllib_parse = urllib.parse
|
||||||
compat_urllib_parse_quote = urllib.parse.quote
|
compat_urllib_parse_quote = urllib.parse.quote
|
||||||
compat_urllib_parse_quote_plus = urllib.parse.quote_plus
|
compat_urllib_parse_quote_plus = urllib.parse.quote_plus
|
||||||
|
compat_urllib_parse_unquote_plus = urllib.parse.unquote_plus
|
||||||
compat_urllib_parse_unquote_to_bytes = urllib.parse.unquote_to_bytes
|
compat_urllib_parse_unquote_to_bytes = urllib.parse.unquote_to_bytes
|
||||||
compat_urllib_parse_urlunparse = urllib.parse.urlunparse
|
compat_urllib_parse_urlunparse = urllib.parse.urlunparse
|
||||||
compat_urllib_request_DataHandler = urllib.request.DataHandler
|
|
||||||
compat_urllib_request = urllib.request
|
compat_urllib_request = urllib.request
|
||||||
|
compat_urllib_request_DataHandler = urllib.request.DataHandler
|
||||||
compat_urllib_response = urllib.response
|
compat_urllib_response = urllib.response
|
||||||
compat_urlretrieve = urllib.request.urlretrieve
|
compat_urlretrieve = urllib.request.urlretrieve
|
||||||
compat_xml_parse_error = etree.ParseError
|
compat_xml_parse_error = etree.ParseError
|
||||||
compat_xpath = lambda xpath: xpath
|
compat_xpath = lambda xpath: xpath
|
||||||
compat_zip = zip
|
compat_zip = zip
|
||||||
workaround_optparse_bug9161 = lambda: None
|
workaround_optparse_bug9161 = lambda: None
|
||||||
compat_getpass = getpass.getpass
|
|
||||||
compat_chr = chr
|
|
||||||
compat_urllib_parse = urllib.parse
|
|
||||||
compat_itertools_count = itertools.count
|
|
||||||
compat_cookiejar = http.cookiejar
|
|
||||||
compat_cookiejar_Cookie = http.cookiejar.Cookie
|
|
||||||
compat_cookies_SimpleCookie = http.cookies.SimpleCookie
|
|
||||||
compat_get_terminal_size = shutil.get_terminal_size
|
|
||||||
compat_html_entities = html.entities
|
|
||||||
compat_html_entities_html5 = html.entities.html5
|
|
||||||
compat_tokenize_tokenize = tokenize.tokenize
|
|
||||||
compat_HTMLParser = html.parser.HTMLParser
|
|
||||||
compat_http_client = http.client
|
|
||||||
compat_http_server = http.server
|
|
||||||
compat_struct_pack = struct.pack
|
|
||||||
compat_struct_unpack = struct.unpack
|
|
||||||
compat_urllib_error = urllib.error
|
|
||||||
compat_urllib_parse_unquote_plus = urllib.parse.unquote_plus
|
|
||||||
|
|
|
@ -59,10 +59,11 @@ def get_suitable_downloader(info_dict, params={}, default=NO_DEFAULT, protocol=N
|
||||||
|
|
||||||
def shorten_protocol_name(proto, simplify=False):
|
def shorten_protocol_name(proto, simplify=False):
|
||||||
short_protocol_names = {
|
short_protocol_names = {
|
||||||
'm3u8_native': 'm3u8_n',
|
'm3u8_native': 'm3u8',
|
||||||
'rtmp_ffmpeg': 'rtmp_f',
|
'm3u8': 'm3u8F',
|
||||||
|
'rtmp_ffmpeg': 'rtmpF',
|
||||||
'http_dash_segments': 'dash',
|
'http_dash_segments': 'dash',
|
||||||
'http_dash_segments_generator': 'dash_g',
|
'http_dash_segments_generator': 'dashG',
|
||||||
'niconico_dmc': 'dmc',
|
'niconico_dmc': 'dmc',
|
||||||
'websocket_frag': 'WSfrag',
|
'websocket_frag': 'WSfrag',
|
||||||
}
|
}
|
||||||
|
@ -70,6 +71,7 @@ def shorten_protocol_name(proto, simplify=False):
|
||||||
short_protocol_names.update({
|
short_protocol_names.update({
|
||||||
'https': 'http',
|
'https': 'http',
|
||||||
'ftps': 'ftp',
|
'ftps': 'ftp',
|
||||||
|
'm3u8': 'm3u8', # Reverse above m3u8 mapping
|
||||||
'm3u8_native': 'm3u8',
|
'm3u8_native': 'm3u8',
|
||||||
'http_dash_segments_generator': 'dash',
|
'http_dash_segments_generator': 'dash',
|
||||||
'rtmp_ffmpeg': 'rtmp',
|
'rtmp_ffmpeg': 'rtmp',
|
||||||
|
|
|
@ -69,7 +69,7 @@ def real_download(self, filename, info_dict):
|
||||||
elif no_crypto:
|
elif no_crypto:
|
||||||
message = ('The stream has AES-128 encryption and neither ffmpeg nor pycryptodomex are available; '
|
message = ('The stream has AES-128 encryption and neither ffmpeg nor pycryptodomex are available; '
|
||||||
'Decryption will be performed natively, but will be extremely slow')
|
'Decryption will be performed natively, but will be extremely slow')
|
||||||
elif re.search(r'#EXT-X-MEDIA-SEQUENCE:(?!0$)', s):
|
elif info_dict.get('extractor_key') == 'Generic' and re.search(r'(?m)#EXT-X-MEDIA-SEQUENCE:(?!0$)', s):
|
||||||
install_ffmpeg = '' if has_ffmpeg else 'install ffmpeg and '
|
install_ffmpeg = '' if has_ffmpeg else 'install ffmpeg and '
|
||||||
message = ('Live HLS streams are not supported by the native downloader. If this is a livestream, '
|
message = ('Live HLS streams are not supported by the native downloader. If this is a livestream, '
|
||||||
f'please {install_ffmpeg}add "--downloader ffmpeg --hls-use-mpegts" to your command')
|
f'please {install_ffmpeg}add "--downloader ffmpeg --hls-use-mpegts" to your command')
|
||||||
|
|
|
@ -2825,12 +2825,22 @@ def _real_extract(self, url):
|
||||||
new_url, {'force_videoid': force_videoid})
|
new_url, {'force_videoid': force_videoid})
|
||||||
return self.url_result(new_url)
|
return self.url_result(new_url)
|
||||||
|
|
||||||
|
def request_webpage():
|
||||||
|
request = sanitized_Request(url)
|
||||||
|
# Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
|
||||||
|
# making it impossible to download only a chunk of the file (yet we need only 512kB to
|
||||||
|
# test whether it's HTML or not). With yt-dlp's default Accept-Encoding
|
||||||
|
# this will always result in downloading the whole file, which is not desirable.
|
||||||
|
# Therefore for the extraction pass we have to override Accept-Encoding to any in order
|
||||||
|
# to accept raw bytes and be able to download only a chunk.
|
||||||
|
# It would probably be better to solve this by checking Content-Type for application/octet-stream
|
||||||
|
# after the HEAD request finishes, but it is not clear whether we can rely on this.
|
||||||
|
request.add_header('Accept-Encoding', '*')
|
||||||
|
return self._request_webpage(request, video_id)
|
||||||
|
|
||||||
full_response = None
|
full_response = None
|
||||||
if head_response is False:
|
if head_response is False:
|
||||||
request = sanitized_Request(url)
|
head_response = full_response = request_webpage()
|
||||||
request.add_header('Accept-Encoding', '*')
|
|
||||||
full_response = self._request_webpage(request, video_id)
|
|
||||||
head_response = full_response
|
|
||||||
|
|
||||||
info_dict = {
|
info_dict = {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
@ -2868,19 +2878,7 @@ def _real_extract(self, url):
|
||||||
self.report_warning(
|
self.report_warning(
|
||||||
'%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
|
'%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
|
||||||
|
|
||||||
if not full_response:
|
full_response = full_response or request_webpage()
|
||||||
request = sanitized_Request(url)
|
|
||||||
# Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
|
|
||||||
# making it impossible to download only chunk of the file (yet we need only 512kB to
|
|
||||||
# test whether it's HTML or not). According to yt-dlp default Accept-Encoding
|
|
||||||
# that will always result in downloading the whole file that is not desirable.
|
|
||||||
# Therefore for extraction pass we have to override Accept-Encoding to any in order
|
|
||||||
# to accept raw bytes and being able to download only a chunk.
|
|
||||||
# It may probably better to solve this by checking Content-Type for application/octet-stream
|
|
||||||
# after HEAD request finishes, but not sure if we can rely on this.
|
|
||||||
request.add_header('Accept-Encoding', '*')
|
|
||||||
full_response = self._request_webpage(request, video_id)
|
|
||||||
|
|
||||||
first_bytes = full_response.read(512)
|
first_bytes = full_response.read(512)
|
||||||
|
|
||||||
# Is it an M3U playlist?
|
# Is it an M3U playlist?
|
||||||
|
|
|
@ -2467,6 +2467,7 @@ def _extract_signature_function(self, video_id, player_url, example_sig):
|
||||||
func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
|
func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
|
||||||
assert os.path.basename(func_id) == func_id
|
assert os.path.basename(func_id) == func_id
|
||||||
|
|
||||||
|
self.write_debug(f'Extracting signature function {func_id}')
|
||||||
cache_spec = self.cache.load('youtube-sigfuncs', func_id)
|
cache_spec = self.cache.load('youtube-sigfuncs', func_id)
|
||||||
if cache_spec is not None:
|
if cache_spec is not None:
|
||||||
return lambda s: ''.join(s[i] for i in cache_spec)
|
return lambda s: ''.join(s[i] for i in cache_spec)
|
||||||
|
@ -2714,10 +2715,10 @@ def _extract_url(webpage):
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def extract_id(cls, url):
|
def extract_id(cls, url):
|
||||||
mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
|
video_id = cls.get_temp_id(url)
|
||||||
if mobj is None:
|
if not video_id:
|
||||||
raise ExtractorError('Invalid URL: %s' % url)
|
raise ExtractorError(f'Invalid URL: {url}')
|
||||||
return mobj.group('id')
|
return video_id
|
||||||
|
|
||||||
def _extract_chapters_from_json(self, data, duration):
|
def _extract_chapters_from_json(self, data, duration):
|
||||||
chapter_list = traverse_obj(
|
chapter_list = traverse_obj(
|
||||||
|
|
|
@ -234,7 +234,7 @@ def random_user_agent():
|
||||||
])
|
])
|
||||||
|
|
||||||
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
|
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
|
||||||
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
|
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>\s*(?P<json_ld>{.+?})\s*</script>'
|
||||||
|
|
||||||
NUMBER_RE = r'\d+(?:\.\d+)?'
|
NUMBER_RE = r'\d+(?:\.\d+)?'
|
||||||
|
|
||||||
|
@ -673,8 +673,8 @@ def replace_insane(char):
|
||||||
s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s) # Handle timestamps
|
s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s) # Handle timestamps
|
||||||
result = ''.join(map(replace_insane, s))
|
result = ''.join(map(replace_insane, s))
|
||||||
if is_id is NO_DEFAULT:
|
if is_id is NO_DEFAULT:
|
||||||
result = re.sub('(\0.)(?:(?=\\1)..)+', r'\1', result) # Remove repeated substitute chars
|
result = re.sub(r'(\0.)(?:(?=\1)..)+', r'\1', result) # Remove repeated substitute chars
|
||||||
STRIP_RE = '(?:\0.|[ _-])*'
|
STRIP_RE = r'(?:\0.|[ _-])*'
|
||||||
result = re.sub(f'^\0.{STRIP_RE}|{STRIP_RE}\0.$', '', result) # Remove substitute chars from start/end
|
result = re.sub(f'^\0.{STRIP_RE}|{STRIP_RE}\0.$', '', result) # Remove substitute chars from start/end
|
||||||
result = result.replace('\0', '') or '_'
|
result = result.replace('\0', '') or '_'
|
||||||
|
|
||||||
|
@ -2400,8 +2400,7 @@ def remove_quotes(s):
|
||||||
|
|
||||||
|
|
||||||
def get_domain(url):
|
def get_domain(url):
|
||||||
domain = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
|
return '.'.join(urllib.parse.urlparse(url).netloc.rsplit('.', 2)[-2:])
|
||||||
return domain.group('domain') if domain else None
|
|
||||||
|
|
||||||
|
|
||||||
def url_basename(url):
|
def url_basename(url):
|
||||||
|
|
Loading…
Reference in a new issue