mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-25 02:45:12 +00:00
Determine merge container better (See desc) (#1482)
* Determine the container early. Closes #4069 * Use codecs instead of just file extensions * Obey `--prefer-free-formats` * Allow fallbacks in `--merge-output-format` Authored by: pukkandan, selfisekai
This commit is contained in:
parent
fe0918bb65
commit
fc61aff41b
6 changed files with 84 additions and 39 deletions
|
@ -858,10 +858,10 @@ ## Video Format Options:
|
||||||
downloadable
|
downloadable
|
||||||
-F, --list-formats List available formats of each video.
|
-F, --list-formats List available formats of each video.
|
||||||
Simulate unless --no-simulate is used
|
Simulate unless --no-simulate is used
|
||||||
--merge-output-format FORMAT Container to use when merging formats (e.g.
|
--merge-output-format FORMAT Containers that may be used when merging
|
||||||
bestvideo+bestaudio). Ignored if no merge is
|
formats, separated by "/" (Eg: "mp4/mkv").
|
||||||
required. (currently supported: avi, flv,
|
Ignored if no merge is required. (currently
|
||||||
mkv, mov, mp4, webm)
|
supported: avi, flv, mkv, mov, mp4, webm)
|
||||||
|
|
||||||
## Subtitle Options:
|
## Subtitle Options:
|
||||||
--write-subs Write subtitle file
|
--write-subs Write subtitle file
|
||||||
|
|
|
@ -53,6 +53,7 @@
|
||||||
fix_xml_ampersands,
|
fix_xml_ampersands,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
format_bytes,
|
format_bytes,
|
||||||
|
get_compatible_ext,
|
||||||
get_element_by_attribute,
|
get_element_by_attribute,
|
||||||
get_element_by_class,
|
get_element_by_class,
|
||||||
get_element_html_by_attribute,
|
get_element_html_by_attribute,
|
||||||
|
@ -1843,6 +1844,31 @@ def test_determine_file_encoding(self):
|
||||||
self.assertEqual(determine_file_encoding('# coding: utf-32-be'.encode('utf-32-be')), ('utf-32-be', 0))
|
self.assertEqual(determine_file_encoding('# coding: utf-32-be'.encode('utf-32-be')), ('utf-32-be', 0))
|
||||||
self.assertEqual(determine_file_encoding('# coding: utf-16-le'.encode('utf-16-le')), ('utf-16-le', 0))
|
self.assertEqual(determine_file_encoding('# coding: utf-16-le'.encode('utf-16-le')), ('utf-16-le', 0))
|
||||||
|
|
||||||
|
def test_get_compatible_ext(self):
    """Check which container get_compatible_ext picks for various stream sets."""
    cases = (
        # More than one audio stream
        (dict(vcodecs=[None], acodecs=[None, None], vexts=['mp4'], aexts=['m4a', 'm4a']), 'mkv'),
        # Codecs unknown and both streams share one extension
        (dict(vcodecs=[None], acodecs=[None], vexts=['flv'], aexts=['flv']), 'flv'),

        # Codecs unknown, so only the extensions can decide
        (dict(vcodecs=[None], acodecs=[None], vexts=['mp4'], aexts=['m4a']), 'mp4'),
        (dict(vcodecs=[None], acodecs=[None], vexts=['mp4'], aexts=['webm']), 'mkv'),
        (dict(vcodecs=[None], acodecs=[None], vexts=['webm'], aexts=['m4a']), 'mkv'),
        (dict(vcodecs=[None], acodecs=[None], vexts=['webm'], aexts=['webm']), 'webm'),

        # Known codecs take precedence over the extensions
        (dict(vcodecs=['h264'], acodecs=['mp4a'], vexts=['mov'], aexts=['m4a']), 'mp4'),
        (dict(vcodecs=['av01.0.12M.08'], acodecs=['opus'], vexts=['mp4'], aexts=['webm']), 'webm'),

        # Explicit preferences
        (dict(vcodecs=['vp9'], acodecs=['opus'], vexts=['webm'], aexts=['webm'],
              preferences=['flv', 'mp4']), 'mp4'),
        (dict(vcodecs=['av1'], acodecs=['mp4a'], vexts=['webm'], aexts=['m4a'],
              preferences=('webm', 'mkv')), 'mkv'),
    )
    for kwargs, expected in cases:
        self.assertEqual(get_compatible_ext(**kwargs), expected)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
@ -102,6 +102,7 @@
|
||||||
format_decimal_suffix,
|
format_decimal_suffix,
|
||||||
format_field,
|
format_field,
|
||||||
formatSeconds,
|
formatSeconds,
|
||||||
|
get_compatible_ext,
|
||||||
get_domain,
|
get_domain,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
iri_to_uri,
|
iri_to_uri,
|
||||||
|
@ -134,6 +135,7 @@
|
||||||
timetuple_from_msec,
|
timetuple_from_msec,
|
||||||
to_high_limit_path,
|
to_high_limit_path,
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
|
try_call,
|
||||||
try_get,
|
try_get,
|
||||||
url_basename,
|
url_basename,
|
||||||
variadic,
|
variadic,
|
||||||
|
@ -372,7 +374,7 @@ class YoutubeDL:
|
||||||
|
|
||||||
Progress hooks are guaranteed to be called at least twice
|
Progress hooks are guaranteed to be called at least twice
|
||||||
(with status "started" and "finished") if the processing is successful.
|
(with status "started" and "finished") if the processing is successful.
|
||||||
merge_output_format: Extension to use when merging formats.
|
merge_output_format: "/" separated list of extensions to use when merging formats.
|
||||||
final_ext: Expected final extension; used to detect when the file was
|
final_ext: Expected final extension; used to detect when the file was
|
||||||
already downloaded and converted
|
already downloaded and converted
|
||||||
fixup: Automatically correct known faults of the file.
|
fixup: Automatically correct known faults of the file.
|
||||||
|
@ -2088,14 +2090,13 @@ def _merge(formats_pair):
|
||||||
the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
|
the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
|
||||||
the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
|
the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
|
||||||
|
|
||||||
output_ext = self.params.get('merge_output_format')
|
output_ext = get_compatible_ext(
|
||||||
if not output_ext:
|
vcodecs=[f.get('vcodec') for f in video_fmts],
|
||||||
if the_only_video:
|
acodecs=[f.get('acodec') for f in audio_fmts],
|
||||||
output_ext = the_only_video['ext']
|
vexts=[f['ext'] for f in video_fmts],
|
||||||
elif the_only_audio and not video_fmts:
|
aexts=[f['ext'] for f in audio_fmts],
|
||||||
output_ext = the_only_audio['ext']
|
preferences=(try_call(lambda: self.params['merge_output_format'].split('/'))
|
||||||
else:
|
or self.params.get('prefer_free_formats') and ('webm', 'mkv')))
|
||||||
output_ext = 'mkv'
|
|
||||||
|
|
||||||
filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
|
filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
|
||||||
|
|
||||||
|
@ -3067,33 +3068,9 @@ def existing_video_file(*filepaths):
|
||||||
return
|
return
|
||||||
|
|
||||||
if info_dict.get('requested_formats') is not None:
|
if info_dict.get('requested_formats') is not None:
|
||||||
|
|
||||||
def compatible_formats(formats):
|
|
||||||
# TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
|
|
||||||
video_formats = [format for format in formats if format.get('vcodec') != 'none']
|
|
||||||
audio_formats = [format for format in formats if format.get('acodec') != 'none']
|
|
||||||
if len(video_formats) > 2 or len(audio_formats) > 2:
|
|
||||||
return False
|
|
||||||
|
|
||||||
# Check extension
|
|
||||||
exts = {format.get('ext') for format in formats}
|
|
||||||
COMPATIBLE_EXTS = (
|
|
||||||
{'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'},
|
|
||||||
{'webm'},
|
|
||||||
)
|
|
||||||
for ext_sets in COMPATIBLE_EXTS:
|
|
||||||
if ext_sets.issuperset(exts):
|
|
||||||
return True
|
|
||||||
# TODO: Check acodec/vcodec
|
|
||||||
return False
|
|
||||||
|
|
||||||
requested_formats = info_dict['requested_formats']
|
requested_formats = info_dict['requested_formats']
|
||||||
old_ext = info_dict['ext']
|
old_ext = info_dict['ext']
|
||||||
if self.params.get('merge_output_format') is None:
|
if self.params.get('merge_output_format') is None:
|
||||||
if not compatible_formats(requested_formats):
|
|
||||||
info_dict['ext'] = 'mkv'
|
|
||||||
self.report_warning(
|
|
||||||
'Requested formats are incompatible for merge and will be merged into mkv')
|
|
||||||
if (info_dict['ext'] == 'webm'
|
if (info_dict['ext'] == 'webm'
|
||||||
and info_dict.get('thumbnails')
|
and info_dict.get('thumbnails')
|
||||||
# check with type instead of pp_key, __name__, or isinstance
|
# check with type instead of pp_key, __name__, or isinstance
|
||||||
|
|
|
@ -228,7 +228,8 @@ def validate_minmax(min_val, max_val, min_name, max_name=None):
|
||||||
validate_regex('format sorting', f, InfoExtractor.FormatSort.regex)
|
validate_regex('format sorting', f, InfoExtractor.FormatSort.regex)
|
||||||
|
|
||||||
# Postprocessor formats
|
# Postprocessor formats
|
||||||
validate_in('merge output format', opts.merge_output_format, FFmpegMergerPP.SUPPORTED_EXTS)
|
validate_regex('merge output format', opts.merge_output_format,
|
||||||
|
r'({0})(/({0}))*'.format('|'.join(map(re.escape, FFmpegMergerPP.SUPPORTED_EXTS))))
|
||||||
validate_regex('audio format', opts.audioformat, FFmpegExtractAudioPP.FORMAT_RE)
|
validate_regex('audio format', opts.audioformat, FFmpegExtractAudioPP.FORMAT_RE)
|
||||||
validate_in('subtitle format', opts.convertsubtitles, FFmpegSubtitlesConvertorPP.SUPPORTED_EXTS)
|
validate_in('subtitle format', opts.convertsubtitles, FFmpegSubtitlesConvertorPP.SUPPORTED_EXTS)
|
||||||
validate_regex('thumbnail format', opts.convertthumbnails, FFmpegThumbnailsConvertorPP.FORMAT_RE)
|
validate_regex('thumbnail format', opts.convertthumbnails, FFmpegThumbnailsConvertorPP.FORMAT_RE)
|
||||||
|
|
|
@ -782,7 +782,8 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
|
||||||
'--merge-output-format',
|
'--merge-output-format',
|
||||||
action='store', dest='merge_output_format', metavar='FORMAT', default=None,
|
action='store', dest='merge_output_format', metavar='FORMAT', default=None,
|
||||||
help=(
|
help=(
|
||||||
'Container to use when merging formats (e.g. bestvideo+bestaudio). Ignored if no merge is required. '
|
'Containers that may be used when merging formats, separated by "/" (Eg: "mp4/mkv"). '
|
||||||
|
'Ignored if no merge is required. '
|
||||||
f'(currently supported: {", ".join(sorted(FFmpegMergerPP.SUPPORTED_EXTS))})'))
|
f'(currently supported: {", ".join(sorted(FFmpegMergerPP.SUPPORTED_EXTS))})'))
|
||||||
video_format.add_option(
|
video_format.add_option(
|
||||||
'--allow-unplayable-formats',
|
'--allow-unplayable-formats',
|
||||||
|
|
|
@ -3456,6 +3456,46 @@ def parse_codecs(codecs_str):
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
|
|
||||||
|
def get_compatible_ext(*, vcodecs, acodecs, vexts, aexts, preferences=None):
    """Choose a container extension for merging the given streams.

    All arguments are keyword-only.

    vcodecs, acodecs -- codec names of the video/audio streams; entries may
                        be None when the codec is unknown, and either list
                        may be empty. Only the first entry of each list is
                        used for the codec-based check.
    vexts, aexts     -- file extensions of the same streams, index-aligned
                        with vcodecs/acodecs.
    preferences      -- optional ordered sequence of acceptable extensions.
                        When falsy, any container may be chosen.

    Returns the first candidate found by, in order:
      1. 'mkv' if it is allowed and there is more than one video or more
         than one audio stream;
      2. the first candidate (preferences, else 'mp4' then 'webm') that is
         'mkv' or whose known codec set contains both the video and the
         audio codec;
      3. the first candidate (preferences, else the video extensions) that
         is 'mkv', equals every stream extension, or belongs to the same
         extension family as all of them;
      4. 'mkv' if allowed, otherwise the last preference.

    Raises AssertionError when codec and extension lists differ in length.
    """
    assert len(vcodecs) == len(vexts) and len(acodecs) == len(aexts)

    allow_mkv = not preferences or 'mkv' in preferences

    if allow_mkv and max(len(acodecs), len(vcodecs)) > 1:
        return 'mkv'  # TODO: any other format allows this?

    # TODO: Not all codecs supported by parse_codecs are handled here
    COMPATIBLE_CODECS = {
        'mp4': {
            'av1', 'hevc', 'avc1', 'mp4a',  # fourcc (m3u8, mpd)
            'h264', 'aacl',  # Set in ISM
        },
        'webm': {
            'av1', 'vp9', 'vp8', 'opus', 'vrbs',
            'vp9x', 'vp8x',  # in the webm spec
        },
    }

    def first_codec(codecs):
        # Reduce e.g. 'av01.0.12M.08' to 'av1'. The indexing is inside the
        # guard so that an empty list (no stream of that kind) yields None
        # instead of raising IndexError; None/non-string codecs also give None.
        try:
            return codecs[0].split('.')[0].replace('0', '')
        except (AttributeError, IndexError, KeyError, TypeError):
            return None

    vcodec, acodec = first_codec(vcodecs), first_codec(acodecs)

    for ext in preferences or COMPATIBLE_CODECS:
        codec_set = COMPATIBLE_CODECS.get(ext, set())
        if ext == 'mkv' or codec_set.issuperset((vcodec, acodec)):
            return ext

    # Codecs were inconclusive; fall back to comparing file extensions
    COMPATIBLE_EXTS = (
        {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma', 'mov'},
        {'webm'},
    )
    for ext in preferences or vexts:
        current_exts = {ext, *vexts, *aexts}
        if ext == 'mkv' or current_exts == {ext} or any(
                ext_sets.issuperset(current_exts) for ext_sets in COMPATIBLE_EXTS):
            return ext
    return 'mkv' if allow_mkv else preferences[-1]
|
||||||
|
|
||||||
|
|
||||||
def urlhandle_detect_ext(url_handle):
|
def urlhandle_detect_ext(url_handle):
|
||||||
getheader = url_handle.headers.get
|
getheader = url_handle.headers.get
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue