From 909d24dd6dc835e1291596dda17f962a6ec34875 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 5 Nov 2020 21:05:36 +0530 Subject: [PATCH] Better Format Selection * Added options: --video-multistreams, --no-video-multistreams, --audio-multistreams, --no-audio-multistreams * New format selectors: best*, worst*, bestvideo*, bestaudio*, worstvideo*, worstaudio* * Added b,w,v,a as alias for best, worst, video and audio respectively in format selection * Changed video format sorting to show video only files and video+audio files together. --- README.md | 56 +++++++++++--- youtube_dlc/YoutubeDL.py | 126 +++++++++++++++++--------------- youtube_dlc/__init__.py | 2 + youtube_dlc/extractor/common.py | 4 +- youtube_dlc/extractor/vimeo.py | 1 + youtube_dlc/options.py | 16 ++++ 6 files changed, 135 insertions(+), 70 deletions(-) diff --git a/README.md b/README.md index 485b5a15b..d9470eb18 100644 --- a/README.md +++ b/README.md @@ -398,6 +398,10 @@ ## Video Format Options: --no-format-sort-force Some fields have precedence over the user specified sort order, see "Sorting Formats" for more details (default) + --video-multistreams Allow multiple video streams to be merged into a single file (default) + --no-video-multistreams Only one video stream is downloaded for each output file + --audio-multistreams Allow multiple audio streams to be merged into a single file (default) + --no-audio-multistreams Only one audio stream is downloaded for each output file --all-formats Download all available video formats --prefer-free-formats Prefer free video formats unless a specific one is requested @@ -436,8 +440,8 @@ ## Authentication Options: ## Adobe Pass Options: --ap-mso MSO Adobe Pass multiple-system operator (TV - provider) identifier, use --ap-list-mso - for a list of available MSOs + provider) identifier, use --ap-list-mso for + a list of available MSOs --ap-username USERNAME Multiple-system operator account login --ap-password PASSWORD Multiple-system operator account password. 
If this option is left out, youtube-dlc @@ -711,12 +715,23 @@ # FORMAT SELECTION You can also use special names to select particular edge case formats: - - `best`: Select the best quality format represented by a single file with video and audio. - - `worst`: Select the worst quality format represented by a single file with video and audio. - - `bestvideo`: Select the best quality video-only format (e.g. DASH video). May not be available. - - `worstvideo`: Select the worst quality video-only format. May not be available. - - `bestaudio`: Select the best quality audio only-format. May not be available. - - `worstaudio`: Select the worst quality audio only-format. May not be available. + - `b*`, `best*`: Select the best quality format irrespective of whether it contains video or audio. + - `w*`, `worst*`: Select the worst quality format irrespective of whether it contains video or audio. + + - `b`, `best`: Select the best quality format that contains both video and audio. Equivalent to `best*[vcodec!=none][acodec!=none]` + - `w`, `worst`: Select the worst quality format that contains both video and audio. Equivalent to `worst*[vcodec!=none][acodec!=none]` + + - `bv`, `bestvideo`: Select the best quality video-only format. Equivalent to `best*[acodec=none]` + - `wv`, `worstvideo`: Select the worst quality video-only format. Equivalent to `worst*[acodec=none]` + + - `bv*`, `bestvideo*`: Select the best quality format that contains video. It may also contain audio. Equivalent to `best*[vcodec!=none]` + - `wv*`, `worstvideo*`: Select the worst quality format that contains video. It may also contain audio. Equivalent to `worst*[vcodec!=none]` + + - `ba`, `bestaudio`: Select the best quality audio-only format. Equivalent to `best*[vcodec=none]` + - `wa`, `worstaudio`: Select the worst quality audio-only format. Equivalent to `worst*[vcodec=none]` + + - `ba*`, `bestaudio*`: Select the best quality format that contains audio. It may also contain video. 
Equivalent to `best*[acodec!=none]` + - `wa*`, `worstaudio*`: Select the worst quality format that contains audio. It may also contain video. Equivalent to `worst*[acodec!=none]` For example, to download the worst quality video-only format you can use `-f worstvideo`. It is however recommended to never actually use `worst` and related options. See [sorting formats](#sorting-formats) for more details. @@ -724,8 +739,7 @@ # FORMAT SELECTION If you want to download several formats of the same video use a comma as a separator, e.g. `-f 22,17,18` will download all these three formats, of course if they are available. Or a more sophisticated example combined with the precedence feature: `-f 136/137/mp4/bestvideo,140/m4a/bestaudio`. -You can merge the video and audio of multiple formats into a single file using `-f <video-format>+<audio-format>` (requires ffmpeg or avconv installed), for example `-f bestvideo+bestaudio` will download the best video-only format, the best audio-only format and mux them together with ffmpeg/avconv. - +You can merge the video and audio of multiple formats into a single file using `-f <format1>+<format2>+...` (requires ffmpeg or avconv installed), for example `-f bestvideo+bestaudio` will download the best video-only format, the best audio-only format and mux them together with ffmpeg/avconv. If `--no-video-multistreams` is used, all formats with a video stream except the first one are ignored. Similarly, if `--no-audio-multistreams` is used, all formats with an audio stream except the first one are ignored. For example, `-f bestvideo+best+bestaudio` will download and merge all 3 given formats. The resulting file will have 2 video streams and 2 audio streams. But `-f bestvideo+best+bestaudio --no-video-multistreams` will download and merge only `bestvideo` and `bestaudio`. `best` is ignored since another format containing a video stream (`bestvideo`) has already been selected. The order of the formats is therefore important. 
`-f best+bestaudio --no-audio-multistreams` will download and merge both formats while `-f bestaudio+best --no-audio-multistreams` will ignore `best` and download only `bestaudio`. ## Filtering Formats @@ -791,7 +805,7 @@ ## Sorting Formats All fields, unless specified otherwise, are sorted in descending order. To reverse this, prefix the field with a `+`. Eg: `+res` prefers the smallest resolution format. Additionally, you can suffix a preferred value for the fields, separated by a `:`. Eg: `res:720` prefers larger videos, but no larger than 720p and the smallest video if there are no videos less than 720p. For `codec` and `ext`, you can provide two preferred values, the first for video and the second for audio. Eg: `+codec:avc:m4a` (equivalent to `+vcodec:avc,+acodec:m4a`) sets the video codec preference to `h264` > `h265` > `vp9` > `av01` > `vp8` > `h263` > `theora` and audio codec preference to `mp4a` > `aac` > `vorbis` > `opus` > `mp3` > `ac3` > `dts`. You can also make the sorting prefer the nearest values to the provided by using `~` as the delimiter. Eg: `filesize~1G` prefers the format with filesize closest to 1 GiB. -The fields `has_video`, `has_audio`, `extractor_preference`, `language_preference`, `quality` are always given highest priority in sorting, irrespective of the user-defined order. This behaviour can be changed by using `--format-sort-force`. Apart from these, the default order used by youtube-dlc is: `tbr,filesize,vbr,height,width,protocol,vext,abr,aext,fps,filesize_approx,source_preference,format_id`. Note that the extractors may override this default order (currently no extractor does this), but not the user-provided order. +The fields `has_video`, `extractor_preference`, `language_preference`, `quality` are always given highest priority in sorting, irrespective of the user-defined order. This behaviour can be changed by using `--format-sort-force`. 
Apart from these, the default order used by youtube-dlc is: `tbr,filesize,vbr,height,width,protocol,vext,abr,aext,fps,filesize_approx,source_preference,format_id`. Note that the extractors may override this default order (currently no extractor does this), but not the user-provided order. If your format selector is `worst`, the last item is selected after sorting. This means it will select the format that is worst in all respects. Most of the time, what you actually want is the video with the smallest filesize instead. So it is generally better to use `-f best -S +size,+br,+res,+fps`. @@ -808,6 +822,19 @@ ## Format Selection examples Note that on Windows you may need to use double quotes instead of single. ```bash +# Download and merge the best video-only format and the best audio-only format, +# or download the best combined format if video-only format is not available +$ youtube-dlc + +# Same as above +$ youtube-dlc -f 'bestvideo+bestaudio/best' + +# Download best format that contains video, +# and if it doesn't already have an audio stream, merge it with best audio-only format +$ youtube-dlc -f 'bestvideo*+bestaudio/best' --no-audio-multistreams + + + # Download the worst video available $ youtube-dlc -f 'worstvideo+worstaudio/worst' @@ -818,6 +845,7 @@ # Download the smallest video available $ youtube-dlc -S '+size,+bitrate' + # Download the best mp4 video available, or the best video if no mp4 available $ youtube-dlc -f 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/bestvideo+bestaudio / best' @@ -826,6 +854,7 @@ # (For video, mp4 > webm > flv. For audio, m4a > aac > mp3 ...) 
$ youtube-dlc -S 'ext' + # Download the best video available but no better than 480p, # or the worst video if there is no video under 480p $ youtube-dlc -f 'bestvideo[height<=480]+bestaudio/best[height<=480] / worstvideo+bestaudio/worst' @@ -841,6 +870,7 @@ # So this works correctly for vertical videos as well $ youtube-dlc -S 'res:480' + # Download the best video (that also has audio) but no bigger than 50 MB, # or the worst video (that also has audio) if there is no video under 50 MB $ youtube-dlc -f 'best[filesize<50M] / worst' @@ -853,6 +883,7 @@ # Download best video (that also has audio) that is closest in size to 50 MB $ youtube-dlc -f 'best' -S 'filesize~50M' + # Download best video available via direct link over HTTP/HTTPS protocol, # or the best video available via any protocol if there is no such video $ youtube-dlc -f '(bestvideo+bestaudio/best)[protocol^=http][protocol!*=dash] / bestvideo+bestaudio/best' @@ -862,12 +893,14 @@ # (https/ftps > http/ftp > m3u8_native > m3u8 > http_dash_segments ...) $ youtube-dlc -S 'protocol' + # Download the best video-only format and the best audio-only format without merging them # For this case, an output template should be used since # by default, bestvideo and bestaudio will have the same file name. 
$ youtube-dlc -f 'bestvideo,bestaudio' -o '%(title)s.f%(format_id)s.%(ext)s' + # Download the best video with h264 codec, or the best video if there is no such video $ youtube-dlc -f '(bestvideo+bestaudio/best)[vcodec^=avc1] / bestvideo+bestaudio/best' @@ -893,6 +926,7 @@ # preferring larger framerate for formats with the same resolution $ youtube-dlc -S 'res:720,fps' + # Download the video with smallest resolution no worse than 480p, # or the video with the largest resolution available if there is no such video, # preferring better codec and then larger total bitrate for the same resolution diff --git a/youtube_dlc/YoutubeDL.py b/youtube_dlc/YoutubeDL.py index 2e74802ee..41a1ec724 100644 --- a/youtube_dlc/YoutubeDL.py +++ b/youtube_dlc/YoutubeDL.py @@ -165,6 +165,8 @@ class YoutubeDL(object): format: Video format code. see "FORMAT SELECTION" for more details. format_sort: How to sort the video formats. see "Sorting Formats" for more details. format_sort_force: Force the given format_sort. see "Sorting Formats" for more details. + allow_multiple_video_streams: Allow multiple video streams to be merged into a single file + allow_multiple_audio_streams: Allow multiple audio streams to be merged into a single file outtmpl: Template for output names. restrictfilenames: Do not allow "&" and spaces in file names. trim_file_name: Limit length of filename (extension excluded). 
@@ -1201,6 +1203,9 @@ def syntax_error(note, start): GROUP = 'GROUP' FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters']) + allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', True), + 'video': self.params.get('allow_multiple_video_streams', True)} + def _parse_filter(tokens): filter_parts = [] for type, string, start, _, _ in tokens: @@ -1299,7 +1304,7 @@ def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, ins return selectors def _build_selector_function(selector): - if isinstance(selector, list): + if isinstance(selector, list): # , fs = [_build_selector_function(s) for s in selector] def selector_function(ctx): @@ -1307,9 +1312,11 @@ def selector_function(ctx): for format in f(ctx): yield format return selector_function - elif selector.type == GROUP: + + elif selector.type == GROUP: # () selector_function = _build_selector_function(selector.selector) - elif selector.type == PICKFIRST: + + elif selector.type == PICKFIRST: # / fs = [_build_selector_function(s) for s in selector.selector] def selector_function(ctx): @@ -1318,62 +1325,54 @@ def selector_function(ctx): if picked_formats: return picked_formats return [] - elif selector.type == SINGLE: - format_spec = selector.selector - def selector_function(ctx): - formats = list(ctx['formats']) - if not formats: - return - if format_spec == 'all': - for f in formats: - yield f - elif format_spec in ['best', 'worst', None]: - format_idx = 0 if format_spec == 'worst' else -1 - audiovideo_formats = [ - f for f in formats - if f.get('vcodec') != 'none' and f.get('acodec') != 'none'] - if audiovideo_formats: - yield audiovideo_formats[format_idx] - # for extractors with incomplete formats (audio only (soundcloud) - # or video only (imgur)) we will fallback to best/worst - # {video,audio}-only format - elif ctx['incomplete_formats']: - yield formats[format_idx] - elif format_spec == 'bestaudio': - audio_formats = [ - f for f in 
formats - if f.get('vcodec') == 'none'] - if audio_formats: - yield audio_formats[-1] - elif format_spec == 'worstaudio': - audio_formats = [ - f for f in formats - if f.get('vcodec') == 'none'] - if audio_formats: - yield audio_formats[0] - elif format_spec == 'bestvideo': - video_formats = [ - f for f in formats - if f.get('acodec') == 'none'] - if video_formats: - yield video_formats[-1] - elif format_spec == 'worstvideo': - video_formats = [ - f for f in formats - if f.get('acodec') == 'none'] - if video_formats: - yield video_formats[0] + elif selector.type == SINGLE: # atom + format_spec = selector.selector if selector.selector is not None else 'best' + + if format_spec == 'all': + def selector_function(ctx): + formats = list(ctx['formats']) + if formats: + for f in formats: + yield f + + else: + format_fallback = False + format_spec_obj = re.match(r'(best|worst|b|w)(video|audio|v|a)?(\*)?$', format_spec) + if format_spec_obj is not None: + format_idx = 0 if format_spec_obj.group(1)[0] == 'w' else -1 + format_type = format_spec_obj.group(2)[0] if format_spec_obj.group(2) else False + not_format_type = 'v' if format_type == 'a' else 'a' + format_modified = format_spec_obj.group(3) is not None + + format_fallback = not format_type and not format_modified # for b, w + filter_f = ((lambda f: f.get(format_type + 'codec') != 'none') + if format_type and format_modified # bv*, ba*, wv*, wa* + else (lambda f: f.get(not_format_type + 'codec') == 'none') + if format_type # bv, ba, wv, wa + else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none') + if not format_modified # b, w + else None) # b*, w* else: - extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav'] - if format_spec in extensions: - filter_f = lambda f: f['ext'] == format_spec - else: - filter_f = lambda f: f['format_id'] == format_spec - matches = list(filter(filter_f, formats)) + format_idx = -1 + filter_f = ((lambda f: f.get('ext') == format_spec) + if format_spec in 
['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav'] # extension + else (lambda f: f.get('format_id') == format_spec)) # id + + def selector_function(ctx): + formats = list(ctx['formats']) + if not formats: + return + matches = list(filter(filter_f, formats)) if filter_f is not None else formats if matches: - yield matches[-1] - elif selector.type == MERGE: + yield matches[format_idx] + elif format_fallback == 'force' or (format_fallback and ctx['incomplete_formats']): + # for extractors with incomplete formats (audio only (soundcloud) + # or video only (imgur)) best/worst will fallback to + # best/worst {video,audio}-only format + yield formats[format_idx] + + elif selector.type == MERGE: # + def _merge(formats_pair): format_1, format_2 = formats_pair @@ -1381,6 +1380,18 @@ def _merge(formats_pair): formats_info.extend(format_1.get('requested_formats', (format_1,))) formats_info.extend(format_2.get('requested_formats', (format_2,))) + if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']: + get_no_more = {"video": False, "audio": False} + for (i, fmt_info) in enumerate(formats_info): + for aud_vid in ["audio", "video"]: + if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none': + if get_no_more[aud_vid]: + formats_info.pop(i) + get_no_more[aud_vid] = True + + if len(formats_info) == 1: + return formats_info[0] + video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none'] audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none'] @@ -1717,6 +1728,7 @@ def is_wellformed(f): expected=True) if download: + self.to_screen('[info] Downloading format(s) %s' % ", ".join([f['format_id'] for f in formats_to_download])) if len(formats_to_download) > 1: self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download))) for format in formats_to_download: @@ -2308,7 +2320,7 @@ def list_formats(self, info_dict): for 
f in formats if f.get('preference') is None or f['preference'] >= -1000] # if len(formats) > 1: - # table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)' + # table[-1][-1] += (' ' if table[-1][-1] else '') + '(best*)' header_line = ['format code', 'extension', 'resolution', 'note'] self.to_screen( diff --git a/youtube_dlc/__init__.py b/youtube_dlc/__init__.py index 40fdd8d74..df07016e1 100644 --- a/youtube_dlc/__init__.py +++ b/youtube_dlc/__init__.py @@ -354,6 +354,8 @@ def parse_retries(retries): 'format': opts.format, 'format_sort': opts.format_sort, 'format_sort_force': opts.format_sort_force, + 'allow_multiple_video_streams': opts.allow_multiple_video_streams, + 'allow_multiple_audio_streams': opts.allow_multiple_audio_streams, 'listformats': opts.listformats, 'outtmpl': outtmpl, 'autonumber_size': opts.autonumber_size, diff --git a/youtube_dlc/extractor/common.py b/youtube_dlc/extractor/common.py index 2d8d74793..1ffe37bde 100644 --- a/youtube_dlc/extractor/common.py +++ b/youtube_dlc/extractor/common.py @@ -1358,7 +1358,7 @@ def _form_hidden_inputs(self, form_id, html): class FormatSort: regex = r' *((?P\+)?(?P[a-zA-Z0-9_]+)((?P[~:])(?P.*?))?)? 
*$' - default = ('hidden', 'has_video', 'has_audio', 'extractor', 'lang', 'quality', + default = ('hidden', 'has_video', 'extractor', 'lang', 'quality', 'tbr', 'filesize', 'vbr', 'height', 'width', 'protocol', 'vext', 'abr', 'aext', 'fps', 'filesize_approx', 'source_preference', 'format_id') @@ -1378,7 +1378,7 @@ class FormatSort: 'hidden': {'visible': False, 'forced': True, 'type': 'extractor', 'max': -1000}, 'extractor_preference': {'priority': True, 'type': 'extractor'}, 'has_video': {'priority': True, 'field': 'vcodec', 'type': 'boolean', 'not_in_list': ('none',)}, - 'has_audio': {'priority': True, 'field': 'acodec', 'type': 'boolean', 'not_in_list': ('none',)}, + 'has_audio': {'priority': False, 'field': 'acodec', 'type': 'boolean', 'not_in_list': ('none',)}, 'language_preference': {'priority': True, 'convert': 'ignore'}, 'quality': {'priority': True, 'convert': 'float_none'}, 'filesize': {'convert': 'bytes'}, diff --git a/youtube_dlc/extractor/vimeo.py b/youtube_dlc/extractor/vimeo.py index 21f0620be..2fc42bbae 100644 --- a/youtube_dlc/extractor/vimeo.py +++ b/youtube_dlc/extractor/vimeo.py @@ -181,6 +181,7 @@ def _parse_config(self, config, video_id): 'preference': 1, }) + # Redundant code! 
This is already done in common.py # for f in formats: # if f.get('vcodec') == 'none': # f['preference'] = -50 diff --git a/youtube_dlc/options.py b/youtube_dlc/options.py index bbec33678..44eba3e9c 100644 --- a/youtube_dlc/options.py +++ b/youtube_dlc/options.py @@ -415,6 +415,22 @@ def _comma_separated_values_options_callback(option, opt_str, value, parser): help=( 'Some fields have precedence over the user specified sort order (default), ' 'see "Sorting Formats" for more details')) + video_format.add_option( + '--video-multistreams', + action='store_true', dest='allow_multiple_video_streams', default=True, + help='Allow multiple video streams to be merged into a single file (default)') + video_format.add_option( + '--no-video-multistreams', + action='store_false', dest='allow_multiple_video_streams', + help='Only one video stream is downloaded for each output file') + video_format.add_option( + '--audio-multistreams', + action='store_true', dest='allow_multiple_audio_streams', default=True, + help='Allow multiple audio streams to be merged into a single file (default)') + video_format.add_option( + '--no-audio-multistreams', + action='store_false', dest='allow_multiple_audio_streams', + help='Only one audio stream is downloaded for each output file') video_format.add_option( '--all-formats', action='store_const', dest='format', const='all',