Improve output template internal formatting

* Allow slicing lists/strings using `field.start:end:step`
* A field can also be used as offset like `field1+num+field2`
* A default value can be given using `field|default`
* Capture all format strings and set them to `None` if invalid. This prevents invalid fields from causing errors
2024-11-22 02:15:12 +00:00 · 2021-05-03 22:36:03 +05:30 · 2021-05-03 22:36:03 +05:30 · e625be0d10
commit e625be0d10
parent 12e73423f1
4 changed files with 71 additions and 32 deletions
--- a/README.md
+++ b/README.md
@ -842,13 +842,14 @@ # OUTPUT TEMPLATE
 It may however also contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations.

 The field names themselves (the part inside the parenthesis) can also have some special formatting:
-1. **Date/time Formatting**: Date/time fields can be formatted according to [strftime formatting](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes) by specifying it separated from the field name using a `>`. Eg: `%(duration>%H-%M-%S)s` or `%(upload_date>%Y-%m-%d)s`
-2. **Offset numbers**: Numeric fields can have an initial offset specified by using a `+` separator. Eg: `%(playlist_index+10)03d`. This can also be used in conjunction with the date-time formatting. Eg: `%(epoch+-3600>%H-%M-%S)s`
-3. **Object traversal**: The dictionaries and lists available in metadata can be traversed by using a `.` (dot) separator. Eg: `%(tags.0)s` or `%(subtitles.en.-1.ext)`. Note that the fields that become available using this method are not listed below. Use `-j` to see such fields
+1. **Object traversal**: The dictionaries and lists available in metadata can be traversed by using a `.` (dot) separator. You can also do Python slicing using `:`. Eg: `%(tags.0)s`, `%(subtitles.en.-1.ext)s`, `%(id.3:7:-1)s`. Note that the fields that become available using this method are not listed below. Use `-j` to see such fields
+1. **Addition**: Addition and subtraction of numeric fields can be done using `+` and `-` respectively. Eg: `%(playlist_index+10)03d`, `%(n_entries+1-playlist_index)d`
+1. **Date/time Formatting**: Date/time fields can be formatted according to [strftime formatting](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes) by specifying it separated from the field name using a `>`. Eg: `%(duration>%H-%M-%S)s`, `%(upload_date>%Y-%m-%d)s`, `%(epoch-3600>%H-%M-%S)s`
+1. **Default**: A default value can be specified for when the field is empty using a `|` separator. This overrides `--output-na-template`. Eg: `%(uploader|Unknown)s`

 To summarize, the general syntax for a field is:
 ```
-%(name[.keys][+offset][>strf])[flags][width][.precision][length]type
+%(name[.keys][addition][>strf][|default])[flags][width][.precision][length]type
 ```

 Additionally, you can set different output templates for the various metadata files separately from the general output template by specifying the type of file followed by the template separated by a colon `:`. The different file types supported are `subtitle`, `thumbnail`, `description`, `annotation`, `infojson`, `pl_description`, `pl_infojson`, `chapter`. For example, `-o '%(title)s.%(ext)s' -o 'thumbnail:%(title)s\%(title)s.%(ext)s'`  will put the thumbnails in a folder with the same name as the video.
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@ -843,29 +843,67 @@ def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
        if sanitize is None:
            sanitize = lambda k, v: v

-        # Internal Formatting = name.key1.key2+number>strf
-        INTERNAL_FORMAT_RE = FORMAT_RE.format(
-            r'''(?P<final_key>
-                        (?P<fields>\w+(?:\.[-\w]+)*)
-                        (?:\+(?P<add>-?\d+(?:\.\d+)?))?
-                        (?:>(?P<strf_format>.+?))?
-            )''')
-        for mobj in re.finditer(INTERNAL_FORMAT_RE, outtmpl):
-            mobj = mobj.groupdict()
-            # Object traversal
-            fields = mobj['fields'].split('.')
-            final_key = mobj['final_key']
-            value = traverse_dict(template_dict, fields)
-            # Offset the value
-            if mobj['add']:
-                value = float_or_none(value)
-                if value is not None:
-                    value = value + float(mobj['add'])
-            # Datetime formatting
-            if mobj['strf_format']:
-                value = strftime_or_none(value, mobj['strf_format'])
-            if mobj['type'] in 'crs' and value is not None:  # string
-                value = sanitize('%{}'.format(mobj['type']) % fields[-1], value)
+        EXTERNAL_FORMAT_RE = FORMAT_RE.format('(?P<key>[^)]*)')
+        # Field is of the form key1.key2...
+        # where keys (except first) can be string, int or slice
+        FIELD_RE = r'\w+(?:\.(?:\w+|[-\d]*(?::[-\d]*){0,2}))*'
+        INTERNAL_FORMAT_RE = re.compile(r'''(?x)
+            (?P<negate>-)?
+            (?P<fields>{0})
+            (?P<maths>(?:[-+]-?(?:\d+(?:\.\d+)?|{0}))*)
+            (?:>(?P<strf_format>.+?))?
+            (?:\|(?P<default>.*?))?
+            $'''.format(FIELD_RE))
+        MATH_OPERATORS_RE = re.compile(r'(?<![-+])([-+])')
+        MATH_FUNCTIONS = {
+            '+': float.__add__,
+            '-': float.__sub__,
+        }
+        for outer_mobj in re.finditer(EXTERNAL_FORMAT_RE, outtmpl):
+            final_key = outer_mobj.group('key')
+            str_type = outer_mobj.group('type')
+            value = None
+            mobj = re.match(INTERNAL_FORMAT_RE, final_key)
+            if mobj is not None:
+                mobj = mobj.groupdict()
+                # Object traversal
+                fields = mobj['fields'].split('.')
+                value = traverse_dict(template_dict, fields)
+                # Negative
+                if mobj['negate']:
+                    value = float_or_none(value)
+                    if value is not None:
+                        value *= -1
+                # Do maths
+                if mobj['maths']:
+                    value = float_or_none(value)
+                    operator = None
+                    for item in MATH_OPERATORS_RE.split(mobj['maths'])[1:]:
+                        if item == '':
+                            value = None
+                        if value is None:
+                            break
+                        if operator:
+                            item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
+                            offset = float_or_none(item)
+                            if offset is None:
+                                offset = float_or_none(traverse_dict(template_dict, item.split('.')))
+                            try:
+                                value = operator(value, multiplier * offset)
+                            except (TypeError, ZeroDivisionError):
+                                value = None
+                            operator = None
+                        else:
+                            operator = MATH_FUNCTIONS[item]
+                # Datetime formatting
+                if mobj['strf_format']:
+                    value = strftime_or_none(value, mobj['strf_format'])
+                # Set default
+                if value is None and mobj['default'] is not None:
+                    value = mobj['default']
+            # Sanitize
+            if str_type in 'crs' and value is not None:  # string
+                value = sanitize('%{}'.format(str_type) % fields[-1], value)
            else:  # numeric
                numeric_fields.append(final_key)
                value = float_or_none(value)
--- a/yt_dlp/postprocessor/execafterdownload.py
+++ b/yt_dlp/postprocessor/execafterdownload.py
@ -24,7 +24,7 @@ def pp_key(cls):

    def parse_cmd(self, cmd, info):
        # If no %(key)s is found, replace {} for backard compatibility
-        if not re.search(FORMAT_RE.format(r'[-\w>.+]+'), cmd):
+        if not re.search(FORMAT_RE.format(r'[^)]*'), cmd):
            if '{}' not in cmd:
                cmd += ' {}'
            return cmd.replace('{}', compat_shlex_quote(info['filepath']))
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@ -6112,11 +6112,11 @@ def traverse_dict(dictn, keys, casesense=True):
                key = key.lower()
            dictn = dictn.get(key)
        elif isinstance(dictn, (list, tuple, compat_str)):
-            key, n = int_or_none(key), len(dictn)
-            if key is not None and -n <= key < n:
-                dictn = dictn[key]
+            if ':' in key:
+                key = slice(*map(int_or_none, key.split(':')))
            else:
-                dictn = None
+                key = int_or_none(key)
+            dictn = try_get(dictn, lambda x: x[key])
        else:
            return None
    return dictn