mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-04 23:35:04 +00:00
190 lines
4.8 KiB
Python
190 lines
4.8 KiB
Python
|
#!/usr/bin/env python3
|
||
|
|
||
|
"""
|
||
|
Simple parser for spec-compliant TOML files

A simple TOML parser for files that comply with the spec.
|
||
|
Should only be used to parse `pyproject.toml` for `install_deps.py`.
|
||
|
|
||
|
IMPORTANT: INVALID FILES OR MULTILINE STRINGS ARE NOT SUPPORTED!
|
||
|
"""
|
||
|
|
||
|
from __future__ import annotations
|
||
|
|
||
|
import datetime
|
||
|
import json
|
||
|
import re
|
||
|
|
||
|
# Optional run of spaces and tabs; never crosses a newline.
WS = r'(?:[\ \t]*)'

# A single-line basic ("...") or literal ('...') string.
STRING_RE = re.compile(
    r'"(?:\\.|[^\\"\n])*"'
    r'|\'[^\'\n]*\'',
)

# One key segment: a quoted string or a bare key.
SINGLE_KEY_RE = re.compile(STRING_RE.pattern + r'|[A-Za-z0-9_-]+')

# A (possibly dotted) key, including the whitespace around each segment.
KEY_RE = re.compile(
    rf'{WS}(?:{SINGLE_KEY_RE.pattern}){WS}'
    rf'(?:\.{WS}(?:{SINGLE_KEY_RE.pattern}){WS})*',
)

# The `=` sign of a key/value pair plus any whitespace following it.
EQUALS_RE = re.compile('=' + WS)
WS_RE = re.compile(WS)

# A `[table]` or `[[array-of-tables]]` header at the start of a line.
_SUBTABLE = (
    r'(?P<subtable>^\[(?P<is_list>\[)?'
    rf'(?P<path>{KEY_RE.pattern})'
    r'\]\]?)'
)

# Start of the next top-level expression: a table header or a `key =`.
EXPRESSION_RE = re.compile(
    rf'^(?:{_SUBTABLE}|{KEY_RE.pattern}=)',
    flags=re.MULTILINE,
)

# Whitespace inside arrays: may span lines and contain `#` comments.
LIST_WS_RE = re.compile(rf'{WS}((#[^\n]*)?\n{WS})*')

# A bare (unquoted) value: everything up to a delimiter, tab, newline or comment.
LEFTOVER_VALUE_RE = re.compile(r'[^,}\]\t\n#]+')
|
||
|
|
||
|
|
||
|
def parse_key(value: str):
    """
    Yield the individual segments of a (possibly dotted) TOML key.

    Basic-string segments ("...") are decoded with `json.loads`,
    literal-string segments ('...') have their quotes removed,
    and bare segments are yielded unchanged.
    """
    for found in SINGLE_KEY_RE.finditer(value):
        segment = found[0]
        quote = segment[0]
        if quote == '"':
            # Basic string: let the JSON decoder resolve the escapes
            yield json.loads(segment)
        elif quote == '\'':
            # Literal string: taken verbatim, minus the quotes
            yield segment[1:-1]
        else:
            yield segment
|
||
|
|
||
|
|
||
|
def get_target(root: dict, paths: list[str], is_list=False):
    """
    Walk `root` along `paths` and return the dict found at the end.

    Missing entries are created on the way. If `is_list` is set, the final
    key refers to a list of tables: the list is created if absent and a
    fresh dict is appended to it and returned. An intermediate key that
    holds a list is resolved to that list's last element.
    """
    node = root
    depth = len(paths)

    for position, key in enumerate(paths, 1):
        wants_list = is_list and position == depth
        found = node.get(key)
        if found is None:
            found = node[key] = [] if wants_list else {}

        if isinstance(found, dict):
            node = found
            continue

        if wants_list:
            # Start a new element in the array of tables
            node = {}
            found.append(node)
        else:
            # Descend into the most recently added table of the array
            node = found[-1]

    assert isinstance(node, dict)
    return node
|
||
|
|
||
|
|
||
|
def parse_enclosed(data: str, index: int, end: str, ws_re: re.Pattern):
    """
    Generator driving the parsing of a bracketed, comma-separated sequence.

    `index` must point at the opening bracket. For every element the
    generator yields `(True, start)`, with `start` being the index of the
    element; the caller has to `send()` back the index just past that
    element. Once the `end` character is reached, `(False, index)` is
    yielded, with `index` pointing just past the closing bracket.

    `ws_re` is the pattern used to skip whitespace between tokens.
    """
    def skip_whitespace(position):
        found = ws_re.match(data, position)
        return found.end() if found else position

    # Step over the opening bracket
    cursor = skip_whitespace(index + 1)

    while data[cursor] != end:
        # Hand the element to the caller and resume right after it
        cursor = skip_whitespace((yield True, cursor))

        if data[cursor] == ',':
            cursor += 1

        cursor = skip_whitespace(cursor)

    assert data[cursor] == end
    yield False, cursor + 1
|
||
|
|
||
|
|
||
|
def parse_value(data: str, index: int):
    """
    Parse the TOML value that starts at `data[index]`.

    Supports arrays, inline tables, single-line basic and literal strings,
    and bare values: integers, floats, times, dates, datetimes and booleans.

    Returns a tuple `(end, value)`, where `end` is the index just past the
    consumed text.

    A bare value that none of the converters accept is returned as its
    stripped source text. TOML has no null type, so such a value is never
    turned into `None`.

    Asserts that some value text is present at `index`.
    """
    opener = data[index]

    if opener == '[':
        items = []

        positions = parse_enclosed(data, index, ']', LIST_WS_RE)
        more, index = next(positions)
        while more:
            index, item = parse_value(data, index)
            items.append(item)
            more, index = positions.send(index)

        return index, items

    if opener == '{':
        table = {}

        positions = parse_enclosed(data, index, '}', WS_RE)
        more, index = next(positions)
        while more:
            # Each pair is stored into `table`; its end index resumes the generator
            more, index = positions.send(parse_kv_pair(data, index, table))

        return index, table

    if match := STRING_RE.match(data, index):
        text = match[0]
        # Basic strings are decoded as JSON, literal strings are taken verbatim
        return match.end(), json.loads(text) if text[0] == '"' else text[1:-1]

    match = LEFTOVER_VALUE_RE.match(data, index)
    assert match
    value = match[0].strip()

    # Tried in order; the first converter that accepts the text wins.
    converters = (
        int,
        float,
        datetime.time.fromisoformat,
        datetime.date.fromisoformat,
        datetime.datetime.fromisoformat,
        # `__getitem__` (unlike `get`) raises for unknown text, so an
        # unrecognised value is not silently replaced by `None`
        {'true': True, 'false': False}.__getitem__,
    )
    for convert in converters:
        try:
            value = convert(value)
        except (ValueError, KeyError):
            continue
        break

    return match.end(), value
|
||
|
|
||
|
|
||
|
def parse_kv_pair(data: str, index: int, target: dict):
    """
    Parse a `key = value` pair starting at `data[index]` into `target`.

    Dotted keys create (or reuse) the intermediate tables inside `target`.

    Returns the index just past the parsed value,
    or `None` if no key is found at `index`.
    """
    key_match = KEY_RE.match(data, index)
    if key_match is None:
        return None

    # All segments but the last name the tables leading to the value
    *parents, leaf = parse_key(key_match[0])

    equals_match = EQUALS_RE.match(data, key_match.end())
    assert equals_match

    end, value = parse_value(data, equals_match.end())
    get_target(target, parents)[leaf] = value
    return end
|
||
|
|
||
|
|
||
|
def parse_toml(data: str):
    """
    Parse the TOML document `data` and return it as a nested dict.

    The text is scanned for table headers and key/value pairs. A header
    selects the table that subsequent pairs are stored into; pairs found
    before the first header go into the root table.
    """
    document = {}
    current = document
    position = 0

    while (found := EXPRESSION_RE.search(data, position)) is not None:
        if found.group('subtable'):
            # `[table]` / `[[array of tables]]` header: switch the active table
            position = found.end()
            path, list_marker = found.group('path', 'is_list')
            current = get_target(document, list(parse_key(path)), bool(list_marker))
        else:
            position = parse_kv_pair(data, found.start(), current)
            assert position is not None

    return document
|
||
|
|
||
|
|
||
|
def main():
    """
    Command-line entry point: parse a TOML file and print it as JSON.

    Reads the file passed as the `infile` argument (decoded as UTF-8) and
    prints the parsed document to stdout as JSON. Date, time and datetime
    values are emitted as the strings produced by their `isoformat()`.
    """
    import argparse
    from pathlib import Path

    parser = argparse.ArgumentParser()
    parser.add_argument('infile', type=Path, help='The TOML file to read as input')
    args = parser.parse_args()

    with args.infile.open('r', encoding='utf-8') as file:
        data = file.read()

    def default(obj):
        if isinstance(obj, (datetime.date, datetime.time, datetime.datetime)):
            return obj.isoformat()
        # The `default` hook has to raise for objects it cannot convert;
        # falling through would silently serialize them as `null`
        raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')

    print(json.dumps(parse_toml(data), default=default))
|
||
|
|
||
|
|
||
|
# Run the command-line interface only when executed as a script, not on import.
if __name__ == '__main__':
    main()
|