From fd647775e27e030ab17387c249e2ebeba68f8ff0 Mon Sep 17 00:00:00 2001 From: Simon Sawicki Date: Sun, 11 Feb 2024 15:14:42 +0100 Subject: [PATCH] [devscripts] `tomlparse`: Add makeshift toml parser Authored by: Grub4K --- devscripts/tomlparse.py | 189 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 189 insertions(+) create mode 100755 devscripts/tomlparse.py diff --git a/devscripts/tomlparse.py b/devscripts/tomlparse.py new file mode 100755 index 000000000..85ac4eef7 --- /dev/null +++ b/devscripts/tomlparse.py @@ -0,0 +1,189 @@ +#!/usr/bin/env python3 + +""" +Simple parser for spec compliant toml files + +A simple toml parser for files that comply with the spec. +Should only be used to parse `pyproject.toml` for `install_deps.py`. + +IMPORTANT: INVALID FILES OR MULTILINE STRINGS ARE NOT SUPPORTED! +""" + +from __future__ import annotations + +import datetime +import json +import re + +WS = r'(?:[\ \t]*)' +STRING_RE = re.compile(r'"(?:\\.|[^\\"\n])*"|\'[^\'\n]*\'') +SINGLE_KEY_RE = re.compile(rf'{STRING_RE.pattern}|[A-Za-z0-9_-]+') +KEY_RE = re.compile(rf'{WS}(?:{SINGLE_KEY_RE.pattern}){WS}(?:\.{WS}(?:{SINGLE_KEY_RE.pattern}){WS})*') +EQUALS_RE = re.compile(rf'={WS}') +WS_RE = re.compile(WS) + +_SUBTABLE = rf'(?P^\[(?P\[)?(?P{KEY_RE.pattern})\]\]?)' +EXPRESSION_RE = re.compile(rf'^(?:{_SUBTABLE}|{KEY_RE.pattern}=)', re.MULTILINE) + +LIST_WS_RE = re.compile(rf'{WS}((#[^\n]*)?\n{WS})*') +LEFTOVER_VALUE_RE = re.compile(r'[^,}\]\t\n#]+') + + +def parse_key(value: str): + for match in SINGLE_KEY_RE.finditer(value): + if match[0][0] == '"': + yield json.loads(match[0]) + elif match[0][0] == '\'': + yield match[0][1:-1] + else: + yield match[0] + + +def get_target(root: dict, paths: list[str], is_list=False): + target = root + + for index, key in enumerate(paths, 1): + use_list = is_list and index == len(paths) + result = target.get(key) + if result is None: + result = [] if use_list else {} + target[key] = result + + if isinstance(result, dict): + target = result + elif use_list: + target = {} + result.append(target) + else: + target = result[-1] + + assert isinstance(target, dict) + return target + + +def parse_enclosed(data: str, index: int, end: str, ws_re: re.Pattern): + index += 1 + + if match := ws_re.match(data, index): + index = match.end() + + while data[index] != end: + index = yield True, index + + if match := ws_re.match(data, index): + index = match.end() + + if data[index] == ',': + index += 1 + + if match := ws_re.match(data, index): + index = match.end() + + assert data[index] == end + yield False, index + 1 + + +def parse_value(data: str, index: int): + if data[index] == '[': + result = [] + + indices = parse_enclosed(data, index, ']', LIST_WS_RE) + valid, index = next(indices) + while valid: + index, value = parse_value(data, index) + result.append(value) + valid, index = indices.send(index) + + return index, result + + if data[index] == '{': + result = {} + + indices = parse_enclosed(data, index, '}', WS_RE) + valid, index = next(indices) + while valid: + valid, index = indices.send(parse_kv_pair(data, index, result)) + + return index, result + + if match := STRING_RE.match(data, index): + return match.end(), json.loads(match[0]) if match[0][0] == '"' else match[0][1:-1] + + match = LEFTOVER_VALUE_RE.match(data, index) + assert match + value = match[0].strip() + for func in [ + int, + float, + datetime.time.fromisoformat, + datetime.date.fromisoformat, + datetime.datetime.fromisoformat, + {'true': True, 'false': False}.get, + ]: + try: + value = func(value) + break + except Exception: + pass + + return match.end(), value + + +def parse_kv_pair(data: str, index: int, target: dict): + match = KEY_RE.match(data, index) + if not match: + return None + + *keys, key = parse_key(match[0]) + + match = EQUALS_RE.match(data, match.end()) + assert match + index = match.end() + + index, value = parse_value(data, index) + get_target(target, keys)[key] = value + return index + + +def parse_toml(data: str): + root = {} + target = root + + index = 0 + while True: + match = EXPRESSION_RE.search(data, index) + if not match: + break + + if match.group('subtable'): + index = match.end() + path, is_list = match.group('path', 'is_list') + target = get_target(root, list(parse_key(path)), bool(is_list)) + continue + + index = parse_kv_pair(data, match.start(), target) + assert index is not None + + return root + + +def main(): + import argparse + from pathlib import Path + + parser = argparse.ArgumentParser() + parser.add_argument('infile', type=Path, help='The TOML file to read as input') + args = parser.parse_args() + + with args.infile.open('r', encoding='utf-8') as file: + data = file.read() + + def default(obj): + if isinstance(obj, (datetime.date, datetime.time, datetime.datetime)): + return obj.isoformat() + + print(json.dumps(parse_toml(data), default=default)) + + +if __name__ == '__main__': + main()