From 508827c0740a8c67e3c2ad42abf24649be767de4 Mon Sep 17 00:00:00 2001 From: dakkar Date: Sat, 13 Jul 2024 12:43:16 +0100 Subject: [PATCH] some small improvements to the new documentation simplify some bits, correct some terms, add some details --- src/api.ts | 16 +++++++ src/internal/core/index.ts | 89 ++++++++++++++++++++------------------ src/internal/parser.ts | 76 +++++++++++++++++--------------- 3 files changed, 102 insertions(+), 79 deletions(-) diff --git a/src/api.ts b/src/api.ts index 292d042..1d6a5f0 100644 --- a/src/api.ts +++ b/src/api.ts @@ -14,6 +14,8 @@ export function parse(input: string, opts: Partial<{ nestLimit: number; }> = {}) /** * Generates a MfmSimpleNode tree from the MFM string. + * + * "Simple" MFM only recognises text and emojis */ export function parseSimple(input: string): MfmSimpleNode[] { const nodes = simpleParser(input); @@ -22,6 +24,9 @@ export function parseSimple(input: string): MfmSimpleNode[] { /** * Generates a MFM string from the MfmNode tree. + * + * Notice that the result of `toString(parse(someString))` will very + * probably not be identical to `someString` */ export function toString(tree: MfmNode[]): string export function toString(node: MfmNode): string @@ -36,6 +41,17 @@ export function toString(node: MfmNode | MfmNode[]): string { /** * Inspects the MfmNode tree. + * + * This is the visitor pattern. Your `action` will be called on each + * node of the tree, as a depth-first pre-visit: + * + * ``` + * A + * +B + * |+C + * |+D + * +E + * ``` */ export function inspect(node: MfmNode, action: (node: MfmNode) => void): void export function inspect(nodes: MfmNode[], action: (node: MfmNode) => void): void diff --git a/src/internal/core/index.ts b/src/internal/core/index.ts index 3c5c0eb..2af40ac 100644 --- a/src/internal/core/index.ts +++ b/src/internal/core/index.ts @@ -3,7 +3,7 @@ // /** - * Holds the information from a successful parse. 
+ * Holds the information from a successful parse: a parsed node, and the position where the parsing stopped. */ export type Success = { success: true; @@ -21,6 +21,9 @@ export type Failure = { success: false }; */ export type Result = Success | Failure; +/** + * Parser state: should we print what we're doing? are we inside a link label? how deep can we go? how deep are we? + */ interface State { trace?: boolean, linkLabel?: boolean, @@ -29,12 +32,12 @@ interface State { } /** - * The function used by a parser + * The function that actually does the parsing (of the given string, from the given position) */ export type ParserHandler = (input: string, index: number, state: State) => Result /** - * A function that always returns a parse success. + * Simplified constructor for `Success` * * @param index The index of the success. * @param value The value of the success. @@ -49,7 +52,7 @@ export function success(index: number, value: T): Success { } /** - * A function that always returns a parse failure. + * Simplified constructor for `Failure` * * @returns A {@link Failure} object. */ @@ -58,7 +61,7 @@ export function failure(): Failure { } /** - * The parser class. + * The parser class. Delegates most of the parsing to the `handler`, but provides combinators on top of it. */ export class Parser { public name?: string; @@ -85,8 +88,8 @@ export class Parser { } /** - * A method that maps the result of the parse with the provided function if successful, and returns a {@link Failure} - * otherwise. + * Returns a new parser, just like `this` parser, but the values of + * successful parses are passed through the given function. * * @param fn The function used to map the output of the parser. * @returns The result of the parser mapped with `fn`. @@ -102,8 +105,7 @@ export class Parser { } /** - * A method that returns the portion of the input that matches this {@link Parser Parser's} language and a {@link Failure} - * if the parse failed. 
+ * Returns a new parser, just like `this` parser, but the result is just the matched input text. * * @returns The plaintext related to the successful parse, and a {@link Failure} if the parse failed. */ @@ -119,10 +121,10 @@ export class Parser { } /** - * A method that returns a {@link Parser} that matches at least `min` repetitions of this parser. + * Returns a new parser, that matches at least `min` repetitions of `this` parser. * * @param min The minimum amount of times this parse must succeed to return a {@link Success}. - * @returns A Parser that returns a {@link Success} object it matches enough times, and a {@link Failure} otherwise. + * @returns A Parser that returns a {@link Success} object if it matches enough times, and a {@link Failure} otherwise. */ many(min: number): Parser { return new Parser((input, index, state) => { @@ -145,12 +147,11 @@ export class Parser { } /** - * A method that returns a new {@link Parser} that matches at least `min` times, with each repetition separated - * by `separator`. + * Returns a new parser that matches at least `min` repetitions of `this` parser, separated by `separator`. * * @param separator The parser representing the separator that must appear between this parser's value. - * @param min The minimum amount of times the separator must appear. - * @returns A {@link Success} object if the minimum separator count is met, and a {@link Failure} otherwise. + * @param min The minimum amount of times this parse must succeed to return a {@link Success}. + * @returns A Parser that returns a {@link Success} object if it matches enough times, and a {@link Failure} otherwise. */ sep(separator: Parser, min: number): Parser { if (min < 1) { @@ -165,15 +166,21 @@ export class Parser { ).map(result => [result[0], ...result[1]]); } + /** + * Returns a new parser, whose result is the part of `this` parser's result selected by the given key. 
+ * (so if `this` produces a success value like `{foo:1}`, `this.select('foo')` would result in `1`) + * + * @param key The value used to select a part of the result + * @returns The result of the parser subscripted by `key` + */ select(key: K): Parser { return this.map(v => v[key]); } /** - * A method that returns a new {@link Parser} that attempts to match, but returns a {@link Success} with the value `null` - * on failure. + * Returns a new parser, just like `this` parser, but returns a null success on failure. * - * @returns A {@link Success} object. + * @returns A Parser that always returns a {@link Success} object, maybe with a `null` inside. */ option(): Parser { return alt([ @@ -184,7 +191,7 @@ export class Parser { } /** - * A function that returns a {@link Parser} that succeeds if it matches the supplied string. + * Construct a {@link Parser} that matches the supplied string. * * @param value The string that the returned {@link Parser} checks for. * @returns A {@link Parser} that matches the supplied string. @@ -202,7 +209,7 @@ export function str(value: T): Parser { } /** - * A function that returns {@link Parser} that succeeds if the input matches the supplied regular expression. + * Construct a {@link Parser} that matches the supplied regular expression. * * @param pattern The regular expression that the returned {@link Parser} tries to match. * @returns A {@link Parser} that checks if the input matches the supplied regular expression. @@ -230,18 +237,12 @@ export type SeqParseResult = : unknown[]; /** - * A function that returns a {@link Parser} that goes through the parsers provided, in order, and checks that they all + * Construct a {@link Parser} that goes through the parsers provided, in order, and checks that they all * succeed. A {@link Failure} object is returned if any of the parsers fails. 
- * The value in the {@link Success} returned by the parser varies depending on the value of `select`: - * - * If select is `null`, then the array of the results of the supplied parsers is returned, otherwise the value - * in the array at the specified index is returned. * * @param parsers The array of {@link Parser Parsers} that are checked to see if it succeeds. - * @param select The index of the result array that is returned. * @returns A {@link Parser} that runs through the parsers in the order that they were provided and returns - * a value based on the state of `select` (the entire array if `null`, else the value held at the - * index specified by `select`). + * all the values they returned. */ export function seq[]>(...parsers: Parsers): Parser> { return new Parser((input, index, state) => { @@ -261,7 +262,7 @@ export function seq[]>(...parsers: Parsers): Par } /** - * A function that returns a {@link Parser} that goes through the parsers provided, in order, and checks if any succeed. + * Construct a {@link Parser} that goes through the parsers provided, in order, and checks if any succeed. * The returned parser produces the result of the first element of `parsers` to succeed, or a failure if none do. * * @param parsers The {@link Parser Parsers} that should be used. @@ -281,7 +282,7 @@ export function alt[]>(parsers: Parsers): Parser } /** - * A function that returns a constant {@link Parser}. + * Construct a constant {@link Parser} that always succeeds with the given value. * * @param value The value to be used in the returned {@link Success} object. * @returns A {@link Parser} that always returns a {@link Success} with the specified value. @@ -293,7 +294,7 @@ function succeeded(value: T): Parser { } /** - * A function that returns a {@link Parser} that inverts the result of the parser supplied. + * Construct a {@link Parser} that succeeds when the given parser fails, and vice versa. * * @param parser The {@link Parser} to be matched. 
* @returns A {@link Success} with the value `null` if the parser fails, or a {@link Failure} if it succeeds. @@ -308,8 +309,9 @@ export function notMatch(parser: Parser): Parser { } /** - * A function that returns a {@link Parser} that fails if `parserExcluded` succeeds, and returns the result of `parserIncluded` - * otherwise. + * Construct a {@link Parser} just like `parserIncluded`, but fails if + * `parserExcluded` succeeds. So it matches the "included" language, + * minus the "excluded" language. * * @param parserIncluded The {@link Parser} that should succeed * @param parserExcluded The {@link Parser} that should fail @@ -334,12 +336,11 @@ export const cr = str('\r'); export const lf = str('\n'); /** A {@link Parser} that matches the character sequence `\r\n`. */ export const crlf = str('\r\n'); -/** A {@link Parser} that matches for any valid new line sequences. */ +/** A {@link Parser} that matches any valid new line sequence. */ export const newline = alt([crlf, cr, lf]); /** - * A {@link Parser} that succeeds so long as it is not at the end of the input string and returns the value of - * the next character. + * A {@link Parser} that matches a character. */ export const char = new Parser((input, index, _state) => { if ((input.length - index) < 1) { @@ -350,8 +351,7 @@ export const char = new Parser((input, index, _state) => { }); /** - * A {@link Parser} that checks that the current position is the beginning of a line. For this parser to succeed, - * either the current index must be zero, or the previous character is a `\n` or `\r`. + * A {@link Parser} that checks that we are at the beginning of a line or of the input. */ export const lineBegin = new Parser((input, index, state) => { if (index === 0) { @@ -367,8 +367,7 @@ export const lineBegin = new Parser((input, index, state) => { }); /** - * A {@link Parser} that checks that the current position is the end of a line. 
For this parser to succeed, either the current - * index must be equal to the input length, or the current character is a `\n` or `\r`. + * A {@link Parser} that checks if we are at the end of a line or of the input. */ export const lineEnd = new Parser((input, index, state) => { if (index === input.length) { @@ -384,12 +383,16 @@ export const lineEnd = new Parser((input, index, state) => { }); /** - * A function that lazily loads the supplied {@link Parser} + * Lazily define a parser. This allows for self-recursive parsers (see for example the `url` and `hashtag` rules) * - * @param fn The {@link Parser} that the returned parser should use. - * @returns A {@link Parser} that checks using the supplied parser's {@link Parser.handler} + * @param fn A function that returns the actual {@link Parser} + * @returns A {@link Parser} that becomes the actual parser on its first use */ export function lazy(fn: () => Parser): Parser { + // Convert a parser generator into a parser: when `parser` is first + // invoked, it replaces its own handler with the real one, and calls + // it. On all subsequent invocations, the real handler will be + // called directly const parser: Parser = new Parser((input, index, state) => { parser.handler = fn().handler; return parser.handler(input, index, state); diff --git a/src/internal/parser.ts b/src/internal/parser.ts index 6b71024..3026aea 100644 --- a/src/internal/parser.ts +++ b/src/internal/parser.ts @@ -54,7 +54,7 @@ function seqOrText[]>(...parsers: Parsers): P. } /** - * A {@link P.Parser Parser} that succeeds if the supplied state is not a link label. + * A {@link P.Parser Parser} that succeeds if we're not inside a link label (the bit between `[]`) */ const notLinkLabel = new P.Parser((_input, index, state) => { return (!state.linkLabel) @@ -63,7 +63,7 @@ const notLinkLabel = new P.Parser((_input, index, state) => { }); /** - * A {@link P.Parser Parser} that succeeds if the current nest depth is less than the nest limit. 
+ * A {@link P.Parser Parser} that succeeds if we can still nest nodes */ const nestable = new P.Parser((_input, index, state) => { return (state.depth < state.nestLimit) @@ -236,7 +236,7 @@ export const language = P.createLanguage({ }, /** - * A {@link P.Parser Parser} that only matches rules that can be done without disrupting text. + * A {@link P.Parser Parser} that only matches "inline" rules (those that don't start paragraphs and the like) * * @param r The rules of SFM * @returns A {@link P.Parser Parser} that matches all inline rules. @@ -290,7 +290,7 @@ export const language = P.createLanguage({ }, /** - * A {@link P.Parser Parser} that matches single lines that begin with a `>` character. + * A {@link P.Parser Parser} that matches "block quoted" SFM (all lines begin with a `>` character). * * @param r The rules of SFM * @returns The {@link P.Parser Parser} for this rule @@ -412,7 +412,7 @@ export const language = P.createLanguage({ /** * A {@link P.Parser Parser} that matches multi-line text that is surrounded with the `***` mark. - * The marks do not have to be on their own lines and have no restrictions on placement. + * The marks do not have to be on their own lines. * * @param r The rules of SFM * @returns The {@link P.Parser Parser} for this rule @@ -431,7 +431,7 @@ export const language = P.createLanguage({ /** * A {@link P.Parser Parser} that matches multi-line text that is surrounded with the `**` mark. - * The marks do not have to be on their own lines and have no restrictions on placement. + * The marks do not have to be on their own lines. * * @param r The rules of SFM * @returns The {@link P.Parser Parser} for this rule @@ -450,7 +450,7 @@ export const language = P.createLanguage({ /** * A {@link P.Parser Parser} that matches multi-line text that is opened with `` and closed with ``. - * The marks do not have to be on their own lines and have no restrictions on placement. + * The marks do not have to be on their own lines. 
* * @param r The rules of SFM * @returns The {@link P.Parser Parser} for this rule @@ -469,8 +469,7 @@ export const language = P.createLanguage({ }, /** - * A {@link P.Parser Parser} that matches multi-line text that is surrounded with the `__` mark. - * The marks do not have to be on their own lines and have no restrictions on placement. + * A {@link P.Parser Parser} that matches text that is surrounded with the `__` mark. * * @param r The rules of SFM * @returns The {@link P.Parser Parser} for this rule @@ -486,7 +485,7 @@ export const language = P.createLanguage({ /** * A {@link P.Parser Parser} that matches multi-line text that is opened with `` and closed with ``. - * The marks do not have to be on their own lines and have no restrictions on placement. + * The marks do not have to be on their own lines. * * @param r The rules of SFM * @returns The {@link P.Parser Parser} for this rule @@ -506,7 +505,7 @@ export const language = P.createLanguage({ /** * A {@link P.Parser Parser} that matches multi-line text that is opened with `` and closed with ``. - * The marks do not have to be on their own lines and have no restrictions on placement. + * The marks do not have to be on their own lines. * * @param r The rules of SFM * @returns The {@link P.Parser Parser} for this rule @@ -525,8 +524,8 @@ export const language = P.createLanguage({ }, /** - * A {@link P.Parser Parser} that matches multi-line text that is surrounded with the `*` mark. - * The marks do not have to be on their own lines and have no restrictions on placement. + * A {@link P.Parser Parser} that matches text that is surrounded with the `*` mark. + * The opening mark must not be preceded by a letter or digit (so `foo*bar*` does not generate an italic `bar`). 
* * @param r The rules of SFM * @returns The {@link P.Parser Parser} for this rule @@ -553,8 +552,8 @@ export const language = P.createLanguage({ }, /** - * A {@link P.Parser Parser} that matches multi-line text that is surrounded with the `_` mark. - * The marks do not have to be on their own lines and have no restrictions on placement. + * A {@link P.Parser Parser} that matches text that is surrounded with the `_` mark. + * The opening mark must not be preceded by a letter or digit (so `foo_bar_` does not generate an italic `bar`). * * @param r The rules of SFM * @returns The {@link P.Parser Parser} for this rule @@ -582,7 +581,7 @@ export const language = P.createLanguage({ /** * A {@link P.Parser Parser} that matches multi-line text that is opened with `` and closed with ``. - * The marks do not have to be on their own lines and have no restrictions on placement. + * The marks do not have to be on their own lines. * * @param r The rules of SFM * @returns The {@link P.Parser Parser} for this rule @@ -601,8 +600,7 @@ export const language = P.createLanguage({ }, /** - * A {@link P.Parser Parser} that matches multi-line text that is surrounded with the `~~` mark. - * The marks do not have to be on their own lines and have no restrictions on placement. + * A {@link P.Parser Parser} that matches inline content that is surrounded with the `~~` mark. * * @param r The rules of SFM * @returns The {@link P.Parser Parser} for this rule @@ -620,7 +618,7 @@ export const language = P.createLanguage({ }, /** - * A {@link P.Parser Parser} that matches unicode emojis according to a regex. + * A {@link P.Parser Parser} that matches unicode emoji names * * @param r The rules of SFM * @returns The {@link P.Parser Parser} for this rule @@ -632,7 +630,7 @@ export const language = P.createLanguage({ /** * A {@link P.Parser Parser} that matches multi-line text that is opened with `` and closed with ``. 
- * The marks do not have to be on their own lines and have no restrictions on placement. + * The marks do not have to be on their own lines. * * @param r The rules of SFM * @returns The {@link P.Parser Parser} for this rule @@ -652,7 +650,14 @@ export const language = P.createLanguage({ ).select(2).map(result => M.PLAIN(result)); }, + /** + * A {@link P.Parser Parser} that matches a call to an "advanced SFM" function (i.e. `$[something ]`) + * + * @param r The rules of SFM + * @returns The {@link P.Parser Parser} for this rule + */ fn: r => { + // parse a function name const fnName = new P.Parser((input, index, state) => { const result = P.regexp(/[a-z0-9_]+/i).handler(input, index, state); if (!result.success) { @@ -660,6 +665,7 @@ export const language = P.createLanguage({ } return P.success(result.index, result.value); }); + // parse an argument: `foo` or `foo=10-bar` const arg: P.Parser = P.seq( P.regexp(/[a-z0-9_]+/i), P.seq( @@ -672,6 +678,7 @@ export const language = P.createLanguage({ v: (result[1] != null) ? result[1] : true, }; }); + // parse a sequence of arguments: `.foo,bar=13,etc` const args = P.seq( P.str('.'), arg.sep(P.str(','), 1), @@ -683,6 +690,7 @@ export const language = P.createLanguage({ return result; }); const fnClose = P.str(']'); + // the parser: `$[`, function name, maybe arguments, a space (required!), then 0 or more inline elements, `]` return seqOrText( P.str('$['), fnName, @@ -700,8 +708,8 @@ export const language = P.createLanguage({ }, /** - * A {@link P.Parser Parser} that matches multi-line text that is surrounded with the `` ` `` mark. - * The marks do not have to be on their own lines and have no restrictions on placement. + * A {@link P.Parser Parser} that matches text that is surrounded with the `` ` `` mark. + * Notice that `` `foo´ `bar´ `` is explicitly rejected by this parser. 
* * @param r The rules of SFM * @returns The {@link P.Parser Parser} for this rule @@ -719,8 +727,7 @@ export const language = P.createLanguage({ }, /** - * A {@link P.Parser Parser} that matches single-line text opened with `\(` and closed with `\)`. - * The marks must be on the same line, but have no other restrictions on placement. + * A {@link P.Parser Parser} that matches text opened with `\(` and closed with `\)`. * * @param r The rules of SFM * @returns The {@link P.Parser Parser} for this rule @@ -741,7 +748,6 @@ export const language = P.createLanguage({ /** * A {@link P.Parser Parser} that matches user mentions, which all must begin with `@{username}`, but only have to be followed by * `@{hostname}` if the user is on a different instance. - * The mention must be contained within one line, but it has no other restrictions on placement. * * @param r the rules of SFM * @returns The {@link P.Parser Parser} for this rule @@ -762,7 +768,7 @@ export const language = P.createLanguage({ if (!result.success) { return P.failure(); } - // check before (not mention) + // check before (not mention), so `foo@bar` is not a mention of the user `bar` const beforeStr = input.slice(0, index); if (/[a-z0-9]$/i.test(beforeStr)) { return P.failure(); @@ -815,8 +821,7 @@ export const language = P.createLanguage({ /** * A {@link P.Parser Parser} that matches single word hashtags starting with the character `#`. * The contents of the hashtag are limited to alphanumeric characters, but cannot be made up of exclusively numbers. - * There are some characters that must be closed with another in order for the parse to succeed. - * There are no restrictions on the placement of hashtags. + * Handles some paired delimiters. 
* * @param r The rules of SFM * @returns A {@link P.Parser Parser} for this rule @@ -852,7 +857,7 @@ export const language = P.createLanguage({ if (!result.success) { return P.failure(); } - // check before + // check before, so `foo#bar` is not hashtag `bar` const beforeStr = input.slice(0, index); if (/[a-z0-9]$/i.test(beforeStr)) { return P.failure(); @@ -869,8 +874,7 @@ export const language = P.createLanguage({ /** * A {@link P.Parser Parser} that matches single word emojis surrounded by the `:` mark. - * The marks must be on the same line, and alphanumeric characters cannot appear both in front - * of, and behind, the opening and closing marks respectively. + * `foo:bar:baz` is not a reference to the `bar` emoji * * @param r The rules of SFM * @returns A {@link P.Parser Parser} for this rule @@ -888,11 +892,10 @@ export const language = P.createLanguage({ }, /** - * A {@link P.Parser Parser} that matches inline links, which are made up of the label and the url. + * A {@link P.Parser Parser} that matches links, which are made up of the label and the url. * The label must be opened with either `?[` for silent links or `[` for normal links and closed with `]`. * The url must be opened with `(` and closed with `)`, and the contents of the url must follow either the * {@link language.url} or {@link language.urlAlt} rules. - * The inline link must be on a single line, but has no other restrictions on placement. * * @param r The rules of SFM * @returns A {@link P.Parser Parser} for this rule @@ -933,13 +936,14 @@ export const language = P.createLanguage({ }, /** - * A {@link P.Parser Parser} that matches the standard format for urls. 
+ * A {@link P.Parser Parser} that matches something that vaguely looks like a URL (starts with `http://` or `https://`, contains valid URL characters) * * @param r The rules of SFM * @returns A {@link P.Parser Parser} for this rule */ url: () => { const urlChar = P.regexp(/[.,a-z0-9_/:%#@$&?!~=+-]/i); + // this requires that URLs have balanced brackets, otherwise the link parser gets very confused const innerItem: P.Parser = P.lazy(() => P.alt([ P.seq( P.str('('), nest(innerItem, urlChar).many(0), P.str(')'), @@ -978,7 +982,8 @@ export const language = P.createLanguage({ }, /** - * A {@link P.Parser Parser} that matches an alternate form for urls, where it is opened with `<` and closed with `>`. + * A {@link P.Parser Parser} that matches something that vaguely looks like a URL, but surrounded by `<>`. + * We have to use this in the second half of a link when the URL contains unmatched brackets (e.g. `[foo](<https://example.com/foo(bar>)`) * * @param r The rules of SFM * @returns A {@link P.Parser Parser} for this rule @@ -1006,7 +1011,6 @@ export const language = P.createLanguage({ /** * A {@link P.Parser Parser} that matches single line text for a search query. The query must have either `[検索]` or * `[search]` at the end of the line. - * The query and button must be on the same line and there can be nothing else on that line. * * @param r The rules of SFM * @returns A {@link P.Parser Parser} for this rule