some small improvements to the new documentation

simplify some bits, correct some terms, add some details
This commit is contained in:
dakkar 2024-07-13 12:43:16 +01:00
parent eb43eb0298
commit 508827c074
3 changed files with 102 additions and 79 deletions

View file

@ -14,6 +14,8 @@ export function parse(input: string, opts: Partial<{ nestLimit: number; }> = {})
/**
* Generates a MfmSimpleNode tree from the MFM string.
*
* "Simple" MFM only recognises text and emojis
*/
export function parseSimple(input: string): MfmSimpleNode[] {
const nodes = simpleParser(input);
@ -22,6 +24,9 @@ export function parseSimple(input: string): MfmSimpleNode[] {
/**
* Generates a MFM string from the MfmNode tree.
*
* Notice that the result of `toString(parse(someString))` will very
* probably not be identical to `someString`
*/
export function toString(tree: MfmNode[]): string
export function toString(node: MfmNode): string
@ -36,6 +41,17 @@ export function toString(node: MfmNode | MfmNode[]): string {
/**
* Inspects the MfmNode tree.
*
* This is the visitor pattern. Your `action` will be called on each
* node of the tree, in depth-first pre-order (a node is visited before
* its children), so the tree below is visited as A, B, C, D, E:
*
* ```
* A
* +B
* |+C
* |+D
* +E
* ```
*/
export function inspect(node: MfmNode, action: (node: MfmNode) => void): void
export function inspect(nodes: MfmNode[], action: (node: MfmNode) => void): void

View file

@ -3,7 +3,7 @@
//
/**
* Holds the information from a successful parse.
* Holds the information from a successful parse: a parsed node, and the position where the parsing stopped.
*/
export type Success<T> = {
success: true;
@ -21,6 +21,9 @@ export type Failure = { success: false };
*/
export type Result<T> = Success<T> | Failure;
/**
* Parser state: should we print what we're doing? are we inside a link label? how deep can we go? how deep are we?
*/
interface State {
trace?: boolean,
linkLabel?: boolean,
@ -29,12 +32,12 @@ interface State {
}
/**
* The function used by a parser
* The function that actually does the parsing (of the given string, from the given position)
*/
export type ParserHandler<T> = (input: string, index: number, state: State) => Result<T>
/**
* A function that always returns a parse success.
* Simplified constructor for `Success`
*
* @param index The index of the success.
* @param value The value of the success.
@ -49,7 +52,7 @@ export function success<T>(index: number, value: T): Success<T> {
}
/**
* A function that always returns a parse failure.
* Simplified constructor for `Failure`
*
* @returns A {@link Failure} object.
*/
@ -58,7 +61,7 @@ export function failure(): Failure {
}
/**
* The parser class.
* The parser class. Delegates most of the parsing to the `handler`, but provides combinators on top of it.
*/
export class Parser<T> {
public name?: string;
@ -85,8 +88,8 @@ export class Parser<T> {
}
/**
* A method that maps the result of the parse with the provided function if successful, and returns a {@link Failure}
* otherwise.
* Returns a new parser, just like `this` parser, but the values of
* successful parses are passed through the given function.
*
* @param fn The function used to map the output of the parser.
* @returns The result of the parser mapped with `fn`.
@ -102,8 +105,7 @@ export class Parser<T> {
}
/**
* A method that returns the portion of the input that matches this {@link Parser Parser's} language and a {@link Failure}
* if the parse failed.
* Returns a new parser, just like `this` parser, but the result is just the matched input text.
*
* @returns The plaintext related to the successful parse, and a {@link Failure} if the parse failed.
*/
@ -119,10 +121,10 @@ export class Parser<T> {
}
/**
* A method that returns a {@link Parser} that matches at least `min` repetitions of this parser.
* Returns a new parser that matches at least `min` repetitions of `this` parser.
*
* @param min The minimum amount of times this parse must succeed to return a {@link Success}.
* @returns A Parser that returns a {@link Success} object it matches enough times, and a {@link Failure} otherwise.
* @returns A Parser that returns a {@link Success} object if it matches enough times, and a {@link Failure} otherwise.
*/
many(min: number): Parser<T[]> {
return new Parser((input, index, state) => {
@ -145,12 +147,11 @@ export class Parser<T> {
}
/**
* A method that returns a new {@link Parser} that matches at least `min` times, with each repetition separated
* by `separator`.
* Returns a new parser that matches at least `min` repetitions of `this` parser, separated by `separator`.
*
* @param separator The parser representing the separator that must appear between this parser's value.
* @param min The minimum amount of times the separator must appear.
* @returns A {@link Success} object if the minimum separator count is met, and a {@link Failure} otherwise.
* @param min The minimum amount of times this parse must succeed to return a {@link Success}.
* @returns A Parser that returns a {@link Success} object if it matches enough times, and a {@link Failure} otherwise.
*/
sep(separator: Parser<unknown>, min: number): Parser<T[]> {
if (min < 1) {
@ -165,15 +166,21 @@ export class Parser<T> {
).map(result => [result[0], ...result[1]]);
}
/**
* Builds a new parser that projects one member out of `this` parser's successful value.
* For example, if `this` succeeds with `{foo:1}`, then `this.select('foo')` succeeds with `1`.
*
* @param key The key of the member to extract from the successful value
* @returns A Parser whose value is `this` parser's value subscripted by `key`
*/
select<K extends keyof T>(key: K): Parser<T[K]> {
const pick = (value: T): T[K] => value[key];
return this.map(pick);
}
/**
* A method that returns a new {@link Parser} that attempts to match, but returns a {@link Success} with the value `null`
* on failure.
* Returns a new parser, just like `this` parser, but returns a null success on failure.
*
* @returns A {@link Success} object.
* @returns A Parser that always returns a {@link Success} object, maybe with a `null` inside.
*/
option(): Parser<T | null> {
return alt([
@ -184,7 +191,7 @@ export class Parser<T> {
}
/**
* A function that returns a {@link Parser} that succeeds if it matches the supplied string.
* Construct a {@link Parser} that matches the supplied string.
*
* @param value The string that the returned {@link Parser} checks for.
* @returns A {@link Parser} that matches the supplied string.
@ -202,7 +209,7 @@ export function str<T extends string>(value: T): Parser<T> {
}
/**
* A function that returns {@link Parser} that succeeds if the input matches the supplied regular expression.
* Construct a {@link Parser} that matches the supplied regular expression.
*
* @param pattern The regular expression that the returned {@link Parser} tries to match.
* @returns A {@link Parser} that checks if the input matches the supplied regular expression.
@ -230,18 +237,12 @@ export type SeqParseResult<T extends unknown[]> =
: unknown[];
/**
* A function that returns a {@link Parser} that goes through the parsers provided, in order, and checks that they all
* Construct a {@link Parser} that goes through the parsers provided, in order, and checks that they all
* succeed. A {@link Failure} object is returned if any of the parsers fails.
* The value in the {@link Success} returned by the parser varies depending on the value of `select`:
*
* If select is `null`, then the array of the results of the supplied parsers is returned, otherwise the value
* in the array at the specified index is returned.
*
* @param parsers The array of {@link Parser Parsers} that are checked to see if it succeeds.
* @param select The index of the result array that is returned.
* @returns A {@link Parser} that runs through the parsers in the order that they were provided and returns
* a value based on the state of `select` (the entire array if `null`, else the value held at the
* index specified by `select`).
* all the values they returned.
*/
export function seq<Parsers extends Parser<unknown>[]>(...parsers: Parsers): Parser<SeqParseResult<Parsers>> {
return new Parser((input, index, state) => {
@ -261,7 +262,7 @@ export function seq<Parsers extends Parser<unknown>[]>(...parsers: Parsers): Par
}
/**
* A function that returns a {@link Parser} that goes through the parsers provided, in order, and checks if any succeed.
* Construct a {@link Parser} that goes through the parsers provided, in order, and checks if any succeed.
* The returned parser produces the result of the first element of `parsers` to succeed, or a failure if none do.
*
* @param parsers The {@link Parser Parsers} that should be used.
@ -281,7 +282,7 @@ export function alt<Parsers extends Parser<unknown>[]>(parsers: Parsers): Parser
}
/**
* A function that returns a constant {@link Parser}.
* Construct a constant {@link Parser} that always succeeds with the given value.
*
* @param value The value to be used in the returned {@link Success} object.
* @returns A {@link Parser} that always returns a {@link Success} with the specified value.
@ -293,7 +294,7 @@ function succeeded<T>(value: T): Parser<T> {
}
/**
* A function that returns a {@link Parser} that inverts the result of the parser supplied.
* Construct a {@link Parser} that succeeds when the given parser fails, and vice versa.
*
* @param parser The {@link Parser} to be matched.
* @returns A {@link Success} with the value `null` if the parser fails, or a {@link Failure} if it succeeds.
@ -308,8 +309,9 @@ export function notMatch(parser: Parser<unknown>): Parser<null> {
}
/**
* A function that returns a {@link Parser} that fails if `parserExcluded` succeeds, and returns the result of `parserIncluded`
* otherwise.
* Construct a {@link Parser} just like `parserIncluded`, but fails if
* `parserExcluded` succeeds. So it matches the "included" language,
* minus the "excluded" language.
*
* @param parserIncluded The {@link Parser} that should succeed
* @param parserExcluded The {@link Parser} that should fail
@ -334,12 +336,11 @@ export const cr = str('\r');
export const lf = str('\n');
/** A {@link Parser} that matches the character sequence `\r\n`. */
export const crlf = str('\r\n');
/** A {@link Parser} that matches for any valid new line sequences. */
/** A {@link Parser} that matches any valid new line sequence. */
export const newline = alt([crlf, cr, lf]);
/**
* A {@link Parser} that succeeds so long as it is not at the end of the input string and returns the value of
* the next character.
* A {@link Parser} that matches a character.
*/
export const char = new Parser((input, index, _state) => {
if ((input.length - index) < 1) {
@ -350,8 +351,7 @@ export const char = new Parser((input, index, _state) => {
});
/**
* A {@link Parser} that checks that the current position is the beginning of a line. For this parser to succeed,
* either the current index must be zero, or the previous character is a `\n` or `\r`.
* A {@link Parser} that checks that we are at the beginning of a line or of the input.
*/
export const lineBegin = new Parser((input, index, state) => {
if (index === 0) {
@ -367,8 +367,7 @@ export const lineBegin = new Parser((input, index, state) => {
});
/**
* A {@link Parser} that checks that the current position is the end of a line. For this parser to succeed, either the current
* index must be equal to the input length, or the current character is a `\n` or `\r`.
* A {@link Parser} that checks if we are at the end of a line or of the input.
*/
export const lineEnd = new Parser((input, index, state) => {
if (index === input.length) {
@ -384,12 +383,16 @@ export const lineEnd = new Parser((input, index, state) => {
});
/**
* A function that lazily loads the supplied {@link Parser}
* Lazily define a parser. This allows for self-recursive parsers (see for example the `url` and `hashtag` rules)
*
* @param fn The {@link Parser} that the returned parser should use.
* @returns A {@link Parser} that checks using the supplied parser's {@link Parser.handler}
* @param fn A function that returns the actual {@link Parser}
* @returns A {@link Parser} that becomes the actual parser on its first use
*/
export function lazy<T>(fn: () => Parser<T>): Parser<T> {
// Convert a parser generator into a parser: when `parser` is first
// invoked, it replaces its own handler with the real one, and calls
// it. On all subsequent invocations, the real handler will be
// called directly
const parser: Parser<T> = new Parser((input, index, state) => {
parser.handler = fn().handler;
return parser.handler(input, index, state);

View file

@ -54,7 +54,7 @@ function seqOrText<Parsers extends P.Parser<unknown>[]>(...parsers: Parsers): P.
}
/**
* A {@link P.Parser Parser} that succeeds if the supplied state is not a link label.
* A {@link P.Parser Parser} that succeeds if we're not inside a link label (the bit between `[]`)
*/
const notLinkLabel = new P.Parser((_input, index, state) => {
return (!state.linkLabel)
@ -63,7 +63,7 @@ const notLinkLabel = new P.Parser((_input, index, state) => {
});
/**
* A {@link P.Parser Parser} that succeeds if the current nest depth is less than the nest limit.
* A {@link P.Parser Parser} that succeeds if we can still nest nodes
*/
const nestable = new P.Parser((_input, index, state) => {
return (state.depth < state.nestLimit)
@ -236,7 +236,7 @@ export const language = P.createLanguage<TypeTable>({
},
/**
* A {@link P.Parser Parser} that only matches rules that can be done without disrupting text.
* A {@link P.Parser Parser} that only matches "inline" rules (those that don't start paragraphs and the like)
*
* @param r The rules of SFM
* @returns A {@link P.Parser Parser} that matches all inline rules.
@ -290,7 +290,7 @@ export const language = P.createLanguage<TypeTable>({
},
/**
* A {@link P.Parser Parser} that matches single lines that begin with a `>` character.
* A {@link P.Parser Parser} that matches "block quoted" SFM (all lines begin with a `>` character).
*
* @param r The rules of SFM
* @returns The {@link P.Parser Parser} for this rule
@ -412,7 +412,7 @@ export const language = P.createLanguage<TypeTable>({
/**
* A {@link P.Parser Parser} that matches multi-line text that is surrounded with the `***` mark.
* The marks do not have to be on their own lines and have no restrictions on placement.
* The marks do not have to be on their own lines.
*
* @param r The rules of SFM
* @returns The {@link P.Parser Parser} for this rule
@ -431,7 +431,7 @@ export const language = P.createLanguage<TypeTable>({
/**
* A {@link P.Parser Parser} that matches multi-line text that is surrounded with the `**` mark.
* The marks do not have to be on their own lines and have no restrictions on placement.
* The marks do not have to be on their own lines.
*
* @param r The rules of SFM
* @returns The {@link P.Parser Parser} for this rule
@ -450,7 +450,7 @@ export const language = P.createLanguage<TypeTable>({
/**
* A {@link P.Parser Parser} that matches multi-line text that is opened with `<b>` and closed with `</b>`.
* The marks do not have to be on their own lines and have no restrictions on placement.
* The marks do not have to be on their own lines.
*
* @param r The rules of SFM
* @returns The {@link P.Parser Parser} for this rule
@ -469,8 +469,7 @@ export const language = P.createLanguage<TypeTable>({
},
/**
* A {@link P.Parser Parser} that matches multi-line text that is surrounded with the `__` mark.
* The marks do not have to be on their own lines and have no restrictions on placement.
* A {@link P.Parser Parser} that matches text that is surrounded with the `__` mark.
*
* @param r The rules of SFM
* @returns The {@link P.Parser Parser} for this rule
@ -486,7 +485,7 @@ export const language = P.createLanguage<TypeTable>({
/**
* A {@link P.Parser Parser} that matches multi-line text that is opened with `<small>` and closed with `</small>`.
* The marks do not have to be on their own lines and have no restrictions on placement.
* The marks do not have to be on their own lines.
*
* @param r The rules of SFM
* @returns The {@link P.Parser Parser} for this rule
@ -506,7 +505,7 @@ export const language = P.createLanguage<TypeTable>({
/**
* A {@link P.Parser Parser} that matches multi-line text that is opened with `<i>` and closed with `</i>`.
* The marks do not have to be on their own lines and have no restrictions on placement.
* The marks do not have to be on their own lines.
*
* @param r The rules of SFM
* @returns The {@link P.Parser Parser} for this rule
@ -525,8 +524,8 @@ export const language = P.createLanguage<TypeTable>({
},
/**
* A {@link P.Parser Parser} that matches multi-line text that is surrounded with the `*` mark.
* The marks do not have to be on their own lines and have no restrictions on placement.
* A {@link P.Parser Parser} that matches text that is surrounded with the `*` mark.
* The opening mark must not be preceded by a letter or digit (so `foo*bar*` does not generate an italic `bar`).
*
* @param r The rules of SFM
* @returns The {@link P.Parser Parser} for this rule
@ -553,8 +552,8 @@ export const language = P.createLanguage<TypeTable>({
},
/**
* A {@link P.Parser Parser} that matches multi-line text that is surrounded with the `_` mark.
* The marks do not have to be on their own lines and have no restrictions on placement.
* A {@link P.Parser Parser} that matches text that is surrounded with the `_` mark.
* The opening mark must not be preceded by a letter or digit (so `foo_bar_` does not generate an italic `bar`).
*
* @param r The rules of SFM
* @returns The {@link P.Parser Parser} for this rule
@ -582,7 +581,7 @@ export const language = P.createLanguage<TypeTable>({
/**
* A {@link P.Parser Parser} that matches multi-line text that is opened with `<s>` and closed with `</s>`.
* The marks do not have to be on their own lines and have no restrictions on placement.
* The marks do not have to be on their own lines.
*
* @param r The rules of SFM
* @returns The {@link P.Parser Parser} for this rule
@ -601,8 +600,7 @@ export const language = P.createLanguage<TypeTable>({
},
/**
* A {@link P.Parser Parser} that matches multi-line text that is surrounded with the `~~` mark.
* The marks do not have to be on their own lines and have no restrictions on placement.
* A {@link P.Parser Parser} that matches inline content that is surrounded with the `~~` mark.
*
* @param r The rules of SFM
* @returns The {@link P.Parser Parser} for this rule
@ -620,7 +618,7 @@ export const language = P.createLanguage<TypeTable>({
},
/**
* A {@link P.Parser Parser} that matches unicode emojis according to a regex.
* A {@link P.Parser Parser} that matches unicode emoji names
*
* @param r The rules of SFM
* @returns The {@link P.Parser Parser} for this rule
@ -632,7 +630,7 @@ export const language = P.createLanguage<TypeTable>({
/**
* A {@link P.Parser Parser} that matches multi-line text that is opened with `<plain>` and closed with `</plain>`.
* The marks do not have to be on their own lines and have no restrictions on placement.
* The marks do not have to be on their own lines.
*
* @param r The rules of SFM
* @returns The {@link P.Parser Parser} for this rule
@ -652,7 +650,14 @@ export const language = P.createLanguage<TypeTable>({
).select(2).map(result => M.PLAIN(result));
},
/**
* A {@link P.Parser Parser} that matches a call to an "advanced SFM" function (i.e. `$[something ]`)
*
* @param r The rules of SFM
* @returns The {@link P.Parser Parser} for this rule
*/
fn: r => {
// parse a function name
const fnName = new P.Parser((input, index, state) => {
const result = P.regexp(/[a-z0-9_]+/i).handler(input, index, state);
if (!result.success) {
@ -660,6 +665,7 @@ export const language = P.createLanguage<TypeTable>({
}
return P.success(result.index, result.value);
});
// parse an argument: `foo` or `foo=10-bar`
const arg: P.Parser<ArgPair> = P.seq(
P.regexp(/[a-z0-9_]+/i),
P.seq(
@ -672,6 +678,7 @@ export const language = P.createLanguage<TypeTable>({
v: (result[1] != null) ? result[1] : true,
};
});
// parse a sequence of arguments: `.foo,bar=13,etc`
const args = P.seq(
P.str('.'),
arg.sep(P.str(','), 1),
@ -683,6 +690,7 @@ export const language = P.createLanguage<TypeTable>({
return result;
});
const fnClose = P.str(']');
// the parser: `$[`, function name, maybe arguments, a space (required!), then 0 or more inline elements, `]`
return seqOrText(
P.str('$['),
fnName,
@ -700,8 +708,8 @@ export const language = P.createLanguage<TypeTable>({
},
/**
* A {@link P.Parser Parser} that matches multi-line text that is surrounded with the `` ` `` mark.
* The marks do not have to be on their own lines and have no restrictions on placement.
* A {@link P.Parser Parser} that matches text that is surrounded with the `` ` `` mark.
* Notice that `` `foo´ `bar´ `` is explicitly rejected by this parser.
*
* @param r The rules of SFM
* @returns The {@link P.Parser Parser} for this rule
@ -719,8 +727,7 @@ export const language = P.createLanguage<TypeTable>({
},
/**
* A {@link P.Parser Parser} that matches single-line text opened with `\(` and closed with `\)`.
* The marks must be on the same line, but have no other restrictions on placement.
* A {@link P.Parser Parser} that matches text opened with `\(` and closed with `\)`.
*
* @param r The rules of SFM
* @returns The {@link P.Parser Parser} for this rule
@ -741,7 +748,6 @@ export const language = P.createLanguage<TypeTable>({
/**
* A {@link P.Parser Parser} that matches user mentions, which all must begin with `@{username}`, but only have to be followed by
* `@{hostname}` if the user is on a different instance.
* The mention must be contained within one line, but it has no other restrictions on placement.
*
* @param r the rules of SFM
* @returns The {@link P.Parser Parser} for this rule
@ -762,7 +768,7 @@ export const language = P.createLanguage<TypeTable>({
if (!result.success) {
return P.failure();
}
// check before (not mention)
// check before (not mention), so `foo@bar` is not a mention of the user `bar`
const beforeStr = input.slice(0, index);
if (/[a-z0-9]$/i.test(beforeStr)) {
return P.failure();
@ -815,8 +821,7 @@ export const language = P.createLanguage<TypeTable>({
/**
* A {@link P.Parser Parser} that matches single word hashtags starting with the character `#`.
* The contents of the hashtag are limited to alphanumeric characters, but cannot be made up of exclusively numbers.
* There are some characters that must be closed with another in order for the parse to succeed.
* There are no restrictions on the placement of hashtags.
* Handles some paired delimiters.
*
* @param r The rules of SFM
* @returns A {@link P.Parser Parser} for this rule
@ -852,7 +857,7 @@ export const language = P.createLanguage<TypeTable>({
if (!result.success) {
return P.failure();
}
// check before
// check before, so `foo#bar` is not hashtag `bar`
const beforeStr = input.slice(0, index);
if (/[a-z0-9]$/i.test(beforeStr)) {
return P.failure();
@ -869,8 +874,7 @@ export const language = P.createLanguage<TypeTable>({
/**
* A {@link P.Parser Parser} that matches single word emojis surrounded by the `:` mark.
* The marks must be on the same line, and alphanumeric characters cannot appear both in front
* of, and behind, the opening and closing marks respectively.
* `foo:bar:baz` is not a reference to the `bar` emoji
*
* @param r The rules of SFM
* @returns A {@link P.Parser Parser} for this rule
@ -888,11 +892,10 @@ export const language = P.createLanguage<TypeTable>({
},
/**
* A {@link P.Parser Parser} that matches inline links, which are made up of the label and the url.
* A {@link P.Parser Parser} that matches links, which are made up of the label and the url.
* The label must be opened with either `?[` for silent links or `[` for normal links and closed with `]`.
* The url must be opened with `(` and closed with `)`, and the contents of the url must follow either the
* {@link language.url} or {@link language.urlAlt} rules.
* The inline link must be on a single line, but has no other restrictions on placement.
*
* @param r The rules of SFM
* @returns A {@link P.Parser Parser} for this rule
@ -933,13 +936,14 @@ export const language = P.createLanguage<TypeTable>({
},
/**
* A {@link P.Parser Parser} that matches the standard format for urls.
* A {@link P.Parser Parser} that matches something that vaguely looks like a URL (starts with `http://` or `https://`, contains valid URL characters)
*
* @param r The rules of SFM
* @returns A {@link P.Parser Parser} for this rule
*/
url: () => {
const urlChar = P.regexp(/[.,a-z0-9_/:%#@$&?!~=+-]/i);
// this requires that URL have balanced brackets, otherwise the link parser gets very confused
const innerItem: P.Parser<unknown> = P.lazy(() => P.alt([
P.seq(
P.str('('), nest(innerItem, urlChar).many(0), P.str(')'),
@ -978,7 +982,8 @@ export const language = P.createLanguage<TypeTable>({
},
/**
* A {@link P.Parser Parser} that matches an alternate form for urls, where it is opened with `<` and closed with `>`.
* A {@link P.Parser Parser} that matches something that vaguely looks like a URL, but surrounded by `<>`.
* We have to use this in the second half of a link when the URL contains unmatched brackets (e.g. `[foo](<http://bar/)>)`)
*
* @param r The rules of SFM
* @returns A {@link P.Parser Parser} for this rule
@ -1006,7 +1011,6 @@ export const language = P.createLanguage<TypeTable>({
/**
* A {@link P.Parser Parser} that matches single line text for a search query. The query must have either `[検索]` or
* `[search]` at the end of the line.
* The query and button must be on the same line and there can be nothing else on that line.
*
* @param r The rules of SFM
* @returns A {@link P.Parser Parser} for this rule