merge: allow more non-ascii in markup (#4)

This commit is contained in:
Marie 2023-12-28 12:54:56 +01:00
commit 3ab3ca1cfb
2 changed files with 48 additions and 3 deletions

View file

@ -12,8 +12,8 @@ import twemojiRegex from '@twemoji/parser/dist/lib/regex';
// One key/value pair; `v` is either a string or the literal `true`.
type ArgPair = { k: string, v: string | true };
// Map from argument name to a string or the literal `true`.
type Args = Record<string, string | true>;
// NOTE(review): `space` and `alphaAndNum` each appear twice in this view.
// Presumably the first pair is the pre-change version and the second pair is
// its replacement — confirm against the actual file.
// Matches one ASCII space, ideographic space (U+3000) or tab.
const space = P.regexp(/[\u0020\u3000\t]/);
// Matches one ASCII letter or digit, case-insensitively.
const alphaAndNum = P.regexp(/[a-z0-9]/i);
// Matches one whitespace character other than LF or CR; the `v` flag enables
// the `--` set-subtraction syntax used here.
const space = P.regexp(/[\s--[\n\r]]/v);
// Matches one character with the Unicode Letter or Number property.
const alphaAndNum = P.regexp(/\p{Letter}|\p{Number}/iu);
// Alternation over the CRLF, CR and LF parsers.
const newLine = P.alt([P.crlf, P.cr, P.lf]);
function seqOrText(parsers: P.Parser<any>[]): P.Parser<any[] | string> {
@ -579,7 +579,7 @@ export const language = P.createLanguage({
hashtag: r => {
const mark = P.str('#');
const hashTagChar = P.seq([
P.notMatch(P.alt([P.regexp(/[ \u3000\t., \u2063\t.,!?'"#:/[\]【】()「」()<>]/), space, newLine])),
P.notMatch(P.regexp(/[\s.,\u2063!?'"#:/[\]()<>]/u)),
P.char,
], 1);
const innerItem: P.Parser<any> = P.lazy(() => P.alt([

View file

@ -565,6 +565,16 @@ hoge`;
assert.deepStrictEqual(mfm.parse(input), output);
});
test('basic non-ascii', () => {
	// A non-ASCII letter between the asterisks must still yield an italic node.
	const expected = [ITALIC([TEXT('aßc')])];
	const actual = mfm.parse('*aßc*');
	assert.deepStrictEqual(actual, expected);
});
test('ignore a italic syntax if the before char is neither a space nor an LF nor [^a-z0-9]i', () => {
let input = 'before*abc*after';
let output: mfm.MfmNode[] = [TEXT('before*abc*after')];
@ -605,6 +615,16 @@ hoge`;
assert.deepStrictEqual(mfm.parse(input), output);
});
test('basic non-ascii', () => {
	// A non-ASCII letter between the underscores must still yield an italic node.
	const expected = [ITALIC([TEXT('abç')])];
	const actual = mfm.parse('_abç_');
	assert.deepStrictEqual(actual, expected);
});
test('ignore a italic syntax if the before char is neither a space nor an LF nor [^a-z0-9]i', () => {
let input = 'before_abc_after';
let output: mfm.MfmNode[] = [TEXT('before_abc_after')];
@ -640,6 +660,14 @@ hoge`;
])];
assert.deepStrictEqual(mfm.parse(input), output);
});
test('basic non-ascii', () => {
	// Strike-through content containing a non-ASCII letter parses as one STRIKE node.
	const expected = [
		STRIKE([TEXT('föo')]),
	];
	const actual = mfm.parse('~~föo~~');
	assert.deepStrictEqual(actual, expected);
});
});
describe('inlineCode', () => {
@ -781,6 +809,23 @@ hoge`;
assert.deepStrictEqual(mfm.parse(input), output);
});
test('basic non-ascii', () => {
	// A hashtag whose first character is a non-ASCII letter is kept whole.
	const expected = [HASHTAG('äbc')];
	assert.deepStrictEqual(mfm.parse('#äbc'), expected);
});
test('newlines and whitespace', () => {
	// Each hashtag must end at the following LF, ideographic space (U+3000) or tab,
	// with that separator kept at the start of the next text node.
	const source = 'before #abc\nafter #def\u3000foo #ghi\tbar #jkl';
	const expected = [
		TEXT('before '),
		HASHTAG('abc'),
		TEXT('\nafter '),
		HASHTAG('def'),
		TEXT('\u3000foo '),
		HASHTAG('ghi'),
		TEXT('\tbar '),
		HASHTAG('jkl'),
	];
	assert.deepStrictEqual(mfm.parse(source), expected);
});
test('with keycap number sign', () => {
const input = '#⃣abc123 #abc';
const output = [UNI_EMOJI('#️⃣'), TEXT('abc123 '), HASHTAG('abc')];