From 4ff4ee6fd2243eacbea430801f089c77447fcd3c Mon Sep 17 00:00:00 2001 From: dakkar Date: Tue, 26 Dec 2023 18:16:15 +0000 Subject: [PATCH 1/2] allow more non-ascii in markup --- src/internal/parser.ts | 6 +++--- test/parser.ts | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/src/internal/parser.ts b/src/internal/parser.ts index 7c36803..36f87d6 100644 --- a/src/internal/parser.ts +++ b/src/internal/parser.ts @@ -12,8 +12,8 @@ import twemojiRegex from '@twemoji/parser/dist/lib/regex'; type ArgPair = { k: string, v: string | true }; type Args = Record; -const space = P.regexp(/[\u0020\u3000\t]/); -const alphaAndNum = P.regexp(/[a-z0-9]/i); +const space = P.regexp(/[\s--[\n\r]]/v); +const alphaAndNum = P.regexp(/\p{Letter}|\p{Number}/iu); const newLine = P.alt([P.crlf, P.cr, P.lf]); function seqOrText(parsers: P.Parser[]): P.Parser { @@ -579,7 +579,7 @@ export const language = P.createLanguage({ hashtag: r => { const mark = P.str('#'); const hashTagChar = P.seq([ - P.notMatch(P.alt([P.regexp(/[ \u3000\t., \u2063\t.,!?'"#:/[\]【】()「」()<>]/), space, newLine])), + P.notMatch(P.regexp(/[\s.,\u2063!?'"#:/[\]【】()「」()<>]/u)), P.char, ], 1); const innerItem: P.Parser = P.lazy(() => P.alt([ diff --git a/test/parser.ts b/test/parser.ts index e0a31b7..0d06cb8 100644 --- a/test/parser.ts +++ b/test/parser.ts @@ -565,6 +565,16 @@ hoge`; assert.deepStrictEqual(mfm.parse(input), output); }); + test('basic non-ascii', () => { + const input = '*aßc*'; + const output = [ + ITALIC([ + TEXT('aßc') + ]) + ]; + assert.deepStrictEqual(mfm.parse(input), output); + }); + test('ignore a italic syntax if the before char is neither a space nor an LF nor [^a-z0-9]i', () => { let input = 'before*abc*after'; let output: mfm.MfmNode[] = [TEXT('before*abc*after')]; @@ -605,6 +615,16 @@ hoge`; assert.deepStrictEqual(mfm.parse(input), output); }); + test('basic non-ascii', () => { + const input = '_abç_'; + const output = [ + ITALIC([ + TEXT('abç') + ]) + ]; + assert.deepStrictEqual(mfm.parse(input), output); + }); + test('ignore a italic syntax if the before char is neither a space nor an LF nor [^a-z0-9]i', () => { let input = 'before_abc_after'; let output: mfm.MfmNode[] = [TEXT('before_abc_after')]; @@ -640,6 +660,14 @@ hoge`; ])]; assert.deepStrictEqual(mfm.parse(input), output); }); + + test('basic non-ascii', () => { + const input = '~~föo~~'; + const output = [STRIKE([ + TEXT('föo') + ])]; + assert.deepStrictEqual(mfm.parse(input), output); + }); }); describe('inlineCode', () => { @@ -781,6 +809,12 @@ hoge`; assert.deepStrictEqual(mfm.parse(input), output); }); + test('basic non-ascii', () => { + const input = '#äbc'; + const output = [HASHTAG('äbc')]; + assert.deepStrictEqual(mfm.parse(input), output); + }); + test('with keycap number sign', () => { const input = '#️⃣abc123 #abc'; const output = [UNI_EMOJI('#️⃣'), TEXT('abc123 '), HASHTAG('abc')]; From 8818a2df9a816a587b2b9bb5e626771318a4da7c Mon Sep 17 00:00:00 2001 From: dakkar Date: Thu, 28 Dec 2023 08:45:03 +0000 Subject: [PATCH 2/2] test that I've not broken whitespace handling --- test/parser.ts | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/test/parser.ts b/test/parser.ts index 0d06cb8..e4c1552 100644 --- a/test/parser.ts +++ b/test/parser.ts @@ -815,6 +815,17 @@ hoge`; assert.deepStrictEqual(mfm.parse(input), output); }); + test('newlines and whitespace', () => { + const input = 'before #abc\nafter #def\u3000foo #ghi\tbar #jkl'; + const output = [ + TEXT('before '), HASHTAG('abc'), + TEXT('\nafter '), HASHTAG('def'), + TEXT('\u3000foo '), HASHTAG('ghi'), + TEXT('\tbar '), HASHTAG('jkl'), + ]; + assert.deepStrictEqual(mfm.parse(input), output); + }); + test('with keycap number sign', () => { const input = '#️⃣abc123 #abc'; const output = [UNI_EMOJI('#️⃣'), TEXT('abc123 '), HASHTAG('abc')];