From 91c925d86a215a742b93ffaa7663c1457f44cbb5 Mon Sep 17 00:00:00 2001 From: Nick Frasser <1693461+nfrasser@users.noreply.github.com> Date: Tue, 3 Dec 2024 22:49:17 -0500 Subject: [PATCH] Treat object replacement character as whitespace Fixes #495 --- packages/linkifyjs/src/scanner.mjs | 3 +++ test/spec/linkifyjs/parser.test.mjs | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/packages/linkifyjs/src/scanner.mjs b/packages/linkifyjs/src/scanner.mjs index b54fb18..4ebc285 100644 --- a/packages/linkifyjs/src/scanner.mjs +++ b/packages/linkifyjs/src/scanner.mjs @@ -13,6 +13,7 @@ import assign from './assign.mjs'; const NL = '\n'; // New line character const EMOJI_VARIATION = '\ufe0f'; // Variation selector, follows heart and others const EMOJI_JOINER = '\u200d'; // zero-width joiner +const OBJECT_REPLACEMENT = '\ufffc'; // whitespace placeholder that sometimes appears in rich text editors let tlds = null, utlds = null; // don't change so only have to be computed once @@ -112,9 +113,11 @@ export function init(customSchemes = []) { // Tokens of only non-newline whitespace are arbitrarily long // If any whitespace except newline, more whitespace! const Ws = tr(Start, re.SPACE, tk.WS, { [fsm.whitespace]: true }); + tt(Start, OBJECT_REPLACEMENT, Ws); tt(Start, NL, tk.NL, { [fsm.whitespace]: true }); tt(Ws, NL); // non-accepting state to avoid mixing whitespaces tr(Ws, re.SPACE, Ws); + tt(Ws, OBJECT_REPLACEMENT, Ws); // Emoji tokens. They are not grouped by the scanner except in cases where a // zero-width joiner is present diff --git a/test/spec/linkifyjs/parser.test.mjs b/test/spec/linkifyjs/parser.test.mjs index ce6f118..6fefb43 100644 --- a/test/spec/linkifyjs/parser.test.mjs +++ b/test/spec/linkifyjs/parser.test.mjs @@ -313,6 +313,11 @@ const tests = [ [Text, Url, Text, Url, Text], ['Link 1『', 'http://foo.com/blah_blah', '』 Link 2『', 'http://foo.com/blah_blah_(wikipedia)_(again)', '』'], ], + [ + 'https://google.com\ufffcthis', // object replacement character + [Url, Text], + ['https://google.com', '\ufffcthis'], + ], ]; describe('linkifyjs/parser#run()', () => {