From 223e317b8b9bc055fcef3bd67cf96e49f57f8791 Mon Sep 17 00:00:00 2001 From: Nick Frasser <1693461+nfrasser@users.noreply.github.com> Date: Tue, 21 Nov 2023 22:24:54 -0500 Subject: [PATCH] =?UTF-8?q?Bracket=20parsing=20refactor=20and=20support=20?= =?UTF-8?q?for=20=E3=80=8C=E3=80=8D=E3=80=8E=E3=80=8F=EF=BC=9C=EF=BC=9E=20?= =?UTF-8?q?brackets=20(#463)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Cleaner brace/bracket/parens state transition definitions * Add additional brace kinds * Check that secrets are defined before running BrowserStack tests so it does not fail --- packages/linkifyjs/src/parser.js | 161 ++++------- packages/linkifyjs/src/scanner.js | 42 ++- packages/linkifyjs/src/text.js | 25 +- test/run.sh | 6 +- test/spec/linkifyjs/parser.test.js | 444 +++++++++++++++-------------- 5 files changed, 347 insertions(+), 331 deletions(-) diff --git a/packages/linkifyjs/src/parser.js b/packages/linkifyjs/src/parser.js index 1f4d6fd8..7fc15575 100644 --- a/packages/linkifyjs/src/parser.js +++ b/packages/linkifyjs/src/parser.js @@ -43,7 +43,7 @@ export function init({ groups }) { tk.SLASH, tk.SYM, tk.TILDE, - tk.UNDERSCORE + tk.UNDERSCORE, ]); // Types of tokens that can follow a URL and be part of the query string @@ -51,23 +51,29 @@ export function init({ groups }) { // Characters that cannot appear in the URL at all should be excluded const qsNonAccepting = [ tk.APOSTROPHE, - tk.CLOSEANGLEBRACKET, - tk.CLOSEBRACE, - tk.CLOSEBRACKET, - tk.CLOSEPAREN, - tk.FULLWIDTH_CLOSEPAREN, tk.COLON, tk.COMMA, tk.DOT, tk.EXCLAMATION, + tk.QUERY, + tk.QUOTE, + tk.SEMI, tk.OPENANGLEBRACKET, + tk.CLOSEANGLEBRACKET, tk.OPENBRACE, + tk.CLOSEBRACE, + tk.CLOSEBRACKET, tk.OPENBRACKET, tk.OPENPAREN, - tk.FULLWIDTH_OPENPAREN, - tk.QUERY, - tk.QUOTE, - tk.SEMI + tk.CLOSEPAREN, + tk.FULLWIDTHLEFTPAREN, + tk.FULLWIDTHRIGHTPAREN, + tk.LEFTCORNERBRACKET, + tk.RIGHTCORNERBRACKET, + tk.LEFTWHITECORNERBRACKET, + tk.RIGHTWHITECORNERBRACKET, +
tk.FULLWIDTHLESSTHAN, + tk.FULLWIDTHGREATERTHAN, ]; // For addresses without the mailto prefix @@ -79,11 +85,11 @@ export function init({ groups }) { tk.BACKSLASH, tk.BACKTICK, tk.CARET, - tk.CLOSEBRACE, tk.DOLLAR, tk.EQUALS, tk.HYPHEN, tk.OPENBRACE, + tk.CLOSEBRACE, tk.PERCENT, tk.PIPE, tk.PLUS, @@ -92,7 +98,7 @@ export function init({ groups }) { tk.SLASH, tk.SYM, tk.TILDE, - tk.UNDERSCORE + tk.UNDERSCORE, ]; // The universal starting state. @@ -104,7 +110,9 @@ export function init({ groups }) { ta(Localpart, localpartAccepting, Localpart); ta(Localpart, groups.domain, Localpart); - const Domain = makeState(), Scheme = makeState(), SlashScheme = makeState(); + const Domain = makeState(), + Scheme = makeState(), + SlashScheme = makeState(); ta(Start, groups.domain, Domain); // parsed string ends with a potential domain name (A) ta(Start, groups.scheme, Scheme); // e.g., 'mailto' ta(Start, groups.slashscheme, SlashScheme); // e.g., 'http' @@ -144,7 +152,7 @@ export function init({ groups }) { // Final possible email states const EmailColon = tt(Email, tk.COLON); // URL followed by colon (potential port number here) - /*const EmailColonPort = */ta(EmailColon, groups.numeric, mtk.Email); // URL followed by colon and port numner + /*const EmailColonPort = */ ta(EmailColon, groups.numeric, mtk.Email); // URL followed by colon and port number // Account for dots and hyphens. 
Hyphens are usually parts of domain names // (but not TLDs) @@ -206,86 +214,46 @@ export function init({ groups }) { ta(UriPrefix, qsAccepting, Url); tt(UriPrefix, tk.SLASH, Url); - // URL, followed by an opening bracket - const UrlOpenbrace = tt(Url, tk.OPENBRACE); // URL followed by { - const UrlOpenbracket = tt(Url, tk.OPENBRACKET); // URL followed by [ - const UrlOpenanglebracket = tt(Url, tk.OPENANGLEBRACKET); // URL followed by < - const UrlOpenparen = tt(Url, tk.OPENPAREN); // URL followed by ( - const UrlFullwidthOpenparen = tt(Url, tk.FULLWIDTH_OPENPAREN); // URL followed by ( - - tt(UrlNonaccept, tk.OPENBRACE, UrlOpenbrace); - tt(UrlNonaccept, tk.OPENBRACKET, UrlOpenbracket); - tt(UrlNonaccept, tk.OPENANGLEBRACKET, UrlOpenanglebracket); - tt(UrlNonaccept, tk.OPENPAREN, UrlOpenparen); - tt(UrlNonaccept, tk.FULLWIDTH_OPENPAREN, UrlFullwidthOpenparen); - - // Closing bracket component. This character WILL be included in the URL - tt(UrlOpenbrace, tk.CLOSEBRACE, Url); - tt(UrlOpenbracket, tk.CLOSEBRACKET, Url); - tt(UrlOpenanglebracket, tk.CLOSEANGLEBRACKET, Url); - tt(UrlOpenparen, tk.CLOSEPAREN, Url); - tt(UrlFullwidthOpenparen, tk.FULLWIDTH_CLOSEPAREN, Url); - tt(UrlOpenbrace, tk.CLOSEBRACE, Url); - - // URL that beings with an opening bracket, followed by a symbols. - // Note that the final state can still be `UrlOpenbrace` (if the URL only - // has a single opening bracket for some reason). 
- const UrlOpenbraceQ = makeState(mtk.Url); // URL followed by { and some symbols that the URL can end it - const UrlOpenbracketQ = makeState(mtk.Url); // URL followed by [ and some symbols that the URL can end it - const UrlOpenanglebracketQ = makeState(mtk.Url); // URL followed by < and some symbols that the URL can end it - const UrlOpenparenQ = makeState(mtk.Url); // URL followed by ( and some symbols that the URL can end it - const UrlFullwidthOpenparenQ = makeState(mtk.Url); // URL followed by ( and some symbols that the URL can end it - ta(UrlOpenbrace, qsAccepting, UrlOpenbraceQ); - ta(UrlOpenbracket, qsAccepting, UrlOpenbracketQ); - ta(UrlOpenanglebracket, qsAccepting, UrlOpenanglebracketQ); - ta(UrlOpenparen, qsAccepting, UrlOpenparenQ); - ta(UrlFullwidthOpenparen, qsAccepting, UrlFullwidthOpenparenQ); - - const UrlOpenbraceSyms = makeState(); // UrlOpenbrace followed by some symbols it cannot end it - const UrlOpenbracketSyms = makeState(); // UrlOpenbracketQ followed by some symbols it cannot end it - const UrlOpenanglebracketSyms = makeState(); // UrlOpenanglebracketQ followed by some symbols it cannot end it - const UrlOpenparenSyms = makeState(); // UrlOpenparenQ followed by some symbols it cannot end it - const UrlFullwidthOpenparenSyms = makeState(); // UrlFullwidthOpenparenQ followed by some symbols it cannot end it - ta(UrlOpenbrace, qsNonAccepting); - ta(UrlOpenbracket, qsNonAccepting); - ta(UrlOpenanglebracket, qsNonAccepting); - ta(UrlOpenparen, qsNonAccepting); - ta(UrlFullwidthOpenparen, qsNonAccepting); - - // URL that begins with an opening bracket, followed by some symbols - ta(UrlOpenbraceQ, qsAccepting, UrlOpenbraceQ); - ta(UrlOpenbracketQ, qsAccepting, UrlOpenbracketQ); - ta(UrlOpenanglebracketQ, qsAccepting, UrlOpenanglebracketQ); - ta(UrlOpenparenQ, qsAccepting, UrlOpenparenQ); - ta(UrlFullwidthOpenparenQ, qsAccepting, UrlFullwidthOpenparenQ); - ta(UrlOpenbraceQ, qsNonAccepting, UrlOpenbraceQ); - ta(UrlOpenbracketQ, qsNonAccepting, 
UrlOpenbracketQ); - ta(UrlOpenanglebracketQ, qsNonAccepting, UrlOpenanglebracketQ); - ta(UrlOpenparenQ, qsNonAccepting, UrlOpenparenQ); - ta(UrlFullwidthOpenparenQ, qsAccepting, UrlFullwidthOpenparenQ); - - ta(UrlOpenbraceSyms, qsAccepting, UrlOpenbraceSyms); - ta(UrlOpenbracketSyms, qsAccepting, UrlOpenbracketQ); - ta(UrlOpenanglebracketSyms, qsAccepting, UrlOpenanglebracketQ); - ta(UrlOpenparenSyms, qsAccepting, UrlOpenparenQ); - ta(UrlFullwidthOpenparenSyms, qsAccepting, UrlFullwidthOpenparenQ); - ta(UrlOpenbraceSyms, qsNonAccepting, UrlOpenbraceSyms); - ta(UrlOpenbracketSyms, qsNonAccepting, UrlOpenbracketSyms); - ta(UrlOpenanglebracketSyms, qsNonAccepting, UrlOpenanglebracketSyms); - ta(UrlOpenparenSyms, qsNonAccepting, UrlOpenparenSyms); - ta(UrlFullwidthOpenparenSyms, qsAccepting, UrlFullwidthOpenparenSyms); - - // Close brace/bracket to become regular URL - tt(UrlOpenbracketQ, tk.CLOSEBRACKET, Url); - tt(UrlOpenanglebracketQ, tk.CLOSEANGLEBRACKET, Url); - tt(UrlOpenparenQ, tk.CLOSEPAREN, Url); - tt(UrlFullwidthOpenparenQ, tk.FULLWIDTH_CLOSEPAREN, Url); - tt(UrlOpenbraceQ, tk.CLOSEBRACE, Url); - tt(UrlOpenbracketSyms, tk.CLOSEBRACKET, Url); - tt(UrlOpenanglebracketSyms, tk.CLOSEANGLEBRACKET, Url); - tt(UrlFullwidthOpenparenSyms, tk.FULLWIDTH_CLOSEPAREN, Url); - tt(UrlOpenbraceSyms, tk.CLOSEPAREN, Url); - tt(UrlOpenbraceSyms, tk.FULLWIDTH_CLOSEPAREN, Url); + const bracketPairs = [ + [tk.OPENBRACE, tk.CLOSEBRACE], // {} + [tk.OPENBRACKET, tk.CLOSEBRACKET], // [] + [tk.OPENPAREN, tk.CLOSEPAREN], // () + [tk.OPENANGLEBRACKET, tk.CLOSEANGLEBRACKET], // <> + [tk.FULLWIDTHLEFTPAREN, tk.FULLWIDTHRIGHTPAREN], // （） + [tk.LEFTCORNERBRACKET, tk.RIGHTCORNERBRACKET], // 「」 + [tk.LEFTWHITECORNERBRACKET, tk.RIGHTWHITECORNERBRACKET], // 『』 + [tk.FULLWIDTHLESSTHAN, tk.FULLWIDTHGREATERTHAN], // ＜＞ + ]; + + for (let i = 0; i < bracketPairs.length; i++) { + const [OPEN, CLOSE] = bracketPairs[i]; + const UrlOpen = tt(Url, OPEN); // URL followed by open bracket + + // Continue
not accepting for open brackets + tt(UrlNonaccept, OPEN, UrlOpen); + + // Closing bracket component. This character WILL be included in the URL + tt(UrlOpen, CLOSE, Url); + + // URL that begins with an opening bracket, followed by some symbols. + // Note that the final state can still be `UrlOpen` (if the URL has a + // single opening bracket for some reason). + const UrlOpenQ = makeState(mtk.Url); + ta(UrlOpen, qsAccepting, UrlOpenQ); + + const UrlOpenSyms = makeState(); // UrlOpen followed by some symbols it cannot end with + ta(UrlOpen, qsNonAccepting); + + // URL that begins with an opening bracket, followed by some symbols + ta(UrlOpenQ, qsAccepting, UrlOpenQ); + ta(UrlOpenQ, qsNonAccepting, UrlOpenSyms); + ta(UrlOpenSyms, qsAccepting, UrlOpenQ); + ta(UrlOpenSyms, qsNonAccepting, UrlOpenSyms); + + // Close brace/bracket to become regular URL + tt(UrlOpenQ, CLOSE, Url); + tt(UrlOpenSyms, CLOSE, Url); + } tt(Start, tk.LOCALHOST, DomainDotTld); // localhost is a valid URL state tt(Start, tk.NL, mtk.Nl); // single new line @@ -323,10 +291,7 @@ export function run(start, input, tokens) { textTokens.push(tokens[cursor++]); } - while (cursor < len && ( - nextState = secondState || state.go(tokens[cursor].t)) - ) { - + while (cursor < len && (nextState = secondState || state.go(tokens[cursor].t))) { // Get the next state secondState = null; state = nextState; diff --git a/packages/linkifyjs/src/scanner.js b/packages/linkifyjs/src/scanner.js index 80d650a5..7de67879 100644 --- a/packages/linkifyjs/src/scanner.js +++ b/packages/linkifyjs/src/scanner.js @@ -14,7 +14,8 @@ const NL = '\n'; // New line character const EMOJI_VARIATION = '\ufe0f'; // Variation selector, follows heart and others const EMOJI_JOINER = '\u200d'; // zero-width joiner -let tlds = null, utlds = null; // don't change so only have to be computed once +let tlds = null, + utlds = null; // don't change so only have to be computed once /** * Scanner output token: @@ -55,15 +56,21 @@ export function
init(customSchemes = []) { // States for special URL symbols that accept immediately after start tt(Start, "'", tk.APOSTROPHE); tt(Start, '{', tk.OPENBRACE); - tt(Start, '[', tk.OPENBRACKET); - tt(Start, '<', tk.OPENANGLEBRACKET); - tt(Start, '(', tk.OPENPAREN); - tt(Start, '（', tk.FULLWIDTH_OPENPAREN); tt(Start, '}', tk.CLOSEBRACE); + tt(Start, '[', tk.OPENBRACKET); tt(Start, ']', tk.CLOSEBRACKET); - tt(Start, '>', tk.CLOSEANGLEBRACKET); + tt(Start, '(', tk.OPENPAREN); tt(Start, ')', tk.CLOSEPAREN); - tt(Start, '）', tk.FULLWIDTH_CLOSEPAREN); + tt(Start, '<', tk.OPENANGLEBRACKET); + tt(Start, '>', tk.CLOSEANGLEBRACKET); + tt(Start, '（', tk.FULLWIDTHLEFTPAREN); + tt(Start, '）', tk.FULLWIDTHRIGHTPAREN); + tt(Start, '「', tk.LEFTCORNERBRACKET); + tt(Start, '」', tk.RIGHTCORNERBRACKET); + tt(Start, '『', tk.LEFTWHITECORNERBRACKET); + tt(Start, '』', tk.RIGHTWHITECORNERBRACKET); + tt(Start, '＜', tk.FULLWIDTHLESSTHAN); + tt(Start, '＞', tk.FULLWIDTHGREATERTHAN); tt(Start, '&', tk.AMPERSAND); tt(Start, '*', tk.ASTERISK); tt(Start, '@', tk.AT); @@ -122,7 +129,10 @@ export function init(customSchemes = []) { // Generates states for top-level domains // Note that this is most accurate when tlds are in alphabetical order const wordjr = [[re.ASCII_LETTER, Word]]; - const uwordjr = [[re.ASCII_LETTER, null], [re.LETTER, UWord]]; + const uwordjr = [ + [re.ASCII_LETTER, null], + [re.LETTER, UWord], + ]; for (let i = 0; i < tlds.length; i++) { fastts(Start, tlds[i], tk.TLD, tk.WORD, wordjr); } @@ -145,7 +155,7 @@ export function init(customSchemes = []) { addToGroups(tk.SLASH_SCHEME, { slashscheme: true, ascii: true }, groups); // Register custom schemes. Assumes each scheme is asciinumeric with hyphens - customSchemes = customSchemes.sort((a, b) => a[0] > b[0] ? 1 : -1); + customSchemes = customSchemes.sort((a, b) => (a[0] > b[0] ?
1 : -1)); for (let i = 0; i < customSchemes.length; i++) { const sch = customSchemes[i][0]; const optionalSlashSlash = customSchemes[i][1]; @@ -233,7 +243,7 @@ export function run(start, str) { t: latestAccepting.t, // token type/name v: str.slice(cursor - tokenLength, cursor), // string value s: cursor - tokenLength, // start index - e: cursor // end index (excluding) + e: cursor, // end index (excluding) }); } @@ -258,10 +268,14 @@ export function stringToArray(str) { while (index < len) { let first = str.charCodeAt(index); let second; - let char = first < 0xd800 || first > 0xdbff || index + 1 === len - || (second = str.charCodeAt(index + 1)) < 0xdc00 || second > 0xdfff - ? str[index] // single character - : str.slice(index, index + 2); // two-index characters + let char = + first < 0xd800 || + first > 0xdbff || + index + 1 === len || + (second = str.charCodeAt(index + 1)) < 0xdc00 || + second > 0xdfff + ? str[index] // single character + : str.slice(index, index + 2); // two-index characters result.push(char); index += char.length; } diff --git a/packages/linkifyjs/src/text.js b/packages/linkifyjs/src/text.js index f3dd9ab1..5b0feb13 100644 --- a/packages/linkifyjs/src/text.js +++ b/packages/linkifyjs/src/text.js @@ -4,8 +4,8 @@ Identifiers for token outputs from the regexp scanner ******************************************************************************/ // A valid web domain token -export const WORD = 'WORD'; // only contains a-z -export const UWORD = 'UWORD'; // contains letters other than a-z, used for IDN +export const WORD = 'WORD'; // only contains a-z +export const UWORD = 'UWORD'; // contains letters other than a-z, used for IDN // Special case of word export const LOCALHOST = 'LOCALHOST'; @@ -36,16 +36,24 @@ export const WS = 'WS'; export const NL = 'NL'; // \n // Opening/closing bracket classes +// TODO: Rename OPEN -> LEFT and CLOSE -> RIGHT in v5 to fit with Unicode names +// Also rename angle brackets to LESSTHAN and GREATERTHAN export const
OPENBRACE = 'OPENBRACE'; // { -export const OPENBRACKET = 'OPENBRACKET'; // [ -export const OPENANGLEBRACKET = 'OPENANGLEBRACKET'; // < -export const OPENPAREN = 'OPENPAREN'; // ( export const CLOSEBRACE = 'CLOSEBRACE'; // } +export const OPENBRACKET = 'OPENBRACKET'; // [ export const CLOSEBRACKET = 'CLOSEBRACKET'; // ] -export const CLOSEANGLEBRACKET = 'CLOSEANGLEBRACKET'; // > +export const OPENPAREN = 'OPENPAREN'; // ( export const CLOSEPAREN = 'CLOSEPAREN'; // ) -export const FULLWIDTH_OPENPAREN = 'FULLWIDTH_OPENPAREN'; // （ -export const FULLWIDTH_CLOSEPAREN = 'FULLWIDTH_CLOSEPAREN'; // ） +export const OPENANGLEBRACKET = 'OPENANGLEBRACKET'; // < +export const CLOSEANGLEBRACKET = 'CLOSEANGLEBRACKET'; // > +export const FULLWIDTHLEFTPAREN = 'FULLWIDTHLEFTPAREN'; // （ +export const FULLWIDTHRIGHTPAREN = 'FULLWIDTHRIGHTPAREN'; // ） +export const LEFTCORNERBRACKET = 'LEFTCORNERBRACKET'; // 「 +export const RIGHTCORNERBRACKET = 'RIGHTCORNERBRACKET'; // 」 +export const LEFTWHITECORNERBRACKET = 'LEFTWHITECORNERBRACKET'; // 『 +export const RIGHTWHITECORNERBRACKET = 'RIGHTWHITECORNERBRACKET'; // 』 +export const FULLWIDTHLESSTHAN = 'FULLWIDTHLESSTHAN'; // ＜ +export const FULLWIDTHGREATERTHAN = 'FULLWIDTHGREATERTHAN'; // ＞ // Various symbols export const AMPERSAND = 'AMPERSAND'; // & @@ -79,4 +87,3 @@ export const EMOJI = 'EMOJI'; // Default token - anything that is not one of the above export const SYM = 'SYM'; - diff --git a/test/run.sh b/test/run.sh index 4553f6fc..88988efb 100755 --- a/test/run.sh +++ b/test/run.sh @@ -8,8 +8,10 @@ if [[ "$1" == "--dist" ]]; then npm run test:coverage npm run build:ci npm run copy - npm run test:ci - sleep 3 # Wait for threads to exit? + if [[ "${BROWSERSTACK_USERNAME}" != "" ]] && [[ "${BROWSERSTACK_ACCESS_KEY}" != "" ]]; then + npm run test:ci + sleep 3 # Wait for threads to exit? + fi else # Run basic tests echo "Running basic tests..."
diff --git a/test/spec/linkifyjs/parser.test.js b/test/spec/linkifyjs/parser.test.js index 5346b892..e862cf97 100644 --- a/test/spec/linkifyjs/parser.test.js +++ b/test/spec/linkifyjs/parser.test.js @@ -9,281 +9,312 @@ import * as parser from 'linkifyjs/src/parser'; [2] - The values of the tokens the text should result in */ const tests = [ + ['google.com', [Url], ['google.com']], + ['I like google.com the most', [Text, Url, Text], ['I like ', 'google.com', ' the most']], + ['I like Google.com the most', [Text, Url, Text], ['I like ', 'Google.com', ' the most']], [ - 'google.com', - [Url], - ['google.com'] - ], [ - 'I like google.com the most', - [Text, Url, Text], - ['I like ', 'google.com', ' the most'] - ], [ - 'I like Google.com the most', - [Text, Url, Text], - ['I like ', 'Google.com', ' the most'] - ], ['there are two tests, brennan.com and nick.ca -- do they work?', + 'there are two tests, brennan.com and nick.ca -- do they work?', [Text, Url, Text, Url, Text], - ['there are two tests, ', 'brennan.com', ' and ', 'nick.ca', ' -- do they work?'] - ], [ + ['there are two tests, ', 'brennan.com', ' and ', 'nick.ca', ' -- do they work?'], + ], + [ 'there are two tests!brennan.com. and nick.ca? -- do they work?', [Text, Url, Text, Url, Text], - ['there are two tests!', 'brennan.com', '. and ', 'nick.ca', '? -- do they work?'] - ], [ + ['there are two tests!', 'brennan.com', '. and ', 'nick.ca', '? 
-- do they work?'], + ], + [ 'This [i.imgur.com/ckSj2Ba.jpg)] should also work', [Text, Url, Text], - ['This [', 'i.imgur.com/ckSj2Ba.jpg', ')] should also work'] - ], [ + ['This [', 'i.imgur.com/ckSj2Ba.jpg', ')] should also work'], + ], + [ 'A link is http://nick.is.awesome/?q=nick+amazing&nick=yo%29%30hellp another is http://nick.con/?q=look', [Text, Url, Text, Url], - ['A link is ', 'http://nick.is.awesome/?q=nick+amazing&nick=yo%29%30hellp', ' another is ', 'http://nick.con/?q=look'] - ], [ + [ + 'A link is ', + 'http://nick.is.awesome/?q=nick+amazing&nick=yo%29%30hellp', + ' another is ', + 'http://nick.con/?q=look', + ], + ], + [ 'SOme UrlS http://google.com https://google1.com google2.com google.com/search?q=potatoes+oven goo.gl/0192n1 google.com?q=asda test bit.ly/0912j www.bob.com indigo.dev.soapbox.co/mobile google.com/?q=.exe flickr.com/linktoimage.jpg', - [Text, Url, Text, Url, Text, Url, Text, Url, Text, Url, Text, Url, Text, Url, Text, Url, Text, Url, Text, Url, Text, Url], - ['SOme UrlS ', 'http://google.com', ' ', 'https://google1.com', ' ', 'google2.com', ' ', 'google.com/search?q=potatoes+oven', ' ', 'goo.gl/0192n1', ' ', 'google.com', '?q=asda test ', 'bit.ly/0912j', ' ', 'www.bob.com', ' ', 'indigo.dev.soapbox.co/mobile', ' ', 'google.com/?q=.exe', ' ', 'flickr.com/linktoimage.jpg'], - ], [ - 'None.of these.should be.Links okay.please?', - [Text], - ['None.of these.should be.Links okay.please?'] - ], [ + [ + Text, + Url, + Text, + Url, + Text, + Url, + Text, + Url, + Text, + Url, + Text, + Url, + Text, + Url, + Text, + Url, + Text, + Url, + Text, + Url, + Text, + Url, + ], + [ + 'SOme UrlS ', + 'http://google.com', + ' ', + 'https://google1.com', + ' ', + 'google2.com', + ' ', + 'google.com/search?q=potatoes+oven', + ' ', + 'goo.gl/0192n1', + ' ', + 'google.com', + '?q=asda test ', + 'bit.ly/0912j', + ' ', + 'www.bob.com', + ' ', + 'indigo.dev.soapbox.co/mobile', + ' ', + 'google.com/?q=.exe', + ' ', + 'flickr.com/linktoimage.jpg', + ], + ], + 
['None.of these.should be.Links okay.please?', [Text], ['None.of these.should be.Links okay.please?']], + [ 'Here are some random emails: nick@soapbox.com, nick@soapbox.soda (invalid), Nick@dev.dev.soapbox.co, random nick.frasser_hitsend@http://facebook.com', [Text, Email, Text, Email, Text, Url], - ['Here are some random emails: ', 'nick@soapbox.com', ', nick@soapbox.soda (invalid), ', 'Nick@dev.dev.soapbox.co', ', random nick.frasser_hitsend@', 'http://facebook.com'] - ], [ + [ + 'Here are some random emails: ', + 'nick@soapbox.com', + ', nick@soapbox.soda (invalid), ', + 'Nick@dev.dev.soapbox.co', + ', random nick.frasser_hitsend@', + 'http://facebook.com', + ], + ], + [ 't.c.com/sadqad is a great domain, so is ftp://i.am.a.b.ca/ okay?', [Url, Text, Url, Text], - ['t.c.com/sadqad', ' is a great domain, so is ', 'ftp://i.am.a.b.ca/', ' okay?'] - ], [ + ['t.c.com/sadqad', ' is a great domain, so is ', 'ftp://i.am.a.b.ca/', ' okay?'], + ], + [ 'This port is too short someport.com: this port is too long http://googgle.com:789023/myQuery this port is just right https://github.com:8080/SoapBox/jQuery-linkify/', [Text, Url, Text, Url, Text, Url], - ['This port is too short ', 'someport.com', ': this port is too long ', 'http://googgle.com:789023/myQuery', ' this port is just right ', 'https://github.com:8080/SoapBox/jQuery-linkify/'] - ], [ + [ + 'This port is too short ', + 'someport.com', + ': this port is too long ', + 'http://googgle.com:789023/myQuery', + ' this port is just right ', + 'https://github.com:8080/SoapBox/jQuery-linkify/', + ], + ], + [ 'The best Url http://google.com/?love=true, and t.co', [Text, Url, Text, Url], - ['The best Url ', 'http://google.com/?love=true', ', and ', 't.co'] - ], [ - 'Please email me at testy.test+123@gmail.com', - [Text, Email], - ['Please email me at ', 'testy.test+123@gmail.com'], - ], [ + ['The best Url ', 'http://google.com/?love=true', ', and ', 't.co'], + ], + ['Please email me at testy.test+123@gmail.com', [Text, 
Email], ['Please email me at ', 'testy.test+123@gmail.com']], + [ 'http://aws.amazon.com:8080/nick?was=here and localhost:3000 are also domains', [Url, Text, Url, Text], - ['http://aws.amazon.com:8080/nick?was=here', ' and ', 'localhost:3000', ' are also domains'] - ], [ - 'http://500-px.com is a real domain?', - [Url, Text], - ['http://500-px.com', ' is a real domain?'] - ], [ + ['http://aws.amazon.com:8080/nick?was=here', ' and ', 'localhost:3000', ' are also domains'], + ], + ['http://500-px.com is a real domain?', [Url, Text], ['http://500-px.com', ' is a real domain?']], + [ 'IP loops like email? 192.168.0.1@gmail.com! works!!', [Text, Email, Text], - ['IP loops like email? ', '192.168.0.1@gmail.com', '! works!!'] - ], [ - 'Url like bro-215.co; with a hyphen?', - [Text, Url, Text], - ['Url like ', 'bro-215.co', '; with a hyphen?'] - ], [ + ['IP loops like email? ', '192.168.0.1@gmail.com', '! works!!'], + ], + ['Url like bro-215.co; with a hyphen?', [Text, Url, Text], ['Url like ', 'bro-215.co', '; with a hyphen?']], + [ 'This Url http://23456789098.sydney is a number', [Text, Url, Text], - ['This Url ', 'http://23456789098.sydney', ' is a number'] - ], [ + ['This Url ', 'http://23456789098.sydney', ' is a number'], + ], + [ 'This Url http://23456789098.sydney is a number', [Text, Url, Text], - ['This Url ', 'http://23456789098.sydney', ' is a number'] - ], [ + ['This Url ', 'http://23456789098.sydney', ' is a number'], + ], + [ 'A Url with only numbers is 123.456.ca another is //7.8.com/?wat=1 is valid', [Text, Url, Text, Url, Text], - ['A Url with only numbers is ', '123.456.ca', ' another is //', '7.8.com/?wat=1', ' is valid'] - ], [ + ['A Url with only numbers is ', '123.456.ca', ' another is //', '7.8.com/?wat=1', ' is valid'], + ], + [ 'Url Numbers 6.wat.78.where.eu and u.0.e.9.kp', [Text, Url, Text, Url], - ['Url Numbers ', '6.wat.78.where.eu', ' and ', 'u.0.e.9.kp'] - ], [ + ['Url Numbers ', '6.wat.78.where.eu', ' and ', 'u.0.e.9.kp'], + ], + [ 'Emails 
like nick:f@gmail.com do not have colons in them', [Text, Email, Text], - ['Emails like nick:', 'f@gmail.com', ' do not have colons in them'] - ], [ + ['Emails like nick:', 'f@gmail.com', ' do not have colons in them'], + ], + [ 'Emails cannot have two dots, e.g.: nick..f@yahoo.ca', [Text, Email], - ['Emails cannot have two dots, e.g.: nick..', 'f@yahoo.ca'] - ], [ + ['Emails cannot have two dots, e.g.: nick..', 'f@yahoo.ca'], + ], + [ 'The `mailto:` part should be included in mailto:this.is.a.test@yandex.ru', [Text, Url], - ['The `mailto:` part should be included in ', 'mailto:this.is.a.test@yandex.ru'] - ], [ + ['The `mailto:` part should be included in ', 'mailto:this.is.a.test@yandex.ru'], + ], + [ 'mailto:echalk-dev@logicify.com?Subject=Hello%20again is another test', [Url, Text], - ['mailto:echalk-dev@logicify.com?Subject=Hello%20again', ' is another test'] - ], [ + ['mailto:echalk-dev@logicify.com?Subject=Hello%20again', ' is another test'], + ], + [ 'Mailto is greedy mailto:localhost?subject=Hello%20World.', [Text, Url, Text], - ['Mailto is greedy ', 'mailto:localhost?subject=Hello%20World', '.'] - ], [ + ['Mailto is greedy ', 'mailto:localhost?subject=Hello%20World', '.'], + ], + [ 'Emails like: test@42.domain.com and test@42.abc.11.domain.com should be matched in its entirety.', [Text, Email, Text, Email, Text], - ['Emails like: ', 'test@42.domain.com', ' and ', 'test@42.abc.11.domain.com', ' should be matched in its entirety.'] - ], [ - 'Bu haritanın verileri Direniş İzleme Grubu\'nun yaptığı Türkiye İşçi Eylemleri haritası ile birleşebilir esasen. https://graphcommons.com/graphs/00af1cd8-5a67-40b1-86e5-32beae436f7c?show=Comments', + [ + 'Emails like: ', + 'test@42.domain.com', + ' and ', + 'test@42.abc.11.domain.com', + ' should be matched in its entirety.', + ], + ], + [ + "Bu haritanın verileri Direniş İzleme Grubu'nun yaptığı Türkiye İşçi Eylemleri haritası ile birleşebilir esasen. 
https://graphcommons.com/graphs/00af1cd8-5a67-40b1-86e5-32beae436f7c?show=Comments", [Text, Url], - ['Bu haritanın verileri Direniş İzleme Grubu\'nun yaptığı Türkiye İşçi Eylemleri haritası ile birleşebilir esasen. ', 'https://graphcommons.com/graphs/00af1cd8-5a67-40b1-86e5-32beae436f7c?show=Comments'] - ], [ + [ + "Bu haritanın verileri Direniş İzleme Grubu'nun yaptığı Türkiye İşçi Eylemleri haritası ile birleşebilir esasen. ", + 'https://graphcommons.com/graphs/00af1cd8-5a67-40b1-86e5-32beae436f7c?show=Comments', + ], + ], + [ 'Links with brackets and parens https://en.wikipedia.org/wiki/Blur_[band] wat', [Text, Url, Text], ['Links with brackets and parens ', 'https://en.wikipedia.org/wiki/Blur_[band]', ' wat'], - ], [ + ], + [ 'This has dots {https://msdn.microsoft.com/en-us/library/aa752574(VS.85).aspx}', [Text, Url, Text], - ['This has dots {', 'https://msdn.microsoft.com/en-us/library/aa752574(VS.85).aspx', '}'] - ], [ // This test is correct, will count nested brackets as being part of the first + ['This has dots {', 'https://msdn.microsoft.com/en-us/library/aa752574(VS.85).aspx', '}'], + ], + [ + // This test is correct, will count nested brackets as being part of the first 'A really funky one (example.com/?id=asd2{hellow}and%20it%20continues(23&((@)) and it ends', [Text, Url, Text], - ['A really funky one (', 'example.com/?id=asd2{hellow}and%20it%20continues(23&((@)', ') and it ends'] - ], [ + ['A really funky one (', 'example.com/?id=asd2{hellow}and%20it%20continues(23&((@)', ') and it ends'], + ], + [ 'Url enclosed in angle brackets: should not include trailing bracket', [Text, Url, Text], - ['Url enclosed in angle brackets: <', 'http://example.com/exemplary', '> should not include trailing bracket'] - ], [ + ['Url enclosed in angle brackets: <', 'http://example.com/exemplary', '> should not include trailing bracket'], + ], + [ 'Url with angle brackets in it: http://example.com/exemplary_ should be included', [Text, Url, Text], - ['Url with angle 
brackets in it: ', 'http://example.com/exemplary_', ' should be included'] - ], [ + ['Url with angle brackets in it: ', 'http://example.com/exemplary_', ' should be included'], + ], + [ 'Force http:/ and http:// are not but http://a and http://b.local?qeasd3qas=23 are all links', [Text, Url, Text, Url, Text], - ['Force http:/ and http:// are not but ', 'http://a', ' and ', 'http://b.local?qeasd3qas=23', ' are all links'] - ], [ + ['Force http:/ and http:// are not but ', 'http://a', ' and ', 'http://b.local?qeasd3qas=23', ' are all links'], + ], + [ 'HTTP Auth Urls should work: http://username:password@example.com', [Text, Url], - ['HTTP Auth Urls should work: ', 'http://username:password@example.com'] - ], [ + ['HTTP Auth Urls should work: ', 'http://username:password@example.com'], + ], + [ 'Trailing equal symbol should work: http://example.com/foo/bar?token=CtFOYuk0wjiqvHZF==', [Text, Url], - ['Trailing equal symbol should work: ', 'http://example.com/foo/bar?token=CtFOYuk0wjiqvHZF=='] - ], [ - '"https://surrounded.by.quotes/"', - [Text, Url, Text], - ['"', 'https://surrounded.by.quotes/', '"'] - ], [ - 'More weird character in http://facebook.com/#aZ?/:@-._~!$&\'()*+,;= that Url', + ['Trailing equal symbol should work: ', 'http://example.com/foo/bar?token=CtFOYuk0wjiqvHZF=='], + ], + ['"https://surrounded.by.quotes/"', [Text, Url, Text], ['"', 'https://surrounded.by.quotes/', '"']], + [ + "More weird character in http://facebook.com/#aZ?/:@-._~!$&'()*+,;= that Url", [Text, Url, Text], - ['More weird character in ', 'http://facebook.com/#aZ?/:@-._~!$&\'()*+,;=', ' that Url'] - ], [ + ['More weird character in ', "http://facebook.com/#aZ?/:@-._~!$&'()*+,;=", ' that Url'], + ], + [ 'Email with a underscore is n_frasser@example.xyz asd', [Text, Email, Text], - ['Email with a underscore is ', 'n_frasser@example.xyz', ' asd'] - ], [ + ['Email with a underscore is ', 'n_frasser@example.xyz', ' asd'], + ], + [ 'Url followed by nbsp: example.com/foo\u00a0bar', [Text, 
Url, Text], - ['Url followed by nbsp: ', 'example.com/foo', '\u00a0bar'] - ], [ - 'A link in \'singlequote.club/wat\' extra fluff at the end', + ['Url followed by nbsp: ', 'example.com/foo', '\u00a0bar'], + ], + [ + "A link in 'singlequote.club/wat' extra fluff at the end", [Text, Url, Text], - ['A link in \'', 'singlequote.club/wat', '\' extra fluff at the end'] - ], [ + ["A link in '", 'singlequote.club/wat', "' extra fluff at the end"], + ], + [ 'Email with mailsomething dot com domain in foo@mailsomething.com', [Text, Email], - ['Email with mailsomething dot com domain in ', 'foo@mailsomething.com'] - ], [ - 'http://über.de', - [Url], - ['http://über.de'] - ], [ - 'www.öko.de', - [Url], - ['www.öko.de'] - ], [ - 'www.🍕💩.ws i❤️.ws', - [Url, Text, Url], - ['www.🍕💩.ws', ' ', 'i❤️.ws'] - ], [ - 'o\'malley@example.com.au', // Email with apostrophe - [Email], - ['o\'malley@example.com.au'] - ], [ - 'foohttp://example.com bar', - [Text, Url, Text], - ['foohttp://', 'example.com', ' bar'], - ], [ - 'テストhttp://example.comテスト', - [Text, Url], - ['テスト', 'http://example.comテスト'], - ], [ - 'file:/etc/motd', - [Url], - ['file:/etc/motd'] - ], [ - 'file:///etc/motd', - [Url], - ['file:///etc/motd'] - ], [ - '~a@example.org', - [Email], - ['~a@example.org'] - ], [ - '~@example.org', - [Email], - ['~@example.org'] - ], [ - '~emersion/soju-dev@lists.sr.ht', - [Email], - ['~emersion/soju-dev@lists.sr.ht'] - ], [ - 'test@example2.com', - [Email], - ['test@example2.com'] - ], [ - 'noreply@500px.so', - [Email], - ['noreply@500px.so'] - ], [ - 'http@example.com', - [Email], - ['http@example.com'] - ], [ - 'mailto@example.com', + ['Email with mailsomething dot com domain in ', 'foo@mailsomething.com'], + ], + ['http://über.de', [Url], ['http://über.de']], + ['www.öko.de', [Url], ['www.öko.de']], + ['www.🍕💩.ws i❤️.ws', [Url, Text, Url], ['www.🍕💩.ws', ' ', 'i❤️.ws']], + [ + "o'malley@example.com.au", // Email with apostrophe [Email], - ['mailto@example.com'] - ], [ - 'http.org', - 
[Url], - ['http.org'] - ], [ - 'http123.org', - [Url], - ['http123.org'] - ], [ - 'http-help.org', - [Url], - ['http-help.org'] - ], [ - 'view-source.net', - [Url], - ['view-source.net'] - ], [ - 'steam.com', - [Url], - ['steam.com'] - ], [ - 'Hello\nWorld', - [Text, Nl, Text], - ['Hello', '\n', 'World'], - ], [ - 'And http://↑↑↓↓←→←→ba.tk/ is also a URL', - [Text, Url, Text], - ['And ', 'http://↑↑↓↓←→←→ba.tk/', ' is also a URL'] - ], [ + ["o'malley@example.com.au"], + ], + ['foohttp://example.com bar', [Text, Url, Text], ['foohttp://', 'example.com', ' bar']], + ['テストhttp://example.comテスト', [Text, Url], ['テスト', 'http://example.comテスト']], + ['file:/etc/motd', [Url], ['file:/etc/motd']], + ['file:///etc/motd', [Url], ['file:///etc/motd']], + ['~a@example.org', [Email], ['~a@example.org']], + ['~@example.org', [Email], ['~@example.org']], + ['~emersion/soju-dev@lists.sr.ht', [Email], ['~emersion/soju-dev@lists.sr.ht']], + ['test@example2.com', [Email], ['test@example2.com']], + ['noreply@500px.so', [Email], ['noreply@500px.so']], + ['http@example.com', [Email], ['http@example.com']], + ['mailto@example.com', [Email], ['mailto@example.com']], + ['http.org', [Url], ['http.org']], + ['http123.org', [Url], ['http123.org']], + ['http-help.org', [Url], ['http-help.org']], + ['view-source.net', [Url], ['view-source.net']], + ['steam.com', [Url], ['steam.com']], + ['Hello\nWorld', [Text, Nl, Text], ['Hello', '\n', 'World']], + ['And http://↑↑↓↓←→←→ba.tk/ is also a URL', [Text, Url, Text], ['And ', 'http://↑↑↓↓←→←→ba.tk/', ' is also a URL']], + [ 'This Url www.drive1.com with www and digits also www.500px.com', [Text, Url, Text, Url], - ['This Url ', 'www.drive1.com', ' with www and digits also ', 'www.500px.com'] - ], [ + ['This Url ', 'www.drive1.com', ' with www and digits also ', 'www.500px.com'], + ], + [ 'Link 1(http://foo.com/blah_blah) Link 2(http://foo.com/blah_blah_(wikipedia)_(again))', [Text, Url, Text, Url, Text], - ['Link 1(', 'http://foo.com/blah_blah', ') Link 
2(', 'http://foo.com/blah_blah_(wikipedia)_(again)', ')'] - ], [ + ['Link 1(', 'http://foo.com/blah_blah', ') Link 2(', 'http://foo.com/blah_blah_(wikipedia)_(again)', ')'], + ], + [ 'Link 1(http://foo.com/blah_blah) Link 2(http://foo.com/blah_blah_(wikipedia)_(again))', [Text, Url, Text, Url, Text], - ['Link 1(', 'http://foo.com/blah_blah', ') Link 2(', 'http://foo.com/blah_blah_(wikipedia)_(again)', ')'] + ['Link 1(', 'http://foo.com/blah_blah', ') Link 2(', 'http://foo.com/blah_blah_(wikipedia)_(again)', ')'], + ], + [ + 'Link 1『http://foo.com/blah_blah』 Link 2『http://foo.com/blah_blah_(wikipedia)_(again)』', + [Text, Url, Text, Url, Text], + ['Link 1『', 'http://foo.com/blah_blah', '』 Link 2『', 'http://foo.com/blah_blah_(wikipedia)_(again)', '』'], ], ]; - describe('linkifyjs/parser#run()', () => { let scannerStart, scannerTokens, start; @@ -300,14 +331,11 @@ describe('linkifyjs/parser#run()', () => { function makeTest(test) { return it('Tokenizes the string "' + test[0] + '"', () => { - var str = test[0]; - var types = test[1]; - var values = test[2]; - var result = parser.run(start, str, scanner.run(scannerStart, str)); - - expect(result.map(token => token.v)).to.eql(values); - expect(result.map(token => token.toString())).to.eql(values); - expect(result.map(token => token.constructor)).to.eql(types); + const [str, types, values] = test; + const result = parser.run(start, str, scanner.run(scannerStart, str)); + expect(result.map((token) => token.v)).to.eql(values); + expect(result.map((token) => token.toString())).to.eql(values); + expect(result.map((token) => token.constructor)).to.eql(types); }); } @@ -317,10 +345,10 @@ describe('linkifyjs/parser#run()', () => { it('Correctly sets start and end indexes', () => { const input = 'Hello github.com!'; const result = parser.run(start, input, scanner.run(scannerStart, input)); - expect(result.map(t => t.toObject())).to.eql([ + expect(result.map((t) => t.toObject())).to.eql([ { type: 'text', value: 'Hello ', href: 
'Hello ', isLink: false, start: 0, end: 6 }, { type: 'url', value: 'github.com', href: 'http://github.com', isLink: true, start: 6, end: 16 }, - { type: 'text', value: '!', href: '!', isLink: false, start: 16, end: 17 } + { type: 'text', value: '!', href: '!', isLink: false, start: 16, end: 17 }, ]); }); });