From baa1d711c3168fc6047fc0df039cb37ce934a42a Mon Sep 17 00:00:00 2001 From: lionel-rowe Date: Fri, 10 May 2024 22:30:19 +0800 Subject: [PATCH 1/3] Safari fix with compat mode --- package.json | 1 + src/index.ts | 13 ++-- src/regex.ts | 22 +++--- src/types.ts | 37 ++++++---- test/_global.test.ts | 24 +++++++ test/a_unit/groups.test.ts | 91 ++++++++++++++++++++++++ test/b_integration/variable.urls.test.ts | 2 +- 7 files changed, 160 insertions(+), 30 deletions(-) create mode 100644 test/_global.test.ts create mode 100644 test/a_unit/groups.test.ts diff --git a/package.json b/package.json index 59b5987..c43c2d4 100644 --- a/package.json +++ b/package.json @@ -7,6 +7,7 @@ "types": "./dist/node/index.d.ts", "scripts": { "test": "mocha", + "test-no-lookbehind": "SIMULATE_NO_LOOKBEHIND=1 mocha", "testw": "mocha -w", "build": "ts-node ./build/build.ts", "buildw": "ts-node ./build/build.ts -w", diff --git a/src/index.ts b/src/index.ts index 0c48a8b..34c2525 100644 --- a/src/index.ts +++ b/src/index.ts @@ -8,6 +8,7 @@ import { ipRegex, urlRegex, iidxes, + NO_LOOKBEHIND_COMPAT_MODE, } from "./regex"; import { checkParenthesis as parenthesisIsPartOfTheURL, @@ -25,6 +26,11 @@ const list = function (input: string, skipHTML:boolean=true) { while ((result = finalRegex.exec(input)) !== null) { const start = result.index; + if (NO_LOOKBEHIND_COMPAT_MODE) { + const discard = result[1].length; + result.index -= discard; + result[0] = result[0].slice(discard); + } let end = start + result[0].length; let string = result[0]; @@ -152,12 +158,7 @@ const list = function (input: string, skipHTML:boolean=true) { reason: "email", }); } else { - found.push({ - start, - end, - string, - reason: "unknown", - }); + throw new Error('Unreachable'); } } return found; diff --git a/src/regex.ts b/src/regex.ts index 17e86fe..dab39f9 100644 --- a/src/regex.ts +++ b/src/regex.ts @@ -15,15 +15,21 @@ export const email = `\\b(mailto:)?${emailAddress}@(${domain}|${ipv4})`; export const url = `(${fqdn})${path}?`; export const file = `(file:\\/\\/\\/)(?:[a-z]+:(?:\\/|\\\\)+)?([\\w.]+(?:[\\/\\\\]?)+)+`; -// since safari doesn't like lookbehind, we're trying an alternative -export const final1 = `(?<=\\b|_)((${email})|(${file})|(${url}))(\\b)?`; -export const final2 = `((\\b)(${email})|(\\b)(${file})|(\\b)(${url}))(\\b)?`; +const common = `((${email})|(${file})|(${url}))(\\b)?`; +// since safari doesn't like lookbehind, we're trying an alternative. +// `final` must have same number of capture groups as `finalNoLookbehindCompatMode`: +// - In non-compat-mode, the first group is always empty as it only contains a lookbehind +// - In compat mode, we truncate whatever is in the first group (0 or 1 chars) as it's not part of the URL. +export const final = `((?<=\\b|_))${common}`; +export const finalNoLookbehindCompatMode = `(\\b|_)${common}`; -export let finalRegex = new RegExp(final2, "gi"); +export let finalRegex = new RegExp(finalNoLookbehindCompatMode, "gi"); +export let NO_LOOKBEHIND_COMPAT_MODE = false; try { - finalRegex = new RegExp(final1, "gi"); + finalRegex = new RegExp(final, "gi"); } catch (e) { - finalRegex = new RegExp(final2, "gi"); + finalRegex = new RegExp(finalNoLookbehindCompatMode, "gi"); + NO_LOOKBEHIND_COMPAT_MODE = true; } // for validation purposes @@ -35,7 +41,7 @@ export const urlRegex = new RegExp(`^(${url})$`, "i"); // identifying parts of the link // the initial value of this object is precomputed. // https://github.com/alexcorvi/anchorme.js/blob/098843bc0d042601cff592c4f8c9f6d0424c09cd/src/regex.ts -const iidxes = {"isFile":8,"file":{"fileName":10,"protocol":9},"isEmail":2,"email":{"protocol":3,"local":4,"host":5},"isURL":11,"url":{"TLD":[18,6],"protocol":[15,22],"host":[17],"ipv4":19,"byProtocol":13,"port":21,"protocolWithDomain":12,"path":24}}; +const iidxes = {"isFile":9,"file":{"fileName":11,"protocol":10},"isEmail":3,"email":{"protocol":4,"local":5,"host":6},"isURL":12,"url":{"TLD":[19,7],"protocol":[16,23],"host":[18],"ipv4":20,"byProtocol":14,"port":22,"protocolWithDomain":13,"path":25}}; /*** @@ -107,4 +113,4 @@ for (let i = 0; i < testers.length; i++) { console.log(JSON.stringify(iidxes)); */ -export { iidxes }; \ No newline at end of file +export { iidxes }; diff --git a/src/types.ts b/src/types.ts index 3571139..e2889c0 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1,40 +1,47 @@ export interface BaseTokenProps { start: number; end: number; - reason: string; string: string; } export interface Email extends BaseTokenProps { + reason: 'email'; isEmail: true; - protocol: string; + isURL?: false; + isFile?: false; + protocol?: string; local: string; host: string; } export interface URL extends BaseTokenProps { + reason: 'url'; isURL: true; - protocol: string; - host: string; - port: string; - ipv4: string; - ipv6: string; + isEmail?: false; + isFile?: false; + protocol?: string; + host?: string; + port?: string; + ipv4?: string; + ipv6?: string; confirmedByProtocol: boolean; - path: string; - query: string; - fragment: string; + path?: string; + query?: string; + fragment?: string; } export interface File extends BaseTokenProps { + reason: 'file'; isFile: true; - filename: string; + isURL?: false; + isEmail?: false; + protocol: string; + filename?: string; filePath: string; - fileDirectory: string; + fileDirectory?: string; } -type TokenProps = Email & File & URL; - -export type ListingProps = Partial & BaseTokenProps; +export type ListingProps = Email | File | URL; export type DesiredValues = | { [key: string]: string | undefined | true } diff --git a/test/_global.test.ts b/test/_global.test.ts new file mode 100644 index 0000000..306f07b --- /dev/null +++ b/test/_global.test.ts @@ -0,0 +1,24 @@ +// @ts-ignore +const { SIMULATE_NO_LOOKBEHIND } = process.env; + +if (SIMULATE_NO_LOOKBEHIND) { + const OriginalRegExp = globalThis.RegExp; + + class MockRegExp extends OriginalRegExp { + constructor(pattern: string | RegExp, flags?: string) { + super(pattern, flags) + if (String(pattern).includes('(?<=')) { + throw new SyntaxError('idk what (?<= even means') + } + } + } + + // @ts-ignore + globalThis.RegExp = MockRegExp; +} + +import { NO_LOOKBEHIND_COMPAT_MODE } from '../src/regex'; + +if (NO_LOOKBEHIND_COMPAT_MODE !== Boolean(SIMULATE_NO_LOOKBEHIND)) { + throw new Error('Failed to set `NO_LOOKBEHIND_COMPAT_MODE` correctly.'); +} diff --git a/test/a_unit/groups.test.ts b/test/a_unit/groups.test.ts new file mode 100644 index 0000000..c2ccef1 --- /dev/null +++ b/test/a_unit/groups.test.ts @@ -0,0 +1,91 @@ +/// +import anchorme from "../../src/index"; +import * as expect from "expect"; + +describe("UNIT: groups", () => { + it("groups", () => { + const matches = anchorme.list( + `https://example.xyz example.com user@email.com file:///filename.txt 192.168.1.1`, + false, + ); + + for (const match of matches) { + switch (match.reason) { + case "email": { + expect(match.isEmail).toBe(true); + expect(match.isFile).toBeFalsy(); + expect(match.isURL).toBeFalsy(); + + // ts + match.protocol; + match.local; + match.host; + expect(match).toHaveProperty('protocol'); + expect(match).toHaveProperty('local'); + expect(match).toHaveProperty('host'); + + // @ts-expect-error + match.filename; + expect(match).not.toHaveProperty('filename'); + + break; + } + case "file": { + expect(match.isFile).toBe(true); + expect(match.isEmail).toBeFalsy(); + expect(match.isURL).toBeFalsy(); + + // ts + match.protocol; + match.filename; + match.filePath; + match.fileDirectory; + expect(match).toHaveProperty('protocol'); + expect(match).toHaveProperty('filename'); + expect(match).toHaveProperty('filePath'); + expect(match).toHaveProperty('fileDirectory'); + + // @ts-expect-error + match.fragment; + expect(match).not.toHaveProperty('fragment'); + + break; + } + case "url": { + expect(match.isURL).toBe(true); + expect(match.isEmail).toBeFalsy(); + expect(match.isFile).toBeFalsy(); + + // ts + match.protocol; + match.host; + match.port; + match.ipv4; + match.ipv6; + match.confirmedByProtocol; + match.path; + match.query; + match.fragment; + expect(match).toHaveProperty('protocol'); + expect(match).toHaveProperty('host'); + expect(match).toHaveProperty('port'); + expect(match).toHaveProperty('ipv4'); + expect(match).toHaveProperty('ipv6'); + expect(match).toHaveProperty('confirmedByProtocol'); + expect(match).toHaveProperty('path'); + expect(match).toHaveProperty('query'); + expect(match).toHaveProperty('fragment'); + + // @ts-expect-error + match.local; + expect(match).not.toHaveProperty('local'); + + break; + } + default: { + throw new Error('Unreachable'); + } + } + } + }); +}); diff --git a/test/b_integration/variable.urls.test.ts b/test/b_integration/variable.urls.test.ts index c97b3f3..3e51e6a 100644 --- a/test/b_integration/variable.urls.test.ts +++ b/test/b_integration/variable.urls.test.ts @@ -561,4 +561,4 @@ describe("variable URLs", ()=>{ }); }); }); -}); \ No newline at end of file +}); From a1ecc7f8eabc1aa70854bdbd67f3f46f24d28c1b Mon Sep 17 00:00:00 2001 From: lionel-rowe Date: Fri, 10 May 2024 22:48:44 +0800 Subject: [PATCH 2/3] Fold Safari compat mode into normal mode to simplify logic --- package.json | 1 - src/index.ts | 15 ++-- src/regex.ts | 21 ++---- src/types.ts | 6 +- test/_global.test.ts | 30 +++----- test/a_unit/groups.test.ts | 96 ++++++++++++++++++------ test/b_integration/variable.urls.test.ts | 2 +- 7 files changed, 103 insertions(+), 68 deletions(-) diff --git a/package.json b/package.json index c43c2d4..59b5987 100644 --- a/package.json +++ b/package.json @@ -7,7 +7,6 @@ "types": "./dist/node/index.d.ts", "scripts": { "test": "mocha", - "test-no-lookbehind": "SIMULATE_NO_LOOKBEHIND=1 mocha", "testw": "mocha -w", "build": "ts-node ./build/build.ts", "buildw": "ts-node ./build/build.ts -w", diff --git a/src/index.ts b/src/index.ts index 34c2525..9a164c2 100644 --- a/src/index.ts +++ b/src/index.ts @@ -8,7 +8,6 @@ import { ipRegex, urlRegex, iidxes, - NO_LOOKBEHIND_COMPAT_MODE, } from "./regex"; import { checkParenthesis as parenthesisIsPartOfTheURL, @@ -26,11 +25,15 @@ const list = function (input: string, skipHTML:boolean=true) { while ((result = finalRegex.exec(input)) !== null) { const start = result.index; - if (NO_LOOKBEHIND_COMPAT_MODE) { - const discard = result[1].length; - result.index -= discard; - result[0] = result[0].slice(discard); + + // To support environments without lookbehind, we use a normal capture + // group to check for presence of "_", then truncate it if it exists as + // it's not part of the URL. + if (result[1]) { + result.index -= result[1].length; + result[0] = result[0].slice(result[1].length); } + let end = start + result[0].length; let string = result[0]; @@ -158,7 +161,7 @@ const list = function (input: string, skipHTML:boolean=true) { reason: "email", }); } else { - throw new Error('Unreachable'); + throw new Error("Unreachable"); } } return found; diff --git a/src/regex.ts b/src/regex.ts index dab39f9..542523d 100644 --- a/src/regex.ts +++ b/src/regex.ts @@ -15,22 +15,11 @@ export const email = `\\b(mailto:)?${emailAddress}@(${domain}|${ipv4})`; export const url = `(${fqdn})${path}?`; export const file = `(file:\\/\\/\\/)(?:[a-z]+:(?:\\/|\\\\)+)?([\\w.]+(?:[\\/\\\\]?)+)+`; -const common = `((${email})|(${file})|(${url}))(\\b)?`; -// since safari doesn't like lookbehind, we're trying an alternative. -// `final` must have same number of capture groups as `finalNoLookbehindCompatMode`: -// - In non-compat-mode, the first group is always empty as it only contains a lookbehind -// - In compat mode, we truncate whatever is in the first group (0 or 1 chars) as it's not part of the URL. -export const final = `((?<=\\b|_))${common}`; -export const finalNoLookbehindCompatMode = `(\\b|_)${common}`; - -export let finalRegex = new RegExp(finalNoLookbehindCompatMode, "gi"); -export let NO_LOOKBEHIND_COMPAT_MODE = false; -try { - finalRegex = new RegExp(final, "gi"); -} catch (e) { - finalRegex = new RegExp(finalNoLookbehindCompatMode, "gi"); - NO_LOOKBEHIND_COMPAT_MODE = true; -} +// Since Safari doesn't like lookbehind, we're trying an alternative. +// Upon matching, we truncate whatever is in the very first capture group +// ("" or "_") as it's not part of the URL. +export const final = `(\\b|_)((${email})|(${file})|(${url}))(\\b)?`; +export const finalRegex = new RegExp(final, "gi"); // for validation purposes export const ipRegex = new RegExp(`^(${ipv4}|${ipv6})$`, "i"); diff --git a/src/types.ts b/src/types.ts index e2889c0..3f3f57f 100644 --- a/src/types.ts +++ b/src/types.ts @@ -5,7 +5,7 @@ export interface BaseTokenProps { } export interface Email extends BaseTokenProps { - reason: 'email'; + reason: "email"; isEmail: true; isURL?: false; isFile?: false; @@ -15,7 +15,7 @@ export interface Email extends BaseTokenProps { } export interface URL extends BaseTokenProps { - reason: 'url'; + reason: "url"; isURL: true; isEmail?: false; isFile?: false; @@ -31,7 +31,7 @@ export interface URL extends BaseTokenProps { } export interface File extends BaseTokenProps { - reason: 'file'; + reason: "file"; isFile: true; isURL?: false; isEmail?: false; diff --git a/test/_global.test.ts b/test/_global.test.ts index 306f07b..070dabb 100644 --- a/test/_global.test.ts +++ b/test/_global.test.ts @@ -1,24 +1,14 @@ -// @ts-ignore -const { SIMULATE_NO_LOOKBEHIND } = process.env; - -if (SIMULATE_NO_LOOKBEHIND) { - const OriginalRegExp = globalThis.RegExp; - - class MockRegExp extends OriginalRegExp { - constructor(pattern: string | RegExp, flags?: string) { - super(pattern, flags) - if (String(pattern).includes('(?<=')) { - throw new SyntaxError('idk what (?<= even means') - } +const OriginalRegExp = globalThis.RegExp; + +// Mock RegExp constructor used in tests to ensure regexes contain no lookbehind syntax +class SafariRegExp extends OriginalRegExp { + constructor(pattern: string | RegExp, flags?: string) { + super(pattern, flags); + if (String(pattern).includes("(?<=")) { + throw new SyntaxError("Lookbehind syntax (?<=...) is disallowed to support legacy Safari"); } } - - // @ts-ignore - globalThis.RegExp = MockRegExp; } -import { NO_LOOKBEHIND_COMPAT_MODE } from '../src/regex'; - -if (NO_LOOKBEHIND_COMPAT_MODE !== Boolean(SIMULATE_NO_LOOKBEHIND)) { - throw new Error('Failed to set `NO_LOOKBEHIND_COMPAT_MODE` correctly.'); -} +// @ts-ignore +globalThis.RegExp = SafariRegExp; diff --git a/test/a_unit/groups.test.ts b/test/a_unit/groups.test.ts index c2ccef1..e4c9178 100644 --- a/test/a_unit/groups.test.ts +++ b/test/a_unit/groups.test.ts @@ -5,10 +5,64 @@ import * as expect from "expect"; describe("UNIT: groups", () => { it("groups", () => { const matches = anchorme.list( - `https://example.xyz example.com user@email.com file:///filename.txt 192.168.1.1`, + "https://example.xyz example.com user@email.com file:///filename.txt 192.168.1.1", false, ); + const expected = [ + { + start: 0, + end: 19, + string: "https://example.xyz", + isURL: true, + protocol: "https://", + host: "example.xyz", + confirmedByProtocol: true, + reason: "url", + }, + { + start: 20, + end: 31, + string: "example.com", + isURL: true, + host: "example.com", + confirmedByProtocol: false, + reason: "url", + }, + { + start: 32, + end: 46, + string: "user@email.com", + isEmail: true, + local: "user", + host: "email.com", + reason: "email", + }, + { + start: 47, + end: 67, + string: "file:///filename.txt", + isFile: true, + protocol: "file:///", + filename: "filename.txt", + filePath: "filename.txt", + fileDirectory: "", + reason: "file", + }, + { + start: 68, + end: 79, + string: "192.168.1.1", + isURL: true, + ipv4: "192.168.1.1", + host: "192.168.1.1", + confirmedByProtocol: false, + reason: "url", + }, + ]; + + expect(matches).toEqual(expected); + for (const match of matches) { switch (match.reason) { case "email": { @@ -20,13 +74,13 @@ describe("UNIT: groups", () => { match.protocol; match.local; match.host; - expect(match).toHaveProperty('protocol'); - expect(match).toHaveProperty('local'); - expect(match).toHaveProperty('host'); + expect(match).toHaveProperty("protocol"); + expect(match).toHaveProperty("local"); + expect(match).toHaveProperty("host"); // @ts-expect-error match.filename; - expect(match).not.toHaveProperty('filename'); + expect(match).not.toHaveProperty("filename"); break; } @@ -40,14 +94,14 @@ describe("UNIT: groups", () => { match.filename; match.filePath; match.fileDirectory; - expect(match).toHaveProperty('protocol'); - expect(match).toHaveProperty('filename'); - expect(match).toHaveProperty('filePath'); - expect(match).toHaveProperty('fileDirectory'); + expect(match).toHaveProperty("protocol"); + expect(match).toHaveProperty("filename"); + expect(match).toHaveProperty("filePath"); + expect(match).toHaveProperty("fileDirectory"); // @ts-expect-error match.fragment; - expect(match).not.toHaveProperty('fragment'); + expect(match).not.toHaveProperty("fragment"); break; } @@ -66,24 +120,24 @@ describe("UNIT: groups", () => { match.path; match.query; match.fragment; - expect(match).toHaveProperty('protocol'); - expect(match).toHaveProperty('host'); - expect(match).toHaveProperty('port'); - expect(match).toHaveProperty('ipv4'); - expect(match).toHaveProperty('ipv6'); - expect(match).toHaveProperty('confirmedByProtocol'); - expect(match).toHaveProperty('path'); - expect(match).toHaveProperty('query'); - expect(match).toHaveProperty('fragment'); + expect(match).toHaveProperty("protocol"); + expect(match).toHaveProperty("host"); + expect(match).toHaveProperty("port"); + expect(match).toHaveProperty("ipv4"); + expect(match).toHaveProperty("ipv6"); + expect(match).toHaveProperty("confirmedByProtocol"); + expect(match).toHaveProperty("path"); + expect(match).toHaveProperty("query"); + expect(match).toHaveProperty("fragment"); // @ts-expect-error match.local; - expect(match).not.toHaveProperty('local'); + expect(match).not.toHaveProperty("local"); break; } default: { - throw new Error('Unreachable'); + throw new Error("Unreachable"); } } } diff --git a/test/b_integration/variable.urls.test.ts b/test/b_integration/variable.urls.test.ts index 3e51e6a..c97b3f3 100644 --- a/test/b_integration/variable.urls.test.ts +++ b/test/b_integration/variable.urls.test.ts @@ -561,4 +561,4 @@ describe("variable URLs", ()=>{ }); }); }); -}); +}); \ No newline at end of file From ec2f85eb281388c66b95dfafa7bb0cf477d49b43 Mon Sep 17 00:00:00 2001 From: lionel-rowe Date: Sat, 11 May 2024 11:08:56 +0800 Subject: [PATCH 3/3] Also test for negative lookbehind in SafariRegExp mock --- test/_global.test.ts | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/_global.test.ts b/test/_global.test.ts index 070dabb..ad16a8d 100644 --- a/test/_global.test.ts +++ b/test/_global.test.ts @@ -4,8 +4,9 @@ const OriginalRegExp = globalThis.RegExp; class SafariRegExp extends OriginalRegExp { constructor(pattern: string | RegExp, flags?: string) { super(pattern, flags); - if (String(pattern).includes("(?<=")) { - throw new SyntaxError("Lookbehind syntax (?<=...) is disallowed to support legacy Safari"); + pattern = String(pattern) + if (pattern.includes("(?<=") || pattern.includes("(?