diff --git a/src/index.ts b/src/index.ts index 0c48a8b..9a164c2 100644 --- a/src/index.ts +++ b/src/index.ts @@ -25,6 +25,15 @@ const list = function (input: string, skipHTML:boolean=true) { while ((result = finalRegex.exec(input)) !== null) { const start = result.index; + + // To support environments without lookbehind, we use a normal capture + // group to check for presence of "_", then truncate it if it exists as + // it's not part of the URL. + if (result[1]) { + result.index -= result[1].length; + result[0] = result[0].slice(result[1].length); + } + let end = start + result[0].length; let string = result[0]; @@ -152,12 +161,7 @@ const list = function (input: string, skipHTML:boolean=true) { reason: "email", }); } else { - found.push({ - start, - end, - string, - reason: "unknown", - }); + throw new Error("Unreachable"); } } return found; diff --git a/src/regex.ts b/src/regex.ts index 17e86fe..542523d 100644 --- a/src/regex.ts +++ b/src/regex.ts @@ -15,16 +15,11 @@ export const email = `\\b(mailto:)?${emailAddress}@(${domain}|${ipv4})`; export const url = `(${fqdn})${path}?`; export const file = `(file:\\/\\/\\/)(?:[a-z]+:(?:\\/|\\\\)+)?([\\w.]+(?:[\\/\\\\]?)+)+`; -// since safari doesn't like lookbehind, we're trying an alternative -export const final1 = `(?<=\\b|_)((${email})|(${file})|(${url}))(\\b)?`; -export const final2 = `((\\b)(${email})|(\\b)(${file})|(\\b)(${url}))(\\b)?`; - -export let finalRegex = new RegExp(final2, "gi"); -try { - finalRegex = new RegExp(final1, "gi"); -} catch (e) { - finalRegex = new RegExp(final2, "gi"); -} +// Since Safari doesn't like lookbehind, we're trying an alternative. +// Upon matching, we truncate whatever is in the very first capture group +// ("" or "_") as it's not part of the URL. +export const final = `(\\b|_)((${email})|(${file})|(${url}))(\\b)?`; +export const finalRegex = new RegExp(final, "gi"); // for validation purposes export const ipRegex = new RegExp(`^(${ipv4}|${ipv6})$`, "i"); @@ -35,7 +30,7 @@ export const urlRegex = new RegExp(`^(${url})$`, "i"); // identifying parts of the link // the initial value of this object is precomputed. // https://github.com/alexcorvi/anchorme.js/blob/098843bc0d042601cff592c4f8c9f6d0424c09cd/src/regex.ts -const iidxes = {"isFile":8,"file":{"fileName":10,"protocol":9},"isEmail":2,"email":{"protocol":3,"local":4,"host":5},"isURL":11,"url":{"TLD":[18,6],"protocol":[15,22],"host":[17],"ipv4":19,"byProtocol":13,"port":21,"protocolWithDomain":12,"path":24}}; +const iidxes = {"isFile":9,"file":{"fileName":11,"protocol":10},"isEmail":3,"email":{"protocol":4,"local":5,"host":6},"isURL":12,"url":{"TLD":[19,7],"protocol":[16,23],"host":[18],"ipv4":20,"byProtocol":14,"port":22,"protocolWithDomain":13,"path":25}}; /*** @@ -107,4 +102,4 @@ for (let i = 0; i < testers.length; i++) { console.log(JSON.stringify(iidxes)); */ -export { iidxes }; \ No newline at end of file +export { iidxes }; diff --git a/src/types.ts b/src/types.ts index 3571139..3f3f57f 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1,40 +1,47 @@ export interface BaseTokenProps { start: number; end: number; - reason: string; string: string; } export interface Email extends BaseTokenProps { + reason: "email"; isEmail: true; - protocol: string; + isURL?: false; + isFile?: false; + protocol?: string; local: string; host: string; } export interface URL extends BaseTokenProps { + reason: "url"; isURL: true; - protocol: string; - host: string; - port: string; - ipv4: string; - ipv6: string; + isEmail?: false; + isFile?: false; + protocol?: string; + host?: string; + port?: string; + ipv4?: string; + ipv6?: string; confirmedByProtocol: boolean; - path: string; - query: string; - fragment: string; + path?: string; + query?: string; + fragment?: string; } export interface File extends BaseTokenProps { + reason: "file"; isFile: true; - filename: string; + isURL?: false; + isEmail?: false; + protocol: string; + filename?: string; filePath: string; - fileDirectory: string; + fileDirectory?: string; } -type TokenProps = Email & File & URL; - -export type ListingProps = Partial & BaseTokenProps; +export type ListingProps = Email | File | URL; export type DesiredValues = | { [key: string]: string | undefined | true } diff --git a/test/_global.test.ts b/test/_global.test.ts new file mode 100644 index 0000000..ad16a8d --- /dev/null +++ b/test/_global.test.ts @@ -0,0 +1,15 @@ +const OriginalRegExp = globalThis.RegExp; + +// Mock RegExp constructor used in tests to ensure regexes contain no lookbehind syntax +class SafariRegExp extends OriginalRegExp { + constructor(pattern: string | RegExp, flags?: string) { + super(pattern, flags); + pattern = String(pattern) + if (pattern.includes("(?<=") || pattern.includes("(? +import anchorme from "../../src/index"; +import * as expect from "expect"; + +describe("UNIT: groups", () => { + it("groups", () => { + const matches = anchorme.list( + "https://example.xyz example.com user@email.com file:///filename.txt 192.168.1.1", + false, + ); + + const expected = [ + { + start: 0, + end: 19, + string: "https://example.xyz", + isURL: true, + protocol: "https://", + host: "example.xyz", + confirmedByProtocol: true, + reason: "url", + }, + { + start: 20, + end: 31, + string: "example.com", + isURL: true, + host: "example.com", + confirmedByProtocol: false, + reason: "url", + }, + { + start: 32, + end: 46, + string: "user@email.com", + isEmail: true, + local: "user", + host: "email.com", + reason: "email", + }, + { + start: 47, + end: 67, + string: "file:///filename.txt", + isFile: true, + protocol: "file:///", + filename: "filename.txt", + filePath: "filename.txt", + fileDirectory: "", + reason: "file", + }, + { + start: 68, + end: 79, + string: "192.168.1.1", + isURL: true, + ipv4: "192.168.1.1", + host: "192.168.1.1", + confirmedByProtocol: false, + reason: "url", + }, + ]; + + expect(matches).toEqual(expected); + + for (const match of matches) { + switch (match.reason) { + case "email": { + expect(match.isEmail).toBe(true); + expect(match.isFile).toBeFalsy(); + expect(match.isURL).toBeFalsy(); + + // ts + match.protocol; + match.local; + match.host; + expect(match).toHaveProperty("protocol"); + expect(match).toHaveProperty("local"); + expect(match).toHaveProperty("host"); + + // @ts-expect-error + match.filename; + expect(match).not.toHaveProperty("filename"); + + break; + } + case "file": { + expect(match.isFile).toBe(true); + expect(match.isEmail).toBeFalsy(); + expect(match.isURL).toBeFalsy(); + + // ts + match.protocol; + match.filename; + match.filePath; + match.fileDirectory; + expect(match).toHaveProperty("protocol"); + expect(match).toHaveProperty("filename"); + expect(match).toHaveProperty("filePath"); + expect(match).toHaveProperty("fileDirectory"); + + // @ts-expect-error + match.fragment; + expect(match).not.toHaveProperty("fragment"); + + break; + } + case "url": { + expect(match.isURL).toBe(true); + expect(match.isEmail).toBeFalsy(); + expect(match.isFile).toBeFalsy(); + + // ts + match.protocol; + match.host; + match.port; + match.ipv4; + match.ipv6; + match.confirmedByProtocol; + match.path; + match.query; + match.fragment; + expect(match).toHaveProperty("protocol"); + expect(match).toHaveProperty("host"); + expect(match).toHaveProperty("port"); + expect(match).toHaveProperty("ipv4"); + expect(match).toHaveProperty("ipv6"); + expect(match).toHaveProperty("confirmedByProtocol"); + expect(match).toHaveProperty("path"); + expect(match).toHaveProperty("query"); + expect(match).toHaveProperty("fragment"); + + // @ts-expect-error + match.local; + expect(match).not.toHaveProperty("local"); + + break; + } + default: { + throw new Error("Unreachable"); + } + } + } + }); +});