diff --git a/src/regex.ts b/src/regex.ts index 17e86fe..f754017 100644 --- a/src/regex.ts +++ b/src/regex.ts @@ -2,8 +2,11 @@ import { nonLatinAlphabetRanges } from "./dictionary"; const emailAddress = "([\\w!#$%&'*+=?^`{|}~-]+(?:\\.[\\w!#$%&'*+=?^`{|}~-]+)*)"; const domain = `(?:(?:(?:[a-z\\d]|[a-z\\d][\\w\\-]*[a-z\\d]))\\.)+(xn--[a-z\\d]{2,}|[a-z]{2,})(?=[^.]|\\b)`; -const allowedInPath = `\\w\\-.~\\!$&*+,;=:@%'"\\[\\]()?#`; -const path = `((?:\/|\\?)(?:([${allowedInPath}${nonLatinAlphabetRanges}\\/](?:[\\w\\-~+=#&\\/${nonLatinAlphabetRanges}]|\\b)+)*)+)`; + +export const _allowedInPath = `\\w\\-~+=#&\\/${nonLatinAlphabetRanges}`; +export const _allowedAtStartOfPath = `.\\!$*,;:@%'"\\[\\]()?`; +const path = `([/?]([${_allowedAtStartOfPath}]?[${_allowedInPath}])*)`; + const ipv4 = `((?:(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d?)\\.){3}(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d?))`; const ipv6 = `\\[(?:(?:[a-f\\d:]+:+)+[a-f\\d]+)\\]`; const port = `(:(\\d{1,5}))?`; @@ -107,4 +110,4 @@ for (let i = 0; i < testers.length; i++) { console.log(JSON.stringify(iidxes)); */ -export { iidxes }; \ No newline at end of file +export { iidxes }; diff --git a/test/b_integration/issues.test.ts b/test/b_integration/issues.test.ts index a28fd8a..2bdb194 100644 --- a/test/b_integration/issues.test.ts +++ b/test/b_integration/issues.test.ts @@ -1,4 +1,6 @@ +import { nonLatinAlphabetRanges } from "../../dist/node/dictionary"; import anchorme from "../../dist/node/index"; +import { _allowedInPath, _allowedAtStartOfPath } from "../../dist/node/regex"; import * as expect from "expect"; describe("Issues", () => { /** @@ -68,4 +70,47 @@ describe("Issues", () => { `What's the best way to clean your smartphone? 📱🚿https://t.co/cxjsA6j60J` ); }); + + describe("Catastrophic backtracking - https://github.com/alexcorvi/anchorme.js/issues/115 and https://github.com/alexcorvi/anchorme.js/issues/82", () => { + it("check logic for backtrack-vulnerable sequence", () => { + const charsToTest = Array.from({ length: 1000 }, (_, i) => String.fromCodePoint(i)).join(""); + const matchedByAllowedInPath = new Set(charsToTest.match(new RegExp(`[${_allowedInPath}]`, "g"))); + const matchedByAllowedAtStartOfPath = new Set(charsToTest.match(new RegExp(`[${_allowedAtStartOfPath}]`, "g"))); + + // no overlap between chars matched by `_allowedInPath` and `_allowedAtStartOfPath` + expect(new Set(Array.from(matchedByAllowedInPath).concat(Array.from(matchedByAllowedAtStartOfPath))).size) + .toBe(matchedByAllowedInPath.size + matchedByAllowedAtStartOfPath.size); + }); + + // should be significantly less than this but we allow some leeway to avoid flaky tests + const MAX_MILLISECONDS_PER_VALIDATION = 50; + + const examplesFromIssues = [ + "https://respond.vitally.io/work/team/users/6e92f9e7-2204-478c-9a7f-965bdd54dd0e@", + "https://pages.getpostman.com/rs/067-UMD-991/images/ban-api-builder (1).jpg", + "https://en.wikipedia.org/wiki/Robert_Cranston_(Scottish_politician)", + "https://en.wikipedia.org/wiki/Robert_Cranston(abcdefg)", + "https://en.wikipedia.org/wiki/Robert_Cranston(a)", + ]; + + for (const example of examplesFromIssues) { + it(example, () => { + const start = Date.now(); + anchorme.validate.url(example); + expect(Date.now() - start).toBeLessThan(MAX_MILLISECONDS_PER_VALIDATION); + }); + } + + it("very long path `aaaaaa...`", () => { + const start = Date.now(); + anchorme.validate.url(`https://example.com/${"a".repeat(1000)}@`); + expect(Date.now() - start).toBeLessThan(MAX_MILLISECONDS_PER_VALIDATION); + }); + + it("very long path `@a@a@a@a@a@a...`", () => { + const start = Date.now(); + anchorme.validate.url(`https://example.com/${"@a".repeat(1000)}@`); + expect(Date.now() - start).toBeLessThan(MAX_MILLISECONDS_PER_VALIDATION); + }); + }); });