Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Perf: speed up reject parsing even more
Browse files Browse the repository at this point in the history
SukkaW committed Dec 17, 2023
1 parent e2f14d9 commit 292d154
Showing 1 changed file with 115 additions and 84 deletions.
199 changes: 115 additions & 84 deletions Build/lib/parse-filter.ts
Original file line number Diff line number Diff line change
@@ -5,7 +5,7 @@ import { processLine } from './process-line';
import { getGorhillPublicSuffixPromise } from './get-gorhill-publicsuffix';
import type { PublicSuffixList } from '@gorhill/publicsuffixlist';

import { traceAsync } from './trace-runner';
import { traceAsync, traceSync } from './trace-runner';
import picocolors from 'picocolors';
import { normalizeDomain } from './normalize-domain';
import { fetchAssets } from './fetch-assets';
@@ -156,9 +156,13 @@ export async function processFilterRules(
() => fetchAssets(filterRulesUrl, fallbackUrls),
picocolors.gray
)).split('\n');

const key = picocolors.gray(`- parse adguard filter ${filterRulesUrl}`);
console.time(key);
for (let i = 0, len = filterRules.length; i < len; i++) {
lineCb(filterRules[i]);
}
console.timeEnd(key);
}
});

@@ -305,11 +309,11 @@ function parse($line: string, gorhill: PublicSuffixList): null | [hostname: stri
}

/** @example line.endsWith('^') */
const linedEndsWithCaret = lastCharCode === 94; // lastChar === '^';
const lineEndsWithCaret = lastCharCode === 94; // lastChar === '^';
/** @example line.endsWith('^|') */
const lineEndsWithCaretVerticalBar = (lastCharCode === 124 /** lastChar === '|' */) && line[len - 2] === '^';
/** @example line.endsWith('^') || line.endsWith('^|') */
const lineEndsWithCaretOrCaretVerticalBar = linedEndsWithCaret || lineEndsWithCaretVerticalBar;
const lineEndsWithCaretOrCaretVerticalBar = lineEndsWithCaret || lineEndsWithCaretVerticalBar;

// whitelist (exception)
if (
@@ -386,7 +390,11 @@ function parse($line: string, gorhill: PublicSuffixList): null | [hostname: stri
];
}

if (firstCharCode === 124) { // 124 `|`
if (
// 124 `|`
// line.startsWith('|')
firstCharCode === 124
) {
if (lineEndsWithCaretOrCaretVerticalBar) {
/**
* Some malformed filters can not be parsed by NetworkFilter:
@@ -432,75 +440,78 @@ function parse($line: string, gorhill: PublicSuffixList): null | [hostname: stri
* `.m.bookben.com^`
* `.wap.x4399.com^`
*/
const _domain = line.slice(
const sliced = line.slice(
1, // remove prefix dot
linedEndsWithCaret // replaceAll('^', '')
lineEndsWithCaret // replaceAll('^', '')
? -1
: (lineEndsWithCaretVerticalBar ? -2 : 0) // replace('^|', '')
: (lineEndsWithCaretVerticalBar ? -2 : undefined) // replace('^|', '')
);

const suffix = gorhill.getPublicSuffix(_domain);
const suffix = gorhill.getPublicSuffix(sliced);
if (!gorhill.suffixInPSL(suffix)) {
// This excludes domain-like resources such as `1.1.4.514.js`
return null;
}

const domain = normalizeDomain(_domain);
const domain = normalizeDomain(sliced);
if (domain) {
return [domain, ParseType.BlackIncludeSubdomain];
}

return [
`[paparse-filter E0003] (black) invalid domain: ${_domain}`,
`[paparse-filter E0003] (black) invalid domain: ${sliced}`,
ParseType.ErrorMessage
];
}

/**
* `|http://x.o2.pl^`
* `://mine.torrent.pw^`
* `://say.ac^`
*/
if (
(
line.startsWith('://')
|| line.startsWith('http://')
|| line.startsWith('https://')
|| line.startsWith('|http://')
|| line.startsWith('|https://')
)
&& lineEndsWithCaretOrCaretVerticalBar
) {
const _domain = line
.replace('|https://', '')
.replace('https://', '')
.replace('|http://', '')
.replace('http://', '')
.replace('://', '')
.replace('^|', '')
.replaceAll('^', '')
.trim();

const domain = normalizeDomain(_domain);
if (domain) {
return [domain, ParseType.BlackAbsolute];
* `|http://x.o2.pl^`
* `://mine.torrent.pw^`
* `://say.ac^`
*/
if (lineEndsWithCaretOrCaretVerticalBar) {
let sliceStart = 0;
let sliceEnd;
if (lineEndsWithCaret) { // line.endsWith('^')
sliceEnd = -1;
} else if (lineEndsWithCaretVerticalBar) { // line.endsWith('^|')
sliceEnd = -2;
}
if (line.startsWith('://')) {
sliceStart = 3;
} else if (line.startsWith('http://')) {
sliceStart = 7;
} else if (line.startsWith('https://')) {
sliceStart = 8;
} else if (line.startsWith('|http://')) {
sliceStart = 8;
} else if (line.startsWith('|https://')) {
sliceStart = 9;
}

return [
`[parse-filter E0004] (black) invalid domain: ${_domain}`,
ParseType.ErrorMessage
];
if (sliceStart !== 0 || sliceEnd !== undefined) {
const sliced = line.slice(sliceStart, sliceEnd);
const domain = normalizeDomain(sliced);
if (domain) {
return [domain, ParseType.BlackIncludeSubdomain];
}
return [
`[parse-filter E0004] (black) invalid domain: ${JSON.stringify({
line, sliced, sliceStart, sliceEnd
})}`,
ParseType.ErrorMessage
];
}
}

/**
* `_vmind.qqvideo.tc.qq.com^`
* `arketing.indianadunes.com^`
* `charlestownwyllie.oaklawnnonantum.com^`
* `-telemetry.officeapps.live.com^`
* `-tracker.biliapi.net`
* `-logging.nextmedia.com`
* `_social_tracking.js^`
*/
* `_vmind.qqvideo.tc.qq.com^`
* `arketing.indianadunes.com^`
* `charlestownwyllie.oaklawnnonantum.com^`
* `-telemetry.officeapps.live.com^`
* `-tracker.biliapi.net`
* `-logging.nextmedia.com`
* `_social_tracking.js^`
*/
if (
firstCharCode !== 124 // 124 `|`
&& lastCharCode === 94 // 94 `^`
@@ -524,43 +535,63 @@ function parse($line: string, gorhill: PublicSuffixList): null | [hostname: stri
];
}

// Possibly the entire rule is a domain

/**
* lineStartsWithSingleDot:
*
* `.cookielaw.js`
* `.content_tracking.js`
* `.ads.css`
*
* else:
*
* `_prebid.js`
* `t.yesware.com`
* `ubmcmm.baidustatic.com`
* `://www.smfg-card.$document`
* `portal.librus.pl$$advertisement-module`
* `@@-ds.metric.gstatic.com^|`
* `://gom.ge/cookie.js`
* `://accout-update-smba.jp.$document`
* `_200x250.png`
* `@@://www.liquidweb.com/kb/wp-content/themes/lw-kb-theme/images/ads/vps-sidebar.jpg`
*/
let sliceStart = 0;
let sliceEnd: number | undefined;
if (lineStartsWithSingleDot) {
/**
* `.cookielaw.js`
* `.content_tracking.js`
* `.ads.css`
*/
const _domain = line.slice(1);
sliceStart = 1;
}
if (line.endsWith('^$all')) { // This salvage line `thepiratebay3.com^$all`
sliceEnd = -5;
} else if (
// Try to salvage lines like `://account.smba.$document`.
// This specific line will still fail anyway, though.
line.endsWith('$document')
) {
sliceEnd = -9;
}
const sliced = (sliceStart !== 0 || sliceEnd !== undefined) ? line.slice(sliceStart, sliceEnd) : line;

const suffix = gorhill.getPublicSuffix(_domain);
if (!suffix || !gorhill.suffixInPSL(suffix)) {
// This excludes domain-like resources such as `.gatracking.js`, `.beacon.min.js` and `.cookielaw.js`
return null;
}
const suffix = gorhill.getPublicSuffix(sliced);
/**
* Quickly exclude resources that are definitely not domain-like
*
* `.gatracking.js`, suffix is `js`,
* `.ads.css`, suffix is `css`,
* `-cpm-ads.$badfilter`, suffix is `$badfilter`,
* `portal.librus.pl$$advertisement-module`, suffix is `pl$$advertisement-module`
*/
if (!suffix || !gorhill.suffixInPSL(suffix)) {
// This excludes domain-like resources such as `.gatracking.js`, `.beacon.min.js` and `.cookielaw.js`
console.log({ line, suffix });
return null;
}

const tryNormalizeDomain = normalizeDomain(_domain);
if (tryNormalizeDomain === _domain) {
// the entire rule is domain
return [line, ParseType.BlackIncludeSubdomain];
}
} else {
/**
* `_prebid.js`
* `t.yesware.com`
* `ubmcmm.baidustatic.com`
* `://www.smfg-card.$document`
* `portal.librus.pl$$advertisement-module`
* `@@-ds.metric.gstatic.com^|`
* `://gom.ge/cookie.js`
* `://accout-update-smba.jp.$document`
* `_200x250.png`
* `@@://www.liquidweb.com/kb/wp-content/themes/lw-kb-theme/images/ads/vps-sidebar.jpg`
*/
const tryNormalizeDomain = normalizeDomain(line);
if (tryNormalizeDomain === line) {
// the entire rule is domain
return [line, ParseType.BlackIncludeSubdomain];
}
const tryNormalizeDomain = normalizeDomain(sliced);
if (tryNormalizeDomain === sliced) {
// the entire rule is a domain
return [line, ParseType.BlackIncludeSubdomain];
}

return [

0 comments on commit 292d154

Please sign in to comment.