Skip to content

Commit

Permalink
Perf: reduce operations in normalizeDomain
Browse files Browse the repository at this point in the history
  • Loading branch information
SukkaW committed Jan 17, 2025
1 parent c07fe1d commit c397fc1
Show file tree
Hide file tree
Showing 6 changed files with 30 additions and 24 deletions.
29 changes: 15 additions & 14 deletions Build/lib/normalize-domain.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,28 +6,29 @@ import { normalizeTldtsOpt } from '../constants/loose-tldts-opt';

type TldTsParsed = ReturnType<typeof tldts.parse>;

export function normalizeDomain(domain: string, parsed: TldTsParsed | null = null) {
if (domain.length === 0) return null;

parsed ??= tldts.parse(domain, normalizeTldtsOpt);

/**
* Skips the non-empty check on the input: callers must ensure `domain` is not an empty string.
*/
export function fastNormalizeDomain(domain: string, parsed: TldTsParsed = tldts.parse(domain, normalizeTldtsOpt)) {
if (parsed.isIp) return null;

let h = parsed.hostname;
const h = parsed.hostname;
if (h === null) return null;
// Private invalid domain (things like .tor, .dn42, etc)
if (!parsed.isIcann && !parsed.isPrivate) return null;

let sliceStart = 0;
let sliceEnd = 0;
return h.length > 0 ? h : null;
}

if (h[0] === '.') sliceStart = 1;
// eslint-disable-next-line sukka/string/prefer-string-starts-ends-with -- performance
if (h[h.length - 1] === '.') sliceEnd = -1;
export function normalizeDomain(domain: string, parsed: TldTsParsed = tldts.parse(domain, normalizeTldtsOpt)) {
if (domain.length === 0) return null;

if (sliceStart !== 0 || sliceEnd !== 0) {
h = h.slice(sliceStart, sliceEnd);
}
if (parsed.isIp) return null;

const h = parsed.hostname;
if (h === null) return null;
// Private invalid domain (things like .tor, .dn42, etc)
if (!parsed.isIcann && !parsed.isPrivate) return null;

return h.length > 0 ? h : null;
}
4 changes: 2 additions & 2 deletions Build/lib/parse-filter/domainlists.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import picocolors from 'picocolors';
import { normalizeDomain } from '../normalize-domain';
import { fastNormalizeDomain } from '../normalize-domain';
import { processLine } from '../process-line';
import { onBlackFound } from './shared';
import { fetchAssetsWithout304 } from '../fetch-assets';
Expand All @@ -9,7 +9,7 @@ function domainListLineCb(l: string, set: string[], includeAllSubDomain: boolean
const line = processLine(l);
if (!line) return;

const domain = normalizeDomain(line);
const domain = fastNormalizeDomain(line);
if (!domain) return;
if (domain !== line) {
console.log(
Expand Down
11 changes: 8 additions & 3 deletions Build/lib/parse-filter/filters.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import type { Span } from '../../trace';
import { fetchAssetsWithout304 } from '../fetch-assets';
import { onBlackFound, onWhiteFound } from './shared';
import { createRetrieKeywordFilter as createKeywordFilter } from 'foxts/retrie';
import { normalizeDomain } from '../normalize-domain';
import { fastNormalizeDomain } from '../normalize-domain';
import { looseTldtsOpt } from '../../constants/loose-tldts-opt';
import tldts from 'tldts-experimental';
import { NetworkFilter } from '@ghostery/adblocker';
Expand Down Expand Up @@ -227,7 +227,7 @@ export function parse($line: string, result: [string, ParseType], allowThirdPart
&& filter.isPlain() // isPlain() === !isRegex()
&& (!filter.isFullRegex())
) {
const hostname = normalizeDomain(filter.hostname);
const hostname = fastNormalizeDomain(filter.hostname);
if (!hostname) {
result[1] = ParseType.Null;
return result;
Expand Down Expand Up @@ -421,6 +421,11 @@ export function parse($line: string, result: [string, ParseType], allowThirdPart
}

const sliced = (sliceStart > 0 || sliceEnd < 0) ? line.slice(sliceStart, sliceEnd === 0 ? undefined : sliceEnd) : line;
if (sliced.length === 0) {
result[1] = ParseType.Null;
return result;
}

if (sliced.charCodeAt(0) === 45 /* - */) {
// line.startsWith('-') is not a valid domain
result[1] = ParseType.ErrorMessage;
Expand All @@ -437,7 +442,7 @@ export function parse($line: string, result: [string, ParseType], allowThirdPart
return result;
}

const domain = normalizeDomain(sliced);
const domain = fastNormalizeDomain(sliced);

if (domain && domain === sliced) {
result[0] = domain;
Expand Down
4 changes: 2 additions & 2 deletions Build/lib/parse-filter/hosts.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import type { Span } from '../../trace';
import { fetchAssetsWithout304 } from '../fetch-assets';
import { normalizeDomain } from '../normalize-domain';
import { fastNormalizeDomain } from '../normalize-domain';
import { processLine } from '../process-line';
import { onBlackFound } from './shared';

Expand All @@ -14,7 +14,7 @@ function hostsLineCb(l: string, set: string[], includeAllSubDomain: boolean, met
if (!_domain) {
return;
}
const domain = normalizeDomain(_domain);
const domain = fastNormalizeDomain(_domain);
if (!domain) {
return;
}
Expand Down
4 changes: 2 additions & 2 deletions Build/validate-gfwlist.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { processLine } from './lib/process-line';
import { normalizeDomain } from './lib/normalize-domain';
import { fastNormalizeDomain } from './lib/normalize-domain';
import { HostnameSmolTrie } from './lib/trie';
// import { Readable } from 'stream';
import { parse } from 'csv-parse/sync';
Expand Down Expand Up @@ -54,7 +54,7 @@ export async function parseGfwList() {
trie.add(line);
continue;
}
const d = normalizeDomain(line);
const d = fastNormalizeDomain(line);
if (d) {
trie.add(d);
continue;
Expand Down
2 changes: 1 addition & 1 deletion Source/stream.ts
Original file line number Diff line number Diff line change
Expand Up @@ -879,7 +879,7 @@ export const HK: StreamService[] = [
NOW_E,
VIUTV,
MYTV_SUPER,
HBO_ASIA,
HBO_ASIA
// BILIBILI_INTL
];

Expand Down

0 comments on commit c397fc1

Please sign in to comment.