From d13e21a9ded7fc5d73d1ef9e4761305b0e4d0ad1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=A9my?= Date: Thu, 29 Sep 2022 12:08:55 +0200 Subject: [PATCH] Parse more efficiently by preventing unnecessary String allocation and skip cleaning operations as already done by the updater. --- .../DomainParser/BasicRulesParser.swift | 2 +- DomainParser/DomainParser/DomainParser.swift | 2 +- DomainParser/DomainParser/Model/Rule.swift | 12 +++++------ .../DomainParser/Model/RuleLabel.swift | 6 +++--- DomainParser/DomainParser/RulesParser.swift | 20 ++++++++----------- script/UpdatePSL.swift | 4 +++- 6 files changed, 22 insertions(+), 24 deletions(-) diff --git a/DomainParser/DomainParser/BasicRulesParser.swift b/DomainParser/DomainParser/BasicRulesParser.swift index 44cc60f..e5bd2dc 100644 --- a/DomainParser/DomainParser/BasicRulesParser.swift +++ b/DomainParser/DomainParser/BasicRulesParser.swift @@ -16,7 +16,7 @@ public struct BasicRulesParser { } public func parse(host: String) -> ParsedHost? { let lowercasedHost = host.lowercased() - let hostComponents = lowercasedHost.components(separatedBy: ".") + let hostComponents = lowercasedHost.split(separator: ".") var hostSlices = ArraySlice(hostComponents) /// A host must have at least two parts else it's a TLD diff --git a/DomainParser/DomainParser/DomainParser.swift b/DomainParser/DomainParser/DomainParser.swift index 89f2c39..e90e779 100644 --- a/DomainParser/DomainParser/DomainParser.swift +++ b/DomainParser/DomainParser/DomainParser.swift @@ -41,7 +41,7 @@ public struct DomainParser { } func parseExceptionsAndWildCardRules(host: String) -> ParsedHost? { - let hostComponents = host.components(separatedBy: ".") + let hostComponents = host.split(separator: ".") let isMatching: (Rule) -> Bool = { $0.isMatching(hostLabels: hostComponents) } let rule = parsedRules.exceptions.first(where: isMatching) ?? parsedRules.wildcardRules.first(where: isMatching) return rule?.parse(hostLabels: hostComponents) diff --git a/DomainParser/DomainParser/Model/Rule.swift b/DomainParser/DomainParser/Model/Rule.swift index 4718601..e638a32 100644 --- a/DomainParser/DomainParser/Model/Rule.swift +++ b/DomainParser/DomainParser/Model/Rule.swift @@ -23,12 +23,12 @@ struct Rule { /// Score used to sort the rules. If a URL match multiple rules, the one with the highest Score is prevailing let rankingScore: Int - init(raw: String) { + init(raw: Substring) { /// If the line starts with "!" it's an exceptional Rule exception = raw.starts(with: C.exceptionMarker) - source = exception ? String(raw.dropFirst()) : raw - parts = source.components(separatedBy: ".").map(RuleLabel.init) + source = exception ? String(raw.dropFirst()) : String(raw) + parts = source.split(separator: ".").map(RuleLabel.init) /// Exceptions should have a higher Rank than regular rules rankingScore = (exception ? 1000 : 0) + parts.count @@ -44,7 +44,7 @@ extension Rule { /// - Beginning with the right-most labels of both the domain and the rule, /// and continuing for all labels in the rule, one finds that for every pair, /// either they are identical, or that the label from the rule is "*". - func isMatching(hostLabels: [String]) -> Bool { + func isMatching(hostLabels: [Substring]) -> Bool { let delta = hostLabels.count - self.parts.count /// The url should have at least the same number of labels than the url @@ -55,7 +55,7 @@ extension Rule { let zipped = zip(self.parts, trimmedHostLabels) /// Closure that check if a RuleLabel match a given string - let matchingClosure:(RuleLabel, String) -> Bool = {ruleComponent, hostComponent in + let matchingClosure:(RuleLabel, Substring) -> Bool = {ruleComponent, hostComponent in return ruleComponent.isMatching(label: hostComponent) } @@ -73,7 +73,7 @@ extension Rule { /// ⚠️ Should be called only for host matching the rule - func parse(hostLabels: [String]) -> ParsedHost { + func parse(hostLabels: [Substring]) -> ParsedHost { let partsCount = parts.count - (self.exception ? 1 : 0) let delta = hostLabels.count - partsCount diff --git a/DomainParser/DomainParser/Model/RuleLabel.swift b/DomainParser/DomainParser/Model/RuleLabel.swift index 5488c71..f764b5c 100644 --- a/DomainParser/DomainParser/Model/RuleLabel.swift +++ b/DomainParser/DomainParser/Model/RuleLabel.swift @@ -14,12 +14,12 @@ enum RuleLabel { /// Wildcards are not restricted to appear only in the leftmost position, but they must wildcard an entire label. (I.e. *.*.foo is a valid rule: *bar.foo is not.) case wildcard - init(fromComponent component: String) { - self = component == Constant.wildcardComponent ? .wildcard : .text(component) + init(fromComponent component: Substring) { + self = component == Constant.wildcardComponent ? .wildcard : .text(String(component)) } /// Return true if self matches the given label - func isMatching(label: String) -> Bool { + func isMatching(label: Substring) -> Bool { switch self { case let .text(text): return text == label diff --git a/DomainParser/DomainParser/RulesParser.swift b/DomainParser/DomainParser/RulesParser.swift index a2a7682..ade2441 100644 --- a/DomainParser/DomainParser/RulesParser.swift +++ b/DomainParser/DomainParser/RulesParser.swift @@ -22,24 +22,20 @@ class RulesParser { throw DomainParserError.parsingError(details: nil) } rulesText - .components(separatedBy: .newlines) + .split(separator: "\n") .forEach(parseRule) return ParsedRules.init(exceptions: exceptions, wildcardRules: wildcardRules, basicRules: basicRules) } - - private func parseRule(line: String) { - guard let trimmedLine = line.components(separatedBy: .whitespaces).first, - !trimmedLine.isComment && !trimmedLine.isEmpty else { return } - - /// From `publicsuffix.org/list/` Each line is only read up to the first whitespace; entire lines can also be commented using //. - if trimmedLine.contains("*") { - wildcardRules.append(Rule(raw: trimmedLine)) - } else if trimmedLine.starts(with: "!") { - exceptions.append(Rule(raw: trimmedLine)) + + private func parseRule(line: Substring) { + if line.contains("*") { + wildcardRules.append(Rule(raw: line)) + } else if line.starts(with: "!") { + exceptions.append(Rule(raw: line)) } else { - basicRules.insert(trimmedLine) + basicRules.insert(String(line)) } } } diff --git a/script/UpdatePSL.swift b/script/UpdatePSL.swift index 92ed464..b850a04 100644 --- a/script/UpdatePSL.swift +++ b/script/UpdatePSL.swift @@ -44,7 +44,8 @@ struct PublicSuffixListMinimifier { init(data: Data) { self.data = data } - // A valid line is a non-empty, non-comment line + + /// A valid line is a non-empty, non-comment line func isLineValid(line: String) -> Bool { return !line.isEmpty && !line.starts(with: "//") } @@ -52,6 +53,7 @@ struct PublicSuffixListMinimifier { func minimify() throws -> Data { guard let stringifiedData = String.init(data: data, encoding: .utf8) else { throw ErrorType.notUTF8Convertible(data: data) } + // From `publicsuffix.org/list/` Each line is only read up to the first whitespace; entire lines can also be commented using //. let validLinesArray = stringifiedData.components(separatedBy: .newlines) .map { $0.trimmingCharacters(in: CharacterSet.whitespaces) } .compactMap { $0.components(separatedBy: CharacterSet.whitespaces).first }