Skip to content

Commit

Permalink
Parse more efficiently by preventing unnecessary String allocation an…
Browse files Browse the repository at this point in the history
…d skip cleaning operations as already done by the updater.
  • Loading branch information
kodlian committed Sep 29, 2022
1 parent f5b1b12 commit d13e21a
Show file tree
Hide file tree
Showing 6 changed files with 22 additions and 24 deletions.
2 changes: 1 addition & 1 deletion DomainParser/DomainParser/BasicRulesParser.swift
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ public struct BasicRulesParser {
}
public func parse(host: String) -> ParsedHost? {
let lowercasedHost = host.lowercased()
let hostComponents = lowercasedHost.components(separatedBy: ".")
let hostComponents = lowercasedHost.split(separator: ".")
var hostSlices = ArraySlice(hostComponents)

/// A host must have at least two parts; otherwise it is just a TLD
Expand Down
2 changes: 1 addition & 1 deletion DomainParser/DomainParser/DomainParser.swift
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ public struct DomainParser {
}

func parseExceptionsAndWildCardRules(host: String) -> ParsedHost? {
let hostComponents = host.components(separatedBy: ".")
let hostComponents = host.split(separator: ".")
let isMatching: (Rule) -> Bool = { $0.isMatching(hostLabels: hostComponents) }
let rule = parsedRules.exceptions.first(where: isMatching) ?? parsedRules.wildcardRules.first(where: isMatching)
return rule?.parse(hostLabels: hostComponents)
Expand Down
12 changes: 6 additions & 6 deletions DomainParser/DomainParser/Model/Rule.swift
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,12 @@ struct Rule {
/// Score used to sort the rules. If a URL matches multiple rules, the one with the highest score prevails.
let rankingScore: Int

init(raw: String) {
init(raw: Substring) {

/// If the line starts with "!" it's an exceptional Rule
exception = raw.starts(with: C.exceptionMarker)
source = exception ? String(raw.dropFirst()) : raw
parts = source.components(separatedBy: ".").map(RuleLabel.init)
source = exception ? String(raw.dropFirst()) : String(raw)
parts = source.split(separator: ".").map(RuleLabel.init)

/// Exceptions should have a higher Rank than regular rules
rankingScore = (exception ? 1000 : 0) + parts.count
Expand All @@ -44,7 +44,7 @@ extension Rule {
/// - Beginning with the right-most labels of both the domain and the rule,
/// and continuing for all labels in the rule, one finds that for every pair,
/// either they are identical, or that the label from the rule is "*".
func isMatching(hostLabels: [String]) -> Bool {
func isMatching(hostLabels: [Substring]) -> Bool {
let delta = hostLabels.count - self.parts.count

/// The host should have at least as many labels as the rule
Expand All @@ -55,7 +55,7 @@ extension Rule {

let zipped = zip(self.parts, trimmedHostLabels)
/// Closure that checks whether a RuleLabel matches a given host label
let matchingClosure:(RuleLabel, String) -> Bool = {ruleComponent, hostComponent in
let matchingClosure:(RuleLabel, Substring) -> Bool = {ruleComponent, hostComponent in
return ruleComponent.isMatching(label: hostComponent)
}

Expand All @@ -73,7 +73,7 @@ extension Rule {


/// ⚠️ Should be called only for hosts matching the rule
func parse(hostLabels: [String]) -> ParsedHost {
func parse(hostLabels: [Substring]) -> ParsedHost {
let partsCount = parts.count - (self.exception ? 1 : 0)
let delta = hostLabels.count - partsCount

Expand Down
6 changes: 3 additions & 3 deletions DomainParser/DomainParser/Model/RuleLabel.swift
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@ enum RuleLabel {
/// Wildcards are not restricted to appear only in the leftmost position, but they must wildcard an entire label. (I.e. *.*.foo is a valid rule: *bar.foo is not.)
case wildcard

init(fromComponent component: String) {
self = component == Constant.wildcardComponent ? .wildcard : .text(component)
init(fromComponent component: Substring) {
self = component == Constant.wildcardComponent ? .wildcard : .text(String(component))
}

/// Return true if self matches the given label
func isMatching(label: String) -> Bool {
func isMatching(label: Substring) -> Bool {
switch self {
case let .text(text):
return text == label
Expand Down
20 changes: 8 additions & 12 deletions DomainParser/DomainParser/RulesParser.swift
Original file line number Diff line number Diff line change
Expand Up @@ -22,24 +22,20 @@ class RulesParser {
throw DomainParserError.parsingError(details: nil)
}
rulesText
.components(separatedBy: .newlines)
.split(separator: "\n")
.forEach(parseRule)
return ParsedRules.init(exceptions: exceptions,
wildcardRules: wildcardRules,
basicRules: basicRules)
}

private func parseRule(line: String) {
guard let trimmedLine = line.components(separatedBy: .whitespaces).first,
!trimmedLine.isComment && !trimmedLine.isEmpty else { return }

/// From `publicsuffix.org/list/` Each line is only read up to the first whitespace; entire lines can also be commented using //.
if trimmedLine.contains("*") {
wildcardRules.append(Rule(raw: trimmedLine))
} else if trimmedLine.starts(with: "!") {
exceptions.append(Rule(raw: trimmedLine))

private func parseRule(line: Substring) {
if line.contains("*") {
wildcardRules.append(Rule(raw: line))
} else if line.starts(with: "!") {
exceptions.append(Rule(raw: line))
} else {
basicRules.insert(trimmedLine)
basicRules.insert(String(line))
}
}
}
Expand Down
4 changes: 3 additions & 1 deletion script/UpdatePSL.swift
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,16 @@ struct PublicSuffixListMinimifier {
init(data: Data) {
self.data = data
}
// A valid line is a non-empty, non-comment line

/// Tells whether `line` should be kept: it must be neither empty nor a `//` comment.
///
/// - Parameter line: The line to inspect.
/// - Returns: `false` for an empty line or one starting with `//`, `true` otherwise.
func isLineValid(line: String) -> Bool {
    guard !line.isEmpty else { return false }
    let isComment = line.starts(with: "//")
    return !isComment
}

func minimify() throws -> Data {
guard let stringifiedData = String.init(data: data, encoding: .utf8) else { throw ErrorType.notUTF8Convertible(data: data) }

// From `publicsuffix.org/list/` Each line is only read up to the first whitespace; entire lines can also be commented using //.
let validLinesArray = stringifiedData.components(separatedBy: .newlines)
.map { $0.trimmingCharacters(in: CharacterSet.whitespaces) }
.compactMap { $0.components(separatedBy: CharacterSet.whitespaces).first }
Expand Down

0 comments on commit d13e21a

Please sign in to comment.