Skip to content

Commit

Permalink
Merge pull request #11 from Dashlane/optimisation
Browse files Browse the repository at this point in the history
Parse optimisation
  • Loading branch information
kodlian committed Sep 29, 2022
2 parents f5b1b12 + d13e21a commit ac1df32
Show file tree
Hide file tree
Showing 6 changed files with 22 additions and 24 deletions.
2 changes: 1 addition & 1 deletion DomainParser/DomainParser/BasicRulesParser.swift
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ public struct BasicRulesParser {
}
public func parse(host: String) -> ParsedHost? {
let lowercasedHost = host.lowercased()
let hostComponents = lowercasedHost.components(separatedBy: ".")
let hostComponents = lowercasedHost.split(separator: ".")
var hostSlices = ArraySlice(hostComponents)

/// A host must have at least two parts; otherwise it's a TLD
Expand Down
2 changes: 1 addition & 1 deletion DomainParser/DomainParser/DomainParser.swift
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ public struct DomainParser {
}

func parseExceptionsAndWildCardRules(host: String) -> ParsedHost? {
let hostComponents = host.components(separatedBy: ".")
let hostComponents = host.split(separator: ".")
let isMatching: (Rule) -> Bool = { $0.isMatching(hostLabels: hostComponents) }
let rule = parsedRules.exceptions.first(where: isMatching) ?? parsedRules.wildcardRules.first(where: isMatching)
return rule?.parse(hostLabels: hostComponents)
Expand Down
12 changes: 6 additions & 6 deletions DomainParser/DomainParser/Model/Rule.swift
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,12 @@ struct Rule {
/// Score used to sort the rules. If a URL matches multiple rules, the one with the highest score prevails.
let rankingScore: Int

init(raw: String) {
init(raw: Substring) {

/// If the line starts with "!", it's an exception rule
exception = raw.starts(with: C.exceptionMarker)
source = exception ? String(raw.dropFirst()) : raw
parts = source.components(separatedBy: ".").map(RuleLabel.init)
source = exception ? String(raw.dropFirst()) : String(raw)
parts = source.split(separator: ".").map(RuleLabel.init)

/// Exceptions should have a higher Rank than regular rules
rankingScore = (exception ? 1000 : 0) + parts.count
Expand All @@ -44,7 +44,7 @@ extension Rule {
/// - Beginning with the right-most labels of both the domain and the rule,
/// and continuing for all labels in the rule, one finds that for every pair,
/// either they are identical, or that the label from the rule is "*".
func isMatching(hostLabels: [String]) -> Bool {
func isMatching(hostLabels: [Substring]) -> Bool {
let delta = hostLabels.count - self.parts.count

/// The host should have at least as many labels as the rule
Expand All @@ -55,7 +55,7 @@ extension Rule {

let zipped = zip(self.parts, trimmedHostLabels)
/// Closure that checks whether a RuleLabel matches a given string
let matchingClosure:(RuleLabel, String) -> Bool = {ruleComponent, hostComponent in
let matchingClosure:(RuleLabel, Substring) -> Bool = {ruleComponent, hostComponent in
return ruleComponent.isMatching(label: hostComponent)
}

Expand All @@ -73,7 +73,7 @@ extension Rule {


/// ⚠️ Should be called only for hosts matching the rule
func parse(hostLabels: [String]) -> ParsedHost {
func parse(hostLabels: [Substring]) -> ParsedHost {
let partsCount = parts.count - (self.exception ? 1 : 0)
let delta = hostLabels.count - partsCount

Expand Down
6 changes: 3 additions & 3 deletions DomainParser/DomainParser/Model/RuleLabel.swift
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@ enum RuleLabel {
/// Wildcards are not restricted to appear only in the leftmost position, but they must wildcard an entire label. (I.e. *.*.foo is a valid rule: *bar.foo is not.)
case wildcard

init(fromComponent component: String) {
self = component == Constant.wildcardComponent ? .wildcard : .text(component)
init(fromComponent component: Substring) {
self = component == Constant.wildcardComponent ? .wildcard : .text(String(component))
}

/// Return true if self matches the given label
func isMatching(label: String) -> Bool {
func isMatching(label: Substring) -> Bool {
switch self {
case let .text(text):
return text == label
Expand Down
20 changes: 8 additions & 12 deletions DomainParser/DomainParser/RulesParser.swift
Original file line number Diff line number Diff line change
Expand Up @@ -22,24 +22,20 @@ class RulesParser {
throw DomainParserError.parsingError(details: nil)
}
rulesText
.components(separatedBy: .newlines)
.split(separator: "\n")
.forEach(parseRule)
return ParsedRules.init(exceptions: exceptions,
wildcardRules: wildcardRules,
basicRules: basicRules)
}

private func parseRule(line: String) {
guard let trimmedLine = line.components(separatedBy: .whitespaces).first,
!trimmedLine.isComment && !trimmedLine.isEmpty else { return }

/// From `publicsuffix.org/list/`: each line is only read up to the first whitespace; entire lines can also be commented using //.
if trimmedLine.contains("*") {
wildcardRules.append(Rule(raw: trimmedLine))
} else if trimmedLine.starts(with: "!") {
exceptions.append(Rule(raw: trimmedLine))

private func parseRule(line: Substring) {
if line.contains("*") {
wildcardRules.append(Rule(raw: line))
} else if line.starts(with: "!") {
exceptions.append(Rule(raw: line))
} else {
basicRules.insert(trimmedLine)
basicRules.insert(String(line))
}
}
}
Expand Down
4 changes: 3 additions & 1 deletion script/UpdatePSL.swift
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,16 @@ struct PublicSuffixListMinimifier {
/// Creates an instance that keeps `data` in its `data` property.
init(data: Data) {
self.data = data
}
// A valid line is a non-empty, non-comment line

/// Tells whether `line` carries content: it must be non-empty and must not begin with the `//` comment marker.
func isLineValid(line: String) -> Bool {
    if line.isEmpty {
        return false
    }
    let isComment = line.starts(with: "//")
    return !isComment
}

func minimify() throws -> Data {
guard let stringifiedData = String.init(data: data, encoding: .utf8) else { throw ErrorType.notUTF8Convertible(data: data) }

// From `publicsuffix.org/list/`: each line is only read up to the first whitespace; entire lines can also be commented using //.
let validLinesArray = stringifiedData.components(separatedBy: .newlines)
.map { $0.trimmingCharacters(in: CharacterSet.whitespaces) }
.compactMap { $0.components(separatedBy: CharacterSet.whitespaces).first }
Expand Down

0 comments on commit ac1df32

Please sign in to comment.