-
Notifications
You must be signed in to change notification settings - Fork 77
/
CSV.swift
117 lines (103 loc) · 3.04 KB
/
CSV.swift
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import Benchmark
import Foundation
import Parsing
/// Benchmarks a CSV parser (with support for quoted fields) assembled from parser-printer
/// building blocks against a hand-written implementation based on mutating methods at a
/// comparable level of abstraction.
let csvSuite = BenchmarkSuite(name: "CSV") { suite in
  // One field: either wrapped in double quotes, or running up to the next delimiter.
  struct FieldParser: ParserPrinter {
    var body: some ParserPrinter<Substring.UTF8View, String> {
      OneOf {
        // Quoted form: every byte between a pair of double quotes.
        Parse {
          "\"".utf8
          Prefix { byte in byte != UTF8.CodeUnit(ascii: "\"") }
          "\"".utf8
        }
        // Plain form: every byte before the next comma or newline.
        Prefix { byte in
          !(byte == UTF8.CodeUnit(ascii: ",") || byte == UTF8.CodeUnit(ascii: "\n"))
        }
      }
      .map(.string)
    }
  }

  // One line: fields separated by commas.
  struct LineParser: ParserPrinter {
    var body: some ParserPrinter<Substring.UTF8View, [String]> {
      Many {
        FieldParser()
      } separator: {
        ",".utf8
      }
    }
  }

  // A whole document: lines separated by newlines, terminated by the end of the input.
  struct CSVParser: ParserPrinter {
    var body: some ParserPrinter<Substring.UTF8View, [[String]]> {
      Many {
        LineParser()
      } separator: {
        "\n".utf8
      } terminator: {
        End()
      }
    }
  }

  let rowCount = 1_000
  let columnCount = 5
  let parser = CSVParser()

  // Written by each benchmark body and inspected by its tear-down.
  var rows: [[String]] = []

  // Shape check shared by both tear-downs: the expected number of rows, each with the
  // expected number of columns.
  func checkShape() {
    precondition(rows.count == rowCount)
    precondition(rows.allSatisfy { $0.count == columnCount })
  }

  suite.benchmark("Parser") {
    rows = try parser.parse(csvInput)
  } tearDown: {
    checkShape()
    // Printing the parsed rows and parsing the result again must reproduce the same rows.
    precondition(try! parser.parse(parser.print(rows)) == rows)
  }

  suite.benchmark("Ad hoc mutating methods") {
    var remaining = csvInput[...].utf8
    rows = remaining.parseCsv()
  } tearDown: {
    checkShape()
  }
}
/// Hand-written CSV parsing helpers.
///
/// Every method consumes bytes from the front of the view it is called on and returns what it
/// parsed; whatever has not been consumed yet remains in `self`.
extension Substring.UTF8View {
  /// Consumes the whole view and returns one array of fields per line.
  ///
  /// - Returns: The parsed rows; an empty view yields an empty array.
  fileprivate mutating func parseCsv() -> [[String]] {
    var results: [[String]] = []
    while !self.isEmpty {
      results.append(self.parseLine())
    }
    return results
  }

  /// Consumes fields up to and including the next newline, or through the end of the input if
  /// there is no further newline.
  ///
  /// - Returns: The fields of the consumed line.
  fileprivate mutating func parseLine() -> [String] {
    var row: [String] = []
    while !self.isEmpty {
      row.append(self.parseField())
      if self.first == UTF8.CodeUnit(ascii: "\n") {
        self.removeFirst()
        break
      } else if self.first == UTF8.CodeUnit(ascii: ",") {
        self.removeFirst()
        // A comma that is the very last byte of the input still introduces a final, empty
        // field. The loop is about to exit because the view is empty, so record that field
        // here; otherwise "a,b," would yield two fields while "a,b,\n" yields three.
        if self.isEmpty {
          row.append("")
        }
      }
    }
    return row
  }

  /// Consumes a single field, choosing the quoted form when the next byte is a double quote and
  /// the plain form otherwise.
  ///
  /// - Returns: The field's contents, without surrounding quotes.
  fileprivate mutating func parseField() -> String {
    if self.first == UTF8.CodeUnit(ascii: "\"") {
      return String(Substring(self.parseQuotedField()))
    } else {
      return String(Substring(self.parsePlainField()))
    }
  }

  /// Consumes a quoted field: the opening quote, every byte up to the next double quote, and the
  /// closing quote if there is one.
  ///
  /// The view must start with the opening double quote. An unterminated field (no closing quote
  /// before the end of the input) runs to the end of the input instead of trapping.
  ///
  /// - Returns: The bytes between the quotes.
  fileprivate mutating func parseQuotedField() -> Substring.UTF8View {
    self.removeFirst()
    let field = self.remove(while: { $0 != UTF8.CodeUnit(ascii: "\"") })
    // `removeFirst()` requires a non-empty collection; the view is empty here when the closing
    // quote is missing.
    if !self.isEmpty {
      self.removeFirst()
    }
    return field
  }

  /// Consumes an unquoted field: every byte before the next comma or newline.
  ///
  /// - Returns: The consumed bytes, possibly empty. The delimiter itself is not consumed.
  fileprivate mutating func parsePlainField() -> Substring.UTF8View {
    self.remove(while: { $0 != UTF8.CodeUnit(ascii: "\n") && $0 != UTF8.CodeUnit(ascii: ",") })
  }

  /// Removes and returns the longest prefix whose bytes all satisfy `condition`.
  ///
  /// - Parameter condition: Predicate evaluated on each byte from the front of the view.
  /// - Returns: The removed prefix, possibly empty.
  fileprivate mutating func remove(
    while condition: (Substring.UTF8View.Element) -> Bool
  ) -> Substring.UTF8View {
    let prefix = self.prefix(while: condition)
    // The prefix is a slice of this view and shares its indices, so the remainder can be taken
    // directly from the prefix's end index rather than counting the prefix and advancing by
    // that count.
    self = self[prefix.endIndex...]
    return prefix
  }
}