Skip to content

Commit daba6df

Browse files
committed
Extract String extensions to separate file
1 parent d706d44 commit daba6df

File tree

2 files changed

+156
-155
lines changed

2 files changed

+156
-155
lines changed

Sources/Tokenizers/PreTokenizer.swift

Lines changed: 0 additions & 155 deletions
Original file line numberDiff line numberDiff line change
@@ -238,158 +238,3 @@ class SplitPreTokenizer: PreTokenizer {
238238
return pattern.split(text, invert: invert)
239239
}
240240
}
241-
242-
/// A pattern used to split text: either a regular expression or a literal string.
enum StringSplitPattern {
    /// Split on matches of a regular expression.
    case regexp(regexp: String)
    /// Split on occurrences of a literal string.
    case string(pattern: String)

    /// Splits `text` according to this pattern.
    ///
    /// - Parameters:
    ///   - text: The text to split.
    ///   - invert: Only consulted for `.string` patterns, where separators are kept when
    ///     `invert` is `false`. `.regexp` patterns always keep their separators.
    /// - Returns: The pieces produced by `String.split(by:options:includeSeparators:)`.
    func split(_ text: String, invert: Bool = true) -> [String] {
        switch self {
        case .regexp(let expression):
            return text.split(by: expression, includeSeparators: true)
        case .string(let literal):
            let keepSeparators = !invert
            return text.split(by: literal, options: [], includeSeparators: keepSeparators)
        }
    }

    /// Builds a pattern from `config.pattern`.
    ///
    /// The `String` entry is tried first, then the `Regex` entry.
    ///
    /// - Parameter config: The configuration holding the `pattern` entry.
    /// - Returns: The matching pattern, or `nil` when neither entry yields a string.
    static func from(config: Config) -> StringSplitPattern? {
        if let literal = config.pattern.String.string() {
            return .string(pattern: literal)
        }
        guard let expression = config.pattern.Regex.string() else {
            return nil
        }
        return .regexp(regexp: expression)
    }
}
265-
266-
/// How a delimiter is treated when a string is split around it.
///
/// - `removed`: the delimiter is dropped from the output.
/// - `isolated`: the delimiter is emitted as its own piece.
/// - `mergedWithPrevious`: the delimiter is attached to the end of the piece before it.
/// - `mergedWithNext`: the delimiter is attached to the start of the piece after it.
enum SplitDelimiterBehavior {
    case removed, isolated, mergedWithPrevious, mergedWithNext
}
272-
273-
extension String {
    /// Returns the range of every successive occurrence of `string` in the receiver.
    ///
    /// - Parameters:
    ///   - string: The text to search for; interpreted as a regular expression under the default `options`.
    ///   - options: Comparison options forwarded to `range(of:options:range:)`.
    /// - Returns: The matched ranges in the order they were found. A zero-width match is
    ///   reported as an empty range.
    func ranges(of string: String, options: CompareOptions = .regularExpression) -> [Range<Index>] {
        var result: [Range<Index>] = []
        var start = startIndex
        while let match = range(of: string, options: options, range: start..<endIndex) {
            result.append(match)
            if !match.isEmpty {
                start = match.upperBound
            } else if match.lowerBound < endIndex {
                // Step over one character after a zero-width match so the search makes progress.
                start = index(match.lowerBound, offsetBy: 1, limitedBy: endIndex) ?? endIndex
            } else {
                // A zero-width match at the very end cannot be stepped over; stop instead of looping forever.
                break
            }
        }
        return result
    }

    /// Splits the receiver around every occurrence of `string`.
    ///
    /// - Parameters:
    ///   - string: The separator; interpreted as a regular expression under the default `options`.
    ///   - options: Comparison options forwarded to `range(of:options:range:)`.
    ///   - includeSeparators: When `true`, each matched separator is emitted as its own element.
    ///   - omittingEmptySubsequences: When `true`, empty pieces between separators are dropped;
    ///     when `false`, they are kept as empty strings.
    /// - Returns: The pieces in order, interleaved with the separators when requested.
    func split(by string: String, options: CompareOptions = .regularExpression, includeSeparators: Bool = false, omittingEmptySubsequences: Bool = true) -> [String] {
        var result: [String] = []
        var pieceStart = startIndex // Start of the text that has not been emitted yet.
        var searchStart = startIndex // Where the next search begins.
        while let match = range(of: string, options: options, range: searchStart..<endIndex) {
            if match.isEmpty {
                // A zero-width match separates nothing: skip past it so the loop always advances,
                // and stop once there is nothing left to skip.
                guard match.lowerBound < endIndex else { break }
                searchStart = index(match.lowerBound, offsetBy: 1, limitedBy: endIndex) ?? endIndex
                continue
            }
            // Prevent empty strings unless the caller asked to keep them
            if !omittingEmptySubsequences || pieceStart < match.lowerBound {
                result.append(String(self[pieceStart..<match.lowerBound]))
            }
            if includeSeparators {
                result.append(String(self[match]))
            }
            pieceStart = match.upperBound
            searchStart = match.upperBound
        }

        if !omittingEmptySubsequences || pieceStart < endIndex {
            result.append(String(self[pieceStart...]))
        }
        return result
    }

    /// Splits the receiver around the matches of `captureRegex`.
    ///
    /// This version supports capture groups, whereas the `String`-based overloads don't.
    /// For each match, the non-empty text before it is emitted, followed by the separator:
    /// the highest-numbered capture group that maps to a valid range, falling back to the
    /// whole match.
    ///
    /// - Parameter captureRegex: The regular expression whose matches act as separators.
    /// - Returns: The pieces and separators in order, or `[self]` when nothing matches.
    func split(by captureRegex: NSRegularExpression) -> [String] {
        // Find the matching capture groups
        let selfRange = NSRange(startIndex..<endIndex, in: self)
        let matches = captureRegex.matches(in: self, options: [], range: selfRange)

        if matches.isEmpty { return [self] }

        var result: [String] = []
        var start = startIndex

        for match in matches {
            // IMPORTANT: convert from NSRange to Range<String.Index>
            // https://stackoverflow.com/questions/75543272/convert-a-given-utf8-nsrange-in-a-string-to-a-utf16-nsrange
            guard let matchRange = Range(match.range, in: self) else { continue }

            // Add text before the match
            if start < matchRange.lowerBound {
                result.append(String(self[start..<matchRange.lowerBound]))
            }

            // Move start to after the match
            start = matchRange.upperBound

            // Append separator, supporting capture groups: walk from the last group down to
            // the whole match (index 0) and take the first one that converts to a range.
            for groupIndex in (0..<match.numberOfRanges).reversed() {
                let nsRange = match.range(at: groupIndex)
                if let sepRange = Range(nsRange, in: self) {
                    result.append(String(self[sepRange]))
                    break
                }
            }
        }

        // Append remaining suffix
        if start < endIndex {
            result.append(String(self[start...]))
        }

        return result
    }

    /// Splits the receiver around every occurrence of `string`, treating the delimiter as `behavior` dictates.
    ///
    /// - Parameters:
    ///   - string: The delimiter; interpreted as a regular expression under the default `options`.
    ///   - options: Comparison options forwarded to the underlying search.
    ///   - behavior: Whether the delimiter is removed, isolated, or merged into a neighbouring piece.
    /// - Returns: The resulting pieces in order.
    func split(by string: String, options: CompareOptions = .regularExpression, behavior: SplitDelimiterBehavior) -> [String] {
        /// Builds ranges in which each delimiter starts the piece that follows it.
        func mergedWithNext(ranges: [Range<String.Index>]) -> [Range<String.Index>] {
            var merged: [Range<String.Index>] = []
            var currentStart = startIndex
            for range in ranges {
                // A delimiter at the very start has no preceding text to close off.
                if range.lowerBound == startIndex { continue }
                let mergedRange = currentStart..<range.lowerBound
                currentStart = range.lowerBound
                merged.append(mergedRange)
            }
            if currentStart < endIndex {
                merged.append(currentStart..<endIndex)
            }
            return merged
        }

        /// Builds ranges in which each delimiter ends the piece that precedes it.
        func mergedWithPrevious(ranges: [Range<String.Index>]) -> [Range<String.Index>] {
            var merged: [Range<String.Index>] = []
            var currentStart = startIndex
            for range in ranges {
                let mergedRange = currentStart..<range.upperBound
                currentStart = range.upperBound
                merged.append(mergedRange)
            }
            if currentStart < endIndex {
                merged.append(currentStart..<endIndex)
            }
            return merged
        }

        switch behavior {
        case .removed:
            return split(by: string, options: options, includeSeparators: false)
        case .isolated:
            return split(by: string, options: options, includeSeparators: true)
        case .mergedWithNext:
            // Obtain ranges and merge them
            // "the-final--countdown" split by "-" -> ["the", "-final", "-", "-countdown"]
            let delimiterRanges = ranges(of: string, options: options)
            let merged = mergedWithNext(ranges: delimiterRanges)
            return merged.map { String(self[$0]) }
        case .mergedWithPrevious:
            // Obtain ranges and merge them
            // "the-final--countdown" split by "-" -> ["the-", "final-", "-", "countdown"]
            let delimiterRanges = ranges(of: string, options: options)
            let merged = mergedWithPrevious(ranges: delimiterRanges)
            return merged.map { String(self[$0]) }
        }
    }
}
Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
import Foundation
2+
3+
/// A pattern used to split text: either a regular expression or a literal string.
enum StringSplitPattern {
    /// Split on matches of a regular expression.
    case regexp(regexp: String)
    /// Split on occurrences of a literal string.
    case string(pattern: String)

    /// Splits `text` according to this pattern.
    ///
    /// - Parameters:
    ///   - text: The text to split.
    ///   - invert: Only consulted for `.string` patterns, where separators are kept when
    ///     `invert` is `false`. `.regexp` patterns always keep their separators.
    /// - Returns: The pieces produced by `String.split(by:options:includeSeparators:)`.
    func split(_ text: String, invert: Bool = true) -> [String] {
        switch self {
        case .regexp(let expression):
            return text.split(by: expression, includeSeparators: true)
        case .string(let literal):
            let keepSeparators = !invert
            return text.split(by: literal, options: [], includeSeparators: keepSeparators)
        }
    }

    /// Builds a pattern from `config.pattern`.
    ///
    /// The `String` entry is tried first, then the `Regex` entry.
    ///
    /// - Parameter config: The configuration holding the `pattern` entry.
    /// - Returns: The matching pattern, or `nil` when neither entry yields a string.
    static func from(config: Config) -> StringSplitPattern? {
        if let literal = config.pattern.String.string() {
            return .string(pattern: literal)
        }
        guard let expression = config.pattern.Regex.string() else {
            return nil
        }
        return .regexp(regexp: expression)
    }
}
26+
27+
/// How a delimiter is treated when a string is split around it.
///
/// - `removed`: the delimiter is dropped from the output.
/// - `isolated`: the delimiter is emitted as its own piece.
/// - `mergedWithPrevious`: the delimiter is attached to the end of the piece before it.
/// - `mergedWithNext`: the delimiter is attached to the start of the piece after it.
enum SplitDelimiterBehavior {
    case removed, isolated, mergedWithPrevious, mergedWithNext
}
33+
34+
extension String {
    /// Returns the range of every successive occurrence of `string` in the receiver.
    ///
    /// - Parameters:
    ///   - string: The text to search for; interpreted as a regular expression under the default `options`.
    ///   - options: Comparison options forwarded to `range(of:options:range:)`.
    /// - Returns: The matched ranges in the order they were found. A zero-width match is
    ///   reported as an empty range.
    func ranges(of string: String, options: CompareOptions = .regularExpression) -> [Range<Index>] {
        var result: [Range<Index>] = []
        var start = startIndex
        while let match = range(of: string, options: options, range: start..<endIndex) {
            result.append(match)
            if !match.isEmpty {
                start = match.upperBound
            } else if match.lowerBound < endIndex {
                // Step over one character after a zero-width match so the search makes progress.
                start = index(match.lowerBound, offsetBy: 1, limitedBy: endIndex) ?? endIndex
            } else {
                // A zero-width match at the very end cannot be stepped over; stop instead of looping forever.
                break
            }
        }
        return result
    }

    /// Splits the receiver around every occurrence of `string`.
    ///
    /// - Parameters:
    ///   - string: The separator; interpreted as a regular expression under the default `options`.
    ///   - options: Comparison options forwarded to `range(of:options:range:)`.
    ///   - includeSeparators: When `true`, each matched separator is emitted as its own element.
    ///   - omittingEmptySubsequences: When `true`, empty pieces between separators are dropped;
    ///     when `false`, they are kept as empty strings.
    /// - Returns: The pieces in order, interleaved with the separators when requested.
    func split(by string: String, options: CompareOptions = .regularExpression, includeSeparators: Bool = false, omittingEmptySubsequences: Bool = true) -> [String] {
        var result: [String] = []
        var pieceStart = startIndex // Start of the text that has not been emitted yet.
        var searchStart = startIndex // Where the next search begins.
        while let match = range(of: string, options: options, range: searchStart..<endIndex) {
            if match.isEmpty {
                // A zero-width match separates nothing: skip past it so the loop always advances,
                // and stop once there is nothing left to skip.
                guard match.lowerBound < endIndex else { break }
                searchStart = index(match.lowerBound, offsetBy: 1, limitedBy: endIndex) ?? endIndex
                continue
            }
            // Prevent empty strings unless the caller asked to keep them
            if !omittingEmptySubsequences || pieceStart < match.lowerBound {
                result.append(String(self[pieceStart..<match.lowerBound]))
            }
            if includeSeparators {
                result.append(String(self[match]))
            }
            pieceStart = match.upperBound
            searchStart = match.upperBound
        }

        if !omittingEmptySubsequences || pieceStart < endIndex {
            result.append(String(self[pieceStart...]))
        }
        return result
    }

    /// Splits the receiver around the matches of `captureRegex`.
    ///
    /// This version supports capture groups, whereas the `String`-based overloads don't.
    /// For each match, the non-empty text before it is emitted, followed by the separator:
    /// the highest-numbered capture group that maps to a valid range, falling back to the
    /// whole match.
    ///
    /// - Parameter captureRegex: The regular expression whose matches act as separators.
    /// - Returns: The pieces and separators in order, or `[self]` when nothing matches.
    func split(by captureRegex: NSRegularExpression) -> [String] {
        // Find the matching capture groups
        let selfRange = NSRange(startIndex..<endIndex, in: self)
        let matches = captureRegex.matches(in: self, options: [], range: selfRange)

        if matches.isEmpty { return [self] }

        var result: [String] = []
        var start = startIndex

        for match in matches {
            // IMPORTANT: convert from NSRange to Range<String.Index>
            // https://stackoverflow.com/questions/75543272/convert-a-given-utf8-nsrange-in-a-string-to-a-utf16-nsrange
            guard let matchRange = Range(match.range, in: self) else { continue }

            // Add text before the match
            if start < matchRange.lowerBound {
                result.append(String(self[start..<matchRange.lowerBound]))
            }

            // Move start to after the match
            start = matchRange.upperBound

            // Append separator, supporting capture groups: walk from the last group down to
            // the whole match (index 0) and take the first one that converts to a range.
            for groupIndex in (0..<match.numberOfRanges).reversed() {
                let nsRange = match.range(at: groupIndex)
                if let sepRange = Range(nsRange, in: self) {
                    result.append(String(self[sepRange]))
                    break
                }
            }
        }

        // Append remaining suffix
        if start < endIndex {
            result.append(String(self[start...]))
        }

        return result
    }

    /// Splits the receiver around every occurrence of `string`, treating the delimiter as `behavior` dictates.
    ///
    /// - Parameters:
    ///   - string: The delimiter; interpreted as a regular expression under the default `options`.
    ///   - options: Comparison options forwarded to the underlying search.
    ///   - behavior: Whether the delimiter is removed, isolated, or merged into a neighbouring piece.
    /// - Returns: The resulting pieces in order.
    func split(by string: String, options: CompareOptions = .regularExpression, behavior: SplitDelimiterBehavior) -> [String] {
        /// Builds ranges in which each delimiter starts the piece that follows it.
        func mergedWithNext(ranges: [Range<String.Index>]) -> [Range<String.Index>] {
            var merged: [Range<String.Index>] = []
            var currentStart = startIndex
            for range in ranges {
                // A delimiter at the very start has no preceding text to close off.
                if range.lowerBound == startIndex { continue }
                let mergedRange = currentStart..<range.lowerBound
                currentStart = range.lowerBound
                merged.append(mergedRange)
            }
            if currentStart < endIndex {
                merged.append(currentStart..<endIndex)
            }
            return merged
        }

        /// Builds ranges in which each delimiter ends the piece that precedes it.
        func mergedWithPrevious(ranges: [Range<String.Index>]) -> [Range<String.Index>] {
            var merged: [Range<String.Index>] = []
            var currentStart = startIndex
            for range in ranges {
                let mergedRange = currentStart..<range.upperBound
                currentStart = range.upperBound
                merged.append(mergedRange)
            }
            if currentStart < endIndex {
                merged.append(currentStart..<endIndex)
            }
            return merged
        }

        switch behavior {
        case .removed:
            return split(by: string, options: options, includeSeparators: false)
        case .isolated:
            return split(by: string, options: options, includeSeparators: true)
        case .mergedWithNext:
            // Obtain ranges and merge them
            // "the-final--countdown" split by "-" -> ["the", "-final", "-", "-countdown"]
            let delimiterRanges = ranges(of: string, options: options)
            let merged = mergedWithNext(ranges: delimiterRanges)
            return merged.map { String(self[$0]) }
        case .mergedWithPrevious:
            // Obtain ranges and merge them
            // "the-final--countdown" split by "-" -> ["the-", "final-", "-", "countdown"]
            let delimiterRanges = ranges(of: string, options: options)
            let merged = mergedWithPrevious(ranges: delimiterRanges)
            return merged.map { String(self[$0]) }
        }
    }
}

0 commit comments

Comments
 (0)