Skip to content

Commit f5ec29f

Browse files
committed
Consolidate string splitting tests into PreTokenizerTests
1 parent a3f17e3 commit f5ec29f

File tree

2 files changed

+64
-123
lines changed

2 files changed

+64
-123
lines changed

Tests/TokenizersTests/PreTokenizerTests.swift

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,70 @@ struct PreTokenizerTests {
176176
)
177177
}
178178

179+
/// Exercises `split(by:options:behavior:)` with `.mergedWithPrevious` on the "-" delimiter.
///
/// In every expected result a delimiter that follows text stays attached to the end of
/// that text ("the-", "final-"), while a delimiter with no text directly before it
/// (leading or repeated) is expected as a standalone "-" piece.
@Test("Split behavior merged with previous")
func splitBehaviorMergedWithPrevious() {
    // The call shape is shared by every expectation; only the input string varies.
    let pieces = { (text: String) in
        text.split(by: "-", options: .caseInsensitive, behavior: .mergedWithPrevious)
    }

    // No delimiter at either end of the input.
    #expect(pieces("the-final--countdown") == ["the-", "final-", "-", "countdown"])

    // Single trailing delimiter.
    #expect(pieces("the-final--countdown-") == ["the-", "final-", "-", "countdown-"])

    // Double trailing delimiter.
    #expect(pieces("the-final--countdown--") == ["the-", "final-", "-", "countdown-", "-"])

    // Single leading delimiter plus double trailing delimiter.
    #expect(pieces("-the-final--countdown--") == ["-", "the-", "final-", "-", "countdown-", "-"])

    // Double leading delimiter plus double trailing delimiter.
    #expect(pieces("--the-final--countdown--") == ["-", "-", "the-", "final-", "-", "countdown-", "-"])
}
206+
207+
/// Exercises `split(by:options:behavior:)` with `.mergedWithNext` on the "-" delimiter.
///
/// In every expected result a delimiter that precedes text is attached to the front of
/// that text ("-final", "-countdown"), while a delimiter with no text directly after it
/// (trailing or repeated) is expected as a standalone "-" piece.
@Test("Split behavior merged with next")
func splitBehaviorMergedWithNext() {
    // The call shape is shared by every expectation; only the input string varies.
    let pieces = { (text: String) in
        text.split(by: "-", options: .caseInsensitive, behavior: .mergedWithNext)
    }

    // No delimiter at either end of the input.
    #expect(pieces("the-final--countdown") == ["the", "-final", "-", "-countdown"])

    // Single leading delimiter.
    #expect(pieces("-the-final--countdown") == ["-the", "-final", "-", "-countdown"])

    // Double leading delimiter.
    #expect(pieces("--the-final--countdown") == ["-", "-the", "-final", "-", "-countdown"])

    // Double leading delimiter plus single trailing delimiter.
    #expect(pieces("--the-final--countdown-") == ["-", "-the", "-final", "-", "-countdown", "-"])
}
229+
230+
/// Exercises `split(by:options:behavior:)` with the `.isolated` and `.removed` behaviors
/// on the same input and the "-" delimiter.
@Test("Split behavior other")
func splitBehaviorOther() {
    let input = "the-final--countdown"

    // `.isolated`: each delimiter is expected as its own piece between the text pieces.
    let isolatedPieces = input.split(by: "-", options: .caseInsensitive, behavior: .isolated)
    #expect(isolatedPieces == ["the", "-", "final", "-", "-", "countdown"])

    // `.removed`: only the text pieces are expected; no "-" appears in the result.
    let keptPieces = input.split(by: "-", options: .caseInsensitive, behavior: .removed)
    #expect(keptPieces == ["the", "final", "countdown"])
}
242+
179243
/// https://github.com/huggingface/tokenizers/pull/1357
180244
@Test("Metaspace pre-tokenizer with prefix space handling")
181245
func metaspacePreTokenizer() {

Tests/TokenizersTests/SplitTests.swift

Lines changed: 0 additions & 123 deletions
This file was deleted.

0 commit comments

Comments
 (0)