Skip to content

Commit 94e9312

Browse files
committed
add tokenizerFromLocalFolder test
1 parent f781c3b commit 94e9312

File tree

1 file changed

+35
-0
lines changed

1 file changed

+35
-0
lines changed

Tests/TokenizersTests/TokenizerTests.swift

Lines changed: 35 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -177,6 +177,41 @@ struct TokenizerTests {
177177
#expect(tokenizer.encode(text: "<|im_start|>user<|im_sep|>Who are you?<|im_end|><|im_start|>assistant<|im_sep|>") == [100264, 882, 100266, 15546, 527, 499, 30, 100265, 100264, 78191, 100266])
178178
}
179179

180+
/// Builds a tokenizer from the `Offline` fixture folder bundled with the test
/// target and checks that it can encode a short string.
///
/// The test constructs a `LanguageModelConfigurationFromHub` from the folder
/// that contains `tokenizer_config.json`, loads the tokenizer config and
/// tokenizer data through it, and hands both to `AutoTokenizer.from`.
/// NOTE(review): presumably this path performs no network access — confirm
/// against `LanguageModelConfigurationFromHub(modelFolder:)`.
@Test
func tokenizerFromLocalFolder() async throws {
    let bundle = Bundle.module

    // Both fixture files must be present in the bundle; only the config URL is
    // needed afterwards, because the model folder is derived from it.
    guard
        let tokenizerConfigURL = bundle.url(
            forResource: "tokenizer_config",
            withExtension: "json",
            subdirectory: "Offline"
        ),
        bundle.url(
            forResource: "tokenizer",
            withExtension: "json",
            subdirectory: "Offline"
        ) != nil
    else {
        Issue.record("Missing offline tokenizer fixtures")
        return
    }

    let configuration = LanguageModelConfigurationFromHub(modelFolder: tokenizerConfigURL.deletingLastPathComponent())

    // `#require` unwraps the optional and ends this test with a recorded issue
    // when it is nil. The previous `#expect` + force-unwrap would have trapped
    // and aborted the whole test process instead.
    let tokenizerConfigOpt = try await configuration.tokenizerConfig
    let tokenizerConfig = try #require(tokenizerConfigOpt)
    let tokenizerData = try await configuration.tokenizerData

    let tokenizer = try AutoTokenizer.from(
        tokenizerConfig: tokenizerConfig,
        tokenizerData: tokenizerData
    )

    // Any non-empty encoding shows the locally loaded tokenizer is usable.
    let encoded = tokenizer.encode(text: "offline path")
    #expect(!encoded.isEmpty)
}
214+
180215
/// https://github.com/huggingface/swift-transformers/issues/96
181216
@Test
182217
func legacyLlamaBehaviour() async throws {

0 commit comments

Comments
 (0)