Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ struct TransformersCLI: AsyncParsableCommand {
@Option(help: "Repetition penalty to discourage repeating tokens (typical: 1.0-2.0, 1.0 = no penalty)")
var repetitionPenalty: Float?

@Option(help: "Path to a local folder containing tokenizer_config.json and tokenizer.json")
// NOTE(review): this option is declared as `tokenizerPath`, but the model-loading
// code later binds `tokenizerFolder` — confirm the property name and its uses agree,
// otherwise this will not compile.
var tokenizerPath: String?

func generate(
model: LanguageModel,
config: GenerationConfig,
Expand Down Expand Up @@ -104,7 +107,17 @@ struct TransformersCLI: AsyncParsableCommand {
let url = URL(filePath: modelPath)
let compiledURL = try compile(at: url)
print("Loading model \(compiledURL)")
let model = try LanguageModel.loadCompiled(url: compiledURL, computeUnits: computeUnits.asMLComputeUnits)
let model: LanguageModel
if let tokenizerFolder {
let tokenizerURL = URL(filePath: tokenizerFolder, directoryHint: .isDirectory)
model = try LanguageModel.loadCompiled(
url: compiledURL,
tokenizerFolder: tokenizerURL,
computeUnits: computeUnits.asMLComputeUnits
)
} else {
model = try LanguageModel.loadCompiled(url: compiledURL, computeUnits: computeUnits.asMLComputeUnits)
}

var config = model.defaultGenerationConfig
config.doSample = doSample
Expand Down
32 changes: 30 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,36 @@ example converting and running Mistral 7B using CoreML [here](https://github.com

The [modernization of Core ML](https://github.com/huggingface/swift-transformers/pull/257) and corresponding examples were primarily contributed by @joshnewnham, @1duo, @alejandro-isaza, @aseemw. Thank you 🙏

### Offline CoreML tokenizers

When you bundle a compiled CoreML model and tokenizer files with your app, you can skip any network requests by injecting
the tokenizer when constructing `LanguageModel`:

```swift
let compiledURL: URL = ... // path to .mlmodelc
let tokenizerFolder: URL = ... // folder containing tokenizer_config.json and tokenizer.json

// Construct the tokenizer from local files (inside an async context)
let tokenizer = try await AutoTokenizer.from(modelFolder: tokenizerFolder)
let model = try LanguageModel.loadCompiled(
url: compiledURL,
tokenizer: tokenizer
)
```

Make sure the tokenizer assets come from the same Hugging Face repo as the original checkpoint or are compatible with the model you use. For the
Mistral example in `Examples/Mistral7B/`, you can fetch the tokenizer like this:

```bash
huggingface-cli download \
mistralai/Mistral-7B-Instruct-v0.3 \
tokenizer.json tokenizer_config.json \
--local-dir Examples/Mistral7B/local-tokenizer
```

If the repo is gated, authenticate with `huggingface-cli login` first. Both entry points — `LanguageModel(model:tokenizer:)` and
`LanguageModel.loadCompiled(url:computeUnits:tokenizer:)` — reuse the tokenizer you pass in and never reach out to the Hugging Face Hub.

## Usage via SwiftPM

To use `swift-transformers` with SwiftPM, you can add this to your `Package.swift`:
Expand Down Expand Up @@ -139,5 +169,3 @@ To format your code, run `swift format -i --recursive .`.
## License

[Apache 2](LICENSE).


61 changes: 50 additions & 11 deletions Sources/Models/LanguageModel.swift
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,26 @@ public class LanguageModel {

/// Creates a new language model instance from a CoreML model.
///
/// - Parameters:
///   - model: The CoreML model to wrap
///   - tokenizer: Optional preconstructed tokenizer to reuse; when provided, no
///     Hub-backed configuration is created, so tokenizer assets are never fetched remotely
/// - Important: Triggers a fatal error if the model doesn't have the expected input shape information
public required init(
    model: MLModel,
    tokenizer: Tokenizer? = nil
) {
    self.model = model
    _tokenizer = tokenizer
    (minContextLength, maxContextLength) = Self.contextRange(from: model)
    // Only build the Hub-backed configuration when no tokenizer was injected:
    // an injected tokenizer implies fully offline operation.
    if tokenizer == nil {
        self.configuration = LanguageModelConfigurationFromHub(modelName: modelName)
    } else {
        self.configuration = nil
    }
}

/// Convenience initializer that preserves the original single-argument entry point.
///
/// - Parameter model: The CoreML model to wrap; no tokenizer is injected, so
///   tokenizer assets are resolved through the Hub-backed configuration.
public convenience required init(model: MLModel) {
    self.init(model: model, tokenizer: nil)
}

public func resetState() async {}
Expand Down Expand Up @@ -142,15 +156,28 @@ public extension LanguageModel {
/// - Parameters:
/// - url: The URL of the compiled CoreML model file (.mlmodelc)
/// - computeUnits: The compute units to use for model inference
/// - tokenizer: Optional tokenizer instance to reuse
/// - Returns: A configured `LanguageModel` instance
/// - Throws: An error if the model cannot be loaded from the specified URL
static func loadCompiled(
    url: URL,
    computeUnits: MLComputeUnits = .cpuAndGPU,
    tokenizer: Tokenizer? = nil
) throws -> LanguageModel {
    let config = MLModelConfiguration()
    config.computeUnits = computeUnits
    let model = try MLModel(contentsOf: url, configuration: config)
    // Stateful-KV-cache models get the specialized subclass; both paths forward
    // the (optional) injected tokenizer so offline use never touches the Hub.
    return switch kvCacheAvailability(for: model) {
    case .statefulKVCache:
        LanguageModelWithStatefulKVCache(model: model, tokenizer: tokenizer)
    default:
        LanguageModel(model: model, tokenizer: tokenizer)
    }
}
}
Expand Down Expand Up @@ -304,7 +331,8 @@ public extension LanguageModel {
/// - Throws: An error if the configuration cannot be loaded
var modelConfig: Config? {
    get async throws {
        // A nil configuration means a tokenizer was injected and no Hub
        // configuration exists; surface that as nil rather than crashing.
        guard let configuration else { return nil }
        return try await configuration.modelConfig
    }
}

Expand All @@ -314,7 +342,8 @@ public extension LanguageModel {
/// - Throws: An error if the configuration cannot be loaded
var tokenizerConfig: Config? {
    get async throws {
        // Nil configuration (tokenizer was injected) yields nil instead of a crash.
        guard let configuration else { return nil }
        return try await configuration.tokenizerConfig
    }
}

Expand All @@ -324,7 +353,10 @@ public extension LanguageModel {
/// - Throws: An error if the tokenizer data cannot be loaded
var tokenizerData: Config {
    get async throws {
        // Non-optional return: without a configuration we cannot produce
        // tokenizer data, so fail loudly with a descriptive error.
        guard let configuration else {
            throw TokenizerError.missingConfig
        }
        return try await configuration.tokenizerData
    }
}

Expand Down Expand Up @@ -459,8 +491,11 @@ public class LanguageModelWithStatefulKVCache: LanguageModel {

var state: MLState?

public required init(model: MLModel) {
super.init(model: model)
public required init(
model: MLModel,
tokenizer: Tokenizer? = nil
) {
super.init(model: model, tokenizer: tokenizer)
// To support pre-filling and extend, the input must support
// flexible shapes.
guard maxContextLength - minContextLength > 1 else {
Expand Down Expand Up @@ -531,11 +566,15 @@ public class LanguageModelWithStatefulKVCache: LanguageModel {
/// Errors raised while resolving tokenizer assets for a language model.
public enum TokenizerError: LocalizedError {
    /// The tokenizer configuration file could not be found.
    case tokenizerConfigNotFound
    /// The language model configuration required to load tokenizer data is missing.
    case missingConfig

    /// Human-readable, localized description of the failure.
    public var errorDescription: String? {
        switch self {
        case .tokenizerConfigNotFound:
            return String(localized: "Tokenizer configuration could not be found. The model may be missing required tokenizer files.", comment: "Error when tokenizer configuration is missing")
        case .missingConfig:
            return String(localized: "Language model configuration was not set, tokenizer assets could not be loaded.", comment: "Error when configuration needed for tokenizer data is missing")
        }
    }
}
Expand Down
51 changes: 51 additions & 0 deletions Tests/TokenizersTests/Resources/tokenizer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "<bos>"
},
{
"id": 1,
"content": "<pad>"
},
{
"id": 2,
"content": "<eos>"
},
{
"id": 3,
"content": "<unk>"
}
],
"model": {
"type": "BPE",
"vocab": {
"<bos>": 0,
"<pad>": 1,
"<eos>": 2,
"<unk>": 3,
"offline": 4,
"path": 5,
"_": 6,
"off": 7,
"li": 8,
"ne": 9,
"line": 10,
"pa": 11,
"th": 12,
"of": 13,
"_of": 14,
"_pa": 15
},
"merges": [
"off line",
"li ne",
"pa th",
"_ of",
"_ pa"
],
"continuing_subword_prefix": "",
"end_of_word_suffix": "",
"unk_token": "<unk>"
},
"normalizer": {
"type": "Lowercase"
},
"pre_tokenizer": {
"type": "Whitespace"
}
}
9 changes: 9 additions & 0 deletions Tests/TokenizersTests/Resources/tokenizer_config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"tokenizer_class": "GPT2Tokenizer",
"bos_token": "<bos>",
"eos_token": "<eos>",
"unk_token": "<unk>",
"pad_token": "<pad>",
"model_max_length": 128,
"do_lower_case": false
}
33 changes: 33 additions & 0 deletions Tests/TokenizersTests/TokenizerTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,39 @@ struct TokenizerTests {
#expect(tokenizer.encode(text: "<|im_start|>user<|im_sep|>Who are you?<|im_end|><|im_start|>assistant<|im_sep|>") == [100264, 882, 100266, 15546, 527, 499, 30, 100265, 100264, 78191, 100266])
}

@Test
func tokenizerFromLocalFolder() async throws {
    // Locate the offline fixture files bundled with the test target.
    let bundle = Bundle.module
    guard
        let tokenizerConfigURL = bundle.url(
            forResource: "tokenizer_config",
            withExtension: "json"
        ),
        bundle.url(
            forResource: "tokenizer",
            withExtension: "json"
        ) != nil
    else {
        Issue.record("Missing offline tokenizer fixtures")
        return
    }

    // Build the configuration purely from the local folder — no Hub access.
    let configuration = LanguageModelConfigurationFromHub(modelFolder: tokenizerConfigURL.deletingLastPathComponent())

    // `#require` halts the test cleanly on nil instead of force-unwrapping.
    let tokenizerConfig = try #require(try await configuration.tokenizerConfig)
    let tokenizerData = try await configuration.tokenizerData

    let tokenizer = try AutoTokenizer.from(
        tokenizerConfig: tokenizerConfig,
        tokenizerData: tokenizerData
    )

    let encoded = tokenizer.encode(text: "offline path")
    #expect(!encoded.isEmpty)
}

/// https://github.com/huggingface/swift-transformers/issues/96
@Test
func legacyLlamaBehaviour() async throws {
Expand Down