Skip to content

Commit 5a00a6c

Browse files
committed
Fix data race in PreTrainedTokenizer
1 parent 6ec89b6 commit 5a00a6c

File tree

1 file changed

+19
-0
lines changed

1 file changed

+19
-0
lines changed

Sources/Tokenizers/Tokenizer.swift

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -477,6 +477,9 @@ public class PreTrainedTokenizer: @unchecked Sendable, Tokenizer {
477477
/// Cache of compiled Jinja templates, keyed by the literal template string
/// each one was compiled from. Access is guarded by `cacheLock`.
478478
private var compiledChatTemplateCache: [String: Template] = [:]
479479

480+
/// Lock protecting `compiledChatTemplateCache` from concurrent access, so
/// that lookups and insertions from different threads do not overlap.
481+
private let cacheLock = NSLock()
482+
480483
/// Initializes a tokenizer from Hugging Face configuration files.
481484
///
482485
/// - Parameters:
@@ -531,10 +534,26 @@ public class PreTrainedTokenizer: @unchecked Sendable, Tokenizer {
531534
}
532535

533536
/// Returns the compiled form of `templateString`, reusing a cached instance when one exists.
///
/// The cache dictionary is only touched while `cacheLock` is held. Compilation itself runs
/// with the lock released, so concurrent callers are not blocked while a template is compiled.
/// If two callers compile the same string at the same time, the first one to store its result
/// wins and the other returns that stored instance instead of its own.
///
/// - Parameter templateString: The literal template source, also used as the cache key.
/// - Returns: The cached template for `templateString`, or a newly compiled one that has just
///   been stored in the cache.
/// - Throws: Any error thrown by `Template(templateString)`; nothing is cached in that case.
private func compiledTemplate(for templateString: String) throws -> Template {
    // Snapshot the cache entry under the lock, then release before doing anything else.
    cacheLock.lock()
    let existing = compiledChatTemplateCache[templateString]
    cacheLock.unlock()

    if let hit = existing {
        return hit
    }

    // Cache miss: compile without holding the lock. A throw here leaves the lock released.
    let freshlyCompiled = try Template(templateString)

    cacheLock.lock()
    defer { cacheLock.unlock() }

    // Another caller may have stored the same template while we were compiling;
    // only insert ours when the slot is still empty.
    guard let winner = compiledChatTemplateCache[templateString] else {
        compiledChatTemplateCache[templateString] = freshlyCompiled
        return freshlyCompiled
    }
    return winner
}

0 commit comments

Comments
 (0)