@@ -14,7 +14,7 @@ import Hub
 /// This tokenizer performs basic tokenization (whitespace and punctuation splitting)
 /// followed by WordPiece subword tokenization, which is the approach used by BERT
 /// and related models.
-public class BertTokenizer: @unchecked Sendable {
+public final class BertTokenizer: Sendable {
     private let basicTokenizer: BasicTokenizer
     private let wordpieceTokenizer: WordpieceTokenizer
     private let maxLen = 512
@@ -27,16 +27,16 @@ public class BertTokenizer: @unchecked Sendable {
     private let ids_to_tokens: [Int: String]
 
     /// The beginning-of-sequence token string, if defined.
-    public var bosToken: String?
+    public let bosToken: String?
 
     /// The numeric ID of the beginning-of-sequence token, if defined.
-    public var bosTokenId: Int?
+    public let bosTokenId: Int?
 
     /// The end-of-sequence token string, if defined.
-    public var eosToken: String?
+    public let eosToken: String?
 
     /// The numeric ID of the end-of-sequence token, if defined.
-    public var eosTokenId: Int?
+    public let eosTokenId: Int?
 
     /// Whether consecutive unknown tokens should be fused together.
     public let fuseUnknownTokens: Bool
@@ -225,7 +225,7 @@ extension BertTokenizer: PreTrainedTokenizerModel {
     }
 }
 
-class BasicTokenizer {
+final class BasicTokenizer: Sendable {
     let doLowerCase: Bool
 
     init(doLowerCase: Bool = true) {
@@ -291,7 +291,7 @@ private extension Character {
     }
 }
 
-class WordpieceTokenizer {
+final class WordpieceTokenizer: Sendable {
     let unkToken = "[UNK]"
     private let maxInputCharsPerWord = 100
     private let vocab: [String: Int]