Skip to content

Commit 4bb7533

Browse files
authored
[Rust] Bump huggingface tokenizer to 0.20.0 (#49)
This PR bumps the Hugging Face `tokenizers` dependency to version 0.20.0 to address tokenizer issues seen with some recent models that ship newly trained tokenizers.
1 parent 528d695 commit 4bb7533

File tree

2 files changed

+3
-3
lines changed

2 files changed

+3
-3
lines changed

rust/Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,6 @@ crate-type = ["staticlib"]
88

99
[dependencies]
1010

11-
tokenizers = { version = "0.19.1", default-features = false, features = ["onig"] }
11+
tokenizers = { version = "0.20.0", default-features = false, features = ["onig"] }
1212
serde = { version = "1.0", features = [ "derive" ] }
1313
serde_json = "1.0"

rust/src/lib.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -77,8 +77,8 @@ impl TokenizerWrapper {
7777
);
7878
let mut tokenizer = Tokenizer::new(BPE::new(vocab, merges));
7979
tokenizer
80-
.with_pre_tokenizer(byte_level)
81-
.with_decoder(byte_level);
80+
.with_pre_tokenizer(Some(byte_level))
81+
.with_decoder(Some(byte_level));
8282
TokenizerWrapper {
8383
tokenizer: tokenizer,
8484
decode_str: String::new(),

0 commit comments

Comments
 (0)