Skip to content

Commit 7bf5093

Browse files
author
Hendrik van Antwerpen
committed
Use the right tiktoken encoding for comparison
1 parent aa14609 commit 7bf5093

File tree

3 files changed

+6
-5
lines changed

3 files changed

+6
-5
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@ Cargo.lock
 /target/
 /crates/*/target/
 /crates/*/Cargo.lock
-.vscode/
+.vscode/

crates/bpe/benches/counting.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,10 @@ fn counting_benchmark(c: &mut Criterion) {
 }

 fn encoding_benchmark(c: &mut Criterion) {
-    for (name, bpe) in [
-        ("cl100k", BytePairEncoding::cl100k()),
-        ("o200k", BytePairEncoding::o200k()),
+    for (name, bpe, tiktoken) in [
+        ("cl100k", BytePairEncoding::cl100k(), tiktoken_rs::cl100k_base().unwrap()),
+        ("o200k", BytePairEncoding::o200k(), tiktoken_rs::o200k_base().unwrap()),
     ] {
-        let tiktoken = tiktoken_rs::cl100k_base().unwrap();
         let text = create_test_string(&bpe, 20000);
         let input = text.as_bytes();

criterion.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
+# save report in this directory, even if a custom target directory is set
+criterion_home = "./target/criterion"

0 commit comments

Comments
 (0)