Skip to content

Commit

Permalink
Restore parameters
Browse files Browse the repository at this point in the history
  • Loading branch information
hendrikvanantwerpen committed Oct 16, 2024
1 parent 87731b7 commit 7b94237
Showing 1 changed file with 4 additions and 18 deletions.
22 changes: 4 additions & 18 deletions crates/bpe/benchmarks/performance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -192,31 +192,22 @@ fn worstcase_comparison_benchmark(c: &mut Criterion) {
  let input = text.as_bytes();

  let mut group = c.benchmark_group(format!("worstcase-{name}"));
- for bytes in [10, 100, 1000] { //, 5000, 10000, 25000, 50000, 75000, 100000] {
+ for bytes in [10, 100, 1000, 5000, 10000, 25000, 50000, 75000, 100000] {
  group.throughput(criterion::Throughput::Bytes(bytes as u64));
  group.bench_with_input(
  BenchmarkId::new("backtracking", bytes),
  &bytes,
  |b, bytes| {
  b.iter_batched(
- || {
- let text =
- std::str::from_utf8(select_test_bytes(input, *bytes)).unwrap();
- assert!(bpe.split(text).nth(1).is_none());
- text
- },
+ || std::str::from_utf8(select_test_bytes(input, *bytes)).unwrap(),
  |text| bpe.encode(text),
  criterion::BatchSize::SmallInput,
  )
  },
  );
  group.bench_with_input(BenchmarkId::new("tiktoken", bytes), &bytes, |b, bytes| {
  b.iter_batched(
- || {
- let text = std::str::from_utf8(select_test_bytes(input, *bytes)).unwrap();
- assert!(bpe.split(text).nth(1).is_none());
- text
- },
+ || std::str::from_utf8(select_test_bytes(input, *bytes)).unwrap(),
  |text| tiktoken.encode_ordinary(text),
  criterion::BatchSize::SmallInput,
  )
Expand All @@ -226,12 +217,7 @@ fn worstcase_comparison_benchmark(c: &mut Criterion) {
  &bytes,
  |b, bytes| {
  b.iter_batched(
- || {
- let text =
- std::str::from_utf8(select_test_bytes(input, *bytes)).unwrap();
- assert!(bpe.split(text).nth(1).is_none());
- text
- },
+ || std::str::from_utf8(select_test_bytes(input, *bytes)).unwrap(),
  |text| huggingface.encode_fast(text, false).unwrap(),
  criterion::BatchSize::SmallInput,
  )
Expand Down

0 comments on commit 7b94237

Please sign in to comment.