From 7b9423734c74e1c32dcad07a62f3275e0943813b Mon Sep 17 00:00:00 2001 From: Hendrik van Antwerpen Date: Wed, 16 Oct 2024 15:23:39 +0200 Subject: [PATCH] Restore parameters --- crates/bpe/benchmarks/performance.rs | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/crates/bpe/benchmarks/performance.rs b/crates/bpe/benchmarks/performance.rs index c3225ff..3aeb0dc 100644 --- a/crates/bpe/benchmarks/performance.rs +++ b/crates/bpe/benchmarks/performance.rs @@ -192,19 +192,14 @@ fn worstcase_comparison_benchmark(c: &mut Criterion) { let input = text.as_bytes(); let mut group = c.benchmark_group(format!("worstcase-{name}")); - for bytes in [10, 100, 1000] { //, 5000, 10000, 25000, 50000, 75000, 100000] { + for bytes in [10, 100, 1000, 5000, 10000, 25000, 50000, 75000, 100000] { group.throughput(criterion::Throughput::Bytes(bytes as u64)); group.bench_with_input( BenchmarkId::new("backtracking", bytes), &bytes, |b, bytes| { b.iter_batched( - || { - let text = - std::str::from_utf8(select_test_bytes(input, *bytes)).unwrap(); - assert!(bpe.split(text).nth(1).is_none()); - text - }, + || std::str::from_utf8(select_test_bytes(input, *bytes)).unwrap(), |text| bpe.encode(text), criterion::BatchSize::SmallInput, ) @@ -212,11 +207,7 @@ fn worstcase_comparison_benchmark(c: &mut Criterion) { ); group.bench_with_input(BenchmarkId::new("tiktoken", bytes), &bytes, |b, bytes| { b.iter_batched( - || { - let text = std::str::from_utf8(select_test_bytes(input, *bytes)).unwrap(); - assert!(bpe.split(text).nth(1).is_none()); - text - }, + || std::str::from_utf8(select_test_bytes(input, *bytes)).unwrap(), |text| tiktoken.encode_ordinary(text), criterion::BatchSize::SmallInput, ) @@ -226,12 +217,7 @@ fn worstcase_comparison_benchmark(c: &mut Criterion) { &bytes, |b, bytes| { b.iter_batched( - || { - let text = - std::str::from_utf8(select_test_bytes(input, *bytes)).unwrap(); - assert!(bpe.split(text).nth(1).is_none()); - text - }, + || std::str::from_utf8(select_test_bytes(input, *bytes)).unwrap(), |text| huggingface.encode_fast(text, false).unwrap(), criterion::BatchSize::SmallInput, )