@@ -192,31 +192,22 @@ fn worstcase_comparison_benchmark(c: &mut Criterion) {
192192 let input = text. as_bytes ( ) ;
193193
194194 let mut group = c. benchmark_group ( format ! ( "worstcase-{name}" ) ) ;
195- for bytes in [ 10 , 100 , 1000 ] { // , 5000, 10000, 25000, 50000, 75000, 100000] {
195+ for bytes in [ 10 , 100 , 1000 , 5000 , 10000 , 25000 , 50000 , 75000 , 100000 ] {
196196 group. throughput ( criterion:: Throughput :: Bytes ( bytes as u64 ) ) ;
197197 group. bench_with_input (
198198 BenchmarkId :: new ( "backtracking" , bytes) ,
199199 & bytes,
200200 |b, bytes| {
201201 b. iter_batched (
202- || {
203- let text =
204- std:: str:: from_utf8 ( select_test_bytes ( input, * bytes) ) . unwrap ( ) ;
205- assert ! ( bpe. split( text) . nth( 1 ) . is_none( ) ) ;
206- text
207- } ,
202+ || std:: str:: from_utf8 ( select_test_bytes ( input, * bytes) ) . unwrap ( ) ,
208203 |text| bpe. encode ( text) ,
209204 criterion:: BatchSize :: SmallInput ,
210205 )
211206 } ,
212207 ) ;
213208 group. bench_with_input ( BenchmarkId :: new ( "tiktoken" , bytes) , & bytes, |b, bytes| {
214209 b. iter_batched (
215- || {
216- let text = std:: str:: from_utf8 ( select_test_bytes ( input, * bytes) ) . unwrap ( ) ;
217- assert ! ( bpe. split( text) . nth( 1 ) . is_none( ) ) ;
218- text
219- } ,
210+ || std:: str:: from_utf8 ( select_test_bytes ( input, * bytes) ) . unwrap ( ) ,
220211 |text| tiktoken. encode_ordinary ( text) ,
221212 criterion:: BatchSize :: SmallInput ,
222213 )
@@ -226,12 +217,7 @@ fn worstcase_comparison_benchmark(c: &mut Criterion) {
226217 & bytes,
227218 |b, bytes| {
228219 b. iter_batched (
229- || {
230- let text =
231- std:: str:: from_utf8 ( select_test_bytes ( input, * bytes) ) . unwrap ( ) ;
232- assert ! ( bpe. split( text) . nth( 1 ) . is_none( ) ) ;
233- text
234- } ,
220+ || std:: str:: from_utf8 ( select_test_bytes ( input, * bytes) ) . unwrap ( ) ,
235221 |text| huggingface. encode_fast ( text, false ) . unwrap ( ) ,
236222 criterion:: BatchSize :: SmallInput ,
237223 )
0 commit comments