@@ -415,6 +415,219 @@ impl Decompressor {
415
415
output : & mut [ u8 ] ,
416
416
mut output_index : usize ,
417
417
) -> Result < usize , DecompressionError > {
418
+ // Fast decoding loop.
419
+ //
420
+ // This loop is optimized for speed and is the main decoding loop for the decompressor,
421
+ // which is used when there are at least 8 bytes of input and output data available. It
422
+ // assumes that the bitbuffer is full (nbits >= 56) and that litlen_entry has been loaded.
423
+ //
424
+ // These assumptions enable a few optimizations:
425
+ // - Nearly all checks for nbits are avoided.
426
+ // - Checking the input size is optimized out in the refill function call.
427
+ // - The litlen_entry for the next loop iteration can be loaded in parallel with refilling
428
+ // the bit buffer. This is because when the input is non-empty, the bit buffer actually
429
+ // has 64 bits of valid data (even though nbits will be in 56..=63).
430
+ self . fill_buffer ( remaining_input) ;
431
+ let mut litlen_entry = self . compression . litlen_table [ ( self . buffer & 0xfff ) as usize ] ;
432
+ while self . state == State :: CompressedData
433
+ && output_index + 8 <= output. len ( )
434
+ && remaining_input. len ( ) >= 8
435
+ {
436
+ // First check whether the next symbol is a literal. This code does up to 2 additional
437
+ // table lookups to decode more literals.
438
+ let mut bits;
439
+ let mut litlen_code_bits = litlen_entry as u8 ;
440
+ if litlen_entry & LITERAL_ENTRY != 0 {
441
+ let litlen_entry2 = self . compression . litlen_table
442
+ [ ( self . buffer >> litlen_code_bits & 0xfff ) as usize ] ;
443
+ let litlen_code_bits2 = litlen_entry2 as u8 ;
444
+ let litlen_entry3 = self . compression . litlen_table
445
+ [ ( self . buffer >> ( litlen_code_bits + litlen_code_bits2) & 0xfff ) as usize ] ;
446
+ let litlen_code_bits3 = litlen_entry3 as u8 ;
447
+ let litlen_entry4 = self . compression . litlen_table [ ( self . buffer
448
+ >> ( litlen_code_bits + litlen_code_bits2 + litlen_code_bits3)
449
+ & 0xfff )
450
+ as usize ] ;
451
+
452
+ let advance_output_bytes = ( ( litlen_entry & 0xf00 ) >> 8 ) as usize ;
453
+ output[ output_index] = ( litlen_entry >> 16 ) as u8 ;
454
+ output[ output_index + 1 ] = ( litlen_entry >> 24 ) as u8 ;
455
+ output_index += advance_output_bytes;
456
+
457
+ if litlen_entry2 & LITERAL_ENTRY != 0 {
458
+ let advance_output_bytes2 = ( ( litlen_entry2 & 0xf00 ) >> 8 ) as usize ;
459
+ output[ output_index] = ( litlen_entry2 >> 16 ) as u8 ;
460
+ output[ output_index + 1 ] = ( litlen_entry2 >> 24 ) as u8 ;
461
+ output_index += advance_output_bytes2;
462
+
463
+ if litlen_entry3 & LITERAL_ENTRY != 0 {
464
+ let advance_output_bytes3 = ( ( litlen_entry3 & 0xf00 ) >> 8 ) as usize ;
465
+ output[ output_index] = ( litlen_entry3 >> 16 ) as u8 ;
466
+ output[ output_index + 1 ] = ( litlen_entry3 >> 24 ) as u8 ;
467
+ output_index += advance_output_bytes3;
468
+
469
+ litlen_entry = litlen_entry4;
470
+ self . consume_bits ( litlen_code_bits + litlen_code_bits2 + litlen_code_bits3) ;
471
+ self . fill_buffer ( remaining_input) ;
472
+ continue ;
473
+ } else {
474
+ self . consume_bits ( litlen_code_bits + litlen_code_bits2) ;
475
+ litlen_entry = litlen_entry3;
476
+ litlen_code_bits = litlen_code_bits3;
477
+ self . fill_buffer ( remaining_input) ;
478
+ bits = self . buffer ;
479
+ }
480
+ } else {
481
+ self . consume_bits ( litlen_code_bits) ;
482
+ bits = self . buffer ;
483
+ litlen_entry = litlen_entry2;
484
+ litlen_code_bits = litlen_code_bits2;
485
+ if self . nbits < 48 {
486
+ self . fill_buffer ( remaining_input) ;
487
+ }
488
+ }
489
+ } else {
490
+ bits = self . buffer ;
491
+ }
492
+
493
+ // The next symbol is either a 13+ bit literal, back-reference, or an EOF symbol.
494
+ let ( length_base, length_extra_bits, litlen_code_bits) =
495
+ if litlen_entry & EXCEPTIONAL_ENTRY == 0 {
496
+ (
497
+ litlen_entry >> 16 ,
498
+ ( litlen_entry >> 8 ) as u8 ,
499
+ litlen_code_bits,
500
+ )
501
+ } else if litlen_entry & SECONDARY_TABLE_ENTRY != 0 {
502
+ let secondary_table_index =
503
+ ( litlen_entry >> 16 ) + ( ( bits >> 12 ) as u32 & ( litlen_entry & 0xff ) ) ;
504
+ let secondary_entry =
505
+ self . compression . secondary_table [ secondary_table_index as usize ] ;
506
+ let litlen_symbol = secondary_entry >> 4 ;
507
+ let litlen_code_bits = ( secondary_entry & 0xf ) as u8 ;
508
+
509
+ match litlen_symbol {
510
+ 0 ..=255 => {
511
+ self . consume_bits ( litlen_code_bits) ;
512
+ litlen_entry =
513
+ self . compression . litlen_table [ ( self . buffer & 0xfff ) as usize ] ;
514
+ self . fill_buffer ( remaining_input) ;
515
+ output[ output_index] = litlen_symbol as u8 ;
516
+ output_index += 1 ;
517
+ continue ;
518
+ }
519
+ 256 => {
520
+ self . consume_bits ( litlen_code_bits) ;
521
+ self . state = match self . last_block {
522
+ true => State :: Checksum ,
523
+ false => State :: BlockHeader ,
524
+ } ;
525
+ break ;
526
+ }
527
+ _ => (
528
+ LEN_SYM_TO_LEN_BASE [ litlen_symbol as usize - 257 ] as u32 ,
529
+ LEN_SYM_TO_LEN_EXTRA [ litlen_symbol as usize - 257 ] ,
530
+ litlen_code_bits,
531
+ ) ,
532
+ }
533
+ } else if litlen_code_bits == 0 {
534
+ return Err ( DecompressionError :: InvalidLiteralLengthCode ) ;
535
+ } else {
536
+ self . consume_bits ( litlen_code_bits) ;
537
+ self . state = match self . last_block {
538
+ true => State :: Checksum ,
539
+ false => State :: BlockHeader ,
540
+ } ;
541
+ break ;
542
+ } ;
543
+ bits >>= litlen_code_bits;
544
+
545
+ let length_extra_mask = ( 1 << length_extra_bits) - 1 ;
546
+ let length = length_base as usize + ( bits & length_extra_mask) as usize ;
547
+ bits >>= length_extra_bits;
548
+
549
+ let dist_entry = self . compression . dist_table [ ( bits & 0x1ff ) as usize ] ;
550
+ let ( dist_base, dist_extra_bits, dist_code_bits) = if dist_entry & LITERAL_ENTRY != 0 {
551
+ (
552
+ ( dist_entry >> 16 ) as u16 ,
553
+ ( dist_entry >> 8 ) as u8 & 0xf ,
554
+ dist_entry as u8 ,
555
+ )
556
+ } else if dist_entry >> 8 == 0 {
557
+ return Err ( DecompressionError :: InvalidDistanceCode ) ;
558
+ } else {
559
+ let secondary_table_index =
560
+ ( dist_entry >> 16 ) + ( ( bits >> 9 ) as u32 & ( dist_entry & 0xff ) ) ;
561
+ let secondary_entry =
562
+ self . compression . dist_secondary_table [ secondary_table_index as usize ] ;
563
+ let dist_symbol = ( secondary_entry >> 4 ) as usize ;
564
+ if dist_symbol >= 30 {
565
+ return Err ( DecompressionError :: InvalidDistanceCode ) ;
566
+ }
567
+
568
+ (
569
+ DIST_SYM_TO_DIST_BASE [ dist_symbol] ,
570
+ DIST_SYM_TO_DIST_EXTRA [ dist_symbol] ,
571
+ ( secondary_entry & 0xf ) as u8 ,
572
+ )
573
+ } ;
574
+ bits >>= dist_code_bits;
575
+
576
+ let dist = dist_base as usize + ( bits & ( ( 1 << dist_extra_bits) - 1 ) ) as usize ;
577
+ if dist > output_index {
578
+ return Err ( DecompressionError :: DistanceTooFarBack ) ;
579
+ }
580
+
581
+ self . consume_bits (
582
+ litlen_code_bits + length_extra_bits + dist_code_bits + dist_extra_bits,
583
+ ) ;
584
+ self . fill_buffer ( remaining_input) ;
585
+ litlen_entry = self . compression . litlen_table [ ( self . buffer & 0xfff ) as usize ] ;
586
+
587
+ let copy_length = length. min ( output. len ( ) - output_index) ;
588
+ if dist == 1 {
589
+ let last = output[ output_index - 1 ] ;
590
+ output[ output_index..] [ ..copy_length] . fill ( last) ;
591
+
592
+ if copy_length < length {
593
+ self . queued_rle = Some ( ( last, length - copy_length) ) ;
594
+ output_index = output. len ( ) ;
595
+ break ;
596
+ }
597
+ } else if output_index + length + 15 <= output. len ( ) {
598
+ let start = output_index - dist;
599
+ output. copy_within ( start..start + 16 , output_index) ;
600
+
601
+ if length > 16 || dist < 16 {
602
+ for i in ( 0 ..length) . step_by ( dist. min ( 16 ) ) . skip ( 1 ) {
603
+ output. copy_within ( start + i..start + i + 16 , output_index + i) ;
604
+ }
605
+ }
606
+ } else {
607
+ if dist < copy_length {
608
+ for i in 0 ..copy_length {
609
+ output[ output_index + i] = output[ output_index + i - dist] ;
610
+ }
611
+ } else {
612
+ output. copy_within (
613
+ output_index - dist..output_index + copy_length - dist,
614
+ output_index,
615
+ )
616
+ }
617
+
618
+ if copy_length < length {
619
+ self . queued_backref = Some ( ( dist, length - copy_length) ) ;
620
+ output_index = output. len ( ) ;
621
+ break ;
622
+ }
623
+ }
624
+ output_index += copy_length;
625
+ }
626
+
627
+ // Careful decoding loop.
628
+ //
629
+ // This loop processes the remaining input when we're too close to the end of the input or
630
+ // output to use the fast loop.
418
631
while let State :: CompressedData = self . state {
419
632
self . fill_buffer ( remaining_input) ;
420
633
if output_index == output. len ( ) {
@@ -426,74 +639,10 @@ impl Decompressor {
426
639
let litlen_code_bits = litlen_entry as u8 ;
427
640
428
641
if litlen_entry & LITERAL_ENTRY != 0 {
429
- // Ultra-fast path: do 3 more consecutive table lookups and bail if any of them need the slow path.
430
- if self . nbits >= 48 {
431
- let litlen_entry2 =
432
- self . compression . litlen_table [ ( bits >> litlen_code_bits & 0xfff ) as usize ] ;
433
- let litlen_code_bits2 = litlen_entry2 as u8 ;
434
- let litlen_entry3 = self . compression . litlen_table
435
- [ ( bits >> ( litlen_code_bits + litlen_code_bits2) & 0xfff ) as usize ] ;
436
- let litlen_code_bits3 = litlen_entry3 as u8 ;
437
- let litlen_entry4 = self . compression . litlen_table [ ( bits
438
- >> ( litlen_code_bits + litlen_code_bits2 + litlen_code_bits3)
439
- & 0xfff )
440
- as usize ] ;
441
- let litlen_code_bits4 = litlen_entry4 as u8 ;
442
- if litlen_entry2 & litlen_entry3 & litlen_entry4 & LITERAL_ENTRY != 0 {
443
- let advance_output_bytes = ( ( litlen_entry & 0xf00 ) >> 8 ) as usize ;
444
- let advance_output_bytes2 = ( ( litlen_entry2 & 0xf00 ) >> 8 ) as usize ;
445
- let advance_output_bytes3 = ( ( litlen_entry3 & 0xf00 ) >> 8 ) as usize ;
446
- let advance_output_bytes4 = ( ( litlen_entry4 & 0xf00 ) >> 8 ) as usize ;
447
- if output_index
448
- + advance_output_bytes
449
- + advance_output_bytes2
450
- + advance_output_bytes3
451
- + advance_output_bytes4
452
- < output. len ( )
453
- {
454
- self . consume_bits (
455
- litlen_code_bits
456
- + litlen_code_bits2
457
- + litlen_code_bits3
458
- + litlen_code_bits4,
459
- ) ;
460
-
461
- output[ output_index] = ( litlen_entry >> 16 ) as u8 ;
462
- output[ output_index + 1 ] = ( litlen_entry >> 24 ) as u8 ;
463
- output_index += advance_output_bytes;
464
- output[ output_index] = ( litlen_entry2 >> 16 ) as u8 ;
465
- output[ output_index + 1 ] = ( litlen_entry2 >> 24 ) as u8 ;
466
- output_index += advance_output_bytes2;
467
- output[ output_index] = ( litlen_entry3 >> 16 ) as u8 ;
468
- output[ output_index + 1 ] = ( litlen_entry3 >> 24 ) as u8 ;
469
- output_index += advance_output_bytes3;
470
- output[ output_index] = ( litlen_entry4 >> 16 ) as u8 ;
471
- output[ output_index + 1 ] = ( litlen_entry4 >> 24 ) as u8 ;
472
- output_index += advance_output_bytes4;
473
- continue ;
474
- }
475
- }
476
- }
477
-
478
642
// Fast path: the next symbol is <= 12 bits and a literal, the table specifies the
479
643
// output bytes and we can directly write them to the output buffer.
480
644
let advance_output_bytes = ( ( litlen_entry & 0xf00 ) >> 8 ) as usize ;
481
645
482
- // match advance_output_bytes {
483
- // 1 => println!("[{output_index}] LIT1 {}", litlen_entry >> 16),
484
- // 2 => println!(
485
- // "[{output_index}] LIT2 {} {} {}",
486
- // (litlen_entry >> 16) as u8,
487
- // litlen_entry >> 24,
488
- // bits & 0xfff
489
- // ),
490
- // n => println!(
491
- // "[{output_index}] LIT{n} {} {}",
492
- // (litlen_entry >> 16) as u8,
493
- // litlen_entry >> 24,
494
- // ),
495
- // }
496
-
497
646
if self . nbits < litlen_code_bits {
498
647
break ;
499
648
} else if output_index + 1 < output. len ( ) {
@@ -536,14 +685,11 @@ impl Decompressor {
536
685
if self . nbits < litlen_code_bits {
537
686
break ;
538
687
} else if litlen_symbol < 256 {
539
- // println!("[{output_index}] LIT1b {} (val={:04x})", litlen_symbol, self.peak_bits(15));
540
-
541
688
self . consume_bits ( litlen_code_bits) ;
542
689
output[ output_index] = litlen_symbol as u8 ;
543
690
output_index += 1 ;
544
691
continue ;
545
692
} else if litlen_symbol == 256 {
546
- // println!("[{output_index}] EOF");
547
693
self . consume_bits ( litlen_code_bits) ;
548
694
self . state = match self . last_block {
549
695
true => State :: Checksum ,
@@ -563,7 +709,6 @@ impl Decompressor {
563
709
if self . nbits < litlen_code_bits {
564
710
break ;
565
711
}
566
- // println!("[{output_index}] EOF");
567
712
self . consume_bits ( litlen_code_bits) ;
568
713
self . state = match self . last_block {
569
714
true => State :: Checksum ,
@@ -618,7 +763,6 @@ impl Decompressor {
618
763
return Err ( DecompressionError :: DistanceTooFarBack ) ;
619
764
}
620
765
621
- // println!("[{output_index}] BACKREF len={} dist={} {:x}", length, dist, dist_entry);
622
766
self . consume_bits ( total_bits) ;
623
767
624
768
let copy_length = length. min ( output. len ( ) - output_index) ;
0 commit comments