@@ -127,7 +127,8 @@ def minicpm_attention_forward_original(
127
127
use_compresskv = isinstance (past_key_value , DynamicCompressCache )
128
128
129
129
use_fuse_rope = should_use_fuse_rope (self , hidden_states , position_ids )
130
- enough_kv_room = is_enough_kv_cache_room_4_36 (past_key_value , self .layer_idx , seq_len = q_len )
130
+ enough_kv_room = is_enough_kv_cache_room_4_36 (past_key_value , self .layer_idx ,
131
+ seq_len = q_len )
131
132
no_tp = not self .config .pretraining_tp > 1
132
133
decoding_fast_path = use_decoding_fast_path (self .q_proj ,
133
134
use_fuse_rope ,
@@ -408,7 +409,8 @@ def minicpm_attention_forward_quantized(
408
409
bsz , q_len , _ = hidden_states .size ()
409
410
device = hidden_states .device
410
411
use_fuse_rope = should_use_fuse_rope (self , hidden_states , position_ids )
411
- enough_kv_room = is_enough_kv_cache_room_4_36 (past_key_value , self .layer_idx , seq_len = q_len )
412
+ enough_kv_room = is_enough_kv_cache_room_4_36 (past_key_value , self .layer_idx ,
413
+ seq_len = q_len )
412
414
no_tp = not self .config .pretraining_tp > 1
413
415
decoding_fast_path = use_decoding_fast_path (self .q_proj ,
414
416
use_fuse_rope ,
@@ -821,7 +823,8 @@ def minicpm_attention_forward_original_4_39(
821
823
use_compresskv = isinstance (past_key_value , DynamicCompressCache )
822
824
823
825
use_fuse_rope = should_use_fuse_rope (self , hidden_states , position_ids )
824
- enough_kv_room = is_enough_kv_cache_room_4_36 (past_key_value , self .layer_idx , seq_len = q_len )
826
+ enough_kv_room = is_enough_kv_cache_room_4_36 (past_key_value , self .layer_idx ,
827
+ seq_len = q_len )
825
828
no_tp = not self .config .pretraining_tp > 1
826
829
decoding_fast_path = use_decoding_fast_path (self .q_proj ,
827
830
use_fuse_rope ,
0 commit comments