@@ -426,11 +426,9 @@ def convert_llm_for_deploy(model: torch.nn.Module,
         os.mkdir(save_directory)
         weight_dir = os.path.join(save_directory, "model_weights")
         os.mkdir(weight_dir)
-    use_level_zero = os.environ.get("IPEX_LLM_NPU_USE_LEVEL0", "0") == "1"
     layernorm_const = os.environ.get("IPEX_LLM_NPU_LAYERNORM_CONST", "1") == "1"
 
     if model.config.model_type == "qwen2":
-        layernorm_const = os.environ.get("IPEX_LLM_NPU_LAYERNORM_CONST", "0") == "1"
         if model.config.hidden_size == 1536:
             # Qwen2-1.5B-Instruct
             fused_layers = 1
@@ -449,28 +447,16 @@ def convert_llm_for_deploy(model: torch.nn.Module,
                        "weight_num": 7,
                        "weight_idx": 8,
                        "n_splits_linear": n_splits_linear,
-                       "n_splits_down_proj": n_splits_down_proj,
-                       "use_level_zero": use_level_zero}
+                       "n_splits_down_proj": n_splits_down_proj}
         model.config.update(update_dict)
         model.config.save_pretrained(save_directory)
 
         from .qwen import convert_qwen_layer, convert_fused_qwen_layer
         from .qwen import convert_lm_head_and_embedding
-        if not use_level_zero:
-            # save fused_layers blobs of fused decoder layers
-            convert_fused_qwen_layer(model, fused_layers, n_splits_linear, n_splits_down_proj,
-                                     save_directory, weight_dir, transpose_value_cache, kv_len,
-                                     group_size, layernorm_const, "decode")
-        else:
-            # save layer_num blobs of each decoder layer
-            layer_num = len(model.model.layers)
-            param_list = []
-            for layer_idx in range(0, layer_num):
-                param_list.append((model, layer_idx, n_splits_linear, n_splits_down_proj,
-                                   save_directory, weight_dir, transpose_value_cache, kv_len,
-                                   group_size, layernorm_const))
-            with Pool() as pool:
-                result = pool.starmap(convert_qwen_layer, param_list)
+        # save fused_layers blobs of fused decoder layers
+        convert_fused_qwen_layer(model, fused_layers, n_splits_linear, n_splits_down_proj,
+                                 save_directory, weight_dir, transpose_value_cache, kv_len,
+                                 group_size, layernorm_const, "decode")
         # save blob of single prefill layer
         convert_qwen_layer(model, 0, n_splits_linear, n_splits_down_proj,
                            save_directory, weight_dir, transpose_value_cache, max_prompt_len,
@@ -512,8 +498,7 @@ def convert_llm_for_deploy(model: torch.nn.Module,
                        "embedding_post": embedding_post,
                        "cos_sin_input": cos_sin_input,
                        "n_splits_linear": n_splits_linear,
-                       "n_splits_down_proj": n_splits_down_proj,
-                       "use_level_zero": use_level_zero}
+                       "n_splits_down_proj": n_splits_down_proj}
         model.config.update(update_dict)
         model.config.save_pretrained(save_directory)
 
@@ -549,8 +534,7 @@ def convert_llm_for_deploy(model: torch.nn.Module,
                        "model_type": "minicpm",
                        "embedding_post": True,
                        "n_splits_linear": n_splits_linear,
-                       "n_splits_down_proj": n_splits_down_proj,
-                       "use_level_zero": use_level_zero}
+                       "n_splits_down_proj": n_splits_down_proj}
         model.config.update(update_dict)
         model.config.save_pretrained(save_directory)
 