Skip to content

Commit b29da30

Browse files
authored
[NPU] Update C++ L0 (#12458)
* update
* fix style
1 parent a2272b7 commit b29da30

File tree

1 file changed

+7
-23
lines changed

1 file changed

+7
-23
lines changed

python/llm/src/ipex_llm/transformers/npu_pipeline_model/convert_pipeline.py

Lines changed: 7 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -426,11 +426,9 @@ def convert_llm_for_deploy(model: torch.nn.Module,
426426
os.mkdir(save_directory)
427427
weight_dir = os.path.join(save_directory, "model_weights")
428428
os.mkdir(weight_dir)
429-
use_level_zero = os.environ.get("IPEX_LLM_NPU_USE_LEVEL0", "0") == "1"
430429
layernorm_const = os.environ.get("IPEX_LLM_NPU_LAYERNORM_CONST", "1") == "1"
431430

432431
if model.config.model_type == "qwen2":
433-
layernorm_const = os.environ.get("IPEX_LLM_NPU_LAYERNORM_CONST", "0") == "1"
434432
if model.config.hidden_size == 1536:
435433
# Qwen2-1.5B-Instruct
436434
fused_layers = 1
@@ -449,28 +447,16 @@ def convert_llm_for_deploy(model: torch.nn.Module,
449447
"weight_num": 7,
450448
"weight_idx": 8,
451449
"n_splits_linear": n_splits_linear,
452-
"n_splits_down_proj": n_splits_down_proj,
453-
"use_level_zero": use_level_zero}
450+
"n_splits_down_proj": n_splits_down_proj}
454451
model.config.update(update_dict)
455452
model.config.save_pretrained(save_directory)
456453

457454
from .qwen import convert_qwen_layer, convert_fused_qwen_layer
458455
from .qwen import convert_lm_head_and_embedding
459-
if not use_level_zero:
460-
# save fused_layers blobs of fused decoder layers
461-
convert_fused_qwen_layer(model, fused_layers, n_splits_linear, n_splits_down_proj,
462-
save_directory, weight_dir, transpose_value_cache, kv_len,
463-
group_size, layernorm_const, "decode")
464-
else:
465-
# save layer_num blobs of each decoder layer
466-
layer_num = len(model.model.layers)
467-
param_list = []
468-
for layer_idx in range(0, layer_num):
469-
param_list.append((model, layer_idx, n_splits_linear, n_splits_down_proj,
470-
save_directory, weight_dir, transpose_value_cache, kv_len,
471-
group_size, layernorm_const))
472-
with Pool() as pool:
473-
result = pool.starmap(convert_qwen_layer, param_list)
456+
# save fused_layers blobs of fused decoder layers
457+
convert_fused_qwen_layer(model, fused_layers, n_splits_linear, n_splits_down_proj,
458+
save_directory, weight_dir, transpose_value_cache, kv_len,
459+
group_size, layernorm_const, "decode")
474460
# save blob of single prefill layer
475461
convert_qwen_layer(model, 0, n_splits_linear, n_splits_down_proj,
476462
save_directory, weight_dir, transpose_value_cache, max_prompt_len,
@@ -512,8 +498,7 @@ def convert_llm_for_deploy(model: torch.nn.Module,
512498
"embedding_post": embedding_post,
513499
"cos_sin_input": cos_sin_input,
514500
"n_splits_linear": n_splits_linear,
515-
"n_splits_down_proj": n_splits_down_proj,
516-
"use_level_zero": use_level_zero}
501+
"n_splits_down_proj": n_splits_down_proj}
517502
model.config.update(update_dict)
518503
model.config.save_pretrained(save_directory)
519504

@@ -549,8 +534,7 @@ def convert_llm_for_deploy(model: torch.nn.Module,
549534
"model_type": "minicpm",
550535
"embedding_post": True,
551536
"n_splits_linear": n_splits_linear,
552-
"n_splits_down_proj": n_splits_down_proj,
553-
"use_level_zero": use_level_zero}
537+
"n_splits_down_proj": n_splits_down_proj}
554538
model.config.update(update_dict)
555539
model.config.save_pretrained(save_directory)
556540

0 commit comments

Comments (0)