
Commit

revert test code
rnwang04 committed Dec 5, 2024
1 parent 4e56e79 commit 558e101
Showing 1 changed file with 23 additions and 23 deletions.
46 changes: 23 additions & 23 deletions python/llm/src/ipex_llm/transformers/npu_model.py
@@ -284,29 +284,29 @@ def optimize_npu_model(cls, *args, **kwargs):
         model.share_memory()
 
         if not pipeline:
-            # if (not hasattr(model, 'llm') and
-            #         model.config.model_type in ["qwen2", "llama", "minicpm"]):
-            #     from ipex_llm.transformers.npu_models.convert import optimize_llm_single_process
-            #     optimize_llm_single_process(
-            #         llm,
-            #         kv_len=max_context_len,
-            #         max_prompt_len=max_prompt_len,
-            #         transpose_value_cache=transpose_value_cache,
-            #         group_size=quantization_group_size,
-            #         qtype=qtype,
-            #         save_directory=save_directory,
-            #         fuse_layers=fuse_layers
-            #     )
-            # else:
-            optimize_llm(
-                llm,
-                max_context_len=max_context_len,
-                max_prompt_len=max_prompt_len,
-                inter_pp=inter_pp,
-                intra_pp=intra_pp,
-                transpose_value_cache=transpose_value_cache,
-                group_size=quantization_group_size
-            )
+            if (not hasattr(model, 'llm') and
+                    model.config.model_type in ["qwen2", "llama", "minicpm"]):
+                from ipex_llm.transformers.npu_models.convert import optimize_llm_single_process
+                optimize_llm_single_process(
+                    llm,
+                    kv_len=max_context_len,
+                    max_prompt_len=max_prompt_len,
+                    transpose_value_cache=transpose_value_cache,
+                    group_size=quantization_group_size,
+                    qtype=qtype,
+                    save_directory=save_directory,
+                    fuse_layers=fuse_layers
+                )
+            else:
+                optimize_llm(
+                    llm,
+                    max_context_len=max_context_len,
+                    max_prompt_len=max_prompt_len,
+                    inter_pp=inter_pp,
+                    intra_pp=intra_pp,
+                    transpose_value_cache=transpose_value_cache,
+                    group_size=quantization_group_size
+                )
         else:
             from ipex_llm.transformers.npu_pipeline_model.convert_pipeline \
                 import convert_llm
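For quick reference, below is a minimal, self-contained sketch of the control flow this revert restores: single-process conversion for plain qwen2/llama/minicpm checkpoints that do not wrap an inner `llm` module, and the multi-process pipeline-parallel path otherwise. The stub bodies, the `dispatch` wrapper, and the default argument values are illustrative assumptions, not ipex_llm code; only the branch condition and the call signatures mirror the diff above.

# Minimal sketch (not the ipex_llm implementation). The two stubs below are
# hypothetical stand-ins for ipex_llm's optimize_llm_single_process and
# optimize_llm, kept only to show which parameters each path receives.
from types import SimpleNamespace


def optimize_llm_single_process(llm, kv_len, max_prompt_len, transpose_value_cache,
                                group_size, qtype, save_directory, fuse_layers):
    # Hypothetical stub: single-process NPU conversion path.
    print(f"single-process path: kv_len={kv_len}, qtype={qtype}")


def optimize_llm(llm, max_context_len, max_prompt_len, inter_pp, intra_pp,
                 transpose_value_cache, group_size):
    # Hypothetical stub: multi-process path with inter/intra pipeline parallelism.
    print(f"multi-process path: inter_pp={inter_pp}, intra_pp={intra_pp}")


def dispatch(model, llm, *, pipeline=False, max_context_len=1024, max_prompt_len=512,
             transpose_value_cache=True, quantization_group_size=0, qtype="sym_int4",
             save_directory=None, fuse_layers=None, inter_pp=None, intra_pp=None):
    # Mirrors the restored branch from the diff above.
    if not pipeline:
        if (not hasattr(model, 'llm') and
                model.config.model_type in ["qwen2", "llama", "minicpm"]):
            optimize_llm_single_process(
                llm,
                kv_len=max_context_len,
                max_prompt_len=max_prompt_len,
                transpose_value_cache=transpose_value_cache,
                group_size=quantization_group_size,
                qtype=qtype,
                save_directory=save_directory,
                fuse_layers=fuse_layers
            )
        else:
            optimize_llm(
                llm,
                max_context_len=max_context_len,
                max_prompt_len=max_prompt_len,
                inter_pp=inter_pp,
                intra_pp=intra_pp,
                transpose_value_cache=transpose_value_cache,
                group_size=quantization_group_size
            )


# Usage with a dummy model object (no inner `llm` attribute, model_type "qwen2"),
# which takes the single-process branch:
dummy = SimpleNamespace(config=SimpleNamespace(model_type="qwen2"))
dispatch(dummy, llm=dummy)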
