Skip to content

Commit

Permalink
add down proj back for q4_0
Browse files — browse the repository at this point in the history
  • Branch information
rnwang04 committed Dec 5, 2024
1 parent d8b0ced commit 8948a43
Showing 1 changed file with 4 additions and 0 deletions.
4 changes: 4 additions & 0 deletions python/llm/src/ipex_llm/transformers/npu_models/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,10 @@ def replace_with_QuantizedLinear(layer, qtype, device, modules_to_not_convert,
if (layer.in_features == 3584 and layer.out_features == 152064):
qtype = "sym_int8_rtn"
iqtype = ggml_tensor_qtype[qtype]
if qtype == "sym_int4_rtn":
if (layer.in_features == 18944 and layer.out_features == 3584):
qtype = "sym_int8_rtn"
iqtype = ggml_tensor_qtype[qtype]
enable_scale_search = os.environ.get("IPEX_LLM_NPU_QUANTIZATION_OPT", "0") != "0"
qweights, scale = ggml_convert_qtype(layer.weight.data.to(torch.float32),
iqtype, device=device,
Expand Down

0 comments on commit 8948a43

Please sign in to comment.