update mlp of llama (#11897)
* update mlp of llama

* relax threshold of mlp test

* revert code
rnwang04 authored Aug 22, 2024
1 parent 420ce7d commit 4a61f7d
Showing 2 changed files with 11 additions and 1 deletion.
10 changes: 10 additions & 0 deletions python/llm/src/ipex_llm/transformers/models/llama.py
@@ -305,6 +305,16 @@ def llama_mlp_forward(
         )
         hidden_states = attn_output.view(x.shape)
         return hidden_states
+    elif x.device.type == "xpu" and not self.training:
+        import xe_addons
+        gate = self.gate_proj(x)
+        up = self.up_proj(x)
+        xe_addons.mlp_silu_mul_inplaced(gate, up)
+        out = self.down_proj(gate)
+        if residual is not None:
+            return out + residual
+        else:
+            return out
     else:
         a = self.act_fn(self.gate_proj(x))
         b = self.up_proj(x)
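For context on the new branch: `xe_addons` is IPEX-LLM's native XPU kernel extension, and judging by its name and by the eager `else` branch of `llama_mlp_forward`, `mlp_silu_mul_inplaced(gate, up)` fuses the SiLU activation and the elementwise multiply of LLaMA's gated MLP into a single kernel that overwrites `gate`. A minimal eager-mode sketch of that assumed behavior follows; the reference function and test tensors are illustrative, not part of the diff:

import torch
import torch.nn.functional as F

def mlp_silu_mul_reference(gate: torch.Tensor, up: torch.Tensor) -> torch.Tensor:
    # Assumed eager equivalent of xe_addons.mlp_silu_mul_inplaced:
    # overwrite `gate` with silu(gate) * up, so the caller can feed the
    # same buffer straight into down_proj without an extra temporary.
    gate.copy_(F.silu(gate) * up)
    return gate

x = torch.randn(1, 4, 64)
gate, up = x.clone(), torch.randn(1, 4, 64)
out = mlp_silu_mul_reference(gate, up)
assert torch.allclose(out, F.silu(x) * up)  # matches the eager SwiGLU path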
2 changes: 1 addition & 1 deletion python/llm/test/inference_gpu/test_transformers_api_mlp.py
@@ -134,7 +134,7 @@ def Mistral_7B_Instruct_gpu_model(self, Name, Model, Tokenizer, model_path):
         # currently only compare the output of the last mlp layer.
         layer_before_MLP = "model.layers.31.post_attention_layernorm"
         MLP_layer = "model.layers.31.mlp"
-        lower_bound = 0
+        lower_bound = 1e-3
         self.run_optimize_gpu_model(Name, Model, Tokenizer, model_path, MLP_layer, layer_before_MLP, lower_bound)
 
     def Llama2_7B_gpu_model(self, Name, Model, Tokenizer, model_path):
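The test change pairs with the kernel change: the fused in-place kernel is no longer bit-identical to the eager reference, so the exact-match bound of 0 is relaxed to 1e-3. `run_optimize_gpu_model` itself is not shown in this diff; the sketch below only illustrates the kind of comparison such a bound could gate, under the assumption that `lower_bound` caps the difference between reference and optimized layer outputs (the function and tensor names here are placeholders):

import torch

def check_mlp_outputs(reference_out: torch.Tensor,
                      optimized_out: torch.Tensor,
                      lower_bound: float) -> None:
    # Hypothetical tolerance check: with lower_bound = 0 only bit-identical
    # outputs pass; lower_bound = 1e-3 tolerates the small numerical drift
    # introduced by the fused XPU kernel.
    rel_diff = ((reference_out - optimized_out).abs().max()
                / reference_out.abs().max()).item()
    assert rel_diff <= lower_bound, f"MLP outputs diverged: rel_diff={rel_diff:.2e}"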
