diff --git a/intel_extension_for_transformers/llm/runtime/graph/scripts/convert_llama.py b/intel_extension_for_transformers/llm/runtime/graph/scripts/convert_llama.py
index 93bcd8cde76..3cf517b397d 100644
--- a/intel_extension_for_transformers/llm/runtime/graph/scripts/convert_llama.py
+++ b/intel_extension_for_transformers/llm/runtime/graph/scripts/convert_llama.py
@@ -165,7 +165,7 @@ def guessed(model: 'LazyModel') -> 'Params':
             n_mult=256,
             n_head=n_embd // 128,
             n_head_kv=n_embd // 128,
-            f_norm_eps=1e-5,
+            rms_norm_eps=1e-5,
             n_layer=next(i for i in itertools.count() if f"model.layers.{i}.self_attn.q_proj.weight" not in model),
         )

@@ -203,7 +203,7 @@ def loadHFTransformerJson(model: 'LazyModel', config_path: Path) -> 'Params':
         )

     # LLaMA v2 70B params.json
-    # {"dim": 8192, "multiple_of": 4096, "ffn_dim_multiplier": 1.3, "n_heads": 64, "n_kv_heads": 8, 
+    # {"dim": 8192, "multiple_of": 4096, "ffn_dim_multiplier": 1.3, "n_heads": 64, "n_kv_heads": 8,
     # "n_layers": 80, "norm_eps": 1e-05, "vocab_size": -1}
     @staticmethod
     def loadOriginalParamsJson(model: 'LazyModel', config_path: Path) -> 'Params':
@@ -230,8 +230,8 @@ def loadOriginalParamsJson(model: 'LazyModel', config_path: Path) -> 'Params':
             n_head=n_head,
             n_head_kv=n_head_kv,
             ffn_hidden_size=ffn_hidden_size,
-            bos_token_id = bos_token_id,
-            eos_token_id = eos_token_id,
+            bos_token_id=bos_token_id,
+            eos_token_id=eos_token_id,
         )

     @staticmethod
@@ -278,7 +278,7 @@ def __init__(self, fname_tokenizer: Path, params_vocab_size: int, fname_added_to
     def sentencepiece_tokens(self) -> Iterable[Tuple[bytes, float]]:
         tokenizer = self.sentencepiece_tokenizer
         for i in range(self.params_vocab_size):
-            text: bytes 
+            text: bytes
             if i < tokenizer.vocab_size():
                 if tokenizer.is_unknown(i):
                     text = " \u2047 ".encode("utf-8")
@@ -1086,7 +1086,7 @@ def write_file_header(self, params: Params, file_type: NEFileType) -> None:
         self.fout.write(struct.pack("f", params.rope_theta))
         self.fout.write(struct.pack("f", params.rope_scale))

-        # TODO, bos_token_id = 0 in https://huggingface.co/decapoda-research/llama-7b-hf/blob/main/config.json 
+        # TODO, bos_token_id = 0 in https://huggingface.co/decapoda-research/llama-7b-hf/blob/main/config.json
         # but bos_token_id = 1 in llama.cpp
         self.fout.write(struct.pack("i", params.bos_token_id))
         self.fout.write(struct.pack("i", params.eos_token_id))
@@ -1108,10 +1108,9 @@ def write_vocab(self, vocab: Vocab) -> None:

     @staticmethod
     def write_vocab_only(fname_out: Path, vocab: Vocab) -> None:
+        params = Params(n_vocab=vocab.vocab_size, n_embd=0, n_mult=0, n_head=1, n_layer=0)
         of = OutputFile(fname_out)
-        params = Params(n_vocab=vocab.vocab_size, n_embd=0, n_mult=0, n_head=1, n_layer=0, file_type=NEFileType.AllF32)
-        of = OutputFile(fname_out)
-        of.write_file_header(params)
+        of.write_file_header(params, file_type=NEFileType.AllF32)
         of.write_vocab(vocab)
         of.fout.close()

diff --git a/intel_extension_for_transformers/llm/runtime/graph/scripts/convert_mistral.py b/intel_extension_for_transformers/llm/runtime/graph/scripts/convert_mistral.py
index 76f6e4a6ea6..8bdefe4b714 100644
--- a/intel_extension_for_transformers/llm/runtime/graph/scripts/convert_mistral.py
+++ b/intel_extension_for_transformers/llm/runtime/graph/scripts/convert_mistral.py
@@ -164,7 +164,7 @@ def guessed(model: 'LazyModel') -> 'Params':
             n_mult=256,
             n_head=n_embd // 128,
             n_head_kv=n_embd // 128,
-            f_norm_eps=1e-5,
+            rms_norm_eps=1e-5,
             n_layer=next(i for i in itertools.count() if f"model.layers.{i}.self_attn.q_proj.weight" not in model),
         )

@@ -1088,10 +1088,9 @@ def write_vocab(self, vocab: Vocab) -> None:

     @staticmethod
     def write_vocab_only(fname_out: Path, vocab: Vocab) -> None:
+        params = Params(n_vocab=vocab.vocab_size, n_embd=0, n_mult=0, n_head=1, n_layer=0)
         of = OutputFile(fname_out)
-        params = Params(n_vocab=vocab.vocab_size, n_embd=0, n_mult=0, n_head=1, n_layer=0, file_type=NEFileType.AllF32)
-        of = OutputFile(fname_out)
-        of.write_file_header(params)
+        of.write_file_header(params, file_type=NEFileType.AllF32)
         of.write_vocab(vocab)
         of.fout.close()