Commit c5fef1a — "Fix NPU LLM example save/load tokenizer"
1 parent: ab01753

File tree: 6 files changed (+18 additions, −6 deletions)

python/llm/example/NPU/HF-Transformers-AutoModels/LLM/baichuan2.py

Lines changed: 3 additions & 1 deletion
@@ -79,6 +79,8 @@ def get_prompt(message: str, chat_history: list[tuple[str, str]],
         transpose_value_cache=not args.disable_transpose_value_cache,
         save_directory=args.save_directory
     )
+    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+    tokenizer.save_pretrained(args.save_directory)
 else:
     model = AutoModelForCausalLM.load_low_bit(
         args.save_directory,
@@ -90,8 +92,8 @@ def get_prompt(message: str, chat_history: list[tuple[str, str]],
         transpose_value_cache=not args.disable_transpose_value_cache,
         trust_remote_code=True,
     )
+    tokenizer = AutoTokenizer.from_pretrained(args.save_directory, trust_remote_code=True)
 
-tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 
 DEFAULT_SYSTEM_PROMPT = """\
 """

python/llm/example/NPU/HF-Transformers-AutoModels/LLM/generate.py

Lines changed: 3 additions & 1 deletion
@@ -43,7 +43,6 @@
     args = parser.parse_args()
     model_path = args.repo_id_or_model_path
 
-    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 
     if not args.lowbit_path or not os.path.exists(args.lowbit_path):
         model = AutoModelForCausalLM.from_pretrained(
@@ -52,13 +51,16 @@
             load_in_low_bit=args.load_in_low_bit,
             attn_implementation="eager"
         )
+        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        tokenizer.save_pretrained(args.lowbit_path)
     else:
         model = AutoModelForCausalLM.load_low_bit(
             args.lowbit_path,
             trust_remote_code=True,
             bigdl_transformers_low_bit=args.load_in_low_bit,
             attn_implementation="eager"
         )
+        tokenizer = AutoTokenizer.from_pretrained(args.lowbit_path, trust_remote_code=True)
 
     print(model)
 

python/llm/example/NPU/HF-Transformers-AutoModels/LLM/llama2.py

Lines changed: 3 additions & 1 deletion
@@ -79,6 +79,8 @@ def get_prompt(message: str, chat_history: list[tuple[str, str]],
         transpose_value_cache=not args.disable_transpose_value_cache,
         save_directory=args.save_directory
     )
+    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+    tokenizer.save_pretrained(args.save_directory)
 else:
     model = AutoModelForCausalLM.load_low_bit(
         args.save_directory,
@@ -89,8 +91,8 @@ def get_prompt(message: str, chat_history: list[tuple[str, str]],
         max_prompt_len=args.max_prompt_len,
         transpose_value_cache=not args.disable_transpose_value_cache,
     )
+    tokenizer = AutoTokenizer.from_pretrained(args.save_directory, trust_remote_code=True)
 
-tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 
 DEFAULT_SYSTEM_PROMPT = """\
 """

python/llm/example/NPU/HF-Transformers-AutoModels/LLM/llama3.py

Lines changed: 3 additions & 1 deletion
@@ -80,6 +80,8 @@ def get_prompt(user_input: str, chat_history: list[tuple[str, str]],
         transpose_value_cache=not args.disable_transpose_value_cache,
         save_directory=args.save_directory
     )
+    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+    tokenizer.save_pretrained(args.save_directory)
 else:
     model = AutoModelForCausalLM.load_low_bit(
         args.save_directory,
@@ -90,8 +92,8 @@ def get_prompt(user_input: str, chat_history: list[tuple[str, str]],
         max_prompt_len=args.max_prompt_len,
         transpose_value_cache=not args.disable_transpose_value_cache,
     )
+    tokenizer = AutoTokenizer.from_pretrained(args.save_directory, trust_remote_code=True)
 
-tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 
 DEFAULT_SYSTEM_PROMPT = """\
 """

python/llm/example/NPU/HF-Transformers-AutoModels/LLM/minicpm.py

Lines changed: 3 additions & 1 deletion
@@ -65,6 +65,8 @@
         transpose_value_cache=not args.disable_transpose_value_cache,
         save_directory=args.save_directory
     )
+    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+    tokenizer.save_pretrained(args.save_directory)
 else:
     model = AutoModelForCausalLM.load_low_bit(
         args.save_directory,
@@ -76,7 +78,7 @@
         transpose_value_cache=not args.disable_transpose_value_cache,
         trust_remote_code=True,
     )
-    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+    tokenizer = AutoTokenizer.from_pretrained(args.save_directory, trust_remote_code=True)
 
 print("-" * 80)
 print("done")

python/llm/example/NPU/HF-Transformers-AutoModels/LLM/qwen.py

Lines changed: 3 additions & 1 deletion
@@ -71,6 +71,8 @@
         quantization_group_size=args.quantization_group_size,
         save_directory=args.save_directory
     )
+    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+    tokenizer.save_pretrained(args.save_directory)
 else:
     model = AutoModelForCausalLM.load_low_bit(
         args.save_directory,
@@ -81,8 +83,8 @@
         max_prompt_len=args.max_prompt_len,
         transpose_value_cache=not args.disable_transpose_value_cache,
     )
+    tokenizer = AutoTokenizer.from_pretrained(args.save_directory, trust_remote_code=True)
 
-tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 
 print("-" * 80)
 print("done")

0 commit comments