From c5fef1ac2fc32053a6aa224b13f7ac629866b83b Mon Sep 17 00:00:00 2001
From: jinbridge <2635480475@qq.com>
Date: Tue, 3 Dec 2024 16:10:54 +0800
Subject: [PATCH] Fix NPU LLM example save/load tokenizer

---
 .../example/NPU/HF-Transformers-AutoModels/LLM/baichuan2.py   | 4 +++-
 .../example/NPU/HF-Transformers-AutoModels/LLM/generate.py    | 4 +++-
 .../llm/example/NPU/HF-Transformers-AutoModels/LLM/llama2.py  | 4 +++-
 .../llm/example/NPU/HF-Transformers-AutoModels/LLM/llama3.py  | 4 +++-
 .../llm/example/NPU/HF-Transformers-AutoModels/LLM/minicpm.py | 4 +++-
 python/llm/example/NPU/HF-Transformers-AutoModels/LLM/qwen.py | 4 +++-
 6 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/baichuan2.py b/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/baichuan2.py
index 05c47076ede0..cdf26af179bc 100644
--- a/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/baichuan2.py
+++ b/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/baichuan2.py
@@ -79,6 +79,8 @@ def get_prompt(message: str, chat_history: list[tuple[str, str]],
             transpose_value_cache=not args.disable_transpose_value_cache,
             save_directory=args.save_directory
         )
+        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        tokenizer.save_pretrained(args.save_directory)
     else:
         model = AutoModelForCausalLM.load_low_bit(
             args.save_directory,
@@ -90,8 +92,8 @@ def get_prompt(message: str, chat_history: list[tuple[str, str]],
             transpose_value_cache=not args.disable_transpose_value_cache,
             trust_remote_code=True,
         )
+        tokenizer = AutoTokenizer.from_pretrained(args.save_directory, trust_remote_code=True)
 
-    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 
     DEFAULT_SYSTEM_PROMPT = """\
 """
diff --git a/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/generate.py b/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/generate.py
index 41a14e1a32b5..d3abd13a6e6b 100644
--- a/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/generate.py
+++ b/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/generate.py
@@ -43,7 +43,6 @@
     args = parser.parse_args()
     model_path = args.repo_id_or_model_path
 
-    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 
     if not args.lowbit_path or not os.path.exists(args.lowbit_path):
         model = AutoModelForCausalLM.from_pretrained(
@@ -52,6 +51,8 @@
             load_in_low_bit=args.load_in_low_bit,
             attn_implementation="eager"
         )
+        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        tokenizer.save_pretrained(args.lowbit_path)
     else:
         model = AutoModelForCausalLM.load_low_bit(
             args.lowbit_path,
@@ -59,6 +60,7 @@
             bigdl_transformers_low_bit=args.load_in_low_bit,
             attn_implementation="eager"
         )
+        tokenizer = AutoTokenizer.from_pretrained(args.lowbit_path, trust_remote_code=True)
 
     print(model)
 
diff --git a/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/llama2.py b/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/llama2.py
index 83fe6d899ebf..d981f39f97ed 100644
--- a/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/llama2.py
+++ b/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/llama2.py
@@ -79,6 +79,8 @@ def get_prompt(message: str, chat_history: list[tuple[str, str]],
             transpose_value_cache=not args.disable_transpose_value_cache,
             save_directory=args.save_directory
         )
+        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        tokenizer.save_pretrained(args.save_directory)
     else:
         model = AutoModelForCausalLM.load_low_bit(
             args.save_directory,
@@ -89,8 +91,8 @@ def get_prompt(message: str, chat_history: list[tuple[str, str]],
             max_prompt_len=args.max_prompt_len,
             transpose_value_cache=not args.disable_transpose_value_cache,
         )
+        tokenizer = AutoTokenizer.from_pretrained(args.save_directory, trust_remote_code=True)
 
-    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 
     DEFAULT_SYSTEM_PROMPT = """\
 """
diff --git a/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/llama3.py b/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/llama3.py
index 85cca7fd6dbd..35ee49022464 100644
--- a/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/llama3.py
+++ b/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/llama3.py
@@ -80,6 +80,8 @@ def get_prompt(user_input: str, chat_history: list[tuple[str, str]],
             transpose_value_cache=not args.disable_transpose_value_cache,
             save_directory=args.save_directory
         )
+        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        tokenizer.save_pretrained(args.save_directory)
     else:
         model = AutoModelForCausalLM.load_low_bit(
             args.save_directory,
@@ -90,8 +92,8 @@ def get_prompt(user_input: str, chat_history: list[tuple[str, str]],
             max_prompt_len=args.max_prompt_len,
             transpose_value_cache=not args.disable_transpose_value_cache,
         )
+        tokenizer = AutoTokenizer.from_pretrained(args.save_directory, trust_remote_code=True)
 
-    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 
     DEFAULT_SYSTEM_PROMPT = """\
 """
diff --git a/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/minicpm.py b/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/minicpm.py
index 5ec0bf7289c3..b177042cc2b6 100644
--- a/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/minicpm.py
+++ b/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/minicpm.py
@@ -65,6 +65,8 @@
             transpose_value_cache=not args.disable_transpose_value_cache,
             save_directory=args.save_directory
         )
+        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        tokenizer.save_pretrained(args.save_directory)
     else:
         model = AutoModelForCausalLM.load_low_bit(
             args.save_directory,
@@ -76,7 +78,7 @@
             transpose_value_cache=not args.disable_transpose_value_cache,
             trust_remote_code=True,
         )
-    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        tokenizer = AutoTokenizer.from_pretrained(args.save_directory, trust_remote_code=True)
 
     print("-" * 80)
     print("done")
diff --git a/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/qwen.py b/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/qwen.py
index 9f03c908b969..caf6d1b3e545 100644
--- a/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/qwen.py
+++ b/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/qwen.py
@@ -71,6 +71,8 @@
             quantization_group_size=args.quantization_group_size,
             save_directory=args.save_directory
         )
+        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        tokenizer.save_pretrained(args.save_directory)
     else:
         model = AutoModelForCausalLM.load_low_bit(
             args.save_directory,
@@ -81,8 +83,8 @@
             max_prompt_len=args.max_prompt_len,
             transpose_value_cache=not args.disable_transpose_value_cache,
         )
+        tokenizer = AutoTokenizer.from_pretrained(args.save_directory, trust_remote_code=True)
 
-    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 
     print("-" * 80)
     print("done")
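Note: every example above applies the same pattern. On the first run the tokenizer is fetched from the original checkpoint and saved next to the converted low-bit model; on later runs both artifacts are loaded back from that directory, so the original checkpoint is no longer needed. Below is a minimal sketch of the flow, not part of the patch: the import path and keyword arguments mirror the example scripts, the concrete paths are illustrative placeholders, and most conversion kwargs used by the real examples are omitted.

    import os
    from transformers import AutoTokenizer
    # Import path as used by the NPU LLM examples touched by this patch (assumed unchanged).
    from ipex_llm.transformers.npu_model import AutoModelForCausalLM

    model_path = "path/to/original/hf/checkpoint"   # illustrative placeholder
    save_directory = "path/to/npu_low_bit_model"    # illustrative placeholder

    if not os.path.exists(save_directory):
        # First run: convert and save the low-bit model, then save the tokenizer
        # into the same directory so later runs need no original checkpoint.
        # (Other conversion kwargs from the examples are omitted for brevity.)
        model = AutoModelForCausalLM.from_pretrained(
            model_path,
            trust_remote_code=True,
            attn_implementation="eager",
            save_directory=save_directory,
        )
        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
        tokenizer.save_pretrained(save_directory)
    else:
        # Later runs: load both the low-bit model and the tokenizer from save_directory.
        model = AutoModelForCausalLM.load_low_bit(
            save_directory,
            trust_remote_code=True,
            attn_implementation="eager",
        )
        tokenizer = AutoTokenizer.from_pretrained(save_directory, trust_remote_code=True)

Keeping the tokenizer files next to the low-bit weights makes the save directory self-contained, which is why the reload branches now read the tokenizer from args.save_directory (or args.lowbit_path) instead of model_path.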