diff --git a/llmware/model_configs.py b/llmware/model_configs.py index 765b3baf..be51a683 100644 --- a/llmware/model_configs.py +++ b/llmware/model_configs.py @@ -1,4 +1,4 @@ -# Copyright 2023-2024 llmware +# Copyright 2023-2025 llmware # Licensed under the Apache License, Version 2.0 (the "License"); you # may not use this file except in compliance with the License. You @@ -3044,6 +3044,12 @@ "phi_3": {"system_start": "<|system|>\n", "system_stop": "<|end|>\n", "main_start": "<|user|>\n", "main_stop": "<|end|>\n", "start_llm_response": "<|assistant|>"}, + "phi_4": {"system_start": "<|im_start|>system<|im_sep|>\n", + "system_stop": "<|im_end|>\n", + "main_start": "<|im_start|>user<|im_sep|>\n", + "main_stop": "<|im_end|>\n", + "start_llm_response": "<|im_start|>assistant<|im_sep|>"}, + "llama_3_chat": {"system_start": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n", "system_stop": "<|eot_id|>", "main_start": "<|start_header_id|>user>|end_header_id|>\n", @@ -3069,6 +3075,58 @@ } + +""" Tokenizer EOS/BOS lookup master table """ + +global_tokenizer_bos_eos_lookup = { + + "tokenizer_phi3.json": {"bos_id": 1, "bos_token": "<s>", + "eos_id": [32000, 32001, 32007], "eos_token": "<|endoftext|>"}, + + # e.g., 100265 = <|im_end|> + "tokenizer_phi4.json": {"bos_id": 100257, "bos_token": "<|endoftext|>", + "eos_id": [100257, 100265], "eos_token": "<|endoftext|>"}, + + "tokenizer_stablelm.json": {"bos_id": 0, "bos_token": "<|endoftext|>", + "eos_id": [0], "eos_token": "<|endoftext|>"}, + + "tokenizer_stablelm_1_6.json": {"bos_id": 100257, "bos_token": "<|endoftext|>", + "eos_id": [100257], "eos_token": "<|endoftext|>"}, + + "tokenizer_tl.json": {"bos_id": 1, "bos_token": "<s>", + "eos_id": [2, 32000], "eos_token": "</s>"}, + + "tokenizer_ll2.json": {"bos_id": 1, "bos_token": "<s>", + "eos_id": [2], "eos_token": "</s>"}, + + "tokenizer_gemma.json": {"bos_id": 2, "bos_token": "<bos>", + "eos_id": [1], "eos_token": "<eos>"}, + + "tokenizer_ll3.json": {"bos_id": 128000, "bos_token": 
"<|begin_of_text|>", + "eos_id": [128001, 128008, 128009, 128256], "eos_token": "<|eot_id|>" + }, + + "tokenizer_qw.json": {"bos_id": 151643, "bos_token": "<|endoftext|>", + "eos_id": [151643, 151645], + "eos_token": ["<|im_end|>"]}, + + "tokenizer_phi2.json": {"bos_id": 50256, "bos_token": "<|endoftext|>", + "eos_id": [50256], "eos_token": "<|endoftext|>"}, + + # 01-ai yi tokenizer + "tokenizer_yi.json": {"bos_id": 1, "bos_token": "<|startoftext|>", + "eos_id": [2, 7], "eos_token": "<|endoftext|>"}, + + # Mistral tokenizer + "tokenizer_mistral.json": {"bos_id": 1, "bos_token": "<s>", + "eos_id": [2], "eos_token": "</s>"}, + + "tokenizer_mistral_chat.json": {"bos_id": 1, "bos_token": "<s>", + "eos_id": [2, 32000, 32768], "eos_token": ["</s>", "<|im_end|>"]}, + +} + + """ Global default prompt catalog consists of a set of prebuilt useful prompt instructions across a wide range of models. Unlike prompt_wrappers, which tend to be an attribute of the model, the prompt catalog can be invoked on a 'prompt-by-prompt' basis to drive different behavior from a model. Note: not all models will support diff --git a/llmware/models.py b/llmware/models.py index 1a97b180..fed50166 100755 --- a/llmware/models.py +++ b/llmware/models.py @@ -1,4 +1,4 @@ -# Copyright 2023-2024 llmware +# Copyright 2023-2025 llmware # Licensed under the Apache License, Version 2.0 (the "License"); you # may not use this file except in compliance with the License. 
You @@ -32,7 +32,7 @@ ModelCardNotRegisteredException, GGUFLibNotLoadedException, LLMWareException) from llmware.model_configs import (global_model_repo_catalog_list, global_model_finetuning_prompt_wrappers_lookup, - global_default_prompt_catalog, model_benchmark_data) + global_default_prompt_catalog, model_benchmark_data, global_tokenizer_bos_eos_lookup) from llmware.gguf_configs import * from llmware.gguf_configs import _LlamaModel, _LlamaContext, _LlamaBatch, _LlamaTokenDataArray @@ -104,10 +104,13 @@ class _ModelRegistry: # we are treating these "prompt_wrappers" as core attributes of the model prompt_wrappers = ["alpaca", "human_bot", "chatgpt", "", "open_chat", "hf_chat", "chat_ml", "phi_3", "llama_3_chat","tiny_llama_chat","stablelm_zephyr_chat", "google_gemma_chat", - "vicuna_chat"] + "vicuna_chat", "phi_4"] registered_wrappers = global_model_finetuning_prompt_wrappers_lookup + # new attribute - track bos/eos for common tokenizers + tokenizer_bos_eos_config = global_tokenizer_bos_eos_lookup + # list of specialized function calling tools llm_fx_tools = ["ner", "sentiment", "topics", "ratings", "emotions", "nli", @@ -162,6 +165,11 @@ def get_wrapper_list(cls): """ List current registered wrapper formats """ return cls.registered_wrappers + # new method + @classmethod + def get_tokenizer_bos_eos_lookup(cls): + return cls.tokenizer_bos_eos_config + @classmethod def get_llm_fx_tools_list (cls): """ List of function calling model tools available """ @@ -182,6 +190,25 @@ def add_wrapper(cls, wrapper_name, wrapper_dict): return wrapper_dict + @classmethod + def load_prompt_wrappers_from_file(cls, new_wrapper_registry): + + cls.registered_wrappers = {} + cls.prompt_wrappers = [] + + for key,value in new_wrapper_registry.items(): + if key not in cls.prompt_wrappers: + cls.prompt_wrappers.append(key) + + cls.registered_wrappers.update({key:value}) + + @classmethod + def load_tokenizer_configs_from_file(cls, new_tokenizer_configs): + + cls.tokenizer_bos_eos_config = {} 
+ for key, value in new_tokenizer_configs.items(): + cls.tokenizer_bos_eos_config.update({key:value}) + @classmethod def validate(cls, model_card_dict): @@ -542,6 +569,68 @@ def load_model_registry(self, fp=None, fn="llmware_model_catalog.json"): return 0 + def load_prompt_wrapper_registry(self, fp=None, fn="prompt_wrappers.json"): + + """ Utility method to load updated prompt wrapper registry from json file. Will + remove the current global prompt wrapper registry and replace with updated registry from file. """ + + if not fp: + fp = LLMWareConfig().get_llmware_path() + + prompt_list = json.load(open(os.path.join(fp,fn), "r")) + _ModelRegistry().load_prompt_wrappers_from_file(prompt_list) + + return True + + def save_prompt_wrapper_registry(self, fp=None, fn="prompt_wrappers.json"): + + """ Utility method to export global prompt wrapper list to json file """ + + if not fp: + fp = LLMWareConfig().get_llmware_path() + + prompt_list = _ModelRegistry().get_wrapper_list() + + json_dict = json.dumps(prompt_list, indent=1) + with open(os.path.join(fp, fn), "w", encoding='utf-8') as outfile: + outfile.write(json_dict) + + return True + + def get_tokenizer_bos_eos_configs(self): + + """ Returns the tokenizer bos eos configs for common models. """ + + return _ModelRegistry().get_tokenizer_bos_eos_lookup() + + def save_tokenizer_bos_eos_configs(self, fp=None, fn="tokenizer_bos_eos_configs.json"): + + """ Utility method to export tokenizer bos_eos configs to json file """ + + if not fp: + fp = LLMWareConfig().get_llmware_path() + + tok_configs = _ModelRegistry().get_tokenizer_bos_eos_lookup() + + json_dict = json.dumps(tok_configs, indent=1) + with open(os.path.join(fp, fn), "w", encoding='utf-8') as outfile: + outfile.write(json_dict) + + return True + + def load_tokenizer_bos_eos_configs(self, fp=None, fn="tokenizer_bos_eos_configs.json"): + + """ Utility method to load updated tokenizer bos_eos configs from json file. 
Will + remove the current tokenizer bos eos configs and replace with updated configs from file. """ + + if not fp: + fp = LLMWareConfig().get_llmware_path() + + tok_config_list = json.load(open(os.path.join(fp, fn), "r")) + _ModelRegistry().load_tokenizer_configs_from_file(tok_config_list) + + return True + def add_model_cards_from_file(self, fp=None, fn="custom_models_manifest.json"): """ Utility method that loads model cards from a single json file and incrementally adds diff --git a/llmware/util.py b/llmware/util.py index 19a23ea1..98666999 100755 --- a/llmware/util.py +++ b/llmware/util.py @@ -1,5 +1,5 @@ -# Copyright 2023-2024 llmware +# Copyright 2023-2025 llmware # Licensed under the Apache License, Version 2.0 (the "License"); you # may not use this file except in compliance with the License. You @@ -1641,10 +1641,14 @@ class AgentWriter: -- 'off' - turns off (no action taken) """ - def __init__(self): + def __init__(self, mode=None): # options configured through global LLMWareConfigs - self.mode = LLMWareConfig().get_agent_writer_mode() + if mode: + self.mode = mode + else: + self.mode = LLMWareConfig().get_agent_writer_mode() + self.fp_base = LLMWareConfig().get_llmware_path() self.fn = LLMWareConfig().get_agent_log_file()