Skip to content

Commit

Permalink
A new approach to template files!
Browse files Browse the repository at this point in the history
  • Loading branch information
Rocketknight1 committed Mar 6, 2025
1 parent bcaf117 commit 199d69f
Showing 1 changed file with 13 additions and 0 deletions.
13 changes: 13 additions & 0 deletions src/transformers/tokenization_utils_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@
)
from .utils.chat_template_utils import _compile_jinja_template, _render_with_assistant_indices
from .utils.import_utils import PROTOBUF_IMPORT_ERROR
from huggingface_hub import list_repo_tree


if TYPE_CHECKING:
Expand Down Expand Up @@ -1966,6 +1967,18 @@ def from_pretrained(
"tokenizer_file": FULL_TOKENIZER_FILE,
"chat_template_file": CHAT_TEMPLATE_FILE,
}
# Register every "*.jinja" file found in the checkpoint's "templates" folder as an extra
# file to resolve, keyed as "<name>_template" and mapped to its "templates/<file>" path.
if is_local:
    template_dir = Path(pretrained_model_name_or_path, "templates")
    if template_dir.is_dir():
        for template_file in template_dir.glob("*.jinja"):
            template_name = template_file.name.removesuffix(".jinja")
            additional_files_names[f"{template_name}_template"] = f"templates/{template_file.name}"
else:
    from huggingface_hub.utils import EntryNotFoundError

    # NOTE(review): no revision/token is forwarded to `list_repo_tree` here, so the listing
    # uses the library defaults - confirm whether the caller's values should be passed on.
    try:
        for repo_entry in list_repo_tree(pretrained_model_name_or_path, path_in_repo="templates", recursive=False):
            # `list_repo_tree` yields RepoFile/RepoFolder objects rather than strings; the
            # repo-relative path (e.g. "templates/tool_use.jinja") is in their `.path` attribute.
            repo_path = repo_entry.path
            if not repo_path.endswith(".jinja"):
                continue
            template_name = repo_path.split("/")[-1].removesuffix(".jinja")
            additional_files_names[f"{template_name}_template"] = repo_path
    except EntryNotFoundError:
        # The repo has no "templates" folder. That is a normal case, not an error:
        # there are simply no extra template files to register.
        pass
vocab_files = {**cls.vocab_files_names, **additional_files_names}
if "tokenizer_file" in vocab_files:
# Try to get the tokenizer config to see if there are versioned tokenizer files.
Expand Down

0 comments on commit 199d69f

Please sign in to comment.