Skip to content

Commit

Permalink
change chat template
Browse files — Browse the repository at this point in the history
Committed by lvhan028 on Sep 15, 2023
1 parent 2dec28a commit d1c4e14
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 7 deletions.
8 changes: 4 additions & 4 deletions lmdeploy/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def get_prompt(self, prompt, sequence_start=True):

@abstractmethod
def decorate_prompt(self, prompt, sequence_start):
pass
return prompt

@staticmethod
def _translate_messages(messages: List):
Expand Down Expand Up @@ -176,8 +176,8 @@ class InternLMChat7B(BaseModel):
def __init__(self,
system='',
user='<|User|>',
eoh='<eoh>',
eoa='<eoa>',
eoh='',
eoa='',
assistant='<|Bot|>',
**kwargs):
super().__init__(**kwargs)
Expand Down Expand Up @@ -231,7 +231,7 @@ def messages2prompt(self, messages, sequence_start=True):
@property
def stop_words(self):
"""Return the stop-words' token ids."""
return [103027, 103028]
return [103028]


@MODELS.register_module(name='internlm-chat-7b-8k')
Expand Down
7 changes: 4 additions & 3 deletions lmdeploy/turbomind/tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ class HuggingFaceTokenizer:

def __init__(self, model_dir: str):
from transformers import (AutoTokenizer, CodeLlamaTokenizerFast,
LlamaTokenizerFast)
LlamaTokenizer, LlamaTokenizerFast)
model_file = osp.join(model_dir, 'tokenizer.model')
backend_tokenizer_file = osp.join(model_dir, 'tokenizer.json')
model_file_exists = osp.exists(model_file)
Expand All @@ -121,8 +121,9 @@ def __init__(self, model_dir: str):
'It may take long time to initialize the tokenizer.')
self.model = AutoTokenizer.from_pretrained(model_dir,
trust_remote_code=True)
self.need_padding = isinstance(self.model, LlamaTokenizerFast) \
or isinstance(self.model, CodeLlamaTokenizerFast)
self.need_padding = type(self.model) in [
LlamaTokenizer, LlamaTokenizerFast, CodeLlamaTokenizerFast
]
self._no_prefix_space_tokens = None
# save tokenizer.json to reuse
if not osp.exists(backend_tokenizer_file) and model_file_exists:
Expand Down

0 comments on commit d1c4e14

Please sign in to comment.