diff --git a/lmdeploy/model.py b/lmdeploy/model.py
index 7abbb6295..311ab2eef 100644
--- a/lmdeploy/model.py
+++ b/lmdeploy/model.py
@@ -156,7 +156,7 @@ class Llama2(BaseModel):
     """Chat template of LLaMA2 model."""

     def __init__(self):
-
+        super().__init__()
         B_INST, E_INST = '[INST]', '[/INST]'
         B_SYS, E_SYS = '<<SYS>>\n', '\n<</SYS>>\n\n'
diff --git a/lmdeploy/serve/turbomind/chatbot.py b/lmdeploy/serve/turbomind/chatbot.py
index d44dfb4ba..e84157eb0 100644
--- a/lmdeploy/serve/turbomind/chatbot.py
+++ b/lmdeploy/serve/turbomind/chatbot.py
@@ -547,7 +547,6 @@ def stream_consumer(postprocess, res_queue, session, n_input_token,
         except Exception as e:
             logger.error(f'catch exception: {e}')

-    session.response = session.response[len(session.prompt):]
     # put session back to queue so that `_stream_infer` can update it in
     # `self.sessions`
     while not res_queue.empty():