From 67abdbb42fdbb59c274130368981c0d0ac3539e3 Mon Sep 17 00:00:00 2001
From: Isotr0py <2037008807@qq.com>
Date: Fri, 9 Aug 2024 22:51:04 +0800
Subject: [PATCH] [VLM][Doc] Add `stop_token_ids` to InternVL example (#7354)

---
 examples/offline_inference_vision_language.py | 23 ++++++++++++++-----
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/examples/offline_inference_vision_language.py b/examples/offline_inference_vision_language.py
index ea607fc2a1e51..9a0e9d4bc5362 100644
--- a/examples/offline_inference_vision_language.py
+++ b/examples/offline_inference_vision_language.py
@@ -124,16 +124,27 @@ def run_minicpmv(question):
 
 # InternVL
 def run_internvl(question):
-    # Generally, InternVL can use chatml template for conversation
-    TEMPLATE = "<|im_start|>User\n{prompt}<|im_end|>\n<|im_start|>Assistant\n"
-    prompt = f"<image>\n{question}\n"
-    prompt = TEMPLATE.format(prompt=prompt)
+    model_name = "OpenGVLab/InternVL2-2B"
+
     llm = LLM(
-        model="OpenGVLab/InternVL2-4B",
+        model=model_name,
         trust_remote_code=True,
         max_num_seqs=5,
     )
-    stop_token_ids = None
+
+    tokenizer = AutoTokenizer.from_pretrained(model_name,
+                                              trust_remote_code=True)
+    messages = [{'role': 'user', 'content': f"<image>\n{question}"}]
+    prompt = tokenizer.apply_chat_template(messages,
+                                           tokenize=False,
+                                           add_generation_prompt=True)
+
+    # Stop tokens for InternVL
+    # Model variants may have different stop tokens;
+    # please refer to the model card for the correct "stop words":
+    # https://huggingface.co/OpenGVLab/InternVL2-2B#service
+    stop_tokens = ["<|endoftext|>", "<|im_start|>", "<|im_end|>", "<|end|>"]
+    stop_token_ids = [tokenizer.convert_tokens_to_ids(i) for i in stop_tokens]
     return llm, prompt, stop_token_ids
 
 
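
Usage note: a minimal sketch of how the `llm, prompt, stop_token_ids` triple
returned by `run_internvl` is consumed, assuming the offline multi-modal vLLM
API current at the time of this patch (`AutoTokenizer` is already imported at
the top of the example file). The question string and `"image.jpg"` path are
illustrative placeholders, not part of the patch:

    from PIL import Image

    from vllm import SamplingParams

    # Build the model, prompt, and stop token ids as in the patch above.
    llm, prompt, stop_token_ids = run_internvl("What is in this image?")

    # Pass the stop token ids through SamplingParams so generation halts
    # at InternVL's stop words instead of running to max_tokens.
    sampling_params = SamplingParams(temperature=0.2,
                                     max_tokens=64,
                                     stop_token_ids=stop_token_ids)

    # "image.jpg" stands in for any local image file.
    image = Image.open("image.jpg").convert("RGB")

    outputs = llm.generate(
        {
            "prompt": prompt,
            "multi_modal_data": {"image": image},
        },
        sampling_params=sampling_params,
    )

    for o in outputs:
        print(o.outputs[0].text)

Deriving `stop_token_ids` from the tokenizer rather than hard-coding ids keeps
the example correct across InternVL2 variants, whose chat templates use
different stop words (see the model card linked in the patch).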