From b89171da2c3857f9a9d3888bf3029d4b4e655a0b Mon Sep 17 00:00:00 2001
From: HUANYU XU <147359634+Huanshere@users.noreply.github.com>
Date: Thu, 12 Sep 2024 17:06:54 +0800
Subject: [PATCH] update
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

translate步骤加入了重试，避免短句合并影响后续对齐
---
 README.md              |  4 +++-
 config.example.py      |  2 +-
 core/translate_once.py | 17 +++++++++++++++--
 3 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 678a67c2..e5e1ba6c 100644
--- a/README.md
+++ b/README.md
@@ -58,7 +58,9 @@ https://github.com/user-attachments/assets/0f5d5878-bfa5-41e4-ade1-d2b81d925a7d
 
 - 音频长度：目前仅支持30分钟以内的视频，我们计划很快扩展这一限制。
 
-- 输入语言支持：
+- 翻译可能偶发无法对齐：对于非英文视频，偶尔会出现译文与原文无法对齐的问题，这是受 Claude 指令遵循能力的限制所致。
+
+- 输入语言支持（whisperX对部分语言产出的时间轴和标点不稳定）：
 
 | 输入语言 | 支持程度 | 示例视频 |
 |---------|---------|---------|
diff --git a/config.example.py b/config.example.py
index 8466e8dc..5a5544c2 100644
--- a/config.example.py
+++ b/config.example.py
@@ -26,7 +26,7 @@
 MAX_SUB_LENGTH = 80
 # 输出字幕字号更大一些
 # Increase the font size of the output subtitles
-TARGET_SUB_MULTIPLIER = 1.5
+TARGET_SUB_MULTIPLIER = 1.2
 
 # 视频分辨率
 # Video resolution
diff --git a/core/translate_once.py b/core/translate_once.py
index 3e3ee12a..82781d45 100644
--- a/core/translate_once.py
+++ b/core/translate_once.py
@@ -8,19 +8,32 @@ def translate_lines(lines, previous_content_prompt, after_cotent_prompt, things_
     
     shared_prompt = generate_shared_prompt(previous_content_prompt, after_cotent_prompt, summary_prompt, things_to_note_prompt)
 
+    # Retry when the model returns a different number of entries than there are source lines; this occasionally happens and would break later alignment.
+    def retry_translation(prompt, model, step_name):
+        for retry in range(3):
+            result = ask_gpt(prompt + retry*" ", model=model, response_json=True, valid_key='1', log_title=f'translate_{step_name}')
+            if len(lines.split('\n')) == len(result):
+                return result
+            if retry != 2:
+                print(f'⚠️ {step_name.capitalize()} translation of block {index} failed, Length Mismatch, Retry...')
+        raise ValueError(f'❌ {step_name.capitalize()} translation of block {index} failed, Length Mismatch, Retry 3 times, Please check your input text.')
+
     ## Step 1: Faithful to the Original Text
     prompt1 = get_prompt_faithfulness(lines, shared_prompt)
-    faith_result = ask_gpt(prompt1, model=step4_2_translate_direct_model, response_json=True, valid_key='1', log_title='translate_faithfulness')
+    faith_result = retry_translation(prompt1, step4_2_translate_direct_model, 'faithfulness')
+    
     for i in faith_result:
         print(f'📄 Original Subtitle:   {faith_result[i]["Original Subtitle"]}')
         print(f'📚 Direct Translation:  {faith_result[i]["Direct Translation"]}')
 
     ## Step 2: Express Smoothly
     prompt2 = get_prompt_expressiveness(faith_result, lines, shared_prompt)
-    express_result =  ask_gpt(prompt2, model=step4_2_translate_free_model, response_json=True, valid_key='1', log_title='translate_expressiveness') 
+    express_result = retry_translation(prompt2, step4_2_translate_free_model, 'expressiveness')
+    
     for i in express_result:
         print(f'📄 Original Subtitle:   {express_result[i]["Original Subtitle"]}')
         print(f'🧠 Free Translation:    {express_result[i]["Free Translation"]}')
+    
     translate_result = "\n".join([express_result[i]["Free Translation"].strip() for i in express_result])
 
     if len(lines.split('\n')) != len(translate_result.split('\n')):