From b89171da2c3857f9a9d3888bf3029d4b4e655a0b Mon Sep 17 00:00:00 2001 From: HUANYU XU <147359634+Huanshere@users.noreply.github.com> Date: Thu, 12 Sep 2024 17:06:54 +0800 Subject: [PATCH] update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit translate步骤加入了重试,避免短句合并影响后续对齐 --- README.md | 4 +++- config.example.py | 2 +- core/translate_once.py | 17 +++++++++++++++-- 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 678a67c2..e5e1ba6c 100644 --- a/README.md +++ b/README.md @@ -58,7 +58,9 @@ https://github.com/user-attachments/assets/0f5d5878-bfa5-41e4-ade1-d2b81d925a7d - 音频长度:目前仅支持30分钟以内的视频,我们计划很快扩展这一限制。 -- 输入语言支持: +- 翻译可能偶发无法对齐:对于非英文视频,可能偶尔会存在无法对齐的bug,这是由claude遵循指令能力决定的。 + +- 输入语言支持(whisperX对部分语言产出的时间轴和标点不稳定): | 输入语言 | 支持程度 | 示例视频 | |---------|---------|---------| diff --git a/config.example.py b/config.example.py index 8466e8dc..5a5544c2 100644 --- a/config.example.py +++ b/config.example.py @@ -26,7 +26,7 @@ MAX_SUB_LENGTH = 80 # 输出字幕字号更大一些 # Increase the font size of the output subtitles -TARGET_SUB_MULTIPLIER = 1.5 +TARGET_SUB_MULTIPLIER = 1.2 # 视频分辨率 # Video resolution diff --git a/core/translate_once.py b/core/translate_once.py index 3e3ee12a..82781d45 100644 --- a/core/translate_once.py +++ b/core/translate_once.py @@ -8,19 +8,32 @@ def translate_lines(lines, previous_content_prompt, after_cotent_prompt, things_ shared_prompt = generate_shared_prompt(previous_content_prompt, after_cotent_prompt, summary_prompt, things_to_note_prompt) + # Retry translation if the length of the original text and the translated text are not the same, this sometimes happens + def retry_translation(prompt, model, step_name): + for retry in range(3): + result = ask_gpt(prompt + retry*" ", model=model, response_json=True, valid_key='1', log_title=f'translate_{step_name}') + if len(lines.split('\n')) == len(result): + return result + if retry != 2: + print(f'⚠️ {step_name.capitalize()} translation of block {index} failed, Length Mismatch, Retry...') + raise ValueError(f'❌ {step_name.capitalize()} translation of block {index} failed, Length Mismatch, Retry 3 times, Please check your input text.') + ## Step 1: Faithful to the Original Text prompt1 = get_prompt_faithfulness(lines, shared_prompt) - faith_result = ask_gpt(prompt1, model=step4_2_translate_direct_model, response_json=True, valid_key='1', log_title='translate_faithfulness') + faith_result = retry_translation(prompt1, step4_2_translate_direct_model, 'faithfulness') + for i in faith_result: print(f'📄 Original Subtitle: {faith_result[i]["Original Subtitle"]}') print(f'📚 Direct Translation: {faith_result[i]["Direct Translation"]}') ## Step 2: Express Smoothly prompt2 = get_prompt_expressiveness(faith_result, lines, shared_prompt) - express_result = ask_gpt(prompt2, model=step4_2_translate_free_model, response_json=True, valid_key='1', log_title='translate_expressiveness') + express_result = retry_translation(prompt2, step4_2_translate_free_model, 'expressiveness') + for i in express_result: print(f'📄 Original Subtitle: {express_result[i]["Original Subtitle"]}') print(f'🧠 Free Translation: {express_result[i]["Free Translation"]}') + translate_result = "\n".join([express_result[i]["Free Translation"].strip() for i in express_result]) if len(lines.split('\n')) != len(translate_result.split('\n')):