From 608eb29088afd1b900098d757f4e2e622ba8c426 Mon Sep 17 00:00:00 2001 From: HUANYU XU Date: Mon, 16 Sep 2024 17:54:35 +0800 Subject: [PATCH 1/3] add long video support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 现在可以支持无限长度的视频了 --- config.example.py | 5 +- core/all_whisper_methods/whisperX.py | 62 +++++----- core/all_whisper_methods/whisperXapi.py | 110 ++++++++++++++++-- core/step4_2_translate_all.py | 1 + ...77\347\224\250\350\257\264\346\230\216.md" | 76 ------------ st_components/locales.csv | 6 +- st_components/sidebar_setting.py | 2 +- 7 files changed, 144 insertions(+), 118 deletions(-) delete mode 100644 "docs/\350\266\213\345\212\250\344\272\221\344\275\277\347\224\250\350\257\264\346\230\216.md" diff --git a/config.example.py b/config.example.py index 6b561227..6f17fbfd 100644 --- a/config.example.py +++ b/config.example.py @@ -22,14 +22,14 @@ # Subtitle settings # 每行字幕的最大长度字母数量 # Maximum number of characters per line of subtitle -MAX_SUB_LENGTH = 80 +MAX_SUB_LENGTH = 75 # 输出字幕字号更大一些 # Increase the font size of the output subtitles TARGET_SUB_MULTIPLIER = 1.2 # 视频分辨率 # Video resolution -RESOLUTIOM = '854x480' +RESOLUTIOM = '640x360' # 显示语言 # Display language @@ -101,6 +101,7 @@ "de": "de_core_news_md", "it": "it_core_news_md", + # Not supported # "zh": "zh_core_web_md", diff --git a/core/all_whisper_methods/whisperX.py b/core/all_whisper_methods/whisperX.py index cf2a5249..48dd3a87 100644 --- a/core/all_whisper_methods/whisperX.py +++ b/core/all_whisper_methods/whisperX.py @@ -2,68 +2,76 @@ import sys import whisperx import torch -import pandas as pd -import json from typing import Dict sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) from config import MODEL_DIR -from core.all_whisper_methods.whisperXapi import process_transcription, convert_video_to_audio +from core.all_whisper_methods.whisperXapi import ( + process_transcription, convert_video_to_audio, split_audio, + save_results, save_language +) -def transcribe_audio(audio_file: str) -> Dict: +def transcribe_audio(audio_file: str, start: float, end: float) -> Dict: from config import WHISPER_LANGUAGE device = "cuda" if torch.cuda.is_available() else "cpu" batch_size = 16 # TODO Reduce this value if GPU memory is insufficient compute_type = "float16" # TODO Change to "int8" if GPU memory is insufficient (may reduce accuracy) - print(f"🚀 Starting WhisperX... Please wait patiently...") + print(f"🚀 Starting WhisperX for segment {start:.2f}s to {end:.2f}s... Please wait patiently...") try: whisperx_model_dir = os.path.join(MODEL_DIR, "whisperx") model = whisperx.load_model("large-v2", device, compute_type=compute_type, download_root=whisperx_model_dir) + # Load audio segment audio = whisperx.load_audio(audio_file) - result = model.transcribe(audio, batch_size=batch_size, language=(None if WHISPER_LANGUAGE == 'auto' else WHISPER_LANGUAGE)) + audio_segment = audio[int(start * 16000):int(end * 16000)] # Assuming 16kHz sample rate + + result = model.transcribe(audio_segment, batch_size=batch_size, language=(None if WHISPER_LANGUAGE == 'auto' else WHISPER_LANGUAGE)) # Free GPU resources del model torch.cuda.empty_cache() - - # Save language information - save_language(result['language']) # Align whisper output model_a, metadata = whisperx.load_align_model(language_code=result["language"], device=device) - result = whisperx.align(result["segments"], model_a, metadata, audio, device, return_char_alignments=False) + result = whisperx.align(result["segments"], model_a, metadata, audio_segment, device, return_char_alignments=False) # Free GPU resources again del model_a torch.cuda.empty_cache() + # Adjust timestamps + for segment in result['segments']: + segment['start'] += start + segment['end'] += start + for word in segment['words']: + word['start'] += start + word['end'] += start + return result except Exception as e: raise Exception(f"WhisperX processing error: {e}") -def save_results(df: pd.DataFrame): - os.makedirs('output/log', exist_ok=True) - excel_path = os.path.join('output/log', "cleaned_chunks.xlsx") - df['text'] = df['text'].apply(lambda x: f'"{x}"') - df.to_excel(excel_path, index=False) - print(f"📊 Excel file saved to {excel_path}") - -def save_language(language: str): - os.makedirs('output/log', exist_ok=True) - with open('output/log/transcript_language.json', 'w', encoding='utf-8') as f: - json.dump({"language": language}, f, ensure_ascii=False, indent=4) - def transcribe(video_file: str): if not os.path.exists("output/log/cleaned_chunks.xlsx"): audio_file = convert_video_to_audio(video_file) - if os.path.getsize(audio_file) > 25 * 1024 * 1024: - print("⚠️ File size exceeds 25MB. Please use a smaller file.") - return + segments = split_audio(audio_file) + + all_results = [] + for start, end in segments: + result = transcribe_audio(audio_file, start, end) + all_results.append(result) + + # Combine results + combined_result = { + 'segments': [], + 'language': all_results[0]['language'] + } + for result in all_results: + combined_result['segments'].extend(result['segments']) - result = transcribe_audio(audio_file) + save_language(combined_result['language']) - df = process_transcription(result) + df = process_transcription(combined_result) save_results(df) else: print("📊 Transcription results already exist, skipping transcription step.") diff --git a/core/all_whisper_methods/whisperXapi.py b/core/all_whisper_methods/whisperXapi.py index 64de8d8f..a9491a62 100644 --- a/core/all_whisper_methods/whisperXapi.py +++ b/core/all_whisper_methods/whisperXapi.py @@ -3,7 +3,7 @@ import replicate import pandas as pd import json -from typing import Dict +from typing import Dict, List, Tuple import subprocess import base64 sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) @@ -28,6 +28,81 @@ def convert_video_to_audio(input_file: str) -> str: return audio_file +def split_audio(audio_file: str, target_duration: int = 20*60, window: int = 60) -> List[Tuple[float, float]]: + print("🔪 Splitting audio into segments...") + duration = float(subprocess.check_output(['ffprobe', '-v', 'error', '-show_entries', 'format=duration', '-of', 'default=noprint_wrappers=1:nokey=1', audio_file]).decode('utf-8').strip()) + + segments = [] + start = 0 + while start < duration: + end = min(start + target_duration + window, duration) + if end - start < target_duration: + segments.append((start, end)) + break + + # Analyze audio in the 2-minute window + window_start = start + target_duration - window + window_end = min(window_start + 2 * window, duration) + + ffmpeg_cmd = [ + 'ffmpeg', + '-i', audio_file, + '-ss', str(window_start), + '-to', str(window_end), + '-af', 'silencedetect=n=-30dB:d=0.5', + '-f', 'null', + '-' + ] + + output = subprocess.run(ffmpeg_cmd, capture_output=True, text=True).stderr + + # Parse silence detection output + silence_end_times = [float(line.split('silence_end: ')[1].split(' ')[0]) for line in output.split('\n') if 'silence_end' in line] + + if silence_end_times: + # Find the first silence after the target duration + split_point = next((t for t in silence_end_times if t > target_duration), None) + if split_point: + segments.append((start, start + split_point)) + start += split_point + continue + + # If no suitable split point found, split at the target duration + segments.append((start, start + target_duration)) + start += target_duration + + print(f"🔪 Split audio into {len(segments)} segments") + return segments + +def transcribe_segment(audio_file: str, start: float, end: float) -> Dict: + print(f"🎙️ Transcribing segment from {start:.2f}s to {end:.2f}s") + + segment_file = f'output/audio/segment_{start:.2f}_{end:.2f}.wav' + ffmpeg_cmd = [ + 'ffmpeg', + '-i', audio_file, + '-ss', str(start), + '-to', str(end), + '-c', 'copy', + segment_file + ] + subprocess.run(ffmpeg_cmd, check=True, stderr=subprocess.PIPE) + + # Encode to base64 + with open(segment_file, 'rb') as file: + audio_base64 = base64.b64encode(file.read()).decode('utf-8') + + # Check segment size + segment_size = len(audio_base64) / (1024 * 1024) # Size in MB + print(f"📊 Segment size: {segment_size:.2f} MB") + + result = transcribe_audio(audio_base64) + + # delete the segment file + os.remove(segment_file) + + return result + def encode_file_to_base64(file_path: str) -> str: print("🔄 Encoding audio file to base64...") with open(file_path, 'rb') as file: @@ -83,7 +158,7 @@ def process_transcription(result: Dict) -> pd.DataFrame: } all_words.append(word_dict) else: - # If it’s the first word, look next for a timestamp then assign it to the current word + # If it's the first word, look next for a timestamp then assign it to the current word next_word = next((w for w in segment['words'] if 'start' in w and 'end' in w), None) if next_word: word_dict = { @@ -122,16 +197,33 @@ def transcribe(video_file: str): if not os.path.exists("output/log/cleaned_chunks.xlsx"): audio_file = convert_video_to_audio(video_file) - if os.path.getsize(audio_file) > 25 * 1024 * 1024: - print("⚠️ File size exceeds 25MB. Please use a smaller file.") - return + segments = split_audio(audio_file) - audio_base64 = encode_file_to_base64(audio_file) - result = transcribe_audio(audio_base64) + all_results = [] + for start, end in segments: + result = transcribe_segment(audio_file, start, end) + result['time_offset'] = start # Add time offset to the result + all_results.append(result) + + # Combine results + combined_result = { + 'segments': [], + 'detected_language': all_results[0]['detected_language'] + } + for result in all_results: + for segment in result['segments']: + segment['start'] += result['time_offset'] + segment['end'] += result['time_offset'] + for word in segment['words']: + if 'start' in word: + word['start'] += result['time_offset'] + if 'end' in word: + word['end'] += result['time_offset'] + combined_result['segments'].extend(result['segments']) - save_language(result['detected_language']) + save_language(combined_result['detected_language']) - df = process_transcription(result) + df = process_transcription(combined_result) save_results(df) else: print("📊 Transcription results already exist, skipping transcription step.") diff --git a/core/step4_2_translate_all.py b/core/step4_2_translate_all.py index 8a01db98..f2633d2e 100644 --- a/core/step4_2_translate_all.py +++ b/core/step4_2_translate_all.py @@ -47,6 +47,7 @@ def translate_all(): print("🚨 File `translation_results.xlsx` already exists, skipping TRANSLATE ALL.") return + print("Start Translate All...") chunks = split_chunks_by_chars() with open('output/log/terminology.json', 'r', encoding='utf-8') as file: theme_prompt = json.load(file).get('theme') diff --git "a/docs/\350\266\213\345\212\250\344\272\221\344\275\277\347\224\250\350\257\264\346\230\216.md" "b/docs/\350\266\213\345\212\250\344\272\221\344\275\277\347\224\250\350\257\264\346\230\216.md" deleted file mode 100644 index 771378cf..00000000 --- "a/docs/\350\266\213\345\212\250\344\272\221\344\275\277\347\224\250\350\257\264\346\230\216.md" +++ /dev/null @@ -1,76 +0,0 @@ -# VideoLingo 趋动云使用指南 -注意,此为v0.2版本,仅适合英文输入中文输出 - -## 1. 获取 API Key - -推荐在 [云雾 API](https://api.wlai.vip/register?aff=TXMB) 注册并充值以获取 Claude API key。 - -> 注意: 此渠道价格约为官方的 1/10,约合 10元/1M tokens(连 2元/1M 的 deepseek 都不香了)。如果已有其他渠道的 API key,可跳过此步骤,在后续的 `config.py` 设置中进行相应设置。 - -![云雾 API 注册](https://files.catbox.moe/wxlsy1.png) -![云雾 API 充值](https://files.catbox.moe/8kjv1x.png) - -## 2. 注册趋动云账号 - -在 [趋动云官网](https://platform.virtaicloud.com/) 进行注册。 - -## 3. 克隆 VideoLingo 项目 - -1. 打开 [VideoLingo 趋动云项目页](https://open.virtaicloud.com/web/project/detail/480194078119297024) -2. 将项目克隆到您的个人运行空间 - -![趋动云页面](https://files.catbox.moe/rkhd0z.png) -![克隆项目](https://files.catbox.moe/ygrm39.png) - -## 4. 启动开发环境 - -1. 在空间中打开 VideoLingo 项目 -2. 点击 `启动开发环境` -3. 环境启动后,点击 `进入开发环境` - -![启动环境](https://files.catbox.moe/tbmu8m.png) -![进入环境](https://files.catbox.moe/ik5jp5.png) - -## 5. 配置 API Key - -1. 点击左上角进入 `JupyterLab` -2. 修改 `config.py` 文件,填写 `api_key` 等 - -![填写配置](https://files.catbox.moe/uzn2b2.png) - -## 6. 启动项目 - -1. 打开 `一键启动.ipynb` 文件 -2. 运行所有单元格 - -> 提示: `重启并运行所有单元格` 可能存在 bug,多点几次就行 - -![启动文件](https://files.catbox.moe/awk6ca.png) -![执行所有单元格](https://files.catbox.moe/zkj3d0.png) - -## 7. 访问 Streamlit 界面 - -1. 等待约 3 分钟,待环境安装完成 -2. 最下方单元格会弹出 URL -3. 点击右边栏 `端口`,创建 8501, 复制 `外部访问` 链接 -4. 在新的浏览器标签页中打开该链接 - -![复制外部访问链接](https://files.catbox.moe/9hipu3.png) - -## 8. 使用 VideoLingo - -Streamlit 界面启动后就可以开始使用 VideoLingo 了! - -- 处理进度可在弹出 URL 的界面中查看 -- 输出文件将保存在 `output` 文件夹下 - -![Streamlit 界面](https://files.catbox.moe/rpmsl4.png) - -## 9. 保存结果并停止环境 - -1. 处理完成后,请下载并保存 srt 字幕文件 -2. 点击 `停止并销毁` 以停止计费 - -![停止环境](https://files.catbox.moe/sd4sib.png) - -> 任何问题可以提 Issue 或加入 QQ 群:875297969 \ No newline at end of file diff --git a/st_components/locales.csv b/st_components/locales.csv index e5a2c40c..6ff0e7b8 100644 --- a/st_components/locales.csv +++ b/st_components/locales.csv @@ -128,9 +128,9 @@ ja_JP,subtitle_line_length_settings,"字幕行の長さ設定:" en_US,max_characters_per_line,"Maximum Characters Per Line:" zh_CN,max_characters_per_line,"单行最大字符数:" ja_JP,max_characters_per_line,"1行あたりの最大文字数:" -en_US,max_characters_per_line_help,"Maximum number of characters per line of subtitles, calculated based on English. For Chinese, it will be automatically multiplied by 1.75. Default is 80" -zh_CN,max_characters_per_line_help,"每一行字幕的最大字符数,按照英文计算,中文会自动乘以1.75,默认 80" -ja_JP,max_characters_per_line_help,"字幕の1行あたりの最大文字数。英語を基準に計算され、中国語の場合は自動的に1.75倍になります。デフォルトは80です" +en_US,max_characters_per_line_help,"Maximum number of characters per line of subtitles, calculated based on English. For Chinese, it will be automatically multiplied by 1.75. Default is 75" +zh_CN,max_characters_per_line_help,"每一行字幕的最大字符数,按照英文计算,中文会自动乘以1.75,默认 75" +ja_JP,max_characters_per_line_help,"字幕の1行あたりの最大文字数。英語を基準に計算され、中国語の場合は自動的に1.75倍になります。デフォルトは75です" en_US,translation_length_multiplier,"Translation Length Multiplier:" zh_CN,translation_length_multiplier,"翻译长度倍数:" ja_JP,translation_length_multiplier,"翻訳長さの倍率:" diff --git a/st_components/sidebar_setting.py b/st_components/sidebar_setting.py index d537989b..42bdb61e 100644 --- a/st_components/sidebar_setting.py +++ b/st_components/sidebar_setting.py @@ -78,7 +78,7 @@ def page_setting(): resolution_options = { "1080p": "1920x1080", - "480p": "854x480" + "360p": "640x360" } selected_resolution = st.selectbox(get_localized_string("video_resolution"), options=list(resolution_options.keys()), index=list(resolution_options.values()).index(config.RESOLUTIOM)) resolution = resolution_options[selected_resolution] From 99c50d2ed035be155066b960e5f405fcc5bb901d Mon Sep 17 00:00:00 2001 From: HUANYU XU Date: Mon, 16 Sep 2024 18:35:02 +0800 Subject: [PATCH 2/3] delete docker; rich print --- Dockerfile | 48 -------- core/spacy_utils/load_nlp_model.py | 11 +- core/spacy_utils/split_by_comma.py | 7 +- core/spacy_utils/split_by_connector.py | 15 +-- core/spacy_utils/split_by_mark.py | 5 +- core/spacy_utils/split_long_by_root.py | 5 +- core/translate_once.py | 29 +++-- docs/install_locally_en.md | 23 +--- docs/install_locally_zh.md | 23 +--- entrypoint.sh | 7 -- install.py | 156 +++++++++++-------------- install_docker.py | 73 ------------ 12 files changed, 118 insertions(+), 284 deletions(-) delete mode 100644 Dockerfile delete mode 100644 entrypoint.sh delete mode 100644 install_docker.py diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index 607e6e58..00000000 --- a/Dockerfile +++ /dev/null @@ -1,48 +0,0 @@ -# 第一阶段:从官方的 PyTorch拉取pytorch -FROM pytorch/pytorch:2.3.1-cuda11.8-cudnn8-runtime AS base - -# 设置工作目录 -WORKDIR /app - -# 第二步:依赖镜像 - 安装 Python 依赖 -FROM base AS dependencies - -# 设置工作目录 -WORKDIR /app - -# 安装必要的工具和依赖,并清理缓存 -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - cmake \ - build-essential \ - ffmpeg && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - -# 安装基础依赖 -RUN pip install --upgrade pip - -# 拷贝 requirements.txt 并安装依赖 -COPY requirements.txt . -RUN pip install -r requirements.txt - -# 第四步:应用镜像 -FROM dependencies AS app - -# 设置工作目录 -WORKDIR /app - -# 将当前目录的内容复制到工作目录 -COPY . . - -# 确保所有脚本是可执行的 -RUN chmod +x *.py entrypoint.sh - -# 创建并复制 entrypoint.sh 脚本 -COPY entrypoint.sh . - -# 暴露Streamlit默认端口 -EXPOSE 8501 - -# 启动应用 -ENTRYPOINT ["sh", "entrypoint.sh"] diff --git a/core/spacy_utils/load_nlp_model.py b/core/spacy_utils/load_nlp_model.py index 3faf70c4..be3681dc 100644 --- a/core/spacy_utils/load_nlp_model.py +++ b/core/spacy_utils/load_nlp_model.py @@ -1,6 +1,7 @@ import os,sys import spacy from spacy.cli import download +from rich import print sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) from core.step2_whisper import get_whisper_language from config import SPACY_MODEL_MAP @@ -8,7 +9,7 @@ def get_spacy_model(language: str): model = SPACY_MODEL_MAP.get(language.lower(), "en_core_web_sm") if language not in SPACY_MODEL_MAP: - print(f"Spacy model does not support '{language}', using en_core_web_sm model as fallback...") + print(f"[yellow]Spacy model does not support '{language}', using en_core_web_sm model as fallback...[/yellow]") return model def init_nlp(): @@ -16,15 +17,15 @@ def init_nlp(): from config import WHISPER_LANGUAGE language = "en" if WHISPER_LANGUAGE == "en" else get_whisper_language() model = get_spacy_model(language) - print(f"⏳ Loading NLP Spacy model: <{model}> ...") + print(f"[blue]⏳ Loading NLP Spacy model: <{model}> ...[/blue]") try: nlp = spacy.load(model) except: - print(f"Downloading {model} model...") - print("If download failed, please check your network and try again.") + print(f"[yellow]Downloading {model} model...[/yellow]") + print("[yellow]If download failed, please check your network and try again.[/yellow]") download(model) nlp = spacy.load(model) except: raise ValueError(f"❌ Failed to load NLP Spacy model: {model}") - print(f"✅ NLP Spacy model loaded successfully!") + print(f"[green]✅ NLP Spacy model loaded successfully![/green]") return nlp \ No newline at end of file diff --git a/core/spacy_utils/split_by_comma.py b/core/spacy_utils/split_by_comma.py index b6909161..e3475fca 100644 --- a/core/spacy_utils/split_by_comma.py +++ b/core/spacy_utils/split_by_comma.py @@ -4,6 +4,7 @@ import os,sys sys.path.append(os.path.dirname(os.path.abspath(__file__))) from load_nlp_model import init_nlp +from rich import print def is_valid_phrase(phrase): # 🔍 Check for subject and verb @@ -37,13 +38,13 @@ def split_by_comma(text, nlp): if suitable_for_splitting : sentences.append(doc[start:token.i].text.strip()) - print(f"✂️ Split at comma: {doc[start:token.i][-4:]},| {doc[token.i + 1:][:4]}") + print(f"[yellow]✂️ Split at comma: {doc[start:token.i][-4:]},| {doc[token.i + 1:][:4]}[/yellow]") start = token.i + 1 for i, token in enumerate(doc): if token.text == ":": # Split at colon sentences.append(doc[start:token.i].text.strip()) - print(f"✂️ Split at colon: {doc[start:token.i][-4:]}:| {doc[token.i + 1:][:4]}") + print(f"[yellow]✂️ Split at colon: {doc[start:token.i][-4:]}:| {doc[token.i + 1:][:4]}[/yellow]") sentences.append(doc[start:].text.strip()) @@ -63,7 +64,7 @@ def split_by_comma_main(nlp): for sentence in all_split_sentences: output_file.write(sentence + "\n") - print("💾 Sentences split by commas saved to → `sentences_by_comma.txt`") + print("[green]💾 Sentences split by commas saved to → `sentences_by_comma.txt`[/green]") if __name__ == "__main__": nlp = init_nlp() diff --git a/core/spacy_utils/split_by_connector.py b/core/spacy_utils/split_by_connector.py index 6aa11dac..5a57c68d 100644 --- a/core/spacy_utils/split_by_connector.py +++ b/core/spacy_utils/split_by_connector.py @@ -3,17 +3,18 @@ import os,sys sys.path.append(os.path.dirname(os.path.abspath(__file__))) from load_nlp_model import init_nlp +from rich import print def analyze_connectors(doc, token): """ Analyze whether a token is a connector that should trigger a sentence split. Processing logic and order: - 1. Check if the token is one of the target connectors based on the language. - 2. For 'that' (English), check if it's part of a contraction (e.g., that's, that'll). - 3. For all connectors, check if they function as a specific dependency of a verb or noun. - 4. Default to splitting for certain connectors if no other conditions are met. - 5. For coordinating conjunctions, check if they connect two independent clauses. + 1. Check if the token is one of the target connectors based on the language. + 2. For 'that' (English), check if it's part of a contraction (e.g., that's, that'll). + 3. For all connectors, check if they function as a specific dependency of a verb or noun. + 4. Default to splitting for certain connectors if no other conditions are met. + 5. For coordinating conjunctions, check if they connect two independent clauses. """ lang = doc.lang_ if lang == "en": @@ -101,7 +102,7 @@ def split_by_connectors(text, context_words=5, nlp=None): right_words = [word.text for word in right_words if not word.is_punct] if len(left_words) >= context_words and len(right_words) >= context_words and split_before: - print(f"✂️ Split before '{token.text}': {' '.join(left_words)}| {token.text} {' '.join(right_words)}") + print(f"[yellow]✂️ Split before '{token.text}': {' '.join(left_words)}| {token.text} {' '.join(right_words)}[/yellow]") new_sentences.append(doc[start:token.i].text.strip()) start = token.i split_occurred = True @@ -136,7 +137,7 @@ def split_sentences_main(nlp): output_file.seek(output_file.tell() - 1, os.SEEK_SET) output_file.truncate() - print("💾 Sentences split by connectors saved to → `sentence_splitbyconnector.txt`") + print("[green]💾 Sentences split by connectors saved to → `sentence_splitbyconnector.txt`[/green]") if __name__ == "__main__": nlp = init_nlp() diff --git a/core/spacy_utils/split_by_mark.py b/core/spacy_utils/split_by_mark.py index a4e02d20..7a6ad655 100644 --- a/core/spacy_utils/split_by_mark.py +++ b/core/spacy_utils/split_by_mark.py @@ -6,11 +6,12 @@ from core.spacy_utils.load_nlp_model import init_nlp from core.step2_whisper import get_whisper_language from config import get_joiner, WHISPER_LANGUAGE +from rich import print def split_by_mark(nlp): language = get_whisper_language() if WHISPER_LANGUAGE == 'auto' else WHISPER_LANGUAGE # consider force english case joiner = get_joiner(language) - print(f"🔍 Using {language} language joiner: '{joiner}'") + print(f"[blue]🔍 Using {language} language joiner: '{joiner}'[/blue]") chunks = pd.read_excel("output/log/cleaned_chunks.xlsx") chunks.text = chunks.text.apply(lambda x: x.strip('"')) @@ -31,7 +32,7 @@ def split_by_mark(nlp): else: output_file.write(sentence + "\n") - print("💾 Sentences split by punctuation marks saved to → `sentences_by_mark.txt`") + print("[green]💾 Sentences split by punctuation marks saved to → `sentences_by_mark.txt`[/green]") if __name__ == "__main__": nlp = init_nlp() diff --git a/core/spacy_utils/split_long_by_root.py b/core/spacy_utils/split_long_by_root.py index 8f5a47a5..5d5219b3 100644 --- a/core/spacy_utils/split_long_by_root.py +++ b/core/spacy_utils/split_long_by_root.py @@ -5,6 +5,7 @@ from core.spacy_utils.load_nlp_model import init_nlp from config import get_joiner, WHISPER_LANGUAGE from core.step2_whisper import get_whisper_language +from rich import print def split_long_sentence(doc): tokens = [token.text for token in doc] @@ -70,7 +71,7 @@ def split_long_by_root_main(nlp): if any(len(nlp(sent)) > 60 for sent in split_sentences): split_sentences = [subsent for sent in split_sentences for subsent in split_extremely_long_sentence(nlp(sent))] all_split_sentences.extend(split_sentences) - print(f"✂️ Splitting long sentences by root: {sentence[:30]}...") + print(f"[yellow]✂️ Splitting long sentences by root: {sentence[:30]}...[/yellow]") else: all_split_sentences.append(sentence.strip()) @@ -78,7 +79,7 @@ def split_long_by_root_main(nlp): for sentence in all_split_sentences: output_file.write(sentence + "\n") - print("💾 Long sentences split by root saved to → `sentence_splitbynlp.txt`") + print("[green]💾 Long sentences split by root saved to → `sentence_splitbynlp.txt`[/green]") if __name__ == "__main__": nlp = init_nlp() diff --git a/core/translate_once.py b/core/translate_once.py index e439409b..45e7e7e3 100644 --- a/core/translate_once.py +++ b/core/translate_once.py @@ -2,6 +2,12 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from core.ask_gpt import ask_gpt from core.prompts_storage import generate_shared_prompt, get_prompt_faithfulness, get_prompt_expressiveness +from rich import print +from rich.panel import Panel +from rich.console import Console +from rich.table import Table + +console = Console() def translate_lines(lines, previous_content_prompt, after_cotent_prompt, things_to_note_prompt, summary_prompt, index = 0): from config import step4_2_translate_direct_model, step4_2_translate_free_model @@ -15,8 +21,8 @@ def retry_translation(prompt, model, step_name, valide_key=None, valid_sub_key=N if len(lines.split('\n')) == len(result): return result if retry != 2: - print(f'⚠️ {step_name.capitalize()} translation of block {index} failed, Retry...') - raise ValueError(f'❌ {step_name.capitalize()} translation of block {index} failed after 3 retries. Please check your input text.') + console.print(f'[yellow]⚠️ {step_name.capitalize()} translation of block {index} failed, Retry...[/yellow]') + raise ValueError(f'[red]❌ {step_name.capitalize()} translation of block {index} failed after 3 retries. Please check your input text.[/red]') ## Step 1: Faithful to the Original Text prompt1 = get_prompt_faithfulness(lines, shared_prompt) @@ -29,18 +35,25 @@ def retry_translation(prompt, model, step_name, valide_key=None, valid_sub_key=N prompt2 = get_prompt_expressiveness(faith_result, lines, shared_prompt) express_result = retry_translation(prompt2, step4_2_translate_free_model, 'expressiveness', valide_key="1", valid_sub_key="Free Translation") - for i in express_result: - print(f'📄 Original Subtitle: {faith_result[i]["Original Subtitle"]}') - print(f'📚 Direct Translation: {faith_result[i]["Direct Translation"]}') - print(f'🧠 Free Translation: {express_result[i]["Free Translation"]}') + table = Table(title="Translation Results") + table.add_column("Translations", style="cyan") + + for i, key in enumerate(express_result): + table.add_row(f"[cyan]Original: {faith_result[key]['Original Subtitle']}[/cyan]") + table.add_row(f"[magenta]Direct: {faith_result[key]['Direct Translation']}[/magenta]") + table.add_row(f"[green]Free: {express_result[key]['Free Translation']}[/green]") + if i < len(express_result) - 1: + table.add_row("[yellow]" + "-" * 50 + "[/yellow]") + + console.print(table) translate_result = "\n".join([express_result[i]["Free Translation"].replace('\n', ' ').strip() for i in express_result]) if len(lines.split('\n')) != len(translate_result.split('\n')): - print(f'❌ Translation of block {index} failed, Length Mismatch, Please check `output/gpt_log/translate_expressiveness.json`') + console.print(Panel(f'[red]❌ Translation of block {index} failed, Length Mismatch, Please check `output/gpt_log/translate_expressiveness.json`[/red]')) raise ValueError(f'Original ···{lines}···,\nbut got ···{translate_result}···') else: - print(f'✅ Translation of block {index} completed') + console.print(Panel(f'[green]✅ Translation of block {index} completed[/green]')) return translate_result, lines diff --git a/docs/install_locally_en.md b/docs/install_locally_en.md index d2262a5b..26859961 100644 --- a/docs/install_locally_en.md +++ b/docs/install_locally_en.md @@ -84,25 +84,4 @@ Before installing VideoLingo, ensure at least **20GB** of free disk space and co 6. Set the key in the sidebar of the pop-up webpage, and make sure to select the correct Whisper method to use - ![2](https://github.com/user-attachments/assets/ba5621f0-8320-4a45-8da8-9ea574b5c7cc) - - -## Docker Deployment - -Pull Image - -```bash -docker pull sguann/videolingo_app:latest -``` - -Run Image: -```bash -docker run -d -p 8501:8501 -e API_KEY=xxx -e BASE_URL=xxx -e WHISPER_METHOD=xxx -e DISPLAY_LANGUAGE=xxx sguann/videolingo_app:latest -``` - -Where: - - - `API_KEY`: Access token, needs to be applied for by yourself. Recommended: [YunWu API](https://api2.wlai.vip/register?aff=TXMB) - - `BASE_URL`: API provider interface, no need for v1 suffix - - `WHISPER_METHOD`: Whisper model, options are: `whisper_timestamped`, `whisperX`, `whisperX_api`, default is `whisperX_api` - - `DISPLAY_LANGUAGE`: Display language, options are `zh_CN`, `zh_TW`, `en_US`, `ja_JP`, default is `auto` + ![2](https://github.com/user-attachments/assets/ba5621f0-8320-4a45-8da8-9ea574b5c7cc) \ No newline at end of file diff --git a/docs/install_locally_zh.md b/docs/install_locally_zh.md index fcf07a4c..e9b2a32b 100644 --- a/docs/install_locally_zh.md +++ b/docs/install_locally_zh.md @@ -83,25 +83,4 @@ VideoLingo 语音识别文本步骤提供多种 Whisper 方案的选择(因为 6. 在弹出网页的侧边栏中设置key,并注意选择whisper方法 - ![2](https://github.com/user-attachments/assets/ba5621f0-8320-4a45-8da8-9ea574b5c7cc) - - -## Docker一键部署 - -拉取镜像: - -```bash -docker pull sguann/videolingo_app:latest -``` - -运行镜像: -```bash -docker run -d -p 8501:8501 -e API_KEY=xxx -e BASE_URL=xxx -e WHISPER_METHOD=xxx -e DISPLAY_LANGUAGE=xxx sguann/videolingo_app:latest -``` - -其中: - - - `API_KEY` 访问token,需要自行申请,推荐[云雾API](https://api2.wlai.vip/register?aff=TXMB) - - `BASE_URL` API提供商接口,不需要v1后缀 - - `WHISPER_METHOD` Whisper模型,可选项分别为:`whisper_timestamped`、`whisperX`、`whisperX_api`, 默认`whisperX_api` - - `DISPLAY_LANGUAGE` 显示语言,可选`zh_CN`, `zh_TW`, `en_US`, `ja_JP`, 默认`auto` + ![2](https://github.com/user-attachments/assets/ba5621f0-8320-4a45-8da8-9ea574b5c7cc) \ No newline at end of file diff --git a/entrypoint.sh b/entrypoint.sh deleted file mode 100644 index 77ce2b1f..00000000 --- a/entrypoint.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/sh - -# 运行 install.py -python install_docker.py - -# 运行 Streamlit 应用 -streamlit run st.py diff --git a/install.py b/install.py index 2d8dec29..3facbc24 100644 --- a/install.py +++ b/install.py @@ -4,136 +4,132 @@ import sys import zipfile import shutil +from rich.console import Console +from rich.table import Table +from rich.panel import Panel +from rich.progress import Progress sys.path.append(os.path.dirname(os.path.abspath(__file__))) +console = Console() + def install_package(*packages): subprocess.check_call([sys.executable, "-m", "pip", "install", *packages]) def install_requirements(): """Install requirements from requirements.txt file.""" if os.path.exists("requirements.txt"): - print("Converting requirements.txt to GBK encoding...") - print("正在将requirements.txt转换为GBK编码...") + console.print(Panel("Converting requirements.txt to GBK encoding...", style="cyan")) try: with open("requirements.txt", "r", encoding="utf-8") as file: content = file.read() with open("requirements.txt", "w", encoding="gbk") as file: file.write(content) - print("Conversion completed.") - print("转换完成。") + console.print("[green]Conversion completed.[/green]") except UnicodeDecodeError: - print("requirements.txt is already in GBK encoding, no conversion needed.") - print("requirements.txt已经是GBK编码,无需转换。") + console.print("[yellow]requirements.txt is already in GBK encoding, no conversion needed.[/yellow]") except Exception as e: - print(f"Error occurred during encoding conversion: {str(e)}") - print(f"转换编码时出错:{str(e)}") + console.print(f"[red]Error occurred during encoding conversion: {str(e)}[/red]") - print("Installing dependencies from requirements.txt...") - print("正在从requirements.txt安装依赖...") + console.print(Panel("Installing dependencies from requirements.txt...", style="cyan")) subprocess.check_call([sys.executable, "-m", "pip", "install", "-r", "requirements.txt"]) else: - print("requirements.txt not found. Skipping installation.") - print("未找到requirements.txt。跳过安装。") + console.print("[yellow]requirements.txt not found. Skipping installation.[/yellow]") def dowanload_uvr_model(): """Download the specified uvr model.""" if not os.path.exists("_model_cache/uvr5_weights/HP2_all_vocals.pth"): os.makedirs("_model_cache/uvr5_weights", exist_ok=True) import requests - print("Downloading UVR model...") - print("正在下载UVR模型...") + console.print(Panel("Downloading UVR model...", style="cyan")) url = "https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/e992cb1bc5d777fcddce20735a899219b1d46aba/uvr5_weights/HP2_all_vocals.pth" - response = requests.get(url) - with open("_model_cache/uvr5_weights/HP2_all_vocals.pth", "wb") as file: - file.write(response.content) - print("UVR model downloaded successfully.") - print("UVR模型下载成功。") + with Progress() as progress: + task = progress.add_task("[cyan]Downloading...", total=100) + response = requests.get(url, stream=True) + total_size = int(response.headers.get('content-length', 0)) + with open("_model_cache/uvr5_weights/HP2_all_vocals.pth", "wb") as file: + for data in response.iter_content(chunk_size=4096): + size = file.write(data) + progress.update(task, advance=(size/total_size)*100) + console.print("[green]UVR model downloaded successfully.[/green]") else: - print("HP2_all_vocals.pth already exists. Skipping download.") - print("HP2_all_vocals.pth已存在。跳过下载。") + console.print("[yellow]HP2_all_vocals.pth already exists. Skipping download.[/yellow]") def download_and_extract_ffmpeg(): - """Download FFmpeg based on the platform, extract it, and clean up.""" + """Download FFmpeg and FFprobe based on the platform, extract them, and clean up.""" system = platform.system() if system == "Windows": ffmpeg_exe = "ffmpeg.exe" + ffprobe_exe = "ffprobe.exe" url = "https://github.com/BtbN/FFmpeg-Builds/releases/download/latest/ffmpeg-master-latest-win64-gpl.zip" elif system == "Darwin": - ffmpeg_exe = "ffmpeg" - url = "https://evermeet.cx/ffmpeg/ffmpeg-4.4.zip" + console.print(Panel.fit( + "For macOS users, please install FFmpeg using Homebrew:\n" + "1. Install Homebrew if you haven't: /bin/bash -c \"$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)\"\n" + "2. Then run: brew install ffmpeg", + title="⚠️ MacOS Installation", border_style="green" + )) + return elif system == "Linux": ffmpeg_exe = "ffmpeg" + ffprobe_exe = "ffprobe" url = "https://johnvansickle.com/ffmpeg/builds/ffmpeg-git-amd64-static.tar.xz" else: return - if os.path.exists(ffmpeg_exe): - print(f"{ffmpeg_exe} already exists. Skipping download.") - print(f"{ffmpeg_exe}已存在。跳过下载。") + if os.path.exists(ffmpeg_exe) and os.path.exists(ffprobe_exe): + console.print(f"[yellow]{ffmpeg_exe} and {ffprobe_exe} already exist. Skipping download.[/yellow]") return - print("Downloading FFmpeg...") - print("正在下载FFmpeg...") + console.print(Panel("Downloading FFmpeg and FFprobe...", style="cyan")) import requests + response = requests.get(url) if response.status_code == 200: - filename = "ffmpeg.zip" + filename = "ffmpeg.zip" if system == "Windows" else "ffmpeg.tar.xz" with open(filename, 'wb') as f: f.write(response.content) - print(f"FFmpeg has been downloaded to {filename}") - print(f"FFmpeg已下载到{filename}") - - print("Extracting FFmpeg...") - print("正在解压FFmpeg...") + console.print(f"[green]FFmpeg and FFprobe have been downloaded to {filename}[/green]") + + console.print(Panel("Extracting FFmpeg and FFprobe...", style="cyan")) if system == "Linux": import tarfile with tarfile.open(filename) as tar_ref: for member in tar_ref.getmembers(): - if member.name.endswith("ffmpeg"): + if member.name.endswith(("ffmpeg", "ffprobe")): member.name = os.path.basename(member.name) tar_ref.extract(member) - break else: with zipfile.ZipFile(filename, 'r') as zip_ref: for file in zip_ref.namelist(): - if file.endswith(ffmpeg_exe): + if file.endswith((ffmpeg_exe, ffprobe_exe)): zip_ref.extract(file) - shutil.move(os.path.join(*file.split('/')[:-1], ffmpeg_exe), ffmpeg_exe) - break - - print("Cleaning up...") - print("正在清理...") - os.remove(filename) - if system != "Linux": + shutil.move(os.path.join(*file.split('/')[:-1], os.path.basename(file)), os.path.basename(file)) + + console.print(Panel("Cleaning up...", style="cyan")) + os.remove(filename) + if system == "Windows": for item in os.listdir(): if os.path.isdir(item) and "ffmpeg" in item.lower(): shutil.rmtree(item) - print("FFmpeg extraction completed.") - print("FFmpeg解压完成。") + console.print("[green]FFmpeg and FFprobe extraction completed.[/green]") else: - print("Failed to download FFmpeg") - print("下载FFmpeg失败") + console.print("[red]Failed to download FFmpeg and FFprobe[/red]") def init_config(): """Initialize the config.py file with the specified API key and base URL.""" if not os.path.exists("config.py"): # Copy config.py from config.example.py - # 从 config.example.py 复制 config.py shutil.copy("config.example.py", "config.py") - print("config.py file has been created. Please fill in the API key and base URL in the config.py file.") - print("config.py文件已创建。请在config.py文件中填写API密钥和基础URL。") + console.print("[green]config.py file has been created. Please fill in the API key and base URL in the config.py file.[/green]") else: - print("config.py file already exists.") - print("config.py文件已存在。") + console.print("[yellow]config.py file already exists.[/yellow]") def install_whisper_model(choice): if choice == '1': - print("Installing whisper_timestamped...") - print("正在安装 whisper_timestamped...") + console.print(Panel("Installing whisper_timestamped...", style="cyan")) subprocess.check_call([sys.executable, "-m", "pip", "install", "whisper-timestamped"]) elif choice == '2': - print("Installing whisperX...") - print("正在安装 whisperX...") + console.print(Panel("Installing whisperX...", style="cyan")) current_dir = os.getcwd() whisperx_dir = os.path.join(current_dir, "third_party", "whisperX") os.chdir(whisperx_dir) @@ -141,57 +137,47 @@ def install_whisper_model(choice): os.chdir(current_dir) def main(): - print("Starting installation...") - print("开始安装...") + console.print(Panel.fit("Starting installation...", style="bold magenta")) # Initialize config.py file - # 初始化 config.py 文件 init_config() # Install requests - # 安装 requests + console.print(Panel("Installing requests...", style="cyan")) install_package("requests") # User selects Whisper model - # User selects Whisper model - # 用户选择 Whisper 模型 - print("\nPlease select the Whisper model to install:") - print("\n请选择要安装的 Whisper 模型:") - print("If you're unsure about the differences between models, please see https://github.com/Huanshere/VideoLingo/blob/main/docs/install_locally_zh.md") - print("若不清楚各模型区别请见 https://github.com/Huanshere/VideoLingo/blob/main/docs/install_locally_zh.md") - print("1. whisper_timestamped") - print("2. whisperX ") - print("3. whisperX_api (recommended)") - choice = input("Please enter the option number (1, 2, or 3): ") + table = Table(title="Whisper Model Selection") + table.add_column("Option", style="cyan", no_wrap=True) + table.add_column("Model", style="magenta") + table.add_column("Description", style="green") + table.add_row("1", "whisper_timestamped", "") + table.add_row("2", "whisperX", "") + table.add_row("3", "whisperX_api", "(recommended)") + console.print(table) + console.print("If you're unsure about the differences between models, please see https://github.com/Huanshere/VideoLingo/blob/main/docs/install_locally_zh.md") + choice = console.input("Please enter the option number (1, 2, or 3): ") # Install PyTorch - # 安装 PyTorch if choice in ['1', '2']: - print("Installing PyTorch with CUDA support...") - print("正在安装支持 CUDA 的 PyTorch...") + console.print(Panel("Installing PyTorch with CUDA support...", style="cyan")) subprocess.check_call(["conda", "install", "pytorch==2.0.0", "torchaudio==2.0.0", "pytorch-cuda=11.8", "-c", "pytorch", "-c", "nvidia", "-y"]) elif choice == '3': - print("Installing CPU version of PyTorch...") - print("正在安装 cpu 版本的 PyTorch...") + console.print(Panel("Installing CPU version of PyTorch...", style="cyan")) subprocess.check_call([sys.executable, "-m", "pip", "install", "torch", "torchaudio"]) # Install other dependencies - # 安装其他依赖 install_requirements() # Install selected Whisper model - # 安装选择的 Whisper 模型 install_whisper_model(choice) # Download and extract FFmpeg - # 下载并解压 FFmpeg download_and_extract_ffmpeg() - print("All installation steps are completed!") - print("所有安装步骤都完成啦!") - print("Please use the following command to start Streamlit:") - print("请使用以下命令启动 Streamlit:") - print("streamlit run st.py") + console.print(Panel.fit("All installation steps are completed!", style="bold green")) + console.print("Please use the following command to start Streamlit:") + console.print("[bold cyan]streamlit run st.py[/bold cyan]") if __name__ == "__main__": main() \ No newline at end of file diff --git a/install_docker.py b/install_docker.py deleted file mode 100644 index 9c672b7d..00000000 --- a/install_docker.py +++ /dev/null @@ -1,73 +0,0 @@ -import os -import platform -import subprocess -import sys -import zipfile -import shutil - -sys.path.append(os.path.dirname(os.path.abspath(__file__))) - -def init_config(api_key, base_url, whisper_method, language): - """Initialize the config.py file with the specified API key and base URL.""" - if not os.path.exists("config.py"): - # 从 config.example.py 复制 config.py - shutil.copy("config.example.py", "config.py") - print("config.py文件已创建。正在更新API密钥和基础URL。") - - # 读取 config.py 文件内容 - with open("config.py", "r", encoding="utf-8") as file: - config_content = file.read() - - # 替换 配置项 - config_content = config_content.replace("API_KEY = 'sk-xxx'", f"API_KEY = '{api_key}'") - config_content = config_content.replace("BASE_URL = 'https://api.deepseek.com'", f"BASE_URL = '{base_url}'") - config_content = config_content.replace("WHISPER_METHOD = 'whisperxapi'", f"WHISPER_METHOD = '{whisper_method}'") - config_content = config_content.replace("cloud = 1 if sys.platform.startswith('linux') else 0", "cloud = 0") - config_content = config_content.replace("DISPLAY_LANGUAGE = 'auto'", f"DISPLAY_LANGUAGE = '{language}'") - - # 将修改后的内容写回 config.py 文件 - with open("config.py", "w", encoding="utf-8") as file: - file.write(config_content) - - print("config.py文件中的API密钥和基础URL已更新。") - else: - print("config.py文件已存在。") - - -def install_whisper_model(whisper_method): - if whisper_method == 'whisper_timestamped': - print("正在安装 whisper_timestamped...") - subprocess.check_call([sys.executable, "-m", "pip", "install", "whisper-timestamped"]) - elif whisper_method == 'whisperX': - print("正在安装 whisperX...") - current_dir = os.getcwd() - whisperx_dir = os.path.join(current_dir, "third_party", "whisperX") - os.chdir(whisperx_dir) - subprocess.check_call([sys.executable, "-m", "pip", "install", "-e", "."]) - os.chdir(current_dir) - - -def main(): - print("开始安装...") - - # 从环境变量中获取 API_KEY 和 BASE_URL - api_key = os.getenv('API_KEY', 'xxx') - base_url = os.getenv('BASE_URL', 'https://api.deepseek.com') - - # 用户选择 Whisper 模型 - whisper_method = os.getenv('WHISPER_METHOD', 'whisper_timestamped') - - # 语言 - language = os.getenv('DISPLAY_LANGUAGE', 'auto') - - # 初始化 config.py 文件 - init_config(api_key, base_url, whisper_method, language) - - # 安装选择的 Whisper 模型 - install_whisper_model(whisper_method) - - print("所有安装步骤都完成啦!") - - -if __name__ == "__main__": - main() From d4faf5b9451fa615da15343e6ef95abb3cf680a2 Mon Sep 17 00:00:00 2001 From: HUANYU XU Date: Mon, 16 Sep 2024 19:34:22 +0800 Subject: [PATCH 3/3] polish rich print --- .gitignore | 2 ++ core/step1_ytdlp.py | 21 ++++++++++++-- core/step3_2_splitbymeaning.py | 26 +++++++++++------ core/step5_splitforsub.py | 53 +++++++++++++++------------------- docs/install_locally_zh.md | 6 ++-- 5 files changed, 64 insertions(+), 44 deletions(-) diff --git a/.gitignore b/.gitignore index 4cedaae5..a44864f3 100644 --- a/.gitignore +++ b/.gitignore @@ -158,6 +158,8 @@ _model_cache/ # large files /ffmpeg.exe /ffmpeg +/ffprobe.exe +/ffprobe .DS_Store _config.py config.py diff --git a/core/step1_ytdlp.py b/core/step1_ytdlp.py index 20890471..fae8e90a 100644 --- a/core/step1_ytdlp.py +++ b/core/step1_ytdlp.py @@ -2,9 +2,18 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import glob from yt_dlp import YoutubeDL +import re + +def sanitize_filename(filename): + # Remove or replace illegal characters + filename = re.sub(r'[<>:"/\\|?*]', '', filename) + # Ensure filename doesn't start or end with a dot or space + filename = filename.strip('. ') + # Use default name if filename is empty + return filename if filename else 'video' def download_video_ytdlp(url, save_path='output', resolution=1080): - allowed_resolutions = [360, 480, 1080] + allowed_resolutions = [360, 1080] if resolution not in allowed_resolutions: resolution = 1080 @@ -15,6 +24,14 @@ def download_video_ytdlp(url, save_path='output', resolution=1080): } with YoutubeDL(ydl_opts) as ydl: ydl.download([url]) + + # Check and rename files after download + for file in os.listdir(save_path): + if os.path.isfile(os.path.join(save_path, file)): + filename, ext = os.path.splitext(file) + new_filename = sanitize_filename(filename) + if new_filename != filename: + os.rename(os.path.join(save_path, file), os.path.join(save_path, new_filename + ext)) def find_video_files(save_path='output'): from config import ALLOWED_VIDEO_FORMATS @@ -30,7 +47,7 @@ def find_video_files(save_path='output'): if __name__ == '__main__': # Example usage url = input('Please enter the URL of the video you want to download: ') - resolution = input('Please enter the desired resolution (360/480/1080, default 1080): ') + resolution = input('Please enter the desired resolution (360/1080, default 1080): ') resolution = int(resolution) if resolution.isdigit() else 1080 download_video_ytdlp(url, resolution=resolution) print(f"🎥 Video has been downloaded to {find_video_files()}") diff --git a/core/step3_2_splitbymeaning.py b/core/step3_2_splitbymeaning.py index 634cc1dc..d20a0fb7 100644 --- a/core/step3_2_splitbymeaning.py +++ b/core/step3_2_splitbymeaning.py @@ -8,6 +8,12 @@ from core.spacy_utils.load_nlp_model import init_nlp from config import get_joiner, WHISPER_LANGUAGE from core.step2_whisper import get_whisper_language +from rich import print +from rich.panel import Panel +from rich.console import Console +from rich.table import Table + +console = Console() def tokenize_sentence(sentence, nlp): # tokenizer counts the number of words in the sentence @@ -36,12 +42,12 @@ def find_split_positions(original, modified): best_split = j if max_similarity < 0.9: - print(f"Warning: low similarity found at the best split point: {max_similarity}") + console.print(f"[yellow]Warning: low similarity found at the best split point: {max_similarity}[/yellow]") if best_split is not None: split_positions.append(best_split) start = best_split else: - print(f"Warning: Unable to find a suitable split point for the {i+1}th part.") + console.print(f"[yellow]Warning: Unable to find a suitable split point for the {i+1}th part.[/yellow]") return split_positions @@ -62,11 +68,13 @@ def split_sentence(sentence, num_parts, word_limit=18, index=-1, retry_attempt=0 parts[-1] = last_part[:split_point - split_points[i-1]] + '\n' + last_part[split_point - split_points[i-1]:] best_split = '\n'.join(parts) if index != -1: - print(f'✅ Sentence {index} has been successfully split') - print("best_split:",best_split) - print(f'📄 Original Sentence: {sentence}') - print_split = best_split.replace('\n',' [br] ') - print(f"📚 Split Sentence: {print_split}") + console.print(f'[green]✅ Sentence {index} has been successfully split[/green]') + table = Table(title="") + table.add_column("Type", style="cyan") + table.add_column("Sentence") + table.add_row("Original", sentence, style="yellow") + table.add_row("Split", best_split.replace('\n', ' ||'), style="yellow") + console.print(table) return best_split @@ -106,13 +114,13 @@ def split_sentences_by_meaning(): nlp = init_nlp() # 🔄 process sentences multiple times to ensure all are split from config import MAX_WORKERS, MAX_SPLIT_LENGTH - for retry_attempt in range(5): + for retry_attempt in range(3): sentences = parallel_split_sentences(sentences, max_length=MAX_SPLIT_LENGTH, max_workers=MAX_WORKERS, nlp=nlp, retry_attempt=retry_attempt) # 💾 save results with open('output/log/sentence_splitbymeaning.txt', 'w', encoding='utf-8') as f: f.write('\n'.join(sentences)) - print('✅ All sentences have been successfully split!') + console.print('[green]✅ All sentences have been successfully split![/green]') if __name__ == '__main__': # print(split_sentence('Which makes no sense to the... average guy who always pushes the character creation slider all the way to the right.', 2, 22)) diff --git a/core/step5_splitforsub.py b/core/step5_splitforsub.py index ebec8d68..abd6c9fd 100644 --- a/core/step5_splitforsub.py +++ b/core/step5_splitforsub.py @@ -7,6 +7,12 @@ from core.step3_2_splitbymeaning import split_sentence from core.ask_gpt import ask_gpt, step5_align_model from core.prompts_storage import get_align_prompt +from rich import print +from rich.panel import Panel +from rich.console import Console +from rich.table import Table + +console = Console() # ! You can modify your own weights here # Chinese and Japanese 2.5 characters, Korean 2 characters, Thai 1.5 characters, full-width symbols 2 characters, other English-based and half-width symbols 1 character @@ -38,23 +44,31 @@ def align_subs(src_sub: str, tr_sub: str, src_part: str) -> Tuple[List[str], Lis src_parts = src_part.split('\n') tr_parts = [item[f'target_part_{i+1}'].strip() for i, item in enumerate(align_data)] - print(f"🔗 Aligned parts:\nSRC_LANG: {src_parts}\nTARGET_LANG: {tr_parts}\n") + table = Table(title="🔗 Aligned parts") + table.add_column("Language", style="cyan") + table.add_column("Parts", style="magenta") + table.add_row("SRC_LANG", "\n".join(src_parts)) + table.add_row("TARGET_LANG", "\n".join(tr_parts)) + console.print(table) + return src_parts, tr_parts def split_align_subs(src_lines: List[str], tr_lines: List[str], max_retry=5) -> Tuple[List[str], List[str]]: from config import MAX_SUB_LENGTH, TARGET_SUB_MULTIPLIER, MAX_WORKERS for attempt in range(max_retry): - print(f"🔄 Split attempt {attempt + 1}") + console.print(Panel(f"🔄 Split attempt {attempt + 1}", expand=False)) to_split = [] for i, (src, tr) in enumerate(zip(src_lines, tr_lines)): src, tr = str(src), str(tr) if len(src) > MAX_SUB_LENGTH or calc_len(tr) * TARGET_SUB_MULTIPLIER > MAX_SUB_LENGTH: to_split.append(i) - print(f"📏 Line {i} needs to be split:") - print(f"Source Line: {src}") - print(f"Target Line: {tr}") - print() + table = Table(title=f"📏 Line {i} needs to be split") + table.add_column("Type", style="cyan") + table.add_column("Content", style="magenta") + table.add_row("Source Line", src) + table.add_row("Target Line", tr) + console.print(table) def process(i): split_src = split_sentence(src_lines[i], num_parts=2).strip() @@ -74,37 +88,16 @@ def process(i): def split_for_sub_main(): if os.path.exists("output/log/translation_results_for_subtitles.xlsx"): - print("🚨 File `translation_results_for_subtitles.xlsx` already exists, skipping this step.") + console.print("[yellow]🚨 File `translation_results_for_subtitles.xlsx` already exists, skipping this step.[/yellow]") return - print('🚀 Start splitting subtitles...') + console.print("[bold green]🚀 Start splitting subtitles...[/bold green]") df = pd.read_excel("output/log/translation_results.xlsx") src_lines = df['Source'].tolist() tr_lines = df['Translation'].tolist() src_lines, tr_lines = split_align_subs(src_lines, tr_lines, max_retry=5) pd.DataFrame({'Source': src_lines, 'Translation': tr_lines}).to_excel("output/log/translation_results_for_subtitles.xlsx", index=False) - print('✅ Subtitles splitting completed!') + console.print("[bold green]✅ Subtitles splitting completed![/bold green]") if __name__ == '__main__': split_for_sub_main() - - # # 短句 - # print(calc_len("你好")) # 4 - # print(calc_len("Hello")) # 5 - # print(calc_len("こんにちは")) # 5 - # print(calc_len("안녕하세요")) # 5 - # print(calc_len("สวัสดี")) # 3 - - # # 中等长度句子 - # print(calc_len("你好,世界!")) # 8 - # print(calc_len("Hello, world!")) # 13 - # print(calc_len("こんにちは、世界!")) # 10 - # print(calc_len("안녕하세요, 세계!")) # 10 - # print(calc_len("สวัสดีครับ, โลก!")) # 10 - - # # 较长句子 - # print(calc_len("欢迎来到美丽的中国,希望你玩得开心!")) # 22 - # print(calc_len("Welcome to beautiful China, hope you have a great time!")) # 55 - # print(calc_len("美しい中国へようこそ、楽しい時間を過ごせますように!")) # 26 - # print(calc_len("아름다운 중국에 오신 것을 환영합니다. 즐거운 시간 보내세요!")) # 31 - # print(calc_len("ยินดีต้อนรับสู่ประเทศจีนที่สวยงาม หวังว่าคุณจะสนุกนะครับ!")) # 35 diff --git a/docs/install_locally_zh.md b/docs/install_locally_zh.md index e9b2a32b..39056bf7 100644 --- a/docs/install_locally_zh.md +++ b/docs/install_locally_zh.md @@ -28,9 +28,9 @@ VideoLingo 语音识别文本步骤提供多种 Whisper 方案的选择(因为 > 提示: 侧边栏配置 key 的说明可以参考最下方图片 -## 🛠️ 源码安装流程 (Windows) +## 🛠️ 源码安装流程 -### 前置依赖 +### Windows 前置依赖 在开始安装 VideoLingo 之前,注意预留至少 **20G** 硬盘空间,并请确保完成以下步骤: @@ -50,7 +50,7 @@ VideoLingo 语音识别文本步骤提供多种 Whisper 方案的选择(因为 - 安装 [CMake](https://github.com/Kitware/CMake/releases/download/v3.30.2/cmake-3.30.2-windows-x86_64.msi) ### 安装步骤 -> 遇到问题可以把整个步骤丢给 GPT 问问~ +> 支持Win, Mac, Linux。遇到问题可以把整个步骤丢给 GPT 问问~ 1. 打开 Anaconda Powershell Prompt 并切换到桌面目录: ```bash cd desktop