
Commit

Add files via upload
Chenyme authored Apr 25, 2024
1 parent 3b6a5a4 commit 711b884
Showing 6 changed files with 218 additions and 77 deletions.

AAVT-HomePage.py (9 changes: 6 additions & 3 deletions)

@@ -7,7 +7,7 @@


 st.set_page_config(
-    page_title="AAVT v0.6.3",
+    page_title="AAVT v0.6.4",
     page_icon="🎞️",
     layout="wide",  # 设置布局样式为宽展示
     initial_sidebar_state="expanded"  # 设置初始边栏状态为展开
@@ -25,7 +25,7 @@
     markdown_content = file.read()


-st.title("🖥Chenyme-AAVT V0.6.3")
+st.title("🖥Chenyme-AAVT V0.6.4")
 st.caption("POWERED BY @CHENYME")

 tab1, tab2, tab3 = st.tabs(["主页", "设置", "关于"])
@@ -88,8 +88,11 @@
             config["WHISPER"]["whisper_version_default"] = w_version_option
         else:
             w_version_option = st.selectbox('选择whisper版本', list(options.keys()), index=1, disabled=1)
+
         model_names = os.listdir(model_dir)
-        wlm_option = st.selectbox('选择本地模型', model_names)
+        a = faster_whisper_local_path
+        index_model = model_names.index(a.replace(model_dir + '/', ''))
+        wlm_option = st.selectbox('选择本地模型', model_names, index=index_model)
         w_local_model_path = model_dir + '/' + wlm_option
         config["WHISPER"]["faster_whisper_model_local_path"] = w_local_model_path

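The new lines in AAVT-HomePage.py preselect the previously saved local model in the selectbox by locating the configured path's folder name in the model directory listing. The sketch below restates the idea with example values for model_dir and faster_whisper_local_path (in the app both come from the config file) and swaps the string replace for os.path.basename plus a membership check, so a model folder that has since been removed falls back to index 0 instead of raising ValueError:

import os

# Example values only; the app reads these from its config file.
model_dir = "model"
faster_whisper_local_path = "model/large-v3"

model_names = os.listdir(model_dir)
# Recover the saved model's folder name from the stored path.
saved_name = os.path.basename(faster_whisper_local_path)
# Preselect the saved model if it still exists, otherwise fall back to the first entry.
index_model = model_names.index(saved_name) if saved_name in model_names else 0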

pages/📽️视频(Video).py (145 changes: 76 additions & 69 deletions)

@@ -33,49 +33,62 @@
 st.session_state.faster_whisper_model = faster_whisper_model
 st.session_state.openai_whisper_model = openai_whisper_model

+# 启用设置
+opt_w, opt_g = 1, 1
+if whisper_version == "faster-whisper":
+    opt_w = 0
+    if "distil" not in faster_whisper_model or torch.cuda.is_available():
+        opt_g = 0

 # 主页面
 st.set_page_config(page_title="AI全自动视频翻译", page_icon="📽️", layout="wide", initial_sidebar_state="expanded")
 st.title("AI全自动视频翻译📽️")
 st.write("")

 with st.sidebar:
     # 文件上传
     st.write("### 文件上传器")
     uploaded_file = st.file_uploader("请在这里上传视频:", type=['mp4', 'mov'], label_visibility="collapsed")
     if uploaded_file is not None:  # 判断是否上传成功
         st.write("文件类型:", uploaded_file.type)
         st.success("上传成功!")

 col1, col2 = st.columns(2, gap="medium")
 with col1:
     with st.expander("**识别设置**", expanded=True):
-        # GPU
-        GPU_on = st.toggle('启用GPU加速*', disabled=not torch.cuda.is_available(), help='自动检测cuda、pytorch可用后开启!')
-        device = 'cuda' if GPU_on else 'cpu'
-        # VAD
-        VAD_on = st.toggle('启用VAD辅助*', help='启用语音活动检测(VAD)以过滤掉没有语音的音频部分,仅支持faster-whisper使用。')
-        vad = 'True' if GPU_on else 'False'
-        # language
-        language = ('自动识别', 'zh', 'en', 'ja', 'ko', 'it', 'de')
-        lang = st.selectbox('选择视频语言', language, index=0, help="强制指定视频语言会提高识别准确度,但也可能会造成识别出错。")
+        col3, col4 = st.columns(2)
+        with col3:
+            # GPU
+            GPU_on = st.toggle('启用GPU加速', disabled=opt_g, help='自动检测cuda、pytorch可用后开启!')
+            device = 'cuda' if GPU_on else 'cpu'
+            # VAD
+            VAD_on = st.toggle('启用VAD辅助', disabled=opt_w, help='启用语音活动检测(VAD)以过滤掉没有语音的音频部分,仅支持faster-whisper使用。')
+            vad = 'True' if VAD_on else 'False'
+        with col4:
+            # language
+            language = ('自动识别', 'zh', 'en', 'ja', 'ko', 'it', 'de')
+            lang = st.selectbox('选择视频语言', language, index=0, help="强制指定视频语言会提高识别准确度,但也可能会造成识别出错。")

     with st.expander("**翻译设置**", expanded=True):
         translate_option = st.selectbox('选择翻译引擎', ('无需翻译', 'kimi-moonshot-v1-8k', 'kimi-moonshot-v1-32k', 'kimi-moonshot-v1-128k', 'gpt-3.5-turbo', 'gpt-4'), index=0)
         if translate_option != '无需翻译':
             language = ('中文', 'English', '日本語', '한국인', 'Italiano', 'Deutsch')
-            col3, col4 = st.columns(2)
+            col3, col4, col5= st.columns(3)
             with col3:
                 language1 = st.selectbox('选择原始语言', language, index=1)
             with col4:
                 language2 = st.selectbox('选择目标语言', language, index=0)
+            with col5:
+                waittime = st.number_input('翻译间隔设置', min_value=0.0, max_value=5.0, value=0.5, step=0.5)
         proxy_on = st.toggle('启用代理', help='如果你能直接访问openai.com,则无需启用。')

     with st.expander("**字幕设置**", expanded=True):
         with open(project_dir.replace("/pages", "/config") + '/font_data.txt', 'r', encoding='utf-8') as file:
             lines = file.readlines()
         fonts = [line.strip() for line in lines]
-        subtitle_model = st.selectbox('字幕方式:', ("硬字幕", "软字幕"), help="请注意:由于软字幕会导致部分字体会无法正常显示,因此可能会出现乱码!同时,您无法在网页中预览字幕效果,请打开文件夹访问原视频并使用支持外挂字幕的视频播放器挂载字幕查看效果!")
-        font = st.selectbox('视频字幕字体:', fonts, help="所有字体均从系统读取加载,支持用户自行安装字体。请注意商用风险!")
-        st.session_state.font = font
+        col3, col4 = st.columns(2, gap="medium")
+        with col3:
+            subtitle_model = st.selectbox('字幕方式:', ("硬字幕", "软字幕"), help="请注意:由于软字幕会导致部分字体会无法正常显示,因此可能会出现乱码!同时,您无法在网页中预览字幕效果,请打开文件夹访问原视频并使用支持外挂字幕的视频播放器挂载字幕查看效果!")
+        with col4:
+            font = st.selectbox('视频字幕字体:', fonts, help="所有字体均从系统读取加载,支持用户自行安装字体。请注意商用风险!")
+            st.session_state.font = font
         col3, col4 = st.columns([0.9, 0.1], gap="medium")
         with col3:
             font_size = st.number_input('字幕字体大小', min_value=1, max_value=30, value=18, step=1, help="推荐大小:18")
@@ -84,93 +97,87 @@
             font_color = st.color_picker('颜色', '#FFFFFF')
             st.session_state.font_color = font_color
 with col2:
-    with st.expander("**高级功能**"):
+    with st.expander("**高级设置**"):
         token_num = st.number_input('翻译最大token限制', min_value=10, max_value=500, value=100, step=10)
-        min_vad = st.number_input('VAD静音检测(ms)', min_value=100, max_value=5000, value=500, step=100,
+        min_vad = st.number_input('VAD静音检测(ms)', min_value=100, max_value=5000, value=500, step=100, disabled=opt_w,
                                   help="启用VAD辅助后生效!对应`min_silence_duration_ms`参数,最小静音持续时间。")
-        beam_size = st.number_input('束搜索大小', min_value=1, max_value=20, value=5, step=1,
+        beam_size = st.number_input('束搜索大小', min_value=1, max_value=20, value=5, step=1, disabled=opt_w,
                                     help="`beam_size`参数。用于定义束搜索算法中每个时间步保留的候选项数量。束搜索算法通过在每个时间步选择最有可能的候选项来构建搜索树,并根据候选项的得分进行排序和剪枝。较大的beam_size值会保留更多的候选项,扩大搜索空间,可能提高生成结果的准确性,但也会增加计算开销。相反,较小的beam_size值会减少计算开销,但可能导致搜索过早地放弃最佳序列。")

 with col1:
     if st.button('生成视频', type="primary", use_container_width=True):
         if uploaded_file is not None:

+            msg = st.toast('开始生成!')
             time1 = time.time()
-            with st.spinner('正在加载视频缓存...'):
-                current_time = datetime.datetime.now().strftime("%Y-%m-%d %H-%M-%S")
-                output_file = cache_dir + current_time
-                os.makedirs(output_file)
-                with open(output_file + "/uploaded.mp4", "wb") as file:
-                    file.write(uploaded_file.getbuffer())
+            msg.toast('正在进行视频读取📽️')
+            current_time = datetime.datetime.now().strftime("%Y-%m-%d %H-%M-%S")
+            output_file = cache_dir + current_time
+            os.makedirs(output_file)
+            with open(output_file + "/uploaded.mp4", "wb") as file:
+                file.write(uploaded_file.getbuffer())

             time2 = time.time()
-            with st.spinner('正在识别视频内容...'):
-                if st.session_state.whisper_version_name == "faster-whisper":
-                    models_option = st.session_state.faster_whisper_model
-                else:
-                    models_option = st.session_state.openai_whisper_model
-                if st.session_state.model_local:
-                    models_option = st.session_state.faster_whisper_model
-
-                print("加载模型:" + models_option)
-                result = get_whisper_result(uploaded_file, output_file, device, models_option,
-                                            st.session_state.whisper_version_name, vad, lang, beam_size, min_vad)
-                print("whisper识别:" + result['text'])
+            msg.toast('正在识别视频内容🔍')
+            if st.session_state.whisper_version_name == "faster-whisper":
+                models_option = st.session_state.faster_whisper_model
+            else:
+                models_option = st.session_state.openai_whisper_model
+            if st.session_state.model_local:
+                models_option = st.session_state.model_local
+            print("加载模型:" + models_option)
+            result = get_whisper_result(uploaded_file, output_file, device, models_option,
+                                        st.session_state.whisper_version_name, vad, lang, beam_size, min_vad)
+            print("whisper识别:" + result['text'])

             time3 = time.time()
             if translate_option != '无需翻译':
-                with st.spinner('正在翻译文本...'):
-                    if translate_option == 'gpt-3.5-turbo':
-                        result = openai_translate1(st.session_state.openai_key, st.session_state.openai_base,
-                                                   proxy_on, result, language1, language2)
-                    elif translate_option == 'gpt-4':
-                        result = openai_translate2(st.session_state.openai_key, st.session_state.openai_base,
-                                                   proxy_on, result, language1, language2, token_num)
-                    else:
-                        result = kimi_translate(st.session_state.kimi_key, translate_option, result, language1, language2, token_num)
+                msg.toast('正在翻译文本🤖')
+                if translate_option == 'gpt-3.5-turbo':
+                    result = openai_translate1(st.session_state.openai_key, st.session_state.openai_base,
+                                               proxy_on, result, language1, language2, waittime)
+                elif translate_option == 'gpt-4':
+                    result = openai_translate2(st.session_state.openai_key, st.session_state.openai_base,
+                                               proxy_on, result, language1, language2, token_num, waittime)
+                else:
+                    result = kimi_translate(st.session_state.kimi_key, translate_option, result, language1, language2, token_num, waittime)

             time4 = time.time()
-            with st.spinner('正在生成SRT字幕文件...'):
-                srt_content = generate_srt_from_result(result)
-                srt_content2 = generate_srt_from_result_2(result, font, font_size, font_color)
-                with open(output_file + "/output.srt", 'w', encoding='utf-8') as srt_file:
-                    srt_file.write(srt_content2)
+            msg.toast('正在生成SRT字幕文件📃')
+            srt_content = generate_srt_from_result(result)
+            srt_content2 = generate_srt_from_result_2(result, font, font_size, font_color)
+            with open(output_file + "/output.srt", 'w', encoding='utf-8') as srt_file:
+                srt_file.write(srt_content2)

             time5 = time.time()
-            with st.spinner('正在合并视频,请耐心等待视频生成...'):
-                srt_mv(output_file, font, font_size, font_color, subtitle_model)
+            msg.toast('正在合并视频,请耐心等待生成⚙️')
+            srt_mv(output_file, font, font_size, font_color, subtitle_model)

             time6 = time.time()
+            st.toast("🎉🎉🎉")
             st.session_state.srt_content = srt_content
             st.session_state.output = output_file
             st.session_state.current = current_time
             st.session_state.time = time6 - time1
+            formatted_result = f"{st.session_state.time:.2f}"
+            msg.toast('运行成功!总用时:' + str(formatted_result) + "秒")
         else:
-            st.warning("请先上传视频")
+            st.toast("请先上传视频")

 with col2:
     with st.expander("**视频预览**", expanded=True):
         try:
             video_bytes = show_video(st.session_state.output)
             st.video(video_bytes)
             formatted_result = f"{st.session_state.time:.2f}"
             st.success(f"合并成功!总用时:{formatted_result}秒")

             if st.button('查看文件目录', use_container_width=True):
                 os.startfile(st.session_state.output)
-                st.warning("注意:文件夹已成功打开,可能未置顶显示,请检查任务栏!")
+                st.toast("注意:文件夹已成功打开,可能未置顶显示,请检查任务栏!")
         except:
             with st.container(height=300):
                 st.write("")
                 st.success('''
-                **这里是视频预览窗口**
-                **运行后自动显示预览结果**
-                ###### 详细步骤
-                1. **配置设置:** 在主页-设置中,选择适合您需求的识别模型和翻译引擎。
-                2. **上传文件:** 在侧栏的文件上传器中,上传您要转换的视频文件。
-                3. **调整参数:** 在页面左侧调整视频生成的相关参数,您也可以根据需要配置高级功能。
-                4. **生成视频:** 点击生成视频按钮,等待生成完成。
-                ###### 以下可跳过
-                5. **字幕校对:** 生成完成后,您可以在下方查看字幕内容并进行二次校对。
-                6. **更多格式:** 按照需要,下载其他的字幕格式。
-                7. **再次生成:** 在下方设置重新参数后,再次生成视频。
+                **这里是视频预览窗口**
+                **运行后自动显示预览结果**
                 ''')

 st.write('''------''')
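
In pages/📽️视频(Video).py, every translation call now receives a trailing waittime argument, wired to the new '翻译间隔设置' (translation interval) number input in the translation settings. The translator wrappers themselves are outside this diff, so the following is only a minimal sketch of how such an interval is typically applied: pausing between successive chunk requests to stay under the translation API's rate limit. translate_chunk and chunks are hypothetical placeholders, not helpers from this repository.

import time
from typing import Callable, List

def translate_with_interval(chunks: List[str],
                            translate_chunk: Callable[[str], str],
                            waittime: float = 0.5) -> List[str]:
    # Translate chunks one by one, sleeping `waittime` seconds between
    # consecutive requests so the API rate limit is not exceeded.
    results = []
    for i, chunk in enumerate(chunks):
        results.append(translate_chunk(chunk))
        if waittime > 0 and i < len(chunks) - 1:
            time.sleep(waittime)
    return results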
