diff --git a/README.md b/README.md index e8aad42..83c48d8 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,10 @@ The following command will generate a `subtitled/video.mp4` file contained the i faster_auto_subtitle /path/to/video.mp4 -o subtitled/ +You can also specify a folder with multiple videos, and it will process all of them: + + faster_auto_subtitle /path/to/videos/ -o subtitled/ + The default setting (which selects the `small` model) works well for transcribing English. You can optionally use a bigger model for better results (especially with other languages). The available models are `tiny`, `tiny.en`, `base`, `base.en`, `small`, `small.en`, `medium`, `medium.en`, `large`, `large-v1`, `large-v2`, `large-v3`. diff --git a/auto_subtitle/main.py b/auto_subtitle/main.py index 171dd27..2ac0763 100644 --- a/auto_subtitle/main.py +++ b/auto_subtitle/main.py @@ -4,7 +4,7 @@ from typing import Optional from .models.Subtitles import Subtitles, SegmentsIterable from .utils.files import filename, write_srt -from .utils.ffmpeg import get_audio, add_subtitles, preprocess_audio +from .utils.ffmpeg import get_audio, add_subtitles, preprocess_audio, file_has_audio from .utils.whisper import WhisperAI from .translation.easynmt_utils import EasyNMTWrapper @@ -39,7 +39,7 @@ def process(args: dict): "subtitle_type": args.pop("subtitle_type") } - videos = args.pop('video') + paths_to_process = args.pop('video') audio_channel = args.pop('audio_channel') model_args = { "model_size_or_path": model_name, @@ -51,16 +51,42 @@ def process(args: dict): device=model_args['device']) if target_language != 'en' else None os.makedirs(output_args["output_dir"], exist_ok=True) - for video in videos: - if video.endswith('.wav'): - audio = preprocess_audio(video, audio_channel, sample_interval) - else: - audio = get_audio(video, audio_channel, sample_interval) + for path_to_process in paths_to_process: + process_path(audio_channel, language, output_args, path_to_process, sample_interval, + target_language, transcribe_model, translate_model) - transcribed, translated = perform_task(video, audio, language, target_language, - transcribe_model, translate_model) - save_result(video, transcribed, translated, sample_interval, output_args) +def process_path(audio_channel, language, output_args, path_to_process, sample_interval, + target_language, transcribe_model, translate_model): + if not os.path.exists(path_to_process): + logger.error("File %s does not exist.", path_to_process) + return + + if not os.path.isdir(path_to_process): + process_file(audio_channel, language, output_args, sample_interval, target_language, + transcribe_model, translate_model, path_to_process) + return + + logger.info("Processing all files in directory %s", path_to_process) + for file_name in os.listdir(path_to_process): + process_file(audio_channel, language, output_args, sample_interval, target_language, + transcribe_model, translate_model, os.path.join(path_to_process, file_name)) + + +def process_file(audio_channel, language, output_args, sample_interval, target_language, + transcribe_model, translate_model, file_name): + if not file_has_audio(file_name): + logger.info("File %s has no audio, skipping.", file_name) + return + + if file_name.endswith('.wav'): + audio = preprocess_audio(file_name, audio_channel, sample_interval) + else: + audio = get_audio(file_name, audio_channel, sample_interval) + + transcribed, translated = perform_task(file_name, audio, language, target_language, + transcribe_model, translate_model) + save_result(file_name, transcribed, translated, sample_interval, output_args) def save_result(video: str, transcribed: Subtitles, translated: Subtitles, sample_interval: list, @@ -84,9 +110,7 @@ def perform_task(video: str, audio: str, language: str, target_language: str, transcribed = get_subtitles(video, audio, transcribe_model) translated = None - logger.info('Subtitles generated.') if target_language != 'en': - logger.info('Translating subtitles... This might take a while.') translated = translate_subtitles( transcribed, language, target_language, translate_model) @@ -99,8 +123,11 @@ def translate_subtitles(subtitles: Subtitles, source_lang: str, target_lang: str if src_lang == '' or src_lang is None: src_lang = subtitles.language + segments = list(subtitles.segments) + logger.info('Subtitles generated.') + logger.info('Translating subtitles... This might take a while.') translated_segments = model.translate( - list(subtitles.segments), src_lang, target_lang) + segments, src_lang, target_lang) return Subtitles(SegmentsIterable(translated_segments), target_lang) diff --git a/auto_subtitle/utils/ffmpeg.py b/auto_subtitle/utils/ffmpeg.py index c2f8893..7e4e6f2 100644 --- a/auto_subtitle/utils/ffmpeg.py +++ b/auto_subtitle/utils/ffmpeg.py @@ -1,8 +1,8 @@ import os import tempfile import logging -from typing import Optional import ffmpeg +from typing import Optional from .tempfile import SubtitlesTempFile from .files import filename from ..models.Subtitles import Subtitles @@ -39,6 +39,16 @@ def get_audio(path: str, audio_channel_index: int, sample_interval: Optional[lis return output_path +def file_has_audio(path: str) -> bool: + try: + audio_info = ffmpeg.probe(path, select_streams='a') + return 'streams' in audio_info \ + and audio_info['streams'] is not None \ + and len(audio_info['streams']) > 0 + except ffmpeg.Error: + return False + + def preprocess_audio(path: str, audio_channel_index: int, sample_interval: Optional[list]) -> str: if sample_interval is not None or audio_channel_index != 0: return get_audio(path, audio_channel_index, sample_interval) @@ -71,7 +81,7 @@ def add_subtitles(path: str, transcribed: Subtitles, translated: Optional[Subtit ffmpeg_output_args['t'] = str( sample_interval[1] - sample_interval[0]) - # HACK: On Windows it's impossible to use absolute subtitle file path with ffmpeg + # HACK: On Windows it's impossible to use absolute subtitle file path with ffmpeg, # so we use temp copy instead # see: https://github.com/kkroening/ffmpeg-python/issues/745 with SubtitlesTempFile(transcribed) as transcribed_tmp, SubtitlesTempFile(