diff --git a/ConsoleWhisper/Model/Argument.cs b/ConsoleWhisper/Model/Argument.cs index 72b12af..d6b48c2 100644 --- a/ConsoleWhisper/Model/Argument.cs +++ b/ConsoleWhisper/Model/Argument.cs @@ -5,20 +5,28 @@ namespace ConsoleWhisper.Model { public class Argument { [Option('i', "input", Required = true, Hidden = false, Separator = ' ', HelpText = "Input media files.")] - [Value(3)] + [Value(4)] public IEnumerable Files { get; set; } [Option('m', "model", Required = false, Hidden = false, Default = "small", HelpText = "Whisper model: base, tiny, small, medium, large.")] [Value(0, Min = 0, Max = 1, Required = false)] public string ModelType { get; set; } - [Option('o', "output", Required = false, Hidden = false, HelpText = "Output directory.")] + [Option('o', "output", Required = false, Hidden = false, HelpText = "(Default: current directory) Output directory.")] [Value(1, Min = 0, Max = 1, Required = false)] public string OutputDir { get; set; } - [Option('g', "gpu", Required = false, Hidden = true, HelpText = "Currently not implemented.")] - [Value(2, Required = false, Default = false)] - public bool GPU { get; set; } + [Option('l', "language", Required = false, Hidden = false, Default = "auto", HelpText = "Specify transcribe language.")] + [Value(2, Required = false)] + public string Language { get; set; } + + [Option("only-extract", Required = false, Hidden = false, Default = false, HelpText = "Extract audio stream, without transcribing.")] + [Value(3, Required = false)] + public bool OnlyExtract { get; set; } + + //[Option('g', "gpu", Required = false, Hidden = true, HelpText = "Currently not implemented.")] + //[Value(2, Required = false, Default = false)] + //public bool GPU { get; set; } public Argument() { OutputDir = Environment.CurrentDirectory; @@ -27,10 +35,20 @@ public class Argument { public void Validate() { if (!SupportedModels.Contains(ModelType)) throw new ArgumentException(message: $"Whisper model type \"{ModelType}\" is not supported."); + if (!SupportedLanguages.Contains(Language)) + throw new ArgumentException(message: $"Language \"{Language}\" is not supported.\nCheck {LanguageLink} for available languages."); } - internal const int SupportedArgumentsCount = 5; + internal const int SupportedArgumentsCount = 7; private static readonly HashSet SupportedModels = new() { "base", "tiny", "small", "medium", "large" }; + private static readonly HashSet SupportedLanguages = new() { "en", "zh", "de", "es", "ru", "ko", "fr", "ja", "pt", "tr", + "pl", "ca", "nl", "ar", "sv", "it", "id", "hi", "fi", "vi", "he", "uk", "el", "ms", "ro", "da", "hu", "ta", "no", "th", "ur", + "hr", "bg", "lt", "la", "mi", "ml", "cy", "sk", "te", "fa", "lv", "bn", "sr", "az", "sl", "kn", "et", "mk", "br", "eu", "is", + "hy", "ne", "mn", "bs", "kk", "sq", "sw", "gl", "mr", "pa", "si", "km", "sn", "yo", "so", "af", "oc", "ka", "be", "tg", "sd", + "gu", "am", "yi", "lo", "uz", "fo", "ht", "ps", "tk", "nn", "mt", "sa", "lb", "my", "bo", "tl", "mg", "as", "tt", "haw", "ln", + "ha", "ba", "jw", "su", "auto" }; + + private const string LanguageLink = "https://github.com/ggerganov/whisper.cpp/blob/57543c169e27312e7546d07ed0d8c6eb806ebc36/whisper.cpp#L121"; } } diff --git a/ConsoleWhisper/Module/AudioHelper.cs b/ConsoleWhisper/Module/AudioHelper.cs index 69a6324..7a8a176 100644 --- a/ConsoleWhisper/Module/AudioHelper.cs +++ b/ConsoleWhisper/Module/AudioHelper.cs @@ -1,15 +1,16 @@ using NAudio.Wave; using System; using System.Collections.Generic; +using System.IO; using System.Linq; using System.Threading.Tasks; using Xabe.FFmpeg; namespace ConsoleWhisper.Module { public static class AudioHelper { - public static async Task Extract(string mediaFilename) { + public static async Task Extract(string outputDir, string mediaFilename, bool isOnlyExtract) { try { - var audioFilename = FileHelper.GetTempMp3File(); + var audioFilename = isOnlyExtract ? FileHelper.GetAudioPath(outputDir, mediaFilename) : FileHelper.GetTempMp3File(); var mediaInfo = await FFmpeg.GetMediaInfo(mediaFilename); @@ -20,10 +21,14 @@ public static class AudioHelper { await DoConversion(audioStream, audioFilename); - var resampledWaveFilename = Resample(audioFilename); - FileHelper.DelFile(audioFilename); + if (!isOnlyExtract) { + var resampledWaveFilename = Resample(audioFilename); + FileHelper.DelFile(audioFilename); - return resampledWaveFilename; + return resampledWaveFilename; + } + + return audioFilename; } catch (Exception) { throw; } @@ -36,6 +41,8 @@ public static class AudioHelper { .SetOutputFormat(Format.mp3) .SetOutput(audioFilename); + Output.Info($"Extracting audio to {Path.GetFileName(audioFilename)}"); + await conversion.Start(); } diff --git a/ConsoleWhisper/Module/FileHelper.cs b/ConsoleWhisper/Module/FileHelper.cs index 28d1616..4b40c67 100644 --- a/ConsoleWhisper/Module/FileHelper.cs +++ b/ConsoleWhisper/Module/FileHelper.cs @@ -35,6 +35,21 @@ internal static class FileHelper { File.Delete(filePath); } + internal static string GetTranscriptPath(string outputDir, string filename) { + var extension = Path.GetExtension(filename).TrimStart('.'); + var filenameWithoutExtension = Path.GetFileNameWithoutExtension(filename); + var transcriptName = Path.ChangeExtension($"{filenameWithoutExtension}-{extension}", SrtExtension); + return Path.Combine(outputDir, transcriptName); + } + + internal static string GetAudioPath(string outputDir, string filename) { + var extension = Path.GetExtension(filename).TrimStart('.'); + var filenameWithoutExtension = Path.GetFileNameWithoutExtension(filename); + var transcriptName = Path.ChangeExtension($"{filenameWithoutExtension}-{extension}", Mp3Extension); + return Path.Combine(outputDir, transcriptName); + } + + #region Get temp file name internal static string GetTempFile() { return Path.GetTempFileName(); } @@ -59,7 +74,9 @@ internal static class FileHelper { DelFile(tempFilename); return waveFilename; } + #endregion + #region Whisper model related operations internal static bool ModelExists(string modelFilename) { return File.Exists(Path.Combine(ModelDirectory, modelFilename)); } @@ -71,14 +88,9 @@ internal static class FileHelper { internal static string GetModelPath(string modelType) { return Path.Combine(ModelDirectory, modelType); } + #endregion - internal static string GetTranscriptPath(string outputDir, string filename) { - var extension = Path.GetExtension(filename).TrimStart('.'); - var filenameWithoutExtension = Path.GetFileNameWithoutExtension(filename); - var transcriptName = Path.ChangeExtension($"{filenameWithoutExtension}-{extension}", SrtExtension); - return Path.Combine(outputDir, transcriptName); - } - + #region Add Text to Filestream internal static async Task AddText(FileStream fs, string value) { byte[] info = encoder.GetBytes(value); await fs.WriteAsync(info); @@ -93,12 +105,15 @@ internal static class FileHelper { byte[] info = encoder.GetBytes(value.ToString("G")); await fs.WriteAsync(info); } + #endregion private static readonly UTF8Encoding encoder = new(true); + #region Extension string private const string WaveExtension = "wav"; private const string AacExtension = "aac"; private const string Mp3Extension = "mp3"; private const string SrtExtension = "srt"; + #endregion } } diff --git a/ConsoleWhisper/Module/WhisperHelper.cs b/ConsoleWhisper/Module/WhisperHelper.cs index 97abf7a..65fdf88 100644 --- a/ConsoleWhisper/Module/WhisperHelper.cs +++ b/ConsoleWhisper/Module/WhisperHelper.cs @@ -7,12 +7,12 @@ namespace ConsoleWhisper.Module { internal class WhisperHelper { - internal static async Task Transcribe(string modelType, string wavFilename, string mediaFilename, string outputDir) { + internal static async Task Transcribe(string modelType, string wavFilename, string mediaFilename, string outputDir, string language) { try { using var whisperFactory = WhisperFactory.FromPath(FileHelper.GetModelPath(modelType)); using var processor = whisperFactory.CreateBuilder() - .WithLanguage("auto") + .WithLanguage(language) .WithPrintProgress() .Build(); diff --git a/ConsoleWhisper/Program.cs b/ConsoleWhisper/Program.cs index 5dbf276..38148c2 100644 --- a/ConsoleWhisper/Program.cs +++ b/ConsoleWhisper/Program.cs @@ -48,10 +48,13 @@ internal class Program { int cnt = 1; foreach (var file in arg.Files) { var mediaFilename = Path.GetFileName(file); - var wavFilename = await AudioHelper.Extract(file); - Output.Info($"Start transcribing file #{cnt++}: {mediaFilename}"); - await WhisperHelper.Transcribe(arg.ModelType, wavFilename, mediaFilename, arg.OutputDir); - FileHelper.DelFile(wavFilename); + var wavFilename = await AudioHelper.Extract(arg.OutputDir, file, arg.OnlyExtract); + + if (!arg.OnlyExtract) { + Output.Info($"Start transcribing file #{cnt++}: {mediaFilename}"); + await WhisperHelper.Transcribe(arg.ModelType, wavFilename, mediaFilename, arg.OutputDir, arg.Language); + FileHelper.DelFile(wavFilename); + } }