Skip to content

Commit

Permalink
Add arguments
Browse files Browse the repository at this point in the history
- language: specify transcribe language
- only extract: only extract audio
  • Loading branch information
azhuge233 committed Jun 22, 2023
1 parent 7375c34 commit 5244505
Show file tree
Hide file tree
Showing 5 changed files with 67 additions and 24 deletions.
30 changes: 24 additions & 6 deletions ConsoleWhisper/Model/Argument.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,28 @@
namespace ConsoleWhisper.Model {
public class Argument {
[Option('i', "input", Required = true, Hidden = false, Separator = ' ', HelpText = "Input media files.")]
[Value(3)]
[Value(4)]
public IEnumerable<string> Files { get; set; }

[Option('m', "model", Required = false, Hidden = false, Default = "small", HelpText = "Whisper model: base, tiny, small, medium, large.")]
[Value(0, Min = 0, Max = 1, Required = false)]
public string ModelType { get; set; }

[Option('o', "output", Required = false, Hidden = false, HelpText = "Output directory.")]
[Option('o', "output", Required = false, Hidden = false, HelpText = "(Default: current directory) Output directory.")]
[Value(1, Min = 0, Max = 1, Required = false)]
public string OutputDir { get; set; }

[Option('g', "gpu", Required = false, Hidden = true, HelpText = "Currently not implemented.")]
[Value(2, Required = false, Default = false)]
public bool GPU { get; set; }
[Option('l', "language", Required = false, Hidden = false, Default = "auto", HelpText = "Specify transcribe language.")]
[Value(2, Required = false)]
public string Language { get; set; }

[Option("only-extract", Required = false, Hidden = false, Default = false, HelpText = "Extract audio stream, without transcribing.")]
[Value(3, Required = false)]
public bool OnlyExtract { get; set; }

//[Option('g', "gpu", Required = false, Hidden = true, HelpText = "Currently not implemented.")]
//[Value(2, Required = false, Default = false)]
//public bool GPU { get; set; }

public Argument() {
OutputDir = Environment.CurrentDirectory;
Expand All @@ -27,10 +35,20 @@ public class Argument {
/// <summary>
/// Checks the parsed option values against the supported sets.
/// </summary>
/// <exception cref="ArgumentException">
/// Thrown when <see cref="ModelType"/> is not in <c>SupportedModels</c>, or when
/// <see cref="Language"/> is not in <c>SupportedLanguages</c>. The model is checked first.
/// </exception>
public void Validate() {
    // Model check comes first so an invalid model is reported before an invalid language.
    var modelIsKnown = SupportedModels.Contains(ModelType);
    if (modelIsKnown == false) {
        throw new ArgumentException(message: $"Whisper model type \"{ModelType}\" is not supported.");
    }

    var languageIsKnown = SupportedLanguages.Contains(Language);
    if (languageIsKnown == false) {
        throw new ArgumentException(message: $"Language \"{Language}\" is not supported.\nCheck {LanguageLink} for available languages.");
    }
}

internal const int SupportedArgumentsCount = 5;
internal const int SupportedArgumentsCount = 7;

private static readonly HashSet<string> SupportedModels = new() { "base", "tiny", "small", "medium", "large" };
private static readonly HashSet<string> SupportedLanguages = new() { "en", "zh", "de", "es", "ru", "ko", "fr", "ja", "pt", "tr",
"pl", "ca", "nl", "ar", "sv", "it", "id", "hi", "fi", "vi", "he", "uk", "el", "ms", "ro", "da", "hu", "ta", "no", "th", "ur",
"hr", "bg", "lt", "la", "mi", "ml", "cy", "sk", "te", "fa", "lv", "bn", "sr", "az", "sl", "kn", "et", "mk", "br", "eu", "is",
"hy", "ne", "mn", "bs", "kk", "sq", "sw", "gl", "mr", "pa", "si", "km", "sn", "yo", "so", "af", "oc", "ka", "be", "tg", "sd",
"gu", "am", "yi", "lo", "uz", "fo", "ht", "ps", "tk", "nn", "mt", "sa", "lb", "my", "bo", "tl", "mg", "as", "tt", "haw", "ln",
"ha", "ba", "jw", "su", "auto" };

private const string LanguageLink = "https://github.com/ggerganov/whisper.cpp/blob/57543c169e27312e7546d07ed0d8c6eb806ebc36/whisper.cpp#L121";
}
}
17 changes: 12 additions & 5 deletions ConsoleWhisper/Module/AudioHelper.cs
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
using NAudio.Wave;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Threading.Tasks;
using Xabe.FFmpeg;

namespace ConsoleWhisper.Module {
public static class AudioHelper {
public static async Task<string> Extract(string mediaFilename) {
public static async Task<string> Extract(string outputDir, string mediaFilename, bool isOnlyExtract) {
try {
var audioFilename = FileHelper.GetTempMp3File();
var audioFilename = isOnlyExtract ? FileHelper.GetAudioPath(outputDir, mediaFilename) : FileHelper.GetTempMp3File();

var mediaInfo = await FFmpeg.GetMediaInfo(mediaFilename);

Expand All @@ -20,10 +21,14 @@ public static class AudioHelper {

await DoConversion(audioStream, audioFilename);

var resampledWaveFilename = Resample(audioFilename);
FileHelper.DelFile(audioFilename);
if (!isOnlyExtract) {
var resampledWaveFilename = Resample(audioFilename);
FileHelper.DelFile(audioFilename);

return resampledWaveFilename;
return resampledWaveFilename;
}

return audioFilename;
} catch (Exception) {
throw;
}
Expand All @@ -36,6 +41,8 @@ public static class AudioHelper {
.SetOutputFormat(Format.mp3)
.SetOutput(audioFilename);

Output.Info($"Extracting audio to {Path.GetFileName(audioFilename)}");

await conversion.Start();
}

Expand Down
29 changes: 22 additions & 7 deletions ConsoleWhisper/Module/FileHelper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,21 @@ internal static class FileHelper {
File.Delete(filePath);
}

/// <summary>
/// Builds the path of the subtitle file that belongs to a media file.
/// </summary>
/// <param name="outputDir">Directory the transcript path is rooted in.</param>
/// <param name="filename">Media file name or path; only its file-name part is used.</param>
/// <returns>
/// <paramref name="outputDir"/> combined with "{name}-{original extension}.srt",
/// e.g. "clip.mkv" gives "clip-mkv.srt".
/// </returns>
internal static string GetTranscriptPath(string outputDir, string filename) {
    var sourceExtension = Path.GetExtension(filename).TrimStart('.');
    var stem = Path.GetFileNameWithoutExtension(filename);
    // Append the suffix explicitly. Path.ChangeExtension replaces everything after the
    // last '.' of its input, so a dotted stem such as "my.video" (from "my.video.mkv")
    // used to be truncated to "my.srt" and could collide with other inputs.
    var transcriptName = $"{stem}-{sourceExtension}.{SrtExtension}";
    return Path.Combine(outputDir, transcriptName);
}

/// <summary>
/// Builds the path of the extracted audio file that belongs to a media file.
/// </summary>
/// <param name="outputDir">Directory the audio path is rooted in.</param>
/// <param name="filename">Media file name or path; only its file-name part is used.</param>
/// <returns>
/// <paramref name="outputDir"/> combined with "{name}-{original extension}.mp3",
/// e.g. "clip.mkv" gives "clip-mkv.mp3".
/// </returns>
internal static string GetAudioPath(string outputDir, string filename) {
    var sourceExtension = Path.GetExtension(filename).TrimStart('.');
    var stem = Path.GetFileNameWithoutExtension(filename);
    // Append the suffix explicitly. Path.ChangeExtension replaces everything after the
    // last '.' of its input, so a dotted stem such as "my.video" (from "my.video.mkv")
    // used to be truncated to "my.mp3" and could collide with other inputs.
    var audioName = $"{stem}-{sourceExtension}.{Mp3Extension}";
    return Path.Combine(outputDir, audioName);
}

#region Get temp file name
/// <summary>
/// Returns the path handed back by <see cref="Path.GetTempFileName"/>.
/// </summary>
/// <returns>Full path of the temporary file.</returns>
internal static string GetTempFile() => Path.GetTempFileName();
Expand All @@ -59,7 +74,9 @@ internal static class FileHelper {
DelFile(tempFilename);
return waveFilename;
}
#endregion

#region Whisper model related operations
/// <summary>
/// Tells whether a model file is present in the model directory.
/// </summary>
/// <param name="modelFilename">File name looked up inside <c>ModelDirectory</c>.</param>
/// <returns><c>true</c> when the combined path points at an existing file.</returns>
internal static bool ModelExists(string modelFilename) {
    var candidatePath = Path.Combine(ModelDirectory, modelFilename);
    return File.Exists(candidatePath);
}
Expand All @@ -71,14 +88,9 @@ internal static class FileHelper {
/// <summary>
/// Resolves a model name to its location under the model directory.
/// </summary>
/// <param name="modelType">Name appended to <c>ModelDirectory</c>.</param>
/// <returns><c>ModelDirectory</c> combined with <paramref name="modelType"/>.</returns>
internal static string GetModelPath(string modelType) => Path.Combine(ModelDirectory, modelType);
#endregion

/// <summary>
/// Builds the path of the subtitle file that belongs to a media file.
/// </summary>
/// <param name="outputDir">Directory the transcript path is rooted in.</param>
/// <param name="filename">Media file name or path; only its file-name part is used.</param>
/// <returns>
/// <paramref name="outputDir"/> combined with "{name}-{original extension}.srt",
/// e.g. "clip.mkv" gives "clip-mkv.srt".
/// </returns>
internal static string GetTranscriptPath(string outputDir, string filename) {
    var sourceExtension = Path.GetExtension(filename).TrimStart('.');
    var stem = Path.GetFileNameWithoutExtension(filename);
    // Append the suffix explicitly. Path.ChangeExtension replaces everything after the
    // last '.' of its input, so a dotted stem such as "my.video" (from "my.video.mkv")
    // used to be truncated to "my.srt" and could collide with other inputs.
    var transcriptName = $"{stem}-{sourceExtension}.{SrtExtension}";
    return Path.Combine(outputDir, transcriptName);
}

#region Add Text to Filestream
internal static async Task AddText(FileStream fs, string value) {
byte[] info = encoder.GetBytes(value);
await fs.WriteAsync(info);
Expand All @@ -93,12 +105,15 @@ internal static class FileHelper {
byte[] info = encoder.GetBytes(value.ToString("G"));
await fs.WriteAsync(info);
}
#endregion

private static readonly UTF8Encoding encoder = new(true);

#region Extension string
private const string WaveExtension = "wav";
private const string AacExtension = "aac";
private const string Mp3Extension = "mp3";
private const string SrtExtension = "srt";
#endregion
}
}
4 changes: 2 additions & 2 deletions ConsoleWhisper/Module/WhisperHelper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@

namespace ConsoleWhisper.Module {
internal class WhisperHelper {
internal static async Task Transcribe(string modelType, string wavFilename, string mediaFilename, string outputDir) {
internal static async Task Transcribe(string modelType, string wavFilename, string mediaFilename, string outputDir, string language) {
try {
using var whisperFactory = WhisperFactory.FromPath(FileHelper.GetModelPath(modelType));

using var processor = whisperFactory.CreateBuilder()
.WithLanguage("auto")
.WithLanguage(language)
.WithPrintProgress()
.Build();

Expand Down
11 changes: 7 additions & 4 deletions ConsoleWhisper/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,13 @@ internal class Program {
int cnt = 1;
foreach (var file in arg.Files) {
var mediaFilename = Path.GetFileName(file);
var wavFilename = await AudioHelper.Extract(file);
Output.Info($"Start transcribing file #{cnt++}: {mediaFilename}");
await WhisperHelper.Transcribe(arg.ModelType, wavFilename, mediaFilename, arg.OutputDir);
FileHelper.DelFile(wavFilename);
var wavFilename = await AudioHelper.Extract(arg.OutputDir, file, arg.OnlyExtract);

if (!arg.OnlyExtract) {
Output.Info($"Start transcribing file #{cnt++}: {mediaFilename}");
await WhisperHelper.Transcribe(arg.ModelType, wavFilename, mediaFilename, arg.OutputDir, arg.Language);
FileHelper.DelFile(wavFilename);
}
}


Expand Down

0 comments on commit 5244505

Please sign in to comment.