|
| 1 | +package whisper |
| 2 | + |
import (
	"errors"
	"fmt"
	"io"
	"os"
	"os/exec"
	"strings"

	"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
	wav "github.com/go-audio/wav"
)
| 11 | + |
// sh hands c to "/bin/sh -c", giving the child a copy of the current process
// environment. It returns everything the command wrote to stdout and stderr
// (interleaved) together with the error, if any, from running it.
func sh(c string) (string, error) {
	shell := exec.Command("/bin/sh", "-c", c)
	shell.Env = os.Environ()

	combined, runErr := shell.CombinedOutput()
	if runErr != nil {
		// The captured output is still returned so callers can report it.
		return string(combined), runErr
	}
	return string(combined), nil
}
| 18 | + |
// AudioToWav converts the audio file at src into a WAV file at dst by running
// ffmpeg with 16 kHz, mono, pcm_s16le output settings, ready for Transcribe.
//
// ffmpeg is executed directly instead of through a shell, so src and dst are
// each passed as a single argument: paths containing spaces or shell
// metacharacters are used verbatim and cannot inject additional commands.
//
// It returns an error if either path is empty, or if ffmpeg cannot be started
// or exits unsuccessfully; in the latter case the error wraps the underlying
// failure and includes ffmpeg's combined stdout/stderr output.
//
// TODO: use https://github.com/mccoyst/ogg?
func AudioToWav(src, dst string) error {
	if src == "" || dst == "" {
		return fmt.Errorf("converting audio to wav: empty path (src %q, dst %q)", src, dst)
	}

	// One argv entry per token: nothing here is re-parsed by a shell.
	cmd := exec.Command(
		"ffmpeg",
		"-i", src,
		"-format", "s16le",
		"-ar", "16000",
		"-ac", "1",
		"-acodec", "pcm_s16le",
		dst,
	)

	out, err := cmd.CombinedOutput()
	if err != nil {
		return fmt.Errorf("converting %q to wav with ffmpeg: %w: %s", src, err, out)
	}

	return nil
}
| 29 | + |
| 30 | +func Transcribe(modelpath, audiopath, language string) (string, error) { |
| 31 | + // Open samples |
| 32 | + fh, err := os.Open(audiopath) |
| 33 | + if err != nil { |
| 34 | + return "", err |
| 35 | + } |
| 36 | + defer fh.Close() |
| 37 | + |
| 38 | + // Read samples |
| 39 | + d := wav.NewDecoder(fh) |
| 40 | + buf, err := d.FullPCMBuffer() |
| 41 | + if err != nil { |
| 42 | + return "", err |
| 43 | + } |
| 44 | + |
| 45 | + data := buf.AsFloat32Buffer().Data |
| 46 | + |
| 47 | + // Load the model |
| 48 | + model, err := whisper.New(modelpath) |
| 49 | + if err != nil { |
| 50 | + return "", err |
| 51 | + } |
| 52 | + defer model.Close() |
| 53 | + |
| 54 | + // Process samples |
| 55 | + context, err := model.NewContext() |
| 56 | + if err != nil { |
| 57 | + return "", err |
| 58 | + |
| 59 | + } |
| 60 | + |
| 61 | + if language != "" { |
| 62 | + context.SetLanguage(language) |
| 63 | + } |
| 64 | + |
| 65 | + if err := context.Process(data, nil); err != nil { |
| 66 | + return "", err |
| 67 | + } |
| 68 | + |
| 69 | + text := "" |
| 70 | + for { |
| 71 | + segment, err := context.NextSegment() |
| 72 | + if err != nil { |
| 73 | + break |
| 74 | + } |
| 75 | + text += segment.Text |
| 76 | + } |
| 77 | + |
| 78 | + return text, nil |
| 79 | +} |
0 commit comments