Skip to content

Commit

Permalink
Only process the chunks that have active frames in them (vosk)
Browse files (browse the repository at this point in the history)
  • Loading branch information
kercre123 committed May 8, 2024
1 parent bea3801 commit 84d8fb4
Show file tree
Hide file tree
Showing 10 changed files with 23 additions and 14 deletions.
2 changes: 2 additions & 0 deletions chipper/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ require (
github.com/akavel/rsrc v0.10.2 // indirect
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc // indirect
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf // indirect
github.com/alphacep/vosk-api/go v0.3.50 // indirect
github.com/cenkalti/backoff v2.2.1+incompatible // indirect
github.com/currantlabs/ble v0.0.0-20171229162446-c1d21c164cf8 // indirect
github.com/dchest/jsmin v0.0.0-20220218165748-59f39799265f // indirect
Expand All @@ -49,6 +50,7 @@ require (
github.com/hashicorp/hcl v1.0.0 // indirect
github.com/jamesruan/sodium v0.0.0-20181216154042-9620b83ffeae // indirect
github.com/josephspurrier/goversioninfo v1.4.0 // indirect
github.com/kercre123/vosk-api v1.0.1 // indirect
github.com/kr/text v0.2.0 // indirect
github.com/magiconair/properties v1.8.1 // indirect
github.com/mattn/go-colorable v0.1.8 // indirect
Expand Down
4 changes: 4 additions & 0 deletions chipper/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc h1:cAKDfWh5Vpd
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf h1:qet1QNfXsQxTZqLG4oE62mJzwPIB8+Tee4RNCL9ulrY=
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
github.com/alphacep/vosk-api/go v0.3.50 h1:2vSN41RCU1WdHEqBrhKtTggfKL6Yu5Dmj+urVszwiuw=
github.com/alphacep/vosk-api/go v0.3.50/go.mod h1:9X8IJsHnFk/b1xyvjlZifo+ZL5VTAx3LW+JQce/eRcA=
github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY=
github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0/go.mod h1:t2tdKJDJF9BV14lnkjHmOQgcvEKgtqs5a1N3LNdJhGE=
github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o=
Expand Down Expand Up @@ -306,6 +308,8 @@ github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7V
github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213/go.mod h1:vNUNkEQ1e29fT/6vq2aBdFsgNPmy8qMdSay1npru+Sw=
github.com/karrick/godirwalk v1.8.0/go.mod h1:H5KPZjojv4lE+QYImBI8xVtrBRgYrIVsaRPx4tDPEn4=
github.com/karrick/godirwalk v1.10.3/go.mod h1:RoGL9dQei4vP9ilrpETWE8CLOZ1kiN0LhBygSwrAsHA=
github.com/kercre123/vosk-api v1.0.1 h1:D5CeAMNHPj87M9fKrqP+a2gEQefq7sJCpaiuRscbiJY=
github.com/kercre123/vosk-api v1.0.1/go.mod h1:mJlLhtYS207jVY9QffYGxhX6Up0UfSQ3p0uNbXsf3Zc=
github.com/kercre123/vosk-api/go v1.0.2 h1:NDJUNv2ddw128amiVZ2xE2gKfKHeBRRhboSh+yiH6Wg=
github.com/kercre123/vosk-api/go v1.0.2/go.mod h1:oVZG/VFmg23uNDzjShcw7UhZHWYG2zXgBm5FqioE2Ao=
github.com/kercre123/zeroconf v1.0.1 h1:Mbd8mN6xnNWYIqBN38x3jJjiPP2RmK4orzbGZsa1EOY=
Expand Down
2 changes: 1 addition & 1 deletion chipper/pkg/wirepod/preqs/stream_houndify.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ func StreamAudioToHoundify(sreq sr.SpeechRequest, client houndify.Client) string
default:
var chunk []byte
chunk, err = sreq.GetNextStreamChunkOpus()
speechDone = sreq.DetectEndOfSpeech()
speechDone, _ = sreq.DetectEndOfSpeech()
if err != nil {
fmt.Println("End of stream")
return
Expand Down
11 changes: 7 additions & 4 deletions chipper/pkg/wirepod/speechrequest/speechrequest.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ func BytesToIntVAD(stream opus.OggStream, data []byte, die bool, isOpus bool) []
}

// Uses VAD to detect when the user stops speaking
func (req *SpeechRequest) DetectEndOfSpeech() bool {
func (req *SpeechRequest) DetectEndOfSpeech() (bool, bool) {
// changes InactiveFrames and ActiveFrames in req
inactiveNumMax := 23
vad := req.VADInst
Expand All @@ -111,7 +111,7 @@ func (req *SpeechRequest) DetectEndOfSpeech() bool {
if err != nil {
logger.Println("VAD err:")
logger.Println(err)
return true
return true, false
}
if active {
req.ActiveFrames = req.ActiveFrames + 1
Expand All @@ -121,10 +121,13 @@ func (req *SpeechRequest) DetectEndOfSpeech() bool {
}
if req.InactiveFrames >= inactiveNumMax && req.ActiveFrames > 18 {
logger.Println("(Bot " + req.Device + ") End of speech detected.")
return true
return true, true
}
}
return false
if req.ActiveFrames < 5 {
return false, false
}
return false, true
}

// Converts a vtt.*Request to a SpeechRequest, which allows functions like DetectEndOfSpeech to work
Expand Down
2 changes: 1 addition & 1 deletion chipper/pkg/wirepod/stt/coqui/Coqui.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ func STT(req sr.SpeechRequest) (string, error) {
return "", err
}
coquiStream.FeedAudioContent(sr.BytesToSamples(chunk))
speechIsDone = req.DetectEndOfSpeech()
speechIsDone, _ = req.DetectEndOfSpeech()
if speechIsDone {
break
}
Expand Down
2 changes: 1 addition & 1 deletion chipper/pkg/wirepod/stt/houndify/Houndify.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ func STT(sreq sr.SpeechRequest) (string, error) {
default:
var chunk []byte
chunk, err = sreq.GetNextStreamChunkOpus()
speechDone = sreq.DetectEndOfSpeech()
speechDone, _ = sreq.DetectEndOfSpeech()
if err != nil {
fmt.Println("End of stream")
return
Expand Down
2 changes: 1 addition & 1 deletion chipper/pkg/wirepod/stt/leopard/Leopard.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ func STT(req sr.SpeechRequest) (transcribedText string, err error) {
BotNumMu.Unlock()
return "", err
}
speechIsDone = req.DetectEndOfSpeech()
speechIsDone, _ = req.DetectEndOfSpeech()
if speechIsDone {
break
}
Expand Down
8 changes: 4 additions & 4 deletions chipper/pkg/wirepod/stt/vosk/Vosk.go
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,6 @@ func getRec(withGrm bool) (*vosk.VoskRecognizer, int) {

func STT(req sr.SpeechRequest) (string, error) {
logger.Println("(Bot " + req.Device + ", Vosk) Processing...")
speechIsDone := false
var withGrm bool
if (vars.APIConfig.Knowledge.IntentGraph || req.IsKG) || !GrammerEnable {
logger.Println("Using general recognizer")
Expand All @@ -203,9 +202,10 @@ func STT(req sr.SpeechRequest) (string, error) {
if err != nil {
return "", err
}
rec.AcceptWaveform(chunk)
// has to be split into 320 []byte chunks for VAD
speechIsDone = req.DetectEndOfSpeech()
speechIsDone, doProcess := req.DetectEndOfSpeech()
if doProcess {
rec.AcceptWaveform(chunk)
}
if speechIsDone {
break
}
Expand Down
2 changes: 1 addition & 1 deletion chipper/pkg/wirepod/stt/whisper.cpp/WhisperCpp.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ func STT(req sr.SpeechRequest) (string, error) {
return "", err
}
// has to be split into 320 []byte chunks for VAD
speechIsDone = req.DetectEndOfSpeech()
speechIsDone, _ = req.DetectEndOfSpeech()
if speechIsDone {
break
}
Expand Down
2 changes: 1 addition & 1 deletion chipper/pkg/wirepod/stt/whisper/Whisper.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ func STT(req sr.SpeechRequest) (string, error) {
return "", err
}
// has to be split into 320 []byte chunks for VAD
speechIsDone = req.DetectEndOfSpeech()
speechIsDone, _ = req.DetectEndOfSpeech()
if speechIsDone {
break
}
Expand Down

0 comments on commit 84d8fb4

Please sign in to comment.