-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathhoundify_client.go
271 lines (236 loc) · 7.72 KB
/
houndify_client.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
package houndify
import (
"bufio"
"encoding/json"
"fmt"
"github.com/pkg/errors"
"io"
"io/ioutil"
"net/http"
"strconv"
"strings"
"sync"
"time"
)
// Endpoints of the Houndify API for voice (audio) and text queries.
const (
	houndifyVoiceURL = "https://api.houndify.com:443/v1/audio"
	houndifyTextURL  = "https://api.houndify.com:443/v1/text"
)

// SDKUserAgent is the default user agent set by the SDK.
const SDKUserAgent = "Go Houndify SDK"
// Client holds the configuration and state used for sending all outgoing
// Houndify requests and for saving what their responses carry back.
type Client struct {
	// ClientID is the client identifier that comes from the Houndify site.
	ClientID string

	// ClientKey is the client key that comes from the Houndify site.
	// Keep the key secret.
	ClientKey string

	// enableConversationState is toggled through EnableConversationState and
	// DisableConversationState.
	enableConversationState bool

	// conversationState is the most recently stored conversation state; it is
	// nil when no state has been stored or after it has been cleared.
	conversationState interface{}

	// Verbose, when true, prints all data sent from the server to stdout,
	// unformatted and unparsed. This includes partial transcripts, errors,
	// HTTP response details (status code, headers, etc.), and the final
	// response JSON.
	Verbose bool

	// HttpClient is the HTTP client used to send requests. The search methods
	// install a default &http.Client{} when it is nil.
	HttpClient *http.Client

	// RequestInfoInBody is consumed by the request builder.
	// NOTE(review): presumably selects sending the request info in the HTTP
	// body instead of a header — confirm against BuildRequest. VoiceSearch
	// always resets it to false because the audio stream occupies the body.
	RequestInfoInBody bool
}

// houndServerMessage carries the basic fields that every Hound server JSON
// message has.
type houndServerMessage struct {
	Format  string `json:"Format"`
	Version string `json:"FormatVersion"`
}

// houndServerPartialTranscript is the wire representation of a partial
// transcript message as decoded from the server's JSON.
type houndServerPartialTranscript struct {
	houndServerMessage
	PartialTranscript string `json:"PartialTranscript"`
	DurationMS        int64  `json:"DurationMS"`
	Done              bool   `json:"Done"`
	SafeToStopAudio   *bool  `json:"SafeToStopAudio"`
}
// EnableConversationState switches conversation state handling on for the
// queries sent after this call.
func (c *Client) EnableConversationState() {
	c.enableConversationState = true
}
// DisableConversationState switches conversation state handling off for the
// queries sent after this call. The currently stored state is left untouched.
func (c *Client) DisableConversationState() {
	c.enableConversationState = false
}
// ClearConversationState makes the client "forget" the current conversation
// state by resetting it to the empty (nil) value.
func (c *Client) ClearConversationState() {
	// A nil interface value is the zero value the state starts out with.
	c.conversationState = nil
}
// GetConversationState hands back the conversation state currently held by
// the client, e.g. so the caller can save it. It is nil when no state is held.
func (c *Client) GetConversationState() interface{} {
	return c.conversationState
}
// SetConversationState replaces the client's conversation state with newState,
// e.g. to resume from a previously saved point.
func (c *Client) SetConversationState(newState interface{}) {
	c.conversationState = newState
}
// TextSearch sends a text request and returns the body of the Hound server response.
//
// If the client has no HttpClient set, a default one is installed on the client.
// When conversation state is enabled, the state parsed from a successful
// response replaces the one stored on the client.
//
// An error is returned if there is a failure to create the request, failure to
// connect, failure to read the response, an HTTP status of 400 or above, or
// failure to update the conversation state (if applicable). For the last two
// cases the response body is still returned alongside the error.
func (c *Client) TextSearch(textReq TextRequest) (string, error) {
	req, err := BuildRequest(&textReq, *c)
	// Check the error before touching req: when building fails the request is
	// not guaranteed to be usable (dereferencing it may panic).
	if err != nil {
		return "", err
	}

	// Add the TextRequest's context to the http request
	if textReq.ctx != nil {
		req = req.WithContext(textReq.ctx)
	}

	// Set the extra client headers
	for k, v := range textReq.headers {
		req.Header.Set(k, v)
	}

	if c.HttpClient == nil {
		c.HttpClient = &http.Client{}
	}

	resp, err := c.HttpClient.Do(req)
	if err != nil {
		return "", errors.New("failed to successfully run request: " + err.Error())
	}
	// Register the close right away so the body is released on every return
	// path, including a failed read.
	defer resp.Body.Close()

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return "", errors.New("failed to read body: " + err.Error())
	}

	bodyStr := string(body)

	if c.Verbose {
		fmt.Println(resp.Proto, resp.StatusCode)
		fmt.Println("Headers: ", resp.Header)
		fmt.Println(bodyStr)
	}

	// don't try to parse out conversation state from a bad response
	if resp.StatusCode >= 400 {
		return bodyStr, errors.New("error response")
	}

	// update with new conversation state
	if c.enableConversationState {
		newConvState, err := parseConversationState(bodyStr)
		if err != nil {
			return bodyStr, errors.Wrap(err, "unable to parse new conversation state from response")
		}
		c.conversationState = newConvState
	}

	return bodyStr, nil
}
// VoiceSearch sends an audio request and returns the body of the Hound server response.
//
// The partialTranscriptChan parameter allows the caller to receive PartialTranscripts
// while the Hound server is listening to the voice search. Every transcript is sent
// from its own goroutine, so VoiceSearch never blocks on a slow receiver, but the
// order of delivery is not guaranteed. The channel is closed once all pending
// transcripts have been delivered, which may happen after VoiceSearch has returned.
// If partial transcripts are not needed, either pass nil (no transcripts are sent
// and nothing is closed) or create a throwaway channel that listens and discards
// all the PartialTranscripts sent.
//
// VoiceSearch always resets c.RequestInfoInBody to false, and installs a default
// HttpClient on the client if none is set. When conversation state is enabled, the
// state parsed from a successful response replaces the one stored on the client.
//
// An error is returned if there is a failure to create the request, failure to
// connect, failure to read the response, an HTTP status of 400 or above, or
// failure to update the conversation state (if applicable). For the last two
// cases the final response line is still returned alongside the error.
func (c *Client) VoiceSearch(voiceReq VoiceRequest, partialTranscriptChan chan PartialTranscript) (string, error) {
	// Tracks in-flight partial transcript sends so the channel doesn't get
	// closed before all transcripts are sent.
	var pendingSends sync.WaitGroup
	defer func() {
		if partialTranscriptChan == nil {
			// Nothing to close; closing a nil channel would panic.
			return
		}
		// Close asynchronously: wait for every pending send to finish first,
		// without making VoiceSearch itself wait for the receiver.
		go func() {
			pendingSends.Wait()
			close(partialTranscriptChan)
		}()
	}()

	// Ensure that RequestInfoInBody isn't set for VoiceRequests because the Audio stream
	// has to go into the body
	c.RequestInfoInBody = false

	req, err := BuildRequest(&voiceReq, *c)
	// Check the error before touching req: when building fails the request is
	// not guaranteed to be usable (dereferencing it may panic).
	if err != nil {
		return "", err
	}

	if voiceReq.ctx != nil {
		req = req.WithContext(voiceReq.ctx)
	}

	// Set the extra client headers
	for k, v := range voiceReq.headers {
		req.Header.Set(k, v)
	}

	req.Body = ioutil.NopCloser(voiceReq.AudioStream)

	if c.HttpClient == nil {
		c.HttpClient = &http.Client{}
	}

	// send the request
	resp, err := c.HttpClient.Do(req)
	if err != nil {
		return "", errors.New("failed to successfully run request: " + err.Error())
	}
	// Register the close right away so the body is released on every return
	// path, including a failed read inside the loop below.
	defer resp.Body.Close()

	if c.Verbose {
		fmt.Println(resp.Proto, resp.StatusCode)
		fmt.Println("Headers: ", resp.Header)
	}

	// partial transcript parsing: the response is consumed line by line until
	// the final result line or the end of the stream is reached
	reader := bufio.NewReader(resp.Body)
	var line string
	for {
		raw, err := reader.ReadBytes('\n')
		line = strings.TrimSpace(string(raw))
		if c.Verbose {
			fmt.Println(line)
		}
		if err != nil {
			if err != io.EOF {
				return "", errors.Wrap(err, "error reading Houndify server response")
			}
			// EOF means this line must be the final response, done with partial transcripts
			break
		}
		if line == "" {
			continue
		}
		if _, convertErr := strconv.Atoi(line); convertErr == nil {
			// this is an integer, so one of the ObjectByteCountPrefixes, skip it
			continue
		}

		// attempt to parse incoming json into partial transcript
		incoming := houndServerPartialTranscript{}
		if err := json.Unmarshal([]byte(line), &incoming); err != nil {
			fmt.Println("fail reading hound server message")
			continue
		}

		// NOTE(review): the second format name is spelled exactly as in the
		// original code; presumably it matches what the server emits — confirm
		// before "correcting" it.
		if incoming.Format == "HoundVoiceQueryPartialTranscript" || incoming.Format == "SoundHoundVoiceSearchParialTranscript" {
			if partialTranscriptChan == nil {
				// No receiver was supplied; a send on a nil channel would block forever.
				continue
			}
			// convert from houndify server's struct to SDK's simplified struct
			transcript := PartialTranscript{
				Message:         incoming.PartialTranscript,
				Duration:        time.Duration(incoming.DurationMS) * time.Millisecond,
				Done:            incoming.Done,
				SafeToStopAudio: incoming.SafeToStopAudio,
			}
			pendingSends.Add(1)
			go func() {
				defer pendingSends.Done()
				partialTranscriptChan <- transcript
			}()
			continue
		}

		if incoming.Format == "SoundHoundVoiceSearchResult" {
			// this line is the final response, done with partial transcripts
			break
		}
	}

	bodyStr := line

	// don't try to parse out conversation state from a bad response
	if resp.StatusCode >= 400 {
		return bodyStr, errors.New("error response")
	}

	// update with new conversation state
	if c.enableConversationState {
		newConvState, err := parseConversationState(bodyStr)
		if err != nil {
			return bodyStr, errors.Wrap(err, "unable to parse new conversation state from response")
		}
		c.conversationState = newConvState
	}

	return bodyStr, nil
}