-
Notifications
You must be signed in to change notification settings - Fork 0
/
speech-to-text.js
124 lines (104 loc) · 5.29 KB
/
speech-to-text.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
// University of Illinois/NCSA
// Open Source License
// http://otm.illinois.edu/disclose-protect/illinois-open-source-license
// Copyright (c) 2020 Grainger Engineering Library Information Center. All rights reserved.
// Developed by: IDEA Lab
// Grainger Engineering Library Information Center - University of Illinois Urbana-Champaign
// https://library.illinois.edu/enx
// Permission is hereby granted, free of charge, to any person obtaining a copy of
// this software and associated documentation files (the "Software"), to deal with
// the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
// of the Software, and to permit persons to whom the Software is furnished to
// do so, subject to the following conditions:
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimers in the documentation
// and/or other materials provided with the distribution.
// * Neither the names of IDEA Lab, Grainger Engineering Library Information Center,
// nor the names of its contributors may be used to endorse or promote products
// derived from this Software without specific prior written permission.
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
// SOFTWARE.
/* jshint esversion: 6 */
// speech-to-text
// docs: https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/speech-to-text
const config = require('./config');
const sdk = require("microsoft-cognitiveservices-speech-sdk");
// Azure Speech credentials, read once from the local config module.
// serviceRegion is the short region name, e.g., "westus".
const { subscriptionKey, serviceRegion } = config.azure;
/**
 * Convert raw floating-point PCM samples into signed 16-bit PCM, the sample
 * format used when preparing audio for speech-to-text.
 *
 * Each sample is clamped to the range [-1, 1] and scaled by 0x7FFF (32767).
 * Storing into the Int16Array truncates any fractional part toward zero.
 *
 * @param {ArrayLike<number>} buffer - Input samples (e.g. a Float32Array).
 * @returns {Int16Array} A new array of the same length holding the scaled samples.
 */
function convertFloat32ToInt16(buffer) {
    const length = buffer.length;
    const pcm = new Int16Array(length);
    for (let i = 0; i < length; i++) {
        // Clamp both ends: a value outside [-1, 1] would otherwise exceed the
        // int16 range after scaling and wrap around to an unrelated sample.
        const sample = Math.max(-1, Math.min(1, buffer[i]));
        pcm[i] = sample * 0x7FFF;
    }
    return pcm;
}
module.exports = {
processSpeech: function (audioBuffer, session_id, client_id, client_name, logger) {
// create the push stream we need for the speech sdk.
var pushStream = sdk.AudioInputStream.createPushStream();
// open the file and push it to the push stream.
pushStream.write(audioBuffer);
pushStream.close();
// now create the audio-config pointing to our stream and
// the speech config specifying the language.
var audioConfig = sdk.AudioConfig.fromStreamInput(pushStream);
var speechConfig = sdk.SpeechConfig.fromSubscription(subscriptionKey, serviceRegion);
// setting the recognition language to English.
speechConfig.speechRecognitionLanguage = "en-US";
// create the speech recognizer.
var recognizer = new sdk.SpeechRecognizer(speechConfig, audioConfig);
// start the recognizer and wait for a result.
recognizer.recognizeOnceAsync(
function (result) {
if (result.privText) {
io.of('chat').to(session_id.toString()).emit('micText', {
ts: Date.now(),
session_id: session_id,
client_id: client_id,
client_name: client_name,
text: result.privText,
type: "speech-to-text"
});
let session = sessions.get(session_id);
if (session) {
if (session.isRecording) {
let sttObj = {
ts: Date.now(),
session_id: session_id,
client_id: client_id,
text: result.privText
}
let path = getCapturePath(session_id, session.recordingStart, 'stt');
let wstream = fs.createWriteStream(path, { flags: 'a' })
wstream.write(JSON.stringify(sttObj)+'\n');
wstream.close();
}
}
}
try {
recognizer.close();
} catch (error) {
if (logger) logger.error(`Error closing SpeechRecognizer: ${error}`);
}
},
function(err) {
if (logger) logger.error(`Error recognizing speech-to-text: ${err}`);
try {
recognizer.close();
} catch (error) {
if (logger) logger.error(`Error closing SpeechRecognizer: ${error}`);
}
}
);
}
}