Skip to content

Commit bae05d6

Browse files
Dmitry Donskih authored and hrosa committed
Merged in alexander_nikiforov/telscale-media-server-asr/asr_driver_fixes (pull request #42)
Asr driver fixes Approved-by: Henrique Rosa <[email protected]>
2 parents d8df6a9 + 0923ac3 commit bae05d6

File tree

9 files changed

+95
-41
lines changed

9 files changed

+95
-41
lines changed

bootstrap/src/main/config/mediaserver.xml

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -79,6 +79,8 @@
7979
<!--driver name="google-api" class="org.mobicents.media.drivers.asr.gcs.GoogleAsrDriverImpl">
8080
<parameter name="hertz">8000</parameter>
8181
<parameter name="responseTimeout">1000</parameter>
82+
<parameter name="interimResults">false</parameter>
83+
<parameter name="singleUtterance">false</parameter>
8284
</driver-->
8385
</subsystem>
8486
</subsystems>

controls/mgcp2/src/main/java/org/mobicents/media/control/mgcp/pkg/au/asr/AsrContext.java

Lines changed: 19 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -49,6 +49,7 @@ public class AsrContext {
4949
private char lastTone;
5050
private int returnCode;
5151
private String lastRecognizedText;
52+
private String interimRecognizedText;
5253
private StringBuilder finalRecognizedText;
5354

5455
public AsrContext(Parameters params) {
@@ -177,13 +178,18 @@ protected void setReturnCode(int returnCode) {
177178
this.returnCode = returnCode;
178179
}
179180

180-
public void appendRecognizedText(final String recognizedText) {
181+
public void appendRecognizedText(final String recognizedText, final boolean isFinal) {
181182
lastRecognizedText = recognizedText;
182-
if (finalRecognizedText != null) {
183-
finalRecognizedText.append(" ");
184-
finalRecognizedText.append(recognizedText);
183+
if (isFinal) {
184+
if (finalRecognizedText != null) {
185+
finalRecognizedText.append(System.lineSeparator());
186+
finalRecognizedText.append(recognizedText);
187+
} else {
188+
finalRecognizedText = new StringBuilder(recognizedText);
189+
}
190+
interimRecognizedText = null;
185191
} else {
186-
finalRecognizedText = new StringBuilder(recognizedText);
192+
interimRecognizedText = recognizedText;
187193
}
188194
}
189195

@@ -192,7 +198,14 @@ public String getLastRecognizedText() {
192198
}
193199

194200
public String getFinalRecognizedText() {
195-
return finalRecognizedText.toString();
201+
StringBuilder result = finalRecognizedText != null ? new StringBuilder(finalRecognizedText.toString()) : new StringBuilder();
202+
if (interimRecognizedText != null) {
203+
if (result.length() > 0) {
204+
result.append(System.lineSeparator());
205+
}
206+
result.append(interimRecognizedText);
207+
}
208+
return result.toString();
196209
}
197210

198211
public static class Parameters {

controls/mgcp2/src/main/java/org/mobicents/media/control/mgcp/pkg/au/asr/AsrFsmImpl.java

Lines changed: 9 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -243,7 +243,8 @@ public void enterCollecting(AsrState from, AsrState to, AsrEvent event,
243243

244244
@Override
245245
public void onTextRecognized(AsrState from, AsrState to, AsrEvent event, AsrContext context) {
246-
final OperationComplete operationComplete = new OperationComplete(AsrSignal.SYMBOL, ReturnCode.PARTIAL_SUCCESS.code());
246+
final OperationComplete operationComplete = new OperationComplete(AsrSignal.SYMBOL,
247+
ReturnCode.PARTIAL_SUCCESS.code());
247248
operationComplete.setParameter(ReturnParameters.ASR_RESULT.symbol(),
248249
new String(Hex.encodeHex(context.getLastRecognizedText().getBytes())));
249250
mgcpEventSubject.notify(mgcpEventSubject, operationComplete);
@@ -359,7 +360,7 @@ public void enterEvaluating(AsrState from, AsrState to, AsrEvent event, AsrConte
359360
// Pattern validation was successful
360361
fire(AsrEvent.SUCCEED, context);
361362
}
362-
} else if (!StringUtils.isEmpty(context.getLastRecognizedText())) {
363+
} else if (!StringUtils.isEmpty(context.getFinalRecognizedText())) {
363364
fire(AsrEvent.SUCCEED, context);
364365
} else {
365366
fire(AsrEvent.NO_RECOGNIZED_TEXT, context);
@@ -378,7 +379,7 @@ public void enterCanceled(AsrState from, AsrState to, AsrEvent event, AsrContext
378379
if (log.isTraceEnabled()) {
379380
log.trace("Entered CANCELED state");
380381
}
381-
if (!StringUtils.isEmpty(context.getLastRecognizedText())) {
382+
if (!StringUtils.isEmpty(context.getFinalRecognizedText())) {
382383
fire(AsrEvent.SUCCEED, context);
383384
} else {
384385
context.setReturnCode(ReturnCode.NO_SPEECH.code());
@@ -612,17 +613,19 @@ public void onSpeechDetected() {
612613

613614
private final class LocalAsrEngineListener implements AsrEngine.AsrEngineListener {
614615
@Override
615-
public void onSpeechRecognized(String text) {
616+
public void onSpeechRecognized(final String text, final boolean isFinal) {
616617
if (StringUtils.isEmpty(text)) {
617-
log.info("Recognized text is empty. Ignore it");
618+
if (log.isTraceEnabled()){
619+
log.trace("Recognized text is empty. Ignore it");
620+
}
618621
return;
619622
}
620623
if (log.isTraceEnabled()) {
621624
log.trace("onSpeechRecognized: " + text);
622625
}
623626
if (isStillCollecting()) {
624627
if (!context.isDigitsOnlyMode()) {
625-
context.appendRecognizedText(text);
628+
context.appendRecognizedText(text, isFinal);
626629
fire(AsrEvent.RECOGNIZED_TEXT, context);
627630
} else if (log.isTraceEnabled()) {
628631
log.trace("We are in DigitsOnly mode, so we ignore recognized text");

controls/mgcp2/src/test/java/org/mobicents/media/control/mgcp/pkg/au/asr/AsrCollectTest.java

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -135,9 +135,9 @@ public void testCollectWithIncludeEndInputKey() throws InterruptedException, Dec
135135
asr.execute();
136136

137137
detectorListener.process(new DtmfEventImpl(detector, "1", -30)); // should be ignored
138-
asrEngineListener.onSpeechRecognized("one");
139-
asrEngineListener.onSpeechRecognized("two");
140-
asrEngineListener.onSpeechRecognized("three");
138+
asrEngineListener.onSpeechRecognized("one", true);
139+
asrEngineListener.onSpeechRecognized("two", true);
140+
asrEngineListener.onSpeechRecognized("three", true);
141141
detectorListener.process(new DtmfEventImpl(detector, "#", -30));
142142

143143
waitForResponse();
@@ -187,7 +187,7 @@ public void testCollectWithEndInputKeyAndResponseAfterIt() throws InterruptedExc
187187

188188
detectorListener.process(new DtmfEventImpl(detector, "1", -30)); // should be ignored
189189
detectorListener.process(new DtmfEventImpl(detector, "#", -30));
190-
asrEngineListener.onSpeechRecognized("text");
190+
asrEngineListener.onSpeechRecognized("text", true);
191191

192192
waitForResponse();
193193

@@ -259,9 +259,9 @@ public void testCollectWithDriverError() throws InterruptedException, DecoderExc
259259
verify(player, never()).activate();
260260
verify(observer, never()).onEvent(eq(asr), eventCaptor.capture());
261261

262-
asrEngineListener.onSpeechRecognized("text");
262+
asrEngineListener.onSpeechRecognized("text", true);
263263
asrEngineListener.onDriverError();
264-
asrEngineListener.onSpeechRecognized("text");
264+
asrEngineListener.onSpeechRecognized("text", true);
265265

266266
// then
267267
verify(detector, times(1)).activate();
@@ -322,7 +322,7 @@ public void testAsrCancelWithResult() throws InterruptedException, DecoderExcept
322322
asr.observe(observer);
323323
asr.execute();
324324

325-
asrEngineListener.onSpeechRecognized("text");
325+
asrEngineListener.onSpeechRecognized("text", true);
326326
asr.cancel();
327327

328328
// then
@@ -421,7 +421,7 @@ public void testMrtWithResult() throws InterruptedException, DecoderException {
421421
asr.observe(observer);
422422
asr.execute();
423423

424-
asrEngineListener.onSpeechRecognized("text");
424+
asrEngineListener.onSpeechRecognized("text", true);
425425
asr.getInputTimeoutDetectorListener().onMaximumRecognitionTime();
426426

427427
waitForResponse();
@@ -460,7 +460,7 @@ public void testPstWithResult() throws InterruptedException, DecoderException {
460460
asr.observe(observer);
461461
asr.execute();
462462

463-
asrEngineListener.onSpeechRecognized("text");
463+
asrEngineListener.onSpeechRecognized("text", true);
464464
asr.getInputTimeoutDetectorListener().onPostSpeechTimer();
465465

466466
waitForResponse();

controls/mgcp2/src/test/java/org/mobicents/media/control/mgcp/pkg/au/asr/DtmfSpeechCollectTest.java

Lines changed: 30 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,24 @@
1+
/*
2+
* TeleStax, Open Source Cloud Communications
3+
* Copyright 2011-2017, Telestax Inc and individual contributors
4+
* by the @authors tag.
5+
*
6+
* This is free software; you can redistribute it and/or modify it
7+
* under the terms of the GNU Lesser General Public License as
8+
* published by the Free Software Foundation; either version 2.1 of
9+
* the License, or (at your option) any later version.
10+
*
11+
* This software is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14+
* Lesser General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU Lesser General Public
17+
* License along with this software; if not, write to the Free
18+
* Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
19+
* 02110-1301 USA, or see the FSF site: http://www.fsf.org.
20+
*/
21+
122
package org.mobicents.media.control.mgcp.pkg.au.asr;
223

324
import com.google.common.util.concurrent.ListeningScheduledExecutorService;
@@ -121,7 +142,7 @@ public void testCollectSpeechWithEndInputKey() throws InterruptedException, Deco
121142
asr.observe(observer);
122143
asr.execute();
123144

124-
asrEngineListener.onSpeechRecognized("text");
145+
asrEngineListener.onSpeechRecognized("text", true);
125146
detectorListener.process(new DtmfEventImpl(detector, "#", -30));
126147

127148
waitForResponse();
@@ -164,7 +185,7 @@ public void testCollectWithEndInputKeyAndResponseAfterIt() throws InterruptedExc
164185
verify(observer, never()).onEvent(eq(asr), eventCaptor.capture());
165186

166187
detectorListener.process(new DtmfEventImpl(detector, "#", -30));
167-
asrEngineListener.onSpeechRecognized("text");
188+
asrEngineListener.onSpeechRecognized("text", true);
168189

169190
waitForResponse();
170191

@@ -230,7 +251,7 @@ public void testAsrCancelWithResult() throws InterruptedException, DecoderExcept
230251
asr.observe(observer);
231252
asr.execute();
232253

233-
asrEngineListener.onSpeechRecognized("text");
254+
asrEngineListener.onSpeechRecognized("text", true);
234255
asr.cancel();
235256

236257
// then
@@ -328,7 +349,7 @@ public void testMrtWithResult() throws InterruptedException, DecoderException {
328349
asr.observe(observer);
329350
asr.execute();
330351

331-
asrEngineListener.onSpeechRecognized("text");
352+
asrEngineListener.onSpeechRecognized("text", true);
332353

333354
asr.getInputTimeoutDetectorListener().onMaximumRecognitionTime();
334355

@@ -366,7 +387,7 @@ public void testPstWithResult() throws InterruptedException, DecoderException {
366387
asr.observe(observer);
367388
asr.execute();
368389

369-
asrEngineListener.onSpeechRecognized("text");
390+
asrEngineListener.onSpeechRecognized("text", true);
370391
asr.getInputTimeoutDetectorListener().onPostSpeechTimer();
371392

372393
waitForResponse();
@@ -407,7 +428,7 @@ public void testCollectDigitsWithEndInputKey() throws InterruptedException, Deco
407428
asr.observe(observer);
408429
asr.execute();
409430

410-
asrEngineListener.onSpeechRecognized("text");
431+
asrEngineListener.onSpeechRecognized("text", true);
411432

412433
log.info("We are sending '1'");
413434
detectorListener.process(new DtmfEventImpl(detector, "1", -30));
@@ -454,13 +475,13 @@ public void testCollectSpeechAfterDigitsCollectingStarted() throws InterruptedEx
454475
asr.observe(observer);
455476
asr.execute();
456477

457-
asrEngineListener.onSpeechRecognized("first text");
478+
asrEngineListener.onSpeechRecognized("first text", true);
458479

459480
log.info("We are sending '1'");
460481
detectorListener.process(new DtmfEventImpl(detector, "1", -30));
461482
log.info("We sent '1'");
462-
asrEngineListener.onSpeechRecognized("second text"); // will be ignored
463-
asrEngineListener.onSpeechRecognized("third text"); //will be ignored
483+
asrEngineListener.onSpeechRecognized("second text", true); // will be ignored
484+
asrEngineListener.onSpeechRecognized("third text", true); //will be ignored
464485

465486
waitForResponse();
466487

drivers/asr-driver/src/main/java/org/mobicents/media/drivers/asr/gcs/GoogleAsrDriverImpl.java

Lines changed: 18 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,6 @@
3131
import org.mobicents.media.resource.asr.api.AsrException;
3232

3333
import java.io.File;
34-
import java.io.IOException;
3534
import java.util.ArrayList;
3635
import java.util.List;
3736
import java.util.Map;
@@ -45,7 +44,12 @@ public class GoogleAsrDriverImpl implements AsrDriver {
4544
private static final int DEFAULT_HERTZ = 8000;
4645
private static final String HERTZ_PROPERTY = "hertz";
4746
private static final int DEFAULT_RESPONSE_TIMEOUT_IN_MILLISECONDS = 1000;
47+
private static final boolean DEFAULT_INTERIM_RESULTS_VALUE= false;
48+
private static final boolean DEFAULT_UTTERANCE_VALUE = false;
4849
private static final String RESPONSE_TIMEOUT_PROPERTY = "responseTimeout";
50+
private static final String INTERIM_RESULTS_PROPERTY = "interimResults";
51+
private static final String SINGLE_UTTERANCE_PROPERTY = "singleUtterance";
52+
4953

5054

5155
private Logger logger = Logger.getLogger(GoogleAsrDriverImpl.class);
@@ -58,6 +62,8 @@ public class GoogleAsrDriverImpl implements AsrDriver {
5862

5963
private int hertz = DEFAULT_HERTZ;
6064
private int responseTimeoutInMilliseconds = DEFAULT_RESPONSE_TIMEOUT_IN_MILLISECONDS;
65+
private boolean interimResults = DEFAULT_INTERIM_RESULTS_VALUE;
66+
private boolean singleUtterance = DEFAULT_UTTERANCE_VALUE;
6167

6268
@Override
6369
public void configure(Map<String, String> map) {
@@ -89,6 +95,12 @@ public void configure(Map<String, String> map) {
8995
logger.error("responseTimeout property contains not number chars");
9096
}
9197
}
98+
if (map.containsKey(INTERIM_RESULTS_PROPERTY)){
99+
interimResults = Boolean.parseBoolean(map.get(INTERIM_RESULTS_PROPERTY));
100+
}
101+
if (map.containsKey(SINGLE_UTTERANCE_PROPERTY)){
102+
singleUtterance = Boolean.parseBoolean(map.get(SINGLE_UTTERANCE_PROPERTY));
103+
}
92104
}
93105

94106
@Override
@@ -203,9 +215,13 @@ private StreamingRecognitionConfig setConfig(String language, List<String> hints
203215
.addSpeechContexts(SpeechContext.newBuilder().addAllPhrases(hints).build())
204216
.setMaxAlternatives(1)
205217
.build();
218+
206219
return StreamingRecognitionConfig.newBuilder()
207220
.setConfig(recConfig)
221+
.setInterimResults(interimResults)
222+
.setSingleUtterance(singleUtterance)
208223
.build();
224+
209225
} catch (Exception e) {
210226
logger.error("setConfig error", e);
211227
return null;
@@ -240,12 +256,11 @@ public void onNext(T message) {
240256
if (!results.isEmpty()) {
241257
StreamingRecognitionResult result = results.get(0);
242258
SpeechRecognitionAlternative alternatives = result.getAlternatives(0);
243-
244259
if (logger.isTraceEnabled()) {
245260
logger.trace("Got recognition result from google: " + alternatives.getTranscript());
246261
}
247262
if (listener != null) {
248-
listener.onSpeechRecognized(alternatives.getTranscript());
263+
listener.onSpeechRecognized(alternatives.getTranscript(), result.getIsFinal());
249264
}
250265
} else {
251266
if (logger.isTraceEnabled()) {

resources/asr-api/src/main/java/org/mobicents/media/resource/asr/api/AsrDriver.java

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,7 @@ public interface AsrDriver {
4242
int getResponseTimeoutInMilliseconds();
4343

4444
interface AsrDriverEventListener {
45-
void onSpeechRecognized(String text);
45+
void onSpeechRecognized(String text, boolean isFinal);
4646
void onError(final AsrException error);
4747
}
48-
}
48+
}

resources/asr/src/main/java/org/mobicents/media/resource/asr/AsrEngine.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,7 @@ public UnknownAsrDriverException(final String driverName) {
4343
void finishRecognizing();
4444

4545
interface AsrEngineListener {
46-
void onSpeechRecognized(String text);
46+
void onSpeechRecognized(String text, boolean isFinal);
4747
void onDriverError();
4848
}
4949
}

resources/asr/src/main/java/org/mobicents/media/resource/asr/AsrEngineImpl.java

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -164,7 +164,7 @@ public AudioOutput getAudioOutput() {
164164
return output;
165165
}
166166

167-
private void fireSpeechRecognizedEvent(final String text) {
167+
private void fireSpeechRecognizedEvent(final String text, final boolean isFinal) {
168168
scheduler.submit(new Task() {
169169
@Override
170170
public int getQueueNumber() {
@@ -174,7 +174,7 @@ public int getQueueNumber() {
174174
@Override
175175
public long perform() {
176176
if (AsrEngineImpl.this.listener != null) {
177-
AsrEngineImpl.this.listener.onSpeechRecognized(text);
177+
AsrEngineImpl.this.listener.onSpeechRecognized(text, isFinal);
178178
}
179179
return 0;
180180
}
@@ -200,11 +200,11 @@ public long perform() {
200200

201201
private AsrDriver.AsrDriverEventListener driverEventListener = new AsrDriver.AsrDriverEventListener() {
202202
@Override
203-
public void onSpeechRecognized(final String text) {
203+
public void onSpeechRecognized(final String text, final boolean isFinal) {
204204
if (logger.isTraceEnabled()) {
205-
logger.trace("ASR driver recognized text: " + text);
205+
logger.trace("ASR driver recognized text: \'" + text + "\', isFinal=" + isFinal);
206206
}
207-
fireSpeechRecognizedEvent(text);
207+
fireSpeechRecognizedEvent(text, isFinal);
208208
}
209209

210210
@Override

0 commit comments

Comments
 (0)