diff --git a/src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/OfflineSharedSpeechToTextImplementation.macios.cs b/src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/OfflineSharedSpeechToTextImplementation.macios.cs index 3e584d093e..dc225afc16 100644 --- a/src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/OfflineSharedSpeechToTextImplementation.macios.cs +++ b/src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/OfflineSharedSpeechToTextImplementation.macios.cs @@ -1,11 +1,13 @@ using AVFoundation; +using Microsoft.Maui.Dispatching; using Speech; namespace CommunityToolkit.Maui.Media; public sealed partial class OfflineSpeechToTextImplementation { - AVAudioEngine? audioEngine; + readonly AVAudioEngine audioEngine = new(); + IDispatcherTimer? silenceTimer; SFSpeechRecognizer? speechRecognizer; SFSpeechRecognitionTask? recognitionTask; SFSpeechAudioBufferRecognitionRequest? liveSpeechRequest; @@ -19,12 +21,11 @@ public sealed partial class OfflineSpeechToTextImplementation /// public ValueTask DisposeAsync() { - audioEngine?.Dispose(); + audioEngine.Dispose(); speechRecognizer?.Dispose(); liveSpeechRequest?.Dispose(); recognitionTask?.Dispose(); - audioEngine = null; speechRecognizer = null; liveSpeechRequest = null; recognitionTask = null; @@ -41,12 +42,6 @@ public Task RequestPermissions(CancellationToken cancellationToken = defau return taskResult.Task.WaitAsync(cancellationToken); } - static Task IsSpeechPermissionAuthorized(CancellationToken cancellationToken) - { - cancellationToken.ThrowIfCancellationRequested(); - return Task.FromResult(SFSpeechRecognizer.AuthorizationStatus is SFSpeechRecognizerAuthorizationStatus.Authorized); - } - static void InitializeAvAudioSession(out AVAudioSession sharedAvAudioSession) { sharedAvAudioSession = AVAudioSession.SharedInstance(); @@ -62,10 +57,77 @@ static void InitializeAvAudioSession(out AVAudioSession sharedAvAudioSession) void InternalStopListening() { - audioEngine?.InputNode.RemoveTapOnBus(0); - 
audioEngine?.Stop(); + silenceTimer?.Tick -= OnSilenceTimerTick; + silenceTimer?.Stop(); liveSpeechRequest?.EndAudio(); - recognitionTask?.Cancel(); + recognitionTask?.Finish(); + audioEngine.Stop(); + audioEngine.InputNode.RemoveTapOnBus(0); + OnSpeechToTextStateChanged(CurrentState); + + recognitionTask?.Dispose(); + speechRecognizer?.Dispose(); + liveSpeechRequest?.Dispose(); + + speechRecognizer = null; + liveSpeechRequest = null; + recognitionTask = null; + } + + void OnSilenceTimerTick(object? sender, EventArgs e) + { + InternalStopListening(); + } + + SFSpeechRecognitionTask CreateSpeechRecognizerTask(SFSpeechRecognizer sfSpeechRecognizer, SFSpeechAudioBufferRecognitionRequest sfSpeechAudioBufferRecognitionRequest) + { + int currentIndex = 0; + return sfSpeechRecognizer.GetRecognitionTask(sfSpeechAudioBufferRecognitionRequest, (result, err) => + { + if (err is not null) + { + currentIndex = 0; + InternalStopListening(); + OnRecognitionResultCompleted(SpeechToTextResult.Failed(new Exception(err.LocalizedDescription))); + } + else + { + if (result.Final) + { + currentIndex = 0; + InternalStopListening(); + OnRecognitionResultCompleted(SpeechToTextResult.Success(result.BestTranscription.FormattedString)); + } + else + { + RestartTimer(); + if (currentIndex <= 0) + { + OnSpeechToTextStateChanged(CurrentState); + } + + currentIndex++; + OnRecognitionResultUpdated(result.BestTranscription.FormattedString); + } + } + }); + } + + void InitSilenceTimer(SpeechToTextOptions options) + { + if (options.AutoStopSilenceTimeout < TimeSpan.MaxValue && options.AutoStopSilenceTimeout > TimeSpan.Zero) + { + silenceTimer = Dispatcher.GetForCurrentThread()?.CreateTimer(); + silenceTimer?.Tick += OnSilenceTimerTick; + silenceTimer?.Interval = options.AutoStopSilenceTimeout; + silenceTimer?.Start(); + } + } + + void RestartTimer() + { + silenceTimer?.Stop(); + silenceTimer?.Start(); } } \ No newline at end of file diff --git 
a/src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/OfflineSpeechToTextImplementation.android.cs b/src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/OfflineSpeechToTextImplementation.android.cs index 1f4050564f..4f2db19edd 100644 --- a/src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/OfflineSpeechToTextImplementation.android.cs +++ b/src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/OfflineSpeechToTextImplementation.android.cs @@ -53,7 +53,12 @@ static Intent CreateSpeechIntent(SpeechToTextOptions options) intent.PutExtra(RecognizerIntent.ExtraLanguage, javaLocale); intent.PutExtra(RecognizerIntent.ExtraLanguagePreference, javaLocale); intent.PutExtra(RecognizerIntent.ExtraOnlyReturnLanguagePreference, javaLocale); - + if (options.AutoStopSilenceTimeout < TimeSpan.MaxValue && options.AutoStopSilenceTimeout > TimeSpan.Zero) + { + intent.PutExtra(RecognizerIntent.ExtraSpeechInputCompleteSilenceLengthMillis, (long)options.AutoStopSilenceTimeout.TotalMilliseconds); + intent.PutExtra(RecognizerIntent.ExtraSpeechInputPossiblyCompleteSilenceLengthMillis, (long)options.AutoStopSilenceTimeout.TotalMilliseconds); + } + return intent; } diff --git a/src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/OfflineSpeechToTextImplementation.ios.cs b/src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/OfflineSpeechToTextImplementation.ios.cs index 3949cacfca..9095608e80 100644 --- a/src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/OfflineSpeechToTextImplementation.ios.cs +++ b/src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/OfflineSpeechToTextImplementation.ios.cs @@ -9,7 +9,7 @@ namespace CommunityToolkit.Maui.Media; /// public sealed partial class OfflineSpeechToTextImplementation { - [MemberNotNull(nameof(audioEngine), nameof(recognitionTask), nameof(liveSpeechRequest))] + [MemberNotNull(nameof(recognitionTask), nameof(liveSpeechRequest))] [SupportedOSPlatform("ios13.0")] [SupportedOSPlatform("maccatalyst")] Task 
InternalStartListening(SpeechToTextOptions options, CancellationToken token = default) @@ -27,7 +27,6 @@ Task InternalStartListening(SpeechToTextOptions options, CancellationToken token throw new ArgumentException("Speech recognizer is not available"); } - audioEngine = new AVAudioEngine(); liveSpeechRequest = new SFSpeechAudioBufferRecognitionRequest() { ShouldReportPartialResults = options.ShouldReportPartialResults, @@ -48,39 +47,9 @@ Task InternalStartListening(SpeechToTextOptions options, CancellationToken token throw new ArgumentException("Error starting audio engine - " + error.LocalizedDescription); } - var currentIndex = 0; - recognitionTask = speechRecognizer.GetRecognitionTask(liveSpeechRequest, (result, err) => - { - if (err is not null) - { - InternalStopListening(); - OnRecognitionResultCompleted(SpeechToTextResult.Failed(new Exception(err.LocalizedDescription))); - } - else - { - if (result.Final) - { - currentIndex = 0; - InternalStopListening(); - OnRecognitionResultCompleted(SpeechToTextResult.Success(result.BestTranscription.FormattedString)); - } - else - { - if (currentIndex <= 0) - { - OnSpeechToTextStateChanged(CurrentState); - } - - for (var i = currentIndex; i < result.BestTranscription.Segments.Length; i++) - { - var s = result.BestTranscription.Segments[i].Substring; - currentIndex++; - OnRecognitionResultUpdated(s); - } - } - } - }); - + InitSilenceTimer(options); + recognitionTask = CreateSpeechRecognizerTask(speechRecognizer, liveSpeechRequest); + return Task.CompletedTask; } } \ No newline at end of file diff --git a/src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/OfflineSpeechToTextImplementation.macos.cs b/src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/OfflineSpeechToTextImplementation.macos.cs index 89366ae868..eb0908f79e 100644 --- a/src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/OfflineSpeechToTextImplementation.macos.cs +++ 
b/src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/OfflineSpeechToTextImplementation.macos.cs @@ -1,5 +1,4 @@ using System.Diagnostics.CodeAnalysis; -using System.Globalization; using AVFoundation; using Speech; @@ -8,7 +7,7 @@ namespace CommunityToolkit.Maui.Media; /// public sealed partial class OfflineSpeechToTextImplementation { - [MemberNotNull(nameof(audioEngine), nameof(recognitionTask), nameof(liveSpeechRequest))] + [MemberNotNull(nameof(recognitionTask), nameof(liveSpeechRequest))] Task InternalStartListening(SpeechToTextOptions options, CancellationToken token = default) { speechRecognizer = new SFSpeechRecognizer(NSLocale.FromLocaleIdentifier(options.Culture.Name)); @@ -19,10 +18,6 @@ Task InternalStartListening(SpeechToTextOptions options, CancellationToken token throw new ArgumentException("Speech recognizer is not available"); } - audioEngine = new AVAudioEngine - { - AutoShutdownEnabled = false - }; liveSpeechRequest = new SFSpeechAudioBufferRecognitionRequest() { ShouldReportPartialResults = options.ShouldReportPartialResults, @@ -59,38 +54,8 @@ Task InternalStartListening(SpeechToTextOptions options, CancellationToken token throw new Exception(error.LocalizedDescription); } - var currentIndex = 0; - recognitionTask = speechRecognizer.GetRecognitionTask(liveSpeechRequest, (result, err) => - { - if (err is not null) - { - InternalStopListening(); - OnRecognitionResultCompleted(SpeechToTextResult.Failed(new Exception(err.LocalizedDescription))); - } - else - { - if (result.Final) - { - currentIndex = 0; - InternalStopListening(); - OnRecognitionResultCompleted(SpeechToTextResult.Success(result.BestTranscription.FormattedString)); - } - else - { - if (currentIndex <= 0) - { - OnSpeechToTextStateChanged(CurrentState); - } - - for (var i = currentIndex; i < result.BestTranscription.Segments.Length; i++) - { - var s = result.BestTranscription.Segments[i].Substring; - currentIndex++; - OnRecognitionResultUpdated(s); - } - } - } - }); + 
InitSilenceTimer(options); + recognitionTask = CreateSpeechRecognizerTask(speechRecognizer, liveSpeechRequest); return Task.CompletedTask; } diff --git a/src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/OfflineSpeechToTextImplementation.shared.cs b/src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/OfflineSpeechToTextImplementation.shared.cs index e28d506e41..36427add17 100644 --- a/src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/OfflineSpeechToTextImplementation.shared.cs +++ b/src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/OfflineSpeechToTextImplementation.shared.cs @@ -35,6 +35,11 @@ public event EventHandler StateChanged public async Task StartListenAsync(SpeechToTextOptions options, CancellationToken cancellationToken = default) { cancellationToken.ThrowIfCancellationRequested(); + if (CurrentState != SpeechToTextState.Stopped) + { + return; + } + await InternalStartListening(options, cancellationToken); } diff --git a/src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/OfflineSpeechToTextImplementation.windows.cs b/src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/OfflineSpeechToTextImplementation.windows.cs index 007eb24dd2..3eaa29975b 100644 --- a/src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/OfflineSpeechToTextImplementation.windows.cs +++ b/src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/OfflineSpeechToTextImplementation.windows.cs @@ -37,8 +37,8 @@ Task InternalStartListening(SpeechToTextOptions options, CancellationToken token offlineSpeechRecognizer.AudioStateChanged += OfflineSpeechRecognizer_StateChanged; - offlineSpeechRecognizer.InitialSilenceTimeout = TimeSpan.MaxValue; - offlineSpeechRecognizer.BabbleTimeout = TimeSpan.MaxValue; + offlineSpeechRecognizer.InitialSilenceTimeout = options.AutoStopSilenceTimeout; + offlineSpeechRecognizer.BabbleTimeout = options.AutoStopSilenceTimeout; offlineSpeechRecognizer.SetInputToDefaultAudioDevice(); diff --git 
a/src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/SharedSpeechToTextImplementation.macios.cs b/src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/SharedSpeechToTextImplementation.macios.cs index da55d39f47..674be61153 100644 --- a/src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/SharedSpeechToTextImplementation.macios.cs +++ b/src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/SharedSpeechToTextImplementation.macios.cs @@ -1,11 +1,13 @@ using AVFoundation; +using Microsoft.Maui.Dispatching; using Speech; namespace CommunityToolkit.Maui.Media; public sealed partial class SpeechToTextImplementation { - AVAudioEngine? audioEngine; + readonly AVAudioEngine audioEngine = new(); + IDispatcherTimer? silenceTimer; SFSpeechRecognizer? speechRecognizer; SFSpeechRecognitionTask? recognitionTask; SFSpeechAudioBufferRecognitionRequest? liveSpeechRequest; @@ -19,12 +21,11 @@ public sealed partial class SpeechToTextImplementation /// public ValueTask DisposeAsync() { - audioEngine?.Dispose(); + audioEngine.Dispose(); speechRecognizer?.Dispose(); liveSpeechRequest?.Dispose(); recognitionTask?.Dispose(); - audioEngine = null; speechRecognizer = null; liveSpeechRequest = null; recognitionTask = null; @@ -56,11 +57,22 @@ static void InitializeAvAudioSession(out AVAudioSession sharedAvAudioSession) void StopRecording() { - audioEngine?.InputNode.RemoveTapOnBus(0); - audioEngine?.Stop(); + silenceTimer?.Tick -= OnSilenceTimerTick; + silenceTimer?.Stop(); liveSpeechRequest?.EndAudio(); - recognitionTask?.Cancel(); + recognitionTask?.Finish(); + audioEngine.Stop(); + audioEngine.InputNode.RemoveTapOnBus(0); + OnSpeechToTextStateChanged(CurrentState); + + recognitionTask?.Dispose(); + speechRecognizer?.Dispose(); + liveSpeechRequest?.Dispose(); + + speechRecognizer = null; + liveSpeechRequest = null; + recognitionTask = null; } Task InternalStopListeningAsync(CancellationToken cancellationToken) @@ -69,4 +81,60 @@ Task InternalStopListeningAsync(CancellationToken 
cancellationToken) StopRecording(); return Task.CompletedTask; } + + void OnSilenceTimerTick(object? sender, EventArgs e) + { + StopRecording(); + } + + SFSpeechRecognitionTask CreateSpeechRecognizerTask(SFSpeechRecognizer sfSpeechRecognizer, SFSpeechAudioBufferRecognitionRequest sfSpeechAudioBufferRecognitionRequest) + { + int currentIndex = 0; + return sfSpeechRecognizer.GetRecognitionTask(sfSpeechAudioBufferRecognitionRequest, (result, err) => + { + if (err is not null) + { + currentIndex = 0; + StopRecording(); + OnRecognitionResultCompleted(SpeechToTextResult.Failed(new Exception(err.LocalizedDescription))); + } + else + { + if (result.Final) + { + currentIndex = 0; + StopRecording(); + OnRecognitionResultCompleted(SpeechToTextResult.Success(result.BestTranscription.FormattedString)); + } + else + { + RestartTimer(); + if (currentIndex <= 0) + { + OnSpeechToTextStateChanged(CurrentState); + } + + currentIndex++; + OnRecognitionResultUpdated(result.BestTranscription.FormattedString); + } + } + }); + } + + void InitSilenceTimer(SpeechToTextOptions options) + { + if (options.AutoStopSilenceTimeout < TimeSpan.MaxValue && options.AutoStopSilenceTimeout > TimeSpan.Zero) + { + silenceTimer = Dispatcher.GetForCurrentThread()?.CreateTimer(); + silenceTimer?.Tick += OnSilenceTimerTick; + silenceTimer?.Interval = options.AutoStopSilenceTimeout; + silenceTimer?.Start(); + } + } + + void RestartTimer() + { + silenceTimer?.Stop(); + silenceTimer?.Start(); + } } \ No newline at end of file diff --git a/src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/SpeechToTextImplementation.android.cs b/src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/SpeechToTextImplementation.android.cs index 2ea3b73e13..c5d23ae18e 100644 --- a/src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/SpeechToTextImplementation.android.cs +++ b/src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/SpeechToTextImplementation.android.cs @@ -49,7 +49,12 @@ static Intent 
CreateSpeechIntent(SpeechToTextOptions options) intent.PutExtra(RecognizerIntent.ExtraLanguage, javaLocale); intent.PutExtra(RecognizerIntent.ExtraLanguagePreference, javaLocale); intent.PutExtra(RecognizerIntent.ExtraOnlyReturnLanguagePreference, javaLocale); - + if (options.AutoStopSilenceTimeout < TimeSpan.MaxValue && options.AutoStopSilenceTimeout > TimeSpan.Zero) + { + intent.PutExtra(RecognizerIntent.ExtraSpeechInputCompleteSilenceLengthMillis, (long)options.AutoStopSilenceTimeout.TotalMilliseconds); + intent.PutExtra(RecognizerIntent.ExtraSpeechInputPossiblyCompleteSilenceLengthMillis, (long)options.AutoStopSilenceTimeout.TotalMilliseconds); + } + return intent; } diff --git a/src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/SpeechToTextImplementation.ios.cs b/src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/SpeechToTextImplementation.ios.cs index 433c2cf920..9ba58d2f4c 100644 --- a/src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/SpeechToTextImplementation.ios.cs +++ b/src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/SpeechToTextImplementation.ios.cs @@ -1,6 +1,4 @@ using System.Diagnostics.CodeAnalysis; -using System.Globalization; -using AVFoundation; using Speech; namespace CommunityToolkit.Maui.Media; @@ -8,7 +6,7 @@ namespace CommunityToolkit.Maui.Media; /// public sealed partial class SpeechToTextImplementation { - [MemberNotNull(nameof(audioEngine), nameof(recognitionTask), nameof(liveSpeechRequest))] + [MemberNotNull(nameof(recognitionTask), nameof(liveSpeechRequest))] Task InternalStartListeningAsync(SpeechToTextOptions options, CancellationToken cancellationToken) { speechRecognizer = new SFSpeechRecognizer(NSLocale.FromLocaleIdentifier(options.Culture.Name)); @@ -18,7 +16,6 @@ Task InternalStartListeningAsync(SpeechToTextOptions options, CancellationToken throw new ArgumentException("Speech recognizer is not available"); } - audioEngine = new AVAudioEngine(); liveSpeechRequest = new SFSpeechAudioBufferRecognitionRequest() 
{ ShouldReportPartialResults = options.ShouldReportPartialResults @@ -40,38 +37,8 @@ Task InternalStartListeningAsync(SpeechToTextOptions options, CancellationToken cancellationToken.ThrowIfCancellationRequested(); - var currentIndex = 0; - recognitionTask = speechRecognizer.GetRecognitionTask(liveSpeechRequest, (result, err) => - { - if (err is not null) - { - StopRecording(); - OnRecognitionResultCompleted(SpeechToTextResult.Failed(new Exception(err.LocalizedDescription))); - } - else - { - if (result.Final) - { - currentIndex = 0; - StopRecording(); - OnRecognitionResultCompleted(SpeechToTextResult.Success(result.BestTranscription.FormattedString)); - } - else - { - if (currentIndex <= 0) - { - OnSpeechToTextStateChanged(CurrentState); - } - - for (var i = currentIndex; i < result.BestTranscription.Segments.Length; i++) - { - var s = result.BestTranscription.Segments[i].Substring; - currentIndex++; - OnRecognitionResultUpdated(s); - } - } - } - }); + InitSilenceTimer(options); + recognitionTask = CreateSpeechRecognizerTask(speechRecognizer, liveSpeechRequest); return Task.CompletedTask; } diff --git a/src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/SpeechToTextImplementation.macos.cs b/src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/SpeechToTextImplementation.macos.cs index 4de6c17bbd..8819499ec7 100644 --- a/src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/SpeechToTextImplementation.macos.cs +++ b/src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/SpeechToTextImplementation.macos.cs @@ -1,5 +1,4 @@ using System.Diagnostics.CodeAnalysis; -using System.Globalization; using AVFoundation; using Speech; @@ -8,7 +7,7 @@ namespace CommunityToolkit.Maui.Media; /// public sealed partial class SpeechToTextImplementation { - [MemberNotNull(nameof(audioEngine), nameof(recognitionTask), nameof(liveSpeechRequest))] + [MemberNotNull(nameof(recognitionTask), nameof(liveSpeechRequest))] Task InternalStartListeningAsync(SpeechToTextOptions options, 
CancellationToken cancellationToken) { speechRecognizer = new SFSpeechRecognizer(NSLocale.FromLocaleIdentifier(options.Culture.Name)); @@ -18,10 +17,6 @@ Task InternalStartListeningAsync(SpeechToTextOptions options, CancellationToken throw new ArgumentException("Speech recognizer is not available"); } - audioEngine = new AVAudioEngine - { - AutoShutdownEnabled = false - }; liveSpeechRequest = new SFSpeechAudioBufferRecognitionRequest() { ShouldReportPartialResults = options.ShouldReportPartialResults @@ -59,38 +54,8 @@ Task InternalStartListeningAsync(SpeechToTextOptions options, CancellationToken cancellationToken.ThrowIfCancellationRequested(); - var currentIndex = 0; - recognitionTask = speechRecognizer.GetRecognitionTask(liveSpeechRequest, (result, err) => - { - if (err is not null) - { - StopRecording(); - OnRecognitionResultCompleted(SpeechToTextResult.Failed(new Exception(err.LocalizedDescription))); - } - else - { - if (result.Final) - { - currentIndex = 0; - StopRecording(); - OnRecognitionResultCompleted(SpeechToTextResult.Success(result.BestTranscription.FormattedString)); - } - else - { - if (currentIndex <= 0) - { - OnSpeechToTextStateChanged(CurrentState); - } - - for (var i = currentIndex; i < result.BestTranscription.Segments.Length; i++) - { - var s = result.BestTranscription.Segments[i].Substring; - currentIndex++; - OnRecognitionResultUpdated(s); - } - } - } - }); + InitSilenceTimer(options); + recognitionTask = CreateSpeechRecognizerTask(speechRecognizer, liveSpeechRequest); return Task.CompletedTask; } diff --git a/src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/SpeechToTextImplementation.shared.cs b/src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/SpeechToTextImplementation.shared.cs index 95cd305546..bbf9017d89 100644 --- a/src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/SpeechToTextImplementation.shared.cs +++ b/src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/SpeechToTextImplementation.shared.cs @@ -35,7 +35,11 @@ 
public event EventHandler StateChanged public async Task StartListenAsync(SpeechToTextOptions options, CancellationToken cancellationToken = default) { cancellationToken.ThrowIfCancellationRequested(); - + if (CurrentState != SpeechToTextState.Stopped) + { + return; + } + await InternalStartListeningAsync(options, cancellationToken).ConfigureAwait(false); } diff --git a/src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/SpeechToTextImplementation.windows.cs b/src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/SpeechToTextImplementation.windows.cs index 85bb30c672..b74239ed0d 100644 --- a/src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/SpeechToTextImplementation.windows.cs +++ b/src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/SpeechToTextImplementation.windows.cs @@ -23,7 +23,9 @@ public SpeechToTextState CurrentState { return speechRecognizer?.State switch { - SpeechRecognizerState.Capturing or SpeechRecognizerState.SoundStarted or SpeechRecognizerState.SpeechDetected or SpeechRecognizerState.Processing => SpeechToTextState.Listening, + SpeechRecognizerState.Capturing or SpeechRecognizerState.SoundStarted + or SpeechRecognizerState.SpeechDetected + or SpeechRecognizerState.Processing => SpeechToTextState.Listening, SpeechRecognizerState.SoundEnded => SpeechToTextState.Silence, _ => SpeechToTextState.Stopped, }; @@ -40,7 +42,7 @@ async Task InternalStartListeningAsync(SpeechToTextOptions options, Cancellation { await Initialize(options, cancellationToken); - speechRecognizer.ContinuousRecognitionSession.AutoStopSilenceTimeout = TimeSpan.MaxValue; + speechRecognizer.ContinuousRecognitionSession.AutoStopSilenceTimeout = options.AutoStopSilenceTimeout; speechRecognizer.ContinuousRecognitionSession.ResultGenerated += ResultGenerated; speechRecognizer.ContinuousRecognitionSession.Completed += OnCompleted; try @@ -65,7 +67,8 @@ void OnCompleted(SpeechContinuousRecognitionSession sender, SpeechContinuousReco 
OnRecognitionResultCompleted(SpeechToTextResult.Success(recognitionText)); break; case SpeechRecognitionResultStatus.UserCanceled: - OnRecognitionResultCompleted(new SpeechToTextResult(recognitionText, new TaskCanceledException("Operation cancelled"))); + OnRecognitionResultCompleted(new SpeechToTextResult(recognitionText, + new TaskCanceledException("Operation cancelled"))); break; default: OnRecognitionResultCompleted(SpeechToTextResult.Failed(new Exception(args.Status.ToString()))); @@ -73,7 +76,8 @@ void OnCompleted(SpeechContinuousRecognitionSession sender, SpeechContinuousReco } } - void ResultGenerated(SpeechContinuousRecognitionSession sender, SpeechContinuousRecognitionResultGeneratedEventArgs args) + void ResultGenerated(SpeechContinuousRecognitionSession sender, + SpeechContinuousRecognitionResultGeneratedEventArgs args) { recognitionText += args.Result.Text; if (speechToTextOptions?.ShouldReportPartialResults == true) @@ -117,6 +121,11 @@ async Task Initialize(SpeechToTextOptions options, CancellationToken cancellatio speechToTextOptions = options; recognitionText = string.Empty; speechRecognizer = new SpeechRecognizer(new Language(options.Culture.IetfLanguageTag)); + + speechRecognizer.UIOptions.AudiblePrompt = string.Empty; + speechRecognizer.UIOptions.IsReadBackEnabled = false; + speechRecognizer.UIOptions.ShowConfirmation = false; + speechRecognizer.StateChanged += SpeechRecognizer_StateChanged; cancellationToken.ThrowIfCancellationRequested(); await speechRecognizer.CompileConstraintsAsync().AsTask(cancellationToken); diff --git a/src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/SpeechToTextOptions.cs b/src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/SpeechToTextOptions.cs index b402bf8b0a..eba8da4304 100644 --- a/src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/SpeechToTextOptions.cs +++ b/src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/SpeechToTextOptions.cs @@ -16,4 +16,10 @@ public class SpeechToTextOptions /// Include 
partial recognition results. /// public bool ShouldReportPartialResults { get; init; } = true; + + /// + /// The duration of continuous silence after which speech recognition will automatically stop. + /// Use <see cref="TimeSpan.MaxValue"/> (the default) to indicate that auto-stop based on silence is disabled. + /// + public TimeSpan AutoStopSilenceTimeout { get; init; } = TimeSpan.MaxValue; } \ No newline at end of file