CommunityToolkit · VladislavAntonyuk · Feb 23, 2026 · Feb 23, 2026 · Feb 23, 2026 · Feb 23, 2026
diff --git a/...olkit.Maui.Core/Essentials/SpeechToText/OfflineSharedSpeechToTextImplementation.macios.cs b/...olkit.Maui.Core/Essentials/SpeechToText/OfflineSharedSpeechToTextImplementation.macios.cs
@@ -1,11 +1,13 @@
 using AVFoundation;
+using Microsoft.Maui.Dispatching;
 using Speech;
 
 namespace CommunityToolkit.Maui.Media;
 
 public sealed partial class OfflineSpeechToTextImplementation
 {
-	AVAudioEngine? audioEngine;
+	readonly AVAudioEngine audioEngine = new();
+	IDispatcherTimer? silenceTimer;
 	SFSpeechRecognizer? speechRecognizer;
 	SFSpeechRecognitionTask? recognitionTask;
 	SFSpeechAudioBufferRecognitionRequest? liveSpeechRequest;
@@ -19,12 +21,11 @@ public sealed partial class OfflineSpeechToTextImplementation
 	/// <inheritdoc />
 	public ValueTask DisposeAsync()
 	{
-		audioEngine?.Dispose();
+		audioEngine.Dispose();
 		speechRecognizer?.Dispose();
 		liveSpeechRequest?.Dispose();
 		recognitionTask?.Dispose();
 
-		audioEngine = null;
 		speechRecognizer = null;
 		liveSpeechRequest = null;
 		recognitionTask = null;
@@ -41,12 +42,6 @@ public Task<bool> RequestPermissions(CancellationToken cancellationToken = defau
 		return taskResult.Task.WaitAsync(cancellationToken);
 	}
 
-	static Task<bool> IsSpeechPermissionAuthorized(CancellationToken cancellationToken)
-	{
-		cancellationToken.ThrowIfCancellationRequested();
-		return Task.FromResult(SFSpeechRecognizer.AuthorizationStatus is SFSpeechRecognizerAuthorizationStatus.Authorized);
-	}
-
 	static void InitializeAvAudioSession(out AVAudioSession sharedAvAudioSession)
 	{
 		sharedAvAudioSession = AVAudioSession.SharedInstance();
@@ -62,10 +57,77 @@ static void InitializeAvAudioSession(out AVAudioSession sharedAvAudioSession)
 
 	void InternalStopListening()
 	{
-		audioEngine?.InputNode.RemoveTapOnBus(0);
-		audioEngine?.Stop();
+		silenceTimer?.Tick -= OnSilenceTimerTick; // detach the handler that InitSilenceTimer attached
+		silenceTimer?.Stop();
 		liveSpeechRequest?.EndAudio();
-		recognitionTask?.Cancel();
+		recognitionTask?.Finish(); // NOTE(review): Finish() rather than Cancel() - presumably so already captured audio can still yield a final result; confirm against the Speech framework docs
+		audioEngine.Stop();
+		audioEngine.InputNode.RemoveTapOnBus(0); // remove the tap on input bus 0
+
 		OnSpeechToTextStateChanged(CurrentState);
+
+		recognitionTask?.Dispose(); // the per-session objects are disposed and cleared below; audioEngine is only stopped here, not disposed
+		speechRecognizer?.Dispose();
+		liveSpeechRequest?.Dispose();
+
+		speechRecognizer = null;
+		liveSpeechRequest = null;
+		recognitionTask = null;
+	}
+
+	/// <summary>
+	/// Tick handler of the silence timer: stops the current listening session.
+	/// </summary>
+	void OnSilenceTimerTick(object? sender, EventArgs e) => InternalStopListening();
+
+	/// <summary>Creates the recognition task; its callback stops listening on an error or a final result and forwards partial transcriptions otherwise.</summary>
+	SFSpeechRecognitionTask CreateSpeechRecognizerTask(SFSpeechRecognizer sfSpeechRecognizer, SFSpeechAudioBufferRecognitionRequest sfSpeechAudioBufferRecognitionRequest)
+	{
+		var partialResultCount = 0;
+		return sfSpeechRecognizer.GetRecognitionTask(sfSpeechAudioBufferRecognitionRequest, (recognitionResult, recognitionError) =>
+		{
+			if (recognitionError is not null)
+			{
+				partialResultCount = 0;
+				InternalStopListening();
+				OnRecognitionResultCompleted(SpeechToTextResult.Failed(new Exception(recognitionError.LocalizedDescription)));
+				return;
+			}
+
+			if (recognitionResult.Final)
+			{
+				partialResultCount = 0;
+				InternalStopListening();
+				OnRecognitionResultCompleted(SpeechToTextResult.Success(recognitionResult.BestTranscription.FormattedString));
+				return;
+			}
+
+			// Partial result: restart the silence timer, and raise the state change only for the first partial result.
+			RestartTimer();
+			if (partialResultCount <= 0)
+			{
+				OnSpeechToTextStateChanged(CurrentState);
+			}
+
+			partialResultCount++;
+			OnRecognitionResultUpdated(recognitionResult.BestTranscription.FormattedString);
+		});
+	}
+
+	void InitSilenceTimer(SpeechToTextOptions options) // creates and starts the auto-stop silence timer for a listening session
+	{
+		if (options.AutoStopSilenceTimeout < TimeSpan.MaxValue && options.AutoStopSilenceTimeout > TimeSpan.Zero) // only a finite, positive timeout enables auto-stop
+		{
+			silenceTimer = Dispatcher.GetForCurrentThread()?.CreateTimer(); // null when the current thread has no dispatcher; the null-conditional calls below then do nothing
+			silenceTimer?.Tick += OnSilenceTimerTick;
+			silenceTimer?.Interval = options.AutoStopSilenceTimeout;
+			silenceTimer?.Start();
+		}
+	}
+
+	void RestartTimer() // stops and then starts the silence timer; does nothing when silenceTimer is null
+	{
+		silenceTimer?.Stop();
+		silenceTimer?.Start();
 	}
 }
diff --git a/...ityToolkit.Maui.Core/Essentials/SpeechToText/OfflineSpeechToTextImplementation.android.cs b/...ityToolkit.Maui.Core/Essentials/SpeechToText/OfflineSpeechToTextImplementation.android.cs
@@ -53,7 +53,12 @@ static Intent CreateSpeechIntent(SpeechToTextOptions options)
 		intent.PutExtra(RecognizerIntent.ExtraLanguage, javaLocale);
 		intent.PutExtra(RecognizerIntent.ExtraLanguagePreference, javaLocale);
 		intent.PutExtra(RecognizerIntent.ExtraOnlyReturnLanguagePreference, javaLocale);
-
+		if (options.AutoStopSilenceTimeout < TimeSpan.MaxValue && options.AutoStopSilenceTimeout > TimeSpan.Zero) // forward only a finite, positive timeout to the recognizer intent
+		{
+			intent.PutExtra(RecognizerIntent.ExtraSpeechInputCompleteSilenceLengthMillis, (long)options.AutoStopSilenceTimeout.TotalMilliseconds);
+			intent.PutExtra(RecognizerIntent.ExtraSpeechInputPossiblyCompleteSilenceLengthMillis, (long)options.AutoStopSilenceTimeout.TotalMilliseconds);
+		}
+
 		return intent;
 	}
 

diff --git a/...mmunityToolkit.Maui.Core/Essentials/SpeechToText/OfflineSpeechToTextImplementation.ios.cs b/...mmunityToolkit.Maui.Core/Essentials/SpeechToText/OfflineSpeechToTextImplementation.ios.cs
@@ -9,7 +9,7 @@ namespace CommunityToolkit.Maui.Media;
 /// <inheritdoc />
 public sealed partial class OfflineSpeechToTextImplementation
 {
-	[MemberNotNull(nameof(audioEngine), nameof(recognitionTask), nameof(liveSpeechRequest))]
+	[MemberNotNull(nameof(recognitionTask), nameof(liveSpeechRequest))]
 	[SupportedOSPlatform("ios13.0")]
 	[SupportedOSPlatform("maccatalyst")]
 	Task InternalStartListening(SpeechToTextOptions options, CancellationToken token = default)
@@ -27,7 +27,6 @@ Task InternalStartListening(SpeechToTextOptions options, CancellationToken token
 			throw new ArgumentException("Speech recognizer is not available");
 		}
 
-		audioEngine = new AVAudioEngine();
 		liveSpeechRequest = new SFSpeechAudioBufferRecognitionRequest()
 		{
 			ShouldReportPartialResults = options.ShouldReportPartialResults,
@@ -48,39 +47,9 @@ Task InternalStartListening(SpeechToTextOptions options, CancellationToken token
 			throw new ArgumentException("Error starting audio engine - " + error.LocalizedDescription);
 		}
 
-		var currentIndex = 0;
-		recognitionTask = speechRecognizer.GetRecognitionTask(liveSpeechRequest, (result, err) =>
-		{
-			if (err is not null)
-			{
-				InternalStopListening();
-				OnRecognitionResultCompleted(SpeechToTextResult.Failed(new Exception(err.LocalizedDescription)));
-			}
-			else
-			{
-				if (result.Final)
-				{
-					currentIndex = 0;
-					InternalStopListening();
-					OnRecognitionResultCompleted(SpeechToTextResult.Success(result.BestTranscription.FormattedString));
-				}
-				else
-				{
-					if (currentIndex <= 0)
-					{
-						OnSpeechToTextStateChanged(CurrentState);
-					}
-
-					for (var i = currentIndex; i < result.BestTranscription.Segments.Length; i++)
-					{
-						var s = result.BestTranscription.Segments[i].Substring;
-						currentIndex++;
-						OnRecognitionResultUpdated(s);
-					}
-				}
-			}
-		});
-
+		InitSilenceTimer(options);
+		recognitionTask = CreateSpeechRecognizerTask(speechRecognizer, liveSpeechRequest);
+
 		return Task.CompletedTask;
 	}
 }
diff --git a/...unityToolkit.Maui.Core/Essentials/SpeechToText/OfflineSpeechToTextImplementation.macos.cs b/...unityToolkit.Maui.Core/Essentials/SpeechToText/OfflineSpeechToTextImplementation.macos.cs
@@ -1,5 +1,4 @@
 using System.Diagnostics.CodeAnalysis;
-using System.Globalization;
 using AVFoundation;
 using Speech;
 
@@ -8,7 +7,7 @@ namespace CommunityToolkit.Maui.Media;
 /// <inheritdoc />
 public sealed partial class OfflineSpeechToTextImplementation
 {
-	[MemberNotNull(nameof(audioEngine), nameof(recognitionTask), nameof(liveSpeechRequest))]
+	[MemberNotNull(nameof(recognitionTask), nameof(liveSpeechRequest))]
 	Task InternalStartListening(SpeechToTextOptions options, CancellationToken token = default)
 	{
 		speechRecognizer = new SFSpeechRecognizer(NSLocale.FromLocaleIdentifier(options.Culture.Name));
@@ -19,10 +18,6 @@ Task InternalStartListening(SpeechToTextOptions options, CancellationToken token
 			throw new ArgumentException("Speech recognizer is not available");
 		}
 
-		audioEngine = new AVAudioEngine
-		{
-			AutoShutdownEnabled = false
-		};
 		liveSpeechRequest = new SFSpeechAudioBufferRecognitionRequest()
 		{
 			ShouldReportPartialResults = options.ShouldReportPartialResults,
@@ -59,38 +54,8 @@ Task InternalStartListening(SpeechToTextOptions options, CancellationToken token
 			throw new Exception(error.LocalizedDescription);
 		}
 
-		var currentIndex = 0;
-		recognitionTask = speechRecognizer.GetRecognitionTask(liveSpeechRequest, (result, err) =>
-		{
-			if (err is not null)
-			{
-				InternalStopListening();
-				OnRecognitionResultCompleted(SpeechToTextResult.Failed(new Exception(err.LocalizedDescription)));
-			}
-			else
-			{
-				if (result.Final)
-				{
-					currentIndex = 0;
-					InternalStopListening();
-					OnRecognitionResultCompleted(SpeechToTextResult.Success(result.BestTranscription.FormattedString));
-				}
-				else
-				{
-					if (currentIndex <= 0)
-					{
-						OnSpeechToTextStateChanged(CurrentState);
-					}
-
-					for (var i = currentIndex; i < result.BestTranscription.Segments.Length; i++)
-					{
-						var s = result.BestTranscription.Segments[i].Substring;
-						currentIndex++;
-						OnRecognitionResultUpdated(s);
-					}
-				}
-			}
-		});
+		InitSilenceTimer(options);
+		recognitionTask = CreateSpeechRecognizerTask(speechRecognizer, liveSpeechRequest);
 
 		return Task.CompletedTask;
 	}

diff --git a/...nityToolkit.Maui.Core/Essentials/SpeechToText/OfflineSpeechToTextImplementation.shared.cs b/...nityToolkit.Maui.Core/Essentials/SpeechToText/OfflineSpeechToTextImplementation.shared.cs
@@ -35,6 +35,11 @@ public event EventHandler<SpeechToTextStateChangedEventArgs> StateChanged
 	public async Task StartListenAsync(SpeechToTextOptions options, CancellationToken cancellationToken = default)
 	{
 		cancellationToken.ThrowIfCancellationRequested();
+		if (CurrentState != SpeechToTextState.Stopped) // not in the Stopped state: return without calling InternalStartListening
+		{
+			return;
+		}
+
 		await InternalStartListening(options, cancellationToken);
 	}
 

diff --git a/...ityToolkit.Maui.Core/Essentials/SpeechToText/OfflineSpeechToTextImplementation.windows.cs b/...ityToolkit.Maui.Core/Essentials/SpeechToText/OfflineSpeechToTextImplementation.windows.cs
@@ -37,8 +37,8 @@ Task InternalStartListening(SpeechToTextOptions options, CancellationToken token
 
 		offlineSpeechRecognizer.AudioStateChanged += OfflineSpeechRecognizer_StateChanged;
 
-		offlineSpeechRecognizer.InitialSilenceTimeout = TimeSpan.MaxValue;
-		offlineSpeechRecognizer.BabbleTimeout = TimeSpan.MaxValue;
+		offlineSpeechRecognizer.InitialSilenceTimeout = options.AutoStopSilenceTimeout;
+		offlineSpeechRecognizer.BabbleTimeout = options.AutoStopSilenceTimeout;
 
 		offlineSpeechRecognizer.SetInputToDefaultAudioDevice();