diff --git a/dialogflow/detect_intent_texts_with_location.py b/dialogflow/detect_intent_texts_with_location.py index d52ac178dd7..99e2eabb2f3 100644 --- a/dialogflow/detect_intent_texts_with_location.py +++ b/dialogflow/detect_intent_texts_with_location.py @@ -58,7 +58,7 @@ def detect_intent_texts_with_location( print("=" * 20) print(f"Query text: {response.query_result.query_text}") print( - f"Detected intent: {response.query_result.intent.display_name} (confidence: {response.query_result.intent_detection_confidence,})\n" + f"Detected intent: {response.query_result.intent.display_name} (confidence: {response.query_result.intent_detection_confidence})\n" ) print(f"Fulfillment text: {response.query_result.fulfillment_text}\n") diff --git a/dialogflow/participant_management.py b/dialogflow/participant_management.py index e2f9a486c1a..d0bfa9decf3 100644 --- a/dialogflow/participant_management.py +++ b/dialogflow/participant_management.py @@ -196,6 +196,7 @@ def analyze_content_audio_stream( timeout: int, language_code: str, single_utterance=False, + output_multiple_utterances=False, ): import google.auth from google.cloud import dialogflow_v2beta1 as dialogflow @@ -231,7 +232,9 @@ def gen_requests(participant_name, audio_config, stream): """Generates requests for streaming.""" audio_generator = stream.generator() yield dialogflow.types.participant.StreamingAnalyzeContentRequest( - participant=participant_name, audio_config=audio_config + participant=participant_name, + audio_config=audio_config, + output_multiple_utterances=output_multiple_utterances ) for content in audio_generator: yield dialogflow.types.participant.StreamingAnalyzeContentRequest( diff --git a/dialogflow/requirements.txt b/dialogflow/requirements.txt index 4c7d355eb45..ed176a19af0 100644 --- a/dialogflow/requirements.txt +++ b/dialogflow/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-dialogflow==2.36.0 +google-cloud-dialogflow==2.46.0 Flask==3.0.3 pyaudio==0.2.14 termcolor==3.0.0 diff --git a/dialogflow/streaming_transcription.py b/dialogflow/streaming_transcription.py index 6395a30b3a8..fe88afb581f 100644 --- a/dialogflow/streaming_transcription.py +++ b/dialogflow/streaming_transcription.py @@ -34,7 +34,7 @@ import re import sys -from google.api_core.exceptions import DeadlineExceeded +from google.api_core.exceptions import DeadlineExceeded, OutOfRange import pyaudio @@ -51,6 +51,7 @@ CHUNK_SIZE = int(SAMPLE_RATE / 10) # 100ms RESTART_TIMEOUT = 160 # seconds MAX_LOOKBACK = 3 # seconds +HALF_CLOSE_DURATION_MS = 90 * 1000 # milliseconds YELLOW = "\033[0;33m" @@ -198,6 +199,9 @@ def main(): timeout=RESTART_TIMEOUT, language_code="en-US", single_utterance=False, + # Uncomment to process multiple utterances detected in the audio stream + # individually instead of stitching together to form a single utterance. + # output_multiple_utterances=True, ) # Now, print the final transcription responses to user. @@ -213,8 +217,10 @@ def main(): offset.seconds * 1000 + offset.microseconds / 1000 ) transcript = response.recognition_result.transcript - # Half-close the stream with gRPC (in Python just stop yielding requests) - stream.is_final = True + # Half-close upon final results for better streaming experiences + # (in Python just stop yielding requests) + if stream.is_final_offset > HALF_CLOSE_DURATION_MS: + stream.is_final = True # Exit recognition if any of the transcribed phrase could be # one of our keywords. if re.search(r"\b(exit|quit)\b", transcript, re.I): @@ -223,6 +229,8 @@ def main(): terminate = True stream.closed = True break + except OutOfRange: + print("Maximum audio duration exceeded in the stream, restarting.") except DeadlineExceeded: print("Deadline Exceeded, restarting.")