-
Notifications
You must be signed in to change notification settings - Fork 680
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Audio recording enhancement #1341
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,6 +6,8 @@ | |
''' | ||
|
||
import time | ||
import math | ||
import struct | ||
|
||
try: | ||
import pyaudio # install using, "sudo apt-get install python3-pyaudio" | ||
|
@@ -15,6 +17,15 @@ | |
print("chat: failed to import pyaudio, wave or openai. See https://ardupilot.org/mavproxy/docs/modules/chat.html") | ||
exit() | ||
|
||
def rms(data):
    """Return the root-mean-square level of a buffer of 16-bit audio samples.

    data is a bytes-like object that is unpacked as native-endian signed
    16-bit integers (struct format "h").  Each sample is scaled by 1/32768
    before squaring, so the result lies in the range 0.0 to 1.0.

    Returns 0.0 when the buffer holds no complete sample.  A trailing odd
    byte (an incomplete sample) is ignored.
    """
    # integer division: the sample count must be an int, and a buffer with a
    # dangling odd byte must not make the struct format and buffer size disagree
    count = len(data) // 2
    if count == 0:
        # nothing to measure; avoid dividing by zero below
        return 0.0

    # unpack exactly "count" whole samples
    shorts = struct.unpack("%dh" % count, data[:count * 2])

    # accumulate the squares of the normalised samples
    scale = 1.0 / 32768
    sum_squares = 0.0
    for sample in shorts:
        n = sample * scale
        sum_squares += n * n

    return math.sqrt(sum_squares / count)
|
||
class chat_voice_to_text(): | ||
def __init__(self): | ||
|
@@ -34,7 +45,7 @@ def check_connection(self): | |
try: | ||
self.client = OpenAI() | ||
except Exception: | ||
print("chat: failed to connect to OpenAI") | ||
print("chat: failed to connect to OpenAI - 4") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think this change to the print statement can be removed. |
||
return False | ||
|
||
# return True if connected | ||
|
@@ -55,15 +66,25 @@ def record_audio(self): | |
|
||
# calculate time recording should stop | ||
curr_time = time.time() | ||
time_stop = curr_time + 5 | ||
time_stop = curr_time + 3 | ||
|
||
# record until specified time | ||
frames = [] | ||
while curr_time < time_stop: | ||
|
||
# logic for recording sound until someone is speaking. | ||
isSpeaking = True | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Hi, I think our normal style is to use underscores between the words of a variable name, so let's change "isSpeaking" to "is_speaking". Also, maybe change the comment to "record sound while user is speaking". |
||
while curr_time < time_stop or isSpeaking: | ||
data = stream.read(1024) | ||
frames.append(data) | ||
rms1 = rms(data) | ||
if rms1!=0.0: | ||
decibel = 20 * math.log10(rms1) | ||
isSpeaking = decibel>-80.0 # -80 is the hardcoded threshold. higher number means louder. Set threshold in the range (-100,0) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If possible, let's move this -80 into a named definition at the top of the file, where it will be easier to find. |
||
if isSpeaking: | ||
time_stop = time.time()+3 | ||
else: | ||
isSpeaking = False | ||
curr_time = time.time() | ||
|
||
# Stop and close the stream | ||
stream.stop_stream() | ||
stream.close() | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hi, thanks for this. A few things to fix here: