Commit 113df9f

adding in the new files and fixing the generic search tool so it doesn't just think it's about weather...

1 parent: a9ea1bd

14 files changed: +2566 −1639 lines

MANIFEST.in

Lines changed: 0 additions & 1 deletion
@@ -1,2 +1 @@
 recursive-include npc_profiles *
-include npcsh.png

npcsh.png

-22 KB
Binary file not shown.

npcsh/audio.py

Lines changed: 205 additions & 0 deletions
@@ -0,0 +1,205 @@
# audio
import os
import tempfile
import time
from typing import List, Optional, Tuple

import numpy as np

try:
    import whisper
    import librosa
    import pyaudio
    from gtts import gTTS
    from playsound import playsound
except Exception as e:
    print(f"Error importing audio dependencies: {e}")

# Assumed import path for the project-internal LLM helper used in process_audio.
from npcsh.llm_funcs import get_llm_response


def get_audio_level(audio_data: bytes) -> int:
    """Return the peak absolute amplitude of a chunk of 16-bit PCM audio."""
    return np.max(np.abs(np.frombuffer(audio_data, dtype=np.int16)))


def calibrate_silence(sample_rate=16000, duration=2):
    """
    Function Description:
        This function calibrates the silence level for audio recording.
    Args:
        None
    Keyword Args:
        sample_rate: The sample rate for audio recording.
        duration: The duration in seconds for calibration.
    Returns:
        The silence threshold level.
    """
    p = pyaudio.PyAudio()
    stream = p.open(
        format=pyaudio.paInt16,
        channels=1,
        rate=sample_rate,
        input=True,
        frames_per_buffer=1024,
    )

    print("Calibrating silence level. Please remain quiet...")
    levels = []
    for _ in range(int(sample_rate * duration / 1024)):
        data = stream.read(1024)
        levels.append(get_audio_level(data))

    stream.stop_stream()
    stream.close()
    p.terminate()

    avg_level = np.mean(levels)
    silence_threshold = avg_level * 1.5  # Set threshold slightly above average
    print(f"Silence threshold set to: {silence_threshold}")
    return silence_threshold
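
Note that calibrate_silence opens its own PyAudio stream and blocks for `duration` seconds, and record_audio (below) calls it implicitly whenever no threshold is passed. A caller that records repeatedly might therefore calibrate once and reuse the result, e.g.:

    threshold = calibrate_silence()                    # measure ambient noise once
    first = record_audio(silence_threshold=threshold)  # no recalibration
    second = record_audio(silence_threshold=threshold)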


def is_silent(audio_data: bytes, threshold: float) -> bool:
    """
    Function Description:
        This function checks if audio data is silent based on a threshold.
    Args:
        audio_data: The audio data to check.
        threshold: The silence threshold level.
    Keyword Args:
        None
    Returns:
        A boolean indicating whether the audio is silent.
    """
    return get_audio_level(audio_data) < threshold


def record_audio(
    sample_rate: int = 16000,
    max_duration: int = 10,
    silence_threshold: Optional[float] = None,
) -> bytes:
    """
    Function Description:
        This function records audio from the microphone.
    Args:
        None
    Keyword Args:
        sample_rate: The sample rate for audio recording.
        max_duration: The maximum duration in seconds.
        silence_threshold: The silence threshold level.
    Returns:
        The recorded audio data.
    """
    if silence_threshold is None:
        silence_threshold = calibrate_silence()

    p = pyaudio.PyAudio()
    stream = p.open(
        format=pyaudio.paInt16,
        channels=1,
        rate=sample_rate,
        input=True,
        frames_per_buffer=1024,
    )

    print("Listening... (speak now)")
    frames = []
    silent_chunks = 0
    has_speech = False
    max_silent_chunks = int(sample_rate * 1.0 / 1024)  # 1.0 seconds of silence
    max_chunks = int(sample_rate * max_duration / 1024)  # Maximum duration in chunks

    start_time = time.time()
    for _ in range(max_chunks):
        data = stream.read(1024)
        frames.append(data)

        if is_silent(data, silence_threshold):
            silent_chunks += 1
            if has_speech and silent_chunks > max_silent_chunks:
                break
        else:
            silent_chunks = 0
            has_speech = True

        if len(frames) % 10 == 0:  # Print a dot every ~0.5 seconds
            print(".", end="", flush=True)

        if time.time() - start_time > max_duration:
            print("\nMax duration reached.")
            break

    print("\nProcessing...")

    stream.stop_stream()
    stream.close()
    p.terminate()

    return b"".join(frames)


def speak_text(text: str) -> None:
    """
    Function Description:
        This function converts text to speech and plays the audio.
    Args:
        text: The text to convert to speech.
    Keyword Args:
        None
    Returns:
        None
    """
    try:
        tts = gTTS(text=text, lang="en")
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
            tts.save(fp.name)
        playsound(fp.name)
        os.unlink(fp.name)
    except Exception as e:
        print(f"Text-to-speech error: {e}")


def process_audio(file_path: str, table_name: str) -> Tuple[List, List]:
    """
    Function Description:
        This function is used to process an audio file.
    Args:
        file_path : str : The file path.
        table_name : str : The table name.
    Keyword Args:
        None
    Returns:
        Tuple[List, List] : The embeddings and the corresponding text chunks.
    """
    embeddings = []
    texts = []
    try:
        audio, sr = librosa.load(file_path)  # Load the audio (result currently unused; Whisper reads the file itself)
        # Transcribe audio using Whisper
        model = whisper.load_model("base")  # Or a larger model if available
        result = model.transcribe(file_path)
        transcribed_text = result["text"].strip()

        # Split transcribed text into chunks (adjust chunk_size as needed)
        chunk_size = 1000
        for i in range(0, len(transcribed_text), chunk_size):
            chunk = transcribed_text[i : i + chunk_size]
            text_embedding_response = get_llm_response(
                f"Generate an embedding for: {chunk}",
                model="text-embedding-ada-002",
                provider="openai",
            )  # Use a text embedding model
            if (
                isinstance(text_embedding_response, dict)
                and "error" in text_embedding_response
            ):
                print(
                    f"Error generating text embedding: {text_embedding_response['error']}"
                )
            else:
                embeddings.append(text_embedding_response)  # Store the embedding
                texts.append(chunk)  # Store the corresponding text chunk

        return embeddings, texts

    except Exception as e:
        print(f"Error processing audio: {e}")
        return [], []  # Return empty lists in case of error
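
Taken together, a minimal end-to-end sketch of how these functions might be wired up, assuming the hypothetical save_wav helper from the note above (the file name is illustrative, not part of this commit):

    if __name__ == "__main__":
        threshold = calibrate_silence()                  # measure ambient noise once
        pcm = record_audio(silence_threshold=threshold)  # stops after ~1 s of silence
        save_wav(pcm, "utterance.wav")                   # Whisper reads from a file path
        model = whisper.load_model("base")
        text = model.transcribe("utterance.wav")["text"].strip()
        speak_text(f"You said: {text}")                  # round-trip through gTTS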
