-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathsimple_sound_stream.py
176 lines (140 loc) · 6.16 KB
/
simple_sound_stream.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
"""
Implements a naoqi ALModule that live-streams Pepper's microphone to the host's default audio output device.
Based on: https://github.com/JBramauer/pepperspeechrecognition/blob/master/module_speechrecognition.py
Author: Finn Rietz
"""
import sounddevice as sd
from sounddevice import PortAudioError
import time
import numpy as np
from utils import rawToWav
import naoqi
from naoqi import ALProxy
# Number of channels of the local sounddevice playback stream.
CHANNELS = 1
# Sample rate in Hz; used both for the ALAudioDevice client preferences and for the playback stream.
SAMPLE_RATE = 48000
# Address (IP and port) of the parent NAOqi broker that main() hands to the module.
IP = "130.239.182.11"
PORT = 9559
# Module name used to look up (and exit) an instance left over from a previous run.
MOD_NAME = "SpeechRecognition"
# we need to inherit from ALModule so that we can subscribe to the audio device...
class SpeechRecognitionModule(naoqi.ALModule):
    """
    ALModule that receives audio from ALAudioDevice and plays it on a local sounddevice stream.

    After start() the module is subscribed to ALAudioDevice, which delivers audio chunks
    through processRemote(). Every chunk is appended to ``self.buffer`` and, while
    ``self.livestream`` is True, also written to the sounddevice output stream.
    """

    def __init__(self, strModuleName, strNaoIp, noaPort):
        """
        Creates the broker, registers the module and prepares the playback stream.

        :param strModuleName: name under which the module is registered
        :param strNaoIp: IP of the parent broker
        :param noaPort: port of the parent broker
        """
        # kill previous instance, useful for developing ;)
        # NOTE(review): the lookup uses the global MOD_NAME, not strModuleName - confirm this is intended
        try:
            p = ALProxy(MOD_NAME)
            p.exit()
        except RuntimeError:  # when there is no instance in the broker
            pass

        self.strNaoIp = strNaoIp
        self.naoPort = noaPort
        self.broker = self.setup_broker()  # setup naoqi broker for module communication

        try:
            naoqi.ALModule.__init__(self, strModuleName)  # init module
        except RuntimeError:
            # When module is already registered (eg camera tab has been closed and is now reopened)
            pass

        # NOTE(review): presumably exposes a method named "callback" to NAOqi, but this class
        # defines no such method - TODO confirm whether this call is still needed
        self.BIND_PYTHON(self.getName(), "callback")

        self.memory = naoqi.ALProxy("ALMemory")
        self.memory.declareEvent(self.getName())  # needed for callback
        self.audio = naoqi.ALProxy("ALAudioDevice")

        # audio buffer: one (channels, samples) int16 array per received chunk
        # NOTE(review): grows for as long as the module is started; nothing in this class clears it
        self.buffer = []

        self.stream_latency = 0.5
        # sounddevice stream for audio playback in realtime
        self.stream = self._make_stream()

        self.livestream = True
        self.isStarted = False

    def _make_stream(self):
        """
        Creates a new (not yet started) sounddevice output stream for playback.

        :return: sd.OutputStream configured with CHANNELS, SAMPLE_RATE and self.stream_latency
        """
        # dtype=np.int16 is very important! This fixes the insane static noises
        return sd.OutputStream(channels=CHANNELS, samplerate=SAMPLE_RATE, dtype=np.int16,
                               latency=self.stream_latency)

    def setup_broker(self):
        """
        Creates the local broker that connects this process to the parent broker.

        :return: the created naoqi.ALBroker
        """
        return naoqi.ALBroker(
            "myBroker",      # we need to use the broker when we implement our own module...
            "0.0.0.0",       # listen to anyone
            0,               # find a free port and use it
            self.strNaoIp,   # parent broker IP
            self.naoPort)    # parent broker port

    def start(self):
        """
        Subscribes the module to ALAudioDevice and starts the local playback stream.

        :return: None
        """
        # ALL_Channels: 0, AL::LEFTCHANNEL: 1, AL::RIGHTCHANNEL: 2 AL::FRONTCHANNEL: 3 or AL::REARCHANNEL: 4.
        nNbrChannelFlag = 3
        nDeinterleave = 0
        # setting same as default generate a bug !?!
        self.audio.setClientPreferences(self.getName(), SAMPLE_RATE, nNbrChannelFlag, nDeinterleave)

        # we can only subscribe to the ALAudiodevice with an implementation of ALModule...
        # needs to have a "process" method that will be used as callback...
        self.audio.subscribe(self.getName())

        # also start the sounddevice stream so that we can write data on it
        try:
            self.stream.start()
        except PortAudioError:
            # when stream has been closed, pointer become invalid, so we have to make a new stream
            self.stream = self._make_stream()
            self.stream.start()
        self.isStarted = True

    def stop(self):
        """
        Closes the playback stream and unsubscribes from ALAudioDevice. Does nothing if not started.

        :return: None
        """
        if not self.isStarted:
            return

        # flag first, so that processRemote() stops writing to the stream that is about to be closed
        self.isStarted = False
        try:
            self.stream.close()
        finally:
            # unsubscribe even when closing the stream failed, otherwise chunks keep arriving
            self.audio.unsubscribe(self.getName())

    def processRemote(self, nbOfChannels, nbrOfSamplesByChannel, aTimeStamp, buffer):
        """
        This is our callback method!
        Due to inheritance, this will be called once our module subscribes to the audio device in start()
        Name of method may not be changed!

        :param nbOfChannels: number of channels contained in buffer
        :param nbrOfSamplesByChannel: number of samples per channel contained in buffer
        :param aTimeStamp: param required for signature to work (unused)
        :param buffer: the actual, buffer audio data from Pepper's mic
        :return: None
        """
        if not self.isStarted:
            return

        # np.frombuffer replaces the deprecated binary mode of np.fromstring
        aSoundDataInterlaced = np.frombuffer(str(buffer), dtype=np.int16)
        # column-major reshape: one row per channel
        aSoundData = np.reshape(aSoundDataInterlaced, (nbOfChannels, nbrOfSamplesByChannel), 'F')
        self.buffer.append(aSoundData)

        # write the callback data from ALAudiodevice to sounddevice stream, causing it to be played
        # we need to transpose, because sounddevice expects columns to be channels, and we get rows as channels
        if self.livestream:
            print(np.shape(aSoundData))
            self.stream.write(aSoundData.T)

    def save_buffer(self):
        """
        Saves buffered audio data to physical .wav file.

        Writes the samples to "simple_out.raw" and then hands the file name to rawToWav().

        :return: None
        """
        filename = "simple_out"
        # build the data before opening the file, so a failure does not leave an empty file behind
        data = self.transform_buffer()
        with open(filename + ".raw", "wb") as outfile:
            data.tofile(outfile)
        rawToWav(filename)
        print(filename)

    def transform_buffer(self):
        """
        Reshapes buffer matrix to 1d array of microphone energy values, so that it can be treated as audio data

        :return: 1d array with the samples of the first channel of all buffered chunks;
                 an empty int16 array when nothing has been buffered yet
        """
        if not self.buffer:
            # np.concatenate raises ValueError on an empty list
            return np.array([], dtype=np.int16)
        return np.concatenate(self.buffer, axis=1)[0]
def main():
    """
    Creates the module, starts streaming and keeps the process alive until interrupted.

    On exit (e.g. Ctrl+C) the module is stopped, which closes the playback stream and
    unsubscribes it from the audio device.

    :return: None
    """
    # Warning: SpeechRecognition must be a global variable
    # The name given to the constructor must be the name of the
    # variable
    global SpeechRecognition
    SpeechRecognition = SpeechRecognitionModule("SpeechRecognition", IP, PORT)
    SpeechRecognition.start()

    # for a quick sound quality test: sleep a few seconds here, then call SpeechRecognition.save_buffer()

    try:
        while True:
            # sleep instead of spinning, so that waiting does not occupy a full CPU core
            time.sleep(1)
    except KeyboardInterrupt:
        print('interrupted!')
    finally:
        SpeechRecognition.stop()


if __name__ == "__main__":
    main()