# audioProcessing.py
# NOTE: the upstream repository was archived by its owner on Mar 5, 2021 and is read-only.
# preprocessing of .wav and .ogg sound files to a common format
# then return different spectral and audio representations
import os
import librosa
import matplotlib.pyplot as plt
import numpy as np
def get_mfcc(audioFile, sampleRate=16000, n_mfcc=13):
    """Return the Mel-frequency cepstral coefficients (MFCCs) of a sound file.

    The audio is decoded at ``sampleRate``, converted to a 128-band
    mel-scaled power spectrogram, expressed in dB relative to its peak
    power, and reduced to ``n_mfcc`` cepstral coefficients.

    Args:
        audioFile: path of the sound file to load.
        sampleRate: sampling rate (Hz) the audio is resampled to.
        n_mfcc: number of MFCCs to extract.

    Returns:
        The MFCC time series produced by ``librosa.feature.mfcc``.
    """
    # Resample while loading instead of loading at librosa's default rate
    # and resampling a second time; one resampling pass is cheaper and
    # loses less signal quality.
    audio, _ = librosa.load(audioFile, sr=sampleRate)

    # Mel-scaled power (energy-squared) spectrogram. The signal is passed
    # by keyword because newer librosa releases reject it positionally.
    mel_power = librosa.feature.melspectrogram(
        y=audio, sr=sampleRate, n_mels=128)

    # Convert to log scale (dB), using the peak power as the reference.
    log_mel = librosa.power_to_db(mel_power, ref=np.max)

    return librosa.feature.mfcc(S=log_mel, n_mfcc=n_mfcc)
def audio_spectogram(audioFile, sampleRate=16000, complexity='magnitude'):
    """Return the fast Mellin transform of a sound file.

    Args:
        audioFile: path of the sound file to load.
        sampleRate: sampling rate (Hz) the audio is resampled to.
        complexity: which part of the transform to return:
            ``'magnitude'`` for the magnitude, ``'phase'`` for the phase
            factor returned by ``librosa.magphase``, or ``'complex'`` for
            the untouched complex transform.

    Returns:
        The requested representation of ``librosa.fmt(audio)``.

    Raises:
        ValueError: if ``complexity`` is not one of the three supported
            values.
    """
    # Validate first: an unknown format used to print a message and fall
    # through to an implicit ``None``, which only surfaced later as an
    # AttributeError in the caller. Failing here also skips the file load.
    if complexity not in ('magnitude', 'phase', 'complex'):
        raise ValueError(
            "audio_spectogram(): unknown complexity %r; expected "
            "'magnitude', 'phase' or 'complex'" % (complexity,))

    # Resample while loading rather than in a second, separate pass.
    audio, _ = librosa.load(audioFile, sr=sampleRate)

    # Fast Mellin transform of the time-domain signal.
    audio_spec = librosa.fmt(audio)
    if complexity == 'complex':
        return audio_spec

    magnitude, phase = librosa.magphase(audio_spec)
    return magnitude if complexity == 'magnitude' else phase
def short_fft(audioFile, sampleRate=16000, window=2048, numFrames=10,
              complexity='magnitude'):
    """Return the short-time Fourier transform (STFT) of a sound file.

    Args:
        audioFile: path of the sound file to load.
        sampleRate: sampling rate (Hz) the audio is resampled to.
        window: FFT size, passed to ``librosa.stft`` as ``n_fft``.
        numFrames: passed to ``librosa.stft`` as ``hop_length``. Despite
            its name this is the step between successive frames, not a
            frame count; the name is kept for backward compatibility.
        complexity: which part of the transform to return:
            ``'magnitude'`` for the magnitude, ``'phase'`` for the phase
            factor returned by ``librosa.magphase``, or ``'complex'`` for
            the untouched complex STFT.

    Returns:
        The requested representation of the STFT matrix.

    Raises:
        ValueError: if ``complexity`` is not one of the three supported
            values.
    """
    # Validate first: an unknown format used to print a message and fall
    # through to an implicit ``None``, which only surfaced later as an
    # AttributeError in the caller. Failing here also skips the file load.
    if complexity not in ('magnitude', 'phase', 'complex'):
        raise ValueError(
            "short_fft(): unknown complexity %r; expected "
            "'magnitude', 'phase' or 'complex'" % (complexity,))

    # Resample while loading rather than in a second, separate pass.
    audio, _ = librosa.load(audioFile, sr=sampleRate)

    fft_transform = librosa.stft(audio, n_fft=window, hop_length=numFrames)
    if complexity == 'complex':
        return fft_transform

    magnitude, phase = librosa.magphase(fft_transform)
    return magnitude if complexity == 'magnitude' else phase
#######################################################
## MAIN ##
#######################################################
def main():
    """Compute and plot each representation for one file in ``sounds``.

    The first regular file (in sorted order) of the ``sounds`` directory is
    run through ``get_mfcc``, ``audio_spectogram`` and ``short_fft``. The
    shape of every result is printed, then the MFCCs and the Mellin
    spectrum are plotted.

    Raises:
        SystemExit: if the ``sounds`` directory contains no files.
    """
    soundDir = "sounds"

    # Sort the listing so the chosen file does not depend on the arbitrary
    # order returned by os.listdir, and skip sub-directories.
    audioFiles = sorted(
        name for name in os.listdir(soundDir)
        if os.path.isfile(os.path.join(soundDir, name))
    )
    if not audioFiles:
        # Exit with a clear message instead of an IndexError below.
        raise SystemExit("no audio files found in %r" % soundDir)

    audioFile = os.path.join(soundDir, audioFiles[0])

    print('---------------------')
    mfcc = get_mfcc(audioFile, sampleRate=16000, n_mfcc=13)
    print(mfcc.shape)

    print('---------------------')
    audioSpec = audio_spectogram(audioFile, sampleRate=16000)
    print(audioSpec.shape)

    print('---------------------')
    fft = short_fft(audioFile, sampleRate=16000, window=2048, numFrames=10)
    print(fft.shape)
    print('---------------------')

    # TO-DO: break up these spectra into training samples

    plt.figure()
    plt.plot(mfcc.T)

    plt.figure()
    plt.plot(audioSpec)

    # NOTE: the STFT is only shape-checked above; it is not plotted.
    plt.show()
# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()