-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathenvelope.py
72 lines (57 loc) · 2.18 KB
/
envelope.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
"""Code to calculate speech envelopes."""
import numpy as np
from brian2 import Hz, kHz
from brian2hears import Sound, erbspace, Gammatone, Filterbank
from scipy import signal
def calculate_envelope(audio_path, power_factor=0.6, target_fs=64):
    """Calculate the gammatone envelope of a raw speech file.

    The calculation of the envelope is based on the following publication:
    "Auditory-Inspired Speech Envelope Extraction Methods for Improved
    EEG-Based Auditory Attention Detection in a Cocktail Party Scenario"
    (Biesmans et al., 2016)

    Parameters
    ----------
    audio_path : str
        Path of a NumPy archive readable by ``np.load`` that provides the
        entries ``"audio"`` (the signal handed to ``Sound``) and ``"fs"``
        (its sampling frequency, interpreted in Hz).
    power_factor : float
        Power used in the power law relation, which is used to model the
        relation between perceived loudness and actual speech intensity.
    target_fs : int
        Sampling frequency of the calculated envelope.

    Returns
    -------
    numpy.ndarray
        Envelope of the speech, resampled from ``fs`` to ``target_fs``.
    """
    # Use the archive as a context manager so the underlying file is closed
    # deterministically, even if one of the expected entries is missing.
    # Indexing reads each entry into memory, so both values remain usable
    # after the archive has been closed.
    with np.load(audio_path) as speech:
        audio, fs = speech["audio"], speech["fs"]

    sound = Sound(audio, samplerate=fs * Hz)

    # 28 center frequencies from 50 Hz till 5 kHz
    center_frequencies = erbspace(50 * Hz, 5 * kHz, 28)
    filter_bank = Gammatone(sound, center_frequencies)

    # Turn the filterbank output into a single envelope signal.
    envelope_calculation = EnvelopeFromGammatone(filter_bank, power_factor)
    output = envelope_calculation.process()

    # Resample from the audio rate to the requested envelope rate
    # (up-factor target_fs, down-factor fs).
    envelope = signal.resample_poly(output, target_fs, fs)
    return envelope
class EnvelopeFromGammatone(Filterbank):
    """Turn the output of a gammatone filterbank into an envelope signal."""

    def __init__(self, source, power_factor):
        """Set up the envelope stage on top of ``source``.

        Parameters
        ----------
        source : Gammatone
            Gammatone filterbank whose output is converted to an envelope.
        power_factor : float
            Exponent applied to the absolute value of every input sample.
        """
        super().__init__(source)
        # Assigned after the base initializer so that 1 is the value that
        # remains on the instance: this stage emits a single column.
        self.nchannels = 1
        self.power_factor = power_factor

    def buffer_apply(self, input_):
        """Compress a buffer with the power law and collapse it along axis 1.

        Parameters
        ----------
        input_ : numpy.ndarray
            Two-dimensional buffer; presumably (samples, channels) as
            delivered by the source filterbank -- confirm against the
            brian2hears buffering contract.

        Returns
        -------
        numpy.ndarray
            Array of shape ``(input_.shape[0], self.nchannels)`` holding, per
            row, the sum of ``|input_| ** power_factor`` over axis 1.
        """
        n_rows = np.shape(input_)[0]
        compressed = np.power(np.abs(input_), self.power_factor)
        collapsed = np.sum(compressed, axis=1, keepdims=True)
        return np.reshape(collapsed, (n_rows, self.nchannels))