
writing comments, refactoring the codebase
ljj7975 committed Aug 11, 2020
1 parent 67ba05f commit 76827a9
Showing 15 changed files with 113 additions and 689 deletions.
1 change: 0 additions & 1 deletion .gitignore
@@ -8,4 +8,3 @@ data/*.gz
**/__pycache__
/node_modules
package-lock.json
honkling-assistant/Mojibar-darwin-x64
31 changes: 22 additions & 9 deletions README.md
@@ -1,22 +1,35 @@
# hey_firefox
This branch contains minimal code for the PocketSphinx/Honkling-based hey firefox detection demo.
# HOWL

Unlike the existing implementation of Honkling, this implementation functions as a standalone web service built on Node.js.
In-browser keyword spotting for *hey firefox*

Weights can be obtained from the Python implementation, [howl](https://github.com/castorini/howl)

## Instructions
* Fetch trained weights: `git submodule update --init --recursive`

* [Install docker](https://docs.docker.com/engine/install/) and [enable GPU support](https://cnvrg.io/how-to-setup-docker-and-nvidia-docker-2-0-on-ubuntu-18-04/)

* `docker build -t honkling .`
* `docker build -t howl .`

## In-browser keyword spotting

To see the working demo, simply run:

* `nvidia-docker run -it -p 8000:8000 -v $(pwd):/app/src/ -v /data/kws/mozilla_processed:/data honkling` (use `docker` instead of `nvidia-docker` if GPU is missing)
```
docker run -it -p 8000:8000 -v $(pwd):/app/src/ howl
npm run dev
```

* `npm run dev` for starting up a server
The server will be running at `localhost:8000`

* `npm run eval` for running the evaluation (meyda version not supported)
## Evaluating the performance of the JS implementation

The processed dataset for evaluation can be found in [howl](https://github.com/castorini/howl)

Open a browser (Firefox recommended) and navigate to `localhost:8000` for honkling-based implementation
```
nvidia-docker run -it -p 8000:8000 -v <path_to_dataset>:/data -v $(pwd):/app/src/ howl
npm run eval
```

PocketSphinx-based implementation is available at `localhost:8000/pocketsphinx`. For this implementation, user needs to click start manually.
## Things to note
* Even though we use Meyda.js for feature extraction, it has been modified quite a bit, so we recommend looking at our source code directly to understand what is going on with feature extraction (a sketch of stock Meyda usage follows below for orientation).
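
The sketch below shows what stock, unmodified Meyda usage looks like; it is for orientation only, and the frame size and the choice of the `mfcc` feature are assumptions, not what this repository's modified copy does.

```
// Stock Meyda usage, NOT this repo's modified pipeline.
// The 512-sample frame and the 'mfcc' feature are illustrative assumptions.
Meyda.sampleRate = 16000; // matches config.sampleRate
Meyda.bufferSize = 512;   // Meyda frames must be a power of two
Meyda.melBands = 40;      // matches featureExtractionConfig.melBands

function extractFrame(frame) {
  // frame: Float32Array holding Meyda.bufferSize time-domain samples
  return Meyda.extract('mfcc', frame);
}
```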
8 changes: 4 additions & 4 deletions common/config.js
@@ -4,12 +4,12 @@ if (typeof util === 'undefined') {
}

// in-browser related configs
// units are all in seconds unless specified otherwise

var config = {
'commands': ["hey", "fire", "fox", "unknown3", "unknown4", "unknown5", "unknown6", "unknown7", "unknown8", "unknown9"],
'predictionFrequency': 62,
'hopSize' : 12.5,
'windowSize' : 0.5, // in s
'predictionFrequency': 0.062, // 62 ms
'windowSize' : 0.5, // 500 ms
'sampleRate': 16000
}

@@ -22,7 +22,7 @@ config['micAudioProcessorConfig'] = micAudioProcessorConfig

var featureExtractionConfig = {
'melBands': 40, // n_mels (only used for Meyda)
'hopSize': config.sampleRate / 1000 * config.hopSize
'hopSize': config.sampleRate * 0.0125 // hop by 12.5 ms
}
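
For reference, a small sketch of the sample counts these settings imply; the constant names below are illustrative, not part of the codebase.

```
// Derived values implied by common/config.js; names are illustrative only.
const sampleRate = 16000;
const windowSamples = 0.5 * sampleRate;             // 8000 samples per 500 ms window
const hopSamples = sampleRate * 0.0125;             // 200 samples per 12.5 ms hop
const framesPerWindow = windowSamples / hopSamples; // 40 frames per window
// a prediction every 62 ms is roughly one prediction per 5 hops (62 / 12.5 = 4.96)
```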


116 changes: 0 additions & 116 deletions common/featureExtractor.js

This file was deleted.

10 changes: 3 additions & 7 deletions common/inferenceEngine.js
@@ -1,9 +1,8 @@

class InferenceEngine {
constructor(config) {
this.inference_window_ms = config.inferenceEngineConfig.inference_window_ms
this.inference_window_ms = config.inferenceEngineConfig.inference_window_ms // how far back we look when detecting the target phrase
this.smoothing_window_ms = config.inferenceEngineConfig.smoothing_window_ms
this.tolerance_window_ms = config.inferenceEngineConfig.tolerance_window_ms
this.tolerance_window_ms = config.inferenceEngineConfig.tolerance_window_ms // time window within which false detections are tolerated
this.inference_weights = config.inferenceEngineConfig.inference_weights
this.inference_sequence = config.inferenceEngineConfig.inference_sequence

@@ -127,10 +126,7 @@ class InferenceEngine {
let label = this.getPrediction(d.getTime());
let command = this.commands[label];

// let raw_pred = this.argmax(pred)
// console.log(this.commands[raw_pred], command)

return command
return command;
}
}
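
The three windows above drive detection. As a sketch of the general idea, assuming (not confirmed by this diff) that smoothing averages the posteriors falling inside `smoothing_window_ms` before taking the argmax:

```
// Illustrative posterior smoothing; the actual InferenceEngine also checks
// that smoothed labels appear in inference_sequence order within
// inference_window_ms. All names below are assumptions for illustration.
function smoothedLabel(history, nowMs, smoothingWindowMs) {
  // history: array of { timeMs, probs } predictions, oldest first
  const recent = history.filter(h => nowMs - h.timeMs <= smoothingWindowMs);
  if (recent.length === 0) return -1; // nothing to smooth yet
  const avg = new Array(recent[0].probs.length).fill(0);
  for (const h of recent) {
    h.probs.forEach((p, i) => { avg[i] += p / recent.length; });
  }
  return avg.indexOf(Math.max(...avg)); // argmax of the averaged posteriors
}
```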

40 changes: 15 additions & 25 deletions common/micAudioProcessor.js
@@ -1,8 +1,8 @@
let micProc;
let _micProc;

class MicAudioProcessor {
constructor(config) {
micProc = this;
_micProc = this;

this.sampleRate = config.sampleRate;
this.windowSize = config.windowSize * this.sampleRate; // convert from s to n_samples
@@ -21,21 +21,14 @@
}

this.audioContext = new AudioContext();

this.browserSampleRate = this.audioContext.sampleRate; // 44100
this.paddingSize = config.micAudioProcessorConfig.paddingSize
this.srcBufferSize = 1024;
// with a buffer size of 1024, 43 buffers capture 44032 samples at the original 44100 Hz rate;
// once each buffer is downsampled from 44100 Hz to 16000 Hz,
// the resulting number of samples is 15953 (43 buffers of 371 samples)


this.paddingSize = config.micAudioProcessorConfig.paddingSize

// To be used when meyda is removed
// this.paddingSize = 512;

this.initDownSampleNode();
// this.featureExtractor = new FeatureExtractor(config);
this.data = [];
}

@@ -44,24 +44,24 @@

var successCallback = function (micStream) {
console.log('User allowed microphone access.');
micProc.micSource = micProc.audioContext.createMediaStreamSource(micStream);
micProc.micSource.connect(micProc.downSampleNode);
micProc.downSampleNode.connect(micProc.audioContext.destination);
_micProc.micSource = _micProc.audioContext.createMediaStreamSource(micStream);
_micProc.micSource.connect(_micProc.downSampleNode);
_micProc.downSampleNode.connect(_micProc.audioContext.destination);
visualizer({
parent: "#waveform",
stream: micStream
});

if (micProc.audioContext.state == "suspended") {
if (_micProc.audioContext.state == "suspended") {
// audio context start suspended on Chrome due to auto play policy
micProc.audioContext.resume();
_micProc.audioContext.resume();
}
micProc.permissionDeferred.resolve();
_micProc.permissionDeferred.resolve();
};

var errorCallback = function (err) {
console.log('Initializing microphone has failed. Falling back to default audio file', err);
micProc.permissionDeferred.reject();
_micProc.permissionDeferred.reject();
};

try {
@@ -106,21 +99,18 @@

this.downSampleNode.onaudioprocess = function(audioProcessingEvent) {
var inputData = audioProcessingEvent.inputBuffer.getChannelData(0);
var downSampledData = interpolateArray(inputData, micProc.downSampledBufferSize);

// micProc.featureExtractor.appendData(downSampledData);
var downSampledData = interpolateArray(inputData, _micProc.downSampledBufferSize);

micProc.data = micProc.data.concat(downSampledData);
_micProc.data = _micProc.data.concat(downSampledData);

// always keep last window
if (micProc.data.length > micProc.windowSize + micProc.paddingSize) {
micProc.data.splice(0, micProc.data.length - (micProc.windowSize + micProc.paddingSize));
// always keep the last window
if (_micProc.data.length > _micProc.windowSize + _micProc.paddingSize) {
_micProc.data.splice(0, _micProc.data.length - (_micProc.windowSize + _micProc.paddingSize));
}
}
}

getData() {
// return this.featureExtractor.extract();
return this.data;
}
}
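
`interpolateArray` is defined elsewhere in the repository; a minimal linear-interpolation resampler of the same shape might look like the following sketch (assumed behavior; the real helper may differ).

```
// Sketch of a linear-interpolation resampler with interpolateArray's shape;
// the repo's actual implementation may differ.
function interpolateArraySketch(data, newLength) {
  const out = new Array(newLength);
  const step = (data.length - 1) / (newLength - 1);
  for (let i = 0; i < newLength; i++) {
    const pos = i * step;
    const lo = Math.floor(pos);
    const hi = Math.min(lo + 1, data.length - 1);
    const frac = pos - lo;
    out[i] = data[lo] * (1 - frac) + data[hi] * frac; // linear interpolation
  }
  return out;
}
// e.g. one 1024-sample buffer at 44.1 kHz becomes 371 samples at 16 kHz:
// interpolateArraySketch(inputData, Math.floor(1024 * 16000 / 44100));
```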
