Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

v2.1 node #364

Merged
merged 2 commits into from
Dec 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions .github/workflows/nodejs-demos.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,12 @@ jobs:
with:
node-version: ${{ matrix.node-version }}

# ************** REMOVE AFTER RELEASE ********************
- name: Build Local Packages
run: yarn && yarn build
working-directory: binding/nodejs
# ********************************************************

- name: Install dependencies
run: yarn install

Expand All @@ -55,6 +61,12 @@ jobs:
steps:
- uses: actions/checkout@v3

# ************** REMOVE AFTER RELEASE ********************
- name: Build Local Packages
run: yarn && yarn build
working-directory: binding/nodejs
# ********************************************************

- name: Install dependencies
run: yarn install

Expand Down
8 changes: 8 additions & 0 deletions binding/nodejs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,14 @@ while (true) {
Replace `${ACCESS_KEY}` with your access key obtained from [Picovoice Console](https://console.picovoice.ai/). Finally, when done, be sure to explicitly release the resources using
`handle.release()`.

### Language Model

The Cheetah Node.js SDK comes preloaded with a default English language model (`.pv` file).
Default models for other supported languages can be found in [lib/common](../../lib/common).

Create custom language models using the [Picovoice Console](https://console.picovoice.ai/). Here you can train
language models with custom vocabulary and boost words in the existing vocabulary.

## Demos

The [Cheetah Node.js demo package](https://www.npmjs.com/package/@picovoice/cheetah-node-demo) provides command-line utilities for processing audio using Cheetah.
2 changes: 1 addition & 1 deletion binding/nodejs/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@picovoice/cheetah-node",
"version": "2.0.3",
"version": "2.1.0",
"description": "Picovoice Cheetah Node.js binding",
"main": "dist/index.js",
"types": "dist/types/index.d.ts",
Expand Down
87 changes: 48 additions & 39 deletions binding/nodejs/test/index.test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
//
// Copyright 2022-2023 Picovoice Inc.
// Copyright 2022-2024 Picovoice Inc.
//
// You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE"
// file accompanying this source.
Expand All @@ -20,17 +20,12 @@ import { WaveFile } from 'wavefile';
import { getSystemLibraryPath } from '../src/platforms';

import {
TRANSCRIPT,
getModelPathByLanguage,
getAudioFile,
getModelPath,
getTestParameters,
getLanguageTestParameters,
} from './test_utils';

const MODEL_PATH = getModelPath();
const TEST_PARAMETERS = getTestParameters();
const WAV_PATH = "test.wav";

const libraryPath = getSystemLibraryPath();
const LANGUAGE_TEST_PARAMETERS = getLanguageTestParameters();

const ACCESS_KEY = process.argv
.filter(x => x.startsWith('--access_key='))[0]
Expand Down Expand Up @@ -76,8 +71,7 @@ const loadPcm = (audioFile: string): Int16Array => {
const waveBuffer = fs.readFileSync(waveFilePath);
const waveAudioFile = new WaveFile(waveBuffer);

const pcm: any = waveAudioFile.getSamples(false, Int16Array);
return pcm;
return waveAudioFile.getSamples(false, Int16Array) as any;
};

const cheetahProcessWaveFile = (
Expand All @@ -101,60 +95,73 @@ const cheetahProcessWaveFile = (


const testCheetahProcess = (
_: string,
transcript: string,
testPunctuation: boolean,
language: string,
audioFile: string,
referenceTranscript: string,
punctuations: string[],
enableAutomaticPunctuation: boolean,
errorRate: number,
audioFile: string
) => {
const modelPath = getModelPathByLanguage(language);

let cheetahEngine = new Cheetah(ACCESS_KEY, {
enableAutomaticPunctuation: testPunctuation,
modelPath,
enableAutomaticPunctuation,
});

let [res, __] = cheetahProcessWaveFile(cheetahEngine, audioFile);
let [transcript] = cheetahProcessWaveFile(cheetahEngine, audioFile);

let normalizedTranscript = referenceTranscript;
if (!enableAutomaticPunctuation) {
for (const punctuation of punctuations) {
normalizedTranscript = normalizedTranscript.replace(punctuation, "");
}
}

expect(
characterErrorRate(res, transcript) < errorRate
characterErrorRate(transcript, normalizedTranscript) < errorRate
).toBeTruthy();

cheetahEngine.release();
};

describe('successful processes', () => {
it.each(TEST_PARAMETERS)(
it.each(LANGUAGE_TEST_PARAMETERS)(
'testing process `%p`',
(
language: string,
audioFile: string,
transcript: string,
_: string,
punctuations: string[],
errorRate: number,
audioFile: string
) => {
testCheetahProcess(
language,
audioFile,
transcript,
punctuations,
false,
errorRate,
audioFile
);
}
);

it.each(TEST_PARAMETERS)(
'testing process `%p` with punctuation',
it.each(LANGUAGE_TEST_PARAMETERS)(
'testing process `%p` with punctuation',
(
language: string,
_: string,
transcriptWithPunctuation: string,
audioFile: string,
transcript: string,
punctuations: string[],
errorRate: number,
audioFile: string
) => {
testCheetahProcess(
language,
transcriptWithPunctuation,
audioFile,
transcript,
punctuations,
true,
errorRate,
audioFile
);
}
);
Expand All @@ -170,30 +177,32 @@ describe('Defaults', () => {

describe('manual paths', () => {
test('manual model path', () => {
let cheetahEngine = new Cheetah(ACCESS_KEY, { modelPath: MODEL_PATH });
let cheetahEngine = new Cheetah(ACCESS_KEY, { modelPath: getModelPathByLanguage("en") });

let [transcript, _] = cheetahProcessWaveFile(
let [transcript] = cheetahProcessWaveFile(
cheetahEngine,
WAV_PATH
"test.wav"
);

expect(transcript).toBe(TRANSCRIPT);
expect(transcript.length).toBeGreaterThan(0);
cheetahEngine.release();
});

test('manual model and library path', () => {
const libraryPath = getSystemLibraryPath();

let cheetahEngine = new Cheetah(ACCESS_KEY, {
modelPath: MODEL_PATH,
modelPath: getModelPathByLanguage("en"),
libraryPath: libraryPath,
endpointDurationSec: 0.2,
});

let [transcript, _] = cheetahProcessWaveFile(
let [transcript] = cheetahProcessWaveFile(
cheetahEngine,
WAV_PATH
"test.wav"
);

expect(transcript).toBe(TRANSCRIPT);
expect(transcript.length).toBeGreaterThan(0);
cheetahEngine.release();
});
});
Expand All @@ -202,7 +211,7 @@ describe("error message stack", () => {
test("message stack cleared after read", () => {
let error: string[] = [];
try {
new Cheetah('invalid', { modelPath: MODEL_PATH });
new Cheetah('invalid', { modelPath: getModelPathByLanguage("en") });
} catch (e: any) {
error = e.messageStack;
}
Expand All @@ -211,7 +220,7 @@ describe("error message stack", () => {
expect(error.length).toBeLessThanOrEqual(8);

try {
new Cheetah('invalid', { modelPath: MODEL_PATH });
new Cheetah('invalid', { modelPath: getModelPathByLanguage("en") });
} catch (e: any) {
for (let i = 0; i < error.length; i++) {
expect(error[i]).toEqual(e.messageStack[i]);
Expand Down
61 changes: 49 additions & 12 deletions binding/nodejs/test/test_utils.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
//
// Copyright 2023 Picovoice Inc.
// Copyright 2024 Picovoice Inc.
//
// You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE"
// file accompanying this source.
Expand All @@ -8,34 +8,71 @@
// an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
import * as fs from 'fs';
import * as path from 'path';

const ROOT_DIR = path.join(__dirname, '../../..');
const TEST_DATA_JSON = require(path.join(
ROOT_DIR,
'resources/.test/test_data.json'
));
const MB_40 = 1024 * 1024 * 40;

export const TRANSCRIPT =
'Mr quilter is the apostle of the middle classes and we are glad to welcome his gospel';
export const TRANSCRIPT_WITH_PUNCTUATION =
'Mr. Quilter is the apostle of the middle classes and we are glad to welcome his gospel.';
/**
 * Adds a language suffix to a base string.
 *
 * @param s Base string to decorate.
 * @param language Language code.
 * @returns `s` unchanged when `language` is exactly `'en'`; otherwise `s`
 *   followed by an underscore and the language code.
 */
function appendLanguage(s: string, language: string): string {
  return language === 'en' ? s : `${s}_${language}`;
}

export function getModelPath(): string {
export function getModelPathByLanguage(language: string): string {
return path.join(
ROOT_DIR,
`lib/common/cheetah_params.pv`
`${appendLanguage('lib/common/cheetah_params', language)}.pv`
);
}

/**
 * Builds the path of an audio sample file.
 *
 * @param audioFile File name (or relative path) of the sample.
 * @returns `audioFile` joined onto `resources/audio_samples` under `ROOT_DIR`.
 */
export function getAudioFile(audioFile: string): string {
  const samplesDir = path.join(ROOT_DIR, 'resources/audio_samples');
  return path.join(samplesDir, audioFile);
}

export function getTestParameters(): [
/**
 * Reads the CPU part identifier from `/proc/cpuinfo`.
 *
 * @returns The last space-separated token, lower-cased, of the first line
 *   containing `CPU part`; an empty string when `/proc/cpuinfo` does not
 *   exist or no such line is present.
 */
function getCpuPart(): string {
  const cpuInfoPath = '/proc/cpuinfo';
  if (fs.existsSync(cpuInfoPath)) {
    const partLine = fs
      .readFileSync(cpuInfoPath, 'ascii')
      .split('\n')
      .find(line => line.includes('CPU part'));
    if (partLine !== undefined) {
      // The value is whatever follows the final space on the line.
      const tokens = partLine.split(' ');
      return tokens[tokens.length - 1].toLowerCase();
    }
  }
  return '';
}

/**
 * Looks up the on-disk size of the model file for a language.
 *
 * @param language Language code passed to `getModelPathByLanguage`.
 * @returns The `size` reported by `fs.statSync` for the model file (bytes).
 * @throws Whatever `fs.statSync` throws, e.g. when the file does not exist.
 */
function getModelSize(language: string): number {
  return fs.statSync(getModelPathByLanguage(language)).size;
}

export function getLanguageTestParameters(): [
string,
string,
string,
string[],
number,
string
][] {
return [
["en", TRANSCRIPT, TRANSCRIPT_WITH_PUNCTUATION, 0.025, "test.wav"]
];
const cpuPart = getCpuPart();
let parametersJson = TEST_DATA_JSON.tests.language_tests;
if (cpuPart === "0xd03") {
parametersJson = parametersJson.filter((x: any) => (getModelSize(x.language) < MB_40));
}
return parametersJson.map((x: any) => [
x.language,
x.audio_file,
x.transcript,
x.punctuations,
x.error_rate,
]);
}
2 changes: 1 addition & 1 deletion demo/nodejs/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
"author": "Picovoice Inc.",
"license": "Apache-2.0",
"dependencies": {
"@picovoice/cheetah-node": "=2.0.3",
"@picovoice/cheetah-node": "../../binding/nodejs",
"@picovoice/pvrecorder-node": "^1.2.4",
"commander": "^6.1.0",
"readline": "^1.3.0",
Expand Down
6 changes: 2 additions & 4 deletions demo/nodejs/yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,8 @@
# yarn lockfile v1


"@picovoice/cheetah-node@=2.0.3":
version "2.0.3"
resolved "https://registry.yarnpkg.com/@picovoice/cheetah-node/-/cheetah-node-2.0.3.tgz#6b426ded58c2cf21e82a3282582f46698f3ddc32"
integrity sha512-BqcDV72PhjE41GQohlnfu/1xr52QTSMlpo504tTY+JgUHcoHnwT0jEp0AbpZgdXLIexYgH/dzUt8Ls12yXyCgQ==
"@picovoice/cheetah-node@../../binding/nodejs":
version "2.1.0"

"@picovoice/pvrecorder-node@^1.2.4":
version "1.2.4"
Expand Down
Loading