From 4fb2e077b7fd51ca3bc97ccf287988372f63c152 Mon Sep 17 00:00:00 2001 From: AssemblyAI Date: Mon, 24 Jun 2024 15:38:39 +0300 Subject: [PATCH] Project import generated by Copybara. GitOrigin-RevId: daf91e03013775692acc0e79d886a060228c6a02 --- CHANGELOG.md | 4 + samples/.prettierrc.json | 3 + samples/streaming-stt-from-mic/index.js | 54 ++++++------ samples/streaming-stt-from-mic/index.ts | 54 ++++++------ samples/streaming-stt-from-mic/sox.js | 89 ++++++++++--------- samples/streaming-stt-from-mic/sox.ts | 109 ++++++++++++------------ src/services/realtime/service.ts | 65 ++++++++++++++ 7 files changed, 224 insertions(+), 154 deletions(-) create mode 100644 samples/.prettierrc.json diff --git a/CHANGELOG.md b/CHANGELOG.md index cb74f8c..82892f0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## [4.5.1] + +- Add more TSDoc comments for `RealtimeService` documentation + ## [4.5.0] - You can now retrieve previous LeMUR responses using `client.lemur.getResponse("YOUR_REQUEST_ID")`. diff --git a/samples/.prettierrc.json b/samples/.prettierrc.json new file mode 100644 index 0000000..cce9d3c --- /dev/null +++ b/samples/.prettierrc.json @@ -0,0 +1,3 @@ +{ + "semi": false +} diff --git a/samples/streaming-stt-from-mic/index.js b/samples/streaming-stt-from-mic/index.js index 8367e7c..f7b29fb 100644 --- a/samples/streaming-stt-from-mic/index.js +++ b/samples/streaming-stt-from-mic/index.js @@ -1,47 +1,47 @@ -import "dotenv/config"; -import { AssemblyAI } from "assemblyai"; -import { SoxRecording } from "./sox.js"; -const SAMPLE_RATE = 16000; +import "dotenv/config" +import { AssemblyAI } from "assemblyai" +import { SoxRecording } from "./sox.js" +const SAMPLE_RATE = 16000 const client = new AssemblyAI({ apiKey: process.env.ASSEMBLYAI_API_KEY, -}); +}) const transcriber = client.realtime.transcriber({ sampleRate: SAMPLE_RATE, -}); +}) transcriber.on("open", ({ sessionId }) => { - console.log(`Session opened with ID: ${sessionId}`); -}); + console.log(`Session opened with ID: ${sessionId}`) +}) transcriber.on("error", (error) => { - console.error("Error:", error); -}); + console.error("Error:", error) +}) transcriber.on("close", (code, reason) => console.log("Session closed:", code, reason), -); +) transcriber.on("transcript", (transcript) => { if (!transcript.text) { - return; + return } if (transcript.message_type === "PartialTranscript") { - console.log("Partial:", transcript.text); + console.log("Partial:", transcript.text) } else { - console.log("Final:", transcript.text); + console.log("Final:", transcript.text) } -}); -console.log("Connecting to real-time transcript service"); -await transcriber.connect(); -console.log("Starting recording"); +}) +console.log("Connecting to real-time transcript service") +await transcriber.connect() +console.log("Starting recording") const recording = new SoxRecording({ channels: 1, sampleRate: SAMPLE_RATE, audioType: "wav", // Linear PCM -}); -recording.stream().pipeTo(transcriber.stream()); +}) +recording.stream().pipeTo(transcriber.stream()) // Stop recording and close connection using Ctrl-C. process.on("SIGINT", async function () { - console.log(); - console.log("Stopping recording"); - recording.stop(); - console.log("Closing real-time transcript connection"); - await transcriber.close(); - process.exit(); -}); + console.log() + console.log("Stopping recording") + recording.stop() + console.log("Closing real-time transcript connection") + await transcriber.close() + process.exit() +}) diff --git a/samples/streaming-stt-from-mic/index.ts b/samples/streaming-stt-from-mic/index.ts index e546637..c40c031 100644 --- a/samples/streaming-stt-from-mic/index.ts +++ b/samples/streaming-stt-from-mic/index.ts @@ -1,61 +1,61 @@ -import "dotenv/config"; -import { AssemblyAI, RealtimeTranscript } from "assemblyai"; -import { SoxRecording } from "./sox.js"; +import "dotenv/config" +import { AssemblyAI, RealtimeTranscript } from "assemblyai" +import { SoxRecording } from "./sox.js" -const SAMPLE_RATE = 16_000; +const SAMPLE_RATE = 16_000 const client = new AssemblyAI({ apiKey: process.env.ASSEMBLYAI_API_KEY!, -}); +}) const transcriber = client.realtime.transcriber({ sampleRate: SAMPLE_RATE, -}); +}) transcriber.on("open", ({ sessionId }) => { - console.log(`Session opened with ID: ${sessionId}`); -}); + console.log(`Session opened with ID: ${sessionId}`) +}) transcriber.on("error", (error: Error) => { - console.error("Error:", error); -}); + console.error("Error:", error) +}) transcriber.on("close", (code: number, reason: string) => console.log("Session closed:", code, reason), -); +) transcriber.on("transcript", (transcript: RealtimeTranscript) => { if (!transcript.text) { - return; + return } if (transcript.message_type === "PartialTranscript") { - console.log("Partial:", transcript.text); + console.log("Partial:", transcript.text) } else { - console.log("Final:", transcript.text); + console.log("Final:", transcript.text) } -}); +}) -console.log("Connecting to real-time transcript service"); -await transcriber.connect(); +console.log("Connecting to real-time transcript service") +await transcriber.connect() -console.log("Starting recording"); +console.log("Starting recording") const recording = new SoxRecording({ channels: 1, sampleRate: SAMPLE_RATE, audioType: "wav", // Linear PCM -}); +}) -recording.stream().pipeTo(transcriber.stream()); +recording.stream().pipeTo(transcriber.stream()) // Stop recording and close connection using Ctrl-C. process.on("SIGINT", async function () { - console.log(); - console.log("Stopping recording"); - recording.stop(); + console.log() + console.log("Stopping recording") + recording.stop() - console.log("Closing real-time transcript connection"); - await transcriber.close(); + console.log("Closing real-time transcript connection") + await transcriber.close() - process.exit(); -}); + process.exit() +}) diff --git a/samples/streaming-stt-from-mic/sox.js b/samples/streaming-stt-from-mic/sox.js index c9028d4..f75e659 100644 --- a/samples/streaming-stt-from-mic/sox.js +++ b/samples/streaming-stt-from-mic/sox.js @@ -1,16 +1,16 @@ // This code is a simplified and typed version adapted from the 'node-record-lpcm16' project by Gilles De Mey. // Original source code: https://github.com/gillesdemey/node-record-lpcm16 -import { ok as assert } from "assert"; -import { spawn } from "child_process"; -import { Readable } from "stream"; +import { ok as assert } from "assert" +import { spawn } from "child_process" +import { Readable } from "stream" const debug = !!process.env.DEBUG && process.env.DEBUG.indexOf("record") !== -1 ? console.debug - : () => {}; + : () => {} export class SoxRecording { - options; - process; - soxStream; + options + process + soxStream constructor(options = {}) { const defaults = { sampleRate: 16000, @@ -21,14 +21,14 @@ export class SoxRecording { recorder: "sox", endOnSilence: false, audioType: "wav", - }; - this.options = Object.assign(defaults, options); - debug("Started recording"); - debug(this.options); - return this.start(); + } + this.options = Object.assign(defaults, options) + debug("Started recording") + debug(this.options) + return this.start() } start() { - const cmd = "sox"; + const cmd = "sox" const args = [ "--default-device", "--no-show-progress", @@ -43,58 +43,57 @@ export class SoxRecording { "--type", this.options.audioType, "-", // pipe - ]; - debug(` ${cmd} ${args.join(" ")}`); + ] + debug(` ${cmd} ${args.join(" ")}`) const cp = spawn(cmd, args, { - encoding: "binary", stdio: "pipe", - }); - const rec = cp.stdout; - const err = cp.stderr; - this.process = cp; // expose child process - this.soxStream = cp.stdout; // expose output stream + }) + const rec = cp.stdout + const err = cp.stderr + this.process = cp // expose child process + this.soxStream = cp.stdout // expose output stream cp.on("close", (code) => { - if (code === 0) return; + if (code === 0) return rec?.emit( "error", `${cmd} has exited with error code ${code}. Enable debugging with the environment variable debug=record.`, - ); - }); + ) + }) err?.on("data", (chunk) => { - debug(`STDERR: ${chunk}`); - }); + debug(`STDERR: ${chunk}`) + }) rec?.on("data", (chunk) => { - debug(`Recording ${chunk.length} bytes`); - }); + debug(`Recording ${chunk.length} bytes`) + }) rec?.on("end", () => { - debug("Recording ended"); - }); - return this; + debug("Recording ended") + }) + return this } stop() { - assert(this.process, "Recording not yet started"); - this.process.kill(); + assert(this.process, "Recording not yet started") + this.process.kill() } pause() { - assert(this.process, "Recording not yet started"); - this.process.kill("SIGSTOP"); - this.soxStream?.pause(); - debug("Paused recording"); + assert(this.process, "Recording not yet started") + this.process.kill("SIGSTOP") + this.soxStream?.pause() + debug("Paused recording") } resume() { - assert(this.process, "Recording not yet started"); - this.process.kill("SIGCONT"); - this.soxStream?.resume(); - debug("Resumed recording"); + assert(this.process, "Recording not yet started") + this.process.kill("SIGCONT") + this.soxStream?.resume() + debug("Resumed recording") } isPaused() { - assert(this.process, "Recording not yet started"); - return this.soxStream?.isPaused(); + assert(this.process, "Recording not yet started") + return this.soxStream?.isPaused() } stream() { - assert(this?.soxStream, "Recording not yet started"); - return Readable.toWeb(this?.soxStream); + assert(this?.soxStream, "Recording not yet started") + return Readable.toWeb(this?.soxStream) } } diff --git a/samples/streaming-stt-from-mic/sox.ts b/samples/streaming-stt-from-mic/sox.ts index f64f2d1..b15670d 100644 --- a/samples/streaming-stt-from-mic/sox.ts +++ b/samples/streaming-stt-from-mic/sox.ts @@ -1,30 +1,30 @@ // This code is a simplified and typed version adapted from the 'node-record-lpcm16' project by Gilles De Mey. // Original source code: https://github.com/gillesdemey/node-record-lpcm16 -import { ok as assert } from "assert"; -import { ChildProcess, ExecFileOptions, spawn } from "child_process"; -import { Readable } from "stream"; -import { ReadableStream } from "stream/web"; +import { ok as assert } from "assert" +import { ChildProcess, spawn } from "child_process" +import { Readable } from "stream" +import { ReadableStream } from "stream/web" export type SoxRecordingOptions = { - sampleRate: number; - channels: number; - compress: boolean; - threshold: number; - silence: string; - endOnSilence: boolean; - audioType: string; -}; + sampleRate: number + channels: number + compress: boolean + threshold: number + silence: string + endOnSilence: boolean + audioType: string +} const debug = !!process.env.DEBUG && process.env.DEBUG.indexOf("record") !== -1 ? console.debug - : () => {}; + : () => {} export class SoxRecording { - private options: SoxRecordingOptions; - private process: ChildProcess | undefined; - private soxStream: Readable | null | undefined; + private options: SoxRecordingOptions + private process: ChildProcess | undefined + private soxStream: Readable | null | undefined constructor(options = {}) { const defaults = { @@ -36,18 +36,18 @@ export class SoxRecording { recorder: "sox", endOnSilence: false, audioType: "wav", - }; + } - this.options = Object.assign(defaults, options); + this.options = Object.assign(defaults, options) - debug("Started recording"); - debug(this.options); + debug("Started recording") + debug(this.options) - return this.start(); + return this.start() } start() { - const cmd = "sox"; + const cmd = "sox" const args = [ "--default-device", "--no-show-progress", // show no progress @@ -62,73 +62,72 @@ export class SoxRecording { "--type", this.options.audioType, // audio type "-", // pipe - ]; - debug(` ${cmd} ${args.join(" ")}`); + ] + debug(` ${cmd} ${args.join(" ")}`) const cp = spawn(cmd, args, { - encoding: "binary", stdio: "pipe", - } as ExecFileOptions); - const rec = cp.stdout; - const err = cp.stderr; + }) + const rec = cp.stdout + const err = cp.stderr - this.process = cp; // expose child process - this.soxStream = cp.stdout; // expose output stream + this.process = cp // expose child process + this.soxStream = cp.stdout // expose output stream cp.on("close", (code) => { - if (code === 0) return; + if (code === 0) return rec?.emit( "error", `${cmd} has exited with error code ${code}. Enable debugging with the environment variable debug=record.`, - ); - }); + ) + }) err?.on("data", (chunk) => { - debug(`STDERR: ${chunk}`); - }); + debug(`STDERR: ${chunk}`) + }) rec?.on("data", (chunk) => { - debug(`Recording ${chunk.length} bytes`); - }); + debug(`Recording ${chunk.length} bytes`) + }) rec?.on("end", () => { - debug("Recording ended"); - }); + debug("Recording ended") + }) - return this; + return this } stop() { - assert(this.process, "Recording not yet started"); - this.process.kill(); + assert(this.process, "Recording not yet started") + this.process.kill() } pause() { - assert(this.process, "Recording not yet started"); + assert(this.process, "Recording not yet started") - this.process.kill("SIGSTOP"); - this.soxStream?.pause(); - debug("Paused recording"); + this.process.kill("SIGSTOP") + this.soxStream?.pause() + debug("Paused recording") } resume() { - assert(this.process, "Recording not yet started"); + assert(this.process, "Recording not yet started") - this.process.kill("SIGCONT"); - this.soxStream?.resume(); - debug("Resumed recording"); + this.process.kill("SIGCONT") + this.soxStream?.resume() + debug("Resumed recording") } isPaused() { - assert(this.process, "Recording not yet started"); + assert(this.process, "Recording not yet started") - return this.soxStream?.isPaused(); + return this.soxStream?.isPaused() } - stream(): ReadableStream { - assert(this?.soxStream, "Recording not yet started"); - return Readable.toWeb(this?.soxStream); + stream(): ReadableStream { + assert(this?.soxStream, "Recording not yet started") + return Readable.toWeb(this?.soxStream) } } diff --git a/src/services/realtime/service.ts b/src/services/realtime/service.ts index 2bcfdfc..fc3efe6 100644 --- a/src/services/realtime/service.ts +++ b/src/services/realtime/service.ts @@ -45,6 +45,9 @@ type BufferLike = | { valueOf(): string } | { [Symbol.toPrimitive](hint: string): string }; +/** + * RealtimeTranscriber connects to the Streaming Speech-to-Text API and lets you transcribe audio in real-time. + */ export class RealtimeTranscriber { private realtimeUrl: string; private sampleRate: number; @@ -59,6 +62,10 @@ export class RealtimeTranscriber { private listeners: RealtimeListeners = {}; private sessionTerminatedResolve?: () => void; + /** + * Create a new RealtimeTranscriber. + * @param params - Parameters to configure the RealtimeTranscriber + */ constructor(params: RealtimeTranscriberParams) { this.realtimeUrl = params.realtimeUrl ?? defaultRealtimeUrl; this.sampleRate = params.sampleRate ?? 16_000; @@ -106,30 +113,75 @@ export class RealtimeTranscriber { return url; } + /** + * Listen for the open event which is emitted when the connection is established and the session begins. + * @param event - The open event. + * @param listener - The function to call when the event is emitted. + */ on(event: "open", listener: (event: SessionBeginsEventData) => void): void; + /** + * Listen for the transcript event which is emitted when a partian or final transcript is received. + * @param event - The transcript event. + * @param listener - The function to call when the event is emitted. + */ on( event: "transcript", listener: (transcript: RealtimeTranscript) => void, ): void; + /** + * Listen for the partial transcript event which is emitted when a partial transcript is received. + * @param event - The partial transcript event. + * @param listener - The function to call when the event is emitted. + */ on( event: "transcript.partial", listener: (transcript: PartialTranscript) => void, ): void; + /** + * Listen for the final transcript event which is emitted when a final transcript is received. + * @param event - The final transcript event. + * @param listener - The function to call when the event is emitted. + */ on( event: "transcript.final", listener: (transcript: FinalTranscript) => void, ): void; + /** + * Listen for the session information event which is emitted when session information is received. + * The session information is sent right before the session is terminated. + * @param event - The session information event. + * @param listener - The function to call when the event is emitted. + */ on( event: "session_information", listener: (info: SessionInformation) => void, ): void; + /** + * Listen for the error event which is emitted when an error occurs. + * @param event - The error event. + * @param listener - The function to call when the event is emitted. + */ on(event: "error", listener: (error: Error) => void): void; + /** + * Listen for the close event which is emitted when the connection is closed. + * @param event - The close event. + * @param listener - The function to call when the event is emitted. + */ on(event: "close", listener: (code: number, reason: string) => void): void; + /** + * Add a listener for an event. + * @param event - The event to listen for. + * @param listener - The function to call when the event is emitted. + */ // eslint-disable-next-line @typescript-eslint/no-explicit-any on(event: RealtimeEvents, listener: (...args: any[]) => void) { this.listeners[event] = listener; } + /** + * Connect to the server and begin a new session. + * @returns A promise that resolves when the connection is established and the session begins. + */ connect() { return new Promise((resolve) => { if (this.socket) { @@ -216,10 +268,18 @@ export class RealtimeTranscriber { }); } + /** + * Send audio data to the server. + * @param audio - The audio data to send to the server. + */ sendAudio(audio: AudioData) { this.send(audio); } + /** + * Create a writable stream that can be used to send audio data to the server. + * @returns A writable stream that can be used to send audio data to the server. + */ stream(): WritableStream { return new WritableStream({ write: (chunk: AudioData) => { @@ -251,6 +311,11 @@ export class RealtimeTranscriber { this.socket.send(data); } + /** + * Close the connection to the server. + * @param waitForSessionTermination - If true, the method will wait for the session to be terminated before closing the connection. + * While waiting for the session to be terminated, you will receive the final transcript and session information. + */ async close(waitForSessionTermination = true) { if (this.socket) { if (this.socket.readyState === this.socket.OPEN) {