Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add TTS Live Client #306

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
44 changes: 44 additions & 0 deletions examples/node-speak-live/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
const { createClient, LiveTTSEvents } = require("../../dist/main/index");
const fetch = require("cross-fetch");

/**
 * Minimal live text-to-speech example.
 *
 * Opens a live speak connection, sends a single sentence once the socket is
 * open, asks the server to flush its buffer, and logs every event it receives.
 */
const live = async () => {
  const text = "Hello, how can I help you today?";

  const deepgram = createClient(process.env.DEEPGRAM_API_KEY, {
    global: { fetch: { options: { url: "https://api.beta.deepgram.com" } } },
  });

  // `speak.live(ttsOptions, endpoint)`: the second parameter is the endpoint
  // string, so the model must be part of the first (options) argument.
  const connection = deepgram.speak.live({ model: "aura-asteria-en" });

  // Register every listener up front so that failures which happen before the
  // socket opens (bad key, network error) are still reported.
  connection.on(LiveTTSEvents.Error, (err) => {
    console.error(err);
  });

  connection.on(LiveTTSEvents.Close, () => {
    console.log("Connection closed.");
  });

  connection.on(LiveTTSEvents.Metadata, (data) => {
    // Pass the object as its own argument; interpolating it into a template
    // string would print "[object Object]".
    console.log("Deepgram Metadata:", data);
  });

  connection.on(LiveTTSEvents.Audio, (data) => {
    // Audio arrives as binary data; log its size rather than its contents.
    console.log(`Deepgram Audio: ${data.byteLength} bytes`);
  });

  connection.on(LiveTTSEvents.Flushed, () => {
    console.log("Deepgram Flushed");
  });

  connection.on(LiveTTSEvents.Open, () => {
    // Text can only be sent once the socket is open.
    connection.sendText(text);
    // Ask the server to synthesize and return whatever it has buffered.
    connection.flush();
  });
};

live();
36 changes: 36 additions & 0 deletions src/lib/enums/LiveTTSEvents.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/**
* Enumeration of events related to live text-to-speech synthesis.
*
* - `Open`: Built-in socket event for when the connection is opened.
* - `Close`: Built-in socket event for when the connection is closed.
* - `Error`: Built-in socket event for when an error occurs.
* - `Metadata`: Event for when metadata is received.
* - `Flushed`: Event for when the server has flushed the buffer.
* - `Warning`: Event for when a warning is received.
* - `Audio`: Event for when binary audio data is received.
* - `Unhandled`: Catch-all event for any other message event.
*/
export enum LiveTTSEvents {
/**
* Built-in socket events.
*/
Open = "Open",
Close = "Close",
Error = "Error",

/**
* JSON messages, identified by their `type` field: `{ type: string }`.
*/
Metadata = "Metadata",
Flushed = "Flushed",
Warning = "Warning",

/**
* Audio data event (binary message rather than JSON).
*/
Audio = "Audio",

/**
* Catch-all for any other message event.
*/
Unhandled = "Unhandled",
}
1 change: 1 addition & 0 deletions src/lib/enums/index.ts
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
export * from "./LiveConnectionState";
export * from "./LiveTranscriptionEvents";
export * from "./LiveTTSEvents";
43 changes: 43 additions & 0 deletions src/packages/AbstractLiveClient.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { AbstractClient, noop } from "./AbstractClient";
import { CONNECTION_STATE, SOCKET_STATES } from "../lib/constants";
import type { DeepgramClientOptions, LiveSchema } from "../lib/types";
SandraRodgers marked this conversation as resolved.
Show resolved Hide resolved
import type { WebSocket as WSWebSocket } from "ws";
import { LiveTTSEvents } from "../lib/enums";

/**
* Represents a constructor for a WebSocket-like object that can be used in the application.
Expand Down Expand Up @@ -258,6 +259,48 @@ export abstract class AbstractLiveClient extends AbstractClient {
* @abstract Requires subclasses to set up context aware event handlers.
*/
abstract setupConnection(): void;

/**
 * Routes an incoming WebSocket message to the text or binary handler.
 *
 * - String payloads are parsed as JSON and passed to `handleTextMessage`.
 * - Binary payloads are normalized to an `ArrayBuffer` and passed to
 *   `handleBinaryMessage`. Binary data may arrive as an `ArrayBuffer`, as a
 *   typed-array view (for example a Node.js `Buffer`, the `ws` default), or as
 *   a `Blob` (the browser default `binaryType`), so all three are accepted.
 * - Anything else is reported through the `Error` event.
 *
 * @param event - The MessageEvent object representing the received message.
 */
protected handleMessage(event: MessageEvent): void {
  const payload: unknown = event.data;

  if (typeof payload === "string") {
    try {
      const data = JSON.parse(payload);
      this.handleTextMessage(data);
    } catch (error) {
      this.emit(LiveTTSEvents.Error, {
        event,
        message: "Unable to parse `data` as JSON.",
        error,
      });
    }
  } else if (payload instanceof ArrayBuffer) {
    this.handleBinaryMessage(payload);
  } else if (ArrayBuffer.isView(payload)) {
    // A view may cover only part of a larger (pooled) buffer, so copy exactly
    // the viewed bytes into a fresh ArrayBuffer before handing it on.
    const copy = new ArrayBuffer(payload.byteLength);
    new Uint8Array(copy).set(
      new Uint8Array(payload.buffer, payload.byteOffset, payload.byteLength)
    );
    this.handleBinaryMessage(copy);
  } else if (typeof Blob !== "undefined" && payload instanceof Blob) {
    // Reading a Blob is asynchronous; surface any failure as an Error event
    // instead of leaving an unhandled promise rejection.
    payload
      .arrayBuffer()
      .then((buffer) => {
        this.handleBinaryMessage(buffer);
      })
      .catch((error: unknown) => {
        this.emit(LiveTTSEvents.Error, {
          event,
          message: "Unable to process binary `data`.",
          error,
        });
      });
  } else {
    this.emit(LiveTTSEvents.Error, {
      event,
      message: "Received unknown data type.",
    });
  }
}

/**
* Hook for text messages received from the WebSocket connection.
*
* `handleMessage` calls this with the result of `JSON.parse` on a string
* payload. The base implementation does nothing; subclasses override it to
* emit their own events.
*
* @param data - The parsed JSON data.
*/
protected handleTextMessage(data: any): void {
// Intentionally empty: to be implemented by subclasses.
}

/**
* Hook for binary messages received from the WebSocket connection.
*
* `handleMessage` calls this when the payload is binary rather than a string.
* The base implementation does nothing; subclasses override it to emit their
* own events.
*
* @param data - The binary data.
*/
protected handleBinaryMessage(data: ArrayBuffer): void {
// Intentionally empty: to be implemented by subclasses.
}
SandraRodgers marked this conversation as resolved.
Show resolved Hide resolved
}

class WSWebSocketDummy {
Expand Down
32 changes: 32 additions & 0 deletions src/packages/SpeakClient.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import { AbstractClient } from "./AbstractClient";
import { SpeakLiveClient } from "./SpeakLiveClient";
import { SpeakRestClient } from "./SpeakRestClient";
import { SpeakSchema } from "../lib/types";

/**
 * Entry point for the "speak" namespace.
 *
 * It exposes two members:
 *
 * 1. `request` (a getter, not a method): builds a `SpeakRestClient` for the REST speak API.
 * 2. `live(ttsOptions, endpoint)`: builds a `SpeakLiveClient` for the live (WebSocket) speak API.
 */
export class SpeakClient extends AbstractClient {
  public namespace: string = "speak";

  /**
   * Builds a `SpeakRestClient` from this client's options.
   * A new instance is constructed on every property access.
   */
  get request() {
    const restClient = new SpeakRestClient(this.options);
    return restClient;
  }

  /**
   * Builds a `SpeakLiveClient` from this client's options.
   *
   * @param ttsOptions - The TTS options forwarded to the live client. Defaults to `{}`.
   * @param endpoint - The endpoint forwarded to the live client. Defaults to `:version/speak`.
   * @returns The newly constructed `SpeakLiveClient`.
   */
  public live(ttsOptions: SpeakSchema = {}, endpoint: string = ":version/speak"): SpeakLiveClient {
    const liveClient = new SpeakLiveClient(this.options, ttsOptions, endpoint);
    return liveClient;
  }
}
136 changes: 136 additions & 0 deletions src/packages/SpeakLiveClient.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
import { AbstractLiveClient } from "./AbstractLiveClient";
import { LiveTTSEvents } from "../lib/enums";
import type { SpeakSchema, DeepgramClientOptions } from "../lib/types";

/**
 * WebSocket client for live text-to-speech synthesis.
 *
 * The constructor takes `DeepgramClientOptions`, an optional `SpeakSchema` and
 * an optional `endpoint`, then calls the inherited `connect` method.
 *
 * - `setupConnection` wires the socket callbacks to `LiveTTSEvents`.
 * - `sendText` sends text to be converted to speech.
 * - `flush` asks the server to flush its buffer and return generated audio.
 * - `reset` asks the server to reset its buffer.
 * - `requestClose` asks the server to close the connection.
 */
export class SpeakLiveClient extends AbstractLiveClient {
  public namespace: string = "speak";

  /**
   * Constructs a new `SpeakLiveClient` and immediately starts connecting.
   *
   * @param options - The `DeepgramClientOptions` to use for the client connection.
   * @param speakOptions - An optional `SpeakSchema` object containing additional configuration options for the text-to-speech.
   * @param endpoint - An optional string representing the WebSocket endpoint to connect to. Defaults to `:version/speak`.
   */
  constructor(
    options: DeepgramClientOptions,
    speakOptions: SpeakSchema = {},
    endpoint: string = ":version/speak"
  ) {
    super(options);

    this.connect(speakOptions, endpoint);
  }

  /**
   * Sets up the connection event handlers. Does nothing when there is no connection.
   *
   * - When the connection is opened, it emits `LiveTTSEvents.Open` with this client.
   * - When the connection is closed, it emits `LiveTTSEvents.Close` with the close event.
   * - When an error occurs on the connection, it emits `LiveTTSEvents.Error` with the error event.
   * - When a message is received, it is passed to `handleMessage`, which dispatches to
   *   `handleTextMessage` (emitting `Metadata`, `Flushed`, `Warning` or `Unhandled`) or
   *   `handleBinaryMessage` (emitting `Audio`).
   */
  public setupConnection(): void {
    if (this.conn) {
      this.conn.onopen = () => {
        this.emit(LiveTTSEvents.Open, this);
      };

      this.conn.onclose = (event: any) => {
        this.emit(LiveTTSEvents.Close, event);
      };

      this.conn.onerror = (event: ErrorEvent) => {
        this.emit(LiveTTSEvents.Error, event);
      };

      this.conn.onmessage = (event: MessageEvent) => {
        this.handleMessage(event);
      };
    }
  }

  /**
   * Handles text messages received from the WebSocket connection.
   *
   * Emits the event matching the message's `type` when it is `Metadata`,
   * `Flushed` or `Warning`; every other payload is emitted as `Unhandled`.
   *
   * @param data - The parsed JSON data.
   */
  protected handleTextMessage(data: any): void {
    // `null` is valid JSON; guard the property read so such a payload is
    // reported as Unhandled instead of throwing a TypeError.
    const type = data == null ? undefined : data.type;

    if (
      type === LiveTTSEvents.Metadata ||
      type === LiveTTSEvents.Flushed ||
      type === LiveTTSEvents.Warning
    ) {
      this.emit(type, data);
    } else {
      this.emit(LiveTTSEvents.Unhandled, data);
    }
  }

  /**
   * Handles binary messages received from the WebSocket connection by
   * emitting them as `LiveTTSEvents.Audio`.
   *
   * @param data - The binary data.
   */
  protected handleBinaryMessage(data: ArrayBuffer): void {
    this.emit(LiveTTSEvents.Audio, data);
  }

  /**
   * Sends a text input message (`type: "Speak"`) to the server.
   *
   * @param text - The text to convert to speech.
   */
  public sendText(text: string): void {
    this.send(
      JSON.stringify({
        type: "Speak",
        text,
      })
    );
  }

  /**
   * Requests the server flush the current buffer and return generated audio.
   */
  public flush(): void {
    this.send(
      JSON.stringify({
        type: "Flush",
      })
    );
  }

  /**
   * Requests the server reset the current buffer.
   */
  public reset(): void {
    this.send(
      JSON.stringify({
        type: "Reset",
      })
    );
  }

  /**
   * Requests the server close the connection.
   */
  public requestClose(): void {
    this.send(
      JSON.stringify({
        type: "Close",
      })
    );
  }
}
2 changes: 0 additions & 2 deletions src/packages/SpeakRestClient.ts
Original file line number Diff line number Diff line change
Expand Up @@ -75,5 +75,3 @@ export class SpeakRestClient extends AbstractRestClient {
return this.result.headers;
}
}

export { SpeakRestClient as SpeakClient };
2 changes: 1 addition & 1 deletion src/packages/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@ export * from "./ListenRestClient";
export * from "./ManageRestClient";
export * from "./ReadRestClient";
export * from "./SelfHostedRestClient";
export * from "./SpeakRestClient";
SandraRodgers marked this conversation as resolved.
Show resolved Hide resolved
export * from "./SpeakClient";
Loading