feat: add SpeakLiveClient and LiveTTSEvents

deepgram · Jul 3, 2024 · 9f8fd1e · 9f8fd1e
1 parent 931b749
commit 9f8fd1e
Show file tree

Hide file tree

Showing 3 changed files with 162 additions and 0 deletions.
diff --git a/src/lib/enums/LiveTTSEvents.ts b/src/lib/enums/LiveTTSEvents.ts
@@ -0,0 +1,31 @@
+/**
+ * Enumeration of events related to live text-to-speech synthesis.
+ *
+ * - `Open`: Built-in socket event for when the connection is opened.
+ * - `Close`: Built-in socket event for when the connection is closed.
+ * - `Error`: Built-in socket event for when an error occurs; also emitted by `SpeakLiveClient` when a message cannot be parsed as JSON.
+ * - `Metadata`: Event for when metadata is received.
+ * - `Flushed`: Event for when the server has flushed the buffer.
+ * - `Warning`: Event for when a warning is received.
+ * - `Unhandled`: Catch-all event for any other message event.
+ */
+export enum LiveTTSEvents {
+  /**
+   * Built-in socket events.
+   */
+  Open = "Open",
+  Close = "Close",
+  Error = "Error",
+
+  /**
+   * Message events: each value is compared against the `type` field of a parsed message, i.e. `{ type: string }`.
+   */
+  Metadata = "Metadata",
+  Flushed = "Flushed",
+  Warning = "Warning",
+
+  /**
+   * Catch-all for any other message event.
+   */
+  Unhandled = "Unhandled",
+}
diff --git a/src/lib/enums/index.ts b/src/lib/enums/index.ts
@@ -1,2 +1,3 @@
 export * from "./LiveConnectionState";
 export * from "./LiveTranscriptionEvents";
+export * from "./LiveTTSEvents";
diff --git a/src/packages/SpeakLiveClient.ts b/src/packages/SpeakLiveClient.ts
@@ -0,0 +1,130 @@
+import { AbstractLiveClient } from "./AbstractLiveClient";
+import { LiveTTSEvents } from "../lib/enums";
+import type { SpeakSchema, DeepgramClientOptions } from "../lib/types";
+
+/**
+ * The `SpeakLiveClient` class extends the `AbstractLiveClient` class and provides functionality for setting up and managing a WebSocket connection for live text-to-speech synthesis.
+ *
+ * The constructor takes in `DeepgramClientOptions` and an optional `SpeakSchema` object, as well as an optional `endpoint` string. It then calls the `connect` method of the parent `AbstractLiveClient` class to establish the WebSocket connection.
+ *
+ * The `setupConnection` method is responsible for handling the various events that can occur on the WebSocket connection, such as opening, closing, and receiving messages. It sets up event handlers for these events and emits the appropriate events based on the message type.
+ *
+ * The `sendText` method sends text to be converted to speech; `flush` asks the server to flush its buffer and return the generated audio, and `reset` asks the server to reset its buffer.
+ *
+ * The `requestClose` method requests the server to close the connection.
+ *
+ * NOTE(review): every incoming message is parsed as JSON, so a non-JSON frame (e.g. binary audio) is emitted as `LiveTTSEvents.Error` — confirm how audio is delivered to consumers.
+ */
+export class SpeakLiveClient extends AbstractLiveClient {
+  public namespace: string = "speak";
+
+  /**
+   * Constructs a new `SpeakLiveClient` instance with the provided options and immediately starts connecting.
+   *
+   * @param options - The `DeepgramClientOptions` to use for the client connection.
+   * @param speakOptions - An optional `SpeakSchema` object containing additional configuration options for the text-to-speech. Defaults to `{}`.
+   * @param endpoint - An optional string representing the WebSocket endpoint to connect to. Defaults to `:version/speak`.
+   */
+  constructor(
+    options: DeepgramClientOptions,
+    speakOptions: SpeakSchema = {},
+    endpoint: string = ":version/speak"
+  ) {
+    super(options);
+
+    this.connect(speakOptions, endpoint);
+  }
+
+  /**
+   * Sets up the connection event handlers.
+   * This method is responsible for handling the various events that can occur on the WebSocket connection, such as opening, closing, and receiving data. It does nothing when `this.conn` is not set.
+   * - When the connection is opened, it emits the `LiveTTSEvents.Open` event with this client as the payload.
+   * - When the connection is closed, it emits the `LiveTTSEvents.Close` event.
+   * - When an error occurs on the connection, it emits the `LiveTTSEvents.Error` event.
+   * - When a message is received, it parses the message as JSON and emits the event matching its `type` (`LiveTTSEvents.Metadata`, `LiveTTSEvents.Flushed` or `LiveTTSEvents.Warning`); any other type is emitted as `LiveTTSEvents.Unhandled`, and a message that cannot be parsed is emitted as `LiveTTSEvents.Error`.
+   */
+  public setupConnection(): void {
+    if (this.conn) {
+      this.conn.onopen = () => {
+        this.emit(LiveTTSEvents.Open, this);
+      };
+
+      this.conn.onclose = (event: any) => {
+        this.emit(LiveTTSEvents.Close, event);
+      };
+
+      this.conn.onerror = (event: ErrorEvent) => {
+        this.emit(LiveTTSEvents.Error, event);
+      };
+
+      this.conn.onmessage = (event: MessageEvent) => {
+        try {
+          const data: any = JSON.parse(event.data.toString());
+          // Dispatch on the message's `type` field; unrecognised types fall through to `Unhandled`.
+          if (data.type === LiveTTSEvents.Metadata) {
+            this.emit(LiveTTSEvents.Metadata, data);
+          } else if (data.type === LiveTTSEvents.Flushed) {
+            this.emit(LiveTTSEvents.Flushed, data);
+          } else if (data.type === LiveTTSEvents.Warning) {
+            this.emit(LiveTTSEvents.Warning, data);
+          } else {
+            this.emit(LiveTTSEvents.Unhandled, data);
+          }
+        } catch (error) {
+          this.emit(LiveTTSEvents.Error, {
+            event,
+            message: "Unable to parse `data` as JSON.",
+            error,
+          });
+        }
+      };
+    }
+  }
+
+  /**
+   * Sends a text input message (`type: "Speak"`) to the server.
+   *
+   * @param text - The text to convert to speech.
+   */
+  public sendText(text: string): void {
+    this.send(
+      JSON.stringify({
+        type: "Speak",
+        text,
+      })
+    );
+  }
+
+  /**
+   * Requests the server flush the current buffer and return generated audio (sends `type: "Flush"`).
+   */
+  public flush(): void {
+    this.send(
+      JSON.stringify({
+        type: "Flush",
+      })
+    );
+  }
+  /**
+   * Requests the server reset the current buffer (sends `type: "Reset"`).
+   */
+  public reset(): void {
+    this.send(
+      JSON.stringify({
+        type: "Reset",
+      })
+    );
+  }
+  /**
+   * Requests the server close the connection (sends `type: "Close"`).
+   */
+  public requestClose(): void {
+    this.send(
+      JSON.stringify({
+        type: "Close",
+      })
+    );
+  }
+}
+
+export { SpeakLiveClient as Liveclient }; // NOTE(review): alias is spelled `Liveclient` (lowercase "c") — confirm this casing is intended.