Merge pull request #4 from deepgram-starters/add-browser-mic

Update to use Flask-SocketIO and get audio from browser mic
deepgram-starters · Jun 4, 2024 · adec194 · adec194
2 parents 7bcede6 + 7e42fec
commit adec194
Show file tree

Hide file tree

Showing 7 changed files with 166 additions and 145 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,2 +1,3 @@
 __pycache__/
-.env
+.python-version
+.env
diff --git a/README.md b/README.md
@@ -40,10 +40,11 @@ DEEPGRAM_API_KEY=%api_key%
 
 #### Run the application
 
-Once running, you can access the application in your browser at <http://127.0.0.1:5000>
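+Run both servers, each in its own terminal because each command blocks: `app.py` serves the web page on port 8000 and `app_socketio.py` runs the Socket.IO transcription server on port 5001. Once both are running, open the application in your browser at <http://127.0.0.1:8000> (use `127.0.0.1`, not `localhost` — it is the only origin the Socket.IO server allows).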
 
 ```bash
 python app.py
+python app_socketio.py
 ```
 
 

diff --git a/app.py b/app.py
@@ -1,125 +1,19 @@
-from flask import Flask, render_template
-from flask_socketio import SocketIO
-from dotenv import load_dotenv
 import logging
-from threading import Event
-from deepgram import (
-    DeepgramClient,
-    DeepgramClientOptions,
-    LiveTranscriptionEvents,
-    LiveOptions,
-    Microphone,
-)
-
-load_dotenv()
-
-app = Flask(__name__)
-socketio = SocketIO(app)
-
-# Set up client configuration
-config = DeepgramClientOptions(
-    verbose=logging.DEBUG,
-    options={"keepalive": "true"}
-)
-
-# Initialize Deepgram client and connection
-deepgram = DeepgramClient("", config)
-dg_connection = deepgram.listen.live.v("1")
-
-# Track transcription state
-transcribing = False
-transcription_event = Event()
-
-def configure_deepgram():
-    options = LiveOptions(
-        smart_format=True,
-        language="en-US",
-        encoding="linear16",
-        channels=1,
-        sample_rate=16000,
-    )
-    dg_connection.start(options)
-
-def start_microphone():
-    microphone = Microphone(dg_connection.send)
-    microphone.start()
-    return microphone
-
-def start_transcription_loop():
-    try:
-        global transcribing
-        while transcribing:
-            configure_deepgram()
-
-            # Open a microphone stream
-            microphone = start_microphone()
 
-            def on_message(self, result, **kwargs):
-                transcript = result.channel.alternatives[0].transcript
-                if len(transcript) > 0:
-                    socketio.emit('transcription_update', {'transcription': transcript})
-
-            dg_connection.on(LiveTranscriptionEvents.Transcript, on_message)
-
-            # Wait for the transcription to finish
-            transcription_event.wait()
-            transcription_event.clear()
-
-            # Finish the microphone and Deepgram connection
-            microphone.finish()
-            dg_connection.finish()
-            logging.info("Transcription loop finished.")
-
-    except Exception as e:
-        logging.error(f"Error: {e}")
-
-def reconnect():
-    try:
-        logging.info("Reconnecting to Deepgram...")
-        new_dg_connection = deepgram.listen.live.v("1")
-
-        # Configure and start the new Deepgram connection
-        configure_deepgram(new_dg_connection)
+from dotenv import load_dotenv
+from flask import Flask, render_template
 
-        logging.info("Reconnected to Deepgram successfully.")
-        return new_dg_connection
+load_dotenv()
 
-    except Exception as e:
-        logging.error(f"Reconnection failed: {e}")
-        return None
+app = Flask("app_http")
 
-def on_disconnect():
-    logging.info("Client disconnected")
-    global dg_connection
-    if dg_connection:
-        dg_connection.finish()
-        dg_connection = None
-        logging.info("Cleared listeners and set dg_connection to None")
-    else:
-        logging.info("No active dg_connection to disconnect from")
 
 @app.route('/')
 def index():
     return render_template('index.html')
 
-@socketio.on('disconnect')
-def handle_disconnect():
-    socketio.start_background_task(target=on_disconnect)
-
-@socketio.on('toggle_transcription')
-def toggle_transcription(data):
-    global transcribing
-    action = data.get('action')
-
-    if action == 'start' and not transcribing:
-        # Start transcription
-        transcribing = True
-        socketio.start_background_task(target=start_transcription_loop)
-    elif action == 'stop' and transcribing:
-        # Stop transcription
-        transcribing = False
-        transcription_event.set()
 
 if __name__ == '__main__':
-    logging.info("Starting SocketIO server.")
-    socketio.run(app, debug=True)
+    logging.info("Starting Flask server.")
+    # Run flask app
+    app.run(debug=True, port=8000)
diff --git a/app_socketio.py b/app_socketio.py
@@ -0,0 +1,86 @@
+import logging
+import os
+from flask import Flask
+from flask_socketio import SocketIO
+from dotenv import load_dotenv
+from deepgram import (
+    DeepgramClient,
+    LiveTranscriptionEvents,
+    LiveOptions,
+    DeepgramClientOptions
+)
+
+load_dotenv()
+
+app_socketio = Flask("app_socketio")
+socketio = SocketIO(app_socketio, cors_allowed_origins=['http://127.0.0.1:8000'])
+
+API_KEY = os.getenv("DEEPGRAM_API_KEY")
+
+# Set up client configuration
+config = DeepgramClientOptions(
+    verbose=logging.WARN,  # Change to logging.INFO or logging.DEBUG for more verbose output
+    options={"keepalive": "true"}
+)
+
+deepgram = DeepgramClient(API_KEY, config)
+
+dg_connection = None
+
+def initialize_deepgram_connection():
+    global dg_connection
+    # Create a fresh live-transcription connection from the module-level Deepgram client
+    dg_connection = deepgram.listen.live.v("1")
+
+    def on_open(self, open, **kwargs):
+        print(f"\n\n{open}\n\n")
+
+    def on_message(self, result, **kwargs):
+        transcript = result.channel.alternatives[0].transcript
+        if len(transcript) > 0:
+            print(result.channel.alternatives[0].transcript)
+            socketio.emit('transcription_update', {'transcription': transcript})
+
+    def on_close(self, close, **kwargs):
+        print(f"\n\n{close}\n\n")
+
+    def on_error(self, error, **kwargs):
+        print(f"\n\n{error}\n\n")
+
+    dg_connection.on(LiveTranscriptionEvents.Open, on_open)
+    dg_connection.on(LiveTranscriptionEvents.Transcript, on_message)
+    dg_connection.on(LiveTranscriptionEvents.Close, on_close)
+    dg_connection.on(LiveTranscriptionEvents.Error, on_error)
+
+    # Define the options for the live transcription
+    options = LiveOptions(model="nova-2", language="en-US")
+
+    if dg_connection.start(options) is False: # THIS CAUSES ERROR
+        print("Failed to start connection")
+        exit()
+
+@socketio.on('audio_stream')
+def handle_audio_stream(data):
+    if dg_connection:
+        dg_connection.send(data)
+
+@socketio.on('toggle_transcription')
+def handle_toggle_transcription(data):
+    print("toggle_transcription", data)
+    action = data.get("action")
+    if action == "start":
+        print("Starting Deepgram connection")
+        initialize_deepgram_connection()
+
+@socketio.on('connect')
+def server_connect():
+    print('Client connected')
+
+@socketio.on('restart_deepgram')
+def restart_deepgram():
+    print('Restarting Deepgram connection')
+    initialize_deepgram_connection()
+
+if __name__ == '__main__':
+    logging.info("Starting SocketIO server.")
+    socketio.run(app_socketio, debug=True, allow_unsafe_werkzeug=True, port=5001)
diff --git a/requirements.txt b/requirements.txt
@@ -1,5 +1,4 @@
-deepgram-sdk==3.0.3
+deepgram-sdk==3.2.7
 Flask==3.0.0
 Flask-SocketIO==5.3.6
 python-dotenv==1.0.0
-pyaudio==0.2.14
diff --git a/static/script.js b/static/script.js
@@ -1,21 +1,75 @@
-var socket = io.connect(
-  "http://" + window.location.hostname + ":" + location.port
+let isRecording = false;
+let socket;
+let microphone;
+
+const socket_port = 5001;
+socket = io(
+  "http://" + window.location.hostname + ":" + socket_port.toString()
 );
 
-var isTranscribing = false;
+socket.on("transcription_update", (data) => {
+  document.getElementById("captions").innerHTML = data.transcription;
+});
+
+async function getMicrophone() {
+  try {
+    const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+    return new MediaRecorder(stream, { mimeType: "audio/webm" });
+  } catch (error) {
+    console.error("Error accessing microphone:", error);
+    throw error;
+  }
+}
+
+async function openMicrophone(microphone, socket) {
+  return new Promise((resolve) => {
+    microphone.onstart = () => {
+      console.log("Client: Microphone opened");
+      document.body.classList.add("recording");
+      resolve();
+    };
+    microphone.ondataavailable = async (event) => {
+      console.log("client: microphone data received");
+      if (event.data.size > 0) {
+        socket.emit("audio_stream", event.data);
+      }
+    };
+    microphone.start(1000);
+  });
+}
+
+async function startRecording() {
+  isRecording = true;
+  microphone = await getMicrophone();
+  console.log("Client: Waiting to open microphone");
+  await openMicrophone(microphone, socket);
+}
 
-document.getElementById("record").addEventListener("change", function () {
-  if (this.checked) {
-    // Start transcription
-    isTranscribing = true;
-    socket.emit("toggle_transcription", { action: "start" });
-  } else {
-    // Stop transcription
-    isTranscribing = false;
+async function stopRecording() {
+  if (isRecording === true) {
+    microphone.stop();
+    microphone.stream.getTracks().forEach((track) => track.stop()); // Stop all tracks
     socket.emit("toggle_transcription", { action: "stop" });
+    microphone = null;
+    isRecording = false;
+    console.log("Client: Microphone closed");
+    document.body.classList.remove("recording");
   }
-});
+}
 
-socket.on("transcription_update", function (data) {
-  document.getElementById("captions").innerHTML = data.transcription;
+document.addEventListener("DOMContentLoaded", () => {
+  const recordButton = document.getElementById("record");
+
+  recordButton.addEventListener("click", () => {
+    if (!isRecording) {
+      socket.emit("toggle_transcription", { action: "start" });
+      startRecording().catch((error) =>
+        console.error("Error starting recording:", error)
+      );
+    } else {
+      stopRecording().catch((error) =>
+        console.error("Error stopping recording:", error)
+      );
+    }
+  });
 });
diff --git a/templates/index.html b/templates/index.html
@@ -34,20 +34,6 @@ <h1>Captions by Deepgram</h1>
     <div class="captions" id="captions">
       <span>Realtime speech transcription API</span>
     </div>
-    <div class="button-container">
-      <a
-        href="https://console.deepgram.com/signup"
-        class="info-button sign-up"
-        target="_blank"
-        >Sign Up</a
-      >
-      <a
-        href="https://developers.deepgram.com/docs/introduction"
-        class="info-button docs"
-        target="_blank"
-        >Read the Docs</a
-      >
-    </div>
     <script src="../static/script.js"></script>
   </body>
 </html>