Skip to content

Commit

Permalink
Merge pull request #4 from deepgram-starters/add-browser-mic
Browse files Browse the repository at this point in the history
Update to use Flask-SocketIO and get audio from browser mic
  • Loading branch information
SandraRodgers committed Jun 4, 2024
2 parents 7bcede6 + 7e42fec commit adec194
Show file tree
Hide file tree
Showing 7 changed files with 166 additions and 145 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
__pycache__/
.env
.python-version
.env
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,11 @@ DEEPGRAM_API_KEY=%api_key%

#### Run the application

Once running, you can access the application in your browser at <http://127.0.0.1:5000>
You need to run both `app.py` (port 8000) and `app_socketio.py` (port 5001), each in its own terminal, because each command starts a server and keeps running. Once both are running, you can access the application in your browser at <http://127.0.0.1:8000>

```bash
python app.py
python app_socketio.py
```


Expand Down
120 changes: 7 additions & 113 deletions app.py
Original file line number Diff line number Diff line change
@@ -1,125 +1,19 @@
from flask import Flask, render_template
from flask_socketio import SocketIO
from dotenv import load_dotenv
import logging
from threading import Event
from deepgram import (
DeepgramClient,
DeepgramClientOptions,
LiveTranscriptionEvents,
LiveOptions,
Microphone,
)

# Load variables from a local .env file into the process environment.
load_dotenv()

# One Flask app serves the page and also hosts the Socket.IO endpoint.
app = Flask(__name__)
socketio = SocketIO(app)

# Set up client configuration
# Verbosity is set to logging.DEBUG; the "keepalive" option is passed to the
# client as the string "true".
config = DeepgramClientOptions(
verbose=logging.DEBUG,
options={"keepalive": "true"}
)

# Initialize Deepgram client and connection
# NOTE(review): the API key argument is an empty string; presumably the SDK
# then falls back to the DEEPGRAM_API_KEY environment variable -- confirm.
deepgram = DeepgramClient("", config)
# Module-level live-transcription connection used by the functions below.
dg_connection = deepgram.listen.live.v("1")

# Track transcription state
# `transcribing` is flipped by toggle_transcription(); `transcription_event`
# is set on "stop" to wake the transcription loop that waits on it.
transcribing = False
transcription_event = Event()

def configure_deepgram(connection=None):
    """Start a Deepgram live connection with this app's fixed audio options.

    Args:
        connection: The live connection to start. When omitted, the
            module-level ``dg_connection`` is used, so existing zero-argument
            callers keep working. ``reconnect()`` passes the connection it
            has just created; without this parameter that call raised
            ``TypeError``.
    """
    target = dg_connection if connection is None else connection
    # Raw 16-bit linear PCM, mono, 16 kHz, US English, with smart formatting.
    options = LiveOptions(
        smart_format=True,
        language="en-US",
        encoding="linear16",
        channels=1,
        sample_rate=16000,
    )
    target.start(options)

def start_microphone():
    """Create a ``Microphone`` fed into ``dg_connection.send`` and start it.

    Returns:
        The started ``Microphone`` instance; the caller is responsible for
        calling ``finish()`` on it.
    """
    send_audio = dg_connection.send
    mic = Microphone(send_audio)
    mic.start()
    return mic

def start_transcription_loop():
    """Stream microphone audio to Deepgram until transcription is stopped.

    Runs as a background task. While the module-level ``transcribing`` flag
    is set, it starts the Deepgram connection, opens the microphone, and
    blocks on ``transcription_event`` until ``toggle_transcription`` sets it.
    Non-empty transcripts are emitted to Socket.IO clients as
    ``'transcription_update'``.

    The microphone and the connection are released in ``finally`` blocks so
    an error cannot leave them open, and ``transcribing`` is reset on failure
    so a later "start" request is not silently ignored.
    """
    global transcribing

    def on_message(self, result, **kwargs):
        transcript = result.channel.alternatives[0].transcript
        if transcript:
            socketio.emit('transcription_update', {'transcription': transcript})

    try:
        while transcribing:
            # Register the handler before audio starts flowing so that early
            # transcripts are not missed.
            dg_connection.on(LiveTranscriptionEvents.Transcript, on_message)
            configure_deepgram()
            try:
                # Open a microphone stream
                microphone = start_microphone()
                try:
                    # Wait for the transcription to finish
                    transcription_event.wait()
                    transcription_event.clear()
                finally:
                    microphone.finish()
            finally:
                dg_connection.finish()
            logging.info("Transcription loop finished.")
    except Exception as e:
        logging.error(f"Error: {e}")
        # Without this reset the flag stays True and "start" never works again.
        transcribing = False

# Create and start a replacement Deepgram live connection.
# Returns the new connection on success, or None after logging the failure.
# NOTE(review): the import / load_dotenv() / app = Flask("app_http") lines
# interleaved in this span appear to belong to the new version of app.py shown
# by the diff view, not to reconnect() itself.
def reconnect():
try:
logging.info("Reconnecting to Deepgram...")
new_dg_connection = deepgram.listen.live.v("1")

# Configure and start the new Deepgram connection
# NOTE(review): check that configure_deepgram() accepts a connection argument;
# a TypeError raised here would be caught below and reported as a failed
# reconnection.
configure_deepgram(new_dg_connection)
from dotenv import load_dotenv
from flask import Flask, render_template

logging.info("Reconnected to Deepgram successfully.")
return new_dg_connection
load_dotenv()

# Any failure is logged and signalled to the caller by returning None.
except Exception as e:
logging.error(f"Reconnection failed: {e}")
return None
app = Flask("app_http")
app = Flask("app_http")

def on_disconnect():
    """Finish and drop the shared Deepgram connection after a disconnect.

    Logs the disconnect, then either finishes ``dg_connection`` and resets it
    to ``None``, or logs that there was no active connection.
    """
    global dg_connection
    logging.info("Client disconnected")
    if not dg_connection:
        logging.info("No active dg_connection to disconnect from")
        return
    dg_connection.finish()
    dg_connection = None
    logging.info("Cleared listeners and set dg_connection to None")

@app.route('/')
def index():
    """Serve the application's page, rendered from the index.html template."""
    page = render_template('index.html')
    return page

@socketio.on('disconnect')
def handle_disconnect():
    """Hand the disconnect cleanup to a Socket.IO background task."""
    cleanup = on_disconnect
    socketio.start_background_task(target=cleanup)

@socketio.on('toggle_transcription')
def toggle_transcription(data):
    """Start or stop transcription according to ``data['action']``.

    "start" launches the transcription loop as a background task unless one
    is already running; "stop" clears the flag and sets
    ``transcription_event`` to wake the waiting loop. Any other action, or a
    request that matches the current state, is ignored.
    """
    global transcribing
    requested = data.get('action')

    if requested == 'start':
        if not transcribing:
            # Start transcription
            transcribing = True
            socketio.start_background_task(target=start_transcription_loop)
        return

    if requested == 'stop' and transcribing:
        # Stop transcription
        transcribing = False
        transcription_event.set()

# Script entry point: runs only when this file is executed directly.
# NOTE(review): this span shows two startup variants -- the Socket.IO one
# (socketio.run) and the plain Flask one (app.run on port 8000). They appear to
# be the old and new sides of the diff rather than one intended sequence.
if __name__ == '__main__':
logging.info("Starting SocketIO server.")
socketio.run(app, debug=True)
logging.info("Starting Flask server.")
# Run flask app
app.run(debug=True, port=8000)
86 changes: 86 additions & 0 deletions app_socketio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
import logging
import os
from flask import Flask
from flask_socketio import SocketIO
from dotenv import load_dotenv
from deepgram import (
DeepgramClient,
LiveTranscriptionEvents,
LiveOptions,
DeepgramClientOptions
)

# Load variables from a local .env file so DEEPGRAM_API_KEY can be read below.
load_dotenv()

# Separate Flask app that hosts the Socket.IO endpoint; no HTTP routes are
# defined on it in this file. Cross-origin connections are accepted only from
# http://127.0.0.1:8000.
app_socketio = Flask("app_socketio")
socketio = SocketIO(app_socketio, cors_allowed_origins=['http://127.0.0.1:8000'])

# Deepgram API key taken from the environment; None when the variable is unset.
API_KEY = os.getenv("DEEPGRAM_API_KEY")

# Set up client configuration
config = DeepgramClientOptions(
verbose=logging.WARN, # Change to logging.INFO or logging.DEBUG for more verbose output
options={"keepalive": "true"}
)

# Deepgram client from which live connections are created.
deepgram = DeepgramClient(API_KEY, config)

# Current live-transcription connection; None until
# initialize_deepgram_connection() creates one.
dg_connection = None

def initialize_deepgram_connection():
    """Open a fresh Deepgram live connection and publish it as ``dg_connection``.

    Any connection left from an earlier call is finished first, so repeated
    "start" / "restart" requests do not leave old connections open. Event
    handlers are registered before the connection is started; non-empty
    transcripts are printed and emitted to Socket.IO clients as
    ``'transcription_update'``.

    The new connection is assigned to the module-level ``dg_connection`` only
    after ``start()`` succeeds, so audio is never sent to a connection that
    is not open. If ``start()`` reports failure, the failure is printed and
    ``dg_connection`` is left as ``None``; the server process keeps running
    instead of calling ``exit()`` from inside a socket handler.
    """
    global dg_connection

    # Detach the old connection first so incoming audio is dropped, not sent
    # to a connection that is being closed.
    previous, dg_connection = dg_connection, None
    if previous is not None:
        try:
            previous.finish()
        except Exception as exc:  # best effort: a stale connection may already be closed
            print(f"Failed to close previous Deepgram connection: {exc}")

    # Initialize Deepgram client and connection
    connection = deepgram.listen.live.v("1")

    # Handler parameter names (open / result / close / error) are kept exactly
    # as they were, in case the SDK passes these payloads by keyword.
    def on_open(self, open, **kwargs):
        print(f"\n\n{open}\n\n")

    def on_message(self, result, **kwargs):
        transcript = result.channel.alternatives[0].transcript
        if transcript:
            print(transcript)
            socketio.emit('transcription_update', {'transcription': transcript})

    def on_close(self, close, **kwargs):
        print(f"\n\n{close}\n\n")

    def on_error(self, error, **kwargs):
        print(f"\n\n{error}\n\n")

    connection.on(LiveTranscriptionEvents.Open, on_open)
    connection.on(LiveTranscriptionEvents.Transcript, on_message)
    connection.on(LiveTranscriptionEvents.Close, on_close)
    connection.on(LiveTranscriptionEvents.Error, on_error)

    # Define the options for the live transcription
    options = LiveOptions(model="nova-2", language="en-US")

    # The original author flagged this start() call as a source of errors, so
    # a failure is reported and tolerated rather than terminating the server.
    if connection.start(options) is False:
        print("Failed to start connection")
        return

    dg_connection = connection

@socketio.on('audio_stream')
def handle_audio_stream(data):
    """Forward one 'audio_stream' payload to Deepgram, if a connection exists.

    Payloads that arrive while ``dg_connection`` is unset are dropped.
    """
    if not dg_connection:
        return
    dg_connection.send(data)

@socketio.on('toggle_transcription')
def handle_toggle_transcription(data):
    """Open or close the Deepgram connection according to ``data['action']``.

    ``"start"`` (re)initializes the Deepgram connection. ``"stop"``, which
    the browser client emits when recording ends, finishes the current
    connection and clears ``dg_connection``; previously it was ignored and
    the keep-alive connection stayed open. Any other action, or a payload
    that is not a dict, is logged and ignored.
    """
    global dg_connection
    print("toggle_transcription", data)
    action = data.get("action") if isinstance(data, dict) else None
    if action == "start":
        print("Starting Deepgram connection")
        initialize_deepgram_connection()
    elif action == "stop":
        # Detach before finishing so late audio chunks are dropped by
        # handle_audio_stream instead of hitting a closing connection.
        connection, dg_connection = dg_connection, None
        if connection is not None:
            print("Stopping Deepgram connection")
            connection.finish()

@socketio.on('connect')
def server_connect():
    """Log that a Socket.IO client has connected."""
    message = 'Client connected'
    print(message)

@socketio.on('restart_deepgram')
def restart_deepgram():
    """Handle a 'restart_deepgram' event by re-initializing the connection."""
    notice = 'Restarting Deepgram connection'
    print(notice)
    initialize_deepgram_connection()

# Script entry point: start the Socket.IO server on port 5001.
if __name__ == '__main__':
    # With logging unconfigured the root logger stays at WARNING, so the INFO
    # message below would be dropped. Configure it first.
    logging.basicConfig(level=logging.INFO)
    logging.info("Starting SocketIO server.")
    # allow_unsafe_werkzeug lets Flask-SocketIO run on the Werkzeug
    # development server.
    socketio.run(app_socketio, debug=True, allow_unsafe_werkzeug=True, port=5001)
3 changes: 1 addition & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
deepgram-sdk==3.0.3
deepgram-sdk==3.2.7
Flask==3.0.0
Flask-SocketIO==5.3.6
python-dotenv==1.0.0
pyaudio==0.2.14
82 changes: 68 additions & 14 deletions static/script.js
Original file line number Diff line number Diff line change
@@ -1,21 +1,75 @@
var socket = io.connect(
"http://" + window.location.hostname + ":" + location.port
let isRecording = false;
let socket;
let microphone;

const socket_port = 5001;
socket = io(
"http://" + window.location.hostname + ":" + socket_port.toString()
);

var isTranscribing = false;
// Show the latest transcript pushed by the server in the #captions element.
// textContent is used instead of innerHTML so transcript text is always
// displayed literally and never parsed as markup.
socket.on("transcription_update", (data) => {
  const captions = document.getElementById("captions");
  if (captions) {
    captions.textContent = data.transcription;
  }
});

/**
 * Request microphone access and wrap the resulting stream in a MediaRecorder
 * configured with the "audio/webm" MIME type.
 *
 * Any failure is logged to the console and rethrown to the caller.
 */
async function getMicrophone() {
  let audioStream;
  try {
    audioStream = await navigator.mediaDevices.getUserMedia({ audio: true });
    const recorder = new MediaRecorder(audioStream, { mimeType: "audio/webm" });
    return recorder;
  } catch (err) {
    console.error("Error accessing microphone:", err);
    throw err;
  }
}

/**
 * Start the given recorder and forward each non-empty audio chunk to the
 * server as an "audio_stream" event.
 *
 * The returned promise resolves when the recorder fires its start event.
 * Chunks are requested every 1000 ms.
 */
async function openMicrophone(microphone, socket) {
  const forwardChunk = async (event) => {
    console.log("client: microphone data received");
    if (!(event.data.size > 0)) {
      return;
    }
    socket.emit("audio_stream", event.data);
  };

  return new Promise((resolve) => {
    const onStarted = () => {
      console.log("Client: Microphone opened");
      document.body.classList.add("recording");
      resolve();
    };

    microphone.onstart = onStarted;
    microphone.ondataavailable = forwardChunk;
    microphone.start(1000);
  });
}

/**
 * Acquire the microphone and begin streaming audio to the server.
 *
 * isRecording is set up front so a second click while the permission prompt
 * is open is treated as "already recording". If acquiring or opening the
 * microphone fails, the recording state is rolled back and the server is
 * told to stop, then the error is rethrown. Previously isRecording stayed
 * true, so the next click called stopRecording() with no microphone.
 */
async function startRecording() {
  isRecording = true;
  try {
    microphone = await getMicrophone();
    console.log("Client: Waiting to open microphone");
    await openMicrophone(microphone, socket);
  } catch (error) {
    // Release any tracks that were acquired before the failure.
    if (microphone && microphone.stream) {
      microphone.stream.getTracks().forEach((track) => track.stop());
    }
    microphone = null;
    isRecording = false;
    document.body.classList.remove("recording");
    // Mirror stopRecording(): the "start" request was already sent.
    socket.emit("toggle_transcription", { action: "stop" });
    throw error;
  }
}

document.getElementById("record").addEventListener("change", function () {
if (this.checked) {
// Start transcription
isTranscribing = true;
socket.emit("toggle_transcription", { action: "start" });
} else {
// Stop transcription
isTranscribing = false;
// Stop capturing audio, release the microphone tracks, and notify the server.
// Does nothing unless a recording is in progress (isRecording === true).
async function stopRecording() {
if (isRecording === true) {
microphone.stop();
microphone.stream.getTracks().forEach((track) => track.stop()); // Stop all tracks
// Tell the server that transcription should stop.
socket.emit("toggle_transcription", { action: "stop" });
// Reset client-side state and remove the "recording" class from <body>.
microphone = null;
isRecording = false;
console.log("Client: Microphone closed");
document.body.classList.remove("recording");
}
// NOTE(review): the next line does not pair with anything in stopRecording();
// it appears to be left over from the removed code shown by the diff view.
});
}

socket.on("transcription_update", function (data) {
document.getElementById("captions").innerHTML = data.transcription;
// Once the DOM is ready, make the #record button toggle recording on click:
// when idle it asks the server to start and begins recording, otherwise it
// stops the recording in progress. Failures are logged to the console.
document.addEventListener("DOMContentLoaded", () => {
  const recordButton = document.getElementById("record");

  const onRecordClick = () => {
    if (isRecording) {
      stopRecording().catch((error) =>
        console.error("Error stopping recording:", error)
      );
      return;
    }

    socket.emit("toggle_transcription", { action: "start" });
    startRecording().catch((error) =>
      console.error("Error starting recording:", error)
    );
  };

  recordButton.addEventListener("click", onRecordClick);
});
14 changes: 0 additions & 14 deletions templates/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -34,20 +34,6 @@ <h1>Captions by Deepgram</h1>
<div class="captions" id="captions">
<span>Realtime speech transcription API</span>
</div>
<div class="button-container">
<a
href="https://console.deepgram.com/signup"
class="info-button sign-up"
target="_blank"
>Sign Up</a
>
<a
href="https://developers.deepgram.com/docs/introduction"
class="info-button docs"
target="_blank"
>Read the Docs</a
>
</div>
<script src="../static/script.js"></script>
</body>
</html>

0 comments on commit adec194

Please sign in to comment.