generated from deepgram-starters/project-template
-
Notifications
You must be signed in to change notification settings - Fork 21
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #4 from deepgram-starters/add-browser-mic
Update to use Flask-SocketIO and get audio from browser mic
- Loading branch information
Showing 7 changed files with 166 additions and 145 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,3 @@ | ||
__pycache__/ | ||
.env | ||
.python-version | ||
.env |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,125 +1,19 @@ | ||
from flask import Flask, render_template | ||
from flask_socketio import SocketIO | ||
from dotenv import load_dotenv | ||
import logging | ||
from threading import Event | ||
from deepgram import ( | ||
DeepgramClient, | ||
DeepgramClientOptions, | ||
LiveTranscriptionEvents, | ||
LiveOptions, | ||
Microphone, | ||
) | ||
|
||
load_dotenv() | ||
|
||
app = Flask(__name__) | ||
socketio = SocketIO(app) | ||
|
||
# Set up client configuration | ||
config = DeepgramClientOptions( | ||
verbose=logging.DEBUG, | ||
options={"keepalive": "true"} | ||
) | ||
|
||
# Initialize Deepgram client and connection | ||
deepgram = DeepgramClient("", config) | ||
dg_connection = deepgram.listen.live.v("1") | ||
|
||
# Track transcription state | ||
transcribing = False | ||
transcription_event = Event() | ||
|
||
def configure_deepgram():
    """Start the shared ``dg_connection`` with a fixed set of live options.

    The options request smart formatting, the ``en-US`` language, and
    single-channel ``linear16`` audio at a 16000 sample rate.
    """
    live_settings = {
        "smart_format": True,
        "language": "en-US",
        "encoding": "linear16",
        "channels": 1,
        "sample_rate": 16000,
    }
    dg_connection.start(LiveOptions(**live_settings))
|
||
def start_microphone():
    """Create a ``Microphone`` that pushes audio to ``dg_connection.send``.

    Returns:
        The started ``Microphone`` instance, so the caller can finish it later.
    """
    mic = Microphone(dg_connection.send)
    mic.start()
    return mic
|
||
def start_transcription_loop():
    """Run live transcription until the ``transcribing`` flag is cleared.

    Each pass starts the Deepgram connection, opens the microphone, registers
    a transcript handler that relays non-empty transcripts to Socket.IO
    clients as ``transcription_update`` events, and then blocks on
    ``transcription_event`` until a stop is requested.

    The microphone and connection are released in a ``finally`` block so an
    error while streaming does not leave them open. On any error the
    ``transcribing`` flag is reset so a later "start" request is not ignored.
    """
    global transcribing

    def on_message(self, result, **kwargs):
        # Relay only non-empty transcripts to the connected clients.
        transcript = result.channel.alternatives[0].transcript
        if transcript:
            socketio.emit('transcription_update', {'transcription': transcript})

    try:
        while transcribing:
            configure_deepgram()

            microphone = None
            try:
                # Open a microphone stream
                microphone = start_microphone()

                dg_connection.on(LiveTranscriptionEvents.Transcript, on_message)

                # Wait for the transcription to finish
                transcription_event.wait()
                transcription_event.clear()
            finally:
                # Finish the microphone and Deepgram connection, even on error
                if microphone is not None:
                    microphone.finish()
                dg_connection.finish()

            logging.info("Transcription loop finished.")

    except Exception as e:
        # Without this reset the flag stays True and new "start" requests
        # would be silently dropped by toggle_transcription.
        transcribing = False
        logging.error(f"Error: {e}")
|
||
def reconnect(): | ||
try: | ||
logging.info("Reconnecting to Deepgram...") | ||
new_dg_connection = deepgram.listen.live.v("1") | ||
|
||
# Configure and start the new Deepgram connection | ||
configure_deepgram(new_dg_connection) | ||
from dotenv import load_dotenv | ||
from flask import Flask, render_template | ||
|
||
logging.info("Reconnected to Deepgram successfully.") | ||
return new_dg_connection | ||
load_dotenv() | ||
|
||
except Exception as e: | ||
logging.error(f"Reconnection failed: {e}") | ||
return None | ||
app = Flask("app_http") | ||
|
||
def on_disconnect():
    """Finish and drop the shared Deepgram connection after a client leaves.

    When no connection is active, only a log line is written.
    """
    global dg_connection
    logging.info("Client disconnected")

    # Guard clause: nothing to tear down.
    if not dg_connection:
        logging.info("No active dg_connection to disconnect from")
        return

    dg_connection.finish()
    dg_connection = None
    logging.info("Cleared listeners and set dg_connection to None")
|
||
@app.route('/')
def index():
    """Serve the root URL by rendering the ``index.html`` template."""
    page = render_template('index.html')
    return page
|
||
@socketio.on('disconnect')
def handle_disconnect():
    """Socket.IO ``disconnect`` handler.

    Schedules ``on_disconnect`` as a Socket.IO background task instead of
    running the cleanup inside the event handler itself.
    """
    cleanup = on_disconnect
    socketio.start_background_task(target=cleanup)
|
||
@socketio.on('toggle_transcription')
def toggle_transcription(data):
    """Socket.IO ``toggle_transcription`` handler.

    Args:
        data: Event payload; its ``action`` entry selects the operation.
            ``'start'`` launches the transcription loop as a background task
            when none is running. ``'stop'`` clears the running flag and sets
            ``transcription_event`` to wake the loop. Any other value, or a
            request that matches the current state, is ignored.
    """
    global transcribing
    requested = data.get('action')

    if requested == 'start':
        if not transcribing:
            # Start transcription
            transcribing = True
            socketio.start_background_task(target=start_transcription_loop)
    elif requested == 'stop':
        if transcribing:
            # Stop transcription
            transcribing = False
            transcription_event.set()
|
||
if __name__ == '__main__': | ||
logging.info("Starting SocketIO server.") | ||
socketio.run(app, debug=True) | ||
logging.info("Starting Flask server.") | ||
# Run flask app | ||
app.run(debug=True, port=8000) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
import logging | ||
import os | ||
from flask import Flask | ||
from flask_socketio import SocketIO | ||
from dotenv import load_dotenv | ||
from deepgram import ( | ||
DeepgramClient, | ||
LiveTranscriptionEvents, | ||
LiveOptions, | ||
DeepgramClientOptions | ||
) | ||
|
||
# Load variables from a local .env file before the environment is read below.
load_dotenv()

# Flask app that hosts the Socket.IO endpoint; only the listed origin may connect.
app_socketio = Flask("app_socketio")
socketio = SocketIO(
    app_socketio,
    cors_allowed_origins=['http://127.0.0.1:8000'],
)

# None when DEEPGRAM_API_KEY is not set in the environment.
API_KEY = os.getenv("DEEPGRAM_API_KEY")

# Set up client configuration
config = DeepgramClientOptions(
    # Change to logging.INFO or logging.DEBUG for more verbose output
    verbose=logging.WARN,
    options={"keepalive": "true"},
)

deepgram = DeepgramClient(API_KEY, config)

# Current live-transcription connection; assigned by
# initialize_deepgram_connection().
dg_connection = None
|
||
def initialize_deepgram_connection():
    """Open a fresh Deepgram live connection and publish it as ``dg_connection``.

    Any connection left over from an earlier start/restart is finished first
    so it is not orphaned. Event handlers are attached that print
    open/close/error events and relay non-empty transcripts to Socket.IO
    clients as ``transcription_update`` events.

    Returns:
        bool: True when the connection started. False when ``start`` reported
        failure; ``dg_connection`` is then left as None and the server keeps
        running (previously the process was terminated via ``exit()``).
    """
    global dg_connection

    # Close the previous connection before replacing it.
    if dg_connection is not None:
        try:
            dg_connection.finish()
        except Exception as exc:  # best effort: a stale socket must not block a restart
            logging.warning("Failed to close previous Deepgram connection: %s", exc)
        dg_connection = None

    # Initialize Deepgram client and connection
    connection = deepgram.listen.live.v("1")

    # NOTE(review): the handler parameter names (open, result, close, error)
    # are kept exactly as in the original; the SDK presumably passes them by
    # keyword, so renaming them could break the callbacks -- confirm.
    def on_open(self, open, **kwargs):
        print(f"\n\n{open}\n\n")

    def on_message(self, result, **kwargs):
        transcript = result.channel.alternatives[0].transcript
        if len(transcript) > 0:
            print(result.channel.alternatives[0].transcript)
            socketio.emit('transcription_update', {'transcription': transcript})

    def on_close(self, close, **kwargs):
        print(f"\n\n{close}\n\n")

    def on_error(self, error, **kwargs):
        print(f"\n\n{error}\n\n")

    connection.on(LiveTranscriptionEvents.Open, on_open)
    connection.on(LiveTranscriptionEvents.Transcript, on_message)
    connection.on(LiveTranscriptionEvents.Close, on_close)
    connection.on(LiveTranscriptionEvents.Error, on_error)

    # Define the options for the live transcription
    options = LiveOptions(model="nova-2", language="en-US")

    if connection.start(options) is False:
        print("Failed to start connection")
        return False

    # Publish only after a successful start, so handle_audio_stream never
    # sends audio to a connection that failed to open.
    dg_connection = connection
    return True
|
||
@socketio.on('audio_stream')
def handle_audio_stream(data):
    """Socket.IO ``audio_stream`` handler: pass one audio chunk to Deepgram.

    Args:
        data: Payload of the ``audio_stream`` event, handed unchanged to
            ``dg_connection.send``. It is dropped when no connection is set.
    """
    if not dg_connection:
        return
    dg_connection.send(data)
|
||
@socketio.on('toggle_transcription')
def handle_toggle_transcription(data):
    """Socket.IO ``toggle_transcription`` handler.

    Args:
        data: Event payload; its ``action`` entry selects the operation.
            ``"start"`` opens a new Deepgram connection. ``"stop"`` finishes
            the current connection, if any, so it does not stay open after
            the client stops recording. Other values are ignored.
    """
    global dg_connection
    print("toggle_transcription", data)
    action = data.get("action")
    if action == "start":
        print("Starting Deepgram connection")
        initialize_deepgram_connection()
    elif action == "stop":
        # The client emits this when recording ends; previously it was ignored
        # and the upstream connection was never closed.
        # NOTE(review): an audio chunk that arrives after this point is dropped
        # by handle_audio_stream because dg_connection is None.
        if dg_connection:
            print("Stopping Deepgram connection")
            dg_connection.finish()
            dg_connection = None
|
||
@socketio.on('connect')
def server_connect():
    """Socket.IO ``connect`` handler: print a notice that a client connected."""
    notice = 'Client connected'
    print(notice)
|
||
@socketio.on('restart_deepgram')
def restart_deepgram():
    """Socket.IO ``restart_deepgram`` handler.

    Prints a notice and then calls ``initialize_deepgram_connection`` to set
    up the connection again.
    """
    notice = 'Restarting Deepgram connection'
    print(notice)
    initialize_deepgram_connection()
|
||
if __name__ == '__main__':
    # Script entry point: serve the Socket.IO app on port 5001 in debug mode.
    logging.info("Starting SocketIO server.")
    run_options = {
        "debug": True,
        "allow_unsafe_werkzeug": True,
        "port": 5001,
    }
    socketio.run(app_socketio, **run_options)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,4 @@ | ||
deepgram-sdk==3.0.3 | ||
deepgram-sdk==3.2.7 | ||
Flask==3.0.0 | ||
Flask-SocketIO==5.3.6 | ||
python-dotenv==1.0.0 | ||
pyaudio==0.2.14 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,21 +1,75 @@ | ||
var socket = io.connect( | ||
"http://" + window.location.hostname + ":" + location.port | ||
let isRecording = false; | ||
let socket; | ||
let microphone; | ||
|
||
// The Socket.IO server listens on its own port, so the URL is built from the
// page's hostname plus that fixed port rather than the page's own port.
const socket_port = 5001;
const socketUrl = `http://${window.location.hostname}:${socket_port}`;
socket = io(socketUrl);
|
||
var isTranscribing = false; | ||
// Show the latest transcript pushed by the server in the #captions element.
socket.on("transcription_update", (data) => {
  // textContent (not innerHTML): the transcript is plain text received over
  // the socket and must never be parsed as markup.
  document.getElementById("captions").textContent = data.transcription;
});
|
||
/**
 * Ask the browser for microphone access and wrap the stream in a MediaRecorder
 * configured for "audio/webm".
 *
 * @returns {Promise<MediaRecorder>} recorder bound to the microphone stream
 * @throws rethrows any error from getUserMedia or the MediaRecorder
 *   constructor after logging it
 */
async function getMicrophone() {
  let stream;
  try {
    stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    return new MediaRecorder(stream, { mimeType: "audio/webm" });
  } catch (error) {
    // If the stream was acquired but the recorder could not be created,
    // release the tracks so the microphone does not stay open.
    if (stream) {
      stream.getTracks().forEach((track) => track.stop());
    }
    console.error("Error accessing microphone:", error);
    throw error;
  }
}
|
||
/**
 * Start the recorder and stream its audio chunks to the server.
 *
 * @param {MediaRecorder} microphone recorder to start
 * @param {object} socket Socket.IO client used to emit "audio_stream" events
 * @returns {Promise<void>} resolves once the recorder fires its start event
 */
async function openMicrophone(microphone, socket) {
  // Forward every non-empty chunk to the server.
  const forwardChunk = async (event) => {
    console.log("client: microphone data received");
    if (event.data.size > 0) {
      socket.emit("audio_stream", event.data);
    }
  };

  return new Promise((resolve) => {
    const markOpened = () => {
      console.log("Client: Microphone opened");
      document.body.classList.add("recording");
      resolve();
    };

    microphone.ondataavailable = forwardChunk;
    microphone.onstart = markOpened;
    // Deliver recorded data in 1000 ms slices.
    microphone.start(1000);
  });
}
|
||
/**
 * Acquire the microphone and begin streaming audio.
 *
 * isRecording is set up front, as before, so a second click during start-up
 * is treated as a stop request. If acquiring or opening the microphone fails,
 * the recording state is rolled back. Otherwise isRecording would stay true
 * with no recorder, and the next click would call stopRecording on an
 * undefined microphone.
 *
 * @returns {Promise<void>} resolves once the microphone is open
 * @throws rethrows the underlying error after rolling back state
 */
async function startRecording() {
  isRecording = true;
  try {
    microphone = await getMicrophone();
    console.log("Client: Waiting to open microphone");
    await openMicrophone(microphone, socket);
  } catch (error) {
    // Release the stream if it was acquired before the failure.
    if (microphone && microphone.stream) {
      microphone.stream.getTracks().forEach((track) => track.stop());
    }
    microphone = null;
    isRecording = false;
    throw error;
  }
}
|
||
document.getElementById("record").addEventListener("change", function () { | ||
if (this.checked) { | ||
// Start transcription | ||
isTranscribing = true; | ||
socket.emit("toggle_transcription", { action: "start" }); | ||
} else { | ||
// Stop transcription | ||
isTranscribing = false; | ||
async function stopRecording() { | ||
if (isRecording === true) { | ||
microphone.stop(); | ||
microphone.stream.getTracks().forEach((track) => track.stop()); // Stop all tracks | ||
socket.emit("toggle_transcription", { action: "stop" }); | ||
microphone = null; | ||
isRecording = false; | ||
console.log("Client: Microphone closed"); | ||
document.body.classList.remove("recording"); | ||
} | ||
}); | ||
} | ||
|
||
socket.on("transcription_update", function (data) { | ||
document.getElementById("captions").innerHTML = data.transcription; | ||
// Once the DOM is ready, make the #record button toggle recording on click.
document.addEventListener("DOMContentLoaded", () => {
  const recordButton = document.getElementById("record");

  const onRecordClick = () => {
    if (isRecording) {
      stopRecording().catch((error) =>
        console.error("Error stopping recording:", error)
      );
      return;
    }

    // Tell the server to open its Deepgram connection, then start capturing.
    socket.emit("toggle_transcription", { action: "start" });
    startRecording().catch((error) =>
      console.error("Error starting recording:", error)
    );
  };

  recordButton.addEventListener("click", onRecordClick);
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters