Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update to use Flask-SocketIO and get audio from browser mic #4

Merged
merged 10 commits into from
Jun 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
__pycache__/
.env
.python-version
.env
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,11 @@ DEEPGRAM_API_KEY=%api_key%

#### Run the application

Once running, you can access the application in your browser at <http://127.0.0.1:5000>
You need to run both `app.py` (port 8000) and `app_socketio.py` (port 5001), each in its own terminal, because each command keeps running while its server is up. Once both are running, you can access the application in your browser at <http://127.0.0.1:8000>

```bash
python app.py
python app_socketio.py
```


Expand Down
120 changes: 7 additions & 113 deletions app.py
Original file line number Diff line number Diff line change
@@ -1,125 +1,19 @@
from flask import Flask, render_template
from flask_socketio import SocketIO
from dotenv import load_dotenv
import logging
from threading import Event
from deepgram import (
DeepgramClient,
DeepgramClientOptions,
LiveTranscriptionEvents,
LiveOptions,
Microphone,
)

load_dotenv()

app = Flask(__name__)
socketio = SocketIO(app)

# Set up client configuration
config = DeepgramClientOptions(
verbose=logging.DEBUG,
options={"keepalive": "true"}
)

# Initialize Deepgram client and connection
deepgram = DeepgramClient("", config)
dg_connection = deepgram.listen.live.v("1")

# Track transcription state
transcribing = False
transcription_event = Event()

def configure_deepgram(connection=None):
    """Start a Deepgram live connection with this app's audio options.

    Args:
        connection: The live connection to start. When omitted, the
            module-level ``dg_connection`` is used, so existing zero-argument
            callers behave exactly as before. Accepting an explicit
            connection lets ``reconnect`` start a freshly created one, which
            previously failed with a ``TypeError``.
    """
    options = LiveOptions(
        smart_format=True,
        language="en-US",
        encoding="linear16",
        channels=1,
        sample_rate=16000,
    )
    target = dg_connection if connection is None else connection
    target.start(options)

def start_microphone():
    """Create and start a ``Microphone`` that pushes audio to ``dg_connection.send``.

    Returns:
        The started ``Microphone`` instance, so the caller can ``finish()`` it.
    """
    mic = Microphone(dg_connection.send)
    mic.start()
    return mic

def start_transcription_loop():
    """Run transcription sessions for as long as ``transcribing`` is set.

    Each pass starts the Deepgram connection and a microphone stream,
    registers a transcript handler that relays non-empty transcripts to
    SocketIO clients as ``transcription_update`` events, and then blocks on
    ``transcription_event`` until a stop is requested.

    Any exception is logged rather than propagated, because this function is
    run as a background task.
    """
    global transcribing

    def on_message(self, result, **kwargs):
        # Parameter names are kept as in the original handler signature.
        transcript = result.channel.alternatives[0].transcript
        if transcript:
            socketio.emit('transcription_update', {'transcription': transcript})

    try:
        while transcribing:
            configure_deepgram()

            # Open a microphone stream
            microphone = start_microphone()
            try:
                dg_connection.on(LiveTranscriptionEvents.Transcript, on_message)

                # Wait for the transcription to finish
                transcription_event.wait()
                transcription_event.clear()
            finally:
                # Always release the microphone and the Deepgram connection,
                # even if waiting or handler registration raised.
                microphone.finish()
                dg_connection.finish()
            logging.info("Transcription loop finished.")

    except Exception as e:
        logging.error(f"Error: {e}")

def reconnect():
try:
logging.info("Reconnecting to Deepgram...")
new_dg_connection = deepgram.listen.live.v("1")

# Configure and start the new Deepgram connection
configure_deepgram(new_dg_connection)
from dotenv import load_dotenv
from flask import Flask, render_template

logging.info("Reconnected to Deepgram successfully.")
return new_dg_connection
load_dotenv()

except Exception as e:
logging.error(f"Reconnection failed: {e}")
return None
app = Flask("app_http")

def on_disconnect():
    """Finish the active Deepgram connection, if there is one, and clear the global."""
    global dg_connection
    logging.info("Client disconnected")

    # Nothing to tear down when no connection is currently held.
    if not dg_connection:
        logging.info("No active dg_connection to disconnect from")
        return

    dg_connection.finish()
    dg_connection = None
    logging.info("Cleared listeners and set dg_connection to None")

@app.route('/')
def index():
    """Render ``index.html`` for the root URL."""
    page = render_template('index.html')
    return page

@socketio.on('disconnect')
def handle_disconnect():
    """Schedule ``on_disconnect`` as a SocketIO background task."""
    cleanup = on_disconnect
    socketio.start_background_task(target=cleanup)

@socketio.on('toggle_transcription')
def toggle_transcription(data):
    """Start or stop the transcription loop according to ``data['action']``.

    A ``'start'`` request is honoured only while idle and a ``'stop'`` request
    only while transcribing; any other combination is ignored.
    """
    global transcribing
    requested = data.get('action')

    if requested == 'start' and not transcribing:
        # Flip the flag first so the background loop sees it when it begins.
        transcribing = True
        socketio.start_background_task(target=start_transcription_loop)
        return

    if requested == 'stop' and transcribing:
        # Clear the flag, then wake the loop that is blocked on the event.
        transcribing = False
        transcription_event.set()

if __name__ == '__main__':
logging.info("Starting SocketIO server.")
socketio.run(app, debug=True)
logging.info("Starting Flask server.")
# Run flask app
app.run(debug=True, port=8000)
86 changes: 86 additions & 0 deletions app_socketio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
import logging
import os
from flask import Flask
from flask_socketio import SocketIO
from dotenv import load_dotenv
from deepgram import (
DeepgramClient,
LiveTranscriptionEvents,
LiveOptions,
DeepgramClientOptions
)

load_dotenv()

app_socketio = Flask("app_socketio")
socketio = SocketIO(app_socketio, cors_allowed_origins=['http://127.0.0.1:8000'])

API_KEY = os.getenv("DEEPGRAM_API_KEY")

# Set up client configuration
config = DeepgramClientOptions(
verbose=logging.WARN, # Change to logging.INFO or logging.DEBUG for more verbose output
options={"keepalive": "true"}
)

deepgram = DeepgramClient(API_KEY, config)

dg_connection = None

def initialize_deepgram_connection():
    """Create, wire up and start a fresh Deepgram live connection.

    On success the new connection is stored in the module-level
    ``dg_connection``. Any connection held before the call is finished first,
    so repeated start/restart requests do not leave an old connection open.

    If the new connection fails to start, ``dg_connection`` stays ``None``
    (so ``handle_audio_stream`` drops incoming audio) and the function
    returns. It no longer calls ``exit()``, which raised ``SystemExit`` from
    inside a SocketIO event handler.
    """
    global dg_connection

    # Close the previous connection before replacing it (best effort).
    if dg_connection is not None:
        try:
            dg_connection.finish()
        except Exception as error:
            print(f"\n\n{error}\n\n")
        dg_connection = None

    # Initialize Deepgram client and connection
    connection = deepgram.listen.live.v("1")

    # NOTE(review): handler parameter names (``open``, ``result``, ``close``,
    # ``error``) are kept exactly as they were, in case the SDK passes them
    # by keyword - confirm before renaming.
    def on_open(self, open, **kwargs):
        print(f"\n\n{open}\n\n")

    def on_message(self, result, **kwargs):
        transcript = result.channel.alternatives[0].transcript
        if len(transcript) > 0:
            print(transcript)
            socketio.emit('transcription_update', {'transcription': transcript})

    def on_close(self, close, **kwargs):
        print(f"\n\n{close}\n\n")

    def on_error(self, error, **kwargs):
        print(f"\n\n{error}\n\n")

    connection.on(LiveTranscriptionEvents.Open, on_open)
    connection.on(LiveTranscriptionEvents.Transcript, on_message)
    connection.on(LiveTranscriptionEvents.Close, on_close)
    connection.on(LiveTranscriptionEvents.Error, on_error)

    # Define the options for the live transcription
    options = LiveOptions(model="nova-2", language="en-US")

    if connection.start(options) is False:
        print("Failed to start connection")
        return

    # Publish the connection only once it has started, so audio is never
    # sent to a connection that is not open.
    dg_connection = connection

@socketio.on('audio_stream')
def handle_audio_stream(data):
    """Pass an ``audio_stream`` payload to Deepgram when a connection is held."""
    connection = dg_connection
    if not connection:
        # No live connection right now; the chunk is dropped.
        return
    connection.send(data)

@socketio.on('toggle_transcription')
def handle_toggle_transcription(data):
    """Open or close the Deepgram connection on a client toggle request.

    Args:
        data: Event payload; its ``"action"`` entry selects the operation.
            ``"start"`` (re)initializes the Deepgram connection. ``"stop"``,
            which the browser emits when recording ends, finishes the current
            connection and clears ``dg_connection``. Previously ``"stop"`` was
            ignored and the connection was left open.
    """
    global dg_connection
    print("toggle_transcription", data)
    action = data.get("action")
    if action == "start":
        print("Starting Deepgram connection")
        initialize_deepgram_connection()
    elif action == "stop" and dg_connection:
        print("Stopping Deepgram connection")
        dg_connection.finish()
        # Cleared so later audio chunks are dropped rather than sent to a
        # finished connection.
        dg_connection = None

@socketio.on('connect')
def server_connect():
    """Print a notice when a SocketIO client connects."""
    notice = 'Client connected'
    print(notice)

@socketio.on('restart_deepgram')
def restart_deepgram():
    """Re-run Deepgram connection setup on a ``restart_deepgram`` event."""
    notice = 'Restarting Deepgram connection'
    print(notice)
    initialize_deepgram_connection()

if __name__ == '__main__':
    logging.info("Starting SocketIO server.")
    # Port 5001 is the port the browser client connects to (socket_port in
    # static/script.js).
    run_options = {"debug": True, "allow_unsafe_werkzeug": True, "port": 5001}
    socketio.run(app_socketio, **run_options)
3 changes: 1 addition & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
deepgram-sdk==3.0.3
deepgram-sdk==3.2.7
Flask==3.0.0
Flask-SocketIO==5.3.6
python-dotenv==1.0.0
pyaudio==0.2.14
82 changes: 68 additions & 14 deletions static/script.js
Original file line number Diff line number Diff line change
@@ -1,21 +1,75 @@
var socket = io.connect(
"http://" + window.location.hostname + ":" + location.port
let isRecording = false;
let socket;
let microphone;

const socket_port = 5001;
socket = io(
"http://" + window.location.hostname + ":" + socket_port.toString()
);

var isTranscribing = false;
// Show the latest transcript pushed by the server in the captions element.
// textContent is used instead of innerHTML so the transcript is always
// rendered as plain text and never parsed as markup.
socket.on("transcription_update", (data) => {
  document.getElementById("captions").textContent = data.transcription;
});

/**
 * Request microphone access and wrap the resulting stream in a MediaRecorder
 * configured for "audio/webm".
 *
 * Any failure is logged and then re-thrown so the caller can handle it.
 */
async function getMicrophone() {
  let audioStream;
  try {
    audioStream = await navigator.mediaDevices.getUserMedia({ audio: true });
    const recorder = new MediaRecorder(audioStream, { mimeType: "audio/webm" });
    return recorder;
  } catch (err) {
    console.error("Error accessing microphone:", err);
    throw err;
  }
}

/**
 * Start the recorder and stream its audio chunks to the server.
 *
 * Resolves once the recorder fires its start event. Each non-empty chunk is
 * emitted on the socket as an "audio_stream" event.
 */
async function openMicrophone(microphone, socket) {
  const forwardChunk = async (event) => {
    console.log("client: microphone data received");
    // Skip chunks that carry no audio.
    if (!(event.data.size > 0)) {
      return;
    }
    socket.emit("audio_stream", event.data);
  };

  return new Promise((resolve) => {
    microphone.onstart = () => {
      console.log("Client: Microphone opened");
      document.body.classList.add("recording");
      resolve();
    };
    microphone.ondataavailable = forwardChunk;
    // Request a data chunk every 1000 ms.
    microphone.start(1000);
  });
}

/**
 * Acquire the microphone and begin streaming audio to the server.
 *
 * isRecording is set up front so a second click while the permission prompt
 * is open does not start a second recording. If acquiring or opening the
 * microphone fails, the recording state is rolled back before the error is
 * re-thrown. Previously isRecording stayed true with no microphone, so the
 * next click tried to stop a recorder that did not exist.
 */
async function startRecording() {
  isRecording = true;
  try {
    microphone = await getMicrophone();
    console.log("Client: Waiting to open microphone");
    await openMicrophone(microphone, socket);
  } catch (error) {
    // Roll back so the UI can attempt to start again.
    isRecording = false;
    microphone = null;
    throw error;
  }
}

document.getElementById("record").addEventListener("change", function () {
if (this.checked) {
// Start transcription
isTranscribing = true;
socket.emit("toggle_transcription", { action: "start" });
} else {
// Stop transcription
isTranscribing = false;
async function stopRecording() {
if (isRecording === true) {
microphone.stop();
microphone.stream.getTracks().forEach((track) => track.stop()); // Stop all tracks
socket.emit("toggle_transcription", { action: "stop" });
microphone = null;
isRecording = false;
console.log("Client: Microphone closed");
document.body.classList.remove("recording");
}
});
}

socket.on("transcription_update", function (data) {
document.getElementById("captions").innerHTML = data.transcription;
document.addEventListener("DOMContentLoaded", () => {
const recordButton = document.getElementById("record");

recordButton.addEventListener("click", () => {
if (!isRecording) {
socket.emit("toggle_transcription", { action: "start" });
startRecording().catch((error) =>
console.error("Error starting recording:", error)
);
} else {
stopRecording().catch((error) =>
console.error("Error stopping recording:", error)
);
}
});
});
14 changes: 0 additions & 14 deletions templates/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -34,20 +34,6 @@ <h1>Captions by Deepgram</h1>
<div class="captions" id="captions">
<span>Realtime speech transcription API</span>
</div>
<div class="button-container">
<a
href="https://console.deepgram.com/signup"
class="info-button sign-up"
target="_blank"
>Sign Up</a
>
<a
href="https://developers.deepgram.com/docs/introduction"
class="info-button docs"
target="_blank"
>Read the Docs</a
>
</div>
<script src="../static/script.js"></script>
</body>
</html>