-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
d75718b
commit 7848760
Showing
5 changed files
with
199 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
# GPTAssistant-ElevenLabs

Connecting the new [OpenAI Assistant API](https://openai.com/blog/new-models-and-developer-products-announced-at-devday), which is persistent and has infinitely long threads, with the ElevenLabs Text to Speech and Azure Speech to Text APIs.

1. Fill in the OpenAI, ElevenLabs and Azure Speech Services API keys, as well as the Azure location/region, in `config.py`.

2. Execute `python newassistant.py` and follow the on-screen instructions to create an assistant. The language code must be [supported by Azure](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=stt). When choosing GPT-4, your account must have access to the GPT-4 API.

3. Execute `python "yourassistantname.py"` to create a thread with the assistant created at step 2.

4. Now execute `python "yourassistantname_thread.py"` to send voice messages to the assistant and get TTS answers back.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
# config.py - API credentials shared by the assistant scripts.
# Fill in every value below before running newassistant.py.

# Set up Azure Speech To Text
azureapikey = ""  # Insert Azure Speech Services API key
azureregion = ""  # insert selected Azure location (e.g. "westeurope")

# Set up ElevenLabs API
elevenlabsapikey = ""  # Insert ElevenLabs API Key

# Set up OpenAI API
openaiapikey = ""  # Insert OpenAI API key here
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
# creates a new assistant
#
# Interactive script: asks for a name, instructions, spoken language,
# ElevenLabs voice and model choice, creates the Assistant via the
# OpenAI API, then generates two per-assistant files:
#   <name>.py        - runner script copied from the undefassistant.py template
#   <name>_config.py - assistant id, voice id and language settings
import config
import openai
import os
import time
import shutil

openai.api_key = config.openaiapikey

print("Enter the assistants Name")
gptname = input().lower()

print("Enter the instructions for the Assistant. How they should behave, speak etc.")
gptinstruction = input()

print("In which language will you speak to this assistant? e.g. en-US")
language = input()

print("Enter the ElevenLabs voice name that the assistant should use")
voiceid = input()

print("Do you want to use the gpt-4-turbo preview or gpt-3.5-turbo? Enter 4 or 3. ")
modelnumber = input().strip()  # strip so a stray space doesn't reject a valid answer

# Map the user's choice to a concrete model identifier.
if modelnumber == "3":
    gptmodel = "gpt-3.5-turbo-1106"
elif modelnumber == "4":
    gptmodel = "gpt-4-1106-preview"
else:
    print("Neither 3 or 4")
    time.sleep(2)
    # SystemExit instead of quit(): quit() is provided by the site module
    # and may be absent when run non-interactively.
    raise SystemExit(1)

assistant = openai.beta.assistants.create(
    name=gptname,
    instructions=gptinstruction,
    model=gptmodel,
)

# Per-assistant file names.
configfile = gptname + "_config" + ".py"
assistantfile = gptname + ".py"

# Copy the generic assistant template under the assistant's own name.
# os.path.join is portable; the original "\\" concatenation was Windows-only.
dest_path = os.path.join(os.getcwd(), assistantfile)
shutil.copy('undefassistant.py', dest_path)

# Write the assistant-specific settings. The `with` block closes the file;
# the original trailing `file.close` (no parentheses) was a no-op.
with open(configfile, "w") as file:
    file.write(
        f'\nassistantid = "{assistant.id}"\n'
        f'voiceid = "{voiceid}"\n'
        f'language = "{language}"'
    )

print(f"The Assistant has been created, you can now create a Thread with it by executing {assistantfile}")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
# Creates a new conversation thread for this assistant and writes its id
# to <assistant>_thread_conf.py. Also generates the voice-chat runner
# script <assistant>_thread.py from the undefthread.py template.
import os
import shutil
import openai
import config
from pathlib import Path

# This file is named after the assistant (created by newassistant.py),
# so the assistant name is simply this module's stem.
assistantname = Path(__file__).stem
threadfilename = assistantname + "_thread.py"

# Copy the generic thread template next to this script.
# os.path.join is portable; the original "\\" concatenation was Windows-only.
dest_path = os.path.join(os.getcwd(), threadfilename)
shutil.copy('undefthread.py', dest_path)

openai.api_key = config.openaiapikey
thread = openai.beta.threads.create()

# Persist the thread id for the runner script. The `with` block closes the
# file; the original trailing `file.close` (no parentheses) was a no-op.
configfile = assistantname + "_thread_conf.py"
with open(configfile, "w") as file:
    file.write(f'threadid = "{thread.id}"')

print(f"A thread for {assistantname} has been created. you can Talk to {assistantname} by executing {threadfilename}")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
# Voice-chat loop: Azure Speech-to-Text -> OpenAI Assistant thread -> ElevenLabs TTS.
# You also need to install the MPV library https://github.com/rossy/mpv-install/blob/master/README.md if you want to enable audio collection

import azure.cognitiveservices.speech as speechsdk
import config
import elevenlabs
import openai
import time
from pathlib import Path
import importlib

from elevenlabs import set_api_key

# This file is named "<assistant>_thread.py"; derive the assistant name
# and the matching generated config modules from the filename.
filename = Path(__file__).stem
assistantname = filename.replace("_thread", "")
configname = assistantname + "_config"
threadconfig = assistantname + "_thread_conf"

threadconf = importlib.import_module(threadconfig)
assistantconf = importlib.import_module(configname)

threadid = threadconf.threadid
voiceid = assistantconf.voiceid
assistantid = assistantconf.assistantid
language = assistantconf.language

# Initializing ElevenLabs and OpenAI API keys
set_api_key(config.elevenlabsapikey)  # ElevenLabs API key
openai.api_key = config.openaiapikey  # OpenAI key

# Set up Azure Speech To Text
speech_config = speechsdk.SpeechConfig(subscription=config.azureapikey, region=config.azureregion)
speech_config.speech_recognition_language = language
speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config)

while True:
    print("Talk now")
    result = speech_recognizer.recognize_once()  # get voice input
    message = result.text  # original used format(result.text), a pointless str() alias
    print(f"You: {message}")
    if not message:
        continue  # nothing recognized - listen again

    # Post the transcript to the thread. Do not rebind `message` to the
    # API response object as the original did - keep the transcript intact.
    openai.beta.threads.messages.create(
        thread_id=threadid,
        role="user",
        content=message,
    )
    run = openai.beta.threads.runs.create(
        thread_id=threadid,
        assistant_id=assistantid,
    )

    # Poll until the run reaches a terminal state.
    while True:
        running = openai.beta.threads.runs.retrieve(
            thread_id=threadid,
            run_id=run.id,
        )

        # If the run has finished, fetch the message list.
        if running.status == "completed":
            messages = openai.beta.threads.messages.list(
                thread_id=threadid,
            )
            # The newest message (the assistant's reply) comes first.
            first_message = messages.data[0]
            value = first_message.content[0].text.value
            print(f"{assistantname}: {value}")
            audio_stream = elevenlabs.generate(text=value, voice=voiceid, model="eleven_multilingual_v2", stream=True)
            elevenlabs.stream(audio_stream)  # plays via mpv; return value was unused
            break

        # Terminal failure states: the original polled forever here.
        if running.status in ("failed", "cancelled", "expired"):
            print(f"Run ended with status: {running.status}")
            break

        time.sleep(0.5)