chore: Added ru locale, added voximplant adapter, groq provider, switched to whisper in deepgram (multilang) #304
base: develop
@@ -0,0 +1,80 @@ (new file: Groq LLM provider)
```python
import os
import time
from dotenv import load_dotenv
import litellm

from bolna.helpers.logger_config import configure_logger
from bolna.helpers.utils import json_to_pydantic_schema
from bolna.llms.llm import BaseLLM

logger = configure_logger(__name__)
load_dotenv()


class GroqLLM(BaseLLM):
    def __init__(self, model="llama3-8b-8192", max_tokens=100, buffer_size=40, temperature=0.0, **kwargs):
        super().__init__(max_tokens, buffer_size)
        self.model = model
        self.started_streaming = False
        self.model_args = {"max_tokens": max_tokens, "temperature": temperature, "model": self.model}
        # An explicitly passed llm_key takes precedence over the GROQ_API_KEY env variable
        self.api_key = kwargs.get("llm_key", os.getenv('GROQ_API_KEY'))
        if self.api_key:
            self.model_args["api_key"] = self.api_key

    async def generate_stream(self, messages, synthesize=True, request_json=False):
        answer, buffer = "", ""
        logger.info(f"Request to model {self.model}: {messages}")
        latency = None
        start_time = time.time()

        # Route through litellm with the groq/ prefix; request_json is accepted
        # for interface parity with other providers but is not forwarded here.
        async for chunk in await litellm.acompletion(api_key=self.api_key, model=f"groq/{self.model}",
                                                     messages=messages, stream=True,
                                                     max_tokens=self.max_tokens):
            if not self.started_streaming:
                first_chunk_time = time.time()
                latency = first_chunk_time - start_time
                logger.info(f"LLM latency: {latency:.2f} s")
                self.started_streaming = True
            if (text_chunk := chunk.choices[0].delta.content) and not chunk.choices[0].finish_reason:
                answer += text_chunk
                buffer += text_chunk

                if len(buffer) >= self.buffer_size and synthesize:
                    # Flush whole words; keep the (possibly partial) last word in the buffer
                    text = ' '.join(buffer.split(" ")[:-1])
                    yield text, False, latency, False
                    buffer = buffer.split(" ")[-1]

        if buffer:
            yield buffer, True, latency, False
        else:
            yield answer, True, latency, False
        self.started_streaming = False
        logger.info(f"Time to generate response {time.time() - start_time} {answer}")

    async def generate(self, messages, stream=False, request_json=False):
        text = ""
        model_args = self.model_args.copy()
        model_args["model"] = f"groq/{self.model}"  # litellm needs the provider prefix here as well
        model_args["messages"] = messages
        model_args["stream"] = stream

        if request_json:
            model_args['response_format'] = {
                "type": "json_object",
                "schema": json_to_pydantic_schema('{"classification_label": "classification label goes here"}')
            }
        logger.info(f'Request to Groq LLM {model_args}')
        try:
            completion = await litellm.acompletion(**model_args)
            text = completion.choices[0].message.content
            logger.debug(completion)
        except Exception as e:
            logger.error(f'Error generating response {e}')
        return text
```
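A minimal sketch of how this provider could be driven (the module path `bolna.llms.groq` is an assumption based on the imports above; requires `GROQ_API_KEY` in the environment):

```python
import asyncio
from bolna.llms.groq import GroqLLM  # assumed module path for this new file

async def main():
    llm = GroqLLM(model="llama3-8b-8192", max_tokens=100, buffer_size=40)
    messages = [{"role": "user", "content": "Say hello in Russian."}]
    # generate_stream yields (text, is_final, first_token_latency, _) tuples
    async for text, is_final, latency, _ in llm.generate_stream(messages, synthesize=True):
        print(f"final={is_final} latency={latency} text={text!r}")

asyncio.run(main())
```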
@@ -0,0 +1,12 @@ (new file: Dockerfile for the Voximplant telephony server)
```dockerfile
FROM python:3.10.13-slim

WORKDIR /app
COPY ./requirements.txt /app
COPY ./telephony_server/voximplant_api_server.py /app
COPY ./credentials/voximplant.json /app/credentials/voximplant.json

RUN pip install --no-cache-dir -r requirements.txt

# uvicorn listens on 8000 below, so expose the same port (the original said 8001)
EXPOSE 8000

CMD ["uvicorn", "voximplant_api_server:app", "--host", "0.0.0.0", "--port", "8000"]
```
```diff
@@ -1,6 +1,7 @@ telephony server requirements
 python-dotenv==1.0.0
 fastapi==0.108.0
-plivo==4.47.0
+# plivo==4.47.0
 redis==5.0.1
 twilio==8.9.0
 uvicorn==0.22.0
+voximplant-apiclient==1.11.0
```
@@ -0,0 +1,28 @@ (new file: pinned requirements for the bolna runtime image)
```
aiobotocore==2.9.0
aiofiles==23.2.1
aiohttp==3.9.1
azure-cognitiveservices-speech==1.38.0
daily-python==0.9.1
fastapi==0.108.0
fastembed==0.2.7
litellm==1.40.20
numpy==1.26.1
openai>=1.10.0
pydantic==2.5.3
pydub==0.25.1
python-dateutil==2.8.2
python-dotenv==1.0.0
redis==5.0.1
requests==2.31.0
tiktoken>=0.6.0
twilio==8.9.0
uvicorn==0.22.0
websockets==10.4
onnxruntime>=1.16.3
scipy==1.11.4
uvloop==0.19.0
tokenizers
huggingface-hub
semantic-router
sentence_transformers
optimum[onnxruntime]
```

Review comment (on `aiobotocore==2.9.0`): these packages could be installed when installing the bolna package in the docker.

Reply: I almost missed the mark; I'm not that good at python.
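Following up on the review comment above, a minimal sketch of the suggested alternative: installing bolna itself inside the image so these dependencies come in transitively. The install source is an assumption (the PR does not say how bolna is published):

```dockerfile
# Assumption: bolna is installable straight from its repository; pin a ref as appropriate.
# Installing it pulls in litellm, fastapi, redis, etc. without this hand-maintained list.
RUN pip install --no-cache-dir git+https://github.com/bolna-ai/bolna.git@develop
```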
```diff
@@ -1,13 +1,10 @@ ngrok tunnel config
 region: us
 version: '2'
-authtoken: <your-auth-token>
+authtoken: <auth-token>
 tunnels:
   twilio-app:
     addr: twilio-app:8001
     proto: http
-  plivo-app:
-    addr: plivo-app:8002
-    proto: http
   bolna-app:
     addr: bolna-app:5001
     proto: http
```
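Note: `voximplant_api_server.py` (below) builds its websocket URL from a tunnel named `voximplant`, which this config never defines, so that lookup would come back empty. A sketch of the entry that appears to be missing; the target address is an assumption:

```yaml
  voximplant:
    addr: bolna-app:5001   # assumed: the service terminating the audio websocket
    proto: http
```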
@@ -0,0 +1,76 @@ (new file: telephony_server/voximplant_api_server.py)
```python
import os
import json
import requests
from voximplant.apiclient import VoximplantAPI, VoximplantException
from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import PlainTextResponse

app = FastAPI()
load_dotenv()

rule_id = os.getenv('VOXIMPLANT_RULE_ID')


def populate_ngrok_tunnels():
    response = requests.get("http://ngrok:4040/api/tunnels")  # ngrok local API
    app_callback_url, websocket_url = None, None

    if response.status_code == 200:
        data = response.json()

        for tunnel in data['tunnels']:
            if tunnel['name'] == 'twilio-app':
                app_callback_url = tunnel['public_url']
            elif tunnel['name'] == 'voximplant':
                websocket_url = tunnel['public_url'].replace('https:', 'wss:')
    else:
        print(f"Error: Unable to fetch data. Status code: {response.status_code}")

    # Always return a tuple so the caller's unpacking cannot fail
    return app_callback_url, websocket_url


@app.post('/call')
async def make_call(request: Request):
    try:
        call_details = await request.json()
        agent_id = call_details.get('agent_id', None)
        recipient_phone_number = call_details.get('recipient_phone_number')
        if not recipient_phone_number:
            raise HTTPException(status_code=404, detail="Recipient phone number not provided")
        if not agent_id:
            raise HTTPException(status_code=404, detail="Agent not provided")

        app_callback_url, websocket_url = populate_ngrok_tunnels()

        print(f'app_callback_url: {app_callback_url}')
        print(f'websocket_url: {websocket_url}')
        print(f'recipient_phone_number: {recipient_phone_number}')

        voxapi = VoximplantAPI("credentials/voximplant.json")

        try:
            # Start the Voximplant scenario bound to the configured rule
            res = voxapi.start_scenarios(
                rule_id=rule_id,
                script_custom_data=json.dumps({
                    "ws_url": websocket_url,
                    "agent_id": agent_id,
                    "destination": recipient_phone_number
                })
            )
            print(res)
        except VoximplantException as e:
            raise HTTPException(status_code=500, detail=f"Error starting scenario: {e.message}")

        return PlainTextResponse("done", status_code=200)

    except HTTPException:
        # Re-raise deliberate HTTP errors instead of masking them as 500s below
        raise
    except Exception as e:
        print(f"Exception occurred in make_call: {e}")
        raise HTTPException(status_code=500, detail="Internal Server Error")
```

Review comment (on `voxapi = VoximplantAPI("credentials/voximplant.json")`): can you also please commit a test/temporary sample voximplant json file?

Reply: Integration with Voximplant should be two-way; unfortunately there is no built-in ability to stream audio from a websocket, but on the bolna side everything is ok.
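On the sample-credentials request above: a placeholder `credentials/voximplant.json` might look like the sketch below. The field names follow Voximplant's service-account key format as commonly documented; treat them as assumptions, and never commit real keys.

```json
{
  "account_email": "user@example.com",
  "account_id": 1234567,
  "key_id": "00000000-0000-0000-0000-000000000000",
  "private_key": "-----BEGIN PRIVATE KEY-----\n<redacted>\n-----END PRIVATE KEY-----\n"
}
```

For completeness, a minimal call against the endpoint (host and port assume the uvicorn command from the Dockerfile; the agent id is a placeholder):

```sh
curl -X POST http://localhost:8000/call \
  -H 'Content-Type: application/json' \
  -d '{"agent_id": "agent-123", "recipient_phone_number": "+15550100"}'
```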
Review comment: any specific reason for not using litellm for groq?

Reply: Hi, I'm sorry for the very dirty PR; I'll correct it and write in the comments.

Reply: It's for cases when a user needs to add more than one litellm-driven provider. A real case example: I could be wrong, but in our project, which is a fork of your repository, we need to use both Groq and Anthropic at the same time, with keys taken from env variables.
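For illustration, a minimal sketch of that use case with litellm alone: two providers driven side by side, each taking its key from its own env variable. Model names are examples; litellm also reads `GROQ_API_KEY`/`ANTHROPIC_API_KEY` automatically, but the keys are passed explicitly here to mirror the `llm_key` plumbing in `GroqLLM`:

```python
import os
import asyncio
import litellm

async def main():
    messages = [{"role": "user", "content": "ping"}]

    # litellm routes on the "provider/model" prefix; each call carries its own key
    groq_task = litellm.acompletion(
        model="groq/llama3-8b-8192",
        messages=messages,
        api_key=os.getenv("GROQ_API_KEY"),
    )
    anthropic_task = litellm.acompletion(
        model="anthropic/claude-3-haiku-20240307",
        messages=messages,
        api_key=os.getenv("ANTHROPIC_API_KEY"),
    )
    groq_res, anthropic_res = await asyncio.gather(groq_task, anthropic_task)
    print(groq_res.choices[0].message.content)
    print(anthropic_res.choices[0].message.content)

asyncio.run(main())
```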