chore: Added ru locale, added voximplant adapter, groq provider, switched to whisper in deepgram (multilang) #304

Open. Wants to merge 3 commits into base: develop.
5 changes: 4 additions & 1 deletion .gitignore
@@ -167,4 +167,7 @@ agent_data/**/mp3
*/__pycache__/
*/*/__pycache__/
logs/
agent_data/
agent_data/

local_setup/credentials/
local_setup/ngrok-config.yml
80 changes: 80 additions & 0 deletions bolna/llms/groq.py
@@ -0,0 +1,80 @@
import os
import time
from dotenv import load_dotenv
import litellm

from bolna.helpers.logger_config import configure_logger
from bolna.helpers.utils import json_to_pydantic_schema
from bolna.llms.llm import BaseLLM


logger = configure_logger(__name__)
load_dotenv()

class GroqLLM(BaseLLM):
def __init__(self, model="llama3-8b-8192", max_tokens=100, buffer_size=40, temperature=0.0, **kwargs):
super().__init__(max_tokens, buffer_size)
self.model = model
self.started_streaming = False
self.model_args = {"max_tokens": max_tokens, "temperature": temperature, "model": self.model}
self.api_key = kwargs.get("llm_key", os.getenv('GROQ_API_KEY'))

if self.api_key:
self.model_args["api_key"] = self.api_key
if "llm_key" in kwargs:
self.model_args["api_key"] = kwargs["llm_key"]


async def generate_stream(self, messages, synthesize=True, request_json=False):
answer, buffer = "", ""
model_args = self.model_args.copy()
model_args["messages"] = messages
model_args["stream"] = True

logger.info(f"Request to model: {self.model}: {messages} and model args {model_args}")
latency = False
start_time = time.time()

async for chunk in await litellm.acompletion(api_key=self.api_key, model=f"groq/{self.model}", messages=messages, stream=True, max_tokens=self.max_tokens, request_json=request_json):
if not self.started_streaming:
first_chunk_time = time.time()
latency = first_chunk_time - start_time
logger.info(f"LLM Latency: {latency:.2f} s")
self.started_streaming = True
if (text_chunk := chunk['choices'][0]['delta'].content) and not chunk['choices'][0].finish_reason:
answer += text_chunk
buffer += text_chunk

if len(buffer) >= self.buffer_size and synthesize:
text = ' '.join(buffer.split(" ")[:-1])
yield text, False, latency, False
buffer = buffer.split(" ")[-1]

if buffer:
yield buffer, True, latency, False
else:
yield answer, True, latency, False
self.started_streaming = False
logger.info(f"Time to generate response {time.time() - start_time} {answer}")


async def generate(self, messages, stream=False, request_json=False):
text = ""
model_args = self.model_args.copy()
model_args["model"] = self.model
model_args["messages"] = messages
model_args["stream"] = stream

if request_json:
model_args['response_format'] = {
"type": "json_object",
"schema": json_to_pydantic_schema('{"classification_label": "classification label goes here"}')
}
logger.info(f'Request to Groq LLM {model_args}')
try:
completion = await litellm.acompletion(**model_args)
text = completion.choices[0].message.content
logger.debug(completion) # Changed to debug for non-error logging
except Exception as e:
logger.error(f'Error generating response {e}')
return text
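For context, a minimal usage sketch of the new class (assuming GROQ_API_KEY is set in the environment; the model name and message payload below are illustrative only):

```python
import asyncio

from bolna.llms.groq import GroqLLM


async def main():
    # GroqLLM falls back to GROQ_API_KEY when llm_key is not passed
    llm = GroqLLM(model="llama3-8b-8192", max_tokens=100, buffer_size=40)
    messages = [{"role": "user", "content": "Say hello in Russian."}]  # illustrative payload

    # generate_stream yields (text, end_of_stream, first_chunk_latency, truncated) tuples;
    # the fourth element is always False in this implementation
    async for text, end_of_stream, latency, _ in llm.generate_stream(messages, synthesize=True):
        print(f"end={end_of_stream} latency={latency} text={text!r}")


asyncio.run(main())
```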
2 changes: 1 addition & 1 deletion bolna/models.py
@@ -90,7 +90,7 @@ def validate_model(cls, value):

@validator("language")
def validate_language(cls, value):
return validate_attribute(value, ["en", "hi", "es", "fr", "pt", "ko", "ja", "zh", "de", "it", "pt-BR"])
return validate_attribute(value, ["en", "hi", "es", "fr", "pt", "ko", "ja", "zh", "de", "it", "pt-BR", "ru"])


class Synthesizer(BaseModel):
3 changes: 2 additions & 1 deletion bolna/providers.py
@@ -1,3 +1,4 @@
from bolna.llms.groq import GroqLLM
from bolna.transcriber.bodhi_transcriber import BodhiTranscriber
from .synthesizer import PollySynthesizer, XTTSSynthesizer, ElevenlabsSynthesizer, OPENAISynthesizer, FourieSynthesizer, DeepgramSynthesizer, MeloSynthesizer, StylettsSynthesizer, AzureSynthesizer
from .transcriber import DeepgramTranscriber, WhisperTranscriber
@@ -42,7 +43,7 @@
'anyscale': LiteLLM,
'custom': OpenAiLLM,
'ola': OpenAiLLM,
'groq': LiteLLM,
'groq': GroqLLM,
Member: Any specific reason for not using LiteLLM for Groq?

Author: Hi, I'm sorry for the very messy PR. I'll clean it up and respond in the comments.

Author (quoting "Any specific reason for not using LiteLLM for Groq?"):

For cases when a user needs to add more than one LiteLLM-driven provider.

Real-world example: I could be wrong, but in our project, which is a fork of your repository, we need to use both Groq and Anthropic at the same time, with their keys taken from environment variables.
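A minimal sketch of that scenario, assuming the standard GROQ_API_KEY / ANTHROPIC_API_KEY variable names and a lookup against the provider map in bolna/providers.py (the build_llm helper and the LiteLLM import path are assumptions, not part of this PR):

```python
import os

from bolna.llms.groq import GroqLLM
from bolna.llms.litellm import LiteLLM  # import path assumed

# Mirrors the relevant entries of the provider map after this change
PROVIDERS = {"groq": GroqLLM, "anthropic": LiteLLM}


def build_llm(provider: str, **kwargs):
    """Hypothetical helper: resolve a provider name to an LLM instance."""
    cls = PROVIDERS[provider]
    if provider == "anthropic":
        # the generic LiteLLM wrapper gets its key explicitly (constructor args
        # assumed to mirror GroqLLM's)
        kwargs.setdefault("llm_key", os.getenv("ANTHROPIC_API_KEY"))
    # GroqLLM reads GROQ_API_KEY on its own (see bolna/llms/groq.py above)
    return cls(**kwargs)


groq_llm = build_llm("groq", model="llama3-8b-8192")
claude_llm = build_llm("anthropic", model="claude-3-haiku-20240307")
```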

'anthropic': LiteLLM
}
SUPPORTED_INPUT_HANDLERS = {
4 changes: 2 additions & 2 deletions bolna/synthesizer/openai_synthesizer.py
@@ -52,7 +52,7 @@ async def __generate_stream(self, text):
spoken_response = await self.async_client.audio.speech.create(
model=self.model,
voice=self.voice,
response_format="mp3",
response_format="wav",
input=text
)

@@ -71,7 +71,7 @@ async def generate(self):
if not self.first_chunk_generated:
meta_info["is_first_chunk"] = True
self.first_chunk_generated = True
yield create_ws_data_packet(resample(convert_audio_to_wav(chunk, 'mp3'), self.sample_rate, format="wav"), meta_info)
yield create_ws_data_packet(resample(chunk, self.sample_rate, format="wav"), meta_info)

if "end_of_llm_stream" in meta_info and meta_info["end_of_llm_stream"]:
meta_info["end_of_synthesizer_stream"] = True
8 changes: 4 additions & 4 deletions bolna/transcriber/deepgram_transcriber.py
@@ -20,13 +20,13 @@


class DeepgramTranscriber(BaseTranscriber):
def __init__(self, telephony_provider, input_queue=None, model='nova-2', stream=True, language="en", endpointing="400",
def __init__(self, telephony_provider, input_queue=None, model='whisper', stream=True, language="en", endpointing="400",
sampling_rate="16000", encoding="linear16", output_queue=None, keywords=None,
process_interim_results="true", **kwargs):
logger.info(f"Initializing transcriber {kwargs}")
super().__init__(input_queue)
self.endpointing = endpointing
self.language = language if model == "nova-2" else "en"
self.language = language if model == "whisper" else "en"
self.stream = stream
self.provider = telephony_provider
self.heartbeat_task = None
@@ -43,8 +43,8 @@ def __init__(self, telephony_provider, input_queue=None, model='nova-2', stream=
self.transcription_cursor = 0.0
logger.info(f"self.stream: {self.stream}")
self.interruption_signalled = False
if 'nova-2' not in self.model:
self.model = "nova-2"
if 'whisper' not in self.model:
self.model = "whisper"
if not self.stream:
self.api_url = f"https://{self.deepgram_host}/v1/listen?model={self.model}&filler_words=true&language={self.language}"
self.session = aiohttp.ClientSession()
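For reference, with these defaults the non-streaming request URL now resolves as in the sketch below (the host and the Russian language code are illustrative; "ru" matches the locale added in bolna/models.py):

```python
# Sketch of how the whisper default and the pass-through language land in api_url
deepgram_host = "api.deepgram.com"  # assumed default host
model = "whisper"
language = "ru"  # kept as-is because model == "whisper"
api_url = (
    f"https://{deepgram_host}/v1/listen"
    f"?model={model}&filler_words=true&language={language}"
)
# -> https://api.deepgram.com/v1/listen?model=whisper&filler_words=true&language=ru
```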
18 changes: 15 additions & 3 deletions local_setup/docker-compose.yml
@@ -1,5 +1,4 @@
services:

# main bolna service
bolna-app:
image: bolna-app:latest
@@ -26,7 +25,7 @@ services:
# ngrok for local tunneling
ngrok:
image: ngrok/ngrok:latest
restart: unless-stopped
# restart: unless-stopped
command:
- "start"
- "--all"
@@ -36,7 +35,6 @@ services:
- ./ngrok-config.yml:/etc/ngrok.yml
ports:
- 4040:4040

### Telephony servers ###
twilio-app:
image: twilio-app:latest
@@ -65,3 +63,17 @@ services:
- bolna-app
env_file:
- .env

voximplant_app:
image: voximplant_app:latest
build:
context: .
dockerfile: dockerfiles/voximplant-server.Dockerfile
ports:
- "8000:8000"
depends_on:
- redis
- ngrok
- bolna-app
env_file:
- .env
12 changes: 12 additions & 0 deletions local_setup/dockerfiles/voximplant-server.Dockerfile
@@ -0,0 +1,12 @@
FROM python:3.10.13-slim

WORKDIR /app
COPY ./requirements.txt /app
COPY ./telephony_server/voximplant_api_server.py /app
COPY ./credentials/voximplant.json /app/credentials/voximplant.json

RUN pip install --no-cache-dir -r requirements.txt

EXPOSE 8000

CMD ["uvicorn", "voximplant_api_server:app", "--host", "0.0.0.0", "--port", "8000"]
1 change: 1 addition & 0 deletions local_setup/quickstart_server.py
@@ -63,6 +63,7 @@ async def create_agent(agent_data: CreateAgentPayload):
store_file(file_key=stored_prompt_file_path, file_data=agent_prompts, local=True)
)

logger.info(f"Storing agent data in redis {data_for_db}")
return {"agent_id": agent_uuid, "state": "created"}


3 changes: 2 additions & 1 deletion local_setup/requirements.txt
@@ -1,6 +1,7 @@
python-dotenv==1.0.0
fastapi==0.108.0
plivo==4.47.0
# plivo==4.47.0
redis==5.0.1
twilio==8.9.0
uvicorn==0.22.0
voximplant-apiclient==1.11.0
28 changes: 28 additions & 0 deletions local_setup/requirements_bolna.txt
@@ -0,0 +1,28 @@
aiobotocore==2.9.0
Member (@prateeksachan, Jul 3, 2024): These packages could be installed by installing the bolna package in the Docker image.

Author: I almost missed the mark; I'm not that good at Python.

aiofiles==23.2.1
aiohttp==3.9.1
azure-cognitiveservices-speech==1.38.0
daily-python==0.9.1
fastapi==0.108.0
fastembed==0.2.7
litellm==1.40.20
numpy==1.26.1
openai>=1.10.0
pydantic==2.5.3
pydub==0.25.1
python-dateutil==2.8.2
python-dotenv==1.0.0
redis==5.0.1
requests==2.31.0
tiktoken>=0.6.0
twilio==8.9.0
uvicorn==0.22.0
websockets==10.4
onnxruntime>=1.16.3
scipy==1.11.4
uvloop==0.19.0
tokenizers
huggingface-hub
semantic-router
sentence_transformers
optimum[onnxruntime]
5 changes: 1 addition & 4 deletions local_setup/ngrok-config.yml
@@ -1,13 +1,10 @@
region: us
version: '2'
authtoken: <your-auth-token>
authtoken: <auth-token>
tunnels:
twilio-app:
addr: twilio-app:8001
proto: http
plivo-app:
addr: plivo-app:8002
proto: http
bolna-app:
addr: bolna-app:5001
proto: http
76 changes: 76 additions & 0 deletions local_setup/telephony_server/voximplant_api_server.py
@@ -0,0 +1,76 @@
import os
import json
import requests
import uuid
from voximplant.apiclient import VoximplantAPI, VoximplantException
from dotenv import load_dotenv
import redis.asyncio as redis
from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import PlainTextResponse

app = FastAPI(port=8000)
load_dotenv()
port = 8000

rule_id = os.getenv('VOXIMPLANT_RULE_ID')


def populate_ngrok_tunnels():
response = requests.get("http://ngrok:4040/api/tunnels") # ngrok interface
app_callback_url, websocket_url = None, None

if response.status_code == 200:
data = response.json()

for tunnel in data['tunnels']:
if tunnel['name'] == 'twilio-app':
app_callback_url = tunnel['public_url']
elif tunnel['name'] == 'voximplant':
websocket_url = tunnel['public_url'].replace('https:', 'wss:')

return app_callback_url, websocket_url
else:
print(f"Error: Unable to fetch data. Status code: {response.status_code}")

@app.post('/call')
async def make_call(request: Request):
try:
call_details = await request.json()
agent_id = call_details.get('agent_id', None)
recipient_phone_number = call_details.get('recipient_phone_number')
# recipient phone validation below
if not recipient_phone_number:
raise HTTPException(status_code=404, detail="Recipient phone number not provided")
if not agent_id:
raise HTTPException(status_code=404, detail="Agent not provided")

if not call_details or "recipient_phone_number" not in call_details:
raise HTTPException(status_code=404, detail="Recipient phone number not provided")

app_callback_url, websocket_url = populate_ngrok_tunnels()

print(f'app_callback_url: {app_callback_url}')
print(f'websocket_url: {websocket_url}')
print(f'recipient_phone_number: {recipient_phone_number}')

voxapi = VoximplantAPI("credentials/voximplant.json")
Member: Can you also please commit a test/temporary sample voximplant JSON file?

Author: Integration with Voximplant should be two-way. Unfortunately, there is no built-in ability to stream audio from a websocket on the Voximplant side, but on the bolna side everything is fine.
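Regarding the sample credentials file, a placeholder along the lines of the sketch below could be committed for local testing (the field names are an assumption based on the service-account key file that the Voximplant control panel generates; never commit real keys):

```python
import json
import pathlib

# Hypothetical placeholder; real values come from the key file generated
# in the Voximplant control panel when creating a service account.
placeholder = {
    "account_email": "user@example.com",
    "account_id": 1234567,
    "key_id": "00000000-0000-0000-0000-000000000000",
    "private_key": "-----BEGIN PRIVATE KEY-----\n...\n-----END PRIVATE KEY-----\n",
}

path = pathlib.Path("local_setup/credentials/voximplant.json")
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(json.dumps(placeholder, indent=2))
```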


try:
# Start the scenario
res = voxapi.start_scenarios(
rule_id=rule_id, # Replace with your actual rule ID
script_custom_data=json.dumps({
"ws_url": websocket_url,
"agent_id": agent_id,
"destination": recipient_phone_number
})
)
print(res)
except VoximplantException as e:
raise HTTPException(status_code=500, detail=f"Error starting scenario: {e.message}")

return PlainTextResponse("done", status_code=200)

except Exception as e:
print(f"Exception occurred in make_call: {e}")
raise HTTPException(status_code=500, detail="Internal Server Error")
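For local testing, the new endpoint can be exercised roughly as below (a sketch only: the port matches the 8000:8000 mapping in docker-compose.yml, the agent_id and phone number are placeholders, and an ngrok tunnel named voximplant is assumed to exist, since populate_ngrok_tunnels looks it up by that name):

```python
import requests

# Placeholder values for illustration only
resp = requests.post(
    "http://localhost:8000/call",
    json={
        "agent_id": "replace-with-created-agent-id",
        "recipient_phone_number": "+15550100123",
    },
)
print(resp.status_code, resp.text)  # expected: 200 done
```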