diff --git a/.gitignore b/.gitignore
index 33986b5..885b35e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -167,4 +167,7 @@ agent_data/**/mp3
 */__pycache__/
 */*/__pycache__/
 logs/
-agent_data/
\ No newline at end of file
+agent_data/
+
+local_setup/credentials/
+local_setup/ngrok-config.yml
\ No newline at end of file
diff --git a/bolna/llms/groq.py b/bolna/llms/groq.py
new file mode 100644
index 0000000..deefb26
--- /dev/null
+++ b/bolna/llms/groq.py
@@ -0,0 +1,80 @@
+import os
+import time
+from dotenv import load_dotenv
+import litellm
+
+from bolna.helpers.logger_config import configure_logger
+from bolna.helpers.utils import json_to_pydantic_schema
+from bolna.llms.llm import BaseLLM
+
+
+logger = configure_logger(__name__)
+load_dotenv()
+
+class GroqLLM(BaseLLM):
+    def __init__(self, model="llama3-8b-8192", max_tokens=100, buffer_size=40, temperature=0.0, **kwargs):
+        super().__init__(max_tokens, buffer_size)
+        self.model = model
+        self.started_streaming = False
+        self.model_args = {"max_tokens": max_tokens, "temperature": temperature, "model": self.model}
+        self.api_key = kwargs.get("llm_key", os.getenv('GROQ_API_KEY'))
+
+        if self.api_key:
+            self.model_args["api_key"] = self.api_key
+        if "llm_key" in kwargs:
+            self.model_args["api_key"] = kwargs["llm_key"]
+
+
+    async def generate_stream(self, messages, synthesize=True, request_json=False):
+        answer, buffer = "", ""
+        model_args = self.model_args.copy()
+        model_args["messages"] = messages
+        model_args["stream"] = True
+
+        logger.info(f"Request to model: {self.model}: {messages} and model args {model_args}")
+        latency = False
+        start_time = time.time()
+
+        async for chunk in await litellm.acompletion(api_key=self.api_key, model=f"groq/{self.model}", messages=messages, stream=True, max_tokens=self.max_tokens, request_json=request_json):
+            if not self.started_streaming:
+                first_chunk_time = time.time()
+                latency = first_chunk_time - start_time
+                logger.info(f"LLM Latency: {latency:.2f} s")
+                self.started_streaming = True
+            if (text_chunk := chunk['choices'][0]['delta'].content) and not chunk['choices'][0].finish_reason:
+                answer += text_chunk
+                buffer += text_chunk
+
+                if len(buffer) >= self.buffer_size and synthesize:
+                    text = ' '.join(buffer.split(" ")[:-1])
+                    yield text, False, latency, False
+                    buffer = buffer.split(" ")[-1]
+
+        if buffer:
+            yield buffer, True, latency, False
+        else:
+            yield answer, True, latency, False
+        self.started_streaming = False
+        logger.info(f"Time to generate response {time.time() - start_time} {answer}")
+
+
+    async def generate(self, messages, stream=False, request_json=False):
+        text = ""
+        model_args = self.model_args.copy()
+        model_args["model"] = self.model
+        model_args["messages"] = messages
+        model_args["stream"] = stream
+
+        if request_json:
+            model_args['response_format'] = {
+                "type": "json_object",
+                "schema": json_to_pydantic_schema('{"classification_label": "classification label goes here"}')
+            }
+        logger.info(f'Request to Groq LLM {model_args}')
+        try:
+            completion = await litellm.acompletion(**model_args)
+            text = completion.choices[0].message.content
+            logger.debug(completion)  # Changed to debug for non-error logging
+        except Exception as e:
+            logger.error(f'Error generating response {e}')
+        return text
\ No newline at end of file
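A minimal sketch of driving the new GroqLLM streaming path directly, assuming GROQ_API_KEY is set in the environment; the asyncio driver and message payload here are illustrative, not part of this diff:

import asyncio
from bolna.llms.groq import GroqLLM

async def main():
    # buffer_size controls how many characters accumulate before a
    # partial chunk is flushed to the synthesizer
    llm = GroqLLM(model="llama3-8b-8192", max_tokens=100, buffer_size=40)
    messages = [{"role": "user", "content": "Greet the caller in one short sentence."}]
    # generate_stream yields (text, is_final, first_chunk_latency, False)
    async for text, is_final, latency, _ in llm.generate_stream(messages, synthesize=True):
        print(f"final={is_final} latency={latency} text={text!r}")

asyncio.run(main())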
["en", "hi", "es", "fr", "pt", "ko", "ja", "zh", "de", "it", "pt-BR"]) + return validate_attribute(value, ["en", "hi", "es", "fr", "pt", "ko", "ja", "zh", "de", "it", "pt-BR", "ru"]) class Synthesizer(BaseModel): diff --git a/bolna/providers.py b/bolna/providers.py index 7d7634d..de479ba 100644 --- a/bolna/providers.py +++ b/bolna/providers.py @@ -1,3 +1,4 @@ +from bolna.llms.groq import GroqLLM from bolna.transcriber.bodhi_transcriber import BodhiTranscriber from .synthesizer import PollySynthesizer, XTTSSynthesizer, ElevenlabsSynthesizer, OPENAISynthesizer, FourieSynthesizer, DeepgramSynthesizer, MeloSynthesizer, StylettsSynthesizer, AzureSynthesizer from .transcriber import DeepgramTranscriber, WhisperTranscriber @@ -42,7 +43,7 @@ 'anyscale': LiteLLM, 'custom': OpenAiLLM, 'ola': OpenAiLLM, - 'groq': LiteLLM, + 'groq': GroqLLM, 'anthropic': LiteLLM } SUPPORTED_INPUT_HANDLERS = { diff --git a/bolna/synthesizer/openai_synthesizer.py b/bolna/synthesizer/openai_synthesizer.py index 1273d95..99aff29 100644 --- a/bolna/synthesizer/openai_synthesizer.py +++ b/bolna/synthesizer/openai_synthesizer.py @@ -52,7 +52,7 @@ async def __generate_stream(self, text): spoken_response = await self.async_client.audio.speech.create( model=self.model, voice=self.voice, - response_format="mp3", + response_format="wav", input=text ) @@ -71,7 +71,7 @@ async def generate(self): if not self.first_chunk_generated: meta_info["is_first_chunk"] = True self.first_chunk_generated = True - yield create_ws_data_packet(resample(convert_audio_to_wav(chunk, 'mp3'), self.sample_rate, format="wav"), meta_info) + yield create_ws_data_packet(resample(chunk, self.sample_rate, format="wav"), meta_info) if "end_of_llm_stream" in meta_info and meta_info["end_of_llm_stream"]: meta_info["end_of_synthesizer_stream"] = True diff --git a/bolna/transcriber/deepgram_transcriber.py b/bolna/transcriber/deepgram_transcriber.py index f847f63..bc77ebd 100644 --- a/bolna/transcriber/deepgram_transcriber.py +++ b/bolna/transcriber/deepgram_transcriber.py @@ -20,13 +20,13 @@ class DeepgramTranscriber(BaseTranscriber): - def __init__(self, telephony_provider, input_queue=None, model='nova-2', stream=True, language="en", endpointing="400", + def __init__(self, telephony_provider, input_queue=None, model='whisper', stream=True, language="en", endpointing="400", sampling_rate="16000", encoding="linear16", output_queue=None, keywords=None, process_interim_results="true", **kwargs): logger.info(f"Initializing transcriber {kwargs}") super().__init__(input_queue) self.endpointing = endpointing - self.language = language if model == "nova-2" else "en" + self.language = language if model == "whisper" else "en" self.stream = stream self.provider = telephony_provider self.heartbeat_task = None @@ -43,8 +43,8 @@ def __init__(self, telephony_provider, input_queue=None, model='nova-2', stream= self.transcription_cursor = 0.0 logger.info(f"self.stream: {self.stream}") self.interruption_signalled = False - if 'nova-2' not in self.model: - self.model = "nova-2" + if 'whisper' not in self.model: + self.model = "whisper" if not self.stream: self.api_url = f"https://{self.deepgram_host}/v1/listen?model={self.model}&filler_words=true&language={self.language}" self.session = aiohttp.ClientSession() diff --git a/local_setup/docker-compose.yml b/local_setup/docker-compose.yml index 24424cc..5033ce6 100644 --- a/local_setup/docker-compose.yml +++ b/local_setup/docker-compose.yml @@ -1,5 +1,4 @@ services: - # main bolna service bolna-app: image: bolna-app:latest @@ -26,7 
@@ -26,7 +25,7 @@ services:
   # ngrok for local tunneling
   ngrok:
     image: ngrok/ngrok:latest
-    restart: unless-stopped
+    # restart: unless-stopped
     command:
       - "start"
       - "--all"
@@ -36,7 +35,6 @@ services:
       - ./ngrok-config.yml:/etc/ngrok.yml
     ports:
       - 4040:4040
-
 ### Telephony servers ###
   twilio-app:
     image: twilio-app:latest
@@ -65,3 +63,17 @@ services:
       - bolna-app
     env_file:
       - .env
+
+  voximplant_app:
+    image: voximplant_app:latest
+    build:
+      context: .
+      dockerfile: dockerfiles/voximplant-server.Dockerfile
+    ports:
+      - "8000:8000"
+    depends_on:
+      - redis
+      - ngrok
+      - bolna-app
+    env_file:
+      - .env
\ No newline at end of file
diff --git a/local_setup/dockerfiles/voximplant-server.Dockerfile b/local_setup/dockerfiles/voximplant-server.Dockerfile
new file mode 100644
index 0000000..988cd49
--- /dev/null
+++ b/local_setup/dockerfiles/voximplant-server.Dockerfile
@@ -0,0 +1,12 @@
+FROM python:3.10.13-slim
+
+WORKDIR /app
+COPY ./requirements.txt /app
+COPY ./telephony_server/voximplant_api_server.py /app
+COPY ./credentials/voximplant.json /app/credentials/voximplant.json
+
+RUN pip install --no-cache-dir -r requirements.txt
+
+EXPOSE 8000
+
+CMD ["uvicorn", "voximplant_api_server:app", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/local_setup/quickstart_server.py b/local_setup/quickstart_server.py
index e039970..b61a271 100644
--- a/local_setup/quickstart_server.py
+++ b/local_setup/quickstart_server.py
@@ -63,6 +63,7 @@ async def create_agent(agent_data: CreateAgentPayload):
         store_file(file_key=stored_prompt_file_path, file_data=agent_prompts, local=True)
     )
 
+    logger.info(f"Storing agent data in redis {data_for_db}")
     return {"agent_id": agent_uuid, "state": "created"}
 
 
diff --git a/local_setup/requirements.txt b/local_setup/requirements.txt
index 1838e05..1297789 100644
--- a/local_setup/requirements.txt
+++ b/local_setup/requirements.txt
@@ -1,6 +1,7 @@
 python-dotenv==1.0.0
 fastapi==0.108.0
-plivo==4.47.0
+# plivo==4.47.0
 redis==5.0.1
 twilio==8.9.0
 uvicorn==0.22.0
+voximplant-apiclient==1.11.0
\ No newline at end of file
diff --git a/local_setup/requirements_bolna.txt b/local_setup/requirements_bolna.txt
new file mode 100644
index 0000000..fbcdc52
--- /dev/null
+++ b/local_setup/requirements_bolna.txt
@@ -0,0 +1,28 @@
+aiobotocore==2.9.0
+aiofiles==23.2.1
+aiohttp==3.9.1
+azure-cognitiveservices-speech==1.38.0
+daily-python==0.9.1
+fastapi==0.108.0
+fastembed==0.2.7
+litellm==1.40.20
+numpy==1.26.1
+openai>=1.10.0
+pydantic==2.5.3
+pydub==0.25.1
+python-dateutil==2.8.2
+python-dotenv==1.0.0
+redis==5.0.1
+requests==2.31.0
+tiktoken>=0.6.0
+twilio==8.9.0
+uvicorn==0.22.0
+websockets==10.4
+onnxruntime>=1.16.3
+scipy==1.11.4
+uvloop==0.19.0
+tokenizers
+huggingface-hub
+semantic-router
+sentence_transformers
+optimum[onnxruntime]
diff --git a/local_setup/ngrok-config.yml b/local_setup/sample.ngrok-config.yml
similarity index 63%
rename from local_setup/ngrok-config.yml
rename to local_setup/sample.ngrok-config.yml
index c868f12..7ffb260 100644
--- a/local_setup/ngrok-config.yml
+++ b/local_setup/sample.ngrok-config.yml
@@ -1,13 +1,10 @@
 region: us
 version: '2'
-authtoken:
+authtoken: 
 tunnels:
   twilio-app:
     addr: twilio-app:8001
     proto: http
-  plivo-app:
-    addr: plivo-app:8002
-    proto: http
   bolna-app:
     addr: bolna-app:5001
     proto: http
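Note that the Voximplant server below resolves its websocket URL from an ngrok tunnel named voximplant, which the sample config above does not define. A plausible entry to add under tunnels is sketched here; the addr is an assumption, mirroring the existing bolna-app websocket tunnel, and should be adjusted to your setup:

  voximplant:
    addr: bolna-app:5001
    proto: http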
diff --git a/local_setup/telephony_server/voximplant_api_server.py b/local_setup/telephony_server/voximplant_api_server.py
new file mode 100644
index 0000000..8dd373b
--- /dev/null
+++ b/local_setup/telephony_server/voximplant_api_server.py
@@ -0,0 +1,78 @@
+import os
+import json
+import requests
+import uuid
+from voximplant.apiclient import VoximplantAPI, VoximplantException
+from dotenv import load_dotenv
+import redis.asyncio as redis
+from fastapi import FastAPI, HTTPException, Request
+from fastapi.responses import PlainTextResponse
+
+app = FastAPI()
+load_dotenv()
+port = 8000
+
+rule_id = os.getenv('VOXIMPLANT_RULE_ID')
+
+
+def populate_ngrok_tunnels():
+    response = requests.get("http://ngrok:4040/api/tunnels")  # ngrok interface
+    app_callback_url, websocket_url = None, None
+
+    if response.status_code == 200:
+        data = response.json()
+
+        for tunnel in data['tunnels']:
+            if tunnel['name'] == 'twilio-app':
+                app_callback_url = tunnel['public_url']
+            elif tunnel['name'] == 'voximplant':
+                websocket_url = tunnel['public_url'].replace('https:', 'wss:')
+
+    else:
+        print(f"Error: Unable to fetch data. Status code: {response.status_code}")
+    return app_callback_url, websocket_url
+
+@app.post('/call')
+async def make_call(request: Request):
+    try:
+        call_details = await request.json()
+        agent_id = call_details.get('agent_id', None)
+        recipient_phone_number = call_details.get('recipient_phone_number')
+        # recipient phone validation below
+        if not recipient_phone_number:
+            raise HTTPException(status_code=404, detail="Recipient phone number not provided")
+        if not agent_id:
+            raise HTTPException(status_code=404, detail="Agent not provided")
+
+        if not call_details or "recipient_phone_number" not in call_details:
+            raise HTTPException(status_code=404, detail="Recipient phone number not provided")
+
+        app_callback_url, websocket_url = populate_ngrok_tunnels()
+
+        print(f'app_callback_url: {app_callback_url}')
+        print(f'websocket_url: {websocket_url}')
+        print(f'recipient_phone_number: {recipient_phone_number}')
+
+        voxapi = VoximplantAPI("credentials/voximplant.json")
+
+        try:
+            # Start the scenario
+            res = voxapi.start_scenarios(
+                rule_id=rule_id,  # Replace with your actual rule ID
+                script_custom_data=json.dumps({
+                    "ws_url": websocket_url,
+                    "agent_id": agent_id,
+                    "destination": recipient_phone_number
+                })
+            )
+            print(res)
+        except VoximplantException as e:
+            raise HTTPException(status_code=500, detail=f"Error starting scenario: {e.message}")
+
+        return PlainTextResponse("done", status_code=200)
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        print(f"Exception occurred in make_call: {e}")
+        raise HTTPException(status_code=500, detail="Internal Server Error")
\ No newline at end of file
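Once the stack is up, the new endpoint can be exercised with a minimal client like the sketch below; the host and payload values are illustrative, and VOXIMPLANT_RULE_ID plus credentials/voximplant.json must already be configured:

import requests

payload = {
    "agent_id": "replace-with-agent-uuid",  # hypothetical id returned by the quickstart server
    "recipient_phone_number": "+15550100000",  # illustrative number
}
resp = requests.post("http://localhost:8000/call", json=payload)
print(resp.status_code, resp.text)  # expect 200 and "done" on success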