chore: Added ru locale, added voximplant adapter, groq provider, switched to whisper in deepgram (multilang) #304

Open. Wants to merge 3 commits into base: develop.
5 changes: 4 additions & 1 deletion .gitignore
@@ -167,4 +167,7 @@ agent_data/**/mp3
*/__pycache__/
*/*/__pycache__/
logs/
agent_data/
agent_data/

local_setup/credentials/
local_setup/ngrok-config.yml
80 changes: 80 additions & 0 deletions bolna/llms/groq.py
@@ -0,0 +1,80 @@
import os
import time
from dotenv import load_dotenv
import litellm

from bolna.helpers.logger_config import configure_logger
from bolna.helpers.utils import json_to_pydantic_schema
from bolna.llms.llm import BaseLLM


logger = configure_logger(__name__)
load_dotenv()

class GroqLLM(BaseLLM):
def __init__(self, model="llama3-8b-8192", max_tokens=100, buffer_size=40, temperature=0.0, **kwargs):
super().__init__(max_tokens, buffer_size)
self.model = model
self.started_streaming = False
self.model_args = {"max_tokens": max_tokens, "temperature": temperature, "model": self.model}
self.api_key = kwargs.get("llm_key", os.getenv('GROQ_API_KEY'))

if self.api_key:
self.model_args["api_key"] = self.api_key
if "llm_key" in kwargs:
self.model_args["api_key"] = kwargs["llm_key"]


async def generate_stream(self, messages, synthesize=True, request_json=False):
answer, buffer = "", ""
model_args = self.model_args.copy()
model_args["messages"] = messages
model_args["stream"] = True

logger.info(f"Request to model: {self.model}: {messages} and model args {model_args}")
latency = False
start_time = time.time()

async for chunk in await litellm.acompletion(api_key=self.api_key, model=f"groq/{self.model}", messages=messages, stream=True, max_tokens=self.max_tokens, request_json=request_json):
if not self.started_streaming:
first_chunk_time = time.time()
latency = first_chunk_time - start_time
logger.info(f"LLM Latency: {latency:.2f} s")
self.started_streaming = True
if (text_chunk := chunk['choices'][0]['delta'].content) and not chunk['choices'][0].finish_reason:
answer += text_chunk
buffer += text_chunk

if len(buffer) >= self.buffer_size and synthesize:
text = ' '.join(buffer.split(" ")[:-1])
yield text, False, latency, False
buffer = buffer.split(" ")[-1]

if buffer:
yield buffer, True, latency, False
else:
yield answer, True, latency, False
self.started_streaming = False
logger.info(f"Time to generate response {time.time() - start_time} {answer}")


async def generate(self, messages, stream=False, request_json=False):
text = ""
model_args = self.model_args.copy()
model_args["model"] = self.model
model_args["messages"] = messages
model_args["stream"] = stream

if request_json:
model_args['response_format'] = {
"type": "json_object",
"schema": json_to_pydantic_schema('{"classification_label": "classification label goes here"}')
}
logger.info(f'Request to Groq LLM {model_args}')
try:
completion = await litellm.acompletion(**model_args)
text = completion.choices[0].message.content
logger.debug(completion) # Changed to debug for non-error logging
except Exception as e:
logger.error(f'Error generating response {e}')
return text
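For context, a minimal usage sketch of the new class (assuming GROQ_API_KEY is set in the environment; the model name and message payload below are illustrative only):

```python
import asyncio

from bolna.llms.groq import GroqLLM


async def main():
    # GroqLLM falls back to GROQ_API_KEY when llm_key is not passed
    llm = GroqLLM(model="llama3-8b-8192", max_tokens=100, buffer_size=40)
    messages = [{"role": "user", "content": "Say hello in Russian."}]  # illustrative payload

    # generate_stream yields (text, end_of_stream, first_chunk_latency, truncated) tuples;
    # the fourth element is always False in this implementation
    async for text, end_of_stream, latency, _ in llm.generate_stream(messages, synthesize=True):
        print(f"end={end_of_stream} latency={latency} text={text!r}")


asyncio.run(main())
```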
2 changes: 1 addition & 1 deletion bolna/models.py
@@ -90,7 +90,7 @@ def validate_model(cls, value):

@validator("language")
def validate_language(cls, value):
return validate_attribute(value, ["en", "hi", "es", "fr", "pt", "ko", "ja", "zh", "de", "it", "pt-BR"])
return validate_attribute(value, ["en", "hi", "es", "fr", "pt", "ko", "ja", "zh", "de", "it", "pt-BR", "ru"])


class Synthesizer(BaseModel):
3 changes: 2 additions & 1 deletion bolna/providers.py
@@ -1,3 +1,4 @@
from bolna.llms.groq import GroqLLM
from bolna.transcriber.bodhi_transcriber import BodhiTranscriber
from .synthesizer import PollySynthesizer, XTTSSynthesizer, ElevenlabsSynthesizer, OPENAISynthesizer, FourieSynthesizer, DeepgramSynthesizer, MeloSynthesizer, StylettsSynthesizer, AzureSynthesizer
from .transcriber import DeepgramTranscriber, WhisperTranscriber
@@ -42,7 +43,7 @@
'anyscale': LiteLLM,
'custom': OpenAiLLM,
'ola': OpenAiLLM,
'groq': LiteLLM,
'groq': GroqLLM,
Member: Any specific reason for not using LiteLLM for Groq?

Author: Hi, I'm sorry for the very messy PR. I'll clean it up and respond in the comments.

Author (quoting "Any specific reason for not using LiteLLM for Groq?"):

For cases when a user needs to add more than one LiteLLM-driven provider.

Real-world example: I could be wrong, but in our project, which is a fork of your repository, we need to use both Groq and Anthropic at the same time, with their keys taken from environment variables.
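A minimal sketch of that scenario, assuming the standard GROQ_API_KEY / ANTHROPIC_API_KEY variable names and a lookup against the provider map in bolna/providers.py (the build_llm helper and the LiteLLM import path are assumptions, not part of this PR):

```python
import os

from bolna.llms.groq import GroqLLM
from bolna.llms.litellm import LiteLLM  # import path assumed

# Mirrors the relevant entries of the provider map after this change
PROVIDERS = {"groq": GroqLLM, "anthropic": LiteLLM}


def build_llm(provider: str, **kwargs):
    """Hypothetical helper: resolve a provider name to an LLM instance."""
    cls = PROVIDERS[provider]
    if provider == "anthropic":
        # the generic LiteLLM wrapper gets its key explicitly (constructor args
        # assumed to mirror GroqLLM's)
        kwargs.setdefault("llm_key", os.getenv("ANTHROPIC_API_KEY"))
    # GroqLLM reads GROQ_API_KEY on its own (see bolna/llms/groq.py above)
    return cls(**kwargs)


groq_llm = build_llm("groq", model="llama3-8b-8192")
claude_llm = build_llm("anthropic", model="claude-3-haiku-20240307")
```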

'anthropic': LiteLLM
}
SUPPORTED_INPUT_HANDLERS = {
4 changes: 2 additions & 2 deletions bolna/synthesizer/openai_synthesizer.py
@@ -52,7 +52,7 @@ async def __generate_stream(self, text):
spoken_response = await self.async_client.audio.speech.create(
model=self.model,
voice=self.voice,
response_format="mp3",
response_format="wav",
input=text
)

@@ -71,7 +71,7 @@ async def generate(self):
if not self.first_chunk_generated:
meta_info["is_first_chunk"] = True
self.first_chunk_generated = True
yield create_ws_data_packet(resample(convert_audio_to_wav(chunk, 'mp3'), self.sample_rate, format="wav"), meta_info)
yield create_ws_data_packet(resample(chunk, self.sample_rate, format="wav"), meta_info)

if "end_of_llm_stream" in meta_info and meta_info["end_of_llm_stream"]:
meta_info["end_of_synthesizer_stream"] = True
8 changes: 4 additions & 4 deletions bolna/transcriber/deepgram_transcriber.py
@@ -20,13 +20,13 @@


class DeepgramTranscriber(BaseTranscriber):
def __init__(self, telephony_provider, input_queue=None, model='nova-2', stream=True, language="en", endpointing="400",
def __init__(self, telephony_provider, input_queue=None, model='whisper', stream=True, language="en", endpointing="400",
sampling_rate="16000", encoding="linear16", output_queue=None, keywords=None,
process_interim_results="true", **kwargs):
logger.info(f"Initializing transcriber {kwargs}")
super().__init__(input_queue)
self.endpointing = endpointing
self.language = language if model == "nova-2" else "en"
self.language = language if model == "whisper" else "en"
self.stream = stream
self.provider = telephony_provider
self.heartbeat_task = None
@@ -43,8 +43,8 @@ def __init__(self, telephony_provider, input_queue=None, model='nova-2', stream=
self.transcription_cursor = 0.0
logger.info(f"self.stream: {self.stream}")
self.interruption_signalled = False
if 'nova-2' not in self.model:
self.model = "nova-2"
if 'whisper' not in self.model:
self.model = "whisper"
if not self.stream:
self.api_url = f"https://{self.deepgram_host}/v1/listen?model={self.model}&filler_words=true&language={self.language}"
self.session = aiohttp.ClientSession()
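For reference, with these defaults the non-streaming request URL now resolves as in the sketch below (the host and the Russian language code are illustrative; "ru" matches the locale added in bolna/models.py):

```python
# Sketch of how the whisper default and the pass-through language land in api_url
deepgram_host = "api.deepgram.com"  # assumed default host
model = "whisper"
language = "ru"  # kept as-is because model == "whisper"
api_url = (
    f"https://{deepgram_host}/v1/listen"
    f"?model={model}&filler_words=true&language={language}"
)
# -> https://api.deepgram.com/v1/listen?model=whisper&filler_words=true&language=ru
```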
18 changes: 15 additions & 3 deletions local_setup/docker-compose.yml
@@ -1,5 +1,4 @@
services:

# main bolna service
bolna-app:
image: bolna-app:latest
@@ -26,7 +25,7 @@ services:
# ngrok for local tunneling
ngrok:
image: ngrok/ngrok:latest
restart: unless-stopped
# restart: unless-stopped
command:
- "start"
- "--all"
@@ -36,7 +35,6 @@ services:
- ./ngrok-config.yml:/etc/ngrok.yml
ports:
- 4040:4040

### Telephony servers ###
twilio-app:
image: twilio-app:latest
@@ -65,3 +63,17 @@ services:
- bolna-app
env_file:
- .env

voximplant_app:
image: voximplant_app:latest
build:
context: .
dockerfile: dockerfiles/voximplant-server.Dockerfile
ports:
- "8000:8000"
depends_on:
- redis
- ngrok
- bolna-app
env_file:
- .env
12 changes: 12 additions & 0 deletions local_setup/dockerfiles/voximplant-server.Dockerfile
@@ -0,0 +1,12 @@
FROM python:3.10.13-slim

WORKDIR /app
COPY ./requirements.txt /app
COPY ./telephony_server/voximplant_api_server.py /app
COPY ./credentials/voximplant.json /app/credentials/voximplant.json

RUN pip install --no-cache-dir -r requirements.txt

EXPOSE 8000

CMD ["uvicorn", "voximplant_api_server:app", "--host", "0.0.0.0", "--port", "8000"]
1 change: 1 addition & 0 deletions local_setup/quickstart_server.py
@@ -63,6 +63,7 @@ async def create_agent(agent_data: CreateAgentPayload):
store_file(file_key=stored_prompt_file_path, file_data=agent_prompts, local=True)
)

logger.info(f"Storing agent data in redis {data_for_db}")
return {"agent_id": agent_uuid, "state": "created"}


3 changes: 2 additions & 1 deletion local_setup/requirements.txt
@@ -1,6 +1,7 @@
python-dotenv==1.0.0
fastapi==0.108.0
plivo==4.47.0
# plivo==4.47.0
redis==5.0.1
twilio==8.9.0
uvicorn==0.22.0
voximplant-apiclient==1.11.0
28 changes: 28 additions & 0 deletions local_setup/requirements_bolna.txt
@@ -0,0 +1,28 @@
aiobotocore==2.9.0
Member (@prateeksachan, Jul 3, 2024): These packages could be installed by installing the bolna package in the Docker image.

Author: I almost missed the mark; I'm not that good at Python.

aiofiles==23.2.1
aiohttp==3.9.1
azure-cognitiveservices-speech==1.38.0
daily-python==0.9.1
fastapi==0.108.0
fastembed==0.2.7
litellm==1.40.20
numpy==1.26.1
openai>=1.10.0
pydantic==2.5.3
pydub==0.25.1
python-dateutil==2.8.2
python-dotenv==1.0.0
redis==5.0.1
requests==2.31.0
tiktoken>=0.6.0
twilio==8.9.0
uvicorn==0.22.0
websockets==10.4
onnxruntime>=1.16.3
scipy==1.11.4
uvloop==0.19.0
tokenizers
huggingface-hub
semantic-router
sentence_transformers
optimum[onnxruntime]
5 changes: 1 addition & 4 deletions local_setup/ngrok-config.yml
@@ -1,13 +1,10 @@
region: us
version: '2'
authtoken: <your-auth-token>
authtoken: <auth-token>
tunnels:
twilio-app:
addr: twilio-app:8001
proto: http
plivo-app:
addr: plivo-app:8002
proto: http
bolna-app:
addr: bolna-app:5001
proto: http
76 changes: 76 additions & 0 deletions local_setup/telephony_server/voximplant_api_server.py
@@ -0,0 +1,76 @@
import os
import json
import requests
import uuid
from voximplant.apiclient import VoximplantAPI, VoximplantException
from dotenv import load_dotenv
import redis.asyncio as redis
from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import PlainTextResponse

app = FastAPI(port=8000)
load_dotenv()
port = 8000

rule_id = os.getenv('VOXIMPLANT_RULE_ID')


def populate_ngrok_tunnels():
response = requests.get("http://ngrok:4040/api/tunnels") # ngrok interface
app_callback_url, websocket_url = None, None

if response.status_code == 200:
data = response.json()

for tunnel in data['tunnels']:
if tunnel['name'] == 'twilio-app':
app_callback_url = tunnel['public_url']
elif tunnel['name'] == 'voximplant':
websocket_url = tunnel['public_url'].replace('https:', 'wss:')

return app_callback_url, websocket_url
else:
print(f"Error: Unable to fetch data. Status code: {response.status_code}")

@app.post('/call')
async def make_call(request: Request):
try:
call_details = await request.json()
agent_id = call_details.get('agent_id', None)
recipient_phone_number = call_details.get('recipient_phone_number')
# recipient phone validation below
if not recipient_phone_number:
raise HTTPException(status_code=404, detail="Recipient phone number not provided")
if not agent_id:
raise HTTPException(status_code=404, detail="Agent not provided")

if not call_details or "recipient_phone_number" not in call_details:
raise HTTPException(status_code=404, detail="Recipient phone number not provided")

app_callback_url, websocket_url = populate_ngrok_tunnels()

print(f'app_callback_url: {app_callback_url}')
print(f'websocket_url: {websocket_url}')
print(f'recipient_phone_number: {recipient_phone_number}')

voxapi = VoximplantAPI("credentials/voximplant.json")
Member: Can you also please commit a test/temporary sample voximplant JSON file?

Author: Integration with Voximplant should be two-way. Unfortunately, there is no built-in ability to stream audio from a websocket on the Voximplant side, but on the bolna side everything is fine.
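Regarding the sample credentials file, a placeholder along the lines of the sketch below could be committed for local testing (the field names are an assumption based on the service-account key file that the Voximplant control panel generates; never commit real keys):

```python
import json
import pathlib

# Hypothetical placeholder; real values come from the key file generated
# in the Voximplant control panel when creating a service account.
placeholder = {
    "account_email": "user@example.com",
    "account_id": 1234567,
    "key_id": "00000000-0000-0000-0000-000000000000",
    "private_key": "-----BEGIN PRIVATE KEY-----\n...\n-----END PRIVATE KEY-----\n",
}

path = pathlib.Path("local_setup/credentials/voximplant.json")
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(json.dumps(placeholder, indent=2))
```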


try:
# Start the scenario
res = voxapi.start_scenarios(
rule_id=rule_id, # Replace with your actual rule ID
script_custom_data=json.dumps({
"ws_url": websocket_url,
"agent_id": agent_id,
"destination": recipient_phone_number
})
)
print(res)
except VoximplantException as e:
raise HTTPException(status_code=500, detail=f"Error starting scenario: {e.message}")

return PlainTextResponse("done", status_code=200)

except Exception as e:
print(f"Exception occurred in make_call: {e}")
raise HTTPException(status_code=500, detail="Internal Server Error")
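For local testing, the new endpoint can be exercised roughly as below (a sketch only: the port matches the 8000:8000 mapping in docker-compose.yml, the agent_id and phone number are placeholders, and an ngrok tunnel named voximplant is assumed to exist, since populate_ngrok_tunnels looks it up by that name):

```python
import requests

# Placeholder values for illustration only
resp = requests.post(
    "http://localhost:8000/call",
    json={
        "agent_id": "replace-with-created-agent-id",
        "recipient_phone_number": "+15550100123",
    },
)
print(resp.status_code, resp.text)  # expected: 200 done
```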