Merge develop into master #384

Open: wants to merge 26 commits into master from develop

Commits (26, showing changes from all commits)
b048ea4 - some changes (vipul-maheshwari, Aug 2, 2024)
2e89d54 - Merge branch 'dev/rag' into dev/proxy-server (vipul-maheshwari, Aug 2, 2024)
431f324 - adding extra config schema in the LLM , missed (vipul-maheshwari, Aug 2, 2024)
4807bab - Merge branch 'develop' into dev/proxy-server (vipul-maheshwari, Aug 2, 2024)
fd154ff - some minor fixes (vipul-maheshwari, Aug 3, 2024)
7bb3487 - change llama_index_agent to knowledgebase agent (marmikcfc, Aug 5, 2024)
507cf06 - refactor rag agent (marmikcfc, Aug 6, 2024)
5d29845 - Merge branch 'dev/proxy-server' into develop (vipul-maheshwari, Aug 7, 2024)
3449352 - changes in quickstart_ingestion_server script (vipul-maheshwari, Aug 7, 2024)
6022691 - Merge branch 'dev/proxy-server' into develop (vipul-maheshwari, Aug 7, 2024)
6b2a7ba - some changes for LANCEDB rag (vipul-maheshwari, Aug 7, 2024)
962c658 - Revert "some changes for LANCEDB rag" (vipul-maheshwari, Aug 8, 2024)
5007c27 - Revert "Merge branch 'dev/proxy-server' into develop" (vipul-maheshwari, Aug 8, 2024)
20cd842 - Revert "Merge branch 'dev/proxy-server' into develop" (vipul-maheshwari, Aug 8, 2024)
bd4a72a - fixed some issues for RAG (vipul-maheshwari, Aug 8, 2024)
c9c2f6b - Merge dev/proxy-server into develop (vipul-maheshwari, Aug 9, 2024)
e7333fa - Update requirements.txt (marmikcfc, Aug 13, 2024)
1a15427 - rag is added with bolna package (vipul-maheshwari, Aug 16, 2024)
1d91ded - Union issue is resolved, uffff. (vipul-maheshwari, Aug 16, 2024)
7b0c338 - changes (vipul-maheshwari, Aug 16, 2024)
e33c1ca - RAG in local_setup (vipul-maheshwari, Aug 26, 2024)
33f8466 - changes (vipul-maheshwari, Aug 26, 2024)
4a94aad - some changes in local setup for RAG (vipul-maheshwari, Aug 28, 2024)
4673b4f - bring models up to speed with develop (marmikcfc, Aug 28, 2024)
83f8271 - merge with develop (marmikcfc, Aug 28, 2024)
211f643 - Merge pull request #386 from voxos-ai/master-bkp (marmikcfc, Aug 28, 2024)
10 changes: 9 additions & 1 deletion .env.sample
@@ -5,13 +5,21 @@ OPENAI_MODEL=gpt-3.5-turbo
REDIS_URL=redis://redis:6379
WHISPER_URL=
STYLE_TTS=
BUCKET_NAME=bolna-user-prompts-sandbox

# RAG credentials
LANCEDB_DIR=
LLAMA_CLOUD_API_KEY=

# AWS credentials
AWS_ACCESS_KEY_ID=
AWS_SECRET_ACCESS_KEY=

# Twilio credentials
TWILIO_ACCOUNT_SID=
TWILIO_AUTH_TOKEN=
TWILIO_PHONE_NUMBER=


# Plivo credentials
PLIVO_AUTH_ID=
PLIVO_AUTH_TOKEN=
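The new RAG block above adds LanceDB and LlamaCloud credentials alongside the existing AWS and telephony keys. A minimal sketch of how a service could load and sanity-check them at startup; the helper below is illustrative, not part of this PR, and assumes python-dotenv is installed:

```python
# Illustrative startup check for the new RAG credentials; not code from this PR.
import os

from dotenv import load_dotenv  # assumption: python-dotenv is available

load_dotenv()  # pull variables from the local .env file

lancedb_dir = os.getenv("LANCEDB_DIR")                  # directory backing the LanceDB vector store
llama_cloud_api_key = os.getenv("LLAMA_CLOUD_API_KEY")  # key used for LlamaCloud/LlamaParse ingestion

if not lancedb_dir or not llama_cloud_api_key:
    raise RuntimeError("RAG is enabled but LANCEDB_DIR or LLAMA_CLOUD_API_KEY is not set")
```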
3 changes: 2 additions & 1 deletion .gitignore
@@ -167,4 +167,5 @@ agent_data/**/mp3
*/__pycache__/
*/*/__pycache__/
logs/
agent_data/
agent_data/
testing/
163 changes: 135 additions & 28 deletions bolna/agent_manager/task_manager.py
@@ -11,6 +11,7 @@
from datetime import datetime

import aiohttp
from bolna.agent_types.llama_index_rag_agent import LlamaIndexRag

from bolna.constants import ACCIDENTAL_INTERRUPTION_PHRASES, DEFAULT_USER_ONLINE_MESSAGE, DEFAULT_USER_ONLINE_MESSAGE_TRIGGER_DURATION, FILLER_DICT, PRE_FUNCTION_CALL_MESSAGE
from bolna.helpers.function_calling_helpers import trigger_api
@@ -52,7 +53,12 @@ def __init__(self, assistant_name, task_id, task, ws, input_parameters=None, con
logger.info(f"API TOOLS is present {task['tools_config']['api_tools']}")
self.kwargs['api_tools'] = task['tools_config']['api_tools']

if self.__has_extra_config():
# if self.__has_extra_config():
# pass
# #self.kwargs['assistant_id'] = task['tools_config']["llm_agent"]['extra_config']['assistant_id']
# #logger.info(f"Assistant id for agent is {self.kwargs['assistant_id']}")

if self.__is_openai_assistant():
self.kwargs['assistant_id'] = task['tools_config']["llm_agent"]['extra_config']['assistant_id']
logger.info(f"Assistant id for agent is {self.kwargs['assistant_id']}")

@@ -166,12 +172,44 @@ def __init__(self, assistant_name, task_id, task, ws, input_parameters=None, con
#self.stream = not turn_based_conversation #Currently we are allowing only realtime conversation based usecases. Hence it'll always be true unless connected through dashboard
self.is_local = False
self.llm_config = None
if self.task_config["tools_config"]["llm_agent"] is not None:
self.llm_config = {
"model": self.task_config["tools_config"]["llm_agent"]["model"],
"max_tokens": self.task_config["tools_config"]["llm_agent"]["max_tokens"],
"provider": self.task_config["tools_config"]["llm_agent"]["provider"]
}
self.llm_config_map = {}
self.llm_agent_map = {}
if self.__is_multiagent():
logger.info(f"Gotta write the code for this shit as well")
for agent, config in self.task_config["tools_config"]["llm_agent"]['extra_config']['agent_map'].items():
self.llm_config_map[agent] = config.copy()
self.llm_config_map[agent]['buffer_size'] = self.task_config["tools_config"]["synthesizer"]['buffer_size']
if 'assistant_id' in config:
self.llm_config_map[agent]['agent_type'] = "openai_assistant"

elif not self.__is_openai_assistant():
logger.info(f"NOT OPEN AI ASSISTANT")
if self.task_config["tools_config"]["llm_agent"] is not None:
if self.__is_knowledgebase_agent():
self.llm_agent_config = self.task_config["tools_config"]["llm_agent"]
self.llm_config = {
"model": self.llm_agent_config['extra_config']['model'],
"max_tokens": self.llm_agent_config['extra_config']['max_tokens'],
"provider": self.llm_agent_config['extra_config']['provider'],
}
else:
agent_type = self.task_config["tools_config"]["llm_agent"].get("agent_type", None)
if not agent_type:
self.llm_agent_config = self.task_config["tools_config"]["llm_agent"]
else:
self.llm_agent_config = self.task_config["tools_config"]["llm_agent"]['extra_config']

self.llm_config = {
"model": self.llm_agent_config['model'],
"max_tokens": self.llm_agent_config['max_tokens'],
"provider": self.llm_agent_config['provider'],
}


logger.info(f"SETTING FOLLOW UP TASK DETAILS {self.llm_agent_config}")




# Output stuff
self.output_task = None
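The branching above reads model, max_tokens and provider either from the llm_agent block itself or from its extra_config, depending on agent_type. Below is a sketch of a knowledgebase-agent configuration that would satisfy this parsing; the key names follow the code above, while the concrete values and the vector_id are made up:

```python
# Hypothetical llm_agent block for the knowledgebase (RAG) path; values are illustrative.
llm_agent = {
    "agent_type": "knowledgebase_agent",
    "extra_config": {
        "provider": "openai",
        "model": "gpt-3.5-turbo-16k",
        "max_tokens": 100,
        "temperature": 0.1,
        "vector_store": {
            "provider": "lancedb",            # assumption, in line with the new LANCEDB_DIR variable
            "vector_id": "my_knowledgebase",  # later passed to LlamaIndexRag as vector_id
        },
    },
}
```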
@@ -230,8 +268,12 @@ def __init__(self, assistant_name, task_id, task, ws, input_parameters=None, con
else:
self.__setup_routes(self.routes)
logger.info(f"Time to setup routes {time.time() - start_time}")



if self.__is_multiagent():
guardrails_meta = self.kwargs.pop('routes', None)
self.agent_routing = guardrails_meta['agent_routing_config']['route_layer']
self.default_agent = task['tools_config']['llm_agent']['extra_config']['default_agent']
logger.info(f"Inisialised with default agent {self.default_agent}, agent_routing {self.agent_routing}")
# for long pauses and rushing
if self.conversation_config is not None:
self.minimum_wait_duration = self.task_config["tools_config"]["transcriber"]["endpointing"]
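In the multi-agent branch, extra_config is expected to carry an agent_map and a default_agent, while the route layer itself arrives through kwargs['routes']. A hedged sketch of such a configuration; agent names, ids and values are hypothetical:

```python
# Hypothetical multi-agent extra_config; agent names and ids are illustrative.
extra_config = {
    "default_agent": "sales_agent",
    "agent_map": {
        "sales_agent": {
            "model": "gpt-3.5-turbo",
            "max_tokens": 150,
            "provider": "openai",
        },
        "support_assistant": {
            "assistant_id": "asst_xyz789",  # presence of assistant_id marks this entry as an openai_assistant
        },
    },
}
# The routing itself is read from kwargs['routes']['agent_routing_config']['route_layer'] above
# and used together with default_agent to pick an agent per turn.
```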
@@ -307,13 +349,61 @@ def __init__(self, assistant_name, task_id, task, ws, input_parameters=None, con
logger.info("Not using fillers to decrease latency")
else:
self.filler_preset_directory = f"{os.getenv('FILLERS_PRESETS_DIR')}/{self.synthesizer_voice.lower()}"
# setting transcriber
self.__setup_transcriber()
# setting synthesizer
self.__setup_synthesizer(self.llm_config)

def __has_extra_config(self):
# setting llm
if self.llm_config is not None:
logger.info(f"LLM CONFIG IS NOT NONE {self.task_config['task_type']} llm agent config {self.llm_agent_config}")
llm = self.__setup_llm(self.llm_config)
#Setup tasks
agent_params = {
'llm': llm,
'agent_type': self.llm_agent_config.get("agent_type","simple_llm_agent")
}
self.__setup_tasks(**agent_params)

elif self.__is_multiagent():
# Setup task for multiagent conversation
for agent in self.task_config["tools_config"]["llm_agent"]['extra_config']['agent_map']:
if 'routes' in self.llm_config_map[agent]:
del self.llm_config_map[agent]['routes'] #Remove routes from here as it'll create conflict ahead
llm = self.__setup_llm(self.llm_config_map[agent])
agent_type = self.llm_config_map[agent].get("agent_type","simple_llm_agent")
logger.info(f"Getting response for {llm} and agent type {agent_type} and {agent}")
agent_params = {
'llm': llm,
'agent_type': agent_type
}
if agent_type == "openai_assistant":
agent_params['assistant_config'] = self.llm_config_map[agent]
llm_agent = self.__setup_tasks(**agent_params)
self.llm_agent_map[agent] = llm_agent
elif self.__is_openai_assistant():
# if self.task_config['tools_config']["llm_agent"].get("agent_type", None) is None:
# assistant_config = {"assistant_id": self.task_config['tools_config']["llm_agent"]['assistant_id']}
self.__setup_tasks(agent_type = "openai_assistant", assistant_config= task['tools_config']["llm_agent"]['extra_config'])

def __is_openai_assistant(self):
if self.task_config["task_type"] == "webhook":
return False
extra_config = self.task_config['tools_config']["llm_agent"].get("extra_config", None)
return extra_config is not None

def __is_knowledgebase_agent(self):
if self.task_config["task_type"] == "webhook":
return False
agent_type = self.task_config['tools_config']["llm_agent"].get("agent_type", None)
return agent_type == "knowledgebase_agent"

def __setup_routes(self, routes):
embedding_model = routes.get("embedding_model", os.getenv("ROUTE_EMBEDDING_MODEL"))
self.route_encoder = FastEmbedEncoder(name=embedding_model)
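Taken together, __is_openai_assistant and __is_knowledgebase_agent above decide which setup path a task takes: any non-webhook task whose llm_agent carries an extra_config satisfies the first check, and agent_type == "knowledgebase_agent" satisfies the second, so a knowledgebase agent with an extra_config matches both and the caller's branch ordering matters. A standalone restatement of that behavior, for illustration only:

```python
# Standalone restatement of the two detection helpers above; not code from this PR.
def is_openai_assistant(task_config):
    if task_config["task_type"] == "webhook":
        return False
    return task_config["tools_config"]["llm_agent"].get("extra_config") is not None

def is_knowledgebase_agent(task_config):
    if task_config["task_type"] == "webhook":
        return False
    return task_config["tools_config"]["llm_agent"].get("agent_type") == "knowledgebase_agent"

rag_task = {
    "task_type": "conversation",
    "tools_config": {"llm_agent": {"agent_type": "knowledgebase_agent", "extra_config": {"vector_store": {}}}},
}
print(is_openai_assistant(rag_task))     # True: extra_config is present
print(is_knowledgebase_agent(rag_task))  # True: agent_type is "knowledgebase_agent"
```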
@@ -468,19 +558,35 @@ def __setup_llm(self, llm_config):
else:
raise Exception(f'LLM {self.task_config["tools_config"]["llm_agent"]["provider"]} not supported')

def __setup_tasks(self, llm):
if self.task_config["task_type"] == "conversation":
agent_type = self.task_config["tools_config"]["llm_agent"].get("agent_type", self.task_config["tools_config"]["llm_agent"]["agent_flow_type"])
if agent_type == "streaming":
self.tools["llm_agent"] = StreamingContextualAgent(llm)
elif agent_type == "openai_assistant":
logger.info("setting up backend as openai_assistants")
self.tools["llm_agent"] = OpenAIAssistantAgent(llm)
elif agent_type in ("preprocessed", "formulaic"):
preprocessed = self.task_config["tools_config"]["llm_agent"]["agent_flow_type"] == "preprocessed"
logger.info(f"LLM TYPE {type(llm)}")
self.tools["llm_agent"] = GraphBasedConversationAgent(llm, context_data=self.context_data,
prompts=self.prompts, preprocessed=preprocessed)
def __get_agent_object(self, llm, agent_type, assistant_config=None):
if agent_type == "simple_llm_agent":
logger.info(f"Simple llm agent")
llm_agent = StreamingContextualAgent(llm)
elif agent_type == "openai_assistant":
logger.info(f"setting up backend as openai_assistants {assistant_config}")
llm_agent = OpenAIAssistantAgent(**assistant_config)
elif agent_type == "knowledgebase_agent":
logger.info("#### Setting up knowledgebase_agent agent ####")
extra_config = self.task_config["tools_config"]["llm_agent"].get("extra_config", {})
vector_store_config = extra_config.get("vector_store", {})
llm_agent = LlamaIndexRag(
vector_id=vector_store_config.get("vector_id"),
temperature=extra_config.get("temperature", 0.1),
model=extra_config.get("model", "gpt-3.5-turbo-16k"),
buffer=40,
max_tokens=100, # You might want to make this configurable
provider_config=vector_store_config
)
logger.info("Llama-index rag agent is created")
else:
raise(f"{agent_type} Agent type is not created yet")
return llm_agent

def __setup_tasks(self, llm=None, agent_type=None, assistant_config=None):
if self.task_config["task_type"] == "conversation" and not self.__is_multiagent():
self.tools["llm_agent"] = self.__get_agent_object(llm, agent_type, assistant_config)
elif self.__is_multiagent():
return self.__get_agent_object(llm, agent_type, assistant_config)
elif self.task_config["task_type"] == "extraction":
logger.info("Setting up extraction agent")
self.tools["llm_agent"] = ExtractionContextualAgent(llm, prompt=self.system_prompt)
@@ -507,7 +613,8 @@ def __setup_tasks(self, llm):
########################
async def load_prompt(self, assistant_name, task_id, local, **kwargs):
logger.info("prompt and config setup started")
if self.task_config["task_type"] == "webhook" or self.task_config["tools_config"]["llm_agent"]["agent_flow_type"] == "openai_assistant":
agent_type = self.task_config["tools_config"]["llm_agent"].get("agent_type", "simple_llm_agent")
if self.task_config["task_type"] == "webhook" or agent_type in ["openai_assistant", "knowledgebase_agent"]:
return
self.is_local = local
today = datetime.now().strftime("%A, %B %d, %Y")
@@ -891,7 +998,7 @@ def __store_into_history(self, meta_info, messages, llm_response, should_trigger
logger.info("##### User spoke while LLM was generating response")
else:
self.llm_response_generated = True
convert_to_request_log(message=llm_response, meta_info= meta_info, component="llm", direction="response", model=self.task_config["tools_config"]["llm_agent"]["model"], run_id= self.run_id)
convert_to_request_log(message=llm_response, meta_info= meta_info, component="llm", direction="response", model=self.llm_config['model'], run_id= self.run_id)
if should_trigger_function_call:
#Now, we need to consider 2 things here
#1. There was silence between function call and now
@@ -996,8 +1103,8 @@ async def _process_conversation_task(self, message, sequence, meta_info):
logger.info(f"Route {route} has a vector cache")
relevant_utterance = self.vector_caches[route].get(message['data'])
cache_response = self.route_responses_dict[route][relevant_utterance]
convert_to_request_log(message = message['data'], meta_info= meta_info, component="llm", direction="request", model=self.task_config["tools_config"]["llm_agent"]["model"], run_id= self.run_id)
convert_to_request_log(message = message['data'], meta_info= meta_info, component="llm", direction="response", model=self.task_config["tools_config"]["llm_agent"]["model"], is_cached= True, run_id= self.run_id)
convert_to_request_log(message = message['data'], meta_info= meta_info, component="llm", direction="request", model=self.llm_config["model"], run_id= self.run_id)
convert_to_request_log(message = message['data'], meta_info= meta_info, component="llm", direction="response", model=self.llm_config["model"], is_cached= True, run_id= self.run_id)
messages = copy.deepcopy(self.history)
messages += [{'role': 'user', 'content': message['data']},{'role': 'assistant', 'content': cache_response}]
self.interim_history = copy.deepcopy(messages)
Expand Down Expand Up @@ -1032,7 +1139,7 @@ async def _process_conversation_task(self, message, sequence, meta_info):
{'role': 'system', 'content': self.check_for_completion_prompt},
{'role': 'user', 'content': format_messages(self.history, use_system_prompt= True)}]
logger.info(f"##### Answer from the LLM {answer}")
convert_to_request_log(message=format_messages(prompt, use_system_prompt= True), meta_info= meta_info, component="llm", direction="request", model=self.task_config["tools_config"]["llm_agent"]["model"], run_id= self.run_id)
convert_to_request_log(message=format_messages(prompt, use_system_prompt= True), meta_info= meta_info, component="llm", direction="request", model=self.llm_agent_config["model"], run_id= self.run_id)
convert_to_request_log(message=answer, meta_info= meta_info, component="llm", direction="response", model= self.check_for_completion_llm, run_id= self.run_id)

if should_hangup: