Skip to content

Commit

Permalink
Slack improvements 3 (#258)
Browse files Browse the repository at this point in the history
* chore: adjust copy
* refactor(Chat Integrations): support setting a channel's Space Group to None
* chore: refactor some print statements into log lines
* refactor: persona -> assistant, refs in variable and args
* test: fix
  • Loading branch information
janaka authored May 23, 2024
1 parent 3931321 commit 9e493cd
Show file tree
Hide file tree
Showing 19 changed files with 86 additions and 75 deletions.
5 changes: 5 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,11 @@ cmd = "infisical run --env=dev -- opentelemetry-instrument --logs_exporter none
args = [{ name = "port", default = 8501, type = "integer" }]
env = { WATCHDOG_LOG_LEVEL = "ERROR", PYTHONPATH = "${PWD}/web/:${PWD}/source/:${PWD}/../docq-extensions/source/" }

[tool.poe.tasks.run-simple]
cmd = "infisical run --env=dev -- streamlit run web/index.py --server.port $port --browser.gatherUsageStats false --server.runOnSave true --server.fileWatcherType auto"
args = [{ name = "port", default = 8501, type = "integer" }]


[tool.poe.tasks.docker-build]
cmd = """
docker build
Expand Down
2 changes: 1 addition & 1 deletion source/docq/agents/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def run_agent(
assistant_agent = AssistantAgent(
name=assistant.name if assistant else "General Assistant 1",
llm_config=generate_autogen_llm_config(chat_model_settings, kernel),
system_message=assistant.system_prompt_content if assistant else ASSISTANT_PERSONA,
system_message=assistant.system_message_content if assistant else ASSISTANT_PERSONA,
)

worker = UserProxyAgent(
Expand Down
10 changes: 3 additions & 7 deletions source/docq/data_source/support/web_extracting.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ def load_data(
span.set_attribute("source_page_type", source_page_type.__str__())

# page_links = urls # default case expect page urls to extract content from directly
print("source page type : ", source_page_type)
log.debug("source page type : ", source_page_type)

if source_page_type == SourcePageType.index_page:
# the provided URLs are index pages, extract links from them first
Expand All @@ -278,12 +278,12 @@ def load_data(
span.add_event("extracted_links_from_index_page", {"url": url, "links_count": len(lnk)})
elif source_page_type == SourcePageType.page_list:
page_links = urls
print("page list - links : ", page_links)
log.debug("page list - links : ", page_links)
else:
raise ValueError(f"Invalid source page type: {source_page_type}")

span.set_attribute("page_links_count", len(page_links).__str__())
print("page links : ", page_links)
log.debug("page links : ", page_links)

for page_link in page_links:
try:
Expand Down Expand Up @@ -347,9 +347,5 @@ def _extract_links(url: str, extractor: BaseTextExtractor, include_filter: Optio

soup = BeautifulSoup(page.content, "html.parser")

# print("page content: ", page.content)

# print("page text: ", page.text)

page_links = extractor.extract_links(soup, url, url, include_filter=include_filter)
return page_links
2 changes: 1 addition & 1 deletion source/docq/data_source/web_scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def load(self: Self, space: SpaceKey, configs: dict) -> List[Document]:
bs_web_reader = self._initiate_web_reader(space, configs)

source_page_type_str = configs.get("source_page_type")
print("source_page_type: ", source_page_type_str)
log.debug("source_page_type: ", source_page_type_str)
source_page_type = (
SourcePageType[source_page_type_str[0]] if source_page_type_str else SourcePageType.index_page
)
Expand Down
8 changes: 5 additions & 3 deletions source/docq/domain.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,16 +122,18 @@ def create_instance(document_link: str, document_text: str, indexed_on: Optional

@dataclass
class Assistant:
    """An assistant at its core is a system prompt and user prompt template that tunes the LLM to take on a certain persona and behave/respond a particular way."""

    key: str
    """Unique ID for an Assistant instance"""
    name: str
    """Friendly name for the assistant"""
    system_message_content: str
    """Content of the system message. This is where the persona is defined."""
    user_prompt_template_content: str
    """Template for the user prompt aka query. This template is used to generate the content for the user prompt/query that will be sent to the LLM (as a user message)."""
    llm_settings_collection_key: str
    """The key of the LLM settings collection to use for LLM calls by this assistant."""


class AssistantType(Enum):
Expand Down
15 changes: 11 additions & 4 deletions source/docq/integrations/slack/manage_slack.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,12 @@

from .models import SlackChannel, SlackInstallation

SQL_CREATE_DOCQ_SLACK_APPLICATIONS_TABLE = """
SQL_CREATE_DOCQ_SLACK_APP_INSTALL_TABLE = """
CREATE TABLE IF NOT EXISTS docq_slack_installations (
id INTEGER PRIMARY KEY,
app_id TEXT NOT NULL,
team_id TEXT NOT NULL,
team_name TEXT NOT NULL, -- References a slack workspace
team_name TEXT NOT NULL, -- References a Slack workspace name
org_id INTEGER NOT NULL,
space_group_id INTEGER, -- TODO: Implement globally available content for the entire slack workspace
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
Expand All @@ -33,20 +33,24 @@
channel_id TEXT NOT NULL,
channel_name TEXT NOT NULL,
org_id INTEGER NOT NULL,
space_group_id INTEGER,
space_group_id INTEGER, -- associates knowledge with the channel
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (org_id) REFERENCES orgs(id),
FOREIGN KEY (space_group_id) REFERENCES space_groups(id),
UNIQUE (channel_id, org_id)
);
"""

# adding persona per channel.
# move channels table from shared to org scope
# add persona_id column to the table
# handle migration scripts


def _init() -> None:
    """Initialize the Slack integration: create the installations and channels tables if missing."""
    with closing(sqlite3.connect(get_sqlite_shared_system_file())) as connection:
        # NOTE(review): the diff render showed both the old constant name
        # (SQL_CREATE_DOCQ_SLACK_APPLICATIONS_TABLE) and the renamed one; only the
        # renamed constant exists after this commit, so only it is executed here.
        connection.execute(SQL_CREATE_DOCQ_SLACK_APP_INSTALL_TABLE)
        connection.execute(SQL_CREATE_DOCQ_SLACK_CHANNELS_TABLE)
        connection.commit()

Expand Down Expand Up @@ -120,6 +124,9 @@ def integration_exists(app_id: str, team_id: str, selected_org_id: int) -> bool:
return cursor.fetchone() is not None


# SLACK CHANNELS


def insert_or_update_slack_channel(channel_id: str, channel_name: str, org_id: int) -> None:
"""Insert or update a channel."""
with closing(sqlite3.connect(get_sqlite_shared_system_file())) as connection:
Expand Down
2 changes: 0 additions & 2 deletions source/docq/integrations/slack/models.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
"""Slack integration data models."""

from concurrent.futures import thread
from typing import Optional

from attr import dataclass
from sympy import N


@dataclass
Expand Down
15 changes: 8 additions & 7 deletions source/docq/manage_assistants.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""prompt templates that represent a persona."""
import logging as log
import sqlite3
from contextlib import closing
from datetime import datetime
Expand Down Expand Up @@ -150,24 +151,24 @@ def _init(org_id: Optional[int] = None) -> None:
__create_default_assistants_if_needed()


def llama_index_chat_prompt_template_from_persona(
persona: Assistant, chat_history: Optional[List[ChatMessage]] = None
def llama_index_chat_prompt_template_from_assistant(
assistant: Assistant, chat_history: Optional[List[ChatMessage]] = None
) -> ChatPromptTemplate:
"""Get the prompt template for llama index.
Args:
persona (Assistant): Docq assistant.
assistant (Assistant): Docq assistant.
chat_history (Optional[List[ChatMessage]]): A list of ChatMessages that will be inserted into the message stack of the LLM synth call. It will be inserted between the system message an the latest user query message.
"""
messages = chat_history or []

_system_prompt_message = ChatMessage(
content=persona.system_prompt_content,
content=assistant.system_message_content,
role=MessageRole.SYSTEM,
)

_user_prompt_message = ChatMessage(
content=persona.user_prompt_template_content,
content=assistant.user_prompt_template_content,
role=MessageRole.USER,
)

Expand Down Expand Up @@ -204,7 +205,7 @@ def get_assistant_or_default(assistant_scoped_id: Optional[int] = None, org_id:
return Assistant(
key=str(assistant_data[0]),
name=assistant_data[1],
system_prompt_content=assistant_data[4],
system_message_content=assistant_data[4],
user_prompt_template_content=assistant_data[5],
llm_settings_collection_key=assistant_data[6],
)
Expand Down Expand Up @@ -382,7 +383,7 @@ def __create_default_assistants_if_needed() -> None:
rows.reverse()

names = [row[1] for row in rows]
print("names: ", names)
log.info("Available assistant names: ", names)

if "General Q&A" not in names:
chat_default = SIMPLE_CHAT_PERSONAS["default"]
Expand Down
4 changes: 2 additions & 2 deletions source/docq/manage_spaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -366,7 +366,7 @@ def create_thread_space(org_id: int, thread_id: int, summary: str, datasource_ty
"""Create a spcace for chat thread uploads."""
rnd = str(random.randint(56450, 9999999999))
name = f"Thread-{thread_id} {summary} {rnd}"
print(f"Creating thread space with name: '{name}'")
log.info("Creating thread space with name: '%s'", name)
return create_space(
org_id=org_id,
name=name,
Expand Down Expand Up @@ -410,7 +410,7 @@ def thread_space_exists(thread_id: int) -> bool:
row = cursor.fetchone()
exists = row is not None

print(f"Thread space exists: {exists}")
log.debug("Thread space exists: %s", {exists})
return exists


Expand Down
6 changes: 3 additions & 3 deletions source/docq/run_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ def query(
feature: FeatureKey,
thread_id: int,
model_settings_collection: LlmUsageSettingsCollection,
persona: Assistant,
assistant: Assistant,
spaces: Optional[list[SpaceKey]] = None,
) -> list:
"""Run the query again documents in the space(s) using a LLM."""
Expand All @@ -263,9 +263,9 @@ def query(
log.debug("is_chat: %s", is_chat)
try:
response = (
run_chat(input_, history_messages, model_settings_collection, persona)
run_chat(input_, history_messages, model_settings_collection, assistant)
if is_chat
else run_ask(input_, history_messages, model_settings_collection, persona, spaces)
else run_ask(input_, history_messages, model_settings_collection, assistant, spaces)
)
log.debug("Response: %s", response)

Expand Down
17 changes: 8 additions & 9 deletions source/docq/support/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@

from ..config import EXPERIMENTS
from ..domain import SpaceKey
from ..manage_assistants import Assistant, llama_index_chat_prompt_template_from_persona
from ..manage_assistants import Assistant, llama_index_chat_prompt_template_from_assistant
from ..model_selection.main import (
LLM_MODEL_COLLECTIONS,
LlmUsageSettingsCollection,
Expand Down Expand Up @@ -147,8 +147,8 @@ def _get_generation_model(model_settings_collection: LlmUsageSettingsCollection)

model.max_retries = 3

print("model: ", model)
print("model_settings_collection: ", model_settings_collection)
log.info("model: ", model)
log.info("model_settings_collection: ", model_settings_collection)

return model

Expand Down Expand Up @@ -314,18 +314,17 @@ def run_chat(
) -> AgentChatResponse:
"""Chat directly with a LLM with history."""
## chat engine handles tracking the history.
print("chat persona: ", assistant.system_prompt_content)
print("chat history: ", history)
log.debug("chat assistant: ", assistant.system_message_content)

engine = SimpleChatEngine.from_defaults(
service_context=_get_service_context(model_settings_collection),
kwargs=model_settings_collection.model_usage_settings[ModelCapability.CHAT].additional_args,
system_prompt=assistant.system_prompt_content,
system_prompt=assistant.system_message_content,
chat_history=history,
)
output = engine.chat(input_)

log.debug("(Chat) Q: %s, A: %s", input_, output)
# log.debug("(Chat) Q: %s, A: %s", input_, output)
return output


Expand All @@ -334,7 +333,7 @@ def run_ask(
input_: str,
history: List[ChatMessage],
model_settings_collection: LlmUsageSettingsCollection,
persona: Assistant,
assistant: Assistant,
spaces: list[SpaceKey] | None = None,
) -> RESPONSE_TYPE | AGENT_CHAT_RESPONSE_TYPE:
"""Ask questions against existing index(es) with history."""
Expand Down Expand Up @@ -373,7 +372,7 @@ def run_ask(
continue

try:
text_qa_template = llama_index_chat_prompt_template_from_persona(persona, history)
text_qa_template = llama_index_chat_prompt_template_from_assistant(assistant, history)
span.add_event(name="prompt_created")
except Exception as e:
raise Error(f"Error: {e}") from e
Expand Down
4 changes: 2 additions & 2 deletions tests/integration/backend_integration_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ def test_chat_private_feature(features: dict[str, domain.FeatureKey], saved_mode
persona = domain.Assistant(
key="test-persona",
name="Test Persona",
system_prompt_content=system_prompt,
system_message_content=system_prompt,
user_prompt_template_content=user_prompt_template_content,
llm_settings_collection_key=saved_model_settings.key,
)
Expand All @@ -160,6 +160,6 @@ def test_chat_private_feature(features: dict[str, domain.FeatureKey], saved_mode
features[config.OrganisationFeatureType.CHAT_PRIVATE.name],
thread_id,
model_settings_collection=saved_model_settings,
persona=persona,
assistant=persona,
)
assert "Test 1 from docq" in results[1][1], f"The query didn't return the expected response. Returned: '{results[1][1]}', expected: 'Test 1 from docq'"
16 changes: 11 additions & 5 deletions tests/unit/docq/support/llm_test.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
"""Tests for docq.support.llm."""
from unittest.mock import Mock, patch

from docq.manage_assistants import Assistant
from docq.domain import Assistant
from docq.model_selection.main import LlmUsageSettings, LlmUsageSettingsCollection, ModelCapability
from llama_index.core import ServiceContext
from llama_index.core.chat_engine import SimpleChatEngine
from llama_index.core.llms import ChatMessage, MessageRole


#@patch("docq.support.metadata_extractors.DEFAULT_MODEL_PATH")
Expand All @@ -26,10 +27,15 @@ def test_run_chat() -> None:
mocked_model_usage_settings = Mock(LlmUsageSettings)
mocked_model_usage_settings.additional_args = {"arg1": "value1", "arg2": "value2"}
mocked_model_usage_settings_collection.model_usage_settings = {ModelCapability.CHAT: mocked_model_usage_settings}
mocked_persona = Mock(Assistant)
mocked_persona.system_prompt_content= "Some system prompt"
mocked_persona.user_prompt_template_content = "My user prompt template"
mocked_assistant = Mock(Assistant)
mocked_assistant.system_message_content = "Some system prompt"
mocked_assistant.user_prompt_template_content = "My user prompt template"

response = run_chat("My ask", "My chat history", mocked_model_usage_settings_collection, mocked_persona)
response = run_chat(
"My ask",
[ChatMessage(role=MessageRole.USER, content="My chat history")],
mocked_model_usage_settings_collection,
mocked_assistant,
)
mocked_chat.assert_called_once_with("My ask")
assert response == "LLM response"
14 changes: 7 additions & 7 deletions web/admin/admin_integrations.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import streamlit as st

from web.utils.layout import render_integrations, render_slack_installation_button, tracer
from web.utils.layout import render_integrations_slack, render_slack_installation_button, tracer


@tracer.start_as_current_span("admin_integrations_page")
Expand All @@ -11,27 +11,27 @@ def admin_integrations_page() -> None:
integrations = [
{
"name": "Slack",
"description": "Slack is a business communication platform that allows teams to communicate and collaborate.",
"description": "Slack the business communication platform that allows teams to communicate and collaborate.",
"icon": "slack",
"url": "/api/integration/slack/v1/install",
},
{
"name": "Teams",
"description": "Google Drive is a file storage and synchronization service developed by Google.",
"name": "MS Teams",
"description": "Mircosoft Teams the business communication platform that allows teams to communicate and collaborate.",
"icon": "google-drive",
"url": "/api/integration/google-drive/v1/install",
"url": "/api/integration/msteams/v1/install",
},
]

integration = st.selectbox(
"Select an integration to get started",
"Select an integration",
options=[integration["name"] for integration in integrations],
)

if integration == "Slack":
render_slack_installation_button()

render_integrations()
render_integrations_slack()

else:
st.info("Coming soon!")
Loading

0 comments on commit 9e493cd

Please sign in to comment.