
Commit 2e81525

Merge pull request #268 from docqai/build/update-streamlit-package
refactor: update streamlit version plus improvements to API needed by Chrome Ext
2 parents 104782d + 79acfce commit 2e81525


57 files changed: 2653 additions & 2206 deletions

poetry.lock

Lines changed: 1704 additions & 1798 deletions
Some generated files are not rendered by default.

pyproject.toml

Lines changed: 7 additions & 7 deletions
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "docq"
-version = "0.12.1"
+version = "0.13.1"
 description = "Docq.AI - Your private ChatGPT alternative. Securely unlock knowledge from confidential documents."
 authors = ["Docq.AI Team <[email protected]>"]
 maintainers = ["Docq.AI Team <[email protected]>"]
@@ -13,16 +13,16 @@ packages = [{ include = "docq", from = "source" }]

 [tool.poetry.dependencies]
 python = ">=3.10,<3.12"
-streamlit = "1.30.0"
-st-pages = "^0.4.1"
+streamlit = "^1.38.0"
+st-pages = "^1.0.0"
 pypdf = "^4.1.0"
 docx2txt = "^0.8"
 argon2-cffi = "^21.3.0"
 azure-core = "^1.27.1"
 opendal = "^0.41.0"
 transformers = "^4.38.2"
 optimum = {extras = ["exporters"], version = "^1.17.1"}
-torch = "^2.0.0, !=2.0.1, !=2.1.0"
+torch = "2.2.0"
 cryptography = "^42.0.4"
 span-marker = "^1.3.0"
 honeycomb-opentelemetry = "^0.2.3b0"
@@ -47,7 +47,7 @@ google-auth-oauthlib = "^1.1.0"
 google-api-python-client = "^2.104.0"
 google-auth-httplib2 = "^0.1.1"
 microsoftgraph-python = "^1.1.6"
-pydantic = "^2.5.2"
+pydantic = "2.8.2"
 mkdocs-material = "^9.5.13"
 pyautogen = "^0.2.2"
 termcolor = "^2.4.0"
@@ -57,14 +57,14 @@ semantic-kernel = "0.4.3.dev0"
 imap-tools = "^1.5.0"
 llama-index-llms-litellm = "^0.1.3"
 llama-index-embeddings-azure-openai = "^0.1.6"
-jwt = "^1.3.1"
 llama-index-embeddings-huggingface-optimum = "^0.1.5"
-llama-index-core = "^0.10.39"
 llama-index-readers-file = "^0.1.12"
 slack-bolt = "^1.18.1"
 llama-index-retrievers-bm25 = "^0.1.3"
 sentence-transformers = "^2.6.1"
 llama-index-postprocessor-colbert-rerank = "^0.1.2"
+jwt = "^1.3.1"
+llama-index-core = "0.10.39"

 [tool.poetry.group.dev.dependencies]
 pre-commit = "^2.18.1"

source/docq/config.py

Lines changed: 3 additions & 3 deletions
@@ -28,10 +28,10 @@
 class SpaceType(Enum):
     """Space types. These reflect scope of data access."""

-    PERSONAL = "personal"
+    PERSONAL = "personal"  # DEPRECATED. Personal spaces are now shared spaces in the users personal org.
     SHARED = "shared"
-    PUBLIC = "public"
-    THREAD = "thread"
+    PUBLIC = "public"  # public spaces are accessible to all users and anonymous users such as via widgets for chat bots
+    THREAD = "thread"  # a space that belongs to a thread used for adhoc uploads.


 class SystemFeatureType(Enum):

source/docq/manage_assistants.py

Lines changed: 21 additions & 9 deletions
@@ -8,8 +8,8 @@
 from llama_index.core.base.llms.types import ChatMessage, MessageRole
 from llama_index.core.prompts import ChatPromptTemplate

-from .domain import Assistant, AssistantType
-from .support.store import (
+from docq.domain import Assistant, AssistantType
+from docq.support.store import (
     get_sqlite_global_system_file,
     get_sqlite_org_system_file,
 )
@@ -126,7 +126,7 @@
     updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
 )
 """
-
+# id, name, type, archived, system_prompt_template, user_prompt_template, llm_settings_collection_key, created_at, updated_at, scoped_id
 ASSISTANT = tuple[int, str, str, bool, str, str, str, datetime, datetime, str]


@@ -196,10 +196,17 @@ def get_assistant_fixed(
     return result


-def get_assistant_or_default(assistant_scoped_id: Optional[int] = None, org_id: Optional[int] = None) -> Assistant:
-    """Get the persona."""
+def get_assistant_or_default(assistant_scoped_id: Optional[str] = None, org_id: Optional[int] = None) -> Assistant:
+    """Get the persona.
+
+    Args:
+        assistant_scoped_id (Optional[int]): The assistant scoped ID. A composite ID <scope>_<id>.
+            scope is either 'org' or 'global'. id from the respective table.
+        org_id (Optional[int]): The org ID.
+
+    """
     if assistant_scoped_id:
-        assistant_data = get_assistant(assistant_scoped_id=str(assistant_scoped_id), org_id=org_id)
+        assistant_data = get_assistant(assistant_scoped_id=assistant_scoped_id, org_id=org_id)
         return Assistant(
             key=str(assistant_data[0]),
             name=assistant_data[1],
@@ -209,7 +216,11 @@ def get_assistant_or_default(assistant_scoped_id: Optional[int] = None, org_id:
         )
     else:
         key = "default"
-        return Assistant(key=key, **SIMPLE_CHAT_PERSONAS[key])
+        return Assistant(
+            key=key,
+            llm_settings_collection_key="azure_openai_with_local_embedding",
+            **SIMPLE_CHAT_PERSONAS[key],
+        )


 def list_assistants(org_id: Optional[int] = None, assistant_type: Optional[AssistantType] = None) -> list[ASSISTANT]:
@@ -258,6 +269,7 @@ def get_assistant(assistant_scoped_id: str, org_id: Optional[int]) -> ASSISTANT:
     if scope == "org" and org_id:
         path = __get_assistants_sqlite_file(org_id=org_id)
     else:
+        # global scope
         path = __get_assistants_sqlite_file(org_id=None)

     with closing(sqlite3.connect(path, detect_types=sqlite3.PARSE_DECLTYPES)) as connection, closing(
@@ -271,10 +283,10 @@ def get_assistant(assistant_scoped_id: str, org_id: Optional[int]) -> ASSISTANT:
     if row is None:
         if org_id and scope == "org":
             raise ValueError(
-                f"No Persona with: id = '{assistant_scoped_id}' that belongs to org org_id= '{org_id}', scope= '{scope}'"
+                f"No Assistant with: id = '{id_}' that belongs to org org_id= '{org_id}', scope= '{scope}'"
             )
         else:
-            raise ValueError(f"No Persona with: id = '{assistant_scoped_id}' in global scope. scope= '{scope}'")
+            raise ValueError(f"No Assistant with: id = '{id_}' in global scope. scope= '{scope}'")
     return (row[0], row[1], row[2], row[3], row[4], row[5], row[6], row[7], row[8], assistant_scoped_id)

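Note on usage: get_assistant_or_default now accepts the composite scoped ID as a string rather than an int. A minimal sketch of how a caller might split such an ID, assuming the "<scope>_<id>" convention described in the docstring (the split_scoped_id helper below is illustrative, not part of the commit):

# Illustrative only: composite assistant IDs look like "org_3" or "global_1".
def split_scoped_id(assistant_scoped_id: str) -> tuple[str, int]:
    """Split a composite "<scope>_<id>" value into its scope and numeric id."""
    scope, id_str = assistant_scoped_id.split("_", 1)
    if scope not in ("org", "global"):
        raise ValueError(f"Unknown scope '{scope}' in '{assistant_scoped_id}'")
    return scope, int(id_str)

print(split_scoped_id("org_3"))     # ('org', 3)
print(split_scoped_id("global_1"))  # ('global', 1)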
source/docq/manage_spaces.py

Lines changed: 9 additions & 10 deletions
@@ -11,14 +11,13 @@
 from opentelemetry import trace

 import docq
-
-from .access_control.main import SpaceAccessor, SpaceAccessType
-from .config import SpaceType
-from .data_source.list import SpaceDataSources
-from .domain import DocumentListItem, SpaceKey
-from .manage_indices import _create_vector_index, _persist_index
-from .model_selection.main import get_saved_model_settings_collection
-from .support.store import get_sqlite_shared_system_file
+from docq.access_control.main import SpaceAccessor, SpaceAccessType
+from docq.config import SpaceType
+from docq.data_source.list import SpaceDataSources
+from docq.domain import DocumentListItem, SpaceKey
+from docq.manage_indices import _create_vector_index, _persist_index
+from docq.model_selection.main import get_saved_model_settings_collection
+from docq.support.store import get_sqlite_shared_system_file

 tracer = trace.get_tracer(__name__, docq.__version_str__)

@@ -151,6 +150,7 @@ def list_space(org_id: int, space_type: Optional[str] = None) -> list[SPACE]:
         )

         rows = cursor.fetchall()
+        print("spaces:", rows)
         return [_format_space(row) for row in rows]


@@ -354,8 +354,7 @@ def get_thread_space(org_id: int, thread_id: int) -> SpaceKey | None:
     with closing(
         sqlite3.connect(get_sqlite_shared_system_file(), detect_types=sqlite3.PARSE_DECLTYPES)
     ) as connection, closing(connection.cursor()) as cursor:
-
-        name = f"Thread-{thread_id} %"
+        name = f"Thread-{thread_id} %"  # FIXME: urg this is nasty.
        cursor.execute(
             "SELECT id FROM spaces WHERE org_id = ? AND name LIKE ? AND space_type = ?",
             (org_id, name, SpaceType.THREAD.name),
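The FIXME above exists because the thread-to-space link is resolved by a naming convention rather than a foreign key: a thread's adhoc-upload space is found by matching names like "Thread-42 ...". A small sketch of that convention, with the exact name format assumed only from the LIKE pattern in this diff:

# Illustrative only: how a thread space name and its lookup pattern relate.
def thread_space_name(thread_id: int, topic: str) -> str:
    """Assumed naming convention: 'Thread-<thread_id> <topic>'."""
    return f"Thread-{thread_id} {topic}"

def thread_space_like_pattern(thread_id: int) -> str:
    """Pattern used by get_thread_space to find the space for a thread."""
    return f"Thread-{thread_id} %"

name = thread_space_name(42, "contract uploads")
assert name.startswith(thread_space_like_pattern(42)[:-1])  # matches "Thread-42 "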

source/docq/model_selection/main.py

Lines changed: 21 additions & 18 deletions
@@ -13,6 +13,19 @@
 from typing import Any, Dict, Mapping, Optional

 import docq
+from docq.config import (
+    ENV_VAR_DOCQ_AZURE_OPENAI_API_BASE1,
+    ENV_VAR_DOCQ_AZURE_OPENAI_API_BASE2,
+    ENV_VAR_DOCQ_AZURE_OPENAI_API_KEY1,
+    ENV_VAR_DOCQ_AZURE_OPENAI_API_KEY2,
+    ENV_VAR_DOCQ_AZURE_OPENAI_API_VERSION,
+    ENV_VAR_DOCQ_GROQ_API_KEY,
+    EXPERIMENTS,
+    OrganisationSettingsKey,
+)
+from docq.manage_settings import get_organisation_settings
+from docq.support.llama_index.callbackhandlers import OtelCallbackHandler
+from docq.support.store import get_models_dir
 from llama_index.core.callbacks.base import CallbackManager
 from llama_index.core.embeddings import BaseEmbedding
 from llama_index.core.llms import LLM
@@ -25,20 +38,6 @@
 from opentelemetry import trace
 from vertexai.preview.generative_models import HarmBlockThreshold, HarmCategory

-from ..config import (
-    ENV_VAR_DOCQ_AZURE_OPENAI_API_BASE1,
-    ENV_VAR_DOCQ_AZURE_OPENAI_API_BASE2,
-    ENV_VAR_DOCQ_AZURE_OPENAI_API_KEY1,
-    ENV_VAR_DOCQ_AZURE_OPENAI_API_KEY2,
-    ENV_VAR_DOCQ_AZURE_OPENAI_API_VERSION,
-    ENV_VAR_DOCQ_GROQ_API_KEY,
-    EXPERIMENTS,
-    OrganisationSettingsKey,
-)
-from ..manage_settings import get_organisation_settings
-from ..support.llama_index.callbackhandlers import OtelCallbackHandler
-from ..support.store import get_models_dir
-
 tracer = trace.get_tracer(__name__, docq.__version_str__)

@@ -165,9 +164,10 @@ class LlmUsageSettingsCollection:
         provider=ModelProvider.AZURE_OPENAI,
         model_name="gpt-4o",
         model_deployment_name="gpt-4o-2024-05-13",
-        api_base=os.getenv(ENV_VAR_DOCQ_AZURE_OPENAI_API_BASE2) or "",
-        api_key=os.getenv(ENV_VAR_DOCQ_AZURE_OPENAI_API_KEY2) or "",
-        api_version=os.environ.get(ENV_VAR_DOCQ_AZURE_OPENAI_API_VERSION, "2023-05-15"),
+        api_base=os.getenv(ENV_VAR_DOCQ_AZURE_OPENAI_API_BASE2) or "base url missing",
+        api_key=os.getenv(ENV_VAR_DOCQ_AZURE_OPENAI_API_KEY2) or "api key missing",
+        # api_version=os.environ.get(ENV_VAR_DOCQ_AZURE_OPENAI_API_VERSION, "2023-05-15"),
+        api_version=os.environ.get(ENV_VAR_DOCQ_AZURE_OPENAI_API_VERSION, "2024-07-01-preview"),
         license_="Commercial",
     ),
     "azure-openai-ada-002": LlmServiceInstanceConfig(
@@ -282,7 +282,7 @@ class LlmUsageSettingsCollection:
         ModelCapability.CHAT: LlmUsageSettings(
             model_capability=ModelCapability.CHAT,
             temperature=0.7,
-            service_instance_config=LLM_SERVICE_INSTANCES["azure-openai-gpt35turbo"],
+            service_instance_config=LLM_SERVICE_INSTANCES["azure-openai-gpt4o-2024-05-13"],
         ),
         ModelCapability.EMBEDDING: LlmUsageSettings(
             model_capability=ModelCapability.EMBEDDING,
@@ -412,6 +412,9 @@ class LlmUsageSettingsCollection:
 def get_model_settings_collection(model_settings_collection_key: str) -> LlmUsageSettingsCollection:
     """Get the settings for the model."""
     try:
+        x = os.getenv(ENV_VAR_DOCQ_AZURE_OPENAI_API_BASE2)
+        if not x:
+            raise ValueError("Azure OpenAI API base 2 is missing")
         return LLM_MODEL_COLLECTIONS[model_settings_collection_key]
     except KeyError as e:
         log.error(
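With this change the Azure OpenAI collection points at the gpt-4o service instance and get_model_settings_collection fails fast when the second Azure OpenAI endpoint is not configured. A quick smoke-test sketch for those settings, using only the constant names imported in the diff above (their underlying environment-variable names live in docq.config):

# Sketch: verify the Azure OpenAI settings this collection relies on are present.
import os

from docq.config import (
    ENV_VAR_DOCQ_AZURE_OPENAI_API_BASE2,
    ENV_VAR_DOCQ_AZURE_OPENAI_API_KEY2,
    ENV_VAR_DOCQ_AZURE_OPENAI_API_VERSION,
)

for var in (ENV_VAR_DOCQ_AZURE_OPENAI_API_BASE2, ENV_VAR_DOCQ_AZURE_OPENAI_API_KEY2):
    if not os.getenv(var):
        print(f"Missing required setting: {var}")

# Falls back to the same default the diff uses when no version is set.
print("API version:", os.environ.get(ENV_VAR_DOCQ_AZURE_OPENAI_API_VERSION, "2024-07-01-preview"))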

source/docq/run_queries.py

Lines changed: 41 additions & 11 deletions
@@ -2,28 +2,28 @@

 import logging as log
 import sqlite3
-from concurrent.futures import thread
 from contextlib import closing
 from datetime import datetime
 from typing import Literal, Optional

 from llama_index.core.llms import ChatMessage, MessageRole
-from numpy import int32

+from docq.config import OrganisationFeatureType
+from docq.domain import FeatureKey, SpaceKey
+from docq.manage_assistants import Assistant
+from docq.manage_documents import format_document_sources
 from docq.model_selection.main import LlmUsageSettingsCollection
-
-from .config import OrganisationFeatureType
-from .domain import FeatureKey, SpaceKey
-from .manage_assistants import Assistant
-from .manage_documents import format_document_sources
-from .support.llm import query_error, run_ask, run_chat
-from .support.store import (
+from docq.support.llm import query_error, run_ask, run_chat
+from docq.support.store import (
     get_history_table_name,
     get_history_thread_table_name,
     get_public_sqlite_usage_file,
     get_sqlite_usage_file,
 )

+# TODO: add thread_space_id to hold the space that's hard attached to a thread for adhoc uploads
+# add space_ids dict / array to loosely persist space ids that are selected by a user.
+# add assistant_scoped_id to hold the assistant that's attached to the thread.
 SQL_CREATE_THREAD_TABLE = """
 CREATE TABLE IF NOT EXISTS {table} (
     id INTEGER PRIMARY KEY,
@@ -32,6 +32,7 @@
 )
 """

+
 SQL_CREATE_MESSAGE_TABLE = """
 CREATE TABLE IF NOT EXISTS {table} (
     id INTEGER PRIMARY KEY,
@@ -52,6 +53,7 @@


 def _save_messages(data: list[tuple[str, bool, datetime, int]], feature: FeatureKey) -> list:
+    """feature.id_ needs to be the user_id."""
     rows = []
     tablename = get_history_table_name(feature.type_)
     thread_tablename = get_history_thread_table_name(feature.type_)
@@ -124,7 +126,7 @@ def _retrieve_messages(


 def list_thread_history(feature: FeatureKey, id_: Optional[int] = None) -> list[tuple[int, str, int]]:
-    """List the history of threads."""
+    """List threads or a thread if id_ is provided."""
     tablename = get_history_thread_table_name(feature.type_)
     rows = None
     with closing(
@@ -206,7 +208,7 @@ def get_history_as_chat_messages(
     return history_chat_message


-def create_history_thread(topic: str, feature: FeatureKey) -> int:
+def create_history_thread(topic: str, feature: FeatureKey) -> int | None:
     """Create a new thread for the history i.e a new chat session."""
     tablename = get_history_thread_table_name(feature.type_)
     with closing(
@@ -225,6 +227,34 @@ def create_history_thread(topic: str, feature: FeatureKey) -> int:

     return id_

+def delete_thread(thread_id: int, feature: FeatureKey) -> bool:
+    """Delete a thread and its associated messages.
+
+    feature.id_ needs to be the user_id.
+    """
+    thread_tablename = get_history_thread_table_name(feature.type_)
+    message_tablename = get_history_table_name(feature.type_)
+    usage_file = (
+        get_sqlite_usage_file(feature.id_)
+        if feature.type_ != OrganisationFeatureType.ASK_PUBLIC
+        else get_public_sqlite_usage_file(str(feature.id_))
+    )
+    is_deleted = False
+    with closing(sqlite3.connect(usage_file, detect_types=sqlite3.PARSE_DECLTYPES)) as connection, closing(
+        connection.cursor()
+    ) as cursor:
+        cursor.execute("PRAGMA foreign_keys = ON;")
+        try:
+            cursor.execute(f"DELETE FROM {message_tablename} WHERE thread_id = ?", (thread_id,))  # noqa: S608
+            cursor.execute(f"DELETE FROM {thread_tablename} WHERE id = ?", (thread_id,))  # noqa: S608
+            connection.commit()
+            is_deleted = True
+        except sqlite3.Error as e:
+            connection.rollback()
+            # raise e
+            is_deleted = False
+    return is_deleted
+

 def get_latest_thread(feature: FeatureKey) -> tuple[int, str, int] | None:
     """Retrieve the most recently created thread.

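The new delete_thread helper removes a chat thread and its messages in one transaction (messages first, then the thread row, rolling back on any sqlite3 error). A hypothetical call site, assuming an existing FeatureKey whose id_ is the current user's ID as the docstrings above require:

# Sketch: deleting a thread for the current user's chat feature.
# `feature` is assumed to be a docq.domain.FeatureKey already constructed by the
# calling page, with feature.id_ set to the user_id (not shown in this diff).
from docq.run_queries import delete_thread, list_thread_history

def remove_thread(thread_id: int, feature) -> None:
    if delete_thread(thread_id=thread_id, feature=feature):
        print(f"Thread {thread_id} and its messages were deleted.")
    else:
        print(f"Thread {thread_id} could not be deleted; the transaction was rolled back.")
    # Remaining threads for this feature/user.
    print(list_thread_history(feature))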
source/docq/support/auth_utils.py

Lines changed: 2 additions & 2 deletions
@@ -9,11 +9,11 @@
 from typing import Dict, Optional

 import docq
+import streamlit as st
 from cachetools import TTLCache
 from cryptography.fernet import Fernet
 from opentelemetry import trace
 from streamlit.components.v1 import html
-from streamlit.web.server.websocket_headers import _get_websocket_headers

 from ..config import ENV_VAR_DOCQ_COOKIE_HMAC_SECRET_KEY, SESSION_COOKIE_NAME

@@ -80,7 +80,7 @@ def _clear_cookie(cookie_name: str) -> None:
 def _get_cookies() -> Optional[Dict[str, str]]:
     """Return client cookies."""
     try:
-        headers = _get_websocket_headers()
+        headers = st.context.headers  # _get_websocket_headers()
         if headers is None:
             return None
         cookie_str = str(headers.get("Cookie"))
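The switch to st.context.headers is what the Streamlit bump above enables: recent Streamlit releases expose the current request's headers through st.context, replacing the private streamlit.web.server.websocket_headers helper. A minimal sketch of reading a cookie with the new API (cookie parsing here is simplified relative to docq's own helper):

# Sketch: reading a named cookie via st.context.headers
# (available in recent Streamlit, e.g. the 1.38 pinned above).
import streamlit as st

def get_cookie(name: str) -> str | None:
    headers = st.context.headers
    cookie_str = headers.get("Cookie") if headers else None
    if not cookie_str:
        return None
    cookies = dict(
        pair.strip().split("=", 1) for pair in cookie_str.split(";") if "=" in pair
    )
    return cookies.get(name)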
