Skip to content

Commit

Permalink
Merge pull request #6 from sbslee/0.7.0-dev
Browse files Browse the repository at this point in the history
0.7.0 dev
  • Loading branch information
sbslee committed Jun 19, 2023
2 parents 4cf1a86 + d608475 commit 40e027f
Show file tree
Hide file tree
Showing 10 changed files with 371 additions and 211 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# CHANGELOG

## 0.7.0 (2023-06-19)
* Implement token counter and price monitor in chat window.
* Enable the Enter key for sending messages in chat window.
* Enable automatic chat session resizing based on window size.
* Enable users to download chat session as a text file.

## 0.6.0 (2023-06-12)
* Enable users to chat with .csv documents.
* Enable users to customize chat settings (e.g. font size and background color).
Expand Down
12 changes: 4 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,9 @@ There are currently two chatbots available in KANU:

Other features of KANU include:

- Customize chat settings (e.g. font size and background color)
- Customize chatbot parameters (e.g. prompt, temperature, and chunk size) by directly using the GUI or uploading a configuration file
- Customize chat settings (e.g. font size and background color)
- Display token counter and price monitor in chat window

## Installation

Expand Down Expand Up @@ -49,18 +50,13 @@ openai # Required.

![Alt Text](https://raw.githubusercontent.com/sbslee/kanu/main/images/docgpt.gif)

The following document formats are supported by DocGPT:

- .txt
- .pdf
- .doc and .docx
- .csv
DocGPT currently supports the following document formats: `.csv`, `.doc`, `.docx`, `.pdf`, and `.txt`.

The following packages are required to run DocGPT:

```
langchain # Required.
chromadb # Required.
chromadb # Required.
tiktoken # Required.
pdfminer.six # Optional. Only required for .pdf documents.
unstructured # Optional. Only required for .doc and .docx documents.
Expand Down
Binary file modified images/chatgpt.gif
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified images/docgpt.gif
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
34 changes: 22 additions & 12 deletions kanu/__main__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import configparser
import tkinter as tk
from tkinter import ttk
from tkinter import filedialog
import importlib.util

from .version import __version__
from .utils import Tooltip
from .gui import Tooltip

GPT_MODELS = ["gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-4", "gpt-4-32k"]
CHATGPT_PROMPT = """You are a helpful assistant."""
DOCGPT_PROMPT = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
Expand All @@ -19,7 +21,7 @@ def __init__(self, root):
self.container = None
self.root = root
self.root.title(f"KANU ({__version__})")
self.root.geometry("600x620")
self.root.geometry("700x620")
self.homepage()

def homepage(self):
Expand Down Expand Up @@ -51,13 +53,11 @@ def config_chatgpt(self):
b.grid(row=4, column=1)
m = tk.Message(self.container, width=300, text="Option 2. Configure manually")
m.grid(row=5, column=0, columnspan=2)
self.model = tk.StringVar(self.container, value="gpt-3.5-turbo")
l = tk.Label(self.container, text="Model:")
l.grid(row=6, column=0, columnspan=2)
b = tk.Radiobutton(self.container, variable=self.model, text="gpt-3.5-turbo", value="gpt-3.5-turbo")
b.grid(row=7, column=0)
b = tk.Radiobutton(self.container, variable=self.model, text="gpt-4", value="gpt-4")
b.grid(row=7, column=1)
self.model = tk.StringVar(self.container, value="gpt-3.5-turbo")
om = ttk.OptionMenu(self.container, self.model, *GPT_MODELS)
om.grid(row=7, column=0, columnspan=2)
l = tk.Label(self.container, text="System message ⓘ:")
Tooltip(l, "The system message helps set the behavior of the chatbot.")
l.grid(row=8, column=0, columnspan=2)
Expand Down Expand Up @@ -127,10 +127,8 @@ def config_docgpt(self):
self.model = tk.StringVar(self.container, value="gpt-3.5-turbo")
l = tk.Label(self.container, text="Model:")
l.grid(row=12, column=0, columnspan=2)
rb = tk.Radiobutton(self.container, variable=self.model, text="gpt-3.5-turbo", value="gpt-3.5-turbo")
rb.grid(row=13, column=0)
rb = tk.Radiobutton(self.container, variable=self.model, text="gpt-4", value="gpt-4")
rb.grid(row=13, column=1)
om = ttk.OptionMenu(self.container, self.model, *GPT_MODELS)
om.grid(row=13, column=0, columnspan=2)
l = tk.Label(self.container, text="System message ⓘ:")
Tooltip(l, "The system message helps set the behavior of the chatbot.")
l.grid(row=14, column=0, columnspan=2)
Expand Down Expand Up @@ -161,7 +159,18 @@ def parse_docgpt_config(self):
if not file_path:
return
config.read(file_path)
self.deploy_agent("DocGPT", config["USER"]["openai_key"], config["DEFAULT"]["model"], float(config["DEFAULT"]["temperature"]), config["DEFAULT"]["prompt"], config["DEFAULT"]["chunk_size"], config["DEFAULT"]["chunk_overlap"])
self.deploy_agent(
"DocGPT",
config["USER"]["openai_key"],
config["DEFAULT"]["model"],
float(config["DEFAULT"]["temperature"]),
config["DEFAULT"]["prompt"],
config["DEFAULT"]["chunk_size"],
config["DEFAULT"]["chunk_overlap"],
config["OPTIONAL"]["new_database_directory"],
config["OPTIONAL"]["document_directory"],
config["OPTIONAL"]["existing_database_directory"],
)

def template_docgpt_config(self):
file_path = filedialog.asksaveasfilename()
Expand All @@ -170,6 +179,7 @@ def template_docgpt_config(self):
config = configparser.ConfigParser()
config["DEFAULT"] = {"model": "gpt-3.5-turbo", "temperature": "0.5", "prompt": DOCGPT_PROMPT, "chunk_size": 1000, "chunk_overlap": 50}
config["USER"] = {"openai_key": ""}
config["OPTIONAL"] = {"new_database_directory": "", "document_directory": "", "existing_database_directory": ""}
with open(file_path, "w") as f:
config.write(f)

Expand Down
52 changes: 25 additions & 27 deletions kanu/chatgpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

import openai

from .utils import Settings
from .gui import Settings, Conversation
from .utils import tokens2price

class ChatGPT:
def __init__(self, kanu, openai_key, model, temperature, prompt):
Expand All @@ -12,44 +13,41 @@ def __init__(self, kanu, openai_key, model, temperature, prompt):
self.prompt = prompt
openai.api_key = openai_key
self.settings = Settings(self)
self.conversation = Conversation(self)
self.tokens = 0
self.price = 0

def run(self):
self.kanu.container.pack_forget()
self.kanu.container = tk.Frame(self.kanu.root)
self.kanu.container.pack()
l = tk.Label(self.kanu.container, text="ChatGPT")
l.grid(row=0, column=0, columnspan=4)
self.session = tk.Text(self.kanu.container, width=70, height=20)
self.session.grid(row=1, column=0, columnspan=4)
self.session.tag_config("user", **self.settings.get_user_kwargs())
self.session.tag_config("bot", **self.settings.get_bot_kwargs())
user_input = tk.Entry(self.kanu.container, width=54)
user_input.grid(row=2, column=0, columnspan=4)
self.messages = []
b = tk.Button(self.kanu.container, text="Send", command=lambda: self.send_message(user_input))
b.grid(row=3, column=0)
b = tk.Button(self.kanu.container, text="Clear", command=lambda: self.clear_session())
b.grid(row=3, column=1)
b = tk.Button(self.kanu.container, text="Go back", command=lambda: self.kanu.config_chatgpt())
b.grid(row=3, column=2)
b = tk.Button(self.kanu.container, text="Settings", command=lambda: self.settings.page())
b.grid(row=3, column=3)
self.conversation.page()

def send_message(self, entry):
def send_message(self):
if not self.messages:
self.messages.append({"role": "system", "content": self.prompt})
self.messages += [{"role": "user", "content": entry.get()}]
self.messages += [{"role": "user", "content": self.user_input.get()}]
bot_response = openai.ChatCompletion.create(
model=self.model,
messages=self.messages,
temperature=self.temperature,
)
response = bot_response["choices"][0]["message"]["content"]
self.messages += [{"role": "assistant", "content": response}]
self.session.insert(tk.END, "You: " + entry.get() + "\n", "user")
self.session.insert(tk.END, "You: " + self.user_input.get() + "\n", "user")
self.session.insert(tk.END, f"Bot: " + response + "\n", "bot")
entry.delete(0, tk.END)
usage = self.calculate_usage(bot_response)
self.system.insert(tk.END, f"{usage}\n", "system")
self.chatbox.delete(0, tk.END)

def calculate_usage(self, response):
    """Add one completion's usage to the running totals and describe it.

    The prompt, completion, and total token counts are read from
    ``response["usage"]``. Prompt and completion tokens are priced
    separately via ``tokens2price`` for ``self.model``; the summed price is
    added to ``self.price`` and the total token count to ``self.tokens``.

    Returns:
        The "System: ..." line reporting this exchange's token counts
        together with the accumulated token and price totals.
    """
    usage = response["usage"]
    n_total = usage["total_tokens"]
    n_prompt = usage["prompt_tokens"]
    n_completion = usage["completion_tokens"]

    cost_prompt = tokens2price(self.model, "prompt", n_prompt)
    cost_completion = tokens2price(self.model, "completion", n_completion)

    # Update the running totals before formatting: the message reports them.
    self.price += cost_prompt + cost_completion
    self.tokens += n_total

    return (
        f"System: Used {n_prompt:,} prompt + {n_completion:,} completion = "
        f"{n_total:,} tokens (total: {self.tokens:,} or ${self.price:.6f})."
    )

def clear_session(self):
self.session.delete(1.0, tk.END)
self.messages.clear()
self.tokens = self.price = 0
self.run()
107 changes: 70 additions & 37 deletions kanu/docgpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,16 @@
from langchain.chains import ConversationalRetrievalChain
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory

from langchain.callbacks import get_openai_callback
from langchain.document_loaders import (
TextLoader,
PDFMinerLoader,
UnstructuredWordDocumentLoader,
CSVLoader,
)

from .utils import Tooltip, Settings
from .gui import Tooltip, Settings, Conversation
from .utils import tokens2price, text2tokens

DOCUMENT_LOADERS = {
".txt": (TextLoader, {"encoding": "utf8"}),
Expand All @@ -28,7 +29,19 @@
}

class DocGPT:
def __init__(self, kanu, openai_key, model, temperature, prompt, default_chunk_size, default_chunk_overlap):
def __init__(
self,
kanu,
openai_key,
model,
temperature,
prompt,
default_chunk_size,
default_chunk_overlap,
new_database_directory="",
document_directory="",
existing_database_directory="",
):
self.kanu = kanu
self.model = model
self.temperature = temperature
Expand All @@ -37,11 +50,19 @@ def __init__(self, kanu, openai_key, model, temperature, prompt, default_chunk_s
self.default_chunk_overlap = default_chunk_overlap
os.environ["OPENAI_API_KEY"] = openai_key
self.settings = Settings(self)
self.conversation = Conversation(self)
self.tokens = 0
self.price = 0
self.new_database_directory = new_database_directory
self.document_directory = document_directory
self.existing_database_directory = existing_database_directory

def run(self):
self.kanu.container.pack_forget()
self.kanu.container = tk.Frame(self.kanu.root)
self.kanu.container.pack()
self.kanu.container.bind_all("<Return>", lambda event: self.send_message())
self.kanu.container.focus_set()
l = tk.Label(self.kanu.container, text="DocGPT")
l.grid(row=0, column=0, columnspan=3)
b = tk.Button(self.kanu.container, text="Go back", command=lambda: self.kanu.config_docgpt())
Expand Down Expand Up @@ -84,13 +105,22 @@ def run(self):
l = tk.Label(self.kanu.container, text="Database ⓘ:")
Tooltip(l, "Directory where the database is stored.")
l.grid(row=9, column=0)
self.old_database_label = tk.Label(self.kanu.container, text="Not selected", fg="red")
self.old_database_label.grid(row=9, column=1)
b = tk.Button(self.kanu.container, text="Browse", command=self.specify_old_database_directory)
self.existing_database_label = tk.Label(self.kanu.container, text="Not selected", fg="red")
self.existing_database_label.grid(row=9, column=1)
b = tk.Button(self.kanu.container, text="Browse", command=self.specify_existing_database_directory)
b.grid(row=9, column=2)
self.option2_button = tk.Button(self.kanu.container, text="Go with Option 2", command=self.go_with_option2)
self.option2_button.grid(row=10, column=0, columnspan=3)
self.option2_button["state"] = tk.DISABLED
if self.new_database_directory:
self.new_database_label.configure(text=os.path.basename(self.new_database_directory), fg="lime green")
if self.document_directory:
self.document_label.configure(text=os.path.basename(self.document_directory), fg="lime green")
if self.new_database_label["text"] != "Not selected" and self.document_label["text"] != "Not selected":
self.option1_button["state"] = tk.NORMAL
if self.existing_database_directory:
self.existing_database_label.configure(text=os.path.basename(self.existing_database_directory), fg="lime green")
self.option2_button["state"] = tk.NORMAL

def query(self):
self.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
Expand All @@ -102,33 +132,28 @@ def query(self):
chain_type="stuff",
combine_docs_chain_kwargs={"prompt": PromptTemplate(template=self.prompt, input_variables=["context", "question"])}
)
self.kanu.container.pack_forget()
self.kanu.container = tk.Frame(self.kanu.root)
self.kanu.container.pack()
l = tk.Label(self.kanu.container, text="DocGPT")
l.grid(row=0, column=0, columnspan=4)
self.session = tk.Text(self.kanu.container, width=70, height=20)
self.session.grid(row=1, column=0, columnspan=4)
self.session.tag_config("user", **self.settings.get_user_kwargs())
self.session.tag_config("bot", **self.settings.get_bot_kwargs())
user_input = tk.Entry(self.kanu.container, width=54)
user_input.grid(row=2, column=0, columnspan=4)
b = tk.Button(self.kanu.container, text="Send", command=lambda: self.send_message(user_input))
b.grid(row=3, column=0)
b = tk.Button(self.kanu.container, text="Clear", command=lambda: self.clear_session())
b.grid(row=3, column=1)
b = tk.Button(self.kanu.container, text="Go back", command=lambda: self.run())
b.grid(row=3, column=2)
b = tk.Button(self.kanu.container, text="Settings", command=lambda: self.settings.page())
b.grid(row=3, column=3)
self.conversation.page()

def send_message(self):
self.session.insert(tk.END, "You: " + self.user_input.get() + "\n", "user")
with get_openai_callback() as cb:
response = self.qa(self.user_input.get())
usage = self.calculate_usage(cb)
self.session.insert(tk.END, "Bot: " + response["answer"] + "\n", "bot")
self.system.insert(tk.END, f"{usage}\n", "system")
self.chatbox.delete(0, tk.END)

def send_message(self, entry):
self.session.insert(tk.END, "You: " + entry.get() + "\n", "user")
response = self.qa(entry.get())["answer"]
self.session.insert(tk.END, "Bot: " + response + "\n", "bot")
entry.delete(0, tk.END)
def calculate_usage(self, cb):
    """Add one query's usage to the running totals and describe it.

    ``cb`` supplies ``prompt_tokens``, ``completion_tokens`` and
    ``total_tokens``. Prompt and completion tokens are priced separately
    via ``tokens2price`` for ``self.model``; the summed price is added to
    ``self.price`` and the total token count to ``self.tokens``.

    Returns:
        The "System: ..." line reporting this query's token counts together
        with the accumulated token and price totals.
    """
    n_prompt = cb.prompt_tokens
    n_completion = cb.completion_tokens

    cost_prompt = tokens2price(self.model, "prompt", n_prompt)
    cost_completion = tokens2price(self.model, "completion", n_completion)

    # Update the running totals before formatting: the message reports them.
    self.price += cost_prompt + cost_completion
    n_total = cb.total_tokens
    self.tokens += n_total

    return (
        f"System: Used {n_prompt:,} prompt + {n_completion:,} completion = "
        f"{n_total:,} tokens (total: {self.tokens:,} or ${self.price:.6f})."
    )

def go_with_option1(self):
self.database_directory = self.new_database_directory
self.tokens = self.price = 0
documents = []
for root, dirs, files in os.walk(self.document_directory):
for file in files:
Expand All @@ -142,13 +167,20 @@ def go_with_option1(self):
documents.extend(document)
text_splitter = RecursiveCharacterTextSplitter(chunk_size=self.chunk_size.get(), chunk_overlap=self.chunk_overlap.get())
texts = text_splitter.split_documents(documents)
db = Chroma.from_documents(texts, OpenAIEmbeddings(), persist_directory=self.database_directory)
for text in texts:
self.tokens += text2tokens("text-embedding-ada-002", text.page_content)
self.price = tokens2price("text-embedding-ada-002", "embedding", self.tokens)
db = Chroma.from_documents(texts, OpenAIEmbeddings(model="text-embedding-ada-002"), persist_directory=self.database_directory)
db.add_documents(texts)
db.persist()
db = None
self.existing = False
self.query()

def go_with_option2(self):
    """Start a query session against the already-selected existing database.

    Points ``self.database_directory`` at ``self.existing_database_directory``,
    resets the token and price counters to zero, sets ``self.existing`` to
    ``True``, and then hands off to ``self.query()``.
    """
    chosen_directory = self.existing_database_directory
    self.database_directory = chosen_directory

    # Usage counters start from zero for this session.
    self.tokens = 0
    self.price = 0

    self.existing = True
    self.query()

def specify_document_directory(self):
Expand All @@ -164,19 +196,20 @@ def specify_new_database_directory(self):
directory_path = filedialog.askdirectory()
if not directory_path:
return
self.database_directory = directory_path
self.new_database_directory = directory_path
self.new_database_label.configure(text=os.path.basename(directory_path), fg="lime green")
if self.document_label["text"] != "No file selected":
self.option1_button["state"] = tk.NORMAL

def specify_old_database_directory(self):
def specify_existing_database_directory(self):
directory_path = filedialog.askdirectory()
if not directory_path:
return
self.database_directory = directory_path
self.old_database_label.configure(text=os.path.basename(directory_path), fg="lime green")
self.existing_database_directory = directory_path
self.existing_database_label.configure(text=os.path.basename(directory_path), fg="lime green")
self.option2_button["state"] = tk.NORMAL

def clear_session(self):
self.session.delete(1.0, tk.END)

self.existing = True
self.tokens = self.price = 0
self.query()
Loading

0 comments on commit 40e027f

Please sign in to comment.