From 9997b7b9f2e3f86d6204a20aa6e7844ddfff004c Mon Sep 17 00:00:00 2001
From: Luca Foppiano <lfoppiano@users.noreply.github.com>
Date: Wed, 1 Nov 2023 16:40:20 +0900
Subject: [PATCH] fix env variables (#9)

* avoid writing API keys into environment variables
---
 README.md                         |  6 ++--
 document_qa/document_qa_engine.py |  1 +
 requirements.txt                  |  1 -
 streamlit_app.py                  | 49 ++++++++++++++++++-------------
 4 files changed, 32 insertions(+), 25 deletions(-)

diff --git a/README.md b/README.md
index bb02ba7..b0d0d59 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 ---
-title: 📝 Scientific Document Insight QA
-emoji: 
+title: Scientific Document Insights Q/A
+emoji: 📝
 colorFrom: yellow
 colorTo: pink
 sdk: streamlit
@@ -10,7 +10,7 @@ pinned: false
 license: apache-2.0
 ---
 
-# DocumentIQA: Scientific Document Insight QA
+# DocumentIQA: Scientific Document Insights Q/A
 
 **Work in progress** :construction_worker: 
 
diff --git a/document_qa/document_qa_engine.py b/document_qa/document_qa_engine.py
index 447e088..22ca1e5 100644
--- a/document_qa/document_qa_engine.py
+++ b/document_qa/document_qa_engine.py
@@ -205,6 +205,7 @@ def create_memory_embeddings(self, pdf_path, doc_id=None, chunk_size=500, perc_o
         if doc_id:
             hash = doc_id
         else:
+
             hash = metadata[0]['hash']
 
         if hash not in self.embeddings_dict.keys():
diff --git a/requirements.txt b/requirements.txt
index a8322f0..5943c56 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -19,7 +19,6 @@ chromadb==0.4.15
 tiktoken==0.4.0
 openai==0.27.7
 langchain==0.0.314
-promptlayer==0.2.4
 typing-inspect==0.9.0
 typing_extensions==4.8.0
 pydantic==2.4.2
diff --git a/streamlit_app.py b/streamlit_app.py
index 3957f20..3b7c5f8 100644
--- a/streamlit_app.py
+++ b/streamlit_app.py
@@ -10,7 +10,7 @@
 dotenv.load_dotenv(override=True)
 
 import streamlit as st
-from langchain.chat_models import PromptLayerChatOpenAI
+from langchain.chat_models import ChatOpenAI
 from langchain.embeddings import OpenAIEmbeddings, HuggingFaceEmbeddings
 
 from document_qa.document_qa_engine import DocumentQAEngine
@@ -52,7 +52,7 @@
     st.session_state['uploaded'] = False
 
 st.set_page_config(
-    page_title="Document Insights QA",
+    page_title="Scientific Document Insights Q/A",
     page_icon="📝",
     initial_sidebar_state="expanded",
     menu_items={
@@ -70,13 +70,21 @@ def new_file():
 
 
 # @st.cache_resource
-def init_qa(model):
+def init_qa(model, api_key=None):
     if model == 'chatgpt-3.5-turbo':
-        chat = PromptLayerChatOpenAI(model_name="gpt-3.5-turbo",
-                                     temperature=0,
-                                     return_pl_id=True,
-                                     pl_tags=["streamlit", "chatgpt"])
-        embeddings = OpenAIEmbeddings()
+        if api_key:
+            chat = ChatOpenAI(model_name="gpt-3.5-turbo",
+                              temperature=0,
+                              openai_api_key=api_key,
+                              frequency_penalty=0.1)
+            embeddings = OpenAIEmbeddings(openai_api_key=api_key)
+        else:
+            chat = ChatOpenAI(model_name="gpt-3.5-turbo",
+                              temperature=0,
+                              frequency_penalty=0.1)
+            embeddings = OpenAIEmbeddings()
+
+
     elif model == 'mistral-7b-instruct-v0.1':
         chat = HuggingFaceHub(repo_id="mistralai/Mistral-7B-Instruct-v0.1",
                               model_kwargs={"temperature": 0.01, "max_length": 4096, "max_new_tokens": 2048})
@@ -162,12 +170,11 @@ def play_old_messages():
     st.markdown(
         ":warning: Mistral is free to use, however requests might hit limits of the huggingface free API and fail. :warning: ")
 
-    if model == 'mistral-7b-instruct-v0.1' or model == 'llama-2-70b-chat':
+    if model == 'mistral-7b-instruct-v0.1' and model not in st.session_state['api_keys']:
         if 'HUGGINGFACEHUB_API_TOKEN' not in os.environ:
             api_key = st.text_input('Huggingface API Key', type="password")
 
-            st.markdown(
-                "Get it [here](https://huggingface.co/docs/hub/security-tokens)")
+            st.markdown("Get it [here](https://huggingface.co/docs/hub/security-tokens)")
         else:
             api_key = os.environ['HUGGINGFACEHUB_API_TOKEN']
 
@@ -176,33 +183,33 @@ def play_old_messages():
             if model not in st.session_state['rqa'] or model not in st.session_state['api_keys']:
                 with st.spinner("Preparing environment"):
                     st.session_state['api_keys'][model] = api_key
-                    if 'HUGGINGFACEHUB_API_TOKEN' not in os.environ:
-                        os.environ["HUGGINGFACEHUB_API_TOKEN"] = api_key
+                    # if 'HUGGINGFACEHUB_API_TOKEN' not in os.environ:
+                    #     os.environ["HUGGINGFACEHUB_API_TOKEN"] = api_key
                     st.session_state['rqa'][model] = init_qa(model)
 
-    elif model == 'chatgpt-3.5-turbo':
+    elif model == 'chatgpt-3.5-turbo' and model not in st.session_state['api_keys']:
         if 'OPENAI_API_KEY' not in os.environ:
             api_key = st.text_input('OpenAI API Key', type="password")
-            st.markdown(
-                "Get it [here](https://platform.openai.com/account/api-keys)")
+            st.markdown("Get it [here](https://platform.openai.com/account/api-keys)")
         else:
             api_key = os.environ['OPENAI_API_KEY']
 
         if api_key:
-            # st.session_state['api_key'] = is_api_key_provided = True
             if model not in st.session_state['rqa'] or model not in st.session_state['api_keys']:
                 with st.spinner("Preparing environment"):
                     st.session_state['api_keys'][model] = api_key
                     if 'OPENAI_API_KEY' not in os.environ:
-                        os.environ['OPENAI_API_KEY'] = api_key
-                    st.session_state['rqa'][model] = init_qa(model)
+                        st.session_state['rqa'][model] = init_qa(model, api_key)
+                    else:
+                        st.session_state['rqa'][model] = init_qa(model)
     # else:
     #     is_api_key_provided = st.session_state['api_key']
 
-st.title("📝 Scientific Document Insight QA")
+st.title("📝 Scientific Document Insights Q/A")
 st.subheader("Upload a scientific article in PDF, ask questions, get insights.")
 
-st.markdown(":warning: Do not upload sensitive data. We **temporarily** store text from the uploaded PDF documents solely for the purpose of processing your request, and we **do not assume responsibility** for any subsequent use or handling of the data submitted to third parties LLMs.")
+st.markdown(
+    ":warning: Do not upload sensitive data. We **temporarily** store text from the uploaded PDF documents solely for the purpose of processing your request, and we **do not assume responsibility** for any subsequent use or handling of the data submitted to third parties LLMs.")
 
 uploaded_file = st.file_uploader("Upload an article", type=("pdf", "txt"), on_change=new_file,
                                  disabled=st.session_state['model'] is not None and st.session_state['model'] not in