From 9997b7b9f2e3f86d6204a20aa6e7844ddfff004c Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Wed, 1 Nov 2023 16:40:20 +0900 Subject: [PATCH] fix env variables (#9) * avoid writing env variables of api keys --- README.md | 6 ++-- document_qa/document_qa_engine.py | 1 + requirements.txt | 1 - streamlit_app.py | 49 ++++++++++++++++++------------- 4 files changed, 32 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index bb02ba7..b0d0d59 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ --- -title: 📝 Scientific Document Insight QA -emoji: +title: Scientific Document Insights Q/A +emoji: 📝 colorFrom: yellow colorTo: pink sdk: streamlit @@ -10,7 +10,7 @@ pinned: false license: apache-2.0 --- -# DocumentIQA: Scientific Document Insight QA +# DocumentIQA: Scientific Document Insights Q/A **Work in progress** :construction_worker: diff --git a/document_qa/document_qa_engine.py b/document_qa/document_qa_engine.py index 447e088..22ca1e5 100644 --- a/document_qa/document_qa_engine.py +++ b/document_qa/document_qa_engine.py @@ -205,6 +205,7 @@ def create_memory_embeddings(self, pdf_path, doc_id=None, chunk_size=500, perc_o if doc_id: hash = doc_id else: + hash = metadata[0]['hash'] if hash not in self.embeddings_dict.keys(): diff --git a/requirements.txt b/requirements.txt index a8322f0..5943c56 100644 --- a/requirements.txt +++ b/requirements.txt @@ -19,7 +19,6 @@ chromadb==0.4.15 tiktoken==0.4.0 openai==0.27.7 langchain==0.0.314 -promptlayer==0.2.4 typing-inspect==0.9.0 typing_extensions==4.8.0 pydantic==2.4.2 diff --git a/streamlit_app.py b/streamlit_app.py index 3957f20..3b7c5f8 100644 --- a/streamlit_app.py +++ b/streamlit_app.py @@ -10,7 +10,7 @@ dotenv.load_dotenv(override=True) import streamlit as st -from langchain.chat_models import PromptLayerChatOpenAI +from langchain.chat_models import ChatOpenAI from langchain.embeddings import OpenAIEmbeddings, HuggingFaceEmbeddings from document_qa.document_qa_engine import DocumentQAEngine @@ -52,7 +52,7 @@ st.session_state['uploaded'] = False st.set_page_config( - page_title="Document Insights QA", + page_title="Scientific Document Insights Q/A", page_icon="📝", initial_sidebar_state="expanded", menu_items={ @@ -70,13 +70,21 @@ def new_file(): # @st.cache_resource -def init_qa(model): +def init_qa(model, api_key=None): if model == 'chatgpt-3.5-turbo': - chat = PromptLayerChatOpenAI(model_name="gpt-3.5-turbo", - temperature=0, - return_pl_id=True, - pl_tags=["streamlit", "chatgpt"]) - embeddings = OpenAIEmbeddings() + if api_key: + chat = ChatOpenAI(model_name="gpt-3.5-turbo", + temperature=0, + openai_api_key=api_key, + frequency_penalty=0.1) + embeddings = OpenAIEmbeddings(openai_api_key=api_key) + else: + chat = ChatOpenAI(model_name="gpt-3.5-turbo", + temperature=0, + frequency_penalty=0.1) + embeddings = OpenAIEmbeddings() + + elif model == 'mistral-7b-instruct-v0.1': chat = HuggingFaceHub(repo_id="mistralai/Mistral-7B-Instruct-v0.1", model_kwargs={"temperature": 0.01, "max_length": 4096, "max_new_tokens": 2048}) @@ -162,12 +170,11 @@ def play_old_messages(): st.markdown( ":warning: Mistral is free to use, however requests might hit limits of the huggingface free API and fail. :warning: ") - if model == 'mistral-7b-instruct-v0.1' or model == 'llama-2-70b-chat': + if model == 'mistral-7b-instruct-v0.1' and model not in st.session_state['api_keys']: if 'HUGGINGFACEHUB_API_TOKEN' not in os.environ: api_key = st.text_input('Huggingface API Key', type="password") - st.markdown( - "Get it [here](https://huggingface.co/docs/hub/security-tokens)") + st.markdown("Get it [here](https://huggingface.co/docs/hub/security-tokens)") else: api_key = os.environ['HUGGINGFACEHUB_API_TOKEN'] @@ -176,33 +183,33 @@ def play_old_messages(): if model not in st.session_state['rqa'] or model not in st.session_state['api_keys']: with st.spinner("Preparing environment"): st.session_state['api_keys'][model] = api_key - if 'HUGGINGFACEHUB_API_TOKEN' not in os.environ: - os.environ["HUGGINGFACEHUB_API_TOKEN"] = api_key + # if 'HUGGINGFACEHUB_API_TOKEN' not in os.environ: + # os.environ["HUGGINGFACEHUB_API_TOKEN"] = api_key st.session_state['rqa'][model] = init_qa(model) - elif model == 'chatgpt-3.5-turbo': + elif model == 'chatgpt-3.5-turbo' and model not in st.session_state['api_keys']: if 'OPENAI_API_KEY' not in os.environ: api_key = st.text_input('OpenAI API Key', type="password") - st.markdown( - "Get it [here](https://platform.openai.com/account/api-keys)") + st.markdown("Get it [here](https://platform.openai.com/account/api-keys)") else: api_key = os.environ['OPENAI_API_KEY'] if api_key: - # st.session_state['api_key'] = is_api_key_provided = True if model not in st.session_state['rqa'] or model not in st.session_state['api_keys']: with st.spinner("Preparing environment"): st.session_state['api_keys'][model] = api_key if 'OPENAI_API_KEY' not in os.environ: - os.environ['OPENAI_API_KEY'] = api_key - st.session_state['rqa'][model] = init_qa(model) + st.session_state['rqa'][model] = init_qa(model, api_key) + else: + st.session_state['rqa'][model] = init_qa(model) # else: # is_api_key_provided = st.session_state['api_key'] -st.title("📝 Scientific Document Insight QA") +st.title("📝 Scientific Document Insights Q/A") st.subheader("Upload a scientific article in PDF, ask questions, get insights.") -st.markdown(":warning: Do not upload sensitive data. We **temporarily** store text from the uploaded PDF documents solely for the purpose of processing your request, and we **do not assume responsibility** for any subsequent use or handling of the data submitted to third parties LLMs.") +st.markdown( + ":warning: Do not upload sensitive data. We **temporarily** store text from the uploaded PDF documents solely for the purpose of processing your request, and we **do not assume responsibility** for any subsequent use or handling of the data submitted to third parties LLMs.") uploaded_file = st.file_uploader("Upload an article", type=("pdf", "txt"), on_change=new_file, disabled=st.session_state['model'] is not None and st.session_state['model'] not in