Skip to content

Commit

Permalink
Added a privacy statement, minor cosmetic fixes to the key information, NER …
Browse files Browse the repository at this point in the history
…written without abbreviation
  • Loading branch information
lfoppiano committed Oct 30, 2023
1 parent 6915a03 commit 0f074cc
Showing 1 changed file with 27 additions and 15 deletions.
42 changes: 27 additions & 15 deletions streamlit_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,13 @@
# Seed Streamlit session-state flags on first run so later reads never KeyError.

# Whether NER post-processing of LLM responses is enabled (toggled by the
# "Named Entities Recognition (NER) processing on LLM response" checkbox).
if 'ner_processing' not in st.session_state:
    st.session_state['ner_processing'] = False

# Whether a document has been uploaded in this session; set True by new_file()
# and read to disable the model radio after an upload.
if 'uploaded' not in st.session_state:
    st.session_state['uploaded'] = False

def new_file():
    """on_change callback for the file uploader.

    Resets the per-document session state (cached embeddings and the
    current document id) and records that an upload has taken place,
    which disables the model selector.
    """
    state = st.session_state
    state['loaded_embeddings'] = None
    state['doc_id'] = None
    state['uploaded'] = True

# @st.cache_resource
def init_qa(model):
Expand Down Expand Up @@ -128,27 +130,33 @@ def play_old_messages():
else:
st.write(message['content'])


# is_api_key_provided = st.session_state['api_key']

with st.sidebar:
st.markdown(
":warning: Do not upload sensitive data. We **temporarily** store text from the uploaded PDF documents solely for the purpose of processing your request, and we **do not assume responsibility** for any subsequent use or handling of the data submitted to third parties LLMs.")

st.session_state['model'] = model = st.radio(
"Model (cannot be changed after selection or upload)",
"Model",
("chatgpt-3.5-turbo", "mistral-7b-instruct-v0.1"), # , "llama-2-70b-chat"),
index=1,
captions=[
"ChatGPT 3.5 Turbo + Ada-002-text (embeddings)",
"Mistral-7B-Instruct-V0.1 + Sentence BERT (embeddings)"
# "LLama2-70B-Chat + Sentence BERT (embeddings)",
],
help="Select the model you want to use.",
disabled=st.session_state['doc_id'] is not None)
help="Select the LLM model and embeddings you want to use.",
disabled=st.session_state['doc_id'] is not None or st.session_state['uploaded'])

if model == 'mistral-7b-instruct-v0.1' or model == 'llama-2-70b-chat':
api_key = st.text_input('Huggingface API Key',
type="password") if 'HUGGINGFACEHUB_API_TOKEN' not in os.environ else os.environ[
'HUGGINGFACEHUB_API_TOKEN']
st.markdown(
"Get it for [Open AI](https://platform.openai.com/account/api-keys) or [Huggingface](https://huggingface.co/docs/hub/security-tokens)")
if 'HUGGINGFACEHUB_API_TOKEN' not in os.environ:
api_key = st.text_input('Huggingface API Key', type="password")

st.markdown(
"Get it for [Open AI](https://platform.openai.com/account/api-keys) or [Huggingface](https://huggingface.co/docs/hub/security-tokens)")
else:
api_key = os.environ['HUGGINGFACEHUB_API_TOKEN']

if api_key:
# st.session_state['api_key'] = is_api_key_provided = True
Expand All @@ -159,10 +167,13 @@ def play_old_messages():
st.session_state['rqa'][model] = init_qa(model)

elif model == 'chatgpt-3.5-turbo':
api_key = st.text_input('OpenAI API Key', type="password") if 'OPENAI_API_KEY' not in os.environ else \
os.environ['OPENAI_API_KEY']
st.markdown(
"Get it for [Open AI](https://platform.openai.com/account/api-keys) or [Huggingface](https://huggingface.co/docs/hub/security-tokens)")
if 'OPENAI_API_KEY' not in os.environ:
api_key = st.text_input('OpenAI API Key', type="password")
st.markdown(
"Get it for [Open AI](https://platform.openai.com/account/api-keys) or [Huggingface](https://huggingface.co/docs/hub/security-tokens)")
else:
api_key = os.environ['OPENAI_API_KEY']

if api_key:
# st.session_state['api_key'] = is_api_key_provided = True
with st.spinner("Preparing environment"):
Expand All @@ -177,7 +188,8 @@ def play_old_messages():
st.subheader("Upload a scientific article in PDF, ask questions, get insights.")

uploaded_file = st.file_uploader("Upload an article", type=("pdf", "txt"), on_change=new_file,
disabled=st.session_state['model'] is not None and st.session_state['model'] not in st.session_state['api_keys'],
disabled=st.session_state['model'] is not None and st.session_state['model'] not in
st.session_state['api_keys'],
help="The full-text is extracted using Grobid. ")

question = st.chat_input(
Expand All @@ -198,7 +210,7 @@ def play_old_messages():
help="Number of chunks to consider when answering a question",
disabled=not uploaded_file)

st.session_state['ner_processing'] = st.checkbox("NER processing on LLM response")
st.session_state['ner_processing'] = st.checkbox("Named Entities Recognition (NER) processing on LLM response")
st.markdown(
'**NER on LLM responses**: The responses from the LLMs are post-processed to extract <span style="color:orange">physical quantities, measurements</span> and <span style="color:green">materials</span> mentions.',
unsafe_allow_html=True)
Expand Down

0 comments on commit 0f074cc

Please sign in to comment.