Skip to content

Commit 9c5538b

Browse files
authored
Merge pull request #25 from lfoppiano/review-interface
Interface updates, more models
2 parents ab9a153 + 01b5fcd commit 9c5538b

File tree

2 files changed

+33
-18
lines changed

2 files changed

+33
-18
lines changed

README.md

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ The conversation is kept in memory by a buffered sliding window memory (top 4 mo
4141
## Getting started
4242

4343
- Select the model+embedding combination you want to use
44-
- If using OpenAI, enter your API Key ([Open AI](https://platform.openai.com/account/api-keys)~~ or [Huggingface](https://huggingface.co/docs/hub/security-tokens))~~.
44+
- If using gpt3.5-turbo, gpt4 or gpt4-turbo, enter your OpenAI API key ([OpenAI](https://platform.openai.com/account/api-keys)).
4545
- Upload a scientific article as a PDF document. You will see a spinner or loading indicator while the processing is in progress.
4646
- Once the spinner disappears, you can proceed to ask your questions
4747

@@ -77,6 +77,14 @@ Error: `streamlit: Your system has an unsupported version of sqlite3. Chroma req
7777
Here is the [solution on Linux](https://stackoverflow.com/questions/76958817/streamlit-your-system-has-an-unsupported-version-of-sqlite3-chroma-requires-sq).
7878
For more information, see the [details](https://docs.trychroma.com/troubleshooting#sqlite) on the Chroma website.
7979

80+
## Disclaimer on Data, Security, and Privacy ⚠️
81+
82+
Please read carefully:
83+
84+
- Avoid uploading sensitive data. We temporarily store text from the uploaded PDF documents only for processing your request, and we disclaim any responsibility for subsequent use or handling of the submitted data by third-party LLMs.
85+
- Mistral and Zephyr are FREE to use and do not require any API key, but as we leverage the free API endpoint, there is no guarantee that all requests will go through. Use at your own risk.
86+
- We do not assume responsibility for how the data is used by the LLM API endpoints.
87+
8088
## Development notes
8189

8290
To release a new version:

streamlit_app.py

Lines changed: 24 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,10 @@
1919
from document_qa.grobid_processors import GrobidAggregationProcessor, decorate_text_with_annotations
2020
from grobid_client_generic import GrobidClientGeneric
2121

22+
OPENAI_MODELS = ['chatgpt-3.5-turbo',
23+
"gpt-4",
24+
"gpt-4-1106-preview"]
25+
2226
if 'rqa' not in st.session_state:
2327
st.session_state['rqa'] = {}
2428

@@ -117,17 +121,17 @@ def clear_memory():
117121
# @st.cache_resource
118122
def init_qa(model, api_key=None):
119123
## For debug add: callbacks=[PromptLayerCallbackHandler(pl_tags=["langchain", "chatgpt", "document-qa"])])
120-
if model == 'chatgpt-3.5-turbo':
124+
if model in OPENAI_MODELS:
121125
st.session_state['memory'] = ConversationBufferWindowMemory(k=4)
122126
if api_key:
123-
chat = ChatOpenAI(model_name="gpt-3.5-turbo",
127+
chat = ChatOpenAI(model_name=model,
124128
temperature=0,
125129
openai_api_key=api_key,
126130
frequency_penalty=0.1)
127131
embeddings = OpenAIEmbeddings(openai_api_key=api_key)
128132

129133
else:
130-
chat = ChatOpenAI(model_name="gpt-3.5-turbo",
134+
chat = ChatOpenAI(model_name=model,
131135
temperature=0,
132136
frequency_penalty=0.1)
133137
embeddings = OpenAIEmbeddings()
@@ -206,20 +210,23 @@ def play_old_messages():
206210
# is_api_key_provided = st.session_state['api_key']
207211

208212
with st.sidebar:
209-
st.session_state['model'] = model = st.radio(
210-
"Model",
211-
("chatgpt-3.5-turbo", "mistral-7b-instruct-v0.1", "zephyr-7b-beta"),
212-
index=2,
213-
captions=[
214-
"ChatGPT 3.5 Turbo + Ada-002-text (embeddings)",
215-
"Mistral-7B-Instruct-V0.1 + Sentence BERT (embeddings) :free:",
216-
"Zephyr-7B-beta + Sentence BERT (embeddings) :free:"
213+
st.session_state['model'] = model = st.selectbox(
214+
"Model:",
215+
options=[
216+
"chatgpt-3.5-turbo",
217+
"mistral-7b-instruct-v0.1",
218+
"zephyr-7b-beta",
219+
"gpt-4",
220+
"gpt-4-1106-preview"
217221
],
218-
help="Select the LLM model and embeddings you want to use.",
219-
disabled=st.session_state['doc_id'] is not None or st.session_state['uploaded'])
222+
index=2,
223+
placeholder="Select model",
224+
help="Select the LLM model:",
225+
disabled=st.session_state['doc_id'] is not None or st.session_state['uploaded']
226+
)
220227

221228
st.markdown(
222-
":warning: Mistral and Zephyr are **FREE** to use. Requests might fail anytime. Use at your own risk. :warning: ")
229+
":warning: [Usage disclaimer](https://github.com/lfoppiano/document-qa/tree/review-interface#disclaimer-on-data-security-and-privacy-%EF%B8%8F) :warning: ")
223230

224231
if (model == 'mistral-7b-instruct-v0.1' or model == 'zephyr-7b-beta') and model not in st.session_state['api_keys']:
225232
if 'HUGGINGFACEHUB_API_TOKEN' not in os.environ:
@@ -238,7 +245,7 @@ def play_old_messages():
238245
# os.environ["HUGGINGFACEHUB_API_TOKEN"] = api_key
239246
st.session_state['rqa'][model] = init_qa(model)
240247

241-
elif model == 'chatgpt-3.5-turbo' and model not in st.session_state['api_keys']:
248+
elif model in OPENAI_MODELS and model not in st.session_state['api_keys']:
242249
if 'OPENAI_API_KEY' not in os.environ:
243250
api_key = st.text_input('OpenAI API Key', type="password")
244251
st.markdown("Get it [here](https://platform.openai.com/account/api-keys)")
@@ -297,9 +304,9 @@ def play_old_messages():
297304
help="Number of chunks to consider when answering a question",
298305
disabled=not uploaded_file)
299306

300-
st.session_state['ner_processing'] = st.checkbox("Named Entities Recognition (NER) processing on LLM response")
307+
st.session_state['ner_processing'] = st.checkbox("Identify materials and properties.")
301308
st.markdown(
302-
'**NER on LLM responses**: The responses from the LLMs are post-processed to extract <span style="color:orange">physical quantities, measurements</span> and <span style="color:green">materials</span> mentions.',
309+
'The LLM responses undergo post-processing to extract <span style="color:orange">physical quantities, measurements</span>, and <span style="color:green">materials</span> mentions.',
303310
unsafe_allow_html=True)
304311

305312
st.divider()

0 commit comments

Comments
 (0)