diff --git a/demo/chat_pdf_streamlit_ui.py b/demo/chat_pdf_streamlit_ui.py
index 65368c4..310effa 100644
--- a/demo/chat_pdf_streamlit_ui.py
+++ b/demo/chat_pdf_streamlit_ui.py
@@ -49,6 +49,9 @@
 
 import zlib  # for crc32 checksums
 
+# Avoid re-entrance complaints from huggingface/tokenizers
+os.environ['TOKENIZERS_PARALLELISM'] = 'false'
+
 # Load the main parameters from .env file
 load_dotenv()
 # User can set a variety of likely values to trigger use of OpenAI full-service
@@ -66,6 +69,8 @@
 # LLM used for vector DB embeddings: https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2
 DOC_EMBEDDINGS_LLM = os.getenv('EMBED_CHUNK_OVERLAP', 'all-MiniLM-L12-v2')
 
+CONSOLE_WIDTH = 80
+
 PDF_USER_QUERY_PROMPT = 'Ask a question about your PDF:'
 
 # Streamlit caching to save on repeat loads
@@ -107,6 +112,52 @@ def prep_pdf(pdf, embedding_model, collection_name):
     return knowledge_base
 
 
+def query_llm(kb, openai_api, model):
+    user_query = st.session_state['user_query_str']
+
+    # Create placeholder st.empty() for throbber and LLM response
+    response_placeholder = st.empty()
+
+    # Load throbber from cache
+    throbber = load_throbber()
+    response_placeholder.image(throbber)
+
+    docs = kb.search(user_query, limit=K)
+
+    # Collects "chunked_doc" into "gathered_chunks"
+    gathered_chunks = '\n\n'.join(
+        doc.payload['_text'] for doc in docs if doc.payload)
+
+    # Build prompt with the doc chunks as context
+    prompt = format(
+        f'Given the context, {user_query}\n\n'
+        f'Context: """\n{gathered_chunks}\n"""\n',
+        preamble='### SYSTEM:\nYou are a helpful assistant, who answers '
+        'questions directly and as briefly as possible. '
+        'If you cannot answer with the given context, just say so.',
+        delimiters=CHATGPT_DELIMITERS)
+
+    print(' PROMPT FOR LLM: '.center(CONSOLE_WIDTH, '='))
+    print(prompt)
+
+    response = openai_api.Completion.create(
+        model=model,  # Model (Required)
+        prompt=prompt,  # Prompt (Required)
+        temperature=LLM_TEMP,  # Temp (Default 1)
+        max_tokens=1024,  # Max Token length of generated text (Default 16)
+        )
+
+    # Response is a json-like object; extract the text
+    print('\nFull response data from LLM:\n', response)
+
+    # response is a json-like object;
+    # just get back the text of the response
+    response_text = oapi_choice1_text(response)
+    print('\nResponse text from LLM:\n', response_text)
+
+    response_placeholder.write(response_text)
+
+
 def streamlit_loop(openai_api, model, LLM_TEMP):
     # Streamlit treats function docstrings as magic strings for user display
     '''
@@ -142,71 +193,34 @@ def streamlit_loop(openai_api, model, LLM_TEMP):
         new_pdf = True  # Flag to know if the new pdf needs to be embedded
 
     if pdf:  # Only run once the program has a "pdf" loaded
-        # Show throbber, embed the PDF, and get ready for similarity search
-        embedding_placeholder = st.empty()
-
-        # Load throbber from cache
-        throbber = load_throbber()
-        embedding_placeholder.image(throbber)
+        if st.session_state['embedding_model']:
+            # Show throbber, embed the PDF, and get ready for similarity search
+            embedding_placeholder = st.container()
 
-        # Get the embedding model
-        embedding_model = load_embedding_model(DOC_EMBEDDINGS_LLM)
+            embedding_placeholder.write('Embedding PDF...')
 
-        # Prepare a vector knowledgebase based on the pdf contents
-        # Use st.session_state to avoid unnecessary reprocessing/reloading
-        if new_pdf:
-            kb = prep_pdf(pdf, embedding_model, collection_name=pdf.name)
-            st.session_state['kb'] = kb
-        else:
-            kb = st.session_state['kb']
-
-        # Clear all elements in placeholder (in this case, just the throbber)
-        embedding_placeholder.empty()
-
-        # Get the user query
-        user_query = st.text_input(PDF_USER_QUERY_PROMPT)
-        if user_query:  # Only run once the program has a "user_query"
-            response_placeholder = st.empty()
-
             # Load throbber from cache
             throbber = load_throbber()
-            response_placeholder.image(throbber)
-
-            docs = kb.search(user_query, limit=K)
+            embedding_placeholder.image(throbber)
 
-            # Collects "chunked_doc" into "gathered_chunks"
-            gathered_chunks = '\n\n'.join(
-                doc.payload['_text'] for doc in docs if doc.payload)
+            # Get the embedding model
+            embedding_model = load_embedding_model(embedding_model_name=DOC_EMBEDDINGS_LLM)
 
-            # Build prompt the doc chunks as context
-            prompt = format(
-                f'Given the context, {user_query}\n\n'
-                f'Context: """\n{gathered_chunks}\n"""\n',
-                preamble='### SYSTEM:\nYou are a helpful assistant, who answers '
-                'questions directly and as briefly as possible. '
-                'If you cannot answer with the given context, just say so.',
-                delimiters=CHATGPT_DELIMITERS)
+            # Prepare a vector knowledgebase based on the pdf contents
+            # Use st.session_state to avoid unnecessary reprocessing/reloading
+            if new_pdf:
+                kb = prep_pdf(pdf, embedding_model, collection_name=pdf.name)
+                st.session_state['kb'] = kb
+            else:
+                kb = st.session_state['kb']
 
-            print(prompt)
+            st.session_state['embedding_model'] = False
 
-            response = openai_api.Completion.create(
-                model=model,  # Model (Required)
-                prompt=prompt,  # Prompt (Required)
-                temperature=LLM_TEMP,  # Temp (Default 1)
-                max_tokens=1024,  # Max Token length of generated text (Default 16)
-                )
+            # Rerun the app to hide the embedding throbber
+            st.experimental_rerun()
 
-            # Response is a json-like object; extract the text
-            print('\nFull response data from LLM:\n', response)
-
-            # response is a json-like object;
-            # just get back the text of the response
-            response_text = oapi_choice1_text(response)
-            print('\nResponse text from LLM:\n', response_text)
-
-            response_placeholder.write(response_text)
-
-            user_query = None
+        # Get the user query
+        st.text_input(label=PDF_USER_QUERY_PROMPT, key='user_query_str', on_change=query_llm, args=(kb, openai_api, model))
 
 
 def main():
@@ -223,6 +237,8 @@ def main():
         model = LLM or HOST_DEFAULT
         openai_api = openai_emulation(
             host=LLM_HOST, port=LLM_PORT, model=LLM, debug=True)
+
+    st.session_state['embedding_model'] = True
 
     streamlit_loop(openai_api, model, LLM_TEMP)
 