From ea587e16392817ad24e298fbd5486efcff27d807 Mon Sep 17 00:00:00 2001
From: Uche Ogbuji
Date: Fri, 15 Mar 2024 20:31:18 -0600
Subject: [PATCH] Repair demos

---
 demo/chat_pdf_streamlit_ui.py | 4 ++--
 demo/chat_web_selects.py      | 5 ++---
 demo/function_calling.py      | 2 +-
 demo/multiprocess.py          | 7 +++----
 demo/qa_discord.py            | 8 ++++----
 demo/simple_fix_xml.py        | 7 +++----
 pylib/llm_wrapper.py          | 1 +
 7 files changed, 16 insertions(+), 18 deletions(-)

diff --git a/demo/chat_pdf_streamlit_ui.py b/demo/chat_pdf_streamlit_ui.py
index ea9563c..9eb3001 100644
--- a/demo/chat_pdf_streamlit_ui.py
+++ b/demo/chat_pdf_streamlit_ui.py
@@ -173,12 +173,12 @@ def query_llm(openai_api):
     # Need to remove old system messages for subsequent queries
     st.session_state['messages'].extend(messages)
     # print(st.session_state['messages'], '\n', '-'*10)
-    response = oapi(st.session_state['messages'], temperature=LLM_TEMP, max_tokens=1024)
+    response = oapi.call(st.session_state['messages'], temperature=LLM_TEMP, max_tokens=1024)
 
     print('\nFull response data from LLM:\n', response)
 
     # Response is a json-like object; extract the text
-    response_text = oapi.first_choice_message(response)
+    response_text = response.first_choice_text
     print('\nResponse text from LLM:\n', response_text)
 
     st.session_state['messages'].append({'role': 'assistant', 'content': response})
diff --git a/demo/chat_web_selects.py b/demo/chat_web_selects.py
index ae41711..5e045cd 100644
--- a/demo/chat_web_selects.py
+++ b/demo/chat_web_selects.py
@@ -143,7 +143,7 @@ async def async_main(oapi, sites, verbose, limit, chunk_size, chunk_overlap, que
         )
 
     indicator_task = asyncio.create_task(indicate_progress())
-    llm_task = oapi.wrap_for_multiproc(messages, **model_params)
+    llm_task = asyncio.Task(oapi(messages, **model_params))
     tasks = [indicator_task, llm_task]
     done, _ = await asyncio.wait(
         tasks, return_when=asyncio.FIRST_COMPLETED)
@@ -157,8 +157,7 @@ async def async_main(oapi, sites, verbose, limit, chunk_size, chunk_overlap, que
     print('\nFull response data from LLM:\n', retval)
 
     # just get back the text of the response
-    response_text = oapi.first_choice_message(retval)
-    print('\nResponse text from LLM:\n\n', response_text)
+    print('\nResponse text from LLM:\n\n', retval.first_choice_text)
 
 
 # Command line arguments defined in click decorators
diff --git a/demo/function_calling.py b/demo/function_calling.py
index 1dad24a..a0ae9e3 100644
--- a/demo/function_calling.py
+++ b/demo/function_calling.py
@@ -55,7 +55,7 @@ class ExecuteStepByStepPlan(BaseModel):
 
 function_call={'name': 'handle_steps_from_user_query'}
 
-resp = llm_api(messages=messages, functions=functions, function_call=function_call)
+resp = llm_api.call(messages=messages, functions=functions, function_call=function_call)
 fc = resp.choices[0].message.function_call
 
 if fc:
diff --git a/demo/multiprocess.py b/demo/multiprocess.py
index dab6703..b9e49aa 100644
--- a/demo/multiprocess.py
+++ b/demo/multiprocess.py
@@ -49,7 +49,7 @@ async def async_main(requests_info):
     # is often a better alternative, but waits for all tasks to complete whereas we're done once
     # the LLM generation tasks are complete
     indicator_task = asyncio.create_task(console_progress_indicator())
-    llm_tasks = [llm.wrap_for_multiproc(prompt_to_chat(msg), temperature=temp, max_tokens=1024)
+    llm_tasks = [asyncio.create_task(llm(prompt_to_chat(msg), temperature=temp, max_tokens=1024))
                 for (llm, msg, temp) in requests_info]
     llm_messages = [msg for (llm, msg, temp) in requests_info]
     # Need to gather to make sure all LLM tasks are completed
@@ -63,8 +63,7 @@ async def async_main(requests_info):
         # resp is an instance of openai.openai_object.OpenAIObject, with lots of useful info
         print('\nFull response data from LLM:\n', resp)
         # Just the response text
-        response_text = openai_chat_api.first_choice_message(resp)
-        print('\nResponse text from LLM:\n\n', response_text)
+        print('\nResponse text from LLM:\n\n', resp.first_choice_text)
         print('-'*80)
 
 
@@ -82,7 +81,7 @@ def main(apibase, llmtemp, openai, model):
     if openai:
         oapi = openai_chat_api(model=(model or 'gpt-3.5-turbo'))
     else:
-        oapi = openai_chat_api(model=model, api_base=apibase)
+        oapi = openai_chat_api(model=model, base_url=apibase)
 
     # Separate models or params—e.g. temp—for each LLM request is left as an exercise 😊
     requests_info = [
diff --git a/demo/qa_discord.py b/demo/qa_discord.py
index 8515400..6be5dea 100644
--- a/demo/qa_discord.py
+++ b/demo/qa_discord.py
@@ -37,6 +37,7 @@
 '''
 
 import os
+import asyncio
 
 import discord
 
@@ -56,16 +57,15 @@ async def send_llm_msg(msg):
     '''
     # See demo/alpaca_multitask_fix_xml.py for some important warnings here
    # oapi.parameters
-    response = await oapi.wrap_for_multiproc(prompt_to_chat(msg), max_tokens=512)
+    response = await asyncio.create_task(oapi(prompt_to_chat(msg), max_tokens=512))
     print(response)
 
     print('\nFull response data from LLM:\n', response)
 
     # Response is a json-like object; we just need the message text
-    response_text = oapi.first_choice_message(response)
-    print('\nResponse text from LLM:\n', response_text)
+    print('\nResponse text from LLM:\n', response.first_choice_text)
 
-    return response_text
+    return response.first_choice_text
 
 
 @client.event
diff --git a/demo/simple_fix_xml.py b/demo/simple_fix_xml.py
index cd7ac2c..f548432 100644
--- a/demo/simple_fix_xml.py
+++ b/demo/simple_fix_xml.py
@@ -38,7 +38,7 @@ def main(apibase, llmtemp, openai, model):
         assert not apibase, 'Don\'t use --apibase with --openai'
         oapi = openai_api(model=(model or 'gpt-3.5-turbo'))
     else:
-        oapi = openai_api(model=model, api_base=apibase)
+        oapi = openai_api(model=model, base_url=apibase)
 
 
     BAD_XML_CODE = '''\
@@ -56,7 +56,7 @@ def main(apibase, llmtemp, openai, model):
         delimiters=ALPACA_INSTRUCT_INPUT_DELIMITERS)
     print(prompt, '\n')
 
-    response = oapi(
+    response = oapi.call(
         prompt=prompt,  # Prompt (Required)
         temperature=llmtemp,  # Temp (Default 1)
         max_tokens=100,  # Max Token length of generated text (Default 16)
@@ -72,8 +72,7 @@ def main(apibase, llmtemp, openai, model):
     print('\nFull response data from LLM:\n', response)
 
     # Response is a json-like object; just get back the text of the response
-    response_text = oapi.first_choice_text(response)
-    print('\nResponse text from LLM:\n\n', response_text)
+    print('\nResponse text from LLM:\n\n', response.first_choice_text)
 
 
 # CLI entry point
diff --git a/pylib/llm_wrapper.py b/pylib/llm_wrapper.py
index 2f49a78..5bec8b8 100644
--- a/pylib/llm_wrapper.py
+++ b/pylib/llm_wrapper.py
@@ -198,6 +198,7 @@ async def __call__(self, prompt, api_func=None, **kwargs):
         # Haven't implemented any OpenAI API calls that are async, so just call the sync version
         return self.call(prompt, api_func, **kwargs)
 
+    # FIXME: Needs investigation, but we might no longer need this as much
     def wrap_for_multiproc(self, prompt, **kwargs):
         '''
         Wrap the LLM invocation in an asyncio task