diff --git a/demo/chat_pdf_streamlit_ui.py b/demo/chat_pdf_streamlit_ui.py
index ea9563c..9eb3001 100644
--- a/demo/chat_pdf_streamlit_ui.py
+++ b/demo/chat_pdf_streamlit_ui.py
@@ -173,12 +173,12 @@ def query_llm(openai_api):
     # Need to remove old system messages for subsequent queries
     st.session_state['messages'].extend(messages)
     # print(st.session_state['messages'], '\n', '-'*10)
-    response = oapi(st.session_state['messages'], temperature=LLM_TEMP, max_tokens=1024)
+    response = oapi.call(st.session_state['messages'], temperature=LLM_TEMP, max_tokens=1024)
     print('\nFull response data from LLM:\n', response)
 
 
     # Response is a json-like object; extract the text
-    response_text = oapi.first_choice_message(response)
+    response_text = response.first_choice_text
     print('\nResponse text from LLM:\n', response_text)
 
     st.session_state['messages'].append({'role': 'assistant', 'content': response})
diff --git a/demo/chat_web_selects.py b/demo/chat_web_selects.py
index ae41711..5e045cd 100644
--- a/demo/chat_web_selects.py
+++ b/demo/chat_web_selects.py
@@ -143,7 +143,7 @@ async def async_main(oapi, sites, verbose, limit, chunk_size, chunk_overlap, que
         )
 
     indicator_task = asyncio.create_task(indicate_progress())
-    llm_task = oapi.wrap_for_multiproc(messages, **model_params)
+    llm_task = asyncio.Task(oapi(messages, **model_params))
     tasks = [indicator_task, llm_task]
     done, _ = await asyncio.wait(
         tasks, return_when=asyncio.FIRST_COMPLETED)
@@ -157,8 +157,7 @@
     print('\nFull response data from LLM:\n', retval)
 
     # just get back the text of the response
-    response_text = oapi.first_choice_message(retval)
-    print('\nResponse text from LLM:\n\n', response_text)
+    print('\nResponse text from LLM:\n\n', retval.first_choice_text)
 
 
 # Command line arguments defined in click decorators
diff --git a/demo/function_calling.py b/demo/function_calling.py
index 1dad24a..a0ae9e3 100644
--- a/demo/function_calling.py
+++ b/demo/function_calling.py
@@ -55,7 +55,7 @@ class ExecuteStepByStepPlan(BaseModel):
 function_call={'name': 'handle_steps_from_user_query'}
 
 
-resp = llm_api(messages=messages, functions=functions, function_call=function_call)
+resp = llm_api.call(messages=messages, functions=functions, function_call=function_call)
 fc = resp.choices[0].message.function_call
 
 if fc:
diff --git a/demo/multiprocess.py b/demo/multiprocess.py
index dab6703..b9e49aa 100644
--- a/demo/multiprocess.py
+++ b/demo/multiprocess.py
@@ -49,7 +49,7 @@ async def async_main(requests_info):
     # is often a better alternative, but waits for all tasks to complete whereas we're done once
     # the LLM generation tasks are complete
    indicator_task = asyncio.create_task(console_progress_indicator())
-    llm_tasks = [llm.wrap_for_multiproc(prompt_to_chat(msg), temperature=temp, max_tokens=1024)
+    llm_tasks = [asyncio.create_task(llm(prompt_to_chat(msg), temperature=temp, max_tokens=1024))
                  for (llm, msg, temp) in requests_info]
     llm_messages = [msg for (llm, msg, temp) in requests_info]
     # Need to gather to make sure all LLM tasks are completed
@@ -63,8 +63,7 @@ async def async_main(requests_info):
         # resp is an instance of openai.openai_object.OpenAIObject, with lots of useful info
         print('\nFull response data from LLM:\n', resp)
         # Just the response text
-        response_text = openai_chat_api.first_choice_message(resp)
-        print('\nResponse text from LLM:\n\n', response_text)
+        print('\nResponse text from LLM:\n\n', resp.first_choice_text)
         print('-'*80)
 
 
@@ -82,7 +81,7 @@ def main(apibase, llmtemp, openai, model):
     if openai:
         oapi = openai_chat_api(model=(model or 'gpt-3.5-turbo'))
     else:
-        oapi = openai_chat_api(model=model, api_base=apibase)
+        oapi = openai_chat_api(model=model, base_url=apibase)
 
     # Separate models or params—e.g. temp—for each LLM request is left as an exercise 😊
     requests_info = [
diff --git a/demo/qa_discord.py b/demo/qa_discord.py
index 8515400..6be5dea 100644
--- a/demo/qa_discord.py
+++ b/demo/qa_discord.py
@@ -37,6 +37,7 @@
 '''
 import os
+import asyncio
 
 import discord
 
 
@@ -56,16 +57,15 @@ async def send_llm_msg(msg):
     '''
     # See demo/alpaca_multitask_fix_xml.py for some important warnings here
     # oapi.parameters
-    response = await oapi.wrap_for_multiproc(prompt_to_chat(msg), max_tokens=512)
+    response = await asyncio.create_task(oapi(prompt_to_chat(msg), max_tokens=512))
     print(response)
 
     print('\nFull response data from LLM:\n', response)
 
     # Response is a json-like object; we just need the message text
-    response_text = oapi.first_choice_message(response)
-    print('\nResponse text from LLM:\n', response_text)
+    print('\nResponse text from LLM:\n', response.first_choice_text)
 
-    return response_text
+    return response.first_choice_text
 
 
 @client.event
diff --git a/demo/simple_fix_xml.py b/demo/simple_fix_xml.py
index cd7ac2c..f548432 100644
--- a/demo/simple_fix_xml.py
+++ b/demo/simple_fix_xml.py
@@ -38,7 +38,7 @@ def main(apibase, llmtemp, openai, model):
         assert not apibase, 'Don\'t use --apibase with --openai'
         oapi = openai_api(model=(model or 'gpt-3.5-turbo'))
     else:
-        oapi = openai_api(model=model, api_base=apibase)
+        oapi = openai_api(model=model, base_url=apibase)
 
 
     BAD_XML_CODE = '''\
@@ -56,7 +56,7 @@ def main(apibase, llmtemp, openai, model):
         delimiters=ALPACA_INSTRUCT_INPUT_DELIMITERS)
     print(prompt, '\n')
 
-    response = oapi(
+    response = oapi.call(
         prompt=prompt,          # Prompt (Required)
         temperature=llmtemp,    # Temp (Default 1)
         max_tokens=100,         # Max Token length of generated text (Default 16)
@@ -72,8 +72,7 @@
     print('\nFull response data from LLM:\n', response)
 
     # Response is a json-like object; just get back the text of the response
-    response_text = oapi.first_choice_text(response)
-    print('\nResponse text from LLM:\n\n', response_text)
+    print('\nResponse text from LLM:\n\n', response.first_choice_text)
 
 
 # CLI entry point
diff --git a/pylib/llm_wrapper.py b/pylib/llm_wrapper.py
index 2f49a78..5bec8b8 100644
--- a/pylib/llm_wrapper.py
+++ b/pylib/llm_wrapper.py
@@ -198,6 +198,7 @@ async def __call__(self, prompt, api_func=None, **kwargs):
         # Haven't implemented any OpenAI API calls that are async, so just call the sync version
         return self.call(prompt, api_func, **kwargs)
 
+    # FIXME: Needs investigation, but we might no longer need this as much
     def wrap_for_multiproc(self, prompt, **kwargs):
         '''
         Wrap the LLM invocation in an asyncio task
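
Note: taken together, these hunks move the demos from the old wrapper helpers (bare `oapi(...)` for sync calls, `oapi.first_choice_message(...)`, `wrap_for_multiproc()`, `api_base=`) to the new convention (`oapi.call(...)` for sync calls, `response.first_choice_text`, plain asyncio tasks, `base_url=`). Below is a minimal sketch of the async path the demos now use; it assumes pylib/llm_wrapper.py is importable as `ogbujipt.llm_wrapper`, and the model name and endpoint URL are placeholders.

```python
# Minimal sketch of the calling convention these hunks migrate to.
# Assumptions: pylib/llm_wrapper.py is importable as ogbujipt.llm_wrapper;
# 'my-model' and the base_url value are placeholders for a self-hosted endpoint.
import asyncio

from ogbujipt.llm_wrapper import openai_chat_api, prompt_to_chat

# base_url replaces the old api_base keyword
oapi = openai_chat_api(model='my-model', base_url='http://127.0.0.1:8000')


async def main():
    # oapi(...) is an async call, so it can be scheduled as a plain asyncio task
    # (no wrap_for_multiproc() needed)
    llm_task = asyncio.create_task(oapi(prompt_to_chat('Say hello'), max_tokens=128))
    response = await llm_task

    # The response text is now an attribute of the response object,
    # rather than being extracted via oapi.first_choice_message(response)
    print(response.first_choice_text)


asyncio.run(main())
```

For synchronous use, the demos now invoke `oapi.call(...)` directly, as in simple_fix_xml.py and function_calling.py above.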