From 1d35367339fa93730bb2ba6f23562d04adafe11c Mon Sep 17 00:00:00 2001
From: Uche Ogbuji
Date: Fri, 21 Jul 2023 21:11:19 -0600
Subject: [PATCH] [#19] Initial demo improvement moves

---
 demo/README.md                                |   2 +
 demo/alpaca_multitask_fix_xml.py              | 130 ------------------
 demo/alpaca_simple_qa_discord.py              |   4 -
 demo/chat_pdf_streamlit_ui.py                 |   4 -
 demo/multiprocess.py                          | 112 +++++++++++++++
 ...ca_simple_fix_xml.py => simple_fix_xml.py} |   0
 6 files changed, 114 insertions(+), 138 deletions(-)
 delete mode 100644 demo/alpaca_multitask_fix_xml.py
 create mode 100644 demo/multiprocess.py
 rename demo/{alpaca_simple_fix_xml.py => simple_fix_xml.py} (100%)

diff --git a/demo/README.md b/demo/README.md
index 2e9eb79..c6d45bb 100644
--- a/demo/README.md
+++ b/demo/README.md
@@ -1,3 +1,5 @@
+For all these demos you need access to an OpenAI-like service. The default assumption is that you have a self-hosted framework such as llama-cpp-python or text-generation-webui running.
+
 # Simplest
 
 ## alpaca_simple_fix_xml.py
diff --git a/demo/alpaca_multitask_fix_xml.py b/demo/alpaca_multitask_fix_xml.py
deleted file mode 100644
index c033eeb..0000000
--- a/demo/alpaca_multitask_fix_xml.py
+++ /dev/null
@@ -1,130 +0,0 @@
-'''
-Advanced demo using an LLM to repair data (XML), like
-alpaca_simple_fix_xml.py
-but demonstrating asyncio by running a separate, progress indicator task
-in the background while the LLM is generating. Should work even
-if the LLM framework in use doesn't suport asyncio,
-thanks to ogbujipt.async_helper
-
-You need access to an OpenAI-like service. Default assumption is that you
-have a self-hosted framework such as llama-cpp-python or text-generation-webui
-running. Say it's at my-llm-host:8000, you can do:
-
-python demo/alpaca_multitask_fix_xml.py --host=http://my-llm-host --port=8000
-
-Also allows you to use the actual OpenAI service, by specifying --openai
-'''
-
-import asyncio
-
-import click
-
-from ogbujipt import oapi_first_choice_text
-from ogbujipt.async_helper import schedule_callable, openai_api_surrogate
-from ogbujipt import config
-from ogbujipt.prompting.basic import format
-from ogbujipt.prompting.model_style import ALPACA_INSTRUCT_DELIMITERS
-
-DOTS_SPACING = 0.5  # Number of seconds between each dot printed to console
-
-
-# Could probably use something like tqdm.asyncio, if we wanted to be fancy
-async def indicate_progress(pause=DOTS_SPACING):
-    '''
-    Simple progress indicator for the console. Just prints dots.
-    '''
-    while True:
-        print('.', end='', flush=True)
-        await asyncio.sleep(pause)
-
-
-async def async_main(openai_params):
-    '''
-    Schedule one task to do a long-running/blocking LLM request, and another
-    to run a progress indicator in the background
-    '''
-    BAD_XML_CODE = '''\
-<earth>
-<country><b>Russia</country></b>
-<capital>Moscow</capital>
-</Earth>'''
-
-    prompt = format(
-        'Correct the following XML to make it well-formed',
-        contexts=BAD_XML_CODE,
-        delimiters=ALPACA_INSTRUCT_DELIMITERS)
-    print(prompt, '\n')
-
-    # Customize parameters for model behavior
-    # More info: https://platform.openai.com/docs/api-reference/completions
-    model_params = dict(
-        max_tokens=60,  # Limit number of generated tokens
-        top_p=1,  # AKA nucleus sampling; can increase generated text diversity
-        frequency_penalty=0,  # Favor more or less frequent tokens
-        presence_penalty=1,  # Prefer new, previously unused tokens
-        )
-    model_params.update(openai_params)
-
-    # Pro tip: When creating tasks with asyncio.create_task be mindful to not
-    # accidentally lose references to tasks, lest they get garbage collected,
-    # which sows chaos.
-    # In some cases asyncio.TaskGroup (new in Python 3.11)
-    # is a better alternative, but we can't use them in this case because
-    # they wait for all tasks to complete whereas we're done once only
-    # the LLM generation task is complete
-    indicator_task = asyncio.create_task(indicate_progress())
-    # Notice the pattern of passing in the callable iself, then the params
-    # You can't just do, say llm(prompt) because that will actually
-    # call the function & block on the LLM request
-    llm_task = asyncio.create_task(
-        schedule_callable(openai_api_surrogate, prompt, **model_params))
-    tasks = [indicator_task, llm_task]
-    done, _ = await asyncio.wait(
-        tasks, return_when=asyncio.FIRST_COMPLETED
-        )
-
-    # Instance of openai.openai_object.OpenAIObject, with lots of useful info
-    retval = next(iter(done)).result()
-    print(type(retval))
-    # Response is a json-like object; extract the text
-    print('\nFull response data from LLM:\n', retval)
-
-    # response is a json-like object;
-    # just get back the text of the response
-    response_text = oapi_first_choice_text(retval)
-    print('\nResponse text from LLM:\n\n', response_text)
-
-
-# Command line arguments defined in click decorators
-@click.command()
-@click.option('--host', default='http://127.0.0.1', help='OpenAI API host')
-@click.option('--port', default='8000', help='OpenAI API port')
-@click.option('--llmtemp', default='0.1', type=float, help='LLM temperature')
-@click.option('--openai', is_flag=True, default=False, type=bool,
-              help='Use live OpenAI API. If you use this option, you must have '
-              '"OPENAI_API_KEY" defined in your environmnt')
-@click.option('--model', default='', type=str,
-              help='OpenAI model to use (see https://platform.openai.com/docs/models)')
-def main(host, port, llmtemp, openai, model):
-    # Use OpenAI API if specified, otherwise emulate with supplied host, etc.
-    if openai:
-        assert not (host or port), 'Don\'t use --host or --port with --openai'
-        model = model or 'text-davinci-003'
-        openai_api = config.openai_live(
-            model=model, debug=True)
-    else:
-        # For now the model param is most useful in conjunction with --openai
-        model = model or config.HOST_DEFAULT
-        openai_api = config.openai_emulation(
-            host=host, port=port, model=model, debug=True)
-
-    # Preserve the provided temperature setting
-    openai_api.params.temperature = llmtemp
-    asyncio.run(async_main(openai_api.params))
-
-
-if __name__ == '__main__':
-    # CLI entry point
-    # Also protects against multiple launching of the overall program
-    # when a child process imports this
-    # viz https://docs.python.org/3/library/multiprocessing.html#multiprocessing-safe-main-import
-    main()
diff --git a/demo/alpaca_simple_qa_discord.py b/demo/alpaca_simple_qa_discord.py
index 6ecce4b..b5b4a5d 100644
--- a/demo/alpaca_simple_qa_discord.py
+++ b/demo/alpaca_simple_qa_discord.py
@@ -8,10 +8,6 @@ so for example if a request is sent, and a second comes in before it has
 completed, only the latter will complete.
 
-You need access to an OpenAI-like service. Default assumption is that you
-have a self-hosted framework such as llama-cpp-python or text-generation-webui
-running. Say it's at my-llm-host:8000, you can do:
-
 Prerequisites: python-dotenv discord.py
 
 You also need to make sure Python has root SSL certificates installed
diff --git a/demo/chat_pdf_streamlit_ui.py b/demo/chat_pdf_streamlit_ui.py
index 3785af2..c6a2601 100644
--- a/demo/chat_pdf_streamlit_ui.py
+++ b/demo/chat_pdf_streamlit_ui.py
@@ -14,10 +14,6 @@ Single-PDF support, for now, to keep the demo code simple.
 Can easily extend to e.g. work with multiple docs dropped in a directory
 
-You need access to an OpenAI-like service. Default assumption is that you
-have a self-hosted framework such as llama-cpp-python or text-generation-webui
-running. Assume for the following it's at my-llm-host:8000
-
 Prerequisites. From OgbujiPT cloned dir:.
 
 ```sh
diff --git a/demo/multiprocess.py b/demo/multiprocess.py
new file mode 100644
index 0000000..4bcc168
--- /dev/null
+++ b/demo/multiprocess.py
@@ -0,0 +1,112 @@
+'''
+Advanced demo showing quick chat with an LLM, but with 3 simultaneous requests,
+and also a separate progress indicator display while the LLM instances are generating.
+The key is taking advantage of Python's asyncio, and also multiprocessing, which requires
+some finesse so it works even when the LLM framework in use doesn't support asyncio.
+Luckily `ogbujipt.async_helper` comes in handy.
+
+As currently written the demo calls the live OpenAI service ('text-ada-001'); pass the path to your API key file as the first argument:
+
+```sh
+python demo/multiprocess.py my_openai_api_key_file
+```
+'''
+import sys
+import asyncio
+
+import openai
+
+from ogbujipt.async_helper import schedule_callable
+from ogbujipt import config
+from ogbujipt.prompting.basic import format
+from ogbujipt.prompting.model_style import ALPACA_DELIMITERS
+
+model_params = {}
+
+
+async def indicate_progress(pause=0.5):
+    '''
+    Simple progress indicator for the console. Just prints dots.
+    '''
+    while True:
+        print('.', end='', flush=True)
+        await asyncio.sleep(pause)
+
+
+openai_globals = ['api_base', 'api_key', 'api_key_path']
+
+
+def cache_openai_api_params():
+    params = {}
+    for k in openai_globals:
+        if hasattr(openai, k):
+            params[k] = getattr(openai, k)
+    return params
+
+
+def openai_api_surrogate(prompt, api_func=openai.Completion.create, **kwargs):
+    # Reset API details, relevant when emulating OpenAI
+    trimmed_kwargs = {}
+    for k in kwargs:
+        if k in openai_globals:
+            setattr(openai, k, kwargs[k])
+        else:
+            trimmed_kwargs[k] = kwargs[k]
+    # Send other, provided args to the generation function
+    return api_func(prompt=prompt, **trimmed_kwargs)
+
+
+class llm_request:
+    tasks = {}
+
+    def __init__(self, topic):
+        self.topic = topic
+
+    def wrap(self):
+        prompt = format(f'Tell me a funny joke about {self.topic}', delimiters=ALPACA_DELIMITERS)
+
+        self.task = asyncio.create_task(
+            schedule_callable(openai_api_surrogate, prompt, model='text-ada-001', **cache_openai_api_params()))
+        llm_request.tasks[self.task] = self
+        return self.task
+
+
+async def async_main():
+    topics = ['wild animals', 'vehicles', 'space aliens']
+
+    # model_params = dict(
+    #     max_tokens=60,  # Limit number of generated tokens
+    #     top_p=1,  # AKA nucleus sampling; can increase generated text diversity
+    #     frequency_penalty=0,  # Favor more or less frequent tokens
+    #     presence_penalty=1,  # Prefer new, previously unused tokens
+    #     )
+    indicator_task = asyncio.create_task(indicate_progress())
+    # Notice the pattern of passing in the callable itself, then the params
+    # You can't just do, say llm(prompt) because that will actually
+    # call the function & block on the LLM request
+    llm_requests = [llm_request(t) for t in topics]
+    llm_tasks = [req.wrap() for req in llm_requests]
+    # Need to gather to make sure all LLM tasks are completed
+    gathered_llm_tasks = asyncio.gather(*llm_tasks)
+    done, _ = await asyncio.wait((indicator_task, gathered_llm_tasks), return_when=asyncio.FIRST_COMPLETED)
+
+    # Only completed task will be from the gather() of llm_tasks, and it has
+    # results in original order
+    results = zip(llm_requests, next(iter(done)).result())
+    for req, resp in results:
+        print(f'Result re {req.topic}')
+        print(resp)
+
+
+def main():
+    openai.model = 'text-ada-001'
+    # Just hardcode these params
+    model_params['llmtemp'], model_params['model'] = 1, 'text-ada-001'
+    openai.api_key_path = sys.argv[1]
+    # openai_api = config.openai_live(model=model, debug=True)
+    # model_params['api_key_path'] = openai.api_key_path
+    asyncio.run(async_main())
+
+
+if __name__ == '__main__':
+    # Re-entry control. Don't want main() executed on re-import
+    main()
diff --git a/demo/alpaca_simple_fix_xml.py b/demo/simple_fix_xml.py
similarity index 100%
rename from demo/alpaca_simple_fix_xml.py
rename to demo/simple_fix_xml.py
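Note on the concurrency pattern introduced in demo/multiprocess.py: the demo races a never-ending progress-indicator task against an `asyncio.gather` of the LLM request tasks, so `asyncio.wait(..., return_when=asyncio.FIRST_COMPLETED)` returns as soon as the gather finishes, with results in submission order. Below is a minimal, stdlib-only sketch of that same pattern, independent of `ogbujipt.async_helper` and of any real LLM service; `slow_llm_call` is a hypothetical stand-in for a blocking API call, not part of ogbujipt or the OpenAI client.

```python
import asyncio
import time


def slow_llm_call(topic):
    # Hypothetical stand-in for a blocking LLM request (e.g. a synchronous completion call)
    time.sleep(2)
    return f'A joke about {topic}'


async def indicate_progress(pause=0.5):
    # Print dots until cancelled, so the console shows liveness while work runs elsewhere
    while True:
        print('.', end='', flush=True)
        await asyncio.sleep(pause)


async def main():
    topics = ['wild animals', 'vehicles', 'space aliens']
    loop = asyncio.get_running_loop()
    indicator = asyncio.create_task(indicate_progress())
    # Off-load each blocking call to the default executor so the event loop stays free
    llm_futures = [loop.run_in_executor(None, slow_llm_call, t) for t in topics]
    gathered = asyncio.gather(*llm_futures)
    # The only awaitable that can complete is the gather, since the indicator loops forever
    done, _ = await asyncio.wait((indicator, gathered), return_when=asyncio.FIRST_COMPLETED)
    indicator.cancel()  # Stop the dots once all the "LLM" results are in
    for topic, result in zip(topics, next(iter(done)).result()):
        print(f'\n{topic}: {result}')


if __name__ == '__main__':
    asyncio.run(main())
```

In the demo itself, `schedule_callable` plays the role of `run_in_executor` here: it off-loads the blocking generation call so the event loop remains free to keep the indicator task printing.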