diff --git a/demo/README.md b/demo/README.md
index 2e9eb79..7a47210 100644
--- a/demo/README.md
+++ b/demo/README.md
@@ -1,19 +1,18 @@
+For all these demos you need access to an OpenAI-like service. The default assumption is that you have a self-hosted framework such as llama-cpp-python or text-generation-webui running.
+
 # Simplest

-## alpaca_simple_fix_xml.py
+## simple_fix_xml.py

-Quick demo, sending an Alpaca-compatible LLM some bad XML & asking it to make corrections.
+Quick demo, sending a Llama or Alpaca-compatible LLM some bad XML & asking it to make corrections.

 # Intermediate

-## alpaca_multitask_fix_xml.py
+## multiprocess.py

-Intermediate demo using an LLM to repair data (XML), like
-alpaca_simple_fix_xml.py
-but running a separate, progress indicator task in the background
-while the LLm works, using asyncio. This should work even
-if the LLM framework we're using doesn't suport asyncio,
-thanks to ogbujipt.async_helper
+Intermediate demo asking an LLM for jokes on several topics simultaneously,
+running a separate progress indicator task in the background, using asyncio.
+Works even if the LLM framework in use doesn't support asyncio, thanks to ogbujipt.async_helper

 # Advanced
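To make the new README note concrete: every demo expects an OpenAI-compatible HTTP endpoint. A minimal connectivity check might look like the sketch below, which uses the `openai_emulation()` setup from `pylib/config.py` in this changeset and the pre-1.0 `openai` package the rest of the code assumes; `localhost:8000` is only a placeholder for wherever your llama-cpp-python or text-generation-webui server is listening.

```py
# Sketch: point the openai client at a self-hosted, OpenAI-compatible server
# and request one completion, to confirm the service is reachable
from ogbujipt import config, oapi_first_choice_text

# Placeholder host/port; llama-cpp-python's server listens on port 8000 by default
openai_api = config.openai_emulation(host='http://localhost', port='8000')

response = openai_api.Completion.create(
    model='',  # Self-hosted back ends typically decide the model themselves
    prompt='Say hello.',
    max_tokens=16)
print(oapi_first_choice_text(response))
```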
diff --git a/demo/alpaca_multitask_fix_xml.py b/demo/alpaca_multitask_fix_xml.py
deleted file mode 100644
index c033eeb..0000000
--- a/demo/alpaca_multitask_fix_xml.py
+++ /dev/null
@@ -1,130 +0,0 @@
-'''
-Advanced demo using an LLM to repair data (XML), like
-alpaca_simple_fix_xml.py
-but demonstrating asyncio by running a separate, progress indicator task
-in the background while the LLM is generating. Should work even
-if the LLM framework in use doesn't suport asyncio,
-thanks to ogbujipt.async_helper
-
-You need access to an OpenAI-like service. Default assumption is that you
-have a self-hosted framework such as llama-cpp-python or text-generation-webui
-running. Say it's at my-llm-host:8000, you can do:
-
-python demo/alpaca_multitask_fix_xml.py --host=http://my-llm-host --port=8000
-
-Also allows you to use the actual OpenAI service, by specifying --openai
-'''
-
-import asyncio
-
-import click
-
-from ogbujipt import oapi_first_choice_text
-from ogbujipt.async_helper import schedule_callable, openai_api_surrogate
-from ogbujipt import config
-from ogbujipt.prompting.basic import format
-from ogbujipt.prompting.model_style import ALPACA_INSTRUCT_DELIMITERS
-
-DOTS_SPACING = 0.5  # Number of seconds between each dot printed to console
-
-
-# Could probably use something like tqdm.asyncio, if we wanted to be fancy
-async def indicate_progress(pause=DOTS_SPACING):
-    '''
-    Simple progress indicator for the console. Just prints dots.
-    '''
-    while True:
-        print('.', end='', flush=True)
-        await asyncio.sleep(pause)
-
-
-async def async_main(openai_params):
-    '''
-    Schedule one task to do a long-running/blocking LLM request, and another
-    to run a progress indicator in the background
-    '''
-    BAD_XML_CODE = '''\
-
-Russia
-Moscow
-'''
-
-    prompt = format(
-        'Correct the following XML to make it well-formed',
-        contexts=BAD_XML_CODE,
-        delimiters=ALPACA_INSTRUCT_DELIMITERS)
-    print(prompt, '\n')
-
-    # Customize parameters for model behavior
-    # More info: https://platform.openai.com/docs/api-reference/completions
-    model_params = dict(
-        max_tokens=60,  # Limit number of generated tokens
-        top_p=1,  # AKA nucleus sampling; can increase generated text diversity
-        frequency_penalty=0,  # Favor more or less frequent tokens
-        presence_penalty=1,  # Prefer new, previously unused tokens
-        )
-    model_params.update(openai_params)
-
-    # Pro tip: When creating tasks with asyncio.create_task be mindful to not
-    # accidentally lose references to tasks, lest they get garbage collected,
-    # which sows chaos. In some cases asyncio.TaskGroup (new in Python 3.11)
-    # is a better alternative, but we can't use them in this case because
-    # they wait for all tasks to complete whereas we're done once only
-    # the LLM generation task is complete
-    indicator_task = asyncio.create_task(indicate_progress())
-    # Notice the pattern of passing in the callable iself, then the params
-    # You can't just do, say llm(prompt) because that will actually
-    # call the function & block on the LLM request
-    llm_task = asyncio.create_task(
-        schedule_callable(openai_api_surrogate, prompt, **model_params))
-    tasks = [indicator_task, llm_task]
-    done, _ = await asyncio.wait(
-        tasks, return_when=asyncio.FIRST_COMPLETED
-        )
-
-    # Instance of openai.openai_object.OpenAIObject, with lots of useful info
-    retval = next(iter(done)).result()
-    print(type(retval))
-    # Response is a json-like object; extract the text
-    print('\nFull response data from LLM:\n', retval)
-
-    # response is a json-like object;
-    # just get back the text of the response
-    response_text = oapi_first_choice_text(retval)
-    print('\nResponse text from LLM:\n\n', response_text)
-
-
-# Command line arguments defined in click decorators
-@click.command()
-@click.option('--host', default='http://127.0.0.1', help='OpenAI API host')
-@click.option('--port', default='8000', help='OpenAI API port')
-@click.option('--llmtemp', default='0.1', type=float, help='LLM temperature')
-@click.option('--openai', is_flag=True, default=False, type=bool,
-              help='Use live OpenAI API. If you use this option, you must have '
-              '"OPENAI_API_KEY" defined in your environmnt')
-@click.option('--model', default='', type=str,
-              help='OpenAI model to use (see https://platform.openai.com/docs/models)')
-def main(host, port, llmtemp, openai, model):
-    # Use OpenAI API if specified, otherwise emulate with supplied host, etc.
-    if openai:
-        assert not (host or port), 'Don\'t use --host or --port with --openai'
-        model = model or 'text-davinci-003'
-        openai_api = config.openai_live(
-            model=model, debug=True)
-    else:
-        # For now the model param is most useful in conjunction with --openai
-        model = model or config.HOST_DEFAULT
-        openai_api = config.openai_emulation(
-            host=host, port=port, model=model, debug=True)
-
-    # Preserve the provided temperature setting
-    openai_api.params.temperature = llmtemp
-    asyncio.run(async_main(openai_api.params))
-
-
-if __name__ == '__main__':
-    # CLI entry point
-    # Also protects against multiple launching of the overall program
-    # when a child process imports this
-    # viz https://docs.python.org/3/library/multiprocessing.html#multiprocessing-safe-main-import
-    main()
diff --git a/demo/alpaca_simple_qa_discord.py b/demo/alpaca_simple_qa_discord.py
index 6ecce4b..d7a7734 100644
--- a/demo/alpaca_simple_qa_discord.py
+++ b/demo/alpaca_simple_qa_discord.py
@@ -6,11 +6,7 @@
 Note: This is a simple demo, which doesn't do any client-side job management,
 so for example if a request is sent, and a second comes in before it has completed,
-only the latter will complete.
-
-You need access to an OpenAI-like service. Default assumption is that you
-have a self-hosted framework such as llama-cpp-python or text-generation-webui
-running. Say it's at my-llm-host:8000, you can do:
+the LLM back end is relied on to cope.

 Prerequisites:
 python-dotenv
 discord.py
@@ -44,7 +40,7 @@
 from dotenv import load_dotenv

 from ogbujipt.config import openai_emulation
-from ogbujipt.async_helper import schedule_callable, openai_api_surrogate
+from ogbujipt.async_helper import schedule_callable, openai_api_surrogate, save_openai_api_params
 from ogbujipt import oapi_first_choice_text
 from ogbujipt.prompting.basic import format
 from ogbujipt.prompting.model_style import ALPACA_DELIMITERS
@@ -66,12 +62,12 @@ async def send_llm_msg(msg):

     # See demo/alpaca_multitask_fix_xml.py for some important warnings here
     llm_task = asyncio.create_task(
-        schedule_callable(openai_api_surrogate, prompt, **llm.params))
+        schedule_callable(openai_api_surrogate, prompt, temperature=llmtemp, max_tokens=512,
+                          **save_openai_api_params()))
     tasks = [llm_task]
     done, _ = await asyncio.wait(
-        tasks, return_when=asyncio.FIRST_COMPLETED
-        )
+        tasks, return_when=asyncio.FIRST_COMPLETED)

     response = next(iter(done)).result()
@@ -117,7 +113,7 @@ async def on_ready():
 def main():
     # A real app would probably use a discord.py cog w/ these as data members
-    global llm, llm_temp
+    global llm, llmtemp

     load_dotenv()  # From .env file
     DISCORD_TOKEN = os.getenv('DISCORD_TOKEN')
@@ -126,8 +122,7 @@ def main():

     # Set up API connector & update temperature from environment
     llm = openai_emulation(host=LLM_HOST, port=LLM_PORT)
-    llm.params.llmtemp = os.getenv('LLM_TEMP')
-    llm.params.max_tokens = 512
+    llmtemp = os.getenv('LLM_TEMP')

     # launch Discord client event loop
     client.run(DISCORD_TOKEN)
diff --git a/demo/chat_pdf_streamlit_ui.py b/demo/chat_pdf_streamlit_ui.py
index 3785af2..c6a2601 100644
--- a/demo/chat_pdf_streamlit_ui.py
+++ b/demo/chat_pdf_streamlit_ui.py
@@ -14,10 +14,6 @@
 Single-PDF support, for now, to keep the demo code simple. Can easily extend to
 e.g. work with multiple docs dropped in a directory

-You need access to an OpenAI-like service. Default assumption is that you
-have a self-hosted framework such as llama-cpp-python or text-generation-webui
-running. Assume for the following it's at my-llm-host:8000
-
 Prerequisites. From OgbujiPT cloned dir:.

 ```sh
diff --git a/demo/chat_web_selects.py b/demo/chat_web_selects.py
index 8b511de..db1fba4 100644
--- a/demo/chat_web_selects.py
+++ b/demo/chat_web_selects.py
@@ -34,7 +34,7 @@
 from ogbujipt import config
 from ogbujipt.prompting import format, ALPACA_INSTRUCT_DELIMITERS
-from ogbujipt.async_helper import schedule_openai_call, openai_api_surrogate
+from ogbujipt.async_helper import schedule_callable, openai_api_surrogate, save_openai_api_params
 from ogbujipt import oapi_first_choice_text
 from ogbujipt.text_helper import text_splitter
 from ogbujipt.embedding_helper import qdrant_collection
@@ -82,7 +82,7 @@ async def read_site(url, collection):
     print(f'{collection.count()} chunks added to collection')


-async def async_main(sites, api_params):
+async def async_main(sites):
     # Automatic download from HuggingFace
     # Seem to be reentrancy issues with HuggingFace; defer import
     from sentence_transformers import SentenceTransformer
@@ -134,7 +134,7 @@ async def async_main(sites, api_params):

     indicator_task = asyncio.create_task(indicate_progress())
     llm_task = asyncio.create_task(
-        schedule_openai_call(openai_api_surrogate, prompt, **model_params))
+        schedule_callable(openai_api_surrogate, prompt, **model_params, **save_openai_api_params()))
     tasks = [indicator_task, llm_task]
     done, _ = await asyncio.wait(
         tasks, return_when=asyncio.FIRST_COMPLETED)
@@ -164,17 +164,14 @@ async def async_main(sites, api_params):
 def main(host, port, openai_key, model, sites):
     # Use OpenAI API if specified, otherwise emulate with supplied host, etc.
     if openai_key:
-        assert not (host or port), 'Don\'t use --host or --port with --openai'
         model = model or 'text-davinci-003'
-        openai_api = config.openai_live(
-            model=model, debug=True)
+        config.openai_live(apikey=openai_key, model=model, debug=True)
     else:
-        # For now the model param is most useful in conjunction with --openai
+        # Generally not really useful except in conjunction with --openai
         model = model or config.HOST_DEFAULT
-        openai_api = config.openai_emulation(
-            host=host, port=port, model=model, debug=True)
+        config.openai_emulation(host=host, port=port, model=model, debug=True)

-    asyncio.run(async_main(sites, openai_api.params))
+    asyncio.run(async_main(sites))


 if __name__ == '__main__':
diff --git a/demo/multiprocess.py b/demo/multiprocess.py
new file mode 100644
index 0000000..2134769
--- /dev/null
+++ b/demo/multiprocess.py
@@ -0,0 +1,118 @@
+'''
+Advanced demo showing quick chat with an LLM, but with 3 simultaneous requests,
+and also a separate, progress indicator display while the LLM instances are generating.
+Key is taking advantage of Python's asyncio, and also multiprocess, which requires some finesse,
+to work even when the LLM framework in use doesn't support asyncio.
+Luckily `ogbujipt.async_helper` comes in handy.
+
+```sh
+python demo/multiprocess.py --host=http://my-llm-host --port=8000
+```
+
+Also allows you to use the actual OpenAI ChatGPT service, by specifying --openai
+'''
+import asyncio
+
+# import openai
+
+import click
+
+from ogbujipt import oapi_first_choice_text
+from ogbujipt import config
+from ogbujipt.async_helper import (
+    schedule_callable,
+    openai_api_surrogate,
+    console_progress_indicator,
+    save_openai_api_params)
+from ogbujipt.prompting.basic import format
+from ogbujipt.prompting.model_style import ALPACA_DELIMITERS
+
+
+class llm_request:
+    '''
+    Encapsulates each LLM service request via OpenAI API (even for self-hosted LLM)
+    '''
+    tasks = {}
+
+    def __init__(self, topic, llmtemp, **model_params):
+        '''
+        topic - a particular topic about which we'll ask the LLM
+        model_params - mapping of custom parameters for model behavior, e.g.:
+            max_tokens: limit number of generated tokens (default 16)
+            top_p: AKA nucleus sampling; can increase generated text diversity
+            frequency_penalty: Favor more or less frequent tokens
+            presence_penalty: Prefer new, previously unused tokens
+            More info: https://platform.openai.com/docs/api-reference/completions
+        '''
+        self.topic = topic
+        self.llmtemp = llmtemp
+        self.model_params = model_params
+
+    def wrap(self):
+        prompt = format(f'Tell me a funny joke about {self.topic}', delimiters=ALPACA_DELIMITERS)
+
+        # Pattern of passing in the callable itself, then the params; required for multiprocess execution
+        self.task = asyncio.create_task(
+            schedule_callable(openai_api_surrogate, prompt, temperature=self.llmtemp,
+                              **self.model_params, **save_openai_api_params()))
+        llm_request.tasks[self.task] = self
+        return self.task
+
+
+async def async_main(topics, llmtemp):
+    # Pro tip: When creating tasks with asyncio.create_task be mindful to not
+    # accidentally lose references to tasks, lest they get garbage collected,
+    # which sows chaos. In some cases asyncio.TaskGroup (new in Python 3.11)
+    # is a better alternative, but we can't use them in this case because
+    # they wait for all tasks to complete whereas we're done once only
+    # the LLM generation task is complete
+    indicator_task = asyncio.create_task(console_progress_indicator())
+    # Notice the pattern of passing in the callable itself, then the params
+    # You can't just do, say llm(prompt) because that will actually
+    # call the function & block on the LLM request
+    llm_requests = [llm_request(t, llmtemp, max_tokens=1024) for t in topics]
+    llm_tasks = [req.wrap() for req in llm_requests]
+    # Need to gather to make sure all LLM tasks are completed
+    gathered_llm_tasks = asyncio.gather(*llm_tasks)
+    done, _ = await asyncio.wait((indicator_task, gathered_llm_tasks), return_when=asyncio.FIRST_COMPLETED)
+
+    # The completed task will be from the gather() of llm_tasks; results come back in the original task order
+    results = zip(llm_requests, next(iter(done)).result())
+    for req, resp in results:
+        print(f'Result re {req.topic}')
+        # resp is an instance of openai.openai_object.OpenAIObject, with lots of useful info
+        print('\nFull response data from LLM:\n', resp)
+        # Just the response text
+        response_text = oapi_first_choice_text(resp)
+        print('\nResponse text from LLM:\n\n', response_text)
+
+
+# Command line arguments defined in click decorators
+@click.command()
+@click.option('--host', default='http://127.0.0.1', help='OpenAI API host')
+@click.option('--port', default='8000', help='OpenAI API port')
+@click.option('--llmtemp', default='0.9', type=float, help='LLM temperature')
+@click.option('--openai', is_flag=True, default=False, type=bool,
+              help='Use live OpenAI API. If you use this option, you must have '
+              '"OPENAI_API_KEY" defined in your environment')
+@click.option('--model', default='', type=str,
+              help='OpenAI model to use (see https://platform.openai.com/docs/models)')
+def main(host, port, llmtemp, openai, model):
+    # Use OpenAI API if specified, otherwise emulate with supplied host, etc.
+    if openai:
+        model = model or 'text-davinci-003'
+        config.openai_live(model=model, debug=True)
+    else:
+        # Generally not really useful except in conjunction with --openai
+        model = model or config.HOST_DEFAULT
+        config.openai_emulation(host=host, port=port, model=model, debug=True)
+
+    topics = ['wild animals', 'vehicles', 'space aliens']
+
+    asyncio.run(async_main(topics, llmtemp))
+
+
+if __name__ == '__main__':
+    # CLI entry point. Also protects against re-execution of main() after process fork
+    # viz https://docs.python.org/3/library/multiprocessing.html#multiprocessing-safe-main-import
+    main()
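The core concurrency pattern in `demo/multiprocess.py` (schedule blocking calls in separate processes, gather them into one future, then race that future against a never-ending progress indicator) can be boiled down to a short sketch. Here a sleepy `slow_square()` function is a stand-in for the LLM request, so the sketch runs with no server at all; it illustrates the technique and is not part of the changeset.

```py
import asyncio

from ogbujipt.async_helper import schedule_callable, console_progress_indicator


def slow_square(n):
    # Stand-in for a blocking LLM call; must be a top-level function so the
    # worker process launched by schedule_callable() can pickle and import it
    import time
    time.sleep(2)
    return n * n


async def async_main():
    indicator_task = asyncio.create_task(console_progress_indicator())
    # gather() lets us await all the blocking calls as a single future
    work = asyncio.gather(*(schedule_callable(slow_square, n) for n in range(3)))
    done, _ = await asyncio.wait((indicator_task, work), return_when=asyncio.FIRST_COMPLETED)
    print('\nResults:', next(iter(done)).result())  # [0, 1, 4], in submission order


if __name__ == '__main__':
    asyncio.run(async_main())
```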
diff --git a/demo/alpaca_simple_fix_xml.py b/demo/simple_fix_xml.py
similarity index 100%
rename from demo/alpaca_simple_fix_xml.py
rename to demo/simple_fix_xml.py
diff --git a/pylib/async_helper.py b/pylib/async_helper.py
index 922436d..7fd4ec3 100644
--- a/pylib/async_helper.py
+++ b/pylib/async_helper.py
@@ -6,13 +6,12 @@
 Coroutines to make it a little easier to multitask LLM access
 using Python asyncio
 '''
+import sys
 import asyncio
 import concurrent.futures
 from functools import partial

-import openai
-

 async def schedule_callable(callable, *args, **kwargs):
     '''
@@ -41,41 +40,8 @@ async def schedule_callable(callable, *args, **kwargs):
     return response


-async def schedule_openai_call(callable, *args, **kwargs):
-    '''
-    Schedule long-running/blocking LLM request in a separate process,
-    wrapped to work well in an asyncio event loop
-
-    Basically hides away a bunch of the multiprocessing webbing
-
-    e.g. `llm_task = asyncio.create_task(schedule_callable(llm, prompt))`
-
-    Can then use asyncio.wait(), asyncio.gather(), etc. with `llm_task`
-
-    Args:
-        callable (callable): Callable to be scheduled
-
-    Returns:
-        response: Response object
-    '''
-    # Link up the current async event loop for multiprocess execution
-    loop = asyncio.get_running_loop()
-    executor = concurrent.futures.ProcessPoolExecutor()
-    # Need to partial execute to get in any kwargs for the target callable
-    if 'model' not in kwargs:
-        kwargs['model'] = ''
-    prepped_callable = partial(
-        callable,
-        api_base=openai.api_base,
-        api_key=openai.api_key,
-        **kwargs)
-    # Spawn a separate process for the LLM call
-    response = await loop.run_in_executor(executor, prepped_callable, *args)
-    return response
-
-
 # FIXME: Add all arguments for OpenAI API generation functions here
-def openai_api_surrogate(prompt, api_func=openai.Completion.create, **kwargs):
+def openai_api_surrogate(prompt, api_func=None, **kwargs):
     '''
     Wrapper around OpenAI API generation functions.
     Needed for use in multiprocessing because it seems when the openai library gets
@@ -88,10 +54,50 @@ def openai_api_surrogate(prompt, api_func=openai.Completion.create, **kwargs):
         api_func: API function to utilize

     Returns:
-        api_func (openai): OpenAI API object
+        api_func: Result of OpenAI API call
     '''
-    # Reset API details, relevant when emulating OpenAI
-    openai.api_base = kwargs['api_base']
-    openai.api_key = kwargs['api_key']
+    import openai
+
+    api_func = api_func or openai.Completion.create
+
+    trimmed_kwargs = {}
+    for k in kwargs:
+        if k in OPENAI_GLOALS:
+            setattr(openai, k, kwargs[k])
+        else:
+            trimmed_kwargs[k] = kwargs[k]
     # Send other, provided args to the generation function
-    return api_func(prompt=prompt, **kwargs)
+    return api_func(prompt=prompt, **trimmed_kwargs)
+
+
+# Extracted from https://github.com/openai/openai-python/blob/main/openai/__init__.py
+OPENAI_GLOALS = ['api_key', 'api_key_path', 'api_base', 'organization', 'api_type', 'api_version',
+                 'proxy', 'app_info', 'debug', 'log']
+
+
+def save_openai_api_params():
+    '''
+    openai package uses globals for a lot of its parameters, including the mandatory api_key.
+    In some circs, e.g. multiprocessing, these should be saved for re-set when the module is re-imported.
+    '''
+    import openai
+
+    params = {}
+    # model also carried as a user convenience
+    for k in OPENAI_GLOALS + ['model']:
+        if hasattr(openai, k):
+            params[k] = getattr(openai, k)
+    return params
+
+
+async def console_progress_indicator(pause=0.5, file=sys.stderr):
+    '''
+    Simple progress indicator for the console. Just prints dots.
+
+    pause - seconds between each dot printed to console, default half a sec
+
+    file - file for dots output, default STDERR
+    '''
+    while True:
+        print('.', end='', flush=True, file=file)
+        await asyncio.sleep(pause)
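The reworked helpers are meant to be used together: configure the `openai` module's globals once in the parent process, snapshot them with `save_openai_api_params()`, and pass them along so `openai_api_surrogate()` can re-apply them after the worker process re-imports `openai`. A minimal sketch of that calling pattern follows; the host, port and prompt are placeholders.

```py
import asyncio

from ogbujipt import config, oapi_first_choice_text
from ogbujipt.async_helper import (
    schedule_callable, openai_api_surrogate, save_openai_api_params)

# Set the openai globals (api_base, api_key, etc.) in the parent process
config.openai_emulation(host='http://localhost', port='8000')


async def one_completion(prompt):
    # The saved globals ride along as kwargs and are re-set in the worker process
    response = await schedule_callable(
        openai_api_surrogate, prompt, max_tokens=32, **save_openai_api_params())
    return oapi_first_choice_text(response)


if __name__ == '__main__':
    print(asyncio.run(one_completion('Tell me a funny joke about geese')))
```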
+ ''' + import openai + + params = {} + # model also carried as a user convenience + for k in OPENAI_GLOALS + ['model']: + if hasattr(openai, k): + params[k] = getattr(openai, k) + return params + + +async def console_progress_indicator(pause=0.5, file=sys.stderr): + ''' + Simple progress indicator for the console. Just prints dots. + + pause - seconds between each dot printed to console, default half a sec + + file - file for dots output, default STDERR + ''' + while True: + print('.', end='', flush=True, file=file) + await asyncio.sleep(pause) diff --git a/pylib/config.py b/pylib/config.py index 5e279ab..e4c7aa7 100644 --- a/pylib/config.py +++ b/pylib/config.py @@ -14,7 +14,7 @@ # Really just a bogus name for cases when OpenAPI is being emulated # OpenAI API requires the model be specified, but many compaitble APIs # have a model predetermined by the host -HOST_DEFAULT = 'HOST-DEFAULT' +HOST_DEFAULT_MODEL = HOST_DEFAULT = 'HOST-DEFAULT' class attr_dict(dict): @@ -25,18 +25,13 @@ class attr_dict(dict): __delattr__ = dict.__delitem__ -def openai_live( - rev='v1', - model='', - apikey=None, - debug=True - ): +def openai_live(apikey=None, debug=True, model=''): ''' Set up to use OpenAI proper. If you don't pass in an API key, the environment variable OPENAI_API_KEY will be checked Side note: a lot of OpenAI tutorials suggest that you embed your - OpenAI private key into the code, which is a horrible, terrible idea + OpenAI private key into the code, which is a horrible, no-good idea Extra reminder: If you set up your environment via .env file, make sure it's in .gitignore or equivalent so it never gets accidentally committed! @@ -49,27 +44,21 @@ def openai_live( Returns: openai_api (openai): Prepared OpenAI API ''' + import os import openai as openai_api # openai_api.api_version openai_api.debug = debug - openai_api.params = attr_dict( - rev=rev, - api_key=apikey, - model=model, - debug=debug) - + openai_api.api_key = apikey or os.getenv('OPENAI_API_KEY') + openai_api.model = model return openai_api def openai_emulation( host='http://127.0.0.1', port='8000', - rev='v1', - model=HOST_DEFAULT, apikey='BOGUS', - oaitype='open_ai', - debug=True): + debug=True, model=''): ''' Set up emulation, to use a alternative, OpenAI API compatible service Port 8000 for llama-cpp-python, Port 5001 for Oobabooga @@ -79,11 +68,7 @@ def openai_emulation( port (str, optional): Port to use at "host" - rev (str, optional): OpenAI revision to use - - apikey (str, optional): API key to use for authentication - - oaitype (str, optional): OpenAI type to use + apikey (str, optional): Unused standin for OpenAI API key debug (bool, optional): Debug flag @@ -92,17 +77,9 @@ def openai_emulation( ''' import openai as openai_api + rev = 'v1' openai_api.api_key = apikey - openai_api.api_type = oaitype openai_api.api_base = f'{host}:{port}/{rev}' openai_api.debug = debug - - openai_api.params = attr_dict( - api_key=apikey, - api_type=oaitype, - api_base=openai_api.api_base, - model=model, - debug=debug - ) - + openai_api.model = model return openai_api