From 1d35367339fa93730bb2ba6f23562d04adafe11c Mon Sep 17 00:00:00 2001
From: Uche Ogbuji
Date: Fri, 21 Jul 2023 21:11:19 -0600
Subject: [PATCH] [#19] Initial demo improvement moves

---
 demo/README.md                                |   2 +
 demo/alpaca_multitask_fix_xml.py              | 130 ------------------
 demo/alpaca_simple_qa_discord.py              |   4 -
 demo/chat_pdf_streamlit_ui.py                 |   4 -
 demo/multiprocess.py                          | 112 +++++++++++++++
 ...ca_simple_fix_xml.py => simple_fix_xml.py} |   0
 6 files changed, 114 insertions(+), 138 deletions(-)
 delete mode 100644 demo/alpaca_multitask_fix_xml.py
 create mode 100644 demo/multiprocess.py
 rename demo/{alpaca_simple_fix_xml.py => simple_fix_xml.py} (100%)

diff --git a/demo/README.md b/demo/README.md
index 2e9eb79..c6d45bb 100644
--- a/demo/README.md
+++ b/demo/README.md
@@ -1,3 +1,5 @@
+For all these demos you need access to an OpenAI-like service. The default assumption is that you have a self-hosted framework such as llama-cpp-python or text-generation-webui running.
+
 # Simplest
 
 ## alpaca_simple_fix_xml.py
diff --git a/demo/alpaca_multitask_fix_xml.py b/demo/alpaca_multitask_fix_xml.py
deleted file mode 100644
index c033eeb..0000000
--- a/demo/alpaca_multitask_fix_xml.py
+++ /dev/null
@@ -1,130 +0,0 @@
-'''
-Advanced demo using an LLM to repair data (XML), like
-alpaca_simple_fix_xml.py
-but demonstrating asyncio by running a separate, progress indicator task
-in the background while the LLM is generating. Should work even
-if the LLM framework in use doesn't suport asyncio,
-thanks to ogbujipt.async_helper
-
-You need access to an OpenAI-like service. Default assumption is that you
-have a self-hosted framework such as llama-cpp-python or text-generation-webui
-running. Say it's at my-llm-host:8000, you can do:
-
-python demo/alpaca_multitask_fix_xml.py --host=http://my-llm-host --port=8000
-
-Also allows you to use the actual OpenAI service, by specifying --openai
-'''
-
-import asyncio
-
-import click
-
-from ogbujipt import oapi_first_choice_text
-from ogbujipt.async_helper import schedule_callable, openai_api_surrogate
-from ogbujipt import config
-from ogbujipt.prompting.basic import format
-from ogbujipt.prompting.model_style import ALPACA_INSTRUCT_DELIMITERS
-
-DOTS_SPACING = 0.5  # Number of seconds between each dot printed to console
-
-
-# Could probably use something like tqdm.asyncio, if we wanted to be fancy
-async def indicate_progress(pause=DOTS_SPACING):
-    '''
-    Simple progress indicator for the console. Just prints dots.
-    '''
-    while True:
-        print('.', end='', flush=True)
-        await asyncio.sleep(pause)
-
-
-async def async_main(openai_params):
-    '''
-    Schedule one task to do a long-running/blocking LLM request, and another
-    to run a progress indicator in the background
-    '''
-    BAD_XML_CODE = '''\
-<earth>
-<country><b>Russia</country></b>
-<capital>Moscow</capital>
-</Earth>'''
-
-    prompt = format(
-        'Correct the following XML to make it well-formed',
-        contexts=BAD_XML_CODE,
-        delimiters=ALPACA_INSTRUCT_DELIMITERS)
-    print(prompt, '\n')
-
-    # Customize parameters for model behavior
-    # More info: https://platform.openai.com/docs/api-reference/completions
-    model_params = dict(
-        max_tokens=60,  # Limit number of generated tokens
-        top_p=1,  # AKA nucleus sampling; can increase generated text diversity
-        frequency_penalty=0,  # Favor more or less frequent tokens
-        presence_penalty=1,  # Prefer new, previously unused tokens
-        )
-    model_params.update(openai_params)
-
-    # Pro tip: When creating tasks with asyncio.create_task be mindful to not
-    # accidentally lose references to tasks, lest they get garbage collected,
-    # which sows chaos.
-    # In some cases asyncio.TaskGroup (new in Python 3.11)
-    # is a better alternative, but we can't use them in this case because
-    # they wait for all tasks to complete whereas we're done once only
-    # the LLM generation task is complete
-    indicator_task = asyncio.create_task(indicate_progress())
-    # Notice the pattern of passing in the callable iself, then the params
-    # You can't just do, say llm(prompt) because that will actually
-    # call the function & block on the LLM request
-    llm_task = asyncio.create_task(
-        schedule_callable(openai_api_surrogate, prompt, **model_params))
-    tasks = [indicator_task, llm_task]
-    done, _ = await asyncio.wait(
-        tasks, return_when=asyncio.FIRST_COMPLETED
-        )
-
-    # Instance of openai.openai_object.OpenAIObject, with lots of useful info
-    retval = next(iter(done)).result()
-    print(type(retval))
-    # Response is a json-like object; extract the text
-    print('\nFull response data from LLM:\n', retval)
-
-    # response is a json-like object;
-    # just get back the text of the response
-    response_text = oapi_first_choice_text(retval)
-    print('\nResponse text from LLM:\n\n', response_text)
-
-
-# Command line arguments defined in click decorators
-@click.command()
-@click.option('--host', default='http://127.0.0.1', help='OpenAI API host')
-@click.option('--port', default='8000', help='OpenAI API port')
-@click.option('--llmtemp', default='0.1', type=float, help='LLM temperature')
-@click.option('--openai', is_flag=True, default=False, type=bool,
-              help='Use live OpenAI API. If you use this option, you must have '
-              '"OPENAI_API_KEY" defined in your environmnt')
-@click.option('--model', default='', type=str,
-              help='OpenAI model to use (see https://platform.openai.com/docs/models)')
-def main(host, port, llmtemp, openai, model):
-    # Use OpenAI API if specified, otherwise emulate with supplied host, etc.
-    if openai:
-        assert not (host or port), 'Don\'t use --host or --port with --openai'
-        model = model or 'text-davinci-003'
-        openai_api = config.openai_live(
-            model=model, debug=True)
-    else:
-        # For now the model param is most useful in conjunction with --openai
-        model = model or config.HOST_DEFAULT
-        openai_api = config.openai_emulation(
-            host=host, port=port, model=model, debug=True)
-
-    # Preserve the provided temperature setting
-    openai_api.params.temperature = llmtemp
-    asyncio.run(async_main(openai_api.params))
-
-
-if __name__ == '__main__':
-    # CLI entry point
-    # Also protects against multiple launching of the overall program
-    # when a child process imports this
-    # viz https://docs.python.org/3/library/multiprocessing.html#multiprocessing-safe-main-import
-    main()
diff --git a/demo/alpaca_simple_qa_discord.py b/demo/alpaca_simple_qa_discord.py
index 6ecce4b..b5b4a5d 100644
--- a/demo/alpaca_simple_qa_discord.py
+++ b/demo/alpaca_simple_qa_discord.py
@@ -8,10 +8,6 @@ so for example if a request is sent, and a second comes in before it has
 completed, only the latter will complete.
 
-You need access to an OpenAI-like service. Default assumption is that you
-have a self-hosted framework such as llama-cpp-python or text-generation-webui
-running. Say it's at my-llm-host:8000, you can do:
-
 Prerequisites: python-dotenv discord.py
 
 You also need to make sure Python has root SSL certificates installed
diff --git a/demo/chat_pdf_streamlit_ui.py b/demo/chat_pdf_streamlit_ui.py
index 3785af2..c6a2601 100644
--- a/demo/chat_pdf_streamlit_ui.py
+++ b/demo/chat_pdf_streamlit_ui.py
@@ -14,10 +14,6 @@ Single-PDF support, for now, to keep the demo code simple.
 Can easily extend to e.g. work with multiple docs dropped in a directory
 
-You need access to an OpenAI-like service. Default assumption is that you
-have a self-hosted framework such as llama-cpp-python or text-generation-webui
-running. Assume for the following it's at my-llm-host:8000
-
 Prerequisites. From OgbujiPT cloned dir:.
 
 ```sh
diff --git a/demo/multiprocess.py b/demo/multiprocess.py
new file mode 100644
index 0000000..4bcc168
--- /dev/null
+++ b/demo/multiprocess.py
@@ -0,0 +1,112 @@
+'''
+Advanced demo showing quick chat with an LLM, but with 3 simultaneous requests,
+and also a separate progress indicator display while the LLM instances are generating.
+The key is taking advantage of Python's asyncio, and also multiprocessing, which requires
+some finesse so it works even when the LLM framework in use doesn't support asyncio.
+Luckily `ogbujipt.async_helper` comes in handy.
+
+As currently written the demo calls the live OpenAI service ('text-ada-001'); pass the path to your API key file as the first argument:
+
+```sh
+python demo/multiprocess.py my_openai_api_key_file
+```
+'''
+import sys
+import asyncio
+
+import openai
+
+from ogbujipt.async_helper import schedule_callable
+from ogbujipt import config
+from ogbujipt.prompting.basic import format
+from ogbujipt.prompting.model_style import ALPACA_DELIMITERS
+
+model_params = {}
+
+
+async def indicate_progress(pause=0.5):
+    '''
+    Simple progress indicator for the console. Just prints dots.
+    '''
+    while True:
+        print('.', end='', flush=True)
+        await asyncio.sleep(pause)
+
+
+openai_globals = ['api_base', 'api_key', 'api_key_path']
+
+
+def cache_openai_api_params():
+    params = {}
+    for k in openai_globals:
+        if hasattr(openai, k):
+            params[k] = getattr(openai, k)
+    return params
+
+
+def openai_api_surrogate(prompt, api_func=openai.Completion.create, **kwargs):
+    # Reset API details, relevant when emulating OpenAI
+    trimmed_kwargs = {}
+    for k in kwargs:
+        if k in openai_globals:
+            setattr(openai, k, kwargs[k])
+        else:
+            trimmed_kwargs[k] = kwargs[k]
+    # Send other, provided args to the generation function
+    return api_func(prompt=prompt, **trimmed_kwargs)
+
+
+class llm_request:
+    tasks = {}
+
+    def __init__(self, topic):
+        self.topic = topic
+
+    def wrap(self):
+        prompt = format(f'Tell me a funny joke about {self.topic}', delimiters=ALPACA_DELIMITERS)
+
+        self.task = asyncio.create_task(
+            schedule_callable(openai_api_surrogate, prompt, model='text-ada-001', **cache_openai_api_params()))
+        llm_request.tasks[self.task] = self
+        return self.task
+
+
+async def async_main():
+    topics = ['wild animals', 'vehicles', 'space aliens']
+
+    # model_params = dict(
+    #     max_tokens=60,  # Limit number of generated tokens
+    #     top_p=1,  # AKA nucleus sampling; can increase generated text diversity
+    #     frequency_penalty=0,  # Favor more or less frequent tokens
+    #     presence_penalty=1,  # Prefer new, previously unused tokens
+    #     )
+    indicator_task = asyncio.create_task(indicate_progress())
+    # Notice the pattern of passing in the callable itself, then the params
+    # You can't just do, say llm(prompt) because that will actually
+    # call the function & block on the LLM request
+    llm_requests = [llm_request(t) for t in topics]
+    llm_tasks = [req.wrap() for req in llm_requests]
+    # Need to gather to make sure all LLM tasks are completed
+    gathered_llm_tasks = asyncio.gather(*llm_tasks)
+    done, _ = await asyncio.wait((indicator_task, gathered_llm_tasks), return_when=asyncio.FIRST_COMPLETED)
+
+    # Only completed task will be from the gather() of llm_tasks, and it has
+    # results in original order
+    results = zip(llm_requests, next(iter(done)).result())
+    for req, resp in results:
+        print(f'Result re {req.topic}')
+        print(resp)
+
+
+def main():
+    openai.model = 'text-ada-001'
+    # Just hardcode these params
+    model_params['llmtemp'], model_params['model'] = 1, 'text-ada-001'
+    openai.api_key_path = sys.argv[1]
+    # openai_api = config.openai_live(model=model, debug=True)
+    # model_params['api_key_path'] = openai.api_key_path
+    asyncio.run(async_main())
+
+
+if __name__ == '__main__':
+    # Re-entry control. Don't want main() executed on re-import
+    main()
diff --git a/demo/alpaca_simple_fix_xml.py b/demo/simple_fix_xml.py
similarity index 100%
rename from demo/alpaca_simple_fix_xml.py
rename to demo/simple_fix_xml.py
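Note on the concurrency pattern introduced in demo/multiprocess.py: the demo races a never-ending progress-indicator task against an `asyncio.gather` of the LLM request tasks, so `asyncio.wait(..., return_when=asyncio.FIRST_COMPLETED)` returns as soon as the gather finishes, with results in submission order. Below is a minimal, stdlib-only sketch of that same pattern, independent of `ogbujipt.async_helper` and of any real LLM service; `slow_llm_call` is a hypothetical stand-in for a blocking API call, not part of ogbujipt or the OpenAI client.

```python
import asyncio
import time


def slow_llm_call(topic):
    # Hypothetical stand-in for a blocking LLM request (e.g. a synchronous completion call)
    time.sleep(2)
    return f'A joke about {topic}'


async def indicate_progress(pause=0.5):
    # Print dots until cancelled, so the console shows liveness while work runs elsewhere
    while True:
        print('.', end='', flush=True)
        await asyncio.sleep(pause)


async def main():
    topics = ['wild animals', 'vehicles', 'space aliens']
    loop = asyncio.get_running_loop()
    indicator = asyncio.create_task(indicate_progress())
    # Off-load each blocking call to the default executor so the event loop stays free
    llm_futures = [loop.run_in_executor(None, slow_llm_call, t) for t in topics]
    gathered = asyncio.gather(*llm_futures)
    # The only awaitable that can complete is the gather, since the indicator loops forever
    done, _ = await asyncio.wait((indicator, gathered), return_when=asyncio.FIRST_COMPLETED)
    indicator.cancel()  # Stop the dots once all the "LLM" results are in
    for topic, result in zip(topics, next(iter(done)).result()):
        print(f'\n{topic}: {result}')


if __name__ == '__main__':
    asyncio.run(main())
```

In the demo itself, `schedule_callable` plays the role of `run_in_executor` here: it off-loads the blocking generation call so the event loop remains free to keep the indicator task printing.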