From 1d35367339fa93730bb2ba6f23562d04adafe11c Mon Sep 17 00:00:00 2001 From: Uche Ogbuji Date: Fri, 21 Jul 2023 21:11:19 -0600 Subject: [PATCH 1/4] [#19] Initial demo improvement moves --- demo/README.md | 2 + demo/alpaca_multitask_fix_xml.py | 130 ------------------ demo/alpaca_simple_qa_discord.py | 4 - demo/chat_pdf_streamlit_ui.py | 4 - demo/multiprocess.py | 112 +++++++++++++++ ...ca_simple_fix_xml.py => simple_fix_xml.py} | 0 6 files changed, 114 insertions(+), 138 deletions(-) delete mode 100644 demo/alpaca_multitask_fix_xml.py create mode 100644 demo/multiprocess.py rename demo/{alpaca_simple_fix_xml.py => simple_fix_xml.py} (100%) diff --git a/demo/README.md b/demo/README.md index 2e9eb79..c6d45bb 100644 --- a/demo/README.md +++ b/demo/README.md @@ -1,3 +1,5 @@ +For all these demos you need access to an OpenAI-like service. Default assumption is that you have a self-hosted framework such as llama-cpp-python or text-generation-webui running + # Simplest ## alpaca_simple_fix_xml.py diff --git a/demo/alpaca_multitask_fix_xml.py b/demo/alpaca_multitask_fix_xml.py deleted file mode 100644 index c033eeb..0000000 --- a/demo/alpaca_multitask_fix_xml.py +++ /dev/null @@ -1,130 +0,0 @@ -''' -Advanced demo using an LLM to repair data (XML), like -alpaca_simple_fix_xml.py -but demonstrating asyncio by running a separate, progress indicator task -in the background while the LLM is generating. Should work even -if the LLM framework in use doesn't suport asyncio, -thanks to ogbujipt.async_helper - -You need access to an OpenAI-like service. Default assumption is that you -have a self-hosted framework such as llama-cpp-python or text-generation-webui -running. Say it's at my-llm-host:8000, you can do: - -python demo/alpaca_multitask_fix_xml.py --host=http://my-llm-host --port=8000 - -Also allows you to use the actual OpenAI service, by specifying --openai -''' - -import asyncio - -import click - -from ogbujipt import oapi_first_choice_text -from ogbujipt.async_helper import schedule_callable, openai_api_surrogate -from ogbujipt import config -from ogbujipt.prompting.basic import format -from ogbujipt.prompting.model_style import ALPACA_INSTRUCT_DELIMITERS - -DOTS_SPACING = 0.5 # Number of seconds between each dot printed to console - - -# Could probably use something like tqdm.asyncio, if we wanted to be fancy -async def indicate_progress(pause=DOTS_SPACING): - ''' - Simple progress indicator for the console. Just prints dots. - ''' - while True: - print('.', end='', flush=True) - await asyncio.sleep(pause) - - -async def async_main(openai_params): - ''' - Schedule one task to do a long-running/blocking LLM request, and another - to run a progress indicator in the background - ''' - BAD_XML_CODE = '''\ - -Russia -Moscow -''' - - prompt = format( - 'Correct the following XML to make it well-formed', - contexts=BAD_XML_CODE, - delimiters=ALPACA_INSTRUCT_DELIMITERS) - print(prompt, '\n') - - # Customize parameters for model behavior - # More info: https://platform.openai.com/docs/api-reference/completions - model_params = dict( - max_tokens=60, # Limit number of generated tokens - top_p=1, # AKA nucleus sampling; can increase generated text diversity - frequency_penalty=0, # Favor more or less frequent tokens - presence_penalty=1, # Prefer new, previously unused tokens - ) - model_params.update(openai_params) - - # Pro tip: When creating tasks with asyncio.create_task be mindful to not - # accidentally lose references to tasks, lest they get garbage collected, - # which sows chaos. 
In some cases asyncio.TaskGroup (new in Python 3.11) - # is a better alternative, but we can't use them in this case because - # they wait for all tasks to complete whereas we're done once only - # the LLM generation task is complete - indicator_task = asyncio.create_task(indicate_progress()) - # Notice the pattern of passing in the callable iself, then the params - # You can't just do, say llm(prompt) because that will actually - # call the function & block on the LLM request - llm_task = asyncio.create_task( - schedule_callable(openai_api_surrogate, prompt, **model_params)) - tasks = [indicator_task, llm_task] - done, _ = await asyncio.wait( - tasks, return_when=asyncio.FIRST_COMPLETED - ) - - # Instance of openai.openai_object.OpenAIObject, with lots of useful info - retval = next(iter(done)).result() - print(type(retval)) - # Response is a json-like object; extract the text - print('\nFull response data from LLM:\n', retval) - - # response is a json-like object; - # just get back the text of the response - response_text = oapi_first_choice_text(retval) - print('\nResponse text from LLM:\n\n', response_text) - - -# Command line arguments defined in click decorators -@click.command() -@click.option('--host', default='http://127.0.0.1', help='OpenAI API host') -@click.option('--port', default='8000', help='OpenAI API port') -@click.option('--llmtemp', default='0.1', type=float, help='LLM temperature') -@click.option('--openai', is_flag=True, default=False, type=bool, - help='Use live OpenAI API. If you use this option, you must have ' - '"OPENAI_API_KEY" defined in your environmnt') -@click.option('--model', default='', type=str, - help='OpenAI model to use (see https://platform.openai.com/docs/models)') -def main(host, port, llmtemp, openai, model): - # Use OpenAI API if specified, otherwise emulate with supplied host, etc. - if openai: - assert not (host or port), 'Don\'t use --host or --port with --openai' - model = model or 'text-davinci-003' - openai_api = config.openai_live( - model=model, debug=True) - else: - # For now the model param is most useful in conjunction with --openai - model = model or config.HOST_DEFAULT - openai_api = config.openai_emulation( - host=host, port=port, model=model, debug=True) - - # Preserve the provided temperature setting - openai_api.params.temperature = llmtemp - asyncio.run(async_main(openai_api.params)) - - -if __name__ == '__main__': - # CLI entry point - # Also protects against multiple launching of the overall program - # when a child process imports this - # viz https://docs.python.org/3/library/multiprocessing.html#multiprocessing-safe-main-import - main() diff --git a/demo/alpaca_simple_qa_discord.py b/demo/alpaca_simple_qa_discord.py index 6ecce4b..b5b4a5d 100644 --- a/demo/alpaca_simple_qa_discord.py +++ b/demo/alpaca_simple_qa_discord.py @@ -8,10 +8,6 @@ so for example if a request is sent, and a second comes in before it has completed, only the latter will complete. -You need access to an OpenAI-like service. Default assumption is that you -have a self-hosted framework such as llama-cpp-python or text-generation-webui -running. Say it's at my-llm-host:8000, you can do: - Prerequisites: python-dotenv discord.py You also need to make sure Python has root SSL certificates installed diff --git a/demo/chat_pdf_streamlit_ui.py b/demo/chat_pdf_streamlit_ui.py index 3785af2..c6a2601 100644 --- a/demo/chat_pdf_streamlit_ui.py +++ b/demo/chat_pdf_streamlit_ui.py @@ -14,10 +14,6 @@ Single-PDF support, for now, to keep the demo code simple. 
Can easily extend to e.g. work with multiple docs dropped in a directory -You need access to an OpenAI-like service. Default assumption is that you -have a self-hosted framework such as llama-cpp-python or text-generation-webui -running. Assume for the following it's at my-llm-host:8000 - Prerequisites. From OgbujiPT cloned dir:. ```sh diff --git a/demo/multiprocess.py b/demo/multiprocess.py new file mode 100644 index 0000000..4bcc168 --- /dev/null +++ b/demo/multiprocess.py @@ -0,0 +1,112 @@ +''' +Advanced demo showing quick chat with an LLM, but with 3 simultaneous requests, +and also a separate, progress indicator dislay while the LLM instances are generating. +Key is taking advantage of Python's asyncio, and also multiprocess, which requires some finesse, +to work even when the LLM framework in use doesn't suport asyncio. +Luckily `ogbujipt.async_helper` comes in handy. + +```sh +python demo/alpaca_multitask_fix_xml.py --host=http://my-llm-host --port=8000 +``` + +Also allows you to use the actual OpenAI ChatGPT service, by specifying --openai +''' +import sys +import asyncio + +import openai + +from ogbujipt.async_helper import schedule_callable, openai_api_surrogate +from ogbujipt import config +from ogbujipt.prompting.basic import format +from ogbujipt.prompting.model_style import ALPACA_DELIMITERS + +model_params = {} + + +async def indicate_progress(pause=0.5): + ''' + Simple progress indicator for the console. Just prints dots. + ''' + while True: + print('.', end='', flush=True) + await asyncio.sleep(pause) + + +openai_globals = ['api_base', 'api_key', 'api_key_path'] + + +def cache_openai_api_params(): + params = {} + for k in openai_globals: + if hasattr(openai, k): + params[k] = getattr(openai, k) + return params + + +def openai_api_surrogate(prompt, api_func=openai.Completion.create, **kwargs): + # Reset API details, relevant when emulating OpenAI + trimmed_kwargs = {} + for k in kwargs: + if k in openai_globals: + setattr(openai, k, kwargs[k]) + else: + trimmed_kwargs[k] = kwargs[k] + # Send other, provided args to the generation function + return api_func(prompt=prompt, **trimmed_kwargs) + + +class llm_request: + tasks = {} + + def __init__(self, topic): + self.topic = topic + + def wrap(self): + prompt = format(f'Tell me a funny joke about {self.topic}', delimiters=ALPACA_DELIMITERS) + + self.task = asyncio.create_task( + schedule_callable(openai_api_surrogate, prompt, model='text-ada-001', **cache_openai_api_params())) + llm_request.tasks[self.task] = self + return self.task + + +async def async_main(): + topics = ['wild animals', 'vehicles', 'space aliens'] + + # model_params = dict( + # max_tokens=60, # Limit number of generated tokens + # top_p=1, # AKA nucleus sampling; can increase generated text diversity + # frequency_penalty=0, # Favor more or less frequent tokens + # presence_penalty=1, # Prefer new, previously unused tokens + # ) + indicator_task = asyncio.create_task(indicate_progress()) + # Notice the pattern of passing in the callable iself, then the params + # You can't just do, say llm(prompt) because that will actually + # call the function & block on the LLM request + llm_requests = [llm_request(t) for t in topics] + llm_tasks = [req.wrap() for req in llm_requests] + # Need to gather to make sure all LLm tasks are completed + gathered_llm_tasks = asyncio.gather(*llm_tasks) + done, _ = await asyncio.wait((indicator_task, gathered_llm_tasks), return_when=asyncio.FIRST_COMPLETED) + + # Only completed task will be from the gather() of llm_tasks, and it has 
results in original order + results = zip(llm_requests, next(iter(done)).result()) + for req, resp in results: + print(f'Result re {req.topic}') + print(resp) + + +def main(): + openai.model = 'text-ada-001' + # Just hardcode these params + model_params['llmtemp'], model_params['model'] = 1, 'text-ada-001' + openai.api_key_path = sys.argv[1] + # openai_api = config.openai_live(model=model, debug=True) + # model_params['api_key_path'] = openai.api_key_path + asyncio.run(async_main()) + + +if __name__ == '__main__': + # Re-entry control. Don't want main() executed on re-import + main() diff --git a/demo/alpaca_simple_fix_xml.py b/demo/simple_fix_xml.py similarity index 100% rename from demo/alpaca_simple_fix_xml.py rename to demo/simple_fix_xml.py From aedffeb135d88a98bdbf1f8291851e761022d959 Mon Sep 17 00:00:00 2001 From: Uche Ogbuji Date: Sat, 22 Jul 2023 07:11:32 -0600 Subject: [PATCH 2/4] [#19] Replace demo/alpaca_multitask_fix_xml.py with demo/multiprocess.py. Improvements to async_helper.py and config.py --- demo/multiprocess.py | 126 ++++++++++++++++++++++-------------------- pylib/async_helper.py | 88 +++++++++++++++-------------- pylib/config.py | 43 ++++---------- 3 files changed, 123 insertions(+), 134 deletions(-) diff --git a/demo/multiprocess.py b/demo/multiprocess.py index 4bcc168..2134769 100644 --- a/demo/multiprocess.py +++ b/demo/multiprocess.py @@ -11,102 +11,108 @@ Also allows you to use the actual OpenAI ChatGPT service, by specifying --openai ''' -import sys import asyncio -import openai +# import openai -from ogbujipt.async_helper import schedule_callable, openai_api_surrogate +import click + +from ogbujipt import oapi_first_choice_text from ogbujipt import config +from ogbujipt.async_helper import ( + schedule_callable, + openai_api_surrogate, + console_progress_indicator, + save_openai_api_params) from ogbujipt.prompting.basic import format from ogbujipt.prompting.model_style import ALPACA_DELIMITERS -model_params = {} - -async def indicate_progress(pause=0.5): +class llm_request: ''' - Simple progress indicator for the console. Just prints dots. 
+ Encapsulates each LLM service request via OpenAI API (even for self-hosted LLM) ''' - while True: - print('.', end='', flush=True) - await asyncio.sleep(pause) - - -openai_globals = ['api_base', 'api_key', 'api_key_path'] - - -def cache_openai_api_params(): - params = {} - for k in openai_globals: - if hasattr(openai, k): - params[k] = getattr(openai, k) - return params - - -def openai_api_surrogate(prompt, api_func=openai.Completion.create, **kwargs): - # Reset API details, relevant when emulating OpenAI - trimmed_kwargs = {} - for k in kwargs: - if k in openai_globals: - setattr(openai, k, kwargs[k]) - else: - trimmed_kwargs[k] = kwargs[k] - # Send other, provided args to the generation function - return api_func(prompt=prompt, **trimmed_kwargs) - - -class llm_request: tasks = {} - def __init__(self, topic): + def __init__(self, topic, llmtemp, **model_params): + ''' + topic - a particular topic about which we'll ask the LLM + model_params - mapping of custom parameters for model behavior, e.g.: + max_tokens: limit number of generated tokens (default 16) + top_p: AKA nucleus sampling; can increase generated text diversity + frequency_penalty: Favor more or less frequent tokens + presence_penalty: Prefer new, previously unused tokens + More info: https://platform.openai.com/docs/api-reference/completions + ''' self.topic = topic + self.llmtemp = llmtemp + self.model_params = model_params def wrap(self): prompt = format(f'Tell me a funny joke about {self.topic}', delimiters=ALPACA_DELIMITERS) + # Pattern of passing in the callable iself, then the params—required for multiprocess execution self.task = asyncio.create_task( - schedule_callable(openai_api_surrogate, prompt, model='text-ada-001', **cache_openai_api_params())) + schedule_callable(openai_api_surrogate, prompt, temperature=self.llmtemp, + **self.model_params, **save_openai_api_params())) llm_request.tasks[self.task] = self return self.task -async def async_main(): - topics = ['wild animals', 'vehicles', 'space aliens'] - - # model_params = dict( - # max_tokens=60, # Limit number of generated tokens - # top_p=1, # AKA nucleus sampling; can increase generated text diversity - # frequency_penalty=0, # Favor more or less frequent tokens - # presence_penalty=1, # Prefer new, previously unused tokens - # ) - indicator_task = asyncio.create_task(indicate_progress()) +async def async_main(topics, llmtemp): + # Pro tip: When creating tasks with asyncio.create_task be mindful to not + # accidentally lose references to tasks, lest they get garbage collected, + # which sows chaos. 
In some cases asyncio.TaskGroup (new in Python 3.11) + # is a better alternative, but we can't use them in this case because + # they wait for all tasks to complete whereas we're done once only + # the LLM generation task is complete + indicator_task = asyncio.create_task(console_progress_indicator()) # Notice the pattern of passing in the callable iself, then the params # You can't just do, say llm(prompt) because that will actually # call the function & block on the LLM request - llm_requests = [llm_request(t) for t in topics] + llm_requests = [llm_request(t, llmtemp, max_tokens=1024) for t in topics] llm_tasks = [req.wrap() for req in llm_requests] # Need to gather to make sure all LLm tasks are completed gathered_llm_tasks = asyncio.gather(*llm_tasks) done, _ = await asyncio.wait((indicator_task, gathered_llm_tasks), return_when=asyncio.FIRST_COMPLETED) - # Only completed task will be from the gather() of llm_tasks, and it has results in original order + # Completed task will from gather() of llm_tasks; results in original task arg order results = zip(llm_requests, next(iter(done)).result()) for req, resp in results: print(f'Result re {req.topic}') - print(resp) + # resp is an instance of openai.openai_object.OpenAIObject, with lots of useful info + print('\nFull response data from LLM:\n', resp) + # Just the response text + response_text = oapi_first_choice_text(resp) + print('\nResponse text from LLM:\n\n', response_text) + + +# Command line arguments defined in click decorators +@click.command() +@click.option('--host', default='http://127.0.0.1', help='OpenAI API host') +@click.option('--port', default='8000', help='OpenAI API port') +@click.option('--llmtemp', default='0.9', type=float, help='LLM temperature') +@click.option('--openai', is_flag=True, default=False, type=bool, + help='Use live OpenAI API. If you use this option, you must have ' + '"OPENAI_API_KEY" defined in your environmnt') +@click.option('--model', default='', type=str, + help='OpenAI model to use (see https://platform.openai.com/docs/models)') +def main(host, port, llmtemp, openai, model): + # Use OpenAI API if specified, otherwise emulate with supplied host, etc. + if openai: + model = model or 'text-davinci-003' + config.openai_live(model=model, debug=True) + else: + # Generally not really useful except in conjunction with --openai + model = model or config.HOST_DEFAULT + config.openai_emulation(host=host, port=port, model=model, debug=True) + topics = ['wild animals', 'vehicles', 'space aliens'] -def main(): - openai.model = 'text-ada-001' - # Just hardcode these params - model_params['llmtemp'], model_params['model'] = 1, 'text-ada-001' - openai.api_key_path = sys.argv[1] - # openai_api = config.openai_live(model=model, debug=True) - # model_params['api_key_path'] = openai.api_key_path - asyncio.run(async_main()) + asyncio.run(async_main(topics, llmtemp)) if __name__ == '__main__': - # Re-entry control. Don't want main() executed on re-import + # CLI entry point. 
Also protects against re-execution of main() after process fork + # viz https://docs.python.org/3/library/multiprocessing.html#multiprocessing-safe-main-import main() diff --git a/pylib/async_helper.py b/pylib/async_helper.py index 922436d..8c133f7 100644 --- a/pylib/async_helper.py +++ b/pylib/async_helper.py @@ -6,13 +6,12 @@ Coroutines to make it a little easier to multitask LLM access using Python asyncio ''' +import sys import asyncio import concurrent.futures from functools import partial -import openai - async def schedule_callable(callable, *args, **kwargs): ''' @@ -41,41 +40,8 @@ async def schedule_callable(callable, *args, **kwargs): return response -async def schedule_openai_call(callable, *args, **kwargs): - ''' - Schedule long-running/blocking LLM request in a separate process, - wrapped to work well in an asyncio event loop - - Basically hides away a bunch of the multiprocessing webbing - - e.g. `llm_task = asyncio.create_task(schedule_callable(llm, prompt))` - - Can then use asyncio.wait(), asyncio.gather(), etc. with `llm_task` - - Args: - callable (callable): Callable to be scheduled - - Returns: - response: Response object - ''' - # Link up the current async event loop for multiprocess execution - loop = asyncio.get_running_loop() - executor = concurrent.futures.ProcessPoolExecutor() - # Need to partial execute to get in any kwargs for the target callable - if 'model' not in kwargs: - kwargs['model'] = '' - prepped_callable = partial( - callable, - api_base=openai.api_base, - api_key=openai.api_key, - **kwargs) - # Spawn a separate process for the LLM call - response = await loop.run_in_executor(executor, prepped_callable, *args) - return response - - # FIXME: Add all arguments for OpenAI API generation functions here -def openai_api_surrogate(prompt, api_func=openai.Completion.create, **kwargs): +def openai_api_surrogate(prompt, api_func=None, **kwargs): ''' Wrapper around OpenAI API generation functions. Needed for use in multiprocessing because it seems when the openai library gets @@ -88,10 +54,50 @@ def openai_api_surrogate(prompt, api_func=openai.Completion.create, **kwargs): api_func: API function to utilize Returns: - api_func (openai): OpenAI API object + api_func: Result of OpenAI API call ''' - # Reset API details, relevant when emulating OpenAI - openai.api_base = kwargs['api_base'] - openai.api_key = kwargs['api_key'] + import openai + + api_func = api_func or openai.Completion.create + + trimmed_kwargs = {} + for k in kwargs: + if k in OPENAI_GLOALS: + setattr(openai, k, kwargs[k]) + else: + trimmed_kwargs[k] = kwargs[k] # Send other, provided args to the generation function - return api_func(prompt=prompt, **kwargs) + return api_func(prompt=prompt, **trimmed_kwargs) + + +# Extracted from https://github.com/openai/openai-python/blob/main/openai/__init__.py +OPENAI_GLOALS = ['api_key', 'api_key_path', 'api_base', 'organization', 'api_type', 'api_version', + 'proxy', 'app_info', 'debug', 'log'] + + +def save_openai_api_params(): + ''' + openai package uses globals for a lot of its parameters, including the mandatory api_key. + In some circumstances, such as when multiprocessing, these should be saved in order to be re-set when the module is re-imported. 
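+
+    Illustrative usage sketch, mirroring demo/multiprocess.py, with `prompt` and `model_params`
+    standing in for the caller's values:
+
+        asyncio.create_task(schedule_callable(
+            openai_api_surrogate, prompt, **model_params, **save_openai_api_params()))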
+ ''' + import openai + + params = {} + # model also carried as a user convenience + for k in OPENAI_GLOALS + ['model']: + if hasattr(openai, k): + params[k] = getattr(openai, k) + return params + + +async def console_progress_indicator(pause=0.5, file=sys.stderr): + ''' + Simple progress indicator for the console. Just prints dots. + + pause - seconds between each dot printed to console, default half a sec + + file - file for dots output, default STDERR + ''' + while True: + print('.', end='', flush=True, file=file) + await asyncio.sleep(pause) diff --git a/pylib/config.py b/pylib/config.py index 5e279ab..e4c7aa7 100644 --- a/pylib/config.py +++ b/pylib/config.py @@ -14,7 +14,7 @@ # Really just a bogus name for cases when OpenAPI is being emulated # OpenAI API requires the model be specified, but many compaitble APIs # have a model predetermined by the host -HOST_DEFAULT = 'HOST-DEFAULT' +HOST_DEFAULT_MODEL = HOST_DEFAULT = 'HOST-DEFAULT' class attr_dict(dict): @@ -25,18 +25,13 @@ class attr_dict(dict): __delattr__ = dict.__delitem__ -def openai_live( - rev='v1', - model='', - apikey=None, - debug=True - ): +def openai_live(apikey=None, debug=True, model=''): ''' Set up to use OpenAI proper. If you don't pass in an API key, the environment variable OPENAI_API_KEY will be checked Side note: a lot of OpenAI tutorials suggest that you embed your - OpenAI private key into the code, which is a horrible, terrible idea + OpenAI private key into the code, which is a horrible, no-good idea Extra reminder: If you set up your environment via .env file, make sure it's in .gitignore or equivalent so it never gets accidentally committed! @@ -49,27 +44,21 @@ def openai_live( Returns: openai_api (openai): Prepared OpenAI API ''' + import os import openai as openai_api # openai_api.api_version openai_api.debug = debug - openai_api.params = attr_dict( - rev=rev, - api_key=apikey, - model=model, - debug=debug) - + openai_api.api_key = apikey or os.getenv('OPENAI_API_KEY') + openai_api.model = model return openai_api def openai_emulation( host='http://127.0.0.1', port='8000', - rev='v1', - model=HOST_DEFAULT, apikey='BOGUS', - oaitype='open_ai', - debug=True): + debug=True, model=''): ''' Set up emulation, to use a alternative, OpenAI API compatible service Port 8000 for llama-cpp-python, Port 5001 for Oobabooga @@ -79,11 +68,7 @@ def openai_emulation( port (str, optional): Port to use at "host" - rev (str, optional): OpenAI revision to use - - apikey (str, optional): API key to use for authentication - - oaitype (str, optional): OpenAI type to use + apikey (str, optional): Unused standin for OpenAI API key debug (bool, optional): Debug flag @@ -92,17 +77,9 @@ def openai_emulation( ''' import openai as openai_api + rev = 'v1' openai_api.api_key = apikey - openai_api.api_type = oaitype openai_api.api_base = f'{host}:{port}/{rev}' openai_api.debug = debug - - openai_api.params = attr_dict( - api_key=apikey, - api_type=oaitype, - api_base=openai_api.api_base, - model=model, - debug=debug - ) - + openai_api.model = model return openai_api From 80175a90f7d4f50a9a5a8250aa9a3bb517fc5da0 Mon Sep 17 00:00:00 2001 From: Uche Ogbuji Date: Sat, 22 Jul 2023 07:16:49 -0600 Subject: [PATCH 3/4] [#19] Lint fix --- pylib/async_helper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pylib/async_helper.py b/pylib/async_helper.py index 8c133f7..7fd4ec3 100644 --- a/pylib/async_helper.py +++ b/pylib/async_helper.py @@ -78,7 +78,7 @@ def openai_api_surrogate(prompt, api_func=None, **kwargs): def 
save_openai_api_params():
     '''
     openai package uses globals for a lot of its parameters, including the mandatory api_key.
-    In some circumstances, such as when multiprocessing, these should be saved in order to be re-set when the module is re-imported.
+    In some cases, e.g. multiprocessing, these should be saved for re-setting when the module is re-imported.
     '''
     import openai
 
From fb1501c6a6f490d857fb71339cd35c8eda150a77 Mon Sep 17 00:00:00 2001
From: Uche Ogbuji
Date: Sat, 22 Jul 2023 07:51:45 -0600
Subject: [PATCH 4/4] [#19] Fix demos

---
 demo/README.md                   | 15 ++++++---------
 demo/alpaca_simple_qa_discord.py | 15 +++++++--------
 demo/chat_web_selects.py         | 17 +++++++----------
 3 files changed, 20 insertions(+), 27 deletions(-)

diff --git a/demo/README.md b/demo/README.md
index c6d45bb..7a47210 100644
--- a/demo/README.md
+++ b/demo/README.md
@@ -2,20 +2,17 @@ For all these demos you need access to an OpenAI-like service. Default assumptio
 
 # Simplest
 
-## alpaca_simple_fix_xml.py
+## simple_fix_xml.py
 
-Quick demo, sending an Alpaca-compatible LLM some bad XML & asking it to make corrections.
+Quick demo, sending a Llama or Alpaca-compatible LLM some bad XML & asking it to make corrections.
 
 # Intermediate
 
-## alpaca_multitask_fix_xml.py
+## multiprocess.py
 
-Intermediate demo using an LLM to repair data (XML), like
-alpaca_simple_fix_xml.py
-but running a separate, progress indicator task in the background
-while the LLm works, using asyncio. This should work even
-if the LLM framework we're using doesn't suport asyncio,
-thanks to ogbujipt.async_helper
+Intermediate demo asking an LLM for several jokes simultaneously, on various topics,
+running a separate progress indicator task in the background, using asyncio.
+Works even if the LLM framework in use doesn't support asyncio, thanks to ogbujipt.async_helper
 
 # Advanced
 
diff --git a/demo/alpaca_simple_qa_discord.py b/demo/alpaca_simple_qa_discord.py
index b5b4a5d..d7a7734 100644
--- a/demo/alpaca_simple_qa_discord.py
+++ b/demo/alpaca_simple_qa_discord.py
@@ -6,7 +6,7 @@
 Note: This is a simple demo, which doesn't do any client-side job management,
 so for example if a request is sent, and a second comes in before it has
 completed,
-only the latter will complete.
+the LLM back end is relied on to cope.
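+
+Assumes a .env file (or equivalent environment variables) supplying DISCORD_TOKEN, LLM_HOST,
+LLM_PORT and LLM_TEMP, as read in main() below.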
Prerequisites: python-dotenv discord.py @@ -40,7 +40,7 @@ from dotenv import load_dotenv from ogbujipt.config import openai_emulation -from ogbujipt.async_helper import schedule_callable, openai_api_surrogate +from ogbujipt.async_helper import schedule_callable, openai_api_surrogate, save_openai_api_params from ogbujipt import oapi_first_choice_text from ogbujipt.prompting.basic import format from ogbujipt.prompting.model_style import ALPACA_DELIMITERS @@ -62,12 +62,12 @@ async def send_llm_msg(msg): # See demo/alpaca_multitask_fix_xml.py for some important warnings here llm_task = asyncio.create_task( - schedule_callable(openai_api_surrogate, prompt, **llm.params)) + schedule_callable(openai_api_surrogate, prompt, temperature=llmtemp, max_tokens=512, + **save_openai_api_params())) tasks = [llm_task] done, _ = await asyncio.wait( - tasks, return_when=asyncio.FIRST_COMPLETED - ) + tasks, return_when=asyncio.FIRST_COMPLETED) response = next(iter(done)).result() @@ -113,7 +113,7 @@ async def on_ready(): def main(): # A real app would probably use a discord.py cog w/ these as data members - global llm, llm_temp + global llm, llmtemp load_dotenv() # From .env file DISCORD_TOKEN = os.getenv('DISCORD_TOKEN') @@ -122,8 +122,7 @@ def main(): # Set up API connector & update temperature from environment llm = openai_emulation(host=LLM_HOST, port=LLM_PORT) - llm.params.llmtemp = os.getenv('LLM_TEMP') - llm.params.max_tokens = 512 + llmtemp = os.getenv('LLM_TEMP') # launch Discord client event loop client.run(DISCORD_TOKEN) diff --git a/demo/chat_web_selects.py b/demo/chat_web_selects.py index 8b511de..db1fba4 100644 --- a/demo/chat_web_selects.py +++ b/demo/chat_web_selects.py @@ -34,7 +34,7 @@ from ogbujipt import config from ogbujipt.prompting import format, ALPACA_INSTRUCT_DELIMITERS -from ogbujipt.async_helper import schedule_openai_call, openai_api_surrogate +from ogbujipt.async_helper import schedule_callable, openai_api_surrogate, save_openai_api_params from ogbujipt import oapi_first_choice_text from ogbujipt.text_helper import text_splitter from ogbujipt.embedding_helper import qdrant_collection @@ -82,7 +82,7 @@ async def read_site(url, collection): print(f'{collection.count()} chunks added to collection') -async def async_main(sites, api_params): +async def async_main(sites): # Automatic download from HuggingFace # Seem to be reentrancy issues with HuggingFace; defer import from sentence_transformers import SentenceTransformer @@ -134,7 +134,7 @@ async def async_main(sites, api_params): indicator_task = asyncio.create_task(indicate_progress()) llm_task = asyncio.create_task( - schedule_openai_call(openai_api_surrogate, prompt, **model_params)) + schedule_callable(openai_api_surrogate, prompt, **model_params, **save_openai_api_params())) tasks = [indicator_task, llm_task] done, _ = await asyncio.wait( tasks, return_when=asyncio.FIRST_COMPLETED) @@ -164,17 +164,14 @@ async def async_main(sites, api_params): def main(host, port, openai_key, model, sites): # Use OpenAI API if specified, otherwise emulate with supplied host, etc. 
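+    # The config helpers now set the openai module-level globals directly, so there is
+    # no params object to capture here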
if openai_key: - assert not (host or port), 'Don\'t use --host or --port with --openai' model = model or 'text-davinci-003' - openai_api = config.openai_live( - model=model, debug=True) + config.openai_live(apikey=openai_key, model=model, debug=True) else: - # For now the model param is most useful in conjunction with --openai + # Generally not really useful except in conjunction with --openai model = model or config.HOST_DEFAULT - openai_api = config.openai_emulation( - host=host, port=port, model=model, debug=True) + config.openai_emulation(host=host, port=port, model=model, debug=True) - asyncio.run(async_main(sites, openai_api.params)) + asyncio.run(async_main(sites)) if __name__ == '__main__':