Merge pull request #20 from uogbuji/19-openai-reentrancy
Improve Openai reentrancy workarounds & have a better multiprocess demo
uogbuji committed Jul 22, 2023
2 parents cfb0b74 + fb1501c commit d54256b
Showing 9 changed files with 197 additions and 239 deletions.
17 changes: 8 additions & 9 deletions demo/README.md
@@ -1,19 +1,18 @@
For all these demos you need access to an OpenAI-like service. Default assumption is that you have a self-hosted framework such as llama-cpp-python or text-generation-webui running.

# Simplest

## alpaca_simple_fix_xml.py
## simple_fix_xml.py

Quick demo, sending an Alpaca-compatible LLM some bad XML & asking it to make corrections.
Quick demo, sending a Llama or Alpaca-compatible LLM some bad XML & asking it to make corrections.

# Intermediate

## alpaca_multitask_fix_xml.py
## multiprocess.py

Intermediate demo using an LLM to repair data (XML), like
alpaca_simple_fix_xml.py
but running a separate progress indicator task in the background
while the LLM works, using asyncio. This should work even
if the LLM framework we're using doesn't support asyncio,
thanks to ogbujipt.async_helper
Intermediate demo asking an LLM multiple simultaneous riddles on various topics,
running a separate progress indicator task in the background, using asyncio.
Works even if the LLM framework doesn't support asyncio, thanks to ogbujipt.async_helper

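A rough sketch of the underlying pattern, in plain asyncio, follows: a progress-indicator task keeps printing while a blocking LLM call runs in a separate process. This is only an illustration of the general technique, with an invented `fake_llm_call` stand-in; it is not the actual `ogbujipt.async_helper` implementation.

```python
import asyncio
import concurrent.futures
import time


def fake_llm_call(prompt):
    '''Stand-in for a blocking, non-async LLM client call (invented for this sketch)'''
    time.sleep(5)
    return f'Response to: {prompt}'


async def indicate_progress(pause=0.5):
    '''Print a dot periodically, until this task is cancelled'''
    while True:
        print('.', end='', flush=True)
        await asyncio.sleep(pause)


async def main():
    loop = asyncio.get_running_loop()
    with concurrent.futures.ProcessPoolExecutor() as executor:
        indicator_task = asyncio.create_task(indicate_progress())
        # Run the blocking call in a worker process so the event loop stays responsive
        llm_future = loop.run_in_executor(executor, fake_llm_call, 'Tell me a joke')
        # The indicator never finishes, so FIRST_COMPLETED fires when the LLM call is done
        done, _ = await asyncio.wait((indicator_task, llm_future), return_when=asyncio.FIRST_COMPLETED)
        indicator_task.cancel()
        print('\n' + next(iter(done)).result())


if __name__ == '__main__':
    asyncio.run(main())
```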
# Advanced

130 changes: 0 additions & 130 deletions demo/alpaca_multitask_fix_xml.py

This file was deleted.

19 changes: 7 additions & 12 deletions demo/alpaca_simple_qa_discord.py
@@ -6,11 +6,7 @@
Note: This is a simple demo, which doesn't do any client-side job management,
so for example if a request is sent, and a second comes in before it has completed,
only the latter will complete.
You need access to an OpenAI-like service. Default assumption is that you
have a self-hosted framework such as llama-cpp-python or text-generation-webui
running. Say it's at my-llm-host:8000, you can do:
The LLM back end is relied on to cope.
Prerequisites: python-dotenv discord.py
@@ -44,7 +40,7 @@
from dotenv import load_dotenv

from ogbujipt.config import openai_emulation
from ogbujipt.async_helper import schedule_callable, openai_api_surrogate
from ogbujipt.async_helper import schedule_callable, openai_api_surrogate, save_openai_api_params
from ogbujipt import oapi_first_choice_text
from ogbujipt.prompting.basic import format
from ogbujipt.prompting.model_style import ALPACA_DELIMITERS
@@ -66,12 +62,12 @@ async def send_llm_msg(msg):

# See demo/alpaca_multitask_fix_xml.py for some important warnings here
llm_task = asyncio.create_task(
schedule_callable(openai_api_surrogate, prompt, **llm.params))
schedule_callable(openai_api_surrogate, prompt, temperature=llmtemp, max_tokens=512,
**save_openai_api_params()))

tasks = [llm_task]
done, _ = await asyncio.wait(
tasks, return_when=asyncio.FIRST_COMPLETED
)
tasks, return_when=asyncio.FIRST_COMPLETED)

response = next(iter(done)).result()

@@ -117,7 +113,7 @@ async def on_ready():

def main():
# A real app would probably use a discord.py cog w/ these as data members
global llm, llm_temp
global llm, llmtemp

load_dotenv() # From .env file
DISCORD_TOKEN = os.getenv('DISCORD_TOKEN')
@@ -126,8 +122,7 @@ def main():

# Set up API connector & update temperature from environment
llm = openai_emulation(host=LLM_HOST, port=LLM_PORT)
llm.params.llmtemp = os.getenv('LLM_TEMP')
llm.params.max_tokens = 512
llmtemp = os.getenv('LLM_TEMP')

# launch Discord client event loop
client.run(DISCORD_TOKEN)
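Note on the change above: the call now passes temperature and max_tokens explicitly and splices in **save_openai_api_params(), rather than reading everything from the llm.params object. The apparent motivation is that the openai library keeps its connection settings in module-level globals, which don't automatically travel with a callable scheduled into another process, so they have to be captured in the parent and re-applied in the worker. A minimal sketch of that save-and-reapply idea, using invented helper names and pre-1.0 openai module attributes (the real ogbujipt helpers may differ):

```python
import openai  # assumes the pre-1.0 openai client, current when this commit was made


# Hypothetical helpers for illustration only; not the actual ogbujipt.async_helper API
def snapshot_openai_globals():
    '''Capture the module-level settings a worker process will need'''
    return {'api_key': openai.api_key, 'api_base': openai.api_base}


def call_in_worker(prompt, api_key=None, api_base=None, **model_params):
    '''Runs in the child process: re-apply the snapshot, then make the API call'''
    openai.api_key = api_key
    openai.api_base = api_base
    # model_params would carry model, temperature, max_tokens, etc.
    return openai.Completion.create(prompt=prompt, **model_params)
```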
4 changes: 0 additions & 4 deletions demo/chat_pdf_streamlit_ui.py
@@ -14,10 +14,6 @@
Single-PDF support, for now, to keep the demo code simple. Can easily extend to
e.g. work with multiple docs dropped in a directory
You need access to an OpenAI-like service. Default assumption is that you
have a self-hosted framework such as llama-cpp-python or text-generation-webui
running. Assume for the following it's at my-llm-host:8000
Prerequisites. From the OgbujiPT cloned dir:
```sh
17 changes: 7 additions & 10 deletions demo/chat_web_selects.py
@@ -34,7 +34,7 @@

from ogbujipt import config
from ogbujipt.prompting import format, ALPACA_INSTRUCT_DELIMITERS
from ogbujipt.async_helper import schedule_openai_call, openai_api_surrogate
from ogbujipt.async_helper import schedule_callable, openai_api_surrogate, save_openai_api_params
from ogbujipt import oapi_first_choice_text
from ogbujipt.text_helper import text_splitter
from ogbujipt.embedding_helper import qdrant_collection
@@ -82,7 +82,7 @@ async def read_site(url, collection):
print(f'{collection.count()} chunks added to collection')


async def async_main(sites, api_params):
async def async_main(sites):
# Automatic download from HuggingFace
# Seem to be reentrancy issues with HuggingFace; defer import
from sentence_transformers import SentenceTransformer
@@ -134,7 +134,7 @@ async def async_main(sites, api_params):

indicator_task = asyncio.create_task(indicate_progress())
llm_task = asyncio.create_task(
schedule_openai_call(openai_api_surrogate, prompt, **model_params))
schedule_callable(openai_api_surrogate, prompt, **model_params, **save_openai_api_params()))
tasks = [indicator_task, llm_task]
done, _ = await asyncio.wait(
tasks, return_when=asyncio.FIRST_COMPLETED)
@@ -164,17 +164,14 @@ async def async_main(sites, api_params):
def main(host, port, openai_key, model, sites):
# Use OpenAI API if specified, otherwise emulate with supplied host, etc.
if openai_key:
assert not (host or port), 'Don\'t use --host or --port with --openai'
model = model or 'text-davinci-003'
openai_api = config.openai_live(
model=model, debug=True)
config.openai_live(apikey=openai_key, model=model, debug=True)
else:
# For now the model param is most useful in conjunction with --openai
# Generally not really useful except in conjunction with --openai
model = model or config.HOST_DEFAULT
openai_api = config.openai_emulation(
host=host, port=port, model=model, debug=True)
config.openai_emulation(host=host, port=port, model=model, debug=True)

asyncio.run(async_main(sites, openai_api.params))
asyncio.run(async_main(sites))


if __name__ == '__main__':
118 changes: 118 additions & 0 deletions demo/multiprocess.py
@@ -0,0 +1,118 @@
'''
Advanced demo showing quick chat with an LLM, but with 3 simultaneous requests,
and also a separate progress indicator display while the LLM instances are generating.
Key is taking advantage of Python's asyncio, and also multiprocess, which requires some finesse,
to work even when the LLM framework in use doesn't support asyncio.
Luckily `ogbujipt.async_helper` comes in handy.
```sh
python demo/multiprocess.py --host=http://my-llm-host --port=8000
```
Also allows you to use the actual OpenAI ChatGPT service by specifying --openai
'''
import asyncio

# import openai

import click

from ogbujipt import oapi_first_choice_text
from ogbujipt import config
from ogbujipt.async_helper import (
schedule_callable,
openai_api_surrogate,
console_progress_indicator,
save_openai_api_params)
from ogbujipt.prompting.basic import format
from ogbujipt.prompting.model_style import ALPACA_DELIMITERS


class llm_request:
'''
Encapsulates each LLM service request via OpenAI API (even for self-hosted LLM)
'''
tasks = {}

def __init__(self, topic, llmtemp, **model_params):
'''
topic - a particular topic about which we'll ask the LLM
llmtemp - LLM temperature
model_params - mapping of custom parameters for model behavior, e.g.:
max_tokens: limit number of generated tokens (default 16)
top_p: AKA nucleus sampling; can increase generated text diversity
frequency_penalty: Favor more or less frequent tokens
presence_penalty: Prefer new, previously unused tokens
More info: https://platform.openai.com/docs/api-reference/completions
'''
self.topic = topic
self.llmtemp = llmtemp
self.model_params = model_params

def wrap(self):
prompt = format(f'Tell me a funny joke about {self.topic}', delimiters=ALPACA_DELIMITERS)

# Pattern of passing in the callable itself, then the params; required for multiprocess execution
self.task = asyncio.create_task(
schedule_callable(openai_api_surrogate, prompt, temperature=self.llmtemp,
**self.model_params, **save_openai_api_params()))
llm_request.tasks[self.task] = self
return self.task


async def async_main(topics, llmtemp):
# Pro tip: When creating tasks with asyncio.create_task be mindful to not
# accidentally lose references to tasks, lest they get garbage collected,
# which sows chaos. In some cases asyncio.TaskGroup (new in Python 3.11)
# is a better alternative, but we can't use them in this case because
# they wait for all tasks to complete whereas we're done once only
# the LLM generation task is complete
indicator_task = asyncio.create_task(console_progress_indicator())
# Notice the pattern of passing in the callable itself, then the params
# You can't just do, say, llm(prompt) because that will actually
# call the function & block on the LLM request
llm_requests = [llm_request(t, llmtemp, max_tokens=1024) for t in topics]
llm_tasks = [req.wrap() for req in llm_requests]
# Need to gather to make sure all LLM tasks are completed
gathered_llm_tasks = asyncio.gather(*llm_tasks)
done, _ = await asyncio.wait((indicator_task, gathered_llm_tasks), return_when=asyncio.FIRST_COMPLETED)

# Completed task will be from gather() of llm_tasks; results are in original task arg order
results = zip(llm_requests, next(iter(done)).result())
for req, resp in results:
print(f'Result re {req.topic}')
# resp is an instance of openai.openai_object.OpenAIObject, with lots of useful info
print('\nFull response data from LLM:\n', resp)
# Just the response text
response_text = oapi_first_choice_text(resp)
print('\nResponse text from LLM:\n\n', response_text)


# Command line arguments defined in click decorators
@click.command()
@click.option('--host', default='http://127.0.0.1', help='OpenAI API host')
@click.option('--port', default='8000', help='OpenAI API port')
@click.option('--llmtemp', default='0.9', type=float, help='LLM temperature')
@click.option('--openai', is_flag=True, default=False, type=bool,
help='Use live OpenAI API. If you use this option, you must have '
'"OPENAI_API_KEY" defined in your environmnt')
@click.option('--model', default='', type=str,
help='OpenAI model to use (see https://platform.openai.com/docs/models)')
def main(host, port, llmtemp, openai, model):
# Use OpenAI API if specified, otherwise emulate with supplied host, etc.
if openai:
model = model or 'text-davinci-003'
config.openai_live(model=model, debug=True)
else:
# Generally not really useful except in conjunction with --openai
model = model or config.HOST_DEFAULT
config.openai_emulation(host=host, port=port, model=model, debug=True)

topics = ['wild animals', 'vehicles', 'space aliens']

asyncio.run(async_main(topics, llmtemp))


if __name__ == '__main__':
# CLI entry point. Also protects against re-execution of main() after process fork
# viz https://docs.python.org/3/library/multiprocessing.html#multiprocessing-safe-main-import
main()
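A side note on the result handling in async_main() above: asyncio.gather() returns results in the order its awaitables were passed in, regardless of completion order, which is what makes the zip(llm_requests, ...) pairing line up. A tiny self-contained illustration (the work() coroutine is invented for the example):

```python
import asyncio
import random


async def work(topic):
    # Simulate requests that finish in arbitrary order
    await asyncio.sleep(random.random())
    return f'joke about {topic}'


async def main():
    topics = ['wild animals', 'vehicles', 'space aliens']
    tasks = [asyncio.create_task(work(t)) for t in topics]
    # gather() preserves input order, so zip(topics, results) pairs correctly
    results = await asyncio.gather(*tasks)
    for topic, result in zip(topics, results):
        print(topic, '->', result)


asyncio.run(main())
```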
File renamed without changes.