
Commit

[#19] Replace demo/alpaca_multitask_fix_xml.py with demo/multiprocess.py. Improvements to async_helper.py and config.py
uogbuji committed Jul 22, 2023
1 parent 1d35367 commit aedffeb
Showing 3 changed files with 123 additions and 134 deletions.
126 changes: 66 additions & 60 deletions demo/multiprocess.py
@@ -11,102 +11,108 @@
Also allows you to use the actual OpenAI ChatGPT service, by specifying --openai
'''
import sys
import asyncio

import openai
# import openai

from ogbujipt.async_helper import schedule_callable, openai_api_surrogate
import click

from ogbujipt import oapi_first_choice_text
from ogbujipt import config
from ogbujipt.async_helper import (
schedule_callable,
openai_api_surrogate,
console_progress_indicator,
save_openai_api_params)
from ogbujipt.prompting.basic import format
from ogbujipt.prompting.model_style import ALPACA_DELIMITERS

model_params = {}


async def indicate_progress(pause=0.5):
class llm_request:
'''
Simple progress indicator for the console. Just prints dots.
Encapsulates each LLM service request via OpenAI API (even for self-hosted LLM)
'''
while True:
print('.', end='', flush=True)
await asyncio.sleep(pause)


openai_globals = ['api_base', 'api_key', 'api_key_path']


def cache_openai_api_params():
params = {}
for k in openai_globals:
if hasattr(openai, k):
params[k] = getattr(openai, k)
return params


def openai_api_surrogate(prompt, api_func=openai.Completion.create, **kwargs):
# Reset API details, relevant when emulating OpenAI
trimmed_kwargs = {}
for k in kwargs:
if k in openai_globals:
setattr(openai, k, kwargs[k])
else:
trimmed_kwargs[k] = kwargs[k]
# Send other, provided args to the generation function
return api_func(prompt=prompt, **trimmed_kwargs)


class llm_request:
tasks = {}

def __init__(self, topic):
def __init__(self, topic, llmtemp, **model_params):
'''
topic - a particular topic about which we'll ask the LLM
model_params - mapping of custom parameters for model behavior, e.g.:
max_tokens: limit number of generated tokens (default 16)
top_p: AKA nucleus sampling; can increase generated text diversity
frequency_penalty: Favor more or less frequent tokens
presence_penalty: Prefer new, previously unused tokens
More info: https://platform.openai.com/docs/api-reference/completions
'''
self.topic = topic
self.llmtemp = llmtemp
self.model_params = model_params

def wrap(self):
prompt = format(f'Tell me a funny joke about {self.topic}', delimiters=ALPACA_DELIMITERS)

# Pattern of passing in the callable itself, then the params; required for multiprocess execution
self.task = asyncio.create_task(
schedule_callable(openai_api_surrogate, prompt, model='text-ada-001', **cache_openai_api_params()))
schedule_callable(openai_api_surrogate, prompt, temperature=self.llmtemp,
**self.model_params, **save_openai_api_params()))
llm_request.tasks[self.task] = self
return self.task


async def async_main():
topics = ['wild animals', 'vehicles', 'space aliens']

# model_params = dict(
# max_tokens=60, # Limit number of generated tokens
# top_p=1, # AKA nucleus sampling; can increase generated text diversity
# frequency_penalty=0, # Favor more or less frequent tokens
# presence_penalty=1, # Prefer new, previously unused tokens
# )
indicator_task = asyncio.create_task(indicate_progress())
async def async_main(topics, llmtemp):
# Pro tip: When creating tasks with asyncio.create_task be mindful not to
# accidentally lose references to tasks, lest they get garbage collected,
# which sows chaos. In some cases asyncio.TaskGroup (new in Python 3.11)
# is a better alternative, but we can't use it in this case because
# it waits for all tasks to complete, whereas we're done once only
# the LLM generation task is complete
indicator_task = asyncio.create_task(console_progress_indicator())
# Notice the pattern of passing in the callable itself, then the params
# You can't just do, say, llm(prompt) because that will actually
# call the function & block on the LLM request
llm_requests = [llm_request(t) for t in topics]
llm_requests = [llm_request(t, llmtemp, max_tokens=1024) for t in topics]
llm_tasks = [req.wrap() for req in llm_requests]
# Need to gather to make sure all LLM tasks are completed
gathered_llm_tasks = asyncio.gather(*llm_tasks)
done, _ = await asyncio.wait((indicator_task, gathered_llm_tasks), return_when=asyncio.FIRST_COMPLETED)

# Only completed task will be from the gather() of llm_tasks, and it has results in original order
# Completed task will be from gather() of llm_tasks; results in original task arg order
results = zip(llm_requests, next(iter(done)).result())
for req, resp in results:
print(f'Result re {req.topic}')
print(resp)
# resp is an instance of openai.openai_object.OpenAIObject, with lots of useful info
print('\nFull response data from LLM:\n', resp)
# Just the response text
response_text = oapi_first_choice_text(resp)
print('\nResponse text from LLM:\n\n', response_text)


# Command line arguments defined in click decorators
@click.command()
@click.option('--host', default='http://127.0.0.1', help='OpenAI API host')
@click.option('--port', default='8000', help='OpenAI API port')
@click.option('--llmtemp', default='0.9', type=float, help='LLM temperature')
@click.option('--openai', is_flag=True, default=False, type=bool,
help='Use live OpenAI API. If you use this option, you must have '
'"OPENAI_API_KEY" defined in your environment')
@click.option('--model', default='', type=str,
help='OpenAI model to use (see https://platform.openai.com/docs/models)')
def main(host, port, llmtemp, openai, model):
# Use OpenAI API if specified, otherwise emulate with supplied host, etc.
if openai:
model = model or 'text-davinci-003'
config.openai_live(model=model, debug=True)
else:
# Generally not really useful except in conjunction with --openai
model = model or config.HOST_DEFAULT
config.openai_emulation(host=host, port=port, model=model, debug=True)

topics = ['wild animals', 'vehicles', 'space aliens']

def main():
openai.model = 'text-ada-001'
# Just hardcode these params
model_params['llmtemp'], model_params['model'] = 1, 'text-ada-001'
openai.api_key_path = sys.argv[1]
# openai_api = config.openai_live(model=model, debug=True)
# model_params['api_key_path'] = openai.api_key_path
asyncio.run(async_main())
asyncio.run(async_main(topics, llmtemp))


if __name__ == '__main__':
# Re-entry control. Don't want main() executed on re-import
# CLI entry point. Also protects against re-execution of main() after process fork
# viz https://docs.python.org/3/library/multiprocessing.html#multiprocessing-safe-main-import
main()
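
A stripped-down illustration of the scheduling pattern the revised demo relies on, with a plain blocking function standing in for the LLM call so it runs without any server. The slow_job function and its 3-second sleep are invented for illustration, and this assumes schedule_callable simply runs the callable in a separate process, as the demo uses it:

import asyncio
import time

from ogbujipt.async_helper import schedule_callable, console_progress_indicator


def slow_job(prompt):
    # Stand-in for a blocking LLM request
    time.sleep(3)
    return f'Echo: {prompt}'


async def main():
    # Keep references to both tasks so they aren't garbage collected mid-flight
    indicator = asyncio.create_task(console_progress_indicator())
    job = asyncio.create_task(schedule_callable(slow_job, 'hello'))
    # Stop as soon as the real work finishes; the dots task never completes on its own
    done, _ = await asyncio.wait((indicator, job), return_when=asyncio.FIRST_COMPLETED)
    print('\n', next(iter(done)).result())


if __name__ == '__main__':
    asyncio.run(main())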
88 changes: 47 additions & 41 deletions pylib/async_helper.py
@@ -6,13 +6,12 @@
Coroutines to make it a little easier to multitask LLM access
using Python asyncio
'''
import sys

import asyncio
import concurrent.futures
from functools import partial

import openai


async def schedule_callable(callable, *args, **kwargs):
'''
@@ -41,41 +40,8 @@ async def schedule_callable(callable, *args, **kwargs):
return response


async def schedule_openai_call(callable, *args, **kwargs):
'''
Schedule long-running/blocking LLM request in a separate process,
wrapped to work well in an asyncio event loop
Basically hides away a bunch of the multiprocessing webbing
e.g. `llm_task = asyncio.create_task(schedule_callable(llm, prompt))`
Can then use asyncio.wait(), asyncio.gather(), etc. with `llm_task`
Args:
callable (callable): Callable to be scheduled
Returns:
response: Response object
'''
# Link up the current async event loop for multiprocess execution
loop = asyncio.get_running_loop()
executor = concurrent.futures.ProcessPoolExecutor()
# Need to partial execute to get in any kwargs for the target callable
if 'model' not in kwargs:
kwargs['model'] = ''
prepped_callable = partial(
callable,
api_base=openai.api_base,
api_key=openai.api_key,
**kwargs)
# Spawn a separate process for the LLM call
response = await loop.run_in_executor(executor, prepped_callable, *args)
return response


# FIXME: Add all arguments for OpenAI API generation functions here
def openai_api_surrogate(prompt, api_func=openai.Completion.create, **kwargs):
def openai_api_surrogate(prompt, api_func=None, **kwargs):
'''
Wrapper around OpenAI API generation functions. Needed for use
in multiprocessing because it seems when the openai library gets
@@ -88,10 +54,50 @@ def openai_api_surrogate(prompt, api_func=openai.Completion.create, **kwargs):
api_func: API function to utilize
Returns:
api_func (openai): OpenAI API object
api_func: Result of OpenAI API call
'''
# Reset API details, relevant when emulating OpenAI
openai.api_base = kwargs['api_base']
openai.api_key = kwargs['api_key']
import openai

api_func = api_func or openai.Completion.create

trimmed_kwargs = {}
for k in kwargs:
if k in OPENAI_GLOBALS:
setattr(openai, k, kwargs[k])
else:
trimmed_kwargs[k] = kwargs[k]
# Send other, provided args to the generation function
return api_func(prompt=prompt, **kwargs)
return api_func(prompt=prompt, **trimmed_kwargs)


# Extracted from https://github.com/openai/openai-python/blob/main/openai/__init__.py
OPENAI_GLOBALS = ['api_key', 'api_key_path', 'api_base', 'organization', 'api_type', 'api_version',
'proxy', 'app_info', 'debug', 'log']


def save_openai_api_params():
'''
The openai package uses globals for a lot of its parameters, including the mandatory api_key.
In some circumstances, such as when multiprocessing, these should be saved so they can be re-set when the module is re-imported.
'''
import openai

params = {}
# model also carried as a user convenience
for k in OPENAI_GLOBALS + ['model']:
if hasattr(openai, k):
params[k] = getattr(openai, k)
return params


async def console_progress_indicator(pause=0.5, file=sys.stderr):
'''
Simple progress indicator for the console. Just prints dots.
pause - seconds between each dot printed to console, default half a sec
file - file for dots output, default STDERR
'''
while True:
print('.', end='', flush=True, file=file)
await asyncio.sleep(pause)
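
To make the handshake between openai_api_surrogate and save_openai_api_params concrete, a minimal sketch follows. The base URL, key, and model name are placeholder values, and it assumes an OpenAI-compatible server is actually listening at that address:

import openai

from ogbujipt.async_helper import openai_api_surrogate, save_openai_api_params

# Globals roughly as config.openai_emulation() would leave them (placeholder values)
openai.api_key = 'BOGUS'
openai.api_base = 'http://127.0.0.1:8000/v1'
openai.model = 'HOST-DEFAULT'

# Snapshot the globals (plus model, carried as a convenience) for re-use in a child process
saved = save_openai_api_params()

# openai_api_surrogate re-imports openai, re-sets any of the saved globals it is handed,
# and passes the remaining keyword args through to openai.Completion.create
response = openai_api_surrogate('Tell me a joke about vehicles',
                                temperature=0.9, max_tokens=64, **saved)
print(response)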
43 changes: 10 additions & 33 deletions pylib/config.py
@@ -14,7 +14,7 @@
# Really just a bogus name for cases when the OpenAI API is being emulated
# OpenAI API requires the model be specified, but many compatible APIs
# have a model predetermined by the host
HOST_DEFAULT = 'HOST-DEFAULT'
HOST_DEFAULT_MODEL = HOST_DEFAULT = 'HOST-DEFAULT'


class attr_dict(dict):
@@ -25,18 +25,13 @@ class attr_dict(dict):
__delattr__ = dict.__delitem__


def openai_live(
rev='v1',
model='',
apikey=None,
debug=True
):
def openai_live(apikey=None, debug=True, model=''):
'''
Set up to use OpenAI proper. If you don't pass in an API key, the
environment variable OPENAI_API_KEY will be checked
Side note: a lot of OpenAI tutorials suggest that you embed your
OpenAI private key into the code, which is a horrible, terrible idea
OpenAI private key into the code, which is a horrible, no-good idea
Extra reminder: If you set up your environment via .env file, make sure
it's in .gitignore or equivalent so it never gets accidentally committed!
@@ -49,27 +44,21 @@ def openai_live(
Returns:
openai_api (openai): Prepared OpenAI API
'''
import os
import openai as openai_api

# openai_api.api_version
openai_api.debug = debug
openai_api.params = attr_dict(
rev=rev,
api_key=apikey,
model=model,
debug=debug)

openai_api.api_key = apikey or os.getenv('OPENAI_API_KEY')
openai_api.model = model
return openai_api


def openai_emulation(
host='http://127.0.0.1',
port='8000',
rev='v1',
model=HOST_DEFAULT,
apikey='BOGUS',
oaitype='open_ai',
debug=True):
debug=True, model=''):
'''
Set up emulation, to use an alternative, OpenAI API-compatible service
Port 8000 for llama-cpp-python, Port 5001 for Oobabooga
@@ -79,11 +68,7 @@
port (str, optional): Port to use at "host"
rev (str, optional): OpenAI revision to use
apikey (str, optional): API key to use for authentication
oaitype (str, optional): OpenAI type to use
apikey (str, optional): Unused stand-in for OpenAI API key
debug (bool, optional): Debug flag
@@ -92,17 +77,9 @@
'''
import openai as openai_api

rev = 'v1'
openai_api.api_key = apikey
openai_api.api_type = oaitype
openai_api.api_base = f'{host}:{port}/{rev}'
openai_api.debug = debug

openai_api.params = attr_dict(
api_key=apikey,
api_type=oaitype,
api_base=openai_api.api_base,
model=model,
debug=debug
)

openai_api.model = model
return openai_api
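
Finally, a minimal sketch of the slimmed-down config entry points; the host and port are just the defaults shown above (llama-cpp-python commonly listens on 8000, Oobabooga on 5001):

from ogbujipt import config

# Local, OpenAI API-compatible server; sets openai.api_key, api_base, model, etc.
config.openai_emulation(host='http://127.0.0.1', port='8000')

# Or use OpenAI proper; the key falls back to the OPENAI_API_KEY environment variable
# config.openai_live(model='text-davinci-003')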
