Merge pull request #20 from uogbuji/19-openai-reentrancy
Improve Openai reentrancy workarounds & have a better multiprocess demo
uogbuji committed Jul 22, 2023
2 parents cfb0b74 + fb1501c commit d54256b
Showing 9 changed files with 197 additions and 239 deletions.
17 changes: 8 additions & 9 deletions demo/README.md
@@ -1,19 +1,18 @@
For all these demos you need access to an OpenAI-like service. Default assumption is that you have a self-hosted framework such as llama-cpp-python or text-generation-webui running.

# Simplest

## alpaca_simple_fix_xml.py
## simple_fix_xml.py

Quick demo, sending an Alpaca-compatible LLM some bad XML & asking it to make corrections.
Quick demo, sending a Llama or Alpaca-compatible LLM some bad XML & asking it to make corrections.

# Intermediate

## alpaca_multitask_fix_xml.py
## multiprocess.py

Intermediate demo using an LLM to repair data (XML), like
alpaca_simple_fix_xml.py
but running a separate progress indicator task in the background
while the LLM works, using asyncio. This should work even
if the LLM framework we're using doesn't support asyncio,
thanks to ogbujipt.async_helper
Intermediate demo asking an LLM multiple simultaneous riddles on various topics,
running a separate progress indicator task in the background, using asyncio.
Works even if the LLM framework doesn't support asyncio, thanks to ogbujipt.async_helper

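A rough sketch of the underlying pattern, in plain asyncio, follows: a progress-indicator task keeps printing while a blocking LLM call runs in a separate process. This is only an illustration of the general technique, with an invented `fake_llm_call` stand-in; it is not the actual `ogbujipt.async_helper` implementation.

```python
import asyncio
import concurrent.futures
import time


def fake_llm_call(prompt):
    '''Stand-in for a blocking, non-async LLM client call (invented for this sketch)'''
    time.sleep(5)
    return f'Response to: {prompt}'


async def indicate_progress(pause=0.5):
    '''Print a dot periodically, until this task is cancelled'''
    while True:
        print('.', end='', flush=True)
        await asyncio.sleep(pause)


async def main():
    loop = asyncio.get_running_loop()
    with concurrent.futures.ProcessPoolExecutor() as executor:
        indicator_task = asyncio.create_task(indicate_progress())
        # Run the blocking call in a worker process so the event loop stays responsive
        llm_future = loop.run_in_executor(executor, fake_llm_call, 'Tell me a joke')
        # The indicator never finishes, so FIRST_COMPLETED fires when the LLM call is done
        done, _ = await asyncio.wait((indicator_task, llm_future), return_when=asyncio.FIRST_COMPLETED)
        indicator_task.cancel()
        print('\n' + next(iter(done)).result())


if __name__ == '__main__':
    asyncio.run(main())
```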
# Advanced

130 changes: 0 additions & 130 deletions demo/alpaca_multitask_fix_xml.py

This file was deleted.

19 changes: 7 additions & 12 deletions demo/alpaca_simple_qa_discord.py
@@ -6,11 +6,7 @@
Note: This is a simple demo, which doesn't do any client-side job management,
so for example if a request is sent, and a second comes in before it has completed,
only the latter will complete.
You need access to an OpenAI-like service. Default assumption is that you
have a self-hosted framework such as llama-cpp-python or text-generation-webui
running. Say it's at my-llm-host:8000, you can do:
The LLM back end is relied on to cope.
Prerequisites: python-dotenv discord.py
@@ -44,7 +40,7 @@
from dotenv import load_dotenv

from ogbujipt.config import openai_emulation
from ogbujipt.async_helper import schedule_callable, openai_api_surrogate
from ogbujipt.async_helper import schedule_callable, openai_api_surrogate, save_openai_api_params
from ogbujipt import oapi_first_choice_text
from ogbujipt.prompting.basic import format
from ogbujipt.prompting.model_style import ALPACA_DELIMITERS
@@ -66,12 +62,12 @@ async def send_llm_msg(msg):

# See demo/alpaca_multitask_fix_xml.py for some important warnings here
llm_task = asyncio.create_task(
schedule_callable(openai_api_surrogate, prompt, **llm.params))
schedule_callable(openai_api_surrogate, prompt, temperature=llmtemp, max_tokens=512,
**save_openai_api_params()))

tasks = [llm_task]
done, _ = await asyncio.wait(
tasks, return_when=asyncio.FIRST_COMPLETED
)
tasks, return_when=asyncio.FIRST_COMPLETED)

response = next(iter(done)).result()

@@ -117,7 +113,7 @@ async def on_ready():

def main():
# A real app would probably use a discord.py cog w/ these as data members
global llm, llm_temp
global llm, llmtemp

load_dotenv() # From .env file
DISCORD_TOKEN = os.getenv('DISCORD_TOKEN')
@@ -126,8 +122,7 @@ def main():

# Set up API connector & update temperature from environment
llm = openai_emulation(host=LLM_HOST, port=LLM_PORT)
llm.params.llmtemp = os.getenv('LLM_TEMP')
llm.params.max_tokens = 512
llmtemp = os.getenv('LLM_TEMP')

# launch Discord client event loop
client.run(DISCORD_TOKEN)
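Note on the change above: the call now passes temperature and max_tokens explicitly and splices in **save_openai_api_params(), rather than reading everything from the llm.params object. The apparent motivation is that the openai library keeps its connection settings in module-level globals, which don't automatically travel with a callable scheduled into another process, so they have to be captured in the parent and re-applied in the worker. A minimal sketch of that save-and-reapply idea, using invented helper names and pre-1.0 openai module attributes (the real ogbujipt helpers may differ):

```python
import openai  # assumes the pre-1.0 openai client, current when this commit was made


# Hypothetical helpers for illustration only; not the actual ogbujipt.async_helper API
def snapshot_openai_globals():
    '''Capture the module-level settings a worker process will need'''
    return {'api_key': openai.api_key, 'api_base': openai.api_base}


def call_in_worker(prompt, api_key=None, api_base=None, **model_params):
    '''Runs in the child process: re-apply the snapshot, then make the API call'''
    openai.api_key = api_key
    openai.api_base = api_base
    # model_params would carry model, temperature, max_tokens, etc.
    return openai.Completion.create(prompt=prompt, **model_params)
```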
4 changes: 0 additions & 4 deletions demo/chat_pdf_streamlit_ui.py
@@ -14,10 +14,6 @@
Single-PDF support, for now, to keep the demo code simple. Can easily extend to
e.g. work with multiple docs dropped in a directory
You need access to an OpenAI-like service. Default assumption is that you
have a self-hosted framework such as llama-cpp-python or text-generation-webui
running. Assume for the following it's at my-llm-host:8000
Prerequisites. From the OgbujiPT cloned dir:
```sh
17 changes: 7 additions & 10 deletions demo/chat_web_selects.py
@@ -34,7 +34,7 @@

from ogbujipt import config
from ogbujipt.prompting import format, ALPACA_INSTRUCT_DELIMITERS
from ogbujipt.async_helper import schedule_openai_call, openai_api_surrogate
from ogbujipt.async_helper import schedule_callable, openai_api_surrogate, save_openai_api_params
from ogbujipt import oapi_first_choice_text
from ogbujipt.text_helper import text_splitter
from ogbujipt.embedding_helper import qdrant_collection
@@ -82,7 +82,7 @@ async def read_site(url, collection):
print(f'{collection.count()} chunks added to collection')


async def async_main(sites, api_params):
async def async_main(sites):
# Automatic download from HuggingFace
# Seem to be reentrancy issues with HuggingFace; defer import
from sentence_transformers import SentenceTransformer
@@ -134,7 +134,7 @@ async def async_main(sites, api_params):

indicator_task = asyncio.create_task(indicate_progress())
llm_task = asyncio.create_task(
schedule_openai_call(openai_api_surrogate, prompt, **model_params))
schedule_callable(openai_api_surrogate, prompt, **model_params, **save_openai_api_params()))
tasks = [indicator_task, llm_task]
done, _ = await asyncio.wait(
tasks, return_when=asyncio.FIRST_COMPLETED)
@@ -164,17 +164,14 @@ async def async_main(sites, api_params):
def main(host, port, openai_key, model, sites):
# Use OpenAI API if specified, otherwise emulate with supplied host, etc.
if openai_key:
assert not (host or port), 'Don\'t use --host or --port with --openai'
model = model or 'text-davinci-003'
openai_api = config.openai_live(
model=model, debug=True)
config.openai_live(apikey=openai_key, model=model, debug=True)
else:
# For now the model param is most useful in conjunction with --openai
# Generally not really useful except in conjunction with --openai
model = model or config.HOST_DEFAULT
openai_api = config.openai_emulation(
host=host, port=port, model=model, debug=True)
config.openai_emulation(host=host, port=port, model=model, debug=True)

asyncio.run(async_main(sites, openai_api.params))
asyncio.run(async_main(sites))


if __name__ == '__main__':
118 changes: 118 additions & 0 deletions demo/multiprocess.py
@@ -0,0 +1,118 @@
'''
Advanced demo showing quick chat with an LLM, but with 3 simultaneous requests,
and also a separate progress indicator display while the LLM instances are generating.
Key is taking advantage of Python's asyncio, and also multiprocess, which requires some finesse,
to work even when the LLM framework in use doesn't support asyncio.
Luckily `ogbujipt.async_helper` comes in handy.
```sh
python demo/multiprocess.py --host=http://my-llm-host --port=8000
```
Also allows you to use the actual OpenAI ChatGPT service by specifying --openai
'''
import asyncio

# import openai

import click

from ogbujipt import oapi_first_choice_text
from ogbujipt import config
from ogbujipt.async_helper import (
schedule_callable,
openai_api_surrogate,
console_progress_indicator,
save_openai_api_params)
from ogbujipt.prompting.basic import format
from ogbujipt.prompting.model_style import ALPACA_DELIMITERS


class llm_request:
'''
Encapsulates each LLM service request via OpenAI API (even for self-hosted LLM)
'''
tasks = {}

def __init__(self, topic, llmtemp, **model_params):
'''
topic - a particular topic about which we'll ask the LLM
llmtemp - LLM temperature
model_params - mapping of custom parameters for model behavior, e.g.:
max_tokens: limit number of generated tokens (default 16)
top_p: AKA nucleus sampling; can increase generated text diversity
frequency_penalty: Favor more or less frequent tokens
presence_penalty: Prefer new, previously unused tokens
More info: https://platform.openai.com/docs/api-reference/completions
'''
self.topic = topic
self.llmtemp = llmtemp
self.model_params = model_params

def wrap(self):
prompt = format(f'Tell me a funny joke about {self.topic}', delimiters=ALPACA_DELIMITERS)

# Pattern of passing in the callable itself, then the params; required for multiprocess execution
self.task = asyncio.create_task(
schedule_callable(openai_api_surrogate, prompt, temperature=self.llmtemp,
**self.model_params, **save_openai_api_params()))
llm_request.tasks[self.task] = self
return self.task


async def async_main(topics, llmtemp):
# Pro tip: When creating tasks with asyncio.create_task be mindful to not
# accidentally lose references to tasks, lest they get garbage collected,
# which sows chaos. In some cases asyncio.TaskGroup (new in Python 3.11)
# is a better alternative, but we can't use them in this case because
# they wait for all tasks to complete whereas we're done once only
# the LLM generation task is complete
indicator_task = asyncio.create_task(console_progress_indicator())
# Notice the pattern of passing in the callable itself, then the params
# You can't just do, say, llm(prompt) because that will actually
# call the function & block on the LLM request
llm_requests = [llm_request(t, llmtemp, max_tokens=1024) for t in topics]
llm_tasks = [req.wrap() for req in llm_requests]
# Need to gather to make sure all LLM tasks are completed
gathered_llm_tasks = asyncio.gather(*llm_tasks)
done, _ = await asyncio.wait((indicator_task, gathered_llm_tasks), return_when=asyncio.FIRST_COMPLETED)

# Completed task will be from gather() of llm_tasks; results are in original task arg order
results = zip(llm_requests, next(iter(done)).result())
for req, resp in results:
print(f'Result re {req.topic}')
# resp is an instance of openai.openai_object.OpenAIObject, with lots of useful info
print('\nFull response data from LLM:\n', resp)
# Just the response text
response_text = oapi_first_choice_text(resp)
print('\nResponse text from LLM:\n\n', response_text)


# Command line arguments defined in click decorators
@click.command()
@click.option('--host', default='http://127.0.0.1', help='OpenAI API host')
@click.option('--port', default='8000', help='OpenAI API port')
@click.option('--llmtemp', default='0.9', type=float, help='LLM temperature')
@click.option('--openai', is_flag=True, default=False, type=bool,
help='Use live OpenAI API. If you use this option, you must have '
'"OPENAI_API_KEY" defined in your environmnt')
@click.option('--model', default='', type=str,
help='OpenAI model to use (see https://platform.openai.com/docs/models)')
def main(host, port, llmtemp, openai, model):
# Use OpenAI API if specified, otherwise emulate with supplied host, etc.
if openai:
model = model or 'text-davinci-003'
config.openai_live(model=model, debug=True)
else:
# Generally not really useful except in conjunction with --openai
model = model or config.HOST_DEFAULT
config.openai_emulation(host=host, port=port, model=model, debug=True)

topics = ['wild animals', 'vehicles', 'space aliens']

asyncio.run(async_main(topics, llmtemp))


if __name__ == '__main__':
# CLI entry point. Also protects against re-execution of main() after process fork
# viz https://docs.python.org/3/library/multiprocessing.html#multiprocessing-safe-main-import
main()
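A side note on the result handling in async_main() above: asyncio.gather() returns results in the order its awaitables were passed in, regardless of completion order, which is what makes the zip(llm_requests, ...) pairing line up. A tiny self-contained illustration (the work() coroutine is invented for the example):

```python
import asyncio
import random


async def work(topic):
    # Simulate requests that finish in arbitrary order
    await asyncio.sleep(random.random())
    return f'joke about {topic}'


async def main():
    topics = ['wild animals', 'vehicles', 'space aliens']
    tasks = [asyncio.create_task(work(t)) for t in topics]
    # gather() preserves input order, so zip(topics, results) pairs correctly
    results = await asyncio.gather(*tasks)
    for topic, result in zip(topics, results):
        print(topic, '->', result)


asyncio.run(main())
```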
File renamed without changes.