Merge pull request #17 from uogbuji/16-better-vector-db
Refactor qdrant vector DB interface
uogbuji committed Jul 20, 2023
2 parents 372f121 + e81ff3e commit 3f2ca21
Showing 12 changed files with 559 additions and 406 deletions.
17 changes: 14 additions & 3 deletions demo/README.md
@@ -2,7 +2,7 @@

## alpaca_simple_fix_xml.py

Quick demo, sending an Alpaca-compatible LLm some bad XML & asking it to make corrections.
Quick demo, sending an Alpaca-compatible LLM some bad XML & asking it to make corrections.

# Intermediate

@@ -30,12 +30,23 @@ Demonstrates:
consideration until more server-side LLM hosting frameworks reliably
support multiprocessing

## chat_web_selects.py

Simple, command-line "chat my web site" demo, but supporting self-hosted LLM.

Definitely a good idea for you to understand demos/alpaca_multitask_fix_xml.py
before swapping this in.

Vector store: Qdrant - https://qdrant.tech/

Supports multiple web URLs, specified on cmdline
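
For example, a typical invocation might look like the following (a sketch: assumes a self-hosted, OpenAI-compatible server at my-llm-host, port 8000; join multiple URLs with `|`):

```sh
python demo/chat_web_selects.py --host http://my-llm-host --port 8000 \
    "www.newworldencyclopedia.org/entry/Igbo_People|en.wikipedia.org/wiki/Igbo_people"
```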

## chat_pdf_streamlit_ui.py

<img width="970" alt="image" src="https://github.com/uogbuji/OgbujiPT/assets/279982/57b479a9-2dbc-4d65-ac19-e954df2a21d0">

"Chat my PDF" demo, but using self-hosted LLM. Definitely a good idea for you to understand
demos/alpaca_multitask_fix_xml.py
"Chat my PDF" demo, supporting self-hosted LLM. Definitely a good idea for you to understand
alpaca_multitask_fix_xml.py & chat_web_selects.py
before swapping this in.

UI: Streamlit - streamlit.io
7 changes: 4 additions & 3 deletions demo/alpaca_simple_fix_xml.py
@@ -14,7 +14,7 @@

from ogbujipt.config import openai_live, openai_emulation
from ogbujipt.prompting.basic import context_build
from ogbujipt.prompting.model_style import VICUNA_DELIMITERS
from ogbujipt.prompting.model_style import ALPACA_INSTRUCT_INPUT_DELIMITERS


# Command line arguments defined in click decorators
@@ -44,10 +44,11 @@ def main(host, port, llmtemp, openai, model):
</Earth>'''

prompt = context_build(
f'Correct the following XML to make it well-formed\n\n{BAD_XML_CODE}',
'Correct the given XML to make it well-formed',
contexts= BAD_XML_CODE,
preamble='You are a helpful assistant, '
'who answers questions briefly, in 1st grade language',
delimiters=VICUNA_DELIMITERS)
delimiters=ALPACA_INSTRUCT_INPUT_DELIMITERS)
print(prompt, '\n')

response = openai_api.Completion.create(
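Aside on the delimiter switch above: Alpaca-style "instruct with input" prompting keeps the instruction and the input (here, the bad XML) in separate sections. A rough illustration of the shape such a prompt takes; the exact template is defined by ALPACA_INSTRUCT_INPUT_DELIMITERS in ogbujipt.prompting.model_style, so treat this sketch as illustrative rather than the library's literal output:

```python
# Illustrative only: approximates the Alpaca instruction+input layout;
# the real template comes from ALPACA_INSTRUCT_INPUT_DELIMITERS
preamble = ('You are a helpful assistant, '
            'who answers questions briefly, in 1st grade language')
instruction = 'Correct the given XML to make it well-formed'
bad_xml = '<Earth>\n    <continent>Africa</continent>\n</Earth>'

prompt = (
    f'{preamble}\n\n'
    f'### Instruction:\n{instruction}\n\n'
    f'### Input:\n{bad_xml}\n\n'
    f'### Response:\n'
)
print(prompt)
```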
298 changes: 147 additions & 151 deletions demo/chat_pdf_streamlit_ui.py

Large diffs are not rendered by default.

181 changes: 181 additions & 0 deletions demo/chat_web_selects.py
@@ -0,0 +1,181 @@
'''
Advanced, "Chat my docs" demo, using docs from the web
Download one or more web pages and query an LLM using them as context.
Works especially well with airoboros self-hosted LLM.
Vector store: Qdrant - https://qdrant.tech/
Alternatives: pgvector, Chroma, Faiss, Weaviate, etc.
Text to vector (embedding) model:
Alternatives: https://www.sbert.net/docs/pretrained_models.html / OpenAI ada002
Needs access to an OpenAI-like service. Default assumption is self-hosted
via e.g. llama-cpp-python or text-generation-webui
Assume for the following it's at host my-llm-host, port 8000
pip install prerequisites, in addition to OgbujiPT cloned dir:
click sentence_transformers qdrant-client httpx html2text amara3.xml
```sh
python demo/chat_web_selects.py --host http://my-llm-host --port 8000 "www.newworldencyclopedia.org/entry/Igbo_People"
```
An example question might be "Who are the neighbors of the Igbo people?"
'''
# en.wikipedia.org/wiki/Igbo_people|ahiajoku.igbonet.com/2000/|en.wikivoyage.org/wiki/Igbo_phrasebook"
import asyncio
import os

import click
import httpx
import html2text

from ogbujipt import config
from ogbujipt.prompting import format, ALPACA_INSTRUCT_DELIMITERS
from ogbujipt.async_helper import schedule_openai_call, openai_api_surrogate
from ogbujipt import oapi_choice1_text
from ogbujipt.text_helper import text_splitter
from ogbujipt.embedding_helper import qdrant_collection

# Avoid re-entrance complaints from huggingface/tokenizers
os.environ['TOKENIZERS_PARALLELISM'] = 'false'

# default https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2
DOC_EMBEDDINGS_LLM = 'all-MiniLM-L12-v2'

COLLECTION_NAME = 'chat-web-selects'
USER_PROMPT = 'What do you want to know from the site(s)?\n'

# Hard-code for demo
EMBED_CHUNK_SIZE = 200
EMBED_CHUNK_OVERLAP = 20
DOTS_SPACING = 0.2 # Number of seconds between each dot printed to console


async def indicate_progress(pause=DOTS_SPACING):
while True:
print('.', end='', flush=True)
await asyncio.sleep(pause)


async def read_site(url, collection):
# Crude check; good enough for demo
if not url.startswith('http'): url = 'https://' + url # noqa E701
print('Downloading & processing', url)
async with httpx.AsyncClient(verify=False) as client:
resp = await client.get(url)
html = resp.content.decode(resp.encoding or 'utf-8')

text = html2text.html2text(html)

# Split text into chunks
chunks = text_splitter(text, chunk_size=EMBED_CHUNK_SIZE,
chunk_overlap=EMBED_CHUNK_OVERLAP, separator='\n')

# print('\n\n'.join([ch[:100] for ch in chunks]))
# Crude—for demo. Set URL metadata for all chunks to doc URL
metas = [{'url': url}]*len(chunks)
# Add the text to the collection. Blocks, so no reentrancy concern
collection.update(texts=chunks, metas=metas)
print(f'{collection.count()} chunks added to collection')


async def async_main(sites, api_params):
# Automatic download from HuggingFace
# Seem to be reentrancy issues with HuggingFace; defer import
from sentence_transformers import SentenceTransformer
embedding_model = SentenceTransformer(DOC_EMBEDDINGS_LLM)
# Sites fuel in-memory Qdrant vector DB instance
collection = qdrant_collection(COLLECTION_NAME, embedding_model)

url_task_group = asyncio.gather(*[
asyncio.create_task(read_site(site, collection)) for site in sites.split('|')])
indicator_task = asyncio.create_task(indicate_progress())
tasks = [indicator_task, url_task_group]
done, _ = await asyncio.wait(
tasks, return_when=asyncio.FIRST_COMPLETED)

done = False
while not done:
print()
user_question = input(USER_PROMPT)
if user_question.strip() == 'done':
break

docs = collection.search(user_question, limit=4)

print(docs)
if docs:
# Collects "chunked_doc" into "gathered_chunks"
gathered_chunks = '\n\n'.join(
doc.payload['_text'] for doc in docs if doc.payload)

# Build prompt with the doc chunks as context
prompt = format(
f'Given the context, {user_question}\n\n'
f'Context: """\n{gathered_chunks}\n"""\n',
preamble='### SYSTEM:\nYou are a helpful assistant, who answers '
'questions directly and as briefly as possible. '
'If you cannot answer with the given context, just say so.\n',
delimiters=ALPACA_INSTRUCT_DELIMITERS)

print(prompt)

# The rest is much like in demo/alpaca_multitask_fix_xml.py
model_params = dict(
max_tokens=1024, # Limit number of generated tokens
top_p=1, # AKA nucleus sampling; can increase generated text diversity
frequency_penalty=0, # Favor more or less frequent tokens
presence_penalty=1, # Prefer new, previously unused tokens
temperature=0.1
)

indicator_task = asyncio.create_task(indicate_progress())
llm_task = asyncio.create_task(
schedule_openai_call(openai_api_surrogate, prompt, **model_params))
tasks = [indicator_task, llm_task]
done, _ = await asyncio.wait(
tasks, return_when=asyncio.FIRST_COMPLETED)

# Instance of openai.openai_object.OpenAIObject, with lots of useful info
retval = next(iter(done)).result()
print(type(retval))
# Response is a json-like object; extract the text
print('\nFull response data from LLM:\n', retval)

# response is a json-like object;
# just get back the text of the response
response_text = oapi_choice1_text(retval)
print('\nResponse text from LLM:\n\n', response_text)


# Command line arguments defined in click decorators
@click.command()
@click.option('--host', default='http://127.0.0.1', help='OpenAI API host')
@click.option('--port', default='8000', help='OpenAI API port')
@click.option('--openai-key',
help='OpenAI API key. Leave blank to specify self-hosted model via --host & --port')
@click.option('--model', default='', type=str,
help='OpenAI model to use (see https://platform.openai.com/docs/models).'
'Use only with --openai-key')
@click.argument('sites')
def main(host, port, openai_key, model, sites):
# Use OpenAI API if specified, otherwise emulate with supplied host, etc.
if openai_key:
assert not (host or port), 'Don\'t use --host or --port with --openai'
model = model or 'text-davinci-003'
openai_api = config.openai_live(
model=model, debug=True)
else:
# For now the model param is most useful in conjunction with --openai
model = model or config.HOST_DEFAULT
openai_api = config.openai_emulation(
host=host, port=port, model=model, debug=True)

asyncio.run(async_main(sites, openai_api.params))


if __name__ == '__main__':
main()
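
The file above is also the main exercise of the refactored vector DB interface named in the PR title. A condensed sketch of the usage pattern, with names taken from the demo code (signatures live in ogbujipt.embedding_helper; treat this as a summary, not reference documentation):

```python
from sentence_transformers import SentenceTransformer
from ogbujipt.embedding_helper import qdrant_collection

# Embedding model that converts text chunks to vectors
emodel = SentenceTransformer('all-MiniLM-L12-v2')

# In-memory Qdrant collection behind the new interface
coll = qdrant_collection('my-docs', emodel)

# Add chunks plus per-chunk metadata, then query by similarity
coll.update(texts=['Enugu is a city in Nigeria', 'Lagos is a port city'],
            metas=[{'url': 'example-1'}, {'url': 'example-2'}])
print(coll.count())                    # number of chunks stored
for hit in coll.search('Where is Enugu?', limit=1):
    print(hit.payload['_text'])        # original chunk text
```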
4 changes: 3 additions & 1 deletion demo/demo.env
@@ -1,6 +1,8 @@
# Copy to .env in the same dir, and update as needed
# Used by:
# demo/alpaca_simple_qa_discord.py
# demo/chat_pdf_streamlit_ui

# Used by demo/alpaca_simple_qa_discord.py
# DISCORD_TOKEN={REPLACEME}
LLM_HOST=http://{my-llm-host}
# LLM_PORT=8000
39 changes: 35 additions & 4 deletions pylib/async_helper.py
@@ -16,8 +16,7 @@

async def schedule_callable(callable, *args, **kwargs):
'''
TODO: rename me? this is convenent for more than just LLM calls
Schedule task long-running/blocking LLM requests in a separate process,
Schedule long-running/blocking function call in a separate process,
wrapped to work well in an asyncio event loop
Basically hides away a bunch of the multiprocessing webbing
@@ -38,8 +37,40 @@ async def schedule_callable(callable, *args, **kwargs):
# Need to partial execute to get in any kwargs for the target callable
prepped_callable = partial(callable, **kwargs)
# Spawn a separate process for the LLM call
response = await loop.run_in_executor(
executor, prepped_callable, *args)
response = await loop.run_in_executor(executor, prepped_callable, *args)
return response


async def schedule_openai_call(callable, *args, **kwargs):
'''
Schedule long-running/blocking LLM request in a separate process,
wrapped to work well in an asyncio event loop
Basically hides away a bunch of the multiprocessing webbing
e.g. `llm_task = asyncio.create_task(schedule_openai_call(llm, prompt))`
Can then use asyncio.wait(), asyncio.gather(), etc. with `llm_task`
Args:
callable (callable): Callable to be scheduled
Returns:
response: Response object
'''
# Link up the current async event loop for multiprocess execution
loop = asyncio.get_running_loop()
executor = concurrent.futures.ProcessPoolExecutor()
# Need to partial execute to get in any kwargs for the target callable
if 'model' not in kwargs:
kwargs['model'] = ''
prepped_callable = partial(
callable,
api_base=openai.api_base,
api_key=openai.api_key,
**kwargs)
# Spawn a separate process for the LLM call
response = await loop.run_in_executor(executor, prepped_callable, *args)
return response
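
A usage sketch for the new function, following the pattern in demo/chat_web_selects.py (assumes an OpenAI-style endpoint has already been set up via config.openai_live() or config.openai_emulation(); indicate_progress is re-created here just to keep the sketch self-contained):

```python
import asyncio
from ogbujipt.async_helper import schedule_openai_call, openai_api_surrogate

async def indicate_progress(pause=0.5):
    # Print dots while the blocking LLM call runs in its own process
    while True:
        print('.', end='', flush=True)
        await asyncio.sleep(pause)

async def ask_llm(prompt, **model_params):
    llm_task = asyncio.create_task(
        schedule_openai_call(openai_api_surrogate, prompt, **model_params))
    indicator_task = asyncio.create_task(indicate_progress())
    # The LLM task is the only one that can finish, so it's what lands in `done`
    done, _ = await asyncio.wait(
        [indicator_task, llm_task], return_when=asyncio.FIRST_COMPLETED)
    indicator_task.cancel()  # the dots task never finishes on its own
    return next(iter(done)).result()
```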


14 changes: 8 additions & 6 deletions pylib/config.py
@@ -26,6 +26,8 @@ class attr_dict(dict):


def openai_live(
rev='v1',
model='',
apikey=None,
debug=True
):
@@ -48,16 +50,14 @@ def openai_live(
openai_api (openai): Prepared OpenAI API
'''
import openai as openai_api
from dotenv import load_dotenv

load_dotenv()
# openai_api.api_version
openai_api.debug = debug
openai_api.params = attr_dict(
rev=rev,
api_key=apikey,
api_base=openai_api.api_base,
debug=debug
)
model=model,
debug=debug)

return openai_api

@@ -67,7 +67,9 @@ def openai_emulation(
port='8000',
rev='v1',
model=HOST_DEFAULT,
apikey='BOGUS', oaitype='open_ai', debug=True):
apikey='BOGUS',
oaitype='open_ai',
debug=True):
'''
Set up emulation, to use an alternative, OpenAI API-compatible service
Port 8000 for llama-cpp-python, Port 5001 for Oobabooga
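
A sketch of how the demos choose between the two setups (mirrors main() in demo/chat_web_selects.py; host, port and model values here are placeholders):

```python
from ogbujipt import config

# Self-hosted, OpenAI-compatible server, e.g. llama-cpp-python on port 8000
openai_api = config.openai_emulation(
    host='http://my-llm-host', port='8000', model=config.HOST_DEFAULT, debug=True)

# Or, with a real OpenAI API key available in the environment:
# openai_api = config.openai_live(model='text-davinci-003', debug=True)

# openai_api.params is what the demos then hand to worker processes,
# e.g. asyncio.run(async_main(sites, openai_api.params)) in chat_web_selects.py
```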
