
Commit

Demo tweak
uogbuji committed Mar 21, 2024
1 parent ea587e1 commit 836e6e4
Showing 2 changed files with 7 additions and 0 deletions.
3 changes: 3 additions & 0 deletions demo/chat_web_selects.py
@@ -148,6 +148,9 @@ async def async_main(oapi, sites, verbose, limit, chunk_size, chunk_overlap, que
done, _ = await asyncio.wait(
tasks, return_when=asyncio.FIRST_COMPLETED)

# proper cleanup of indicator task, which will still be pending/running
indicator_task.cancel()

# Instance of openai.openai_object.OpenAIObject, with lots of useful info
retval = next(iter(done)).result()
if verbose:
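The tweak above follows a common asyncio pattern: run the LLM call and a console progress indicator as concurrent tasks, wait for whichever finishes first, then cancel the indicator so it doesn't keep running. A minimal sketch of that pattern follows; the helper names `indicate_progress` and `call_llm` are hypothetical stand-ins, not functions from the demo.

import asyncio

async def indicate_progress():
    # Hypothetical stand-in: print a simple throbber until cancelled
    while True:
        print('.', end='', flush=True)
        await asyncio.sleep(0.5)

async def call_llm():
    # Hypothetical stand-in for the real LLM request
    await asyncio.sleep(3)
    return {'content': 'Hello there!'}

async def main():
    indicator_task = asyncio.create_task(indicate_progress())
    llm_task = asyncio.create_task(call_llm())
    done, _ = await asyncio.wait(
        [indicator_task, llm_task], return_when=asyncio.FIRST_COMPLETED)
    # The LLM task finishes first; cancel the indicator task, which is still pending/running
    indicator_task.cancel()
    retval = next(iter(done)).result()
    print('\n' + retval['content'])

asyncio.run(main())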
4 changes: 4 additions & 0 deletions pylib/llm_wrapper.py
@@ -348,6 +348,7 @@ class llama_cpp_http(llm_wrapper):
>>> llm_api = llama_cpp_http(base_url='http://localhost:8000')
>>> resp = asyncio.run(llm_api('Knock knock!', min_p=0.05))
>>> resp['content']
'''
def __init__(self, base_url, apikey=None, model=None, **kwargs):
'''
@@ -369,6 +370,9 @@ async def __call__(self, prompt, req='/completion', timeout=30.0, apikey=None, *
'''
Invoke the LLM with a completion request
Other endpoints are available via `req`, e.g. /v1/models, /v1/chat/completions, etc.
With `/completion--header`, you get a stream of predicted tokens along with other token probabilities
Args:
prompt (str): Prompt to send to the LLM
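The docstring example above points the wrapper at a local llama.cpp server. As a rough sketch of the kind of request such a wrapper might make under the hood (this assumes a llama.cpp server listening on http://localhost:8000 and uses `httpx`; it is not the library's actual implementation):

import asyncio
import httpx

async def completion(prompt, base_url='http://localhost:8000', timeout=30.0, **kwargs):
    # Extra keyword args (e.g. min_p=0.05) pass through as sampling parameters
    payload = {'prompt': prompt, **kwargs}
    async with httpx.AsyncClient(timeout=timeout) as client:
        resp = await client.post(f'{base_url}/completion', json=payload)
        resp.raise_for_status()
        return resp.json()  # response JSON includes a 'content' field

resp = asyncio.run(completion('Knock knock!', min_p=0.05))
print(resp['content'])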
