Add Markdown support & example to demo/chat_doc_folder.py. Changelog u…

…pdate.
OoriData · Jul 23, 2024 · 8be3635 · 8be3635
1 parent 7fd9960
commit 8be3635
Show file tree

Hide file tree

Showing 2 changed files with 26 additions and 1 deletion.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,6 +7,17 @@ Notable changes to  Format based on [Keep a Changelog](https://keepachangelog.co
 
 -->
 
+## [0.9.3] - 20240722
+
+### Added
+
+- `demo/chat_doc_folder.py` - "chat my folder of docs" demo
+- `llm_wrapper.response_type` to indicate tool-call LLM responses
+
+### Fixed
+
+- `llm_wrapper.llm_response` object generation
+
 ## [0.9.2] - 20240625
 
 ### Added

diff --git a/demo/chat_doc_folder.py b/demo/chat_doc_folder.py
@@ -29,6 +29,8 @@
 
 Sample query: Tell me about the Calabar Kingdom
 
+You can always check the retrieval using `--verbose`
+
 You can specify your document directory, and/or tweak it with the following command line options:
 --verbose - print more information while processing (for debugging)
 --limit (max number of chunks to retrieve for use as context)
@@ -104,15 +106,27 @@ def read_pdf_doc(fpath, store):
     store.update(chunks, metas=metas)
 
 
+def read_text_or_markdown_doc(fpath, store):
+    '''Read one text or markdown file, split its full contents into chunks via store.text_split & add these to the vector store, each tagged with the file path as its 'source' '''
+    print('Processing as text:', fpath)  # e.g. 'path/to/file.txt'
+    with open(fpath) as docx_content:  # NOTE(review): despite the name, this handle is plain text, not .docx; no encoding is given, so the platform default applies
+        doctext = docx_content.read()
+    chunks = list(store.text_split(doctext))  # Materialized so len(chunks) can size the metadata list below
+    metas = [{'source': str(fpath)}]*len(chunks)  # NOTE(review): every entry is the same dict object; confirm the store never mutates metas in place
+    store.update(chunks, metas=metas)
+
+
 async def async_main(oapi, docs, verbose, limit, chunk_size, chunk_overlap, question):
     store = vector_store(chunk_size, chunk_overlap)
 
     for fname in docs.iterdir():
-        print(fname, fname.suffix)
+        # print(fname, fname.suffix)
         if fname.suffix in ['.doc', '.docx']:
             read_word_doc(fname, store)
         elif fname.suffix == '.pdf':
             read_pdf_doc(fname, store)
+        elif fname.suffix in ['.txt', '.md', '.mdx']:
+            read_text_or_markdown_doc(fname, store)
 
     # Main chat loop
     done = False