diff --git a/app/requirements.txt b/app/requirements.txt
index d5c294d..28ff963 100644
--- a/app/requirements.txt
+++ b/app/requirements.txt
@@ -11,24 +11,24 @@
 ### pip3 install -r app/requirements.txt
 
 ## Top-Level brings in the required dependencies, if adding modules, try to find the minimum required
-bokeh==3.6.0
+bokeh==3.6.1
 evaluate==0.4.3
 faiss-cpu==1.9.0
-giskard==2.15.3
+giskard==2.15.5
 IPython==8.29.0
 langchain-cohere==0.3.1
-langchain-community==0.3.5
+langchain-community==0.3.7
 langchain-huggingface==0.1.2
 langchain-ollama==0.2.0
-langchain-openai==0.2.6
-langgraph==0.2.45
-llama_index==0.11.22
+langchain-openai==0.2.8
+langgraph==0.2.48
+llama_index==0.11.23
 lxml==5.3.0
 matplotlib==3.9.2
 oci>=2.0.0
 oracledb>=2.0.0
 plotly==5.24.1
-streamlit==1.39.0
+streamlit==1.40.1
 umap-learn==0.5.7
 
 ## For Licensing Purposes; ensures no GPU modules are installed
diff --git a/app/src/modules/split.py b/app/src/modules/split.py
index 3fe0999..9efdf0b 100644
--- a/app/src/modules/split.py
+++ b/app/src/modules/split.py
@@ -150,7 +150,7 @@ def load_and_split_documents(
         If output_dir, a list of written json files
     """
     split_files = []
-    split_docos = []
+    all_split_docos = []
     for file in src_files:
         name = os.path.basename(file)
         stat = os.stat(file)
@@ -175,13 +175,15 @@ def load_and_split_documents(
             split_doc = split_document(model, chunk_size, chunk_overlap, loaded_doc, extension)
 
             # Add IDs to metadata
+            split_docos = []
             for idx, chunk in enumerate(split_doc, start=1):
                 split_doc_with_mdata = process_metadata(idx, chunk)
                 split_docos += split_doc_with_mdata
-
+
             if write_json and output_dir:
                 split_files.append(doc_to_json(split_docos, file, output_dir))
-            logger.info("Total Number of Chunks: %i", len(split_docos))
+            all_split_docos += split_docos
+            logger.info("Total Number of Chunks: %i", len(all_split_docos))
 
-    return split_docos, split_files
+    return all_split_docos, split_files
 
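For reference, a minimal standalone sketch of the accumulation pattern this change introduces: one list reset per input so per-file work (such as writing one JSON per file) only sees that file's chunks, and a second list accumulating the cross-file total that gets logged and returned. The names `split_text` and `load_and_split` here are hypothetical stand-ins, not the real split.py helpers.

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def split_text(text: str, chunk_size: int = 10) -> list[str]:
    """Stand-in chunker: fixed-size character chunks."""
    return [text[i : i + chunk_size] for i in range(0, len(text), chunk_size)]


def load_and_split(src_texts: list[str]) -> list[str]:
    all_chunks = []  # accumulates chunks across every input
    for text in src_texts:
        chunks = []  # reset per input, so per-input work sees only its own chunks
        for idx, piece in enumerate(split_text(text), start=1):
            chunks.append(f"{idx}:{piece}")  # per-input IDs restart at 1
        # ... per-input work (e.g. writing one JSON per file) would use `chunks` here ...
        all_chunks += chunks
    logger.info("Total Number of Chunks: %i", len(all_chunks))
    return all_chunks  # return the cross-input total, not the last input's list


if __name__ == "__main__":
    print(load_and_split(["alpha beta gamma", "delta"]))

Returning the accumulated list is the reason the `return` line changes along with the logger: once `split_docos` is reset inside the loop, it holds only the final file's chunks.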