From c6cf94a6c3593ea58b19f9449e65f121241fc4d1 Mon Sep 17 00:00:00 2001
From: Georg Schuppe
Date: Fri, 12 Jul 2024 09:00:12 +0200
Subject: [PATCH 1/4] doc: install external dependencies

---
 experimental/knowledge_graph_rag/README.md | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/experimental/knowledge_graph_rag/README.md b/experimental/knowledge_graph_rag/README.md
index 2ec88800..0fe5642f 100644
--- a/experimental/knowledge_graph_rag/README.md
+++ b/experimental/knowledge_graph_rag/README.md
@@ -85,17 +85,22 @@ python3 -m virtualenv venv
 source venv/bin/activate
 ```
 
-### 4. Install the required packages
+### 4. Install external dependencies
+```bash
+sudo apt install poppler-utils ffmpeg libsm6 libxext6 tesseract-ocr libtesseract-dev
+```
+
+### 5. Install the required packages
 
 ```bash
 pip install -r requirements.txt
 ```
 
-### 5. Setup a hosted Milvus vector database
+### 6. Setup a hosted Milvus vector database
 
 Follow the instructions [here](https://milvus.io/docs/install_standalone-docker.md) to deploy a hosted Milvus instance for the vector database backend. Note that it must be Milvus 2.4 or better to support [hybrid search](https://milvus.io/docs/multi-vector-search.md). We do not support disabling this feature for previous versions of Milvus as of now.
 
-### 5. Launch the Streamlit frontend
+### 7. Launch the Streamlit frontend
 
 ```bash
 streamlit run app.py
@@ -103,7 +108,7 @@ streamlit run app.py
 ```
 
 Open the URL in your browser to access the UI and chatbot!
 
-### 6. Upload Docs and Train Model
+### 8. Upload Docs and Train Model
 
 Upload your own documents to a folder, or use an existing folder for the knowledge graph creation. Note that the implementation currently focuses on text from PDFs only. It can be extended to other text file formats using the Unstructured.io data loader in LangChain.
From a57689707ae3bd2b9c0ea02b372cbc8f337d687c Mon Sep 17 00:00:00 2001
From: Georg Schuppe
Date: Fri, 12 Jul 2024 09:00:36 +0200
Subject: [PATCH 2/4] fix: Requests version conflict

---
 experimental/knowledge_graph_rag/requirements.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/experimental/knowledge_graph_rag/requirements.txt b/experimental/knowledge_graph_rag/requirements.txt
index 82a9a141..a0ad3b38 100644
--- a/experimental/knowledge_graph_rag/requirements.txt
+++ b/experimental/knowledge_graph_rag/requirements.txt
@@ -7,8 +7,8 @@ llama_index==0.10.50
 networkx==3.2.1
 numpy==1.24.1
 pandas==2.2.2
-pymilvus==2.4.3
-Requests==2.32.3
+pymilvus[model]==2.4.3
+Requests==2.31.0
 streamlit==1.30.0
 unstructured[all-docs]
 tqdm==4.66.1

From 03c2a5fe8b5cc566882e5164e825d78b1cbddc59 Mon Sep 17 00:00:00 2001
From: Georg Schuppe
Date: Fri, 12 Jul 2024 09:01:23 +0200
Subject: [PATCH 3/4] fix: use full module path for import

---
 experimental/knowledge_graph_rag/utils/lc_graph.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/experimental/knowledge_graph_rag/utils/lc_graph.py b/experimental/knowledge_graph_rag/utils/lc_graph.py
index 5a5a81ac..fc763dc8 100644
--- a/experimental/knowledge_graph_rag/utils/lc_graph.py
+++ b/experimental/knowledge_graph_rag/utils/lc_graph.py
@@ -15,7 +15,7 @@
 from langchain_nvidia_ai_endpoints import ChatNVIDIA
 import concurrent.futures
-from preprocessor import extract_triples
+from utils.preprocessor import extract_triples
 from tqdm import tqdm
 from langchain_community.document_loaders import DirectoryLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 

From 708d3784ec793d990218bcb60b18fdd80f3a1ad3 Mon Sep 17 00:00:00 2001
From: Georg Schuppe
Date: Fri, 12 Jul 2024 09:01:53 +0200
Subject: [PATCH 4/4] fix: download NLTK tagger

---
 experimental/knowledge_graph_rag/app.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/experimental/knowledge_graph_rag/app.py b/experimental/knowledge_graph_rag/app.py
index e19edc81..315c940c 100644
--- a/experimental/knowledge_graph_rag/app.py
+++ b/experimental/knowledge_graph_rag/app.py
@@ -25,6 +25,9 @@
 from vectorstore.search import SearchHandler
 from langchain_nvidia_ai_endpoints import ChatNVIDIA
 
+import nltk
+nltk.download('averaged_perceptron_tagger')
+
 def load_data(input_dir, num_workers):
     reader = SimpleDirectoryReader(input_dir=input_dir)
     documents = reader.load_data(num_workers=num_workers)
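
The four mails above form a standard `git format-patch` series. As a minimal sketch of applying it, assuming each patch has been saved as its own `.patch` file in a checkout of the target repository (the filenames below are hypothetical, derived from the subject lines, and not part of the series itself):

```bash
# Apply the series in order with git am; adjust the filenames to however
# the mails were actually saved (e.g. via "git format-patch" or "Download patch").
git am 0001-doc-install-external-dependencies.patch \
       0002-fix-Requests-version-conflict.patch \
       0003-fix-use-full-module-path-for-import.patch \
       0004-fix-download-NLTK-tagger.patch
```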