|
5 | 5 | "id": "14b2675c", |
6 | 6 | "metadata": {}, |
7 | 7 | "source": [ |
8 | | - "# How to connect Langchain to Oracle 26ai" |
| 8 | + "# RAG Application using Oracle 26ai and LangChain\n", |
| 9 | + "\n", |
| 10 | + "This notebook demonstrates how to build a simple RAG application by using Oracle 26ai's vector storage capabilities.\n" |
9 | 11 | ] |
10 | 12 | }, |
11 | 13 | { |
12 | 14 | "cell_type": "markdown", |
13 | | - "id": "893d6647", |
| 15 | + "id": "0eaa562f", |
14 | 16 | "metadata": {}, |
15 | 17 | "source": [ |
16 | | - "!python -m pip install -U langchain-oracledb langchain-huggingface" |
| 18 | + "## Install necessary packages\n", |
| 19 | + "\n", |
| 20 | + "Before running the notebook, ensure you have the following packages installed:\n", |
| 21 | + "\n", |
| 22 | + "* `langchain-oracledb`: LangChain integration for Oracle databases.\n", |
| 23 | + "* `langchain-huggingface`: LangChain integration for Hugging Face embeddings.\n", |
| 24 | + "* `sentence-transformers`: For generating text embeddings.\n", |
| 25 | + "* `python-dotenv`: To manage environment variables securely." |
17 | 26 | ] |
18 | 27 | }, |
19 | 28 | { |
20 | 29 | "cell_type": "code", |
21 | 30 | "execution_count": null, |
22 | | - "id": "245ac7b7", |
23 | | - "metadata": { |
24 | | - "vscode": { |
25 | | - "languageId": "plaintext" |
26 | | - } |
27 | | - }, |
| 31 | + "id": "b73bbf40", |
| 32 | + "metadata": {}, |
28 | 33 | "outputs": [], |
29 | 34 | "source": [ |
| 35 | + "!python -m pip install -U langchain-oracledb langchain-huggingface sentence-transformers python-dotenv " |
| 36 | + ] |
| 37 | + }, |
| 38 | + { |
| 39 | + "cell_type": "code", |
| 40 | + "execution_count": 17, |
| 41 | + "id": "c6c10075", |
| 42 | + "metadata": {}, |
| 43 | + "outputs": [], |
| 44 | + "source": [ |
| 45 | + "import os\n", |
| 46 | + "import oracledb\n", |
| 47 | + "from dotenv import load_dotenv\n", |
30 | 48 | "from langchain_oracledb.vectorstores import oraclevs\n", |
31 | 49 | "from langchain_oracledb.vectorstores.oraclevs import OracleVS\n", |
32 | 50 | "from langchain_community.vectorstores.utils import DistanceStrategy\n", |
33 | 51 | "from langchain_core.documents import Document\n", |
34 | 52 | "from langchain_huggingface import HuggingFaceEmbeddings" |
35 | 53 | ] |
36 | 54 | }, |
| 55 | + { |
| 56 | + "cell_type": "markdown", |
| 57 | + "id": "7b960988", |
| 58 | + "metadata": {}, |
| 59 | + "source": [ |
| 60 | + "## Setting up the database connection\n", |
| 61 | + "\n", |
| 62 | + "For this notebook to work, set the following environment variables in a `.env` file or in your system environment:\n", |
| 63 | + "\n", |
| 64 | + "* `ORACLE_USERNAME`: Your Oracle database username.\n", |
| 65 | + "* `ORACLE_PASSWORD`: Your Oracle database password.\n", |
| 66 | + "\n", |
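| 67 | + "You also need to store the Oracle Wallet files and reference their location when making the connection; you can download the wallet from your Oracle Cloud account. The code below expects the wallet files in `../.wallet` and reuses `ORACLE_PASSWORD` as the wallet password." |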
| 68 | + ] |
| 69 | + }, |
37 | 70 | { |
38 | 71 | "cell_type": "code", |
39 | | - "execution_count": null, |
40 | | - "id": "1c06de86", |
41 | | - "metadata": { |
42 | | - "vscode": { |
43 | | - "languageId": "plaintext" |
44 | | - } |
45 | | - }, |
| 72 | + "execution_count": 18, |
| 73 | + "id": "245ac7b7", |
| 74 | + "metadata": {}, |
46 | 75 | "outputs": [], |
47 | 76 | "source": [ |
48 | | - "import oracledb\n", |
| 77 | + "load_dotenv()\n", |
| 78 | + "\n", |
| 79 | + "WALLET_DIRECTORY = \"../.wallet\"\n", |
| 80 | + "WALLET_PASSWORD = os.getenv(\"ORACLE_PASSWORD\")\n", |
49 | 81 | "\n", |
50 | 82 | "connection = oracledb.connect(\n", |
51 | | - " user=\"ORACLE USERNAME\", \n", |
52 | | - " password=\"ORACLE PASSWORD\", \n", |
| 83 | + " user=os.getenv(\"ORACLE_USERNAME\"),\n", |
| 84 | + " password=os.getenv(\"ORACLE_PASSWORD\"),\n", |
53 | 85 | " dsn=\"sample_low\",\n", |
54 | | - " config_dir=\"FOLDER CONTAINING WALLET DATA\",\n", |
55 | | - " wallet_location=\"FOLDER CONTAINING WALLET DATA\",\n", |
56 | | - " wallet_password=\"WALLET PASSWORD\",\n", |
| 86 | + " config_dir=WALLET_DIRECTORY,\n", |
| 87 | + " wallet_location=WALLET_DIRECTORY,\n", |
| 88 | + " wallet_password=WALLET_PASSWORD,\n", |
57 | 89 | ")" |
58 | 90 | ] |
59 | 91 | }, |
| 92 | + { |
| 93 | + "cell_type": "markdown", |
| 94 | + "id": "f09eea2f", |
| 95 | + "metadata": {}, |
| 96 | + "source": [ |
| 97 | + "## Preparing the knowledge base\n", |
| 98 | + "\n", |
| 99 | + "We want to create a knowledge base that the RAG application can query. For this example, we'll use a simple list of documents.\n", |
| 100 | + "\n", |
| 101 | + "We'll use LangChain's `Document` class to represent each document along with its metadata." |
| 102 | + ] |
| 103 | + }, |
60 | 104 | { |
61 | 105 | "cell_type": "code", |
62 | | - "execution_count": null, |
| 106 | + "execution_count": 19, |
63 | 107 | "id": "a9f058c0", |
64 | | - "metadata": { |
65 | | - "vscode": { |
66 | | - "languageId": "plaintext" |
67 | | - } |
68 | | - }, |
| 108 | + "metadata": {}, |
69 | 109 | "outputs": [], |
70 | 110 | "source": [ |
71 | 111 | "documents_json_list = [\n", |
|
83 | 123 | " \"id\": \"running-flows\",\n", |
84 | 124 | " \"text\": \"The Runner API allows you to start and manage Metaflow runs and other operations programmatically, for instance, to run flows in a script. The Runner class exposes a blocking API, which waits for operations to complete, as well as a non-blocking (asynchronous) APIs, prefixed with async which execute operations in the background. This document provides an overview of common patterns. For detailed API documentation, see the Runner API reference.\",\n", |
85 | 125 | " \"link\": \"https://docs.metaflow.org/metaflow/managing-flows/runner\",\n", |
86 | | - " }\n", |
87 | | - "]" |
88 | | - ] |
89 | | - }, |
90 | | - { |
91 | | - "cell_type": "code", |
92 | | - "execution_count": null, |
93 | | - "id": "e1d67dd3", |
94 | | - "metadata": { |
95 | | - "vscode": { |
96 | | - "languageId": "plaintext" |
97 | | - } |
98 | | - }, |
99 | | - "outputs": [], |
100 | | - "source": [ |
| 126 | + " },\n", |
| 127 | + " {\n", |
| 128 | + " \"id\": \"metaflow\",\n", |
| 129 | + " \"text\": \"This is another document that also covers information about the Runner API and how it exposes a blocking API.\",\n", |
| 130 | + " \"link\": \"https://docs.metaflow.org/\",\n", |
| 131 | + " },\n", |
| 132 | + "]\n", |
| 133 | + "\n", |
101 | 134 | "documents = []\n", |
102 | 135 | "\n", |
103 | 136 | "for doc in documents_json_list:\n", |
104 | 137 | " metadata = {\"id\": doc[\"id\"], \"link\": doc[\"link\"]}\n", |
105 | | - " \n", |
106 | | - " document = Document(\n", |
107 | | - " page_content=doc[\"text\"], \n", |
108 | | - " metadata=metadata\n", |
109 | | - " )\n", |
110 | | - "\n", |
| 138 | + " document = Document(page_content=doc[\"text\"], metadata=metadata)\n", |
111 | 139 | " documents.append(document)" |
112 | 140 | ] |
113 | 141 | }, |
| 142 | + { |
| 143 | + "cell_type": "markdown", |
| 144 | + "id": "e2c5041a", |
| 145 | + "metadata": {}, |
| 146 | + "source": [ |
| 147 | + "## Preparing embedding model\n", |
| 148 | + "\n", |
| 149 | + "To store our documents in the Oracle vector database, we first need to generate embeddings for them. For this example, we'll use a pre-trained model from Hugging Face via LangChain's `HuggingFaceEmbeddings` class." |
| 150 | + ] |
| 151 | + }, |
114 | 152 | { |
115 | 153 | "cell_type": "code", |
116 | | - "execution_count": null, |
| 154 | + "execution_count": 20, |
117 | 155 | "id": "9560eb7a", |
118 | | - "metadata": { |
119 | | - "vscode": { |
120 | | - "languageId": "plaintext" |
121 | | - } |
122 | | - }, |
| 156 | + "metadata": {}, |
123 | 157 | "outputs": [], |
124 | 158 | "source": [ |
125 | 159 | "model = HuggingFaceEmbeddings(model_name=\"sentence-transformers/all-mpnet-base-v2\")" |
126 | 160 | ] |
127 | 161 | }, |
| 162 | + { |
| 163 | + "cell_type": "markdown", |
| 164 | + "id": "8347e4e6", |
| 165 | + "metadata": {}, |
| 166 | + "source": [ |
| 167 | + "## Ingesting documents into Oracle\n", |
| 168 | + "\n", |
| 169 | + "We can now ingest our documents into the Oracle vector database using a cosine similarity metric.\n", |
| 170 | + "\n", |
| 171 | + "If this is the first time you are running this notebook, the vector store and the `vector_store` table will be created automatically.\n" |
| 172 | + ] |
| 173 | + }, |
128 | 174 | { |
129 | 175 | "cell_type": "code", |
130 | | - "execution_count": null, |
| 176 | + "execution_count": 21, |
131 | 177 | "id": "a863751f", |
132 | | - "metadata": { |
133 | | - "vscode": { |
134 | | - "languageId": "plaintext" |
135 | | - } |
136 | | - }, |
| 178 | + "metadata": {}, |
137 | 179 | "outputs": [], |
138 | 180 | "source": [ |
139 | 181 | "vector_store = OracleVS.from_documents(\n", |
140 | 182 | " documents,\n", |
141 | 183 | " model,\n", |
142 | 184 | " client=connection,\n", |
143 | | - " table_name=\"Documents_COSINE\",\n", |
| 185 | + " table_name=\"vector_store\",\n", |
144 | 186 | " distance_strategy=DistanceStrategy.COSINE,\n", |
145 | 187 | ")" |
146 | 188 | ] |
147 | 189 | }, |
| 190 | + { |
| 191 | + "cell_type": "markdown", |
| 192 | + "id": "437b8613", |
| 193 | + "metadata": {}, |
| 194 | + "source": [ |
| 195 | + "## Creating the search index\n", |
| 196 | + "\n", |
| 197 | + "Now, we need to create the search index for the vector store.\n", |
| 198 | + "\n", |
| 199 | + "We'll create an HNSW index with `parallel` set to 16 and a target accuracy of 97 percent." |
| 200 | + ] |
| 201 | + }, |
148 | 202 | { |
149 | 203 | "cell_type": "code", |
150 | | - "execution_count": null, |
| 204 | + "execution_count": 23, |
151 | 205 | "id": "9f5ef45d", |
152 | | - "metadata": { |
153 | | - "vscode": { |
154 | | - "languageId": "plaintext" |
155 | | - } |
156 | | - }, |
| 206 | + "metadata": {}, |
157 | 207 | "outputs": [], |
158 | 208 | "source": [ |
159 | 209 | "oraclevs.create_index(\n", |
160 | 210 | " connection,\n", |
161 | 211 | " vector_store,\n", |
162 | 212 | " params={\n", |
163 | | - " \"idx_name\": \"hnsw_idx2\",\n", |
| 213 | + " \"idx_name\": \"vector_store_hnsw_idx2\",\n", |
164 | 214 | " \"idx_type\": \"HNSW\",\n", |
165 | 215 | " \"accuracy\": 97,\n", |
166 | 216 | " \"parallel\": 16,\n", |
167 | 217 | " },\n", |
168 | 218 | ")" |
169 | 219 | ] |
170 | 220 | }, |
| 221 | + { |
| 222 | + "cell_type": "markdown", |
| 223 | + "id": "e871dd1d", |
| 224 | + "metadata": {}, |
| 225 | + "source": [ |
| 226 | + "## Querying the vector store\n", |
| 227 | + "\n", |
| 228 | + "Finally, we can query the vector store to retrieve relevant documents based on a user's question. " |
| 229 | + ] |
| 230 | + }, |
171 | 231 | { |
172 | 232 | "cell_type": "code", |
173 | | - "execution_count": null, |
| 233 | + "execution_count": 28, |
174 | 234 | "id": "bd498a2e", |
175 | | - "metadata": { |
176 | | - "vscode": { |
177 | | - "languageId": "plaintext" |
| 235 | + "metadata": {}, |
| 236 | + "outputs": [ |
| 237 | + { |
| 238 | + "name": "stdout", |
| 239 | + "output_type": "stream", |
| 240 | + "text": [ |
| 241 | + "Top 2 relevant documents:\n", |
| 242 | + "\n", |
| 243 | + "0 This is another document that also covers information about the Runner API and how it exposes a blocking API. {'id': 'metaflow', 'link': 'https://docs.metaflow.org/'}\n", |
| 244 | + "1 The Runner API allows you to start and manage Metaflow runs and other operations programmatically, for instance, to run flows in a script. The Runner class exposes a blocking API, which waits for operations to complete, as well as a non-blocking (asynchronous) APIs, prefixed with async which execute operations in the background. This document provides an overview of common patterns. For detailed API documentation, see the Runner API reference. {'id': 'running-flows', 'link': 'https://docs.metaflow.org/metaflow/managing-flows/runner'}\n" |
| 245 | + ] |
178 | 246 | } |
179 | | - }, |
180 | | - "outputs": [], |
| 247 | + ], |
181 | 248 | "source": [ |
182 | 249 | "query = \"What is exposed by the Runner API?\"\n", |
183 | 250 | "result = vector_store.similarity_search(query, 2)\n", |
184 | | - "print(result)" |
| 251 | + "\n", |
| 252 | + "print(\"Top 2 relevant documents:\\n\")\n", |
| 253 | + "\n", |
| 254 | + "for index, doc in enumerate(result):\n", |
| 255 | + " print(index, doc.page_content, doc.metadata)" |
185 | 256 | ] |
| 257 | + }, |
| 258 | + { |
| 259 | + "cell_type": "code", |
| 260 | + "execution_count": null, |
| 261 | + "id": "5ff174cd", |
| 262 | + "metadata": {}, |
| 263 | + "outputs": [], |
| 264 | + "source": [] |
186 | 265 | } |
187 | 266 | ], |
188 | 267 | "metadata": { |
| 268 | + "kernelspec": { |
| 269 | + "display_name": "oracle-ai-developer-hub (3.12.9)", |
| 270 | + "language": "python", |
| 271 | + "name": "python3" |
| 272 | + }, |
189 | 273 | "language_info": { |
190 | | - "name": "python" |
| 274 | + "codemirror_mode": { |
| 275 | + "name": "ipython", |
| 276 | + "version": 3 |
| 277 | + }, |
| 278 | + "file_extension": ".py", |
| 279 | + "mimetype": "text/x-python", |
| 280 | + "name": "python", |
| 281 | + "nbconvert_exporter": "python", |
| 282 | + "pygments_lexer": "ipython3", |
| 283 | + "version": "3.12.9" |
191 | 284 | } |
192 | 285 | }, |
193 | 286 | "nbformat": 4, |
|
0 commit comments