|
17 | 17 |
|
18 | 18 |
|
19 | 19 | from google.cloud import aiplatform
|
20 |
| - |
| 20 | +from google.cloud.aiplatform_v1beta1 import ( |
| 21 | + GoogleDriveSource, |
| 22 | + ImportRagFilesConfig, |
| 23 | + ImportRagFilesRequest, |
| 24 | + ImportRagFilesResponse, |
| 25 | + JiraSource as GapicJiraSource, |
| 26 | + RagContexts, |
| 27 | + RagCorpus as GapicRagCorpus, |
| 28 | + RagEngineConfig as GapicRagEngineConfig, |
| 29 | + RagFileChunkingConfig, |
| 30 | + RagFileParsingConfig, |
| 31 | + RagFileTransformationConfig, |
| 32 | + RagFile as GapicRagFile, |
| 33 | + RagManagedDbConfig as GapicRagManagedDbConfig, |
| 34 | + RagVectorDbConfig as GapicRagVectorDbConfig, |
| 35 | + RetrieveContextsResponse, |
| 36 | + SharePointSources as GapicSharePointSources, |
| 37 | + SlackSource as GapicSlackSource, |
| 38 | + VertexAiSearchConfig as GapicVertexAiSearchConfig, |
| 39 | +) |
| 40 | +from google.cloud.aiplatform_v1beta1.types import api_auth |
21 | 41 | from vertexai.preview.rag import (
|
| 42 | + ANN, |
| 43 | + Basic, |
22 | 44 | EmbeddingModelConfig,
|
| 45 | + Enterprise, |
23 | 46 | Filter,
|
24 | 47 | HybridSearch,
|
| 48 | + JiraQuery, |
| 49 | + JiraSource, |
| 50 | + KNN, |
25 | 51 | LayoutParserConfig,
|
26 | 52 | LlmParserConfig,
|
27 | 53 | LlmRanker,
|
28 | 54 | Pinecone,
|
29 | 55 | RagCorpus,
|
| 56 | + RagEmbeddingModelConfig, |
| 57 | + RagEngineConfig, |
30 | 58 | RagFile,
|
| 59 | + RagManagedDb, |
| 60 | + RagManagedDbConfig, |
31 | 61 | RagResource,
|
32 | 62 | RagRetrievalConfig,
|
33 |
| - Ranking, |
| 63 | + RagVectorDbConfig, |
34 | 64 | RankService,
|
| 65 | + Ranking, |
35 | 66 | SharePointSource,
|
36 | 67 | SharePointSources,
|
37 |
| - SlackChannelsSource, |
38 | 68 | SlackChannel,
|
39 |
| - JiraSource, |
40 |
| - JiraQuery, |
41 |
| - Weaviate, |
| 69 | + SlackChannelsSource, |
42 | 70 | VertexAiSearchConfig,
|
43 |
| - VertexVectorSearch, |
44 | 71 | VertexFeatureStore,
|
45 |
| - RagEmbeddingModelConfig, |
46 | 72 | VertexPredictionEndpoint,
|
47 |
| - RagVectorDbConfig, |
48 |
| - RagManagedDbConfig, |
49 |
| - RagEngineConfig, |
50 |
| - Basic, |
51 |
| - Enterprise, |
52 |
| -) |
53 |
| -from google.cloud.aiplatform_v1beta1 import ( |
54 |
| - GoogleDriveSource, |
55 |
| - RagFileChunkingConfig, |
56 |
| - RagFileTransformationConfig, |
57 |
| - RagFileParsingConfig, |
58 |
| - ImportRagFilesConfig, |
59 |
| - ImportRagFilesRequest, |
60 |
| - ImportRagFilesResponse, |
61 |
| - JiraSource as GapicJiraSource, |
62 |
| - RagCorpus as GapicRagCorpus, |
63 |
| - RagFile as GapicRagFile, |
64 |
| - SharePointSources as GapicSharePointSources, |
65 |
| - SlackSource as GapicSlackSource, |
66 |
| - RagContexts, |
67 |
| - RagManagedDbConfig as GapicRagManagedDbConfig, |
68 |
| - RagEngineConfig as GapicRagEngineConfig, |
69 |
| - RetrieveContextsResponse, |
70 |
| - RagVectorDbConfig as GapicRagVectorDbConfig, |
71 |
| - VertexAiSearchConfig as GapicVertexAiSearchConfig, |
| 73 | + VertexVectorSearch, |
| 74 | + Weaviate, |
72 | 75 | )
|
73 |
| -from google.cloud.aiplatform_v1beta1.types import api_auth |
74 | 76 | from google.protobuf import timestamp_pb2
|
75 | 77 |
|
76 | 78 |
|
|
102 | 104 | index_name=TEST_PINECONE_INDEX_NAME,
|
103 | 105 | api_key=TEST_PINECONE_API_KEY_SECRET_VERSION,
|
104 | 106 | )
|
| 107 | +TEST_RAG_MANAGED_DB_ANN_TREE_DEPTH = 3 |
| 108 | +TEST_RAG_MANAGED_DB_ANN_LEAF_COUNT = 100 |
| 109 | +TEST_RAG_MANAGED_DB_CONFIG = RagManagedDb() |
| 110 | +TEST_RAG_MANAGED_DB_KNN_CONFIG = RagManagedDb( |
| 111 | + retrieval_strategy=KNN(), |
| 112 | +) |
| 113 | +TEST_RAG_MANAGED_DB_ANN_CONFIG = RagManagedDb( |
| 114 | + retrieval_strategy=ANN( |
| 115 | + tree_depth=TEST_RAG_MANAGED_DB_ANN_TREE_DEPTH, |
| 116 | + leaf_count=TEST_RAG_MANAGED_DB_ANN_LEAF_COUNT, |
| 117 | + ), |
| 118 | +) |
105 | 119 | TEST_VERTEX_VECTOR_SEARCH_INDEX_ENDPOINT = "test-vector-search-index-endpoint"
|
106 | 120 | TEST_VERTEX_VECTOR_SEARCH_INDEX = "test-vector-search-index"
|
107 | 121 | TEST_VERTEX_VECTOR_SEARCH_CONFIG = VertexVectorSearch(
|
|
169 | 183 | ),
|
170 | 184 | ),
|
171 | 185 | )
|
| 186 | +TEST_GAPIC_RAG_CORPUS_RAG_MANAGED_DB = GapicRagCorpus( |
| 187 | + name=TEST_RAG_CORPUS_RESOURCE_NAME, |
| 188 | + display_name=TEST_CORPUS_DISPLAY_NAME, |
| 189 | + description=TEST_CORPUS_DISCRIPTION, |
| 190 | + rag_vector_db_config=GapicRagVectorDbConfig( |
| 191 | + rag_managed_db=GapicRagVectorDbConfig.RagManagedDb() |
| 192 | + ), |
| 193 | +) |
| 194 | +TEST_GAPIC_RAG_CORPUS_RAG_MANAGED_DB_KNN = GapicRagCorpus( |
| 195 | + name=TEST_RAG_CORPUS_RESOURCE_NAME, |
| 196 | + display_name=TEST_CORPUS_DISPLAY_NAME, |
| 197 | + description=TEST_CORPUS_DISCRIPTION, |
| 198 | + rag_vector_db_config=GapicRagVectorDbConfig( |
| 199 | + rag_managed_db=GapicRagVectorDbConfig.RagManagedDb( |
| 200 | + knn=GapicRagVectorDbConfig.RagManagedDb.KNN() |
| 201 | + ) |
| 202 | + ), |
| 203 | +) |
| 204 | +TEST_GAPIC_RAG_CORPUS_RAG_MANAGED_DB_ANN = GapicRagCorpus( |
| 205 | + name=TEST_RAG_CORPUS_RESOURCE_NAME, |
| 206 | + display_name=TEST_CORPUS_DISPLAY_NAME, |
| 207 | + description=TEST_CORPUS_DISCRIPTION, |
| 208 | + rag_vector_db_config=GapicRagVectorDbConfig( |
| 209 | + rag_managed_db=GapicRagVectorDbConfig.RagManagedDb( |
| 210 | + ann=GapicRagVectorDbConfig.RagManagedDb.ANN( |
| 211 | + tree_depth=TEST_RAG_MANAGED_DB_ANN_TREE_DEPTH, |
| 212 | + leaf_count=TEST_RAG_MANAGED_DB_ANN_LEAF_COUNT, |
| 213 | + ) |
| 214 | + ) |
| 215 | + ), |
| 216 | +) |
172 | 217 | TEST_EMBEDDING_MODEL_CONFIG = EmbeddingModelConfig(
|
173 | 218 | publisher_model="publishers/google/models/textembedding-gecko",
|
174 | 219 | )
|
175 | 220 | TEST_RAG_EMBEDDING_MODEL_CONFIG = RagEmbeddingModelConfig(
|
176 | 221 | vertex_prediction_endpoint=VertexPredictionEndpoint(
|
177 |
| - publisher_model="publishers/google/models/textembedding-gecko", |
| 222 | + publisher_model="projects/{}/locations/{}/publishers/google/models/textembedding-gecko".format( |
| 223 | + TEST_PROJECT, TEST_REGION |
| 224 | + ), |
178 | 225 | ),
|
179 | 226 | )
|
180 | 227 | TEST_BACKEND_CONFIG_EMBEDDING_MODEL_CONFIG = RagVectorDbConfig(
|
|
207 | 254 | description=TEST_CORPUS_DISCRIPTION,
|
208 | 255 | vector_db=TEST_PINECONE_CONFIG,
|
209 | 256 | )
|
| 257 | +TEST_RAG_CORPUS_RAG_MANAGED_DB = RagCorpus( |
| 258 | + name=TEST_RAG_CORPUS_RESOURCE_NAME, |
| 259 | + display_name=TEST_CORPUS_DISPLAY_NAME, |
| 260 | + vector_db=TEST_RAG_MANAGED_DB_CONFIG, |
| 261 | +) |
| 262 | +TEST_RAG_CORPUS_RAG_MANAGED_DB_KNN = RagCorpus( |
| 263 | + name=TEST_RAG_CORPUS_RESOURCE_NAME, |
| 264 | + display_name=TEST_CORPUS_DISPLAY_NAME, |
| 265 | + vector_db=TEST_RAG_MANAGED_DB_KNN_CONFIG, |
| 266 | +) |
| 267 | +TEST_RAG_CORPUS_RAG_MANAGED_DB_ANN = RagCorpus( |
| 268 | + name=TEST_RAG_CORPUS_RESOURCE_NAME, |
| 269 | + display_name=TEST_CORPUS_DISPLAY_NAME, |
| 270 | + vector_db=TEST_RAG_MANAGED_DB_ANN_CONFIG, |
| 271 | +) |
210 | 272 | TEST_RAG_CORPUS_VERTEX_VECTOR_SEARCH = RagCorpus(
|
211 | 273 | name=TEST_RAG_CORPUS_RESOURCE_NAME,
|
212 | 274 | display_name=TEST_CORPUS_DISPLAY_NAME,
|
|
247 | 309 | ),
|
248 | 310 | ),
|
249 | 311 | )
|
| 312 | +TEST_GAPIC_RAG_CORPUS_RAG_MANAGED_DB_BACKEND_CONFIG = GapicRagCorpus( |
| 313 | + name=TEST_RAG_CORPUS_RESOURCE_NAME, |
| 314 | + display_name=TEST_CORPUS_DISPLAY_NAME, |
| 315 | + description=TEST_CORPUS_DISCRIPTION, |
| 316 | + vector_db_config=GapicRagVectorDbConfig( |
| 317 | + rag_managed_db=GapicRagVectorDbConfig.RagManagedDb() |
| 318 | + ), |
| 319 | +) |
| 320 | +TEST_GAPIC_RAG_CORPUS_RAG_MANAGED_DB_KNN_BACKEND_CONFIG = GapicRagCorpus( |
| 321 | + name=TEST_RAG_CORPUS_RESOURCE_NAME, |
| 322 | + display_name=TEST_CORPUS_DISPLAY_NAME, |
| 323 | + description=TEST_CORPUS_DISCRIPTION, |
| 324 | + vector_db_config=GapicRagVectorDbConfig( |
| 325 | + rag_managed_db=GapicRagVectorDbConfig.RagManagedDb( |
| 326 | + knn=GapicRagVectorDbConfig.RagManagedDb.KNN() |
| 327 | + ) |
| 328 | + ), |
| 329 | +) |
| 330 | +TEST_GAPIC_RAG_CORPUS_RAG_MANAGED_DB_ANN_BACKEND_CONFIG = GapicRagCorpus( |
| 331 | + name=TEST_RAG_CORPUS_RESOURCE_NAME, |
| 332 | + display_name=TEST_CORPUS_DISPLAY_NAME, |
| 333 | + description=TEST_CORPUS_DISCRIPTION, |
| 334 | + vector_db_config=GapicRagVectorDbConfig( |
| 335 | + rag_managed_db=GapicRagVectorDbConfig.RagManagedDb( |
| 336 | + ann=GapicRagVectorDbConfig.RagManagedDb.ANN( |
| 337 | + tree_depth=TEST_RAG_MANAGED_DB_ANN_TREE_DEPTH, |
| 338 | + leaf_count=TEST_RAG_MANAGED_DB_ANN_LEAF_COUNT, |
| 339 | + ) |
| 340 | + ) |
| 341 | + ), |
| 342 | +) |
250 | 343 | TEST_RAG_CORPUS_BACKEND = RagCorpus(
|
251 | 344 | name=TEST_RAG_CORPUS_RESOURCE_NAME,
|
252 | 345 | display_name=TEST_CORPUS_DISPLAY_NAME,
|
|
255 | 348 | TEST_BACKEND_CONFIG_PINECONE_CONFIG = RagVectorDbConfig(
|
256 | 349 | vector_db=TEST_PINECONE_CONFIG,
|
257 | 350 | )
|
| 351 | +TEST_BACKEND_CONFIG_RAG_MANAGED_DB_CONFIG = RagVectorDbConfig( |
| 352 | + vector_db=TEST_RAG_MANAGED_DB_CONFIG, |
| 353 | +) |
| 354 | +TEST_BACKEND_CONFIG_RAG_MANAGED_DB_KNN_CONFIG = RagVectorDbConfig( |
| 355 | + vector_db=TEST_RAG_MANAGED_DB_KNN_CONFIG, |
| 356 | +) |
| 357 | +TEST_BACKEND_CONFIG_RAG_MANAGED_DB_ANN_CONFIG = RagVectorDbConfig( |
| 358 | + vector_db=TEST_RAG_MANAGED_DB_ANN_CONFIG, |
| 359 | +) |
258 | 360 | TEST_RAG_CORPUS_PINECONE_BACKEND = RagCorpus(
|
259 | 361 | name=TEST_RAG_CORPUS_RESOURCE_NAME,
|
260 | 362 | display_name=TEST_CORPUS_DISPLAY_NAME,
|
261 | 363 | description=TEST_CORPUS_DISCRIPTION,
|
262 | 364 | backend_config=TEST_BACKEND_CONFIG_PINECONE_CONFIG,
|
263 | 365 | )
|
| 366 | +TEST_RAG_CORPUS_RAG_MANAGED_DB_BACKEND = RagCorpus( |
| 367 | + name=TEST_RAG_CORPUS_RESOURCE_NAME, |
| 368 | + display_name=TEST_CORPUS_DISPLAY_NAME, |
| 369 | + backend_config=TEST_BACKEND_CONFIG_RAG_MANAGED_DB_CONFIG, |
| 370 | +) |
| 371 | +TEST_RAG_CORPUS_RAG_MANAGED_DB_KNN_BACKEND = RagCorpus( |
| 372 | + name=TEST_RAG_CORPUS_RESOURCE_NAME, |
| 373 | + display_name=TEST_CORPUS_DISPLAY_NAME, |
| 374 | + backend_config=TEST_BACKEND_CONFIG_RAG_MANAGED_DB_KNN_CONFIG, |
| 375 | +) |
| 376 | +TEST_RAG_CORPUS_RAG_MANAGED_DB_ANN_BACKEND = RagCorpus( |
| 377 | + name=TEST_RAG_CORPUS_RESOURCE_NAME, |
| 378 | + display_name=TEST_CORPUS_DISPLAY_NAME, |
| 379 | + backend_config=TEST_BACKEND_CONFIG_RAG_MANAGED_DB_ANN_CONFIG, |
| 380 | +) |
264 | 381 | TEST_BACKEND_CONFIG_VERTEX_VECTOR_SEARCH_CONFIG = RagVectorDbConfig(
|
265 | 382 | vector_db=TEST_VERTEX_VECTOR_SEARCH_CONFIG,
|
266 | 383 | )
|
|
343 | 460 | # GCS
|
344 | 461 | TEST_IMPORT_FILES_CONFIG_GCS = ImportRagFilesConfig(
|
345 | 462 | rag_file_transformation_config=TEST_RAG_FILE_TRANSFORMATION_CONFIG,
|
| 463 | + rebuild_ann_index=False, |
| 464 | +) |
| 465 | +TEST_IMPORT_FILES_CONFIG_GCS_REBUILD_ANN_INDEX = ImportRagFilesConfig( |
| 466 | + rag_file_transformation_config=TEST_RAG_FILE_TRANSFORMATION_CONFIG, |
| 467 | + rebuild_ann_index=True, |
| 468 | +) |
| 469 | +TEST_IMPORT_FILES_CONFIG_GCS_REBUILD_ANN_INDEX.gcs_source.uris = [TEST_GCS_PATH] |
| 470 | +TEST_IMPORT_FILES_CONFIG_GCS_REBUILD_ANN_INDEX.rag_file_parsing_config.advanced_parser.use_advanced_pdf_parsing = ( |
| 471 | + False |
346 | 472 | )
|
347 | 473 | TEST_IMPORT_FILES_CONFIG_GCS.gcs_source.uris = [TEST_GCS_PATH]
|
348 | 474 | TEST_IMPORT_FILES_CONFIG_GCS.rag_file_parsing_config.advanced_parser.use_advanced_pdf_parsing = (
|
|
352 | 478 | parent=TEST_RAG_CORPUS_RESOURCE_NAME,
|
353 | 479 | import_rag_files_config=TEST_IMPORT_FILES_CONFIG_GCS,
|
354 | 480 | )
|
| 481 | +TEST_IMPORT_REQUEST_GCS_REBUILD_ANN_INDEX = ImportRagFilesRequest( |
| 482 | + parent=TEST_RAG_CORPUS_RESOURCE_NAME, |
| 483 | + import_rag_files_config=TEST_IMPORT_FILES_CONFIG_GCS_REBUILD_ANN_INDEX, |
| 484 | +) |
355 | 485 | # Google Drive folders
|
356 | 486 | TEST_DRIVE_FOLDER_ID = "123"
|
357 | 487 | TEST_DRIVE_FOLDER = (
|
|
362 | 492 | )
|
363 | 493 | TEST_IMPORT_FILES_CONFIG_DRIVE_FOLDER = ImportRagFilesConfig(
|
364 | 494 | rag_file_transformation_config=TEST_RAG_FILE_TRANSFORMATION_CONFIG,
|
| 495 | + rebuild_ann_index=False, |
365 | 496 | )
|
366 | 497 | TEST_IMPORT_FILES_CONFIG_DRIVE_FOLDER.google_drive_source.resource_ids = [
|
367 | 498 | GoogleDriveSource.ResourceId(
|
|
374 | 505 | )
|
375 | 506 | TEST_IMPORT_FILES_CONFIG_DRIVE_FOLDER_PARSING = ImportRagFilesConfig(
|
376 | 507 | rag_file_transformation_config=TEST_RAG_FILE_TRANSFORMATION_CONFIG,
|
| 508 | + rebuild_ann_index=False, |
377 | 509 | )
|
378 | 510 | TEST_IMPORT_FILES_CONFIG_DRIVE_FOLDER_PARSING.google_drive_source.resource_ids = [
|
379 | 511 | GoogleDriveSource.ResourceId(
|
|
432 | 564 | use_advanced_pdf_parsing=False
|
433 | 565 | )
|
434 | 566 | ),
|
| 567 | + rebuild_ann_index=False, |
435 | 568 | )
|
436 | 569 | TEST_IMPORT_FILES_CONFIG_DRIVE_FILE.max_embedding_requests_per_min = 800
|
437 | 570 |
|
|
491 | 624 | TEST_IMPORT_FILES_CONFIG_SLACK_SOURCE = ImportRagFilesConfig(
|
492 | 625 | rag_file_parsing_config=TEST_RAG_FILE_PARSING_CONFIG,
|
493 | 626 | rag_file_transformation_config=TEST_RAG_FILE_TRANSFORMATION_CONFIG,
|
| 627 | + rebuild_ann_index=False, |
494 | 628 | )
|
495 | 629 | TEST_IMPORT_FILES_CONFIG_SLACK_SOURCE.slack_source.channels = [
|
496 | 630 | GapicSlackSource.SlackChannels(
|
|
544 | 678 | TEST_IMPORT_FILES_CONFIG_JIRA_SOURCE = ImportRagFilesConfig(
|
545 | 679 | rag_file_parsing_config=TEST_RAG_FILE_PARSING_CONFIG,
|
546 | 680 | rag_file_transformation_config=TEST_RAG_FILE_TRANSFORMATION_CONFIG,
|
| 681 | + rebuild_ann_index=False, |
547 | 682 | )
|
548 | 683 | TEST_IMPORT_FILES_CONFIG_JIRA_SOURCE.jira_source.jira_queries = [
|
549 | 684 | GapicJiraSource.JiraQueries(
|
|
591 | 726 | )
|
592 | 727 | ]
|
593 | 728 | ),
|
| 729 | + rebuild_ann_index=False, |
594 | 730 | )
|
595 | 731 |
|
596 | 732 | TEST_IMPORT_REQUEST_SHARE_POINT_SOURCE = ImportRagFilesRequest(
|
|
681 | 817 | )
|
682 | 818 | ]
|
683 | 819 | ),
|
| 820 | + rebuild_ann_index=False, |
684 | 821 | )
|
685 | 822 |
|
686 | 823 | TEST_IMPORT_REQUEST_SHARE_POINT_SOURCE_NO_FOLDERS = ImportRagFilesRequest(
|
|
0 commit comments