@@ -178,7 +178,7 @@ curl -X POST 'http://0.0.0.0:4000/v1/vector_stores/my-collection-name/search' \
178178| Guardrails | ❌ Not Yet Supported | Guardrails are not currently supported for vector stores |
179179| Cost Tracking | ✅ Supported | Cost is $0 for Milvus searches |
180180| Unified API | ✅ Supported | Call via OpenAI compatible ` /v1/vector_stores/search ` endpoint |
181- | Passthrough | ❌ Not yet supported | |
181+ | Passthrough | ✅ Supported | Use native Milvus API format |
182182
183183## Response Format
184184
@@ -208,6 +208,313 @@ The response follows the standard LiteLLM vector store format:
208208}
209209```
210210
211+ ## Passthrough API (Native Milvus Format)
212+
213+ Use this to let developers **create** and **search** vector stores using the native Milvus API format, without giving them the Milvus credentials.
214+
215+ This is for the proxy only.
216+
217+ ### Admin Flow
218+
219+ #### 1. Add the vector store to LiteLLM
220+
221+ ``` yaml
222+ model_list :
223+ - model_name : embedding-model
224+ litellm_params :
225+ model : azure/text-embedding-3-large
226+ api_base : https://your-endpoint.cognitiveservices.azure.com/
227+ api_key : os.environ/AZURE_API_KEY
228+ api_version : " 2025-09-01"
229+
230+ vector_store_registry :
231+ - vector_store_name : " milvus-store"
232+ litellm_params :
233+ vector_store_id : " can-be-anything" # vector store id can be anything for the purpose of passthrough api
234+ custom_llm_provider : " milvus"
235+ api_key : os.environ/MILVUS_API_KEY
236+ api_base : https://your-milvus-instance.milvus.io
237+
238+ general_settings :
239+ database_url : " postgresql://user:password@host:port/database"
240+ master_key : " sk-1234"
241+ ```
242+
243+ This config adds your vector store credentials to LiteLLM.
244+
245+ #### 2. Start the proxy
246+
247+ ``` bash
248+ litellm --config /path/to/config.yaml
249+
250+ # RUNNING on http://0.0.0.0:4000
251+ ```
252+
253+ #### 3. Create a virtual index
254+
255+ ``` bash
256+ curl -L -X POST ' http://0.0.0.0:4000/v1/indexes' \
257+ -H ' Content-Type: application/json' \
258+ -H ' Authorization: Bearer sk-1234' \
259+ -d ' {
260+ "index_name": "dall-e-6",
261+ "litellm_params": {
262+ "vector_store_index": "real-collection-name",
263+ "vector_store_name": "milvus-store"
264+ }
265+ }'
266+ ```
267+
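268+ This creates a virtual index, which the developer can use to create and search vector stores. The developer refers to it by its `index_name` (`dall-e-6`); the request ties it to the `vector_store_index` (`real-collection-name`) in the `milvus-store` vector store.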
269+
270+ #### 4. Create a key with the vector store permissions
271+
272+ ``` bash
273+ curl -L -X POST ' http://0.0.0.0:4000/key/generate' \
274+ -H ' Content-Type: application/json' \
275+ -H ' Authorization: Bearer sk-1234' \
276+ -d ' {
277+ "allowed_vector_store_indexes": [{"index_name": "dall-e-6", "index_permissions": ["write", "read"]}],
278+ "models": ["embedding-model"]
279+ }'
280+ ```
281+
282+ Give the key access to the virtual index and the embedding model.
283+
284+ **Expected response**
285+
286+ ``` json
287+ {
288+ "key" : " sk-my-virtual-key"
289+ }
290+ ```
291+
292+ ### Developer Flow
293+
294+ #### 1. Create a collection with schema
295+
296+ Note: Point the client at the `/milvus` endpoint on the proxy. This is the passthrough API, and it uses the `milvus` provider from your config.
297+
298+ ``` python
299+ from milvus_rest_client import MilvusRESTClient, DataType
300+ import random
301+ import time
302+
303+ # Configuration
304+ uri = " http://0.0.0.0:4000/milvus" # IMPORTANT: Use the '/milvus' endpoint for passthrough
305+ token = " sk-my-virtual-key"
306+ collection_name = " dall-e-6" # Virtual index name
307+
308+ # Initialize client
309+ milvus_client = MilvusRESTClient(uri = uri, token = token)
310+ print (f " Connected to DB: { uri} successfully " )
311+
312+ # Check if the collection exists and drop if it does
313+ check_collection = milvus_client.has_collection(collection_name)
314+ if check_collection:
315+ milvus_client.drop_collection(collection_name)
316+ print (f " Dropped the existing collection { collection_name} successfully " )
317+
318+ # Define schema
319+ dim = 64 # Vector dimension
320+
321+ print (" Start to create the collection schema" )
322+ schema = milvus_client.create_schema()
323+ schema.add_field(
324+ " book_id" , DataType.INT64 , is_primary = True , description = " customized primary id"
325+ )
326+ schema.add_field(" word_count" , DataType.INT64 , description = " word count" )
327+ schema.add_field(
328+ " book_intro" , DataType.FLOAT_VECTOR , dim = dim, description = " book introduction"
329+ )
330+
331+ # Prepare index parameters
332+ print (" Start to prepare index parameters with default AUTOINDEX" )
333+ index_params = milvus_client.prepare_index_params()
334+ index_params.add_index(" book_intro" , metric_type = " L2" )
335+
336+ # Create collection
337+ print (f " Start to create example collection: { collection_name} " )
338+ milvus_client.create_collection(
339+ collection_name, schema = schema, index_params = index_params
340+ )
341+ collection_property = milvus_client.describe_collection(collection_name)
342+ print (" Collection details: %s " % collection_property)
343+ ```
344+
345+ #### 2. Insert data into the collection
346+
347+ ``` python
348+ # Insert data with customized ids
349+ nb = 1000
350+ insert_rounds = 2
351+ start = 0 # first primary key id
352+ total_rt = 0 # total response time for insert
353+
354+ print (
355+ f " Start to insert { nb* insert_rounds} entities into example collection: { collection_name} "
356+ )
357+ for i in range (insert_rounds):
358+ vector = [random.random() for _ in range (dim)]
359+ rows = [
360+ {" book_id" : i, " word_count" : random.randint(1 , 100 ), " book_intro" : vector}
361+ for i in range (start, start + nb)
362+ ]
363+ t0 = time.time()
364+ milvus_client.insert(collection_name, rows)
365+ ins_rt = time.time() - t0
366+ start += nb
367+ total_rt += ins_rt
368+ print (f " Insert completed in { round (total_rt, 4 )} seconds " )
369+
370+ # Flush the collection
371+ print (" Start to flush" )
372+ start_flush = time.time()
373+ milvus_client.flush(collection_name)
374+ end_flush = time.time()
375+ print (f " Flush completed in { round (end_flush - start_flush, 4 )} seconds " )
376+ ```
377+
378+ #### 3. Search the collection
379+
380+ ``` python
381+ # Search configuration
382+ nq = 3 # Number of query vectors
383+ search_params = {" metric_type" : " L2" , " params" : {" level" : 2 }}
384+ limit = 2 # Number of results to return
385+
386+ # Perform searches
387+ for i in range (5 ):
388+ search_vectors = [[random.random() for _ in range (dim)] for _ in range (nq)]
389+ t0 = time.time()
390+ results = milvus_client.search(
391+ collection_name,
392+ data = search_vectors,
393+ limit = limit,
394+ search_params = search_params,
395+ anns_field = " book_intro" ,
396+ )
397+ t1 = time.time()
398+ print (f " Search { i} results: { results} " )
399+ print (f " Search { i} latency: { round (t1- t0, 4 )} seconds " )
400+ ```
401+
402+ #### Complete Example
403+
404+ Here's a full working example:
405+
406+ ``` python
407+ from milvus_rest_client import MilvusRESTClient, DataType
408+ import random
409+ import time
410+
411+ # ----------------------------
412+ # 🔐 CONFIGURATION
413+ # ----------------------------
414+ uri = " http://0.0.0.0:4000/milvus" # IMPORTANT: Use the '/milvus' endpoint
415+ token = " sk-my-virtual-key"
416+ collection_name = " dall-e-6" # Your virtual index name
417+
418+ # ----------------------------
419+ # 📋 STEP 1 — Initialize Client
420+ # ----------------------------
421+ milvus_client = MilvusRESTClient(uri = uri, token = token)
422+ print (f " ✅ Connected to DB: { uri} successfully " )
423+
424+ # ----------------------------
425+ # 🗑️ STEP 2 — Drop Existing Collection (if needed)
426+ # ----------------------------
427+ check_collection = milvus_client.has_collection(collection_name)
428+ if check_collection:
429+ milvus_client.drop_collection(collection_name)
430+ print (f " 🗑️ Dropped the existing collection { collection_name} successfully " )
431+
432+ # ----------------------------
433+ # 📐 STEP 3 — Create Collection Schema
434+ # ----------------------------
435+ dim = 64 # Vector dimension
436+
437+ print (" 📐 Creating the collection schema" )
438+ schema = milvus_client.create_schema()
439+ schema.add_field(
440+ " book_id" , DataType.INT64 , is_primary = True , description = " customized primary id"
441+ )
442+ schema.add_field(" word_count" , DataType.INT64 , description = " word count" )
443+ schema.add_field(
444+ " book_intro" , DataType.FLOAT_VECTOR , dim = dim, description = " book introduction"
445+ )
446+
447+ # ----------------------------
448+ # 🔍 STEP 4 — Create Index
449+ # ----------------------------
450+ print (" 🔍 Preparing index parameters with default AUTOINDEX" )
451+ index_params = milvus_client.prepare_index_params()
452+ index_params.add_index(" book_intro" , metric_type = " L2" )
453+
454+ # ----------------------------
455+ # 🏗️ STEP 5 — Create Collection
456+ # ----------------------------
457+ print (f " 🏗️ Creating collection: { collection_name} " )
458+ milvus_client.create_collection(
459+ collection_name, schema = schema, index_params = index_params
460+ )
461+ collection_property = milvus_client.describe_collection(collection_name)
462+ print (f " ✅ Collection created: { collection_property} " )
463+
464+ # ----------------------------
465+ # 📤 STEP 6 — Insert Data
466+ # ----------------------------
467+ nb = 1000
468+ insert_rounds = 2
469+ start = 0
470+ total_rt = 0
471+
472+ print (f " 📤 Inserting { nb* insert_rounds} entities into collection " )
473+ for i in range (insert_rounds):
474+ vector = [random.random() for _ in range (dim)]
475+ rows = [
476+ {" book_id" : i, " word_count" : random.randint(1 , 100 ), " book_intro" : vector}
477+ for i in range (start, start + nb)
478+ ]
479+ t0 = time.time()
480+ milvus_client.insert(collection_name, rows)
481+ ins_rt = time.time() - t0
482+ start += nb
483+ total_rt += ins_rt
484+ print (f " ✅ Insert completed in { round (total_rt, 4 )} seconds " )
485+
486+ # ----------------------------
487+ # 💾 STEP 7 — Flush Collection
488+ # ----------------------------
489+ print (" 💾 Flushing collection" )
490+ start_flush = time.time()
491+ milvus_client.flush(collection_name)
492+ end_flush = time.time()
493+ print (f " ✅ Flush completed in { round (end_flush - start_flush, 4 )} seconds " )
494+
495+ # ----------------------------
496+ # 🔍 STEP 8 — Search
497+ # ----------------------------
498+ nq = 3
499+ search_params = {" metric_type" : " L2" , " params" : {" level" : 2 }}
500+ limit = 2
501+
502+ print (f " 🔍 Performing { 5 } search operations " )
503+ for i in range (5 ):
504+ search_vectors = [[random.random() for _ in range (dim)] for _ in range (nq)]
505+ t0 = time.time()
506+ results = milvus_client.search(
507+ collection_name,
508+ data = search_vectors,
509+ limit = limit,
510+ search_params = search_params,
511+ anns_field = " book_intro" ,
512+ )
513+ t1 = time.time()
514+ print (f " ✅ Search { i} results: { results} " )
515+ print (f " Search { i} latency: { round (t1- t0, 4 )} seconds " )
516+ ```
517+
211518## How It Works
212519
213520When you search:
0 commit comments