ℹ Chroma can be run in-memory in Python (without Docker), but this feature is not yet available in other languages. To use this library, you need either a hosted or a local instance of ChromaDB running.
If you can run `docker-compose up -d --build`, you can run Chroma:
# Fetch the Chroma sources; the docker-compose command below is run from inside this checkout.
git clone https://github.com/chroma-core/chroma.git
cd chroma
# Run a ChromaDB instance at localhost:8000
# (-d starts the containers in the background, --build builds the images before starting them)
docker-compose up -d --build
More information about deploying Chroma to production can be found here.
# Add the chromadb crate to the dependencies of the current Cargo project.
cargo add chromadb
The library crate can be found at crates.io.
The library reference can be found here.
client
- To interface with the ChromaDB server.
collection
- To interface with an associated ChromaDB collection.
You can connect to ChromaDB by instantiating a ChromaClient:
// Client type plus the collection types used by the examples in this document.
use chromadb::v2::ChromaClient;
use chromadb::v2::collection::{ChromaCollection, GetQuery, GetResult, CollectionEntries};
// json! is used further down to build the document filter.
use serde_json::json;
// NOTE(review): ChromaClientOptions is used below but is not covered by the imports above;
// it must also be brought into scope — confirm its module path against the crate docs.
// With default ChromaClientOptions
// Defaults to http://localhost:8000
let client: ChromaClient = ChromaClient::new(Default::default());
// With custom ChromaClientOptions
// (replace <CHROMADB_URL> with the address of your ChromaDB server)
let client: ChromaClient = ChromaClient::new(ChromaClientOptions { url: "<CHROMADB_URL>".into() });
// Get or create a collection with the given name and no metadata.
// The `.await?` calls in this example require an async context whose return type
// can carry the propagated error.
let collection: ChromaCollection = client.get_or_create_collection("my_collection", None).await?;
// Get the UUID of the collection
let collection_uuid = collection.id();
println!("Collection UUID: {}", collection_uuid);
// Upsert some embeddings with documents and no metadata.
// ids, embeddings and documents each carry two entries here.
let collection_entries = CollectionEntries {
ids: vec!["demo-id-1".into(), "demo-id-2".into()],
// Placeholder vectors: two embeddings of 768 zeros each.
embeddings: Some(vec![vec![0.0_f32; 768], vec![0.0_f32; 768]]),
metadatas: None,
documents: Some(vec![
"Some document about 9 octopus recipies".into(),
"Some other document about DCEU Superman Vs CW Superman".into()
])
};
// The second argument is None here because the embeddings are supplied directly above;
// the OpenAI example later in this document passes an embedding function in this position.
let result: bool = collection.upsert(collection_entries, None).await?;
// Create a filter object to filter by document content.
let where_document = json!({
"$contains": "Superman"
});
// Get embeddings from a collection with filters and limit set to 1.
// An empty IDs vec will return all embeddings.
let get_query = GetQuery {
ids: vec![],
where_metadata: None,
limit: Some(1),
offset: None,
where_document: Some(where_document),
// Presumably selects which fields are returned in the result — confirm against the get() docs.
include: Some(vec!["documents".into(),"embeddings".into()])
};
let get_result: GetResult = collection.get(get_query).await?;
// GetResult is printed with the Debug formatter.
println!("Get result: {:?}", get_result);
Find more information about the available filters and options in the get() documentation.
// Instantiate QueryOptions to perform a similarity search on the collection.
// Alternatively, an embedding_function can also be provided with query_texts to perform the search.
// NOTE(review): QueryOptions and QueryResult are not among the imports shown earlier in this
// document; they must also be brought into scope — confirm the module path against the crate docs.
let query = QueryOptions {
query_texts: None,
// Two placeholder query vectors of 768 zeros each.
query_embeddings: Some(vec![vec![0.0_f32; 768], vec![0.0_f32; 768]]),
where_metadata: None,
where_document: None,
n_results: Some(5),
include: None,
};
// The second argument is None: query embeddings are given directly, so no embedding
// function is passed.
let query_result: QueryResult = collection.query(query, None).await?;
println!("Query result: {:?}", query_result);
This crate has built-in support for OpenAI and SBERT embeddings.
To use OpenAI embeddings, enable the `openai` feature in your Cargo.toml.
// Reuses the `client` created in the connection example.
let collection: ChromaCollection = client.get_or_create_collection("openai_collection", None).await?;
let collection_entries = CollectionEntries {
ids: vec!["demo-id-1", "demo-id-2"],
// No precomputed embeddings are supplied; presumably the embedding function passed to
// upsert() below computes them from the documents — confirm against the crate docs.
embeddings: None,
metadatas: None,
documents: Some(vec![
"Some document about 9 octopus recipies",
"Some other document about DCEU Superman Vs CW Superman"])
};
// Use OpenAI embeddings
// NOTE(review): OpenAIEmbeddings is not imported in any snippet shown here; it must be
// brought into scope — confirm its module path against the crate docs.
let openai_embeddings = OpenAIEmbeddings::new(Default::default());
// The embedding function is handed to upsert() boxed, as the optional second argument.
collection.upsert(collection_entries, Some(Box::new(openai_embeddings))).await?;
OpenSauced provides insights into open source projects by using data science in git commits.
MIT © 2023