Skip to content

Commit 5c2e17e

Browse files
committed
CSHARP-5717: Introduce basic vector enum types
As discussed with Boris, these are used for index building in EF Core. There isn't a strongly typed API for vector indexes in the driver yet, but when there is, these enums will be used there as well as by EF.
1 parent c4cc63b commit 5c2e17e

File tree

5 files changed

+538
-32
lines changed

5 files changed

+538
-32
lines changed
Lines changed: 216 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,216 @@
1+
/* Copyright 2010-present MongoDB Inc.
2+
*
3+
* Licensed under the Apache License, Version 2.0 (the "License");
4+
* you may not use this file except in compliance with the License.
5+
* You may obtain a copy of the License at
6+
*
7+
* http://www.apache.org/licenses/LICENSE-2.0
8+
*
9+
* Unless required by applicable law or agreed to in writing, software
10+
* distributed under the License is distributed on an "AS IS" BASIS,
11+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
* See the License for the specific language governing permissions and
13+
* limitations under the License.
14+
*/
15+
16+
using System;
17+
using System.Collections.Generic;
18+
using System.Linq;
19+
using System.Linq.Expressions;
20+
using MongoDB.Bson;
21+
using MongoDB.Bson.Serialization;
22+
23+
namespace MongoDB.Driver
24+
{
25+
/// <summary>
/// Defines an Atlas vector search index model using strongly-typed C# APIs.
/// </summary>
/// <typeparam name="TDocument">The document type whose fields are referenced by the index definition.</typeparam>
public class CreateAtlasVectorIndexModel<TDocument> : CreateSearchIndexModel
{
    // Used to render field definitions into their field names when the index definition is built.
    private readonly RenderArgs<TDocument> _renderArgs
        = new(BsonSerializer.LookupSerializer<TDocument>(), BsonSerializer.SerializerRegistry);

    /// <summary>
    /// Initializes a new instance of the <see cref="CreateAtlasVectorIndexModel{TDocument}"/> class, passing the
    /// required options for <see cref="VectorSimilarity"/> and number of vector dimensions to the constructor.
    /// </summary>
    /// <param name="field">The field containing the vectors to index.</param>
    /// <param name="name">The index name.</param>
    /// <param name="similarity">The <see cref="VectorSimilarity"/> to use to search for top K-nearest neighbors.</param>
    /// <param name="dimensions">Number of vector dimensions that Atlas Vector Search enforces at index-time and query-time.</param>
    /// <param name="filterFields">Fields that may be used as filters in the vector query. May be null or empty.</param>
    public CreateAtlasVectorIndexModel(
        FieldDefinition<TDocument> field,
        string name,
        VectorSimilarity similarity,
        int dimensions,
        params FieldDefinition<TDocument>[] filterFields)
        : base(name, SearchIndexType.VectorSearch)
    {
        Field = field;
        Similarity = similarity;
        Dimensions = dimensions;

        // Copy the caller's array so later changes to it cannot affect this model; null becomes an empty list.
        FilterFields = filterFields?.ToList() ?? [];
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="CreateAtlasVectorIndexModel{TDocument}"/> class, passing the
    /// required options for <see cref="VectorSimilarity"/> and number of vector dimensions to the constructor.
    /// </summary>
    /// <param name="field">An expression pointing to the field containing the vectors to index.</param>
    /// <param name="name">The index name.</param>
    /// <param name="similarity">The <see cref="VectorSimilarity"/> to use to search for top K-nearest neighbors.</param>
    /// <param name="dimensions">Number of vector dimensions that Atlas Vector Search enforces at index-time and query-time.</param>
    /// <param name="filterFields">Expressions pointing to fields that may be used as filters in the vector query. May be null or empty.</param>
    public CreateAtlasVectorIndexModel(
        Expression<Func<TDocument, object>> field,
        string name,
        VectorSimilarity similarity,
        int dimensions,
        params Expression<Func<TDocument, object>>[] filterFields)
        : this(
            new ExpressionFieldDefinition<TDocument>(field),
            name,
            similarity,
            dimensions,
            filterFields?.Select(f => (FieldDefinition<TDocument>)new ExpressionFieldDefinition<TDocument>(f)).ToArray())
    {
        // All state is assigned by the chained constructor.
    }

    /// <summary>
    /// The field containing the vectors to index.
    /// </summary>
    public FieldDefinition<TDocument> Field { get; }

    /// <summary>
    /// The <see cref="VectorSimilarity"/> to use to search for top K-nearest neighbors.
    /// </summary>
    public VectorSimilarity Similarity { get; }

    /// <summary>
    /// Number of vector dimensions that Atlas Vector Search enforces at index-time and query-time.
    /// </summary>
    public int Dimensions { get; }

    /// <summary>
    /// Fields that may be used as filters in the vector query.
    /// </summary>
    public IReadOnlyList<FieldDefinition<TDocument>> FilterFields { get; }

    /// <summary>
    /// Type of automatic vector quantization for your vectors.
    /// </summary>
    public VectorQuantization? Quantization { get; init; }

    /// <summary>
    /// Maximum number of edges (or connections) that a node can have in the Hierarchical Navigable Small Worlds graph.
    /// </summary>
    public int? HnswMaxEdges { get; init; }

    /// <summary>
    /// Analogous to numCandidates at query-time, this parameter controls the maximum number of nodes to evaluate to find the closest neighbors to connect to a new node.
    /// </summary>
    public int? HnswNumEdgeCandidates { get; init; }

    /// <inheritdoc/>
    public override SearchIndexType? Type
        => SearchIndexType.VectorSearch;

    /// <inheritdoc/>
    /// <remarks>
    /// The definition is built from this model's properties on first access and then stored on the base
    /// class, so later reads return that stored <see cref="BsonDocument"/>.
    /// </remarks>
    public override BsonDocument Definition
    {
        get
        {
            if (base.Definition != null)
            {
                return base.Definition;
            }

            var similarityValue = Similarity == VectorSimilarity.DotProduct
                ? "dotProduct" // Because neither "DotProduct" nor "dotproduct" is allowed.
                : Similarity.ToString().ToLowerInvariant();

            var vectorField = new BsonDocument
            {
                { "type", BsonString.Create("vector") },
                { "path", Field.Render(_renderArgs).FieldName },
                { "numDimensions", BsonInt32.Create(Dimensions) },
                { "similarity", BsonString.Create(similarityValue) },
            };

            if (Quantization.HasValue)
            {
                // Invariant casing keeps the rendered name independent of the current thread culture.
                vectorField.Add("quantization", BsonString.Create(Quantization.Value.ToString().ToLowerInvariant()));
            }

            if (HnswMaxEdges != null || HnswNumEdgeCandidates != null)
            {
                // When only one HNSW option is supplied, the other falls back to 16 (maxEdges) or
                // 100 (numEdgeCandidates). NOTE(review): presumably the Atlas defaults; confirm against the docs.
                var hnswDocument = new BsonDocument
                {
                    { "maxEdges", BsonInt32.Create(HnswMaxEdges ?? 16) },
                    { "numEdgeCandidates", BsonInt32.Create(HnswNumEdgeCandidates ?? 100) }
                };
                vectorField.Add("hnswOptions", hnswDocument);
            }

            var fieldDocuments = new List<BsonDocument> { vectorField };

            // FilterFields is always assigned a (possibly empty) list by the constructor.
            foreach (var filterField in FilterFields)
            {
                var fieldDocument = new BsonDocument
                {
                    { "type", BsonString.Create("filter") },
                    { "path", BsonString.Create(filterField.Render(_renderArgs).FieldName) }
                };

                fieldDocuments.Add(fieldDocument);
            }

            base.Definition = new BsonDocument { { "fields", BsonArray.Create(fieldDocuments) } };

            return base.Definition;
        }
    }
}
183+
184+
/// <summary>
185+
/// Defines an Atlas vector search index model whose fields are expressed against <see cref="BsonDocument"/>.
186+
/// </summary>
187+
public class CreateAtlasVectorIndexModel : CreateAtlasVectorIndexModel<BsonDocument>
188+
{
189+
/// <summary>
190+
/// Initializes a new instance of the <see cref="CreateAtlasVectorIndexModel"/> class, passing the required
191+
/// options for <see cref="VectorSimilarity"/> and number of vector dimensions to the constructor.
192+
/// </summary>
193+
/// <param name="field">The field containing the vectors to index.</param>
194+
/// <param name="name">The index name.</param>
195+
/// <param name="similarity">The <see cref="VectorSimilarity"/> to use to search for top K-nearest neighbors.</param>
196+
/// <param name="dimensions">Number of vector dimensions that Atlas Vector Search enforces at index-time and query-time.</param>
197+
/// <param name="filterFields">Fields that may be used as filters in the vector query.</param>
198+
public CreateAtlasVectorIndexModel(
199+
FieldDefinition<BsonDocument> field,
200+
string name,
201+
VectorSimilarity similarity,
202+
int dimensions,
203+
params FieldDefinition<BsonDocument>[] filterFields)
204+
: base(field, name, similarity, dimensions, filterFields)
205+
{
206+
}
207+
}
208+
209+
210+
/// <summary>
211+
/// Placeholder for an Atlas search index model. TODO: not yet implemented; the class currently declares no members.
212+
/// </summary>
213+
public class CreateAtlasSearchIndexModel
214+
{
215+
}
216+
}

src/MongoDB.Driver/CreateSearchIndexModel.cs

Lines changed: 45 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -18,40 +18,66 @@
1818
namespace MongoDB.Driver
1919
{
2020
/// <summary>
21-
/// Model for creating a search index.
21+
/// Defines an Atlas search index model using a <see cref="BsonDocument"/> definition, and acts as a base
22+
/// class for different types of Atlas index models, including <see cref="CreateAtlasVectorIndexModel"/>
23+
/// and <see cref="CreateAtlasSearchIndexModel"/>, which are intended to build the definition through
24+
/// strongly-typed APIs.
2225
/// </summary>
23-
public sealed class CreateSearchIndexModel
26+
public class CreateSearchIndexModel
2427
{
25-
/// <summary>Gets the index name.</summary>
26-
/// <value>The index name.</value>
27-
public string Name { get; }
28-
29-
/// <summary>Gets the index type.</summary>
30-
/// <value>The index type.</value>
31-
public SearchIndexType? Type { get; }
32-
33-
/// <summary>Gets the index definition.</summary>
34-
/// <value>The definition.</value>
35-
public BsonDocument Definition { get; }
28+
/// <summary>
29+
/// Initializes a new instance of the <see cref="CreateSearchIndexModel"/> class, passing the index
30+
/// model as a <see cref="BsonDocument"/>.
31+
/// </summary>
32+
/// <remarks>
33+
/// Consider using <see cref="CreateAtlasVectorIndexModel"/> or <see cref="CreateAtlasSearchIndexModel"/> to
34+
/// build Atlas indexes without specifying the BSON directly.
35+
/// </remarks>
36+
/// <param name="name">The name.</param>
37+
/// <param name="definition">The index definition.</param>
38+
public CreateSearchIndexModel(string name, BsonDocument definition)
39+
: this(name, null, definition)
40+
{
41+
}
3642

3743
/// <summary>
38-
/// Initializes a new instance of the <see cref="CreateSearchIndexModel"/> class.
44+
/// Initializes a new instance of the <see cref="CreateSearchIndexModel"/> class, passing the index
45+
/// model as a <see cref="BsonDocument"/>.
3946
/// </summary>
47+
/// <remarks>
48+
/// Consider using <see cref="CreateAtlasVectorIndexModel"/> or <see cref="CreateAtlasSearchIndexModel"/> to
49+
/// build Atlas indexes without specifying the BSON directly.
50+
/// </remarks>
4051
/// <param name="name">The name.</param>
41-
/// <param name="definition">The definition.</param>
42-
public CreateSearchIndexModel(string name, BsonDocument definition) : this(name, null, definition) { }
52+
/// <param name="type">The type.</param>
53+
/// <param name="definition">The index definition.</param>
54+
public CreateSearchIndexModel(string name, SearchIndexType? type, BsonDocument definition)
55+
: this(name, type)
56+
{
57+
Definition = definition;
58+
}
4359

4460
/// <summary>
4561
/// Initializes a new instance of the <see cref="CreateSearchIndexModel"/> class.
4662
/// </summary>
4763
/// <param name="name">The name.</param>
4864
/// <param name="type">The type.</param>
49-
/// <param name="definition">The definition.</param>
50-
public CreateSearchIndexModel(string name, SearchIndexType? type, BsonDocument definition)
65+
protected CreateSearchIndexModel(string name, SearchIndexType? type)
5166
{
5267
Name = name;
5368
Type = type;
54-
Definition = definition;
5569
}
70+
71+
/// <summary>Gets the index name.</summary>
72+
/// <value>The index name.</value>
73+
public virtual string Name { get; }
74+
75+
/// <summary>Gets the index type.</summary>
76+
/// <value>The index type.</value>
77+
public virtual SearchIndexType? Type { get; }
78+
79+
/// <summary>Gets the index definition.</summary>
80+
/// <value>The definition.</value>
81+
public virtual BsonDocument Definition { get; protected set; }
5682
}
5783
}
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
/* Copyright 2010-present MongoDB Inc.
2+
*
3+
* Licensed under the Apache License, Version 2.0 (the "License");
4+
* you may not use this file except in compliance with the License.
5+
* You may obtain a copy of the License at
6+
*
7+
* http://www.apache.org/licenses/LICENSE-2.0
8+
*
9+
* Unless required by applicable law or agreed to in writing, software
10+
* distributed under the License is distributed on an "AS IS" BASIS,
11+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
* See the License for the specific language governing permissions and
13+
* limitations under the License.
14+
*/
15+
16+
namespace MongoDB.Driver
17+
{
18+
/// <summary>
19+
/// Type of automatic vector quantization for your vectors. Use this setting only if your embeddings are float
20+
/// or double vectors. See <see href="https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-quantization/">
21+
/// Vector Quantization</see> for more information.
22+
/// </summary>
23+
public enum VectorQuantization
24+
{
25+
/// <summary>
26+
/// Indicates no automatic quantization for the vector embeddings. Use this setting if you have pre-quantized
27+
/// vectors for ingestion. If omitted, this is the default value.
28+
/// </summary>
29+
None,
30+
31+
/// <summary>
32+
/// Indicates scalar quantization, which transforms values to 1-byte integers.
33+
/// </summary>
34+
Scalar,
35+
36+
/// <summary>
37+
/// Indicates binary quantization, which transforms values to a single bit.
38+
/// To use this value, numDimensions must be a multiple of 8.
39+
/// If precision is critical, select <see cref="None"/> or <see cref="Scalar"/> instead of <see cref="Binary"/>.
40+
/// </summary>
41+
Binary,
42+
}
43+
}
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
/* Copyright 2010-present MongoDB Inc.
2+
*
3+
* Licensed under the Apache License, Version 2.0 (the "License");
4+
* you may not use this file except in compliance with the License.
5+
* You may obtain a copy of the License at
6+
*
7+
* http://www.apache.org/licenses/LICENSE-2.0
8+
*
9+
* Unless required by applicable law or agreed to in writing, software
10+
* distributed under the License is distributed on an "AS IS" BASIS,
11+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
* See the License for the specific language governing permissions and
13+
* limitations under the License.
14+
*/
15+
16+
namespace MongoDB.Driver
17+
{
18+
/// <summary>
19+
/// Vector similarity function to use to search for top K-nearest neighbors.
20+
/// See <see href="https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-type/">How to Index Fields for
21+
/// Vector Search</see> for more information.
22+
/// </summary>
23+
public enum VectorSimilarity
24+
{
25+
/// <summary>
26+
/// Measures the distance between ends of vectors.
27+
/// </summary>
28+
Euclidean,
29+
30+
/// <summary>
31+
/// Measures similarity based on the angle between vectors.
32+
/// </summary>
33+
Cosine,
34+
35+
/// <summary>
36+
/// Measures similarity like cosine, but takes into account the magnitude of the vector.
37+
/// </summary>
38+
DotProduct,
39+
}
40+
}

0 commit comments

Comments
 (0)