Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
216 changes: 216 additions & 0 deletions src/MongoDB.Driver/CreateAtlasVectorIndexModel.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
/* Copyright 2010-present MongoDB Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

using System;
using System.Collections.Generic;
using System.Linq;
using System.Linq.Expressions;
using MongoDB.Bson;
using MongoDB.Bson.Serialization;

namespace MongoDB.Driver
{
/// <summary>
/// Defines an Atlas vector search index model using strongly-typed C# APIs.
/// </summary>
public class CreateAtlasVectorIndexModel<TDocument> : CreateSearchIndexModel
{
// Render arguments used to turn the field definitions of this model into element names.
// They are built once per instance from the serializer that the global BsonSerializer registry
// returns for TDocument.
// NOTE(review): a collection may be configured with a different document serializer, in which
// case the rendered names could differ from what that collection uses -- confirm before relying on this.
private readonly RenderArgs<TDocument> _renderArgs
= new(BsonSerializer.LookupSerializer<TDocument>(), BsonSerializer.SerializerRegistry);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The documentSerializer should come from the collection.DocumentSerializer and not from the registry.

That's why we have Render methods with RenderArgs so that the document serializer can be passed in later when it is known.


/// <summary>
/// Initializes a new instance of the <see cref="CreateAtlasVectorIndexModel{TDocument}"/> class, passing the
/// required options for <see cref="VectorSimilarity"/> and number of vector dimensions to the constructor.
/// </summary>
/// <param name="field">The field containing the vectors to index.</param>
/// <param name="name">The index name.</param>
/// <param name="similarity">The <see cref="VectorSimilarity"/> to use to search for top K-nearest neighbors.</param>
/// <param name="dimensions">Number of vector dimensions that Atlas Vector Search enforces at index-time and query-time.</param>
/// <param name="filterFields">Fields that may be used as filters in the vector query. May be null or empty.</param>
/// <exception cref="ArgumentNullException"><paramref name="field"/> is null.</exception>
public CreateAtlasVectorIndexModel(
    FieldDefinition<TDocument> field,
    string name,
    VectorSimilarity similarity,
    int dimensions,
    params FieldDefinition<TDocument>[] filterFields)
    : base(name, SearchIndexType.VectorSearch)
{
    // Fail fast: a null vector field would otherwise only surface later, as a
    // NullReferenceException, when the index definition is built.
    Field = field ?? throw new ArgumentNullException(nameof(field));
    Similarity = similarity;
    Dimensions = dimensions;

    // Copy the params array so later changes to the caller's array do not affect this model;
    // a null array is treated as "no filter fields".
    FilterFields = filterFields?.ToList() ?? [];
}

/// <summary>
/// Initializes a new instance of the <see cref="CreateAtlasVectorIndexModel{TDocument}"/> class, passing the
/// required options for <see cref="VectorSimilarity"/> and number of vector dimensions to the constructor.
/// </summary>
/// <param name="field">An expression pointing to the field containing the vectors to index.</param>
/// <param name="name">The index name.</param>
/// <param name="similarity">The <see cref="VectorSimilarity"/> to use to search for top K-nearest neighbors.</param>
/// <param name="dimensions">Number of vector dimensions that Atlas Vector Search enforces at index-time and query-time.</param>
/// <param name="filterFields">Expressions pointing to fields that may be used as filters in the vector query. May be null or empty.</param>
public CreateAtlasVectorIndexModel(
    Expression<Func<TDocument, object>> field,
    string name,
    VectorSimilarity similarity,
    int dimensions,
    params Expression<Func<TDocument, object>>[] filterFields)
    : this(
        new ExpressionFieldDefinition<TDocument>(field),
        name,
        similarity,
        dimensions,
        // Wrap every filter expression in a field definition; a null array is forwarded as null.
        filterFields?.Select(f => (FieldDefinition<TDocument>)new ExpressionFieldDefinition<TDocument>(f)).ToArray())
{
    // Nothing to do here: the chained constructor already assigns Field, Similarity,
    // Dimensions and FilterFields.
}

/// <summary>
/// Gets the field containing the vectors to index.
/// </summary>
/// <remarks>Its rendered field name becomes the <c>path</c> of the <c>vector</c> entry in the index definition.</remarks>
public FieldDefinition<TDocument> Field { get; }

/// <summary>
/// Gets the <see cref="VectorSimilarity"/> to use to search for top K-nearest neighbors.
/// </summary>
/// <remarks>Written to the index definition as the <c>similarity</c> value.</remarks>
public VectorSimilarity Similarity { get; }

/// <summary>
/// Gets the number of vector dimensions that Atlas Vector Search enforces at index-time and query-time.
/// </summary>
/// <remarks>Written to the index definition as the <c>numDimensions</c> value.</remarks>
public int Dimensions { get; }

/// <summary>
/// Gets the fields that may be used as filters in the vector query.
/// </summary>
/// <remarks>Each entry is written to the index definition as a separate <c>filter</c> field.</remarks>
public IReadOnlyList<FieldDefinition<TDocument>> FilterFields { get; }

/// <summary>
/// Gets the type of automatic vector quantization for your vectors.
/// </summary>
/// <remarks>When not set, no <c>quantization</c> value is written to the index definition.</remarks>
public VectorQuantization? Quantization { get; init; }

/// <summary>
/// Gets the maximum number of edges (or connections) that a node can have in the Hierarchical Navigable Small Worlds graph.
/// </summary>
/// <remarks>
/// <c>hnswOptions</c> is only written when this or <see cref="HnswNumEdgeCandidates"/> is set; if only
/// <see cref="HnswNumEdgeCandidates"/> is set, this value is written as 16.
/// </remarks>
public int? HnswMaxEdges { get; init; }

/// <summary>
/// Gets the maximum number of nodes to evaluate to find the closest neighbors to connect to a new node.
/// Analogous to numCandidates at query-time.
/// </summary>
/// <remarks>
/// <c>hnswOptions</c> is only written when this or <see cref="HnswMaxEdges"/> is set; if only
/// <see cref="HnswMaxEdges"/> is set, this value is written as 100.
/// </remarks>
public int? HnswNumEdgeCandidates { get; init; }

// /// <summary>Paths to properties that may be used as filters on the entity type or its nested types.</summary>
// public IReadOnlyList<string> FilterPaths { get; init; }

/// <summary>Gets the index type, which for this model is always <see cref="SearchIndexType.VectorSearch"/>.</summary>
/// <value>The index type.</value>
public override SearchIndexType? Type
{
    get { return SearchIndexType.VectorSearch; }
}

/// <inheritdoc/>
public override BsonDocument Definition
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a bit odd to see a property getter doing so much work. Usually properties simply return an existing value.

But... making this a method would be a breaking change.

{
get
{
if (base.Definition != null)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can't really happen right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure what you mean. The idea here is that the type is immutable, and we don't want to re-create the document every time the property is accessed--as you mentioned elsewhere, ideally this should not be a property. So, the first time this is called, the definition stored in the base is null. It is then built and cached so it doesn't have to be built again if the property is accessed again.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can't cache the result of Render because if Render is called again with different RenderArgs the result could be different.

I think you should throw an exception if the Document property is accessed on this subclass. Probably the very existence of this property is dubious.

{
return base.Definition;
}

var similarityValue = Similarity == VectorSimilarity.DotProduct
? "dotProduct" // Because neither "DotProduct" or "dotproduct" are allowed.
: Similarity.ToString().ToLowerInvariant();

var vectorField = new BsonDocument
{
{ "type", BsonString.Create("vector") },
{ "path", Field.Render(_renderArgs).FieldName },
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not really possible to use a cached renderArgs.

This implies that this functionality should really be in a Render method that TAKES a renderArgs.

How to do that given that this is a property is a conundrum.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How would someone have done this correctly (that is, create the document with the correct serializer) before my changes? What happens if they do it wrong?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@rstam I've been pondering this some more. Given that we are building metadata for the server here, rather than user documents, shouldn't we be using a standard form of serialization always? In other words, what is the scenario where this metadata should be built with custom serializers?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't usually use serializers to build commands, just for data.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@rstam Since we are not building user data here, but rather just metadata, then should we not use serializers here? I'm not sure what you mean by "commands" above? If we shouldn't be using serializers, then how to we go from a FieldDefinition to path in the metadata without using a serializer? Or, to ask the same thing another way, why was this originally written to use serializers to create metadata? How would use of those serializers make the metadata generated different?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A "command" is a BSON document we send to the server telling the server to do something. In this case the command is "createSearchIndexes", though in THIS file we are just creating a small part of that command, i.e. the "indexes" field of the "createSearchIndexes" command.

I didn't mean to say that we shouldn't use serializers AT ALL when creating a command, just that we don't use serializers to create the bulk of the command. Of course we have to consult the corresponding serializer when a user uses an Expression to identify a field, so that we can ask the serializer what the element name should be. But that's actually handled by the appropriate overload of Render.

And when creating a command that includes user defined POCOs as data to the command we use serializers to convert the POCOs to BSON inside the command, but that's not the case for this command.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or, to ask the same thing another way, why was this originally written to use serializers to create metadata

I don't understand this question.

What part of this was originally written to use serializers?

{ "numDimensions", BsonInt32.Create(Dimensions) },
{ "similarity", BsonString.Create(similarityValue) },
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No need to call BsonString.Create or BsonInt32.Create.

There are implicit conversions to simplify code like this.

};

if (Quantization.HasValue)
{
    // The value is written as the lower-cased enum name. Lower-case with the invariant
    // culture (as is done for the similarity value) so the result never depends on the
    // current thread culture, and read the enum through Value rather than stringifying
    // the Nullable wrapper.
    vectorField.Add("quantization", Quantization.Value.ToString().ToLowerInvariant());
}

if (HnswMaxEdges != null || HnswNumEdgeCandidates != null)
{
    // Both HNSW options are always written together: when only one was supplied,
    // the other falls back to 16 (maxEdges) or 100 (numEdgeCandidates).
    var hnswDocument = new BsonDocument
    {
        { "maxEdges", HnswMaxEdges ?? 16 },
        { "numEdgeCandidates", HnswNumEdgeCandidates ?? 100 }
    };
    vectorField.Add("hnswOptions", hnswDocument);
}

// The vector field always comes first, followed by one "filter" entry per filter field.
var fieldDocuments = new List<BsonDocument> { vectorField };

if (FilterFields != null)
{
    foreach (var filterPath in FilterFields)
    {
        // string and int values convert implicitly to BsonValue, so no explicit
        // BsonString.Create / BsonInt32.Create calls are needed.
        fieldDocuments.Add(new BsonDocument
        {
            { "type", "filter" },
            { "path", filterPath.Render(_renderArgs).FieldName }
        });
    }
}

base.Definition = new BsonDocument { { "fields", BsonArray.Create(fieldDocuments) } };
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would have made the fieldDocuments a BsonArray in the first place. No need for the intermediate List.


return base.Definition;
}
}
}

/// <summary>
/// Defines an Atlas vector search index model for collections of <see cref="BsonDocument"/>,
/// using the same strongly-typed API as <see cref="CreateAtlasVectorIndexModel{TDocument}"/>.
/// </summary>
public class CreateAtlasVectorIndexModel : CreateAtlasVectorIndexModel<BsonDocument>
{
/// <summary>
/// Initializes a new instance of the <see cref="CreateAtlasVectorIndexModel"/> class, passing the required
/// options for <see cref="VectorSimilarity"/> and number of vector dimensions to the constructor.
/// </summary>
/// <param name="field">The field containing the vectors to index.</param>
/// <param name="name">The index name.</param>
/// <param name="similarity">The <see cref="VectorSimilarity"/> to use to search for top K-nearest neighbors.</param>
/// <param name="dimensions">Number of vector dimensions that Atlas Vector Search enforces at index-time and query-time.</param>
/// <param name="filterFields">Fields that may be used as filters in the vector query.</param>
public CreateAtlasVectorIndexModel(
FieldDefinition<BsonDocument> field,
string name,
VectorSimilarity similarity,
int dimensions,
params FieldDefinition<BsonDocument>[] filterFields)
: base(field, name, similarity, dimensions, filterFields)
{
// All arguments are forwarded unchanged to the generic base class.
}
}


/// <summary>
/// Placeholder for a strongly-typed Atlas search index model.
/// </summary>
/// <remarks>
/// TODO: not implemented yet. The class currently declares no members and does not derive from
/// <see cref="CreateSearchIndexModel"/>.
/// </remarks>
public class CreateAtlasSearchIndexModel
{
}
}
64 changes: 45 additions & 19 deletions src/MongoDB.Driver/CreateSearchIndexModel.cs
Original file line number Diff line number Diff line change
Expand Up @@ -18,40 +18,66 @@
namespace MongoDB.Driver
{
/// <summary>
/// Model for creating a search index.
/// Defines an Atlas search index model using a <see cref="BsonDocument"/> definition and acts as a base class
/// for different types of Atlas index models, including <see cref="CreateAtlasVectorIndexModel"/>
/// and <see cref="CreateAtlasSearchIndexModel"/> for strongly-typed Atlas models.
/// </summary>
public sealed class CreateSearchIndexModel
public class CreateSearchIndexModel
{
/// <summary>Gets the index name.</summary>
/// <value>The index name.</value>
public string Name { get; }

/// <summary>Gets the index type.</summary>
/// <value>The index type.</value>
public SearchIndexType? Type { get; }

/// <summary>Gets the index definition.</summary>
/// <value>The definition.</value>
public BsonDocument Definition { get; }
/// <summary>
/// Initializes a new instance of the <see cref="CreateSearchIndexModel"/> class, passing the index
/// model as a <see cref="BsonDocument"/>.
/// </summary>
/// <remarks>
/// Consider using <see cref="CreateAtlasVectorIndexModel"/> or <see cref="CreateAtlasSearchIndexModel"/> to
/// build Atlas indexes without specifying the BSON directly.
/// </remarks>
/// <param name="name">The name.</param>
/// <param name="definition">The index definition.</param>
public CreateSearchIndexModel(string name, BsonDocument definition)
: this(name, null, definition)
{
}

/// <summary>
/// Initializes a new instance of the <see cref="CreateSearchIndexModel"/> class.
/// Initializes a new instance of the <see cref="CreateSearchIndexModel"/> class, passing the index
/// model as a <see cref="BsonDocument"/>.
/// </summary>
/// <remarks>
/// Consider using <see cref="CreateAtlasVectorIndexModel"/> or <see cref="CreateAtlasSearchIndexModel"/> to
/// build Atlas indexes without specifying the BSON directly.
/// </remarks>
/// <param name="name">The name.</param>
/// <param name="definition">The definition.</param>
public CreateSearchIndexModel(string name, BsonDocument definition) : this(name, null, definition) { }
/// <param name="type">The type.</param>
/// <param name="definition">The index definition.</param>
public CreateSearchIndexModel(string name, SearchIndexType? type, BsonDocument definition)
: this(name, type)
{
Definition = definition;
}

/// <summary>
/// Initializes a new instance of the <see cref="CreateSearchIndexModel"/> class.
/// </summary>
/// <param name="name">The name.</param>
/// <param name="type">The type.</param>
/// <param name="definition">The definition.</param>
public CreateSearchIndexModel(string name, SearchIndexType? type, BsonDocument definition)
protected CreateSearchIndexModel(string name, SearchIndexType? type)
{
Name = name;
Type = type;
Definition = definition;
}

/// <summary>Gets the index name.</summary>
/// <value>The index name.</value>
public virtual string Name { get; }

/// <summary>Gets the index type.</summary>
/// <value>The index type.</value>
public virtual SearchIndexType? Type { get; }

/// <summary>Gets the index definition.</summary>
/// <value>The definition.</value>
public virtual BsonDocument Definition { get; protected set; }
}
}
43 changes: 43 additions & 0 deletions src/MongoDB.Driver/VectorQuantization.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/* Copyright 2010-present MongoDB Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

namespace MongoDB.Driver
{
/// <summary>
/// Type of automatic vector quantization for your vectors. Use this setting only if your embeddings are float
/// or double vectors. See <see href="https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-quantization/">
/// Vector Quantization</see> for more information.
/// </summary>
public enum VectorQuantization
{
/// <summary>
/// Indicates no automatic quantization for the vector embeddings. Use this setting if you have pre-quantized
/// vectors for ingestion. If omitted, this is the default value.
/// </summary>
None,

/// <summary>
/// Indicates scalar quantization, which transforms values to 1-byte integers.
/// </summary>
Scalar,

/// <summary>
/// Indicates binary quantization, which transforms values to a single bit.
/// To use this value, numDimensions must be a multiple of 8.
/// If precision is critical, select <see cref="None"/> or <see cref="Scalar"/> instead of <see cref="Binary"/>.
/// </summary>
Binary,
}
}
40 changes: 40 additions & 0 deletions src/MongoDB.Driver/VectorSimilarity.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/* Copyright 2010-present MongoDB Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

namespace MongoDB.Driver
{
/// <summary>
/// Vector similarity function to use to search for top K-nearest neighbors.
/// See <see href="https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-type/">How to Index Fields for
/// Vector Search</see> for more information.
/// </summary>
public enum VectorSimilarity
{
/// <summary>
/// Measures the distance between ends of vectors.
/// </summary>
Euclidean,

/// <summary>
/// Measures similarity based on the angle between vectors.
/// </summary>
Cosine,

/// <summary>
/// Measures similarity like cosine, but takes into account the magnitude of the vector.
/// </summary>
DotProduct,
}
}
Loading
Loading