Skip to content

Commit 147ef3b

Browse files
authored
.Net: [MEVD] Go over all key types, add support for Guid (#13315)
* Added new KeyTypeTests to cover provider key support, alongside the existing DataTypeTests and EmbeddingTypeTests; moved all three into the TypeTests folder. * As discussed offline, this adds Guid-as-string key support for all providers. From research this seems to be very standard practice across a wide range of databases: JSON-based databases generally encode various types (e.g. DateTime) as strings, and Guids are standard there. SQLite is similar, with Microsoft.Data.Sqlite supporting it natively. This notably allows upper layers to assume that they can always use Guid, which helps MEDI ([see this issue](dotnet/extensions#6973)). * Per-provider notes: * Azure AI Search: only string keys were supported; added Guid-as-string as an option ([docs](https://learn.microsoft.com/en-us/rest/api/searchservice/data-type-map-for-indexers-in-azure-search#bkmk_sql_search) encouraging mapping Guid to strings) * MongoDB/CosmosMongoDB: Added int and long support (Guid was already supported). * CosmosNoSql: Added Guid-as-string (standard practice) * Pinecone: Added Guid-as-string * Redis: Added Guid-as-string * SQL Server: removed DateTime and `byte[]` as key types (as above) and added Guid. * SQLite: Microsoft.Data.Sqlite (the low-level ADO.NET provider we're built on top of) supports GUIDs as TEXT, so it's really not us deciding to support it - it's the lower-level driver. In addition, we support mapping boolean to INTEGER (1/0), which seems similar. Added Guid support as a data property as well, not just as a key. Closes #11784 Closes #12182
1 parent d9ea1e3 commit 147ef3b

File tree

74 files changed

+700
-239
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

74 files changed

+700
-239
lines changed

dotnet/src/InternalUtilities/connectors/Memory/MongoDB/MongoModelBuilder.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,9 @@ protected override void ProcessTypeProperties(Type type, VectorStoreCollectionDe
4444

4545
protected override bool IsKeyPropertyTypeValid(Type type, [NotNullWhen(false)] out string? supportedTypes)
4646
{
47-
supportedTypes = "string, Guid, ObjectId";
47+
supportedTypes = "string, int, long, Guid, ObjectId";
4848

49-
return type == typeof(string) || type == typeof(Guid) || type == typeof(ObjectId);
49+
return type == typeof(string) || type == typeof(int) || type == typeof(long) || type == typeof(Guid) || type == typeof(ObjectId);
5050
}
5151

5252
protected override bool IsDataPropertyTypeValid(Type type, [NotNullWhen(false)] out string? supportedTypes)

dotnet/src/VectorData/AzureAISearch/AzureAISearchCollection.cs

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -83,9 +83,9 @@ internal AzureAISearchCollection(SearchIndexClient searchIndexClient, string nam
8383
Verify.NotNull(searchIndexClient);
8484
Verify.NotNullOrWhiteSpace(name);
8585

86-
if (typeof(TKey) != typeof(string) && typeof(TKey) != typeof(object))
86+
if (typeof(TKey) != typeof(string) && typeof(TKey) != typeof(Guid) && typeof(TKey) != typeof(object))
8787
{
88-
throw new NotSupportedException("Only string keys are supported.");
88+
throw new NotSupportedException("Only string and Guid keys are supported.");
8989
}
9090

9191
options ??= AzureAISearchCollectionOptions.Default;
@@ -791,7 +791,13 @@ private string GetStringKey(TKey key)
791791
{
792792
Verify.NotNull(key);
793793

794-
var stringKey = key as string ?? throw new UnreachableException("string key should have been validated during model building");
794+
var stringKey = key switch
795+
{
796+
string s => s,
797+
Guid g => g.ToString(),
798+
799+
_ => throw new UnreachableException("string key should have been validated during model building")
800+
};
795801

796802
Verify.NotNullOrWhiteSpace(stringKey, nameof(key));
797803

dotnet/src/VectorData/AzureAISearch/AzureAISearchModelBuilder.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,9 @@ protected override bool IsVectorPropertyTypeValid(Type type, [NotNullWhen(false)
3131

3232
internal static bool IsKeyPropertyTypeValidCore(Type type, [NotNullWhen(false)] out string? supportedTypes)
3333
{
34-
supportedTypes = "string";
34+
supportedTypes = "string, Guid";
3535

36-
return type == typeof(string);
36+
return type == typeof(string) || type == typeof(Guid);
3737
}
3838

3939
internal static bool IsDataPropertyTypeValidCore(Type type, [NotNullWhen(false)] out string? supportedTypes)

dotnet/src/VectorData/CosmosMongoDB/CosmosMongoCollection.cs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,8 @@ public class CosmosMongoCollection<TKey, TRecord> : VectorStoreCollection<TKey,
6767
/// <summary>The size of the dynamic candidate list for search.</summary>
6868
private readonly int _efSearch;
6969

70+
private static readonly Type[] s_validKeyTypes = [typeof(string), typeof(Guid), typeof(ObjectId), typeof(int), typeof(long)];
71+
7072
/// <summary>
7173
/// Initializes a new instance of the <see cref="CosmosMongoCollection{TKey, TRecord}"/> class.
7274
/// </summary>
@@ -95,9 +97,9 @@ internal CosmosMongoCollection(IMongoDatabase mongoDatabase, string name, Func<C
9597
Verify.NotNull(mongoDatabase);
9698
Verify.NotNullOrWhiteSpace(name);
9799

98-
if (typeof(TKey) != typeof(string) && typeof(TKey) != typeof(object))
100+
if (!s_validKeyTypes.Contains(typeof(TKey)) && typeof(TKey) != typeof(object))
99101
{
100-
throw new NotSupportedException("Only string keys are supported.");
102+
throw new NotSupportedException("Only ObjectID, string, Guid, int and long keys are supported.");
101103
}
102104

103105
options ??= CosmosMongoCollectionOptions.Default;

dotnet/src/VectorData/CosmosNoSql/CosmosNoSqlCollection.cs

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -134,9 +134,12 @@ internal CosmosNoSqlCollection(
134134
{
135135
try
136136
{
137-
if (typeof(TKey) != typeof(string) && typeof(TKey) != typeof(CosmosNoSqlCompositeKey) && typeof(TKey) != typeof(object))
137+
if (typeof(TKey) != typeof(string)
138+
&& typeof(TKey) != typeof(Guid)
139+
&& typeof(TKey) != typeof(CosmosNoSqlCompositeKey)
140+
&& typeof(TKey) != typeof(object))
138141
{
139-
throw new NotSupportedException($"Only {nameof(String)} and {nameof(CosmosNoSqlCompositeKey)} keys are supported.");
142+
throw new NotSupportedException($"Only string, Guid and {nameof(CosmosNoSqlCompositeKey)} keys are supported.");
140143
}
141144

142145
this._database = databaseProvider(clientWrapper.Client);
@@ -852,13 +855,22 @@ private static IEnumerable<CosmosNoSqlCompositeKey> GetCompositeKeys(IEnumerable
852855
=> keys switch
853856
{
854857
IEnumerable<CosmosNoSqlCompositeKey> k => k,
858+
855859
IEnumerable<string> k => k.Select(key => new CosmosNoSqlCompositeKey(recordKey: key, partitionKey: key)),
860+
861+
IEnumerable<Guid> k => k.Select(key =>
862+
{
863+
var guidString = key.ToString();
864+
return new CosmosNoSqlCompositeKey(recordKey: guidString, partitionKey: guidString);
865+
}),
866+
856867
IEnumerable<object> k => k.Select(key => key switch
857868
{
858869
string s => new CosmosNoSqlCompositeKey(recordKey: s, partitionKey: s),
859870
CosmosNoSqlCompositeKey ck => ck,
860871
_ => throw new ArgumentException($"Invalid key type '{key.GetType().Name}'.")
861872
}),
873+
862874
_ => throw new UnreachableException()
863875
};
864876

dotnet/src/VectorData/CosmosNoSql/CosmosNoSqlModelBuilder.cs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,9 @@ internal class CosmosNoSqlModelBuilder() : CollectionJsonModelBuilder(s_modelBui
2323

2424
protected override bool IsKeyPropertyTypeValid(Type type, [NotNullWhen(false)] out string? supportedTypes)
2525
{
26-
// TODO: Cosmos supports other key types (int, Guid...)
27-
supportedTypes = "string";
26+
supportedTypes = "string, Guid";
2827

29-
return type == typeof(string);
28+
return type == typeof(string) || type == typeof(Guid);
3029
}
3130

3231
protected override bool IsDataPropertyTypeValid(Type type, [NotNullWhen(false)] out string? supportedTypes)

dotnet/src/VectorData/MongoDB/MongoCollection.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ public class MongoCollection<TKey, TRecord> : VectorStoreCollection<TKey, TRecor
7676
private readonly int? _numCandidates;
7777

7878
/// <summary>Types of keys permitted.</summary>
79-
private readonly Type[] _validKeyTypes = [typeof(string), typeof(Guid), typeof(ObjectId)];
79+
private static readonly Type[] s_validKeyTypes = [typeof(string), typeof(Guid), typeof(ObjectId), typeof(int), typeof(long)];
8080

8181
/// <summary>
8282
/// Initializes a new instance of the <see cref="MongoCollection{TKey, TRecord}"/> class.
@@ -106,9 +106,9 @@ internal MongoCollection(IMongoDatabase mongoDatabase, string name, Func<MongoCo
106106
Verify.NotNull(mongoDatabase);
107107
Verify.NotNullOrWhiteSpace(name);
108108

109-
if (!this._validKeyTypes.Contains(typeof(TKey)) && typeof(TKey) != typeof(object))
109+
if (!s_validKeyTypes.Contains(typeof(TKey)) && typeof(TKey) != typeof(object))
110110
{
111-
throw new NotSupportedException("Only string, Guid and ObjectID keys are supported.");
111+
throw new NotSupportedException("Only ObjectID, string, Guid, int and long keys are supported.");
112112
}
113113

114114
options ??= MongoCollectionOptions.Default;

dotnet/src/VectorData/Pinecone/PineconeCollection.cs

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -76,9 +76,9 @@ internal PineconeCollection(PineconeClient pineconeClient, string name, Func<Pin
7676
Verify.NotNull(pineconeClient);
7777
VerifyCollectionName(name);
7878

79-
if (typeof(TKey) != typeof(string) && typeof(TKey) != typeof(object))
79+
if (typeof(TKey) != typeof(string) && typeof(TKey) != typeof(Guid) && typeof(TKey) != typeof(object))
8080
{
81-
throw new NotSupportedException("Only string keys are supported.");
81+
throw new NotSupportedException("Only string and Guid keys are supported.");
8282
}
8383

8484
options ??= PineconeCollectionOptions.Default;
@@ -617,7 +617,13 @@ private string GetStringKey(TKey key)
617617
{
618618
Verify.NotNull(key);
619619

620-
var stringKey = key as string ?? throw new UnreachableException("string key should have been validated during model building");
620+
var stringKey = key switch
621+
{
622+
string s => s,
623+
Guid g => g.ToString(),
624+
625+
_ => throw new UnreachableException()
626+
};
621627

622628
Verify.NotNullOrWhiteSpace(stringKey, nameof(key));
623629

dotnet/src/VectorData/Pinecone/PineconeFilterTranslator.cs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,21 @@ internal class PineconeFilterTranslator
2323
private Extensions.VectorData.ProviderServices.CollectionModel _model = null!;
2424
private ParameterExpression _recordParameter = null!;
2525

26-
internal Metadata Translate(LambdaExpression lambdaExpression, Extensions.VectorData.ProviderServices.CollectionModel model)
26+
internal Metadata? Translate(LambdaExpression lambdaExpression, Extensions.VectorData.ProviderServices.CollectionModel model)
2727
{
2828
this._model = model;
2929

3030
Debug.Assert(lambdaExpression.Parameters.Count == 1);
3131
this._recordParameter = lambdaExpression.Parameters[0];
3232

33+
// Pinecone doesn't seem to have a native way of expressing "always true" filters; since this scenario is important for fetching
34+
// all records (via GetAsync with filter), we special-case and support it here. Note that false isn't supported (useless),
35+
// nor is 'x && true'.
36+
if (lambdaExpression.Body is ConstantExpression { Value: true })
37+
{
38+
return null;
39+
}
40+
3341
var preprocessor = new FilterTranslationPreprocessor { SupportsParameterization = false };
3442
var preprocessedExpression = preprocessor.Preprocess(lambdaExpression.Body);
3543

dotnet/src/VectorData/Pinecone/PineconeMapper.cs

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,12 @@ public Vector MapFromDataToStorageModel(TRecord dataModel, Embedding<float>? gen
4444
// TODO: what about sparse values?
4545
var result = new Vector
4646
{
47-
Id = (string)keyObject,
47+
Id = keyObject switch
48+
{
49+
string s => s,
50+
Guid g => g.ToString(),
51+
_ => throw new UnreachableException()
52+
},
4853
Values = values,
4954
Metadata = metadata,
5055
SparseValues = null
@@ -58,7 +63,13 @@ public TRecord MapFromStorageToDataModel(Vector storageModel, bool includeVector
5863
{
5964
var outputRecord = model.CreateRecord<TRecord>()!;
6065

61-
model.KeyProperty.SetValueAsObject(outputRecord, storageModel.Id);
66+
model.KeyProperty.SetValueAsObject(outputRecord, model.KeyProperty.Type switch
67+
{
68+
var t when t == typeof(string) => storageModel.Id,
69+
var t when t == typeof(Guid) => Guid.Parse(storageModel.Id),
70+
71+
_ => throw new UnreachableException()
72+
});
6273

6374
if (includeVectors is true)
6475
{

0 commit comments

Comments
 (0)