|
4 | 4 | import sqlite3
|
5 | 5 | import sys
|
6 | 6 | import time
|
| 7 | +from typing import Any |
7 | 8 |
|
8 | 9 | import numpy as np
|
9 | 10 | import pandas as pd
|
@@ -1056,7 +1057,7 @@ def test_local_milvus() -> None:
|
1056 | 1057 | client.drop_collection(collection_name=COLLECTION_NAME)
|
1057 | 1058 |
|
1058 | 1059 |
|
1059 |
| -def test_milvus_lite_get_online_documents_v2() -> None: |
| 1060 | +def test_milvus_lite_retrieve_online_documents_v2() -> None: |
1060 | 1061 | """
|
1061 | 1062 | Test retrieving documents from the online store in local mode.
|
1062 | 1063 | """
|
@@ -1226,6 +1227,199 @@ def test_milvus_lite_get_online_documents_v2() -> None:
|
1226 | 1227 | assert len(result["distance"]) == len(results[0])
|
1227 | 1228 |
|
1228 | 1229 |
|
def test_milvus_stored_writes_with_explode() -> None:
    """
    Exercise an on-demand feature view that writes exploded chunks to Milvus.

    The test applies a python-mode on-demand feature view declared with
    ``write_to_online_store=True``, pushes two document rows through it,
    reads two chunks back by entity key, and finally runs a top-2 vector
    search both directly through the Milvus client and through
    ``store.retrieve_online_documents_v2``.
    """
    from feast import Entity, RequestSource
    from feast.field import Field
    from feast.on_demand_feature_view import on_demand_feature_view
    from feast.types import Array, Bytes, Float32, String, ValueType

    random.seed(42)
    embedding_dim = 10

    # Feature references shared by the key lookup and the vector search.
    feature_refs = (
        "milvus_explode_feature_view:document_id",
        "milvus_explode_feature_view:chunk_id",
        "milvus_explode_feature_view:chunk_text",
    )

    with CliRunner().local_repo(
        example_repo_py=get_example_repo("example_rag_feature_repo.py"),
        offline_store="file",
        online_store="milvus",
        apply=False,
        teardown=False,
    ) as store:
        # --- Entities and request source -------------------------------
        chunk_entity = Entity(
            name="chunk", join_keys=["chunk_id"], value_type=ValueType.STRING
        )
        document_entity = Entity(
            name="document", join_keys=["document_id"], value_type=ValueType.STRING
        )
        document_request_source = RequestSource(
            name="document_source",
            schema=[
                Field(name="document_id", dtype=String),
                Field(name="document_text", dtype=String),
                Field(name="document_bytes", dtype=Bytes),
            ],
        )

        # --- On-demand feature view ------------------------------------
        # The transformation ignores ``inputs`` and always emits the same
        # four chunks (two per document), each with a constant-valued vector.
        @on_demand_feature_view(
            entities=[chunk_entity, document_entity],
            sources=[document_request_source],
            schema=[
                Field(name="document_id", dtype=String),
                Field(name="chunk_id", dtype=String),
                Field(name="chunk_text", dtype=String),
                Field(
                    name="vector",
                    dtype=Array(Float32),
                    vector_index=True,
                    vector_search_metric="COSINE",
                ),
            ],
            mode="python",
            write_to_online_store=True,
        )
        def milvus_explode_feature_view(inputs: dict[str, Any]):
            return {
                "document_id": ["doc_1", "doc_1", "doc_2", "doc_2"],
                "chunk_id": ["chunk-1", "chunk-2", "chunk-1", "chunk-2"],
                "chunk_text": [
                    "hello friends",
                    "how are you?",
                    "This is a test.",
                    "Document chunking example.",
                ],
                "vector": [
                    [component] * embedding_dim
                    for component in (0.1, 0.2, 0.3, 0.4)
                ],
            }

        # --- Registration ----------------------------------------------
        store.apply(
            [
                chunk_entity,
                document_entity,
                document_request_source,
                milvus_explode_feature_view,
            ]
        )

        registered_view = store.get_on_demand_feature_view(
            "milvus_explode_feature_view"
        )
        assert registered_view.features[1].vector_index
        assert registered_view.entities == [chunk_entity.name, document_entity.name]
        assert registered_view.entity_columns[0].name == document_entity.join_key
        assert registered_view.entity_columns[1].name == chunk_entity.join_key

        # --- Write through the view ------------------------------------
        rows_to_write = [
            {
                "document_id": "document_1",
                "document_text": "Hello world. How are you?",
            },
            {
                "document_id": "document_2",
                "document_text": "This is a test. Document chunking example.",
            },
        ]
        store.write_to_online_store(
            feature_view_name="milvus_explode_feature_view",
            df=rows_to_write,
        )

        # --- Read back by entity key -----------------------------------
        lookup_keys = [
            {"document_id": "doc_1", "chunk_id": "chunk-2"},
            {"document_id": "doc_2", "chunk_id": "chunk-1"},
        ]
        fetched = store.get_online_features(
            entity_rows=lookup_keys,
            features=list(feature_refs),
        ).to_dict()

        # Only the set of returned columns is checked here, not the values.
        assert sorted(fetched) == ["chunk_id", "chunk_text", "document_id"]

        # --- Vector search: raw Milvus client vs. Feast API ------------
        query_vector = [0.1] * embedding_dim

        milvus_client = store._provider._online_store.client
        # Assumes the first listed collection is the one backing this
        # feature view -- TODO confirm.
        target_collection = milvus_client.list_collections()[0]
        raw_hits = milvus_client.search(
            collection_name=target_collection,
            data=[query_vector],
            anns_field="vector",
            search_params={
                "metric_type": "COSINE",
                "params": {"nprobe": 10},
            },
            limit=2,
            output_fields=["document_id", "chunk_id", "chunk_text"],
        )

        retrieved = store.retrieve_online_documents_v2(
            features=list(feature_refs),
            query=query_vector,
            top_k=2,
        ).to_dict()

        # --- Validate the search results -------------------------------
        for column in ("document_id", "chunk_id", "chunk_text", "distance"):
            assert column in retrieved
        assert len(retrieved["distance"]) == 2
        assert len(retrieved["document_id"]) == 2
        # The direct client search must return the same number of hits.
        assert len(raw_hits[0]) == 2

        # Distances are dropped so the remaining payload can be compared
        # exactly.
        # NOTE(review): every stored vector is a scalar multiple of the
        # query, so COSINE scores presumably tie and the expected order
        # below may depend on the store's tie-breaking -- confirm.
        retrieved.pop("distance")
        assert retrieved == {
            "document_id": ["doc_2", "doc_1"],
            "chunk_id": ["chunk-1", "chunk-2"],
            "chunk_text": ["This is a test.", "how are you?"],
        }
| 1421 | + |
| 1422 | + |
1229 | 1423 | def test_milvus_native_from_feast_data() -> None:
|
1230 | 1424 | import random
|
1231 | 1425 | from datetime import datetime
|
|
0 commit comments