Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions docs/reference/online-stores/elasticsearch.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ online_store:
port: ES_PORT
user: ES_USERNAME
password: ES_PASSWORD
vector_len: 512
write_batch_size: 1000
```
{% endcode %}
Expand Down Expand Up @@ -88,7 +87,7 @@ Currently, the indexing mapping in the ElasticSearch online store is configured
"created_ts": {"type": "date"},
"vector_value": {
"type": "dense_vector",
"dims": config.online_store.vector_len,
"dims": vector_field_length,
"index": "true",
"similarity": config.online_store.similarity,
},
Expand Down
3 changes: 1 addition & 2 deletions docs/reference/online-stores/postgres.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ online_store:
sslcert_path: /path/to/client-cert.pem
sslrootcert_path: /path/to/server-ca.pem
vector_enabled: false
vector_len: 512
```
{% endcode %}

Expand Down Expand Up @@ -67,7 +66,7 @@ To compare this set of functionality against other online stores, please see the
The Postgres online store supports the use of [PGVector](https://github.com/pgvector/pgvector) for storing feature values.
To enable PGVector, set `vector_enabled: true` in the online store configuration.

The `vector_len` parameter can be used to specify the length of the vector. The default value is 512.
The `vector_length` parameter can be used to specify the length of the vector in the Field.

Please make sure to follow the instructions in the repository, which, as the time of this writing, requires you to
run `CREATE EXTENSION vector;` in the database.
Expand Down
1 change: 0 additions & 1 deletion docs/reference/online-stores/qdrant.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ online_store:
type: qdrant
host: localhost
port: 6333
vector_len: 384
write_batch_size: 100
```

Expand Down
3 changes: 3 additions & 0 deletions protos/feast/core/Feature.proto
Original file line number Diff line number Diff line change
Expand Up @@ -42,4 +42,7 @@ message FeatureSpecV2 {

// Metric used for vector similarity search.
string vector_search_metric = 6;

// Field indicating the vector length
int32 vector_length = 7;
}
29 changes: 13 additions & 16 deletions sdk/python/feast/feature_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@
from feast.feast_object import FeastObject
from feast.feature_service import FeatureService
from feast.feature_view import DUMMY_ENTITY, DUMMY_ENTITY_NAME, FeatureView
from feast.field import Field
from feast.inference import (
update_data_sources_with_inferred_event_timestamp_col,
update_feature_views_with_inferred_features_and_entities,
Expand Down Expand Up @@ -91,7 +90,7 @@
from feast.stream_feature_view import StreamFeatureView
from feast.transformation.pandas_transformation import PandasTransformation
from feast.transformation.python_transformation import PythonTransformation
from feast.utils import _utc_now
from feast.utils import _get_feature_view_vector_field_metadata, _utc_now

warnings.simplefilter("once", DeprecationWarning)

Expand Down Expand Up @@ -856,7 +855,6 @@ def apply(
if not isinstance(objects, Iterable):
objects = [objects]
assert isinstance(objects, list)

if not objects_to_delete:
objects_to_delete = []

Expand Down Expand Up @@ -1555,6 +1553,18 @@ def _get_feature_view_and_df_for_online_write(
except Exception as _:
raise DataFrameSerializationError(df)

if feature_view.features[0].vector_index and df is not None:
fv_vector_feature_name = feature_view.features[0].name
df_vector_feature_index = df.columns.get_loc(fv_vector_feature_name)
if feature_view.features[0].vector_length != 0:
if (
df.shape[df_vector_feature_index]
> feature_view.features[0].vector_length
):
raise ValueError(
f"The dataframe for {fv_vector_feature_name} column has {df.shape[1]} vectors which is greater than expected (i.e {feature_view.features[0].vector_length}) by feature view {feature_view.name}."
)

# # Apply transformations if this is an OnDemandFeatureView with write_to_online_store=True
if (
isinstance(feature_view, OnDemandFeatureView)
Expand Down Expand Up @@ -2502,16 +2512,3 @@ def _validate_data_sources(data_sources: List[DataSource]):
raise DataSourceRepeatNamesException(case_insensitive_ds_name)
else:
ds_names.add(case_insensitive_ds_name)


def _get_feature_view_vector_field_metadata(
feature_view: FeatureView,
) -> Optional[Field]:
vector_fields = [field for field in feature_view.schema if field.vector_index]
if len(vector_fields) > 1:
raise ValueError(
f"Feature view {feature_view.name} has multiple vector fields. Only one vector field per feature view is supported."
)
if not vector_fields:
return None
return vector_fields[0]
9 changes: 9 additions & 0 deletions sdk/python/feast/field.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ class Field:
description: A human-readable description.
tags: User-defined metadata in dictionary form.
vector_index: If set to True the field will be indexed for vector similarity search.
vector_length: The length of the vector if the vector index is set to True.
vector_search_metric: The metric used for vector similarity search.
"""

Expand All @@ -41,6 +42,7 @@ class Field:
description: str
tags: Dict[str, str]
vector_index: bool
vector_length: int
vector_search_metric: Optional[str]

def __init__(
Expand All @@ -51,6 +53,7 @@ def __init__(
description: str = "",
tags: Optional[Dict[str, str]] = None,
vector_index: bool = False,
vector_length: int = 0,
vector_search_metric: Optional[str] = None,
):
"""
Expand All @@ -69,6 +72,7 @@ def __init__(
self.description = description
self.tags = tags or {}
self.vector_index = vector_index
self.vector_length = vector_length
self.vector_search_metric = vector_search_metric

def __eq__(self, other):
Expand All @@ -80,6 +84,7 @@ def __eq__(self, other):
or self.dtype != other.dtype
or self.description != other.description
or self.tags != other.tags
or self.vector_length != other.vector_length
# or self.vector_index != other.vector_index
# or self.vector_search_metric != other.vector_search_metric
):
Expand All @@ -100,6 +105,7 @@ def __repr__(self):
f" description={self.description!r},\n"
f" tags={self.tags!r}\n"
f" vector_index={self.vector_index!r}\n"
f" vector_length={self.vector_length!r}\n"
f" vector_search_metric={self.vector_search_metric!r}\n"
f")"
)
Expand All @@ -117,6 +123,7 @@ def to_proto(self) -> FieldProto:
description=self.description,
tags=self.tags,
vector_index=self.vector_index,
vector_length=self.vector_length,
vector_search_metric=vector_search_metric,
)

Expand All @@ -131,12 +138,14 @@ def from_proto(cls, field_proto: FieldProto):
value_type = ValueType(field_proto.value_type)
vector_search_metric = getattr(field_proto, "vector_search_metric", "")
vector_index = getattr(field_proto, "vector_index", False)
vector_length = getattr(field_proto, "vector_length", 0)
return cls(
name=field_proto.name,
dtype=from_value_type(value_type=value_type),
tags=dict(field_proto.tags),
description=field_proto.description,
vector_index=vector_index,
vector_length=vector_length,
vector_search_metric=vector_search_metric,
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,11 @@
from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto
from feast.protos.feast.types.Value_pb2 import Value as ValueProto
from feast.repo_config import FeastConfigBaseModel
from feast.utils import _build_retrieve_online_document_record, to_naive_utc
from feast.utils import (
_build_retrieve_online_document_record,
_get_feature_view_vector_field_metadata,
to_naive_utc,
)


class ElasticSearchOnlineStoreConfig(FeastConfigBaseModel, VectorStoreConfig):
Expand Down Expand Up @@ -161,6 +165,10 @@ def create_index(self, config: RepoConfig, table: FeatureView):
config: Feast repo configuration object.
table: FeatureView table for which the index needs to be created.
"""
vector_field_length = getattr(
_get_feature_view_vector_field_metadata(table), "vector_length", 512
)

index_mapping = {
"properties": {
"entity_key": {"type": "binary"},
Expand All @@ -170,7 +178,7 @@ def create_index(self, config: RepoConfig, table: FeatureView):
"created_ts": {"type": "date"},
"vector_value": {
"type": "dense_vector",
"dims": config.online_store.vector_len,
"dims": vector_field_length,
"index": "true",
"similarity": config.online_store.similarity,
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,7 @@ def update(
for table in tables_to_keep:
table_name = _table_id(project, table)
if config.online_store.vector_enabled:
vector_value_type = f"vector({config.online_store.vector_len})"
vector_value_type = "vector"
else:
# keep the vector_value_type as BYTEA if pgvector is not enabled, to maintain compatibility
vector_value_type = "BYTEA"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,11 @@
from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto
from feast.protos.feast.types.Value_pb2 import Value as ValueProto
from feast.repo_config import FeastConfigBaseModel
from feast.utils import _build_retrieve_online_document_record, to_naive_utc
from feast.utils import (
_build_retrieve_online_document_record,
_get_feature_view_vector_field_metadata,
to_naive_utc,
)

SCROLL_SIZE = 1000

Expand Down Expand Up @@ -198,13 +202,17 @@ def create_collection(self, config: RepoConfig, table: FeatureView):
table: FeatureView table for which the index needs to be created.
"""

vector_field_length = getattr(
_get_feature_view_vector_field_metadata(table), "vector_length", 512
)

client: QdrantClient = self._get_client(config)

client.create_collection(
collection_name=table.name,
vectors_config={
config.online_store.vector_name: models.VectorParams(
size=config.online_store.vector_len,
size=vector_field_length,
distance=DISTANCE_MAPPING[config.online_store.similarity.lower()],
)
},
Expand Down
28 changes: 19 additions & 9 deletions sdk/python/feast/infra/online_stores/sqlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
from feast.types import FEAST_VECTOR_TYPES, PrimitiveFeastType
from feast.utils import (
_build_retrieve_online_document_record,
_get_feature_view_vector_field_metadata,
_serialize_vector_to_float_list,
to_naive_utc,
)
Expand Down Expand Up @@ -100,7 +101,6 @@ class SqliteOnlineStoreConfig(FeastConfigBaseModel, VectorStoreConfig):
""" (optional) Path to sqlite db """

vector_enabled: bool = False
vector_len: Optional[int] = None
text_search_enabled: bool = False


Expand Down Expand Up @@ -171,8 +171,13 @@ def online_write_batch(
feature_type_dict.get(feature_name, None)
in FEAST_VECTOR_TYPES
):
vector_field_length = getattr(
_get_feature_view_vector_field_metadata(table),
"vector_length",
512,
)
val_bin = serialize_f32(
val.float_list_val.val, config.online_store.vector_len
val.float_list_val.val, vector_field_length
) # type: ignore
else:
val_bin = feast_value_type_to_python_type(val)
Expand Down Expand Up @@ -354,15 +359,19 @@ def retrieve_online_documents(
conn = self._get_conn(config)
cur = conn.cursor()

vector_field_length = getattr(
_get_feature_view_vector_field_metadata(table), "vector_length", 512
)

# Convert the embedding to a binary format instead of using SerializeToString()
query_embedding_bin = serialize_f32(embedding, config.online_store.vector_len)
query_embedding_bin = serialize_f32(embedding, vector_field_length)
table_name = _table_id(project, table)
vector_field = _get_vector_field(table)

cur.execute(
f"""
CREATE VIRTUAL TABLE vec_table using vec0(
vector_value float[{config.online_store.vector_len}]
vector_value float[{vector_field_length}]
);
"""
)
Expand All @@ -378,7 +387,7 @@ def retrieve_online_documents(
cur.execute(
f"""
CREATE VIRTUAL TABLE IF NOT EXISTS vec_table using vec0(
vector_value float[{config.online_store.vector_len}]
vector_value float[{vector_field_length}]
);
"""
)
Expand Down Expand Up @@ -476,18 +485,19 @@ def retrieve_online_documents_v2(
conn = self._get_conn(config)
cur = conn.cursor()

if online_store.vector_enabled and not online_store.vector_len:
raise ValueError("vector_len is not configured in the online store config")
vector_field_length = getattr(
_get_feature_view_vector_field_metadata(table), "vector_length", 512
)

table_name = _table_id(config.project, table)
vector_field = _get_vector_field(table)

if online_store.vector_enabled:
query_embedding_bin = serialize_f32(query, online_store.vector_len) # type: ignore
query_embedding_bin = serialize_f32(query, vector_field_length) # type: ignore
cur.execute(
f"""
CREATE VIRTUAL TABLE IF NOT EXISTS vec_table using vec0(
vector_value float[{online_store.vector_len}]
vector_value float[{vector_field_length}]
);
"""
)
Expand Down
3 changes: 0 additions & 3 deletions sdk/python/feast/infra/online_stores/vector_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,6 @@ class VectorStoreConfig:
# This is only applicable for online store.
vector_enabled: Optional[bool] = False

# If vector is enabled, the length of the vector field
vector_len: Optional[int] = 512

# The vector similarity metric to use in KNN search
# It is helpful for vector database that does not support config at retrieval runtime
# E.g.
Expand Down
1 change: 1 addition & 0 deletions sdk/python/feast/on_demand_feature_view.py
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,7 @@ def from_proto(
name=feature.name,
dtype=from_value_type(ValueType(feature.value_type)),
vector_index=feature.vector_index,
vector_length=feature.vector_length,
vector_search_metric=feature.vector_search_metric,
)
for feature in on_demand_feature_view_proto.spec.features
Expand Down
16 changes: 13 additions & 3 deletions sdk/python/feast/protos/feast/core/Aggregation_pb2.py

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading