Skip to content

Commit b95f441

Browse files
authored
fix: Don't prevent apply from running given duplicate empty names in data sources. Also fix repeated apply of Spark data source. (#2415)
* fix: Print more warning statements on requirement for data sources to have name in future, but don't prevent apply from running if there are duplicate empty data sources. Also attach class type when applying data sources so repeated feast apply commands properly work for Spark Signed-off-by: Danny Chiao <[email protected]> * typo Signed-off-by: Danny Chiao <[email protected]> * typo Signed-off-by: Danny Chiao <[email protected]> * fix Signed-off-by: Danny Chiao <[email protected]> * fix Signed-off-by: Danny Chiao <[email protected]> * fix Signed-off-by: Danny Chiao <[email protected]> * More tests Signed-off-by: Danny Chiao <[email protected]> * fix Signed-off-by: Danny Chiao <[email protected]> * fix Signed-off-by: Danny Chiao <[email protected]> * fix Signed-off-by: Danny Chiao <[email protected]> * fix Signed-off-by: Danny Chiao <[email protected]> * revert Signed-off-by: Danny Chiao <[email protected]> * fix Signed-off-by: Danny Chiao <[email protected]> * fix Signed-off-by: Danny Chiao <[email protected]> * fix Signed-off-by: Danny Chiao <[email protected]>
1 parent ad5694e commit b95f441

File tree

11 files changed

+68
-32
lines changed

11 files changed

+68
-32
lines changed

protos/feast/core/SavedDataset.proto

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ option go_package = "github.com/feast-dev/feast/go/protos/feast/core";
2424

2525
import "google/protobuf/timestamp.proto";
2626
import "feast/core/DataSource.proto";
27-
import "feast/core/FeatureService.proto";
2827

2928
message SavedDatasetSpec {
3029
// Name of the dataset. Must be unique since it's possible to overwrite dataset by name

sdk/python/feast/cli.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
# limitations under the License.
1414

1515
import logging
16+
import warnings
1617
from datetime import datetime
1718
from pathlib import Path
1819
from typing import List, Optional
@@ -151,6 +152,11 @@ def data_source_describe(ctx: click.Context, name: str):
151152
print(e)
152153
exit(1)
153154

155+
warnings.warn(
156+
"Describing data sources will only work properly if all data sources have names or table names specified. "
157+
"Starting Feast 0.21, data source unique names will be required to encourage data source discovery.",
158+
RuntimeWarning,
159+
)
154160
print(
155161
yaml.dump(
156162
yaml.safe_load(str(data_source)), default_flow_style=False, sort_keys=False
@@ -173,6 +179,11 @@ def data_source_list(ctx: click.Context):
173179

174180
from tabulate import tabulate
175181

182+
warnings.warn(
183+
"Listing data sources will only work properly if all data sources have names or table names specified. "
184+
"Starting Feast 0.21, data source unique names will be required to encourage data source discovery",
185+
RuntimeWarning,
186+
)
176187
print(tabulate(table, headers=["NAME", "CLASS"], tablefmt="plain"))
177188

178189

sdk/python/feast/data_source.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
from abc import ABC, abstractmethod
1818
from typing import Any, Callable, Dict, Iterable, Optional, Tuple
1919

20+
from google.protobuf.json_format import MessageToJson
21+
2022
from feast import type_map
2123
from feast.data_format import StreamFormat
2224
from feast.protos.feast.core.DataSource_pb2 import DataSource as DataSourceProto
@@ -192,6 +194,9 @@ def __init__(
192194
def __hash__(self):
193195
return hash((id(self), self.name))
194196

197+
def __str__(self):
198+
return str(MessageToJson(self.to_proto()))
199+
195200
def __eq__(self, other):
196201
if not isinstance(other, DataSource):
197202
raise TypeError("Comparisons should only involve DataSource class objects.")

sdk/python/feast/diff/registry_diff.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,9 @@ def to_string(self):
6060
continue
6161
if feast_object_diff.transition_type == TransitionType.UNCHANGED:
6262
continue
63+
if feast_object_diff.feast_object_type == FeastObjectType.DATA_SOURCE:
64+
# TODO(adchia): Print statements out starting in Feast 0.21
65+
continue
6366
action, color = message_action_map[feast_object_diff.transition_type]
6467
log_string += f"{action} {feast_object_diff.feast_object_type.value} {Style.BRIGHT + color}{feast_object_diff.name}{Style.RESET_ALL}\n"
6568
if feast_object_diff.transition_type == TransitionType.UPDATE:
@@ -78,8 +81,11 @@ def to_string(self):
7881
def tag_objects_for_keep_delete_update_add(
7982
existing_objs: Iterable[FeastObject], desired_objs: Iterable[FeastObject]
8083
) -> Tuple[Set[FeastObject], Set[FeastObject], Set[FeastObject], Set[FeastObject]]:
81-
existing_obj_names = {e.name for e in existing_objs}
82-
desired_obj_names = {e.name for e in desired_objs}
84+
# TODO(adchia): Remove the "if X.name" condition when data sources are forced to have names
85+
existing_obj_names = {e.name for e in existing_objs if e.name}
86+
desired_objs = [obj for obj in desired_objs if obj.name]
87+
existing_objs = [obj for obj in existing_objs if obj.name]
88+
desired_obj_names = {e.name for e in desired_objs if e.name}
8389

8490
objs_to_add = {e for e in desired_objs if e.name not in existing_obj_names}
8591
objs_to_update = {e for e in desired_objs if e.name in existing_obj_names}

sdk/python/feast/feature_store.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2063,14 +2063,17 @@ def _validate_feature_views(feature_views: List[BaseFeatureView]):
20632063
def _validate_data_sources(data_sources: List[DataSource]):
20642064
"""Verify data sources have case-insensitively unique names"""
20652065
ds_names = set()
2066-
for fv in data_sources:
2067-
case_insensitive_ds_name = fv.name.lower()
2066+
for ds in data_sources:
2067+
case_insensitive_ds_name = ds.name.lower()
20682068
if case_insensitive_ds_name in ds_names:
2069-
raise ValueError(
2070-
f"More than one data source with name {case_insensitive_ds_name} found. "
2071-
f"Please ensure that all data source names are case-insensitively unique. "
2072-
f"It may be necessary to ignore certain files in your feature repository by using a .feastignore file."
2073-
)
2069+
if case_insensitive_ds_name.strip():
2070+
warnings.warn(
2071+
f"More than one data source with name {case_insensitive_ds_name} found. "
2072+
f"Please ensure that all data source names are case-insensitively unique. "
2073+
f"It may be necessary to ignore certain files in your feature repository by using a .feastignore "
2074+
f"file. Starting in Feast 0.21, unique names (perhaps inferred from the table name) will be "
2075+
f"required in data sources to encourage data source discovery"
2076+
)
20742077
else:
20752078
ds_names.add(case_insensitive_ds_name)
20762079

sdk/python/feast/infra/offline_stores/bigquery_source.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ def __init__(
6464
else:
6565
warnings.warn(
6666
(
67-
"Starting in Feast 0.21, Feast will require either a name for a data source (if using query) or `table`."
67+
f"Starting in Feast 0.21, Feast will require either a name for a data source (if using query) or `table`: {self.query}"
6868
),
6969
DeprecationWarning,
7070
)

sdk/python/feast/infra/offline_stores/redshift_source.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,12 @@ def __init__(
4141
query (optional): The query to be executed to obtain the features.
4242
name (optional): Name for the source. Defaults to the table_ref if not specified.
4343
"""
44+
# The default Redshift schema is named "public".
45+
_schema = "public" if table and not schema else schema
46+
self.redshift_options = RedshiftOptions(
47+
table=table, schema=_schema, query=query
48+
)
49+
4450
if table is None and query is None:
4551
raise ValueError('No "table" argument provided.')
4652
_name = name
@@ -50,7 +56,8 @@ def __init__(
5056
else:
5157
warnings.warn(
5258
(
53-
"Starting in Feast 0.21, Feast will require either a name for a data source (if using query) or `table`."
59+
f"Starting in Feast 0.21, Feast will require either a name for a data source (if using query) "
60+
f"or `table`: {self.query}"
5461
),
5562
DeprecationWarning,
5663
)
@@ -63,13 +70,6 @@ def __init__(
6370
date_partition_column,
6471
)
6572

66-
# The default Redshift schema is named "public".
67-
_schema = "public" if table and not schema else schema
68-
69-
self.redshift_options = RedshiftOptions(
70-
table=table, schema=_schema, query=query
71-
)
72-
7373
@staticmethod
7474
def from_proto(data_source: DataSourceProto):
7575
"""

sdk/python/feast/infra/offline_stores/snowflake_source.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,12 @@ def __init__(
4444
"""
4545
if table is None and query is None:
4646
raise ValueError('No "table" argument provided.')
47+
# The default Snowflake schema is named "PUBLIC".
48+
_schema = "PUBLIC" if (database and table and not schema) else schema
49+
50+
self.snowflake_options = SnowflakeOptions(
51+
database=database, schema=_schema, table=table, query=query
52+
)
4753

4854
# If no name, use the table as the default name
4955
_name = name
@@ -53,7 +59,8 @@ def __init__(
5359
else:
5460
warnings.warn(
5561
(
56-
"Starting in Feast 0.21, Feast will require either a name for a data source (if using query) or `table`."
62+
f"Starting in Feast 0.21, Feast will require either a name for a data source (if using query) "
63+
f"or `table`: {self.query}"
5764
),
5865
DeprecationWarning,
5966
)
@@ -66,13 +73,6 @@ def __init__(
6673
date_partition_column,
6774
)
6875

69-
# The default Snowflake schema is named "PUBLIC".
70-
_schema = "PUBLIC" if (database and table and not schema) else schema
71-
72-
self.snowflake_options = SnowflakeOptions(
73-
database=database, schema=_schema, table=table, query=query
74-
)
75-
7676
@staticmethod
7777
def from_proto(data_source: DataSourceProto):
7878
"""

sdk/python/feast/registry.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -314,11 +314,13 @@ def apply_data_source(
314314
commit: Whether to immediately commit to the registry
315315
"""
316316
registry = self._prepare_registry_for_changes()
317-
318317
for idx, existing_data_source_proto in enumerate(registry.data_sources):
319318
if existing_data_source_proto.name == data_source.name:
320319
del registry.data_sources[idx]
321320
data_source_proto = data_source.to_proto()
321+
data_source_proto.data_source_class_type = (
322+
f"{data_source.__class__.__module__}.{data_source.__class__.__name__}"
323+
)
322324
data_source_proto.project = project
323325
data_source_proto.data_source_class_type = (
324326
f"{data_source.__class__.__module__}.{data_source.__class__.__name__}"

sdk/python/feast/repo_operations.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -188,10 +188,8 @@ def extract_objects_for_apply_delete(project, registry, repo):
188188
return (
189189
all_to_apply,
190190
all_to_delete,
191-
set(
192-
objs_to_add[FeastObjectType.FEATURE_VIEW].union(
193-
objs_to_update[FeastObjectType.FEATURE_VIEW]
194-
)
191+
set(objs_to_add[FeastObjectType.FEATURE_VIEW]).union(
192+
set(objs_to_update[FeastObjectType.FEATURE_VIEW])
195193
),
196194
objs_to_delete[FeastObjectType.FEATURE_VIEW],
197195
)

0 commit comments

Comments
 (0)