Skip to content

Commit dcd8ec9

Browse files
chore: Clean up push source tests (#2912)
* Delete unnecessary unit test Signed-off-by: Felix Wang <[email protected]>
* Clean up python feature server test Signed-off-by: Felix Wang <[email protected]>
* Clean up push source offline retrieval test Signed-off-by: Felix Wang <[email protected]>
* Clean up push source online retrieval test Signed-off-by: Felix Wang <[email protected]>
* Clean up offline write tests Signed-off-by: Felix Wang <[email protected]>
* Add back reorder columns test for offline write Signed-off-by: Felix Wang <[email protected]>
* Rename create_dataset Signed-off-by: Felix Wang <[email protected]>
* Add SFV back into online retrieval test Signed-off-by: Felix Wang <[email protected]>
1 parent adf3212 commit dcd8ec9

File tree

9 files changed

+103
-214
lines changed

9 files changed

+103
-214
lines changed

sdk/python/tests/conftest.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626

2727
from feast import FeatureStore
2828
from feast.wait import wait_retry_backoff
29-
from tests.data.data_creator import create_dataset
29+
from tests.data.data_creator import create_basic_driver_dataset
3030
from tests.integration.feature_repos.integration_test_repo_config import (
3131
IntegrationTestRepoConfig,
3232
)
@@ -351,7 +351,7 @@ def universal_data_sources(environment) -> TestData:
351351

352352
@pytest.fixture
353353
def e2e_data_sources(environment: Environment):
354-
df = create_dataset()
354+
df = create_basic_driver_dataset()
355355
data_source = environment.data_source_creator.create_data_source(
356356
df, environment.feature_store.project, field_mapping={"ts_1": "ts"},
357357
)

sdk/python/tests/data/data_creator.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from feast.types import FeastType, Float32, Int32, Int64, String
88

99

10-
def create_dataset(
10+
def create_basic_driver_dataset(
1111
entity_type: FeastType = Int32,
1212
feature_dtype: str = None,
1313
feature_is_list: bool = False,

sdk/python/tests/integration/e2e/test_python_feature_server.py

Lines changed: 14 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,8 @@
77

88
from feast.feast_object import FeastObject
99
from feast.feature_server import get_app
10-
from tests.integration.feature_repos.integration_test_repo_config import (
11-
IntegrationTestRepoConfig,
12-
)
1310
from tests.integration.feature_repos.repo_configuration import (
14-
construct_test_environment,
1511
construct_universal_feature_views,
16-
construct_universal_test_data,
1712
)
1813
from tests.integration.feature_repos.universal.entities import (
1914
customer,
@@ -63,16 +58,13 @@ def test_get_online_features(python_fs_client):
6358
@pytest.mark.integration
6459
@pytest.mark.universal_online_stores
6560
def test_push(python_fs_client):
66-
# TODO(felixwang9817): Note that we choose an entity value of 102 here since it is not included
67-
# in the existing range of entity values (1-49). This allows us to push data for this test
68-
# without affecting other tests. This decision is tech debt, and should be resolved by finding a
69-
# better way to isolate data sources across tests.
61+
initial_temp = get_temperatures(python_fs_client, location_ids=[1])[0]
7062
json_data = json.dumps(
7163
{
7264
"push_source_name": "location_stats_push_source",
7365
"df": {
74-
"location_id": [102],
75-
"temperature": [4],
66+
"location_id": [1],
67+
"temperature": [initial_temp * 100],
7668
"event_timestamp": [str(datetime.utcnow())],
7769
"created": [str(datetime.utcnow())],
7870
},
@@ -82,7 +74,7 @@ def test_push(python_fs_client):
8274

8375
# Check new pushed temperature is fetched
8476
assert response.status_code == 200
85-
assert get_temperatures(python_fs_client, location_ids=[102]) == [4]
77+
assert get_temperatures(python_fs_client, location_ids=[1]) == [initial_temp * 100]
8678

8779

8880
def get_temperatures(client, location_ids: List[int]):
@@ -102,20 +94,14 @@ def get_temperatures(client, location_ids: List[int]):
10294

10395

10496
@pytest.fixture
105-
def python_fs_client(request):
106-
config = IntegrationTestRepoConfig()
107-
environment = construct_test_environment(config, fixture_request=request)
97+
def python_fs_client(environment, universal_data_sources, request):
10898
fs = environment.feature_store
109-
try:
110-
entities, datasets, data_sources = construct_universal_test_data(environment)
111-
feature_views = construct_universal_feature_views(data_sources)
112-
feast_objects: List[FeastObject] = []
113-
feast_objects.extend(feature_views.values())
114-
feast_objects.extend([driver(), customer(), location()])
115-
fs.apply(feast_objects)
116-
fs.materialize(environment.start_date, environment.end_date)
117-
client = TestClient(get_app(fs))
118-
yield client
119-
finally:
120-
fs.teardown()
121-
environment.data_source_creator.teardown()
99+
entities, datasets, data_sources = universal_data_sources
100+
feature_views = construct_universal_feature_views(data_sources)
101+
feast_objects: List[FeastObject] = []
102+
feast_objects.extend(feature_views.values())
103+
feast_objects.extend([driver(), customer(), location()])
104+
fs.apply(feast_objects)
105+
fs.materialize(environment.start_date, environment.end_date)
106+
client = TestClient(get_app(fs))
107+
yield client

sdk/python/tests/integration/offline_store/test_offline_write.py

Lines changed: 42 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -7,52 +7,54 @@
77

88
from feast import FeatureView, Field
99
from feast.types import Float32, Int32
10+
from tests.integration.feature_repos.repo_configuration import (
11+
construct_universal_feature_views,
12+
)
1013
from tests.integration.feature_repos.universal.entities import driver
1114

12-
# TODO(felixwang9817): Add a unit test that checks that write_to_offline_store can reorder columns.
13-
# This should only happen after https://github.com/feast-dev/feast/issues/2797 is fixed.
14-
1515

1616
@pytest.mark.integration
1717
@pytest.mark.universal_offline_stores
18-
@pytest.mark.universal_online_stores(only=["sqlite"])
19-
def test_writing_incorrect_schema_fails(environment, universal_data_sources):
20-
"""Tests that writing a dataframe with an incorrect schema fails."""
18+
def test_reorder_columns(environment, universal_data_sources):
19+
"""Tests that a dataframe with columns in the wrong order is reordered."""
2120
store = environment.feature_store
2221
_, _, data_sources = universal_data_sources
23-
driver_entity = driver()
24-
driver_stats = FeatureView(
25-
name="driver_stats",
26-
entities=[driver_entity],
27-
schema=[
28-
Field(name="avg_daily_trips", dtype=Int32),
29-
Field(name="conv_rate", dtype=Float32),
30-
Field(name="acc_rate", dtype=Float32),
31-
],
32-
source=data_sources.driver,
33-
)
22+
feature_views = construct_universal_feature_views(data_sources)
23+
driver_fv = feature_views.driver
24+
store.apply([driver(), driver_fv])
3425

3526
now = datetime.utcnow()
3627
ts = pd.Timestamp(now).round("ms")
3728

38-
entity_df = pd.DataFrame.from_dict(
39-
{"driver_id": [1001, 1002], "event_timestamp": [ts - timedelta(hours=3), ts]}
29+
# This dataframe has columns in the wrong order.
30+
df_to_write = pd.DataFrame.from_dict(
31+
{
32+
"avg_daily_trips": [random.randint(0, 10), random.randint(0, 10)],
33+
"created": [ts, ts],
34+
"conv_rate": [random.random(), random.random()],
35+
"event_timestamp": [ts, ts],
36+
"acc_rate": [random.random(), random.random()],
37+
"driver_id": [1001, 1001],
38+
},
4039
)
4140

42-
store.apply([driver_entity, driver_stats])
43-
df = store.get_historical_features(
44-
entity_df=entity_df,
45-
features=[
46-
"driver_stats:conv_rate",
47-
"driver_stats:acc_rate",
48-
"driver_stats:avg_daily_trips",
49-
],
50-
full_feature_names=False,
51-
).to_df()
41+
store.write_to_offline_store(
42+
driver_fv.name, df_to_write, allow_registry_cache=False
43+
)
5244

53-
assert df["conv_rate"].isnull().all()
54-
assert df["acc_rate"].isnull().all()
55-
assert df["avg_daily_trips"].isnull().all()
45+
46+
@pytest.mark.integration
47+
@pytest.mark.universal_offline_stores
48+
def test_writing_incorrect_schema_fails(environment, universal_data_sources):
49+
"""Tests that writing a dataframe with an incorrect schema fails."""
50+
store = environment.feature_store
51+
_, _, data_sources = universal_data_sources
52+
feature_views = construct_universal_feature_views(data_sources)
53+
driver_fv = feature_views.driver
54+
store.apply([driver(), driver_fv])
55+
56+
now = datetime.utcnow()
57+
ts = pd.Timestamp(now).round("ms")
5658

5759
expected_df = pd.DataFrame.from_dict(
5860
{
@@ -65,13 +67,12 @@ def test_writing_incorrect_schema_fails(environment, universal_data_sources):
6567
)
6668
with pytest.raises(ValueError):
6769
store.write_to_offline_store(
68-
driver_stats.name, expected_df, allow_registry_cache=False
70+
driver_fv.name, expected_df, allow_registry_cache=False
6971
)
7072

7173

7274
@pytest.mark.integration
7375
@pytest.mark.universal_offline_stores
74-
@pytest.mark.universal_online_stores(only=["sqlite"])
7576
def test_writing_consecutively_to_offline_store(environment, universal_data_sources):
7677
store = environment.feature_store
7778
_, _, data_sources = universal_data_sources
@@ -96,7 +97,7 @@ def test_writing_consecutively_to_offline_store(environment, universal_data_sour
9697
entity_df = pd.DataFrame.from_dict(
9798
{
9899
"driver_id": [1001, 1001],
99-
"event_timestamp": [ts - timedelta(hours=4), ts - timedelta(hours=3)],
100+
"event_timestamp": [ts + timedelta(hours=3), ts + timedelta(hours=4)],
100101
}
101102
)
102103

@@ -117,7 +118,7 @@ def test_writing_consecutively_to_offline_store(environment, universal_data_sour
117118

118119
first_df = pd.DataFrame.from_dict(
119120
{
120-
"event_timestamp": [ts - timedelta(hours=4), ts - timedelta(hours=3)],
121+
"event_timestamp": [ts + timedelta(hours=3), ts + timedelta(hours=4)],
121122
"driver_id": [1001, 1001],
122123
"conv_rate": [random.random(), random.random()],
123124
"acc_rate": [random.random(), random.random()],
@@ -155,7 +156,7 @@ def test_writing_consecutively_to_offline_store(environment, universal_data_sour
155156

156157
second_df = pd.DataFrame.from_dict(
157158
{
158-
"event_timestamp": [ts - timedelta(hours=1), ts],
159+
"event_timestamp": [ts + timedelta(hours=5), ts + timedelta(hours=6)],
159160
"driver_id": [1001, 1001],
160161
"conv_rate": [random.random(), random.random()],
161162
"acc_rate": [random.random(), random.random()],
@@ -172,10 +173,10 @@ def test_writing_consecutively_to_offline_store(environment, universal_data_sour
172173
{
173174
"driver_id": [1001, 1001, 1001, 1001],
174175
"event_timestamp": [
175-
ts - timedelta(hours=4),
176-
ts - timedelta(hours=3),
177-
ts - timedelta(hours=1),
178-
ts,
176+
ts + timedelta(hours=3),
177+
ts + timedelta(hours=4),
178+
ts + timedelta(hours=5),
179+
ts + timedelta(hours=6),
179180
],
180181
}
181182
)

sdk/python/tests/integration/offline_store/test_push_offline_retrieval.py renamed to sdk/python/tests/integration/offline_store/test_push_offline.py

Lines changed: 8 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -8,39 +8,30 @@
88
from tests.integration.feature_repos.repo_configuration import (
99
construct_universal_feature_views,
1010
)
11-
from tests.integration.feature_repos.universal.entities import (
12-
customer,
13-
driver,
14-
location,
15-
)
11+
from tests.integration.feature_repos.universal.entities import location
1612

1713

1814
@pytest.mark.integration
1915
@pytest.mark.universal_offline_stores
20-
@pytest.mark.universal_online_stores(only=["sqlite"])
21-
def test_push_features_and_read_from_offline_store(environment, universal_data_sources):
16+
def test_push_features_and_read(environment, universal_data_sources):
2217
store = environment.feature_store
23-
24-
(_, _, data_sources) = universal_data_sources
18+
_, _, data_sources = universal_data_sources
2519
feature_views = construct_universal_feature_views(data_sources)
26-
now = pd.Timestamp(datetime.datetime.utcnow()).round("ms")
20+
location_fv = feature_views.pushed_locations
21+
store.apply([location(), location_fv])
2722

28-
store.apply([driver(), customer(), location(), *feature_views.values()])
29-
entity_df = pd.DataFrame.from_dict({"location_id": [100], "event_timestamp": [now]})
23+
now = pd.Timestamp(datetime.datetime.utcnow()).round("ms")
24+
entity_df = pd.DataFrame.from_dict({"location_id": [1], "event_timestamp": [now]})
3025

3126
before_df = store.get_historical_features(
3227
entity_df=entity_df,
3328
features=["pushable_location_stats:temperature"],
3429
full_feature_names=False,
3530
).to_df()
3631

37-
# TODO(felixwang9817): Note that we choose an entity value of 100 here since it is not included
38-
# in the existing range of entity values (1-49). This allows us to push data for this test
39-
# without affecting other tests. This decision is tech debt, and should be resolved by finding a
40-
# better way to isolate data sources across tests.
4132
data = {
4233
"event_timestamp": [now],
43-
"location_id": [100],
34+
"location_id": [1],
4435
"temperature": [4],
4536
"created": [now],
4637
}

sdk/python/tests/integration/online_store/test_push_online_retrieval.py renamed to sdk/python/tests/integration/online_store/test_push_online.py

Lines changed: 7 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -6,29 +6,20 @@
66
from tests.integration.feature_repos.repo_configuration import (
77
construct_universal_feature_views,
88
)
9-
from tests.integration.feature_repos.universal.entities import (
10-
customer,
11-
driver,
12-
location,
13-
)
9+
from tests.integration.feature_repos.universal.entities import location
1410

1511

1612
@pytest.mark.integration
1713
@pytest.mark.universal_online_stores
1814
def test_push_features_and_read(environment, universal_data_sources):
1915
store = environment.feature_store
20-
21-
(_, datasets, data_sources) = universal_data_sources
16+
_, _, data_sources = universal_data_sources
2217
feature_views = construct_universal_feature_views(data_sources)
18+
location_fv = feature_views.pushed_locations
19+
store.apply([location(), location_fv])
2320

24-
store.apply([driver(), customer(), location(), *feature_views.values()])
25-
26-
# TODO(felixwang9817): Note that we choose an entity value of 101 here since it is not included
27-
# in the existing range of entity values (1-49). This allows us to push data for this test
28-
# without affecting other tests. This decision is tech debt, and should be resolved by finding a
29-
# better way to isolate data sources across tests.
3021
data = {
31-
"location_id": [101],
22+
"location_id": [1],
3223
"temperature": [4],
3324
"event_timestamp": [pd.Timestamp(datetime.datetime.utcnow()).round("ms")],
3425
"created": [pd.Timestamp(datetime.datetime.utcnow()).round("ms")],
@@ -39,8 +30,8 @@ def test_push_features_and_read(environment, universal_data_sources):
3930

4031
online_resp = store.get_online_features(
4132
features=["pushable_location_stats:temperature"],
42-
entity_rows=[{"location_id": 101}],
33+
entity_rows=[{"location_id": 1}],
4334
)
4435
online_resp_dict = online_resp.to_dict()
45-
assert online_resp_dict["location_id"] == [101]
36+
assert online_resp_dict["location_id"] == [1]
4637
assert online_resp_dict["temperature"] == [4]

0 commit comments

Comments (0)