Skip to content

Commit a66d890

Browse files
authored
feat: Added ray example template (#5570)
Signed-off-by: ntkathole <[email protected]>
1 parent 9ab32e5 commit a66d890

File tree

12 files changed

+758
-64
lines changed

12 files changed

+758
-64
lines changed

sdk/python/feast/cli/cli.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -379,6 +379,7 @@ def materialize_incremental_command(ctx: click.Context, end_ts: str, views: List
379379
"ikv",
380380
"couchbase",
381381
"milvus",
382+
"ray",
382383
],
383384
case_sensitive=False,
384385
),

sdk/python/feast/infra/compute_engines/ray/compute.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
RayDAGRetrievalJob,
2525
RayMaterializationJob,
2626
)
27+
from feast.infra.compute_engines.ray.utils import write_to_online_store
2728
from feast.infra.offline_stores.offline_store import RetrievalJob
2829
from feast.infra.registry.base_registry import BaseRegistry
2930

@@ -203,11 +204,12 @@ def _materialize_from_offline_store(
203204
arrow_table = retrieval_job.to_arrow()
204205

205206
# Write to online store if enabled
206-
if getattr(feature_view, "online", False):
207-
# TODO: Implement proper online store writing with correct data format conversion
208-
logger.debug(
209-
"Online store writing not implemented yet for Ray compute engine"
210-
)
207+
write_to_online_store(
208+
arrow_table=arrow_table,
209+
feature_view=feature_view,
210+
online_store=self.online_store,
211+
repo_config=self.repo_config,
212+
)
211213

212214
# Write to offline store if enabled (this handles sink_source automatically for derived views)
213215
if getattr(feature_view, "offline", False):

sdk/python/feast/infra/compute_engines/ray/nodes.py

Lines changed: 43 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,10 @@
1818
from feast.infra.compute_engines.dag.node import DAGNode
1919
from feast.infra.compute_engines.dag.value import DAGValue
2020
from feast.infra.compute_engines.ray.config import RayComputeEngineConfig
21+
from feast.infra.compute_engines.ray.utils import (
22+
safe_batch_processor,
23+
write_to_online_store,
24+
)
2125
from feast.infra.compute_engines.utils import create_offline_store_retrieval_job
2226
from feast.infra.ray_shared_utils import (
2327
apply_field_mapping,
@@ -149,9 +153,8 @@ def execute(self, context: ExecutionContext) -> DAGValue:
149153
feature_df = feature_dataset.to_pandas()
150154
feature_ref = ray.put(feature_df)
151155

156+
@safe_batch_processor
152157
def join_with_aggregated_features(batch: pd.DataFrame) -> pd.DataFrame:
153-
if batch.empty:
154-
return batch
155158
features = ray.get(feature_ref)
156159
if join_keys:
157160
result = pd.merge(
@@ -226,10 +229,9 @@ def execute(self, context: ExecutionContext) -> DAGValue:
226229
input_value.assert_format(DAGFormat.RAY)
227230
dataset: Dataset = input_value.data
228231

232+
@safe_batch_processor
229233
def apply_filters(batch: pd.DataFrame) -> pd.DataFrame:
230234
"""Apply TTL and custom filters to the batch."""
231-
if batch.empty:
232-
return batch
233235

234236
filtered_batch = batch.copy()
235237

@@ -447,11 +449,9 @@ def execute(self, context: ExecutionContext) -> DAGValue:
447449
input_value.assert_format(DAGFormat.RAY)
448450
dataset: Dataset = input_value.data
449451

452+
@safe_batch_processor
450453
def deduplicate_batch(batch: pd.DataFrame) -> pd.DataFrame:
451454
"""Remove duplicates from the batch."""
452-
if batch.empty:
453-
return batch
454-
455455
# Get deduplication keys
456456
join_keys = self.column_info.join_keys
457457
timestamp_col = self.column_info.timestamp_column
@@ -518,27 +518,21 @@ def execute(self, context: ExecutionContext) -> DAGValue:
518518
elif callable(self.transformation):
519519
transformation_serialized = dill.dumps(self.transformation)
520520

521+
@safe_batch_processor
521522
def apply_transformation_with_serialized_udf(
522523
batch: pd.DataFrame,
523524
) -> pd.DataFrame:
524525
"""Apply the transformation using pre-serialized UDF."""
525-
if batch.empty:
526-
return batch
527-
528-
try:
529-
if transformation_serialized:
530-
transformation_func = dill.loads(transformation_serialized)
531-
transformed_batch = transformation_func(batch)
532-
else:
533-
logger.warning(
534-
"No serialized transformation available, returning original batch"
535-
)
536-
transformed_batch = batch
526+
if transformation_serialized:
527+
transformation_func = dill.loads(transformation_serialized)
528+
transformed_batch = transformation_func(batch)
529+
else:
530+
logger.warning(
531+
"No serialized transformation available, returning original batch"
532+
)
533+
transformed_batch = batch
537534

538-
return transformed_batch
539-
except Exception as e:
540-
logger.error(f"Transformation failed: {e}")
541-
return batch
535+
return transformed_batch
542536

543537
transformed_dataset = dataset.map_batches(
544538
apply_transformation_with_serialized_udf, batch_format="pandas"
@@ -645,46 +639,36 @@ def execute(self, context: ExecutionContext) -> DAGValue:
645639
feature_view=self.feature_view, repo_config=context.repo_config
646640
)
647641

642+
@safe_batch_processor
648643
def write_batch_with_serialized_artifacts(batch: pd.DataFrame) -> pd.DataFrame:
649644
"""Write each batch using pre-serialized artifacts."""
650-
if batch.empty:
651-
return batch
652-
653-
try:
654-
(
655-
feature_view,
656-
online_store,
657-
offline_store,
658-
repo_config,
659-
) = serialized_artifacts.unserialize()
660-
661-
arrow_table = pa.Table.from_pandas(batch)
662-
663-
# Write to online store if enabled
664-
if getattr(feature_view, "online", False):
665-
# TODO: Implement proper online store writing with correct data format conversion
666-
logger.debug(
667-
"Online store writing not implemented yet for Ray compute engine"
668-
)
669-
670-
# Write to offline store if enabled
671-
if getattr(feature_view, "offline", False):
672-
try:
673-
offline_store.offline_write_batch(
674-
config=repo_config,
675-
feature_view=feature_view,
676-
table=arrow_table,
677-
progress=lambda x: None,
678-
)
679-
except Exception as e:
680-
logger.error(f"Failed to write to offline store: {e}")
681-
raise
645+
(
646+
feature_view,
647+
online_store,
648+
offline_store,
649+
repo_config,
650+
) = serialized_artifacts.unserialize()
651+
652+
arrow_table = pa.Table.from_pandas(batch)
653+
654+
# Write to online store if enabled
655+
write_to_online_store(
656+
arrow_table=arrow_table,
657+
feature_view=feature_view,
658+
online_store=online_store,
659+
repo_config=repo_config,
660+
)
682661

683-
return batch
662+
# Write to offline store if enabled
663+
if getattr(feature_view, "offline", False):
664+
offline_store.offline_write_batch(
665+
config=repo_config,
666+
feature_view=feature_view,
667+
table=arrow_table,
668+
progress=lambda x: None,
669+
)
684670

685-
except Exception as e:
686-
logger.error(f"Write operation failed: {e}")
687-
raise
671+
return batch
688672

689673
written_dataset = dataset.map_batches(
690674
write_batch_with_serialized_artifacts, batch_format="pandas"
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
"""
2+
Utility functions for Ray compute engine.
3+
"""
4+
5+
import logging
6+
from typing import Callable, Dict, Union
7+
8+
import pandas as pd
9+
import pyarrow as pa
10+
11+
from feast.batch_feature_view import BatchFeatureView
12+
from feast.feature_view import FeatureView
13+
from feast.infra.online_stores.online_store import OnlineStore
14+
from feast.repo_config import RepoConfig
15+
from feast.stream_feature_view import StreamFeatureView
16+
from feast.utils import _convert_arrow_to_proto
17+
from feast.value_type import ValueType
18+
19+
logger = logging.getLogger(__name__)
20+
21+
22+
def write_to_online_store(
    arrow_table: pa.Table,
    feature_view: Union[BatchFeatureView, StreamFeatureView, FeatureView],
    online_store: OnlineStore,
    repo_config: RepoConfig,
) -> None:
    """
    Write Arrow table data to the online store for a materialized feature view.

    No-op when the feature view does not have online serving enabled
    (``feature_view.online`` falsy or absent).

    Args:
        arrow_table: Arrow table containing the rows to write.
        feature_view: Feature view being materialized.
        online_store: Online store instance to write through.
        repo_config: Repository configuration passed to the store.

    Raises:
        Exception: Re-raises any failure from proto conversion or the online
            store write, after logging it, so materialization does not
            silently report success while rows are missing online.
    """
    if not getattr(feature_view, "online", False):
        return

    try:
        # Map entity join keys to their value types; _convert_arrow_to_proto
        # needs this mapping to encode entity keys correctly.
        join_key_to_value_type: Dict[str, ValueType] = {}
        if hasattr(feature_view, "entity_columns") and feature_view.entity_columns:
            join_key_to_value_type = {
                entity.name: entity.dtype.to_value_type()
                for entity in feature_view.entity_columns
            }

        rows_to_write = _convert_arrow_to_proto(
            arrow_table, feature_view, join_key_to_value_type
        )

        if rows_to_write:
            online_store.online_write_batch(
                config=repo_config,
                table=feature_view,
                data=rows_to_write,
                progress=lambda x: None,
            )
            logger.debug(
                f"Successfully wrote {len(rows_to_write)} rows to online store for {feature_view.name}"
            )
        else:
            logger.warning(f"No rows to write for {feature_view.name}")

    except Exception as e:
        # Log and re-raise: swallowing the error here would make
        # materialization look successful while online data is missing.
        # Callers that want best-effort semantics (e.g. batch processors
        # wrapped in safe_batch_processor) catch this themselves.
        logger.error(f"Failed to write to online store for {feature_view.name}: {e}")
        raise
67+
68+
69+
def safe_batch_processor(
    func: Callable[[pd.DataFrame], pd.DataFrame],
) -> Callable[[pd.DataFrame], pd.DataFrame]:
    """
    Decorator for batch processing functions that handles empty batches and errors gracefully.

    Empty batches are returned unchanged without invoking ``func``. If ``func``
    raises, the error is logged and the ORIGINAL batch is returned unmodified
    (best-effort semantics for Ray ``map_batches`` pipelines).

    Args:
        func: Function that processes a pandas DataFrame batch.

    Returns:
        Wrapped function that handles empty batches and exceptions, with
        ``func``'s metadata (``__name__``, ``__doc__``) preserved.
    """
    # Local import keeps this block self-contained; runs once per decoration.
    from functools import wraps

    # Preserve the wrapped function's identity so logs, Ray dashboards, and
    # debuggers show the real batch-processor name instead of "wrapper".
    @wraps(func)
    def wrapper(batch: pd.DataFrame) -> pd.DataFrame:
        # Skip all work for empty batches.
        if batch.empty:
            return batch

        try:
            return func(batch)
        except Exception as e:
            # Best-effort: log and pass the batch through.
            # NOTE(review): on failure, downstream stages receive the
            # UNPROCESSED batch — confirm that is intended for writers.
            logger.error(f"Batch processing failed in {func.__name__}: {e}")
            return batch

    return wrapper
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# Feast Ray Template
2+
3+
This template demonstrates Feast's Ray integration, showcasing both the **Ray Offline Store** and **Ray Compute Engine** capabilities for distributed feature processing.
4+
5+
## What's Included
6+
7+
```
8+
ray_template/
9+
├── feature_repo/
10+
│ ├── feature_store.yaml # Ray offline store + compute engine config
11+
│ ├── example_repo.py # Feature definitions with Ray optimizations
12+
│ ├── test_workflow.py # Demo script showing Ray capabilities
13+
│ └── data/ # Sample datasets (generated by bootstrap)
14+
│ ├── driver_stats.parquet
15+
│ └── customer_daily_profile.parquet
16+
└── README.md # This file
17+
```
18+
19+
20+
## Getting Started
21+
22+
1. **Initialize the template**:
23+
```bash
24+
feast init -t ray my_ray_project
25+
cd my_ray_project/feature_repo
26+
```
27+
28+
2. **Install Ray dependencies**:
29+
```bash
30+
pip install feast[ray]
31+
```
32+
33+
3. **Apply feature definitions**:
34+
```bash
35+
feast apply
36+
```
37+
38+
4. **Run the demo**:
39+
```bash
40+
python test_workflow.py
41+
```

sdk/python/feast/templates/ray/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)