 import copy
 import warnings
 from datetime import datetime, timedelta
-from typing import Dict, List, Optional, Tuple, Type
+from typing import Dict, List, Optional, Tuple, Type, Union

 from google.protobuf.duration_pb2 import Duration
 from google.protobuf.message import Message
@@ -90,6 +90,7 @@ class FeatureView(BaseFeatureView):
     ttl: Optional[timedelta]
     batch_source: DataSource
     stream_source: Optional[DataSource]
+    source_views: Optional[List["FeatureView"]]
     entity_columns: List[Field]
     features: List[Field]
     online: bool
@@ -103,7 +104,8 @@ def __init__(
         self,
         *,
         name: str,
-        source: DataSource,
+        source: Union[DataSource, "FeatureView", List["FeatureView"]],
+        sink_source: Optional[DataSource] = None,
         schema: Optional[List[Field]] = None,
         entities: Optional[List[Entity]] = None,
         ttl: Optional[timedelta] = timedelta(days=0),
@@ -144,22 +146,45 @@ def __init__(
         self.ttl = ttl
         schema = schema or []

-        # Initialize data sources.
+        # Normalize source
+        self.stream_source = None
+        self.data_source: Optional[DataSource] = None
+        self.source_views: List[FeatureView] = []
+
+        if isinstance(source, DataSource):
+            self.data_source = source
+        elif isinstance(source, FeatureView):
+            self.source_views = [source]
+        elif isinstance(source, list) and all(
+            isinstance(sv, FeatureView) for sv in source
+        ):
+            self.source_views = source
+        else:
+            raise TypeError(
+                "source must be a DataSource, a FeatureView, or a list of FeatureView."
+            )
+
+        # Set up stream, batch and derived view sources
         if (
-            isinstance(source, PushSource)
-            or isinstance(source, KafkaSource)
-            or isinstance(source, KinesisSource)
+            isinstance(self.data_source, PushSource)
+            or isinstance(self.data_source, KafkaSource)
+            or isinstance(self.data_source, KinesisSource)
         ):
-            self.stream_source = source
-            if not source.batch_source:
+            # Stream source definition
+            self.stream_source = self.data_source
+            if not self.data_source.batch_source:
                 raise ValueError(
-                    f"A batch_source needs to be specified for stream source `{source.name}`"
+                    f"A batch_source needs to be specified for stream source `{self.data_source.name}`"
                 )
-            else:
-                self.batch_source = source.batch_source
+            self.batch_source = self.data_source.batch_source
+        elif self.data_source:
+            # Batch source definition
+            self.batch_source = self.data_source
         else:
-            self.stream_source = None
-            self.batch_source = source
+            # Derived view source definition
+            if not sink_source:
+                raise ValueError("Derived FeatureView must specify `sink_source`.")
+            self.batch_source = sink_source

         # Initialize features and entity columns.
         features: List[Field] = []
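A minimal usage sketch of the source normalization above, assuming the usual feast imports; the entity, source, and view names and file paths are illustrative placeholders, not taken from this change:

# Illustrative only: driver_stats_source, derived_sink, and the field names
# are hypothetical placeholders.
from feast import Entity, FeatureView, Field, FileSource
from feast.types import Float32

driver = Entity(name="driver", join_keys=["driver_id"])

driver_stats_source = FileSource(
    name="driver_stats",
    path="data/driver_stats.parquet",
    timestamp_field="event_timestamp",
)

# source is a DataSource: it becomes self.data_source / self.batch_source.
driver_stats_view = FeatureView(
    name="driver_hourly_stats",
    entities=[driver],
    schema=[Field(name="conv_rate", dtype=Float32)],
    source=driver_stats_source,
)

# source is another FeatureView: it lands in self.source_views, and
# sink_source is required and becomes the derived view's batch_source.
derived_sink = FileSource(
    name="driver_daily_stats_sink",
    path="data/driver_daily_stats.parquet",
    timestamp_field="event_timestamp",
)

driver_daily_view = FeatureView(
    name="driver_daily_stats",
    entities=[driver],
    schema=[Field(name="avg_conv_rate", dtype=Float32)],
    source=driver_stats_view,
    sink_source=derived_sink,
)

Passing a FeatureView (or list of FeatureViews) without sink_source raises ValueError, and anything that is neither a DataSource nor FeatureView(s) raises TypeError.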
@@ -201,25 +226,26 @@ def __init__(
            )

        # TODO(felixwang9817): Add more robust validation of features.
-        cols = [field.name for field in schema]
-        for col in cols:
-            if (
-                self.batch_source.field_mapping is not None
-                and col in self.batch_source.field_mapping.keys()
-            ):
-                raise ValueError(
-                    f"The field {col} is mapped to {self.batch_source.field_mapping[col]} for this data source. "
-                    f"Please either remove this field mapping or use {self.batch_source.field_mapping[col]} as the "
-                    f"Entity or Feature name."
-                )
+        if self.batch_source is not None:
+            cols = [field.name for field in schema]
+            for col in cols:
+                if (
+                    self.batch_source.field_mapping is not None
+                    and col in self.batch_source.field_mapping.keys()
+                ):
+                    raise ValueError(
+                        f"The field {col} is mapped to {self.batch_source.field_mapping[col]} for this data source. "
+                        f"Please either remove this field mapping or use {self.batch_source.field_mapping[col]} as the "
+                        f"Entity or Feature name."
+                    )

        super().__init__(
            name=name,
            features=features,
            description=description,
            tags=tags,
            owner=owner,
-            source=source,
+            source=self.batch_source,
        )
        self.online = online
        self.offline = offline
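The field_mapping check above is now guarded so it only runs when a batch_source is present. A short sketch of the collision it rejects, continuing the illustrative names from the earlier sketch and using a hypothetical mapping:

# Hypothetical: the source maps raw column "conv_rate" to "conversion_rate",
# so a schema Field named "conv_rate" triggers the ValueError above.
mapped_source = FileSource(
    name="driver_stats_mapped",
    path="data/driver_stats.parquet",
    timestamp_field="event_timestamp",
    field_mapping={"conv_rate": "conversion_rate"},
)

FeatureView(
    name="driver_hourly_stats_mapped",
    entities=[driver],
    schema=[Field(name="conv_rate", dtype=Float32)],  # should be "conversion_rate"
    source=mapped_source,
)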
@@ -348,13 +374,18 @@ def to_proto(self) -> FeatureViewProto:
        meta = self.to_proto_meta()
        ttl_duration = self.get_ttl_duration()

-        batch_source_proto = self.batch_source.to_proto()
-        batch_source_proto.data_source_class_type = f"{self.batch_source.__class__.__module__}.{self.batch_source.__class__.__name__}"
+        batch_source_proto = None
+        if self.batch_source:
+            batch_source_proto = self.batch_source.to_proto()
+            batch_source_proto.data_source_class_type = f"{self.batch_source.__class__.__module__}.{self.batch_source.__class__.__name__}"

        stream_source_proto = None
        if self.stream_source:
            stream_source_proto = self.stream_source.to_proto()
            stream_source_proto.data_source_class_type = f"{self.stream_source.__class__.__module__}.{self.stream_source.__class__.__name__}"
+        source_view_protos = None
+        if self.source_views:
+            source_view_protos = [view.to_proto().spec for view in self.source_views]
        spec = FeatureViewSpecProto(
            name=self.name,
            entities=self.entities,
@@ -368,6 +399,7 @@ def to_proto(self) -> FeatureViewProto:
            offline=self.offline,
            batch_source=batch_source_proto,
            stream_source=stream_source_proto,
+            source_views=source_view_protos,
        )

        return FeatureViewProto(spec=spec, meta=meta)
@@ -403,12 +435,21 @@ def from_proto(cls, feature_view_proto: FeatureViewProto):
        Returns:
            A FeatureViewProto object based on the feature view protobuf.
        """
-        batch_source = DataSource.from_proto(feature_view_proto.spec.batch_source)
+        batch_source = (
+            DataSource.from_proto(feature_view_proto.spec.batch_source)
+            if feature_view_proto.spec.HasField("batch_source")
+            else None
+        )
        stream_source = (
            DataSource.from_proto(feature_view_proto.spec.stream_source)
            if feature_view_proto.spec.HasField("stream_source")
            else None
        )
+        source_views = [
+            FeatureView.from_proto(FeatureViewProto(spec=view_spec, meta=None))
+            for view_spec in feature_view_proto.spec.source_views
+        ]
+
        feature_view = cls(
            name=feature_view_proto.spec.name,
            description=feature_view_proto.spec.description,
@@ -421,7 +462,7 @@ def from_proto(cls, feature_view_proto: FeatureViewProto):
                if feature_view_proto.spec.ttl.ToNanoseconds() == 0
                else feature_view_proto.spec.ttl.ToTimedelta()
            ),
-            source=batch_source,
+            source=batch_source if batch_source else source_views,
        )
        if stream_source:
            feature_view.stream_source = stream_source
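A rough round-trip sketch of the proto handling above, assuming a matching source_views field was added to the FeatureViewSpec protobuf elsewhere in this change; the views reuse the illustrative definitions from the earlier sketch:

# Serializing a derived view embeds each parent view's spec in source_views.
proto = driver_daily_view.to_proto()
assert len(proto.spec.source_views) == 1
assert proto.spec.source_views[0].name == "driver_hourly_stats"

# from_proto rebuilds the parent views; when no batch_source field is set on
# the spec, the reconstructed source_views are passed back as `source`.
restored = FeatureView.from_proto(proto)
assert restored.name == "driver_daily_stats"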