@@ -26,8 +26,20 @@ def convert_sklearn_ordinal_encoder(
26
26
dimension_idx = 0
27
27
28
28
# handle the 'handle_unknown=use_encoded_value' case
29
+ use_float = (
30
+ False
31
+ if ordinal_op .unknown_value is None
32
+ else isinstance (ordinal_op .unknown_value , float )
33
+ or np .isnan (ordinal_op .unknown_value )
34
+ )
29
35
default_value = (
30
- None if ordinal_op .handle_unknown == "error" else int (ordinal_op .unknown_value )
36
+ None
37
+ if ordinal_op .handle_unknown == "error"
38
+ else (
39
+ float (ordinal_op .unknown_value )
40
+ if use_float
41
+ else int (ordinal_op .unknown_value )
42
+ )
31
43
)
32
44
33
45
for categories in ordinal_op .categories_ :
@@ -103,24 +115,28 @@ def convert_sklearn_ordinal_encoder(
103
115
)
104
116
105
117
# handle encoded_missing_value
118
+ key = "values_floats" if use_float else "values_int64s"
119
+ dtype = np .float32 if use_float else np .int64
106
120
if not np .isnan (ordinal_op .encoded_missing_value ) and (
107
121
isinstance (categories [- 1 ], float ) and np .isnan (categories [- 1 ])
108
122
):
109
123
# sklearn always places np.nan as the last entry
110
- # in its cathegories if it was in the training data
124
+ # in its categories if it was in the training data
111
125
# => we simply add the 'ordinal_op.encoded_missing_value'
112
126
# as our last entry in the values attribute ('values_int64s' or 'values_floats') if it was in the training data
113
127
encoded_missing_value = np .array (
114
128
[int (ordinal_op .encoded_missing_value )]
115
- ).astype (np . int64 )
116
- attrs ["values_int64s" ] = np .concatenate (
117
- (np .arange (len (categories ) - 1 ).astype (np . int64 ), encoded_missing_value )
129
+ ).astype (dtype )
130
+ attrs [key ] = np .concatenate (
131
+ (np .arange (len (categories ) - 1 ).astype (dtype ), encoded_missing_value )
118
132
)
119
133
else :
120
- attrs ["values_int64s" ] = np .arange (len (categories )).astype (np . int64 )
134
+ attrs [key ] = np .arange (len (categories )).astype (dtype )
121
135
122
- if default_value :
123
- attrs ["default_int64" ] = default_value
136
+ if default_value or (
137
+ isinstance (default_value , float ) and np .isnan (default_value )
138
+ ):
139
+ attrs ["default_float" if use_float else "default_int64" ] = default_value
124
140
125
141
result .append (scope .get_unique_variable_name ("ordinal_output" ))
126
142
label_encoder_output = scope .get_unique_variable_name ("label_encoder" )
0 commit comments