8
8
from dask import dataframe as dd
9
9
10
10
from snorkel .labeling import LFApplier , PandasLFApplier , labeling_function
11
+ from snorkel .labeling .apply .core import ApplierMetadata
11
12
from snorkel .labeling .apply .dask import DaskLFApplier , PandasParallelLFApplier
12
13
from snorkel .preprocess import preprocessor
13
14
from snorkel .preprocess .nlp import SpacyPreprocessor
@@ -59,8 +60,14 @@ def g_np(x: DataPoint, db: List[int]) -> int:
59
60
return 0 if x [1 ] in db else - 1
60
61
61
62
63
@labeling_function()
def f_bad(x: DataPoint) -> int:
    """Vote 0 when ``x.mum`` exceeds 42, otherwise abstain with -1.

    NOTE(review): the fault-tolerance tests in this file build their data
    points with only a ``num`` field and assert that applying this function
    raises ``AttributeError``, so the ``mum`` spelling appears to be
    deliberate. Do not "correct" it to ``num``.
    """
    if x.mum > 42:
        return 0
    return -1
66
+
67
+
62
68
# Raw numeric inputs shared by the applier tests; the tests wrap each value
# in a data point (or a DataFrame row) under the field name ``num``.
DATA = [3, 43, 12, 9, 3]

# Expected label matrices. Each has one row per entry of DATA and two
# columns, matching the two labeling functions handed to each applier.
L_EXPECTED = np.array(
    [
        [-1, 0],
        [0, -1],
        [-1, -1],
        [-1, 0],
        [-1, 0],
    ]
)
# Expected output of the ``[f, f_bad]`` appliers when run fault-tolerantly:
# the second column (``f_bad``) is -1 in every row.
L_EXPECTED_BAD = np.array(
    [
        [-1, -1],
        [0, -1],
        [-1, -1],
        [-1, -1],
        [-1, -1],
    ]
)
L_PREPROCESS_EXPECTED = np.array(
    [
        [-1, -1],
        [0, 0],
        [-1, 0],
        [-1, 0],
        [-1, -1],
    ]
)

# Short text inputs; presumably consumed by the text/NLP applier tests
# elsewhere in this module.
TEXT_DATA = ["Jane", "Jane plays soccer.", "Jane plays soccer."]
@@ -75,6 +82,22 @@ def test_lf_applier(self) -> None:
75
82
np .testing .assert_equal (L , L_EXPECTED )
76
83
L = applier .apply (data_points , progress_bar = True )
77
84
np .testing .assert_equal (L , L_EXPECTED )
85
+ L , meta = applier .apply (data_points , return_meta = True )
86
+ np .testing .assert_equal (L , L_EXPECTED )
87
+ self .assertEqual (meta , ApplierMetadata (dict ()))
88
+
89
def test_lf_applier_fault(self) -> None:
    """Check ``LFApplier`` behavior when one labeling function raises.

    ``f_bad`` reads an attribute (``mum``) that the data points built here
    do not have, so a plain ``apply`` is expected to raise
    ``AttributeError``. With ``fault_tolerant=True`` the call must succeed
    and produce ``L_EXPECTED_BAD``; with ``return_meta=True`` as well, the
    returned metadata must equal ``ApplierMetadata(dict(f_bad=5))``.
    """
    points = [SimpleNamespace(num=value) for value in DATA]
    faulty_applier = LFApplier([f, f_bad])

    # Default mode: the error from f_bad propagates to the caller.
    with self.assertRaises(AttributeError):
        faulty_applier.apply(points, progress_bar=False)

    # Fault-tolerant mode, label matrix only.
    labels = faulty_applier.apply(points, progress_bar=False, fault_tolerant=True)
    np.testing.assert_equal(labels, L_EXPECTED_BAD)

    # Fault-tolerant mode with metadata. The expected count of 5 for f_bad
    # equals len(DATA) -- presumably one recorded fault per data point.
    labels, metadata = faulty_applier.apply(
        points, progress_bar=False, fault_tolerant=True, return_meta=True
    )
    np.testing.assert_equal(labels, L_EXPECTED_BAD)
    expected_metadata = ApplierMetadata(dict(f_bad=5))
    self.assertEqual(metadata, expected_metadata)
78
101
79
102
def test_lf_applier_preprocessor (self ) -> None :
80
103
data_points = [SimpleNamespace (num = num ) for num in DATA ]
@@ -121,6 +144,22 @@ def test_lf_applier_pandas(self) -> None:
121
144
np .testing .assert_equal (L , L_EXPECTED )
122
145
L = applier .apply (df , progress_bar = True )
123
146
np .testing .assert_equal (L , L_EXPECTED )
147
+ L , meta = applier .apply (df , return_meta = True )
148
+ np .testing .assert_equal (L , L_EXPECTED )
149
+ self .assertEqual (meta , ApplierMetadata (dict ()))
150
+
151
def test_lf_applier_pandas_fault(self) -> None:
    """Check ``PandasLFApplier`` behavior when one labeling function raises.

    The DataFrame only has a ``num`` column while ``f_bad`` reads ``mum``,
    so a plain ``apply`` is expected to raise ``AttributeError``. With
    ``fault_tolerant=True`` the call must succeed and produce
    ``L_EXPECTED_BAD``; with ``return_meta=True`` as well, the returned
    metadata must equal ``ApplierMetadata(dict(f_bad=5))``.
    """
    frame = pd.DataFrame({"num": DATA})
    faulty_applier = PandasLFApplier([f, f_bad])

    # Default mode: the error from f_bad propagates to the caller.
    with self.assertRaises(AttributeError):
        faulty_applier.apply(frame, progress_bar=False)

    # Fault-tolerant mode, label matrix only.
    labels = faulty_applier.apply(frame, progress_bar=False, fault_tolerant=True)
    np.testing.assert_equal(labels, L_EXPECTED_BAD)

    # Fault-tolerant mode with metadata. The expected count of 5 for f_bad
    # equals len(DATA) -- presumably one recorded fault per row.
    labels, metadata = faulty_applier.apply(
        frame, progress_bar=False, fault_tolerant=True, return_meta=True
    )
    np.testing.assert_equal(labels, L_EXPECTED_BAD)
    expected_metadata = ApplierMetadata(dict(f_bad=5))
    self.assertEqual(metadata, expected_metadata)
124
163
125
164
def test_lf_applier_pandas_preprocessor (self ) -> None :
126
165
df = pd .DataFrame (dict (num = DATA ))
@@ -189,6 +228,15 @@ def test_lf_applier_dask(self) -> None:
189
228
L = applier .apply (df )
190
229
np .testing .assert_equal (L , L_EXPECTED )
191
230
231
def test_lf_applier_dask_fault(self) -> None:
    """Check ``DaskLFApplier`` behavior when one labeling function raises.

    A plain ``apply`` over the two-partition Dask DataFrame is expected to
    raise (this test only asserts the broad ``Exception`` type), while
    ``fault_tolerant=True`` must succeed and produce ``L_EXPECTED_BAD``.
    """
    pandas_frame = pd.DataFrame({"num": DATA})
    dask_frame = dd.from_pandas(pandas_frame, npartitions=2)
    faulty_applier = DaskLFApplier([f, f_bad])

    # Default mode: some exception must surface from the failing f_bad.
    with self.assertRaises(Exception):
        faulty_applier.apply(dask_frame)

    # Fault-tolerant mode: the call completes and yields the expected labels.
    labels = faulty_applier.apply(dask_frame, fault_tolerant=True)
    np.testing.assert_equal(labels, L_EXPECTED_BAD)
239
+
192
240
def test_lf_applier_dask_preprocessor (self ) -> None :
193
241
df = pd .DataFrame (dict (num = DATA ))
194
242
df = dd .from_pandas (df , npartitions = 2 )
0 commit comments