import logging
from collections import Counter
-from itertools import chain
+from itertools import chain, permutations
from typing import Any, Dict, List, NamedTuple, Optional, Set, Tuple, Union

import numpy as np
@@ -283,52 +283,57 @@ def _init_params(self) -> None:
        # Build the mask over O^{-1}
        self._build_mask()

-    def _get_conditional_probs(self, source: Optional[int] = None) -> np.ndarray:
-        r"""Return the full conditional probabilities table.
+    def _get_conditional_probs(self, mu: np.ndarray) -> np.ndarray:
+        r"""Return the estimated conditional probabilities table given parameters mu.

-        In cond. prob. table, row i*(k+1) + ly is the conditional probabilities of source i
-        emmiting label ly (including abstains 0), conditioned on different
-        values of Y, i.e.:
+        Given a parameter vector mu, return the estimated conditional probabilities
+        table cprobs, where cprobs is an (m, k+1, k)-dim np.ndarray with:

-        c_probs[i*(k+1) + ly, y] = P(\lambda_i = ly | Y = y)
+        cprobs[i, j, k] = P(\lf_i = j-1 | Y = k)

-        Note that this simply involves inferring the kth row by law of total
-        probability and adding in to mu.
-
-        If ``source`` is not None, returns only the corresponding block.
+        where m is the number of LFs, k is the cardinality, and cprobs includes the
+        conditional abstain probabilities P(\lf_i = -1 | Y = y).

        Parameters
        ----------
-        source
-            Index of source to generate conditional probabilities for, by default None
+        mu
+            An [m * k, k] np.ndarray with entries in [0, 1]

        Returns
        -------
        np.ndarray
-            Conditional probabilities table if source is None, else corresponding block
+            An [m, k + 1, k] np.ndarray conditional probabilities table.
        """
-        c_probs = np.zeros((self.m * (self.cardinality + 1), self.cardinality))
-        mu = self.mu.detach().clone().numpy()
-
+        cprobs = np.zeros((self.m, self.cardinality + 1, self.cardinality))
        for i in range(self.m):
            # si = self.c_data[(i,)]['start_index']
            # ei = self.c_data[(i,)]['end_index']
            # mu_i = mu[si:ei, :]
            mu_i = mu[i * self.cardinality : (i + 1) * self.cardinality, :]
-            c_probs[
-                i * (self.cardinality + 1) + 1 : (i + 1) * (self.cardinality + 1), :
-            ] = mu_i
+            cprobs[i, 1:, :] = mu_i

            # The 0th row (corresponding to abstains) is the difference between
-            # the sums of the other rows and one, by law of total prob
-            c_probs[i * (self.cardinality + 1), :] = 1 - mu_i.sum(axis=0)
+            # the sums of the other rows and one, by law of total probability
+            cprobs[i, 0, :] = 1 - mu_i.sum(axis=0)
+        return cprobs

-        if source is not None:
-            return c_probs[
-                source * (self.cardinality + 1) : (source + 1) * (self.cardinality + 1)
-            ]
-        else:
-            return c_probs
+    def get_conditional_probs(self) -> np.ndarray:
+        r"""Return the estimated conditional probabilities table.
+
+        Return the estimated conditional probabilities table cprobs, where cprobs is an
+        (m, k+1, k)-dim np.ndarray with:
+
+        cprobs[i, j, k] = P(\lf_i = j-1 | Y = k)
+
+        where m is the number of LFs, k is the cardinality, and cprobs includes the
+        conditional abstain probabilities P(\lf_i = -1 | Y = y).
+
+        Returns
+        -------
+        np.ndarray
+            An [m, k + 1, k] np.ndarray conditional probabilities table.
+        """
+        return self._get_conditional_probs(self.mu.detach().numpy())
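A minimal usage sketch of the new public method (hypothetical, not part of this diff; label_model stands for an already-fitted LabelModel instance):

import numpy as np

cprobs = label_model.get_conditional_probs()
m, k_plus_one, k = cprobs.shape        # (num LFs, cardinality + 1, cardinality)
# Each column sums to 1 by construction: the abstain row is 1 minus the mu rows.
assert np.allclose(cprobs.sum(axis=1), 1.0)
p_abstain = cprobs[0, 0, :]            # P(\lf_0 = -1 | Y = y) for each y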

    def get_weights(self) -> np.ndarray:
        """Return the vector of learned LF weights for combining LFs.
@@ -347,10 +352,9 @@ def get_weights(self) -> np.ndarray:
        array([0.99, 0.99, 0.99])
        """
        accs = np.zeros(self.m)
+        cprobs = self.get_conditional_probs()
        for i in range(self.m):
-            cps = self._get_conditional_probs(source=i)[1:, :]
-            accs[i] = np.diag(cps @ self.P.numpy()).sum()
-
+            accs[i] = np.diag(cprobs[i, 1:, :] @ self.P.numpy()).sum()
        return np.clip(accs / self.coverage, 1e-6, 1.0)
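Illustrative arithmetic for the rewritten weight computation (made-up numbers, not from this PR): each LF's weight is its estimated accuracy on the examples it does not abstain on.

import numpy as np

acc_i = 0.60                                        # diag(cprobs[i, 1:, :] @ P).sum() = P(\lf_i = Y)
coverage_i = 0.75                                   # P(\lf_i != -1)
weight_i = np.clip(acc_i / coverage_i, 1e-6, 1.0)   # -> 0.8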

    def predict_proba(self, L: np.ndarray) -> np.ndarray:
@@ -379,7 +383,7 @@ def predict_proba(self, L: np.ndarray) -> np.ndarray:
        L_shift = L + 1  # convert to {0, 1, ..., k}
        self._set_constants(L_shift)
        L_aug = self._get_augmented_label_matrix(L_shift)
-        mu = self.mu.detach().clone().numpy()
+        mu = self.mu.detach().numpy()
        jtm = np.ones(L_aug.shape[1])

        # Note: We omit abstains, effectively assuming uniform distribution here
@@ -706,6 +710,96 @@ def _update_lr_scheduler(self, step: int) -> None:
            if min_lr and self.optimizer.param_groups[0]["lr"] < min_lr:
                self.optimizer.param_groups[0]["lr"] = min_lr

+    def _clamp_params(self) -> None:
+        """Clamp the values of the learned parameter vector.
+
+        Clamp the entries of self.mu to be in [mu_eps, 1 - mu_eps], where mu_eps is
+        either set by the user or defaults to 1 / 10 ** np.ceil(np.log10(self.n)).
+
+        Note that if mu_eps is set too high, e.g. in sparse settings where LFs
+        mostly abstain, this will result in learning conditional probabilities all
+        equal to mu_eps (and/or 1 - mu_eps)! See issue #1422.
+
+        Note: Use the user-provided value of mu_eps in train_config, else default to
+        mu_eps = 1 / 10 ** np.ceil(np.log10(self.n)); this rounding is done to make
+        it more obvious when the parameters have been clamped.
+        """
+        if self.train_config.mu_eps is not None:
+            mu_eps = self.train_config.mu_eps
+        else:
+            mu_eps = min(0.01, 1 / 10 ** np.ceil(np.log10(self.n)))
+        self.mu.data = self.mu.clamp(mu_eps, 1 - mu_eps)  # type: ignore
+
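A sketch of how the default mu_eps scales with the number of data points n (assumed example values):

import numpy as np

for n in (50, 500, 10_000):
    # ceil(log10(n)) rounds n up to the nearest power of ten; the result is capped at 0.01
    print(n, min(0.01, 1 / 10 ** np.ceil(np.log10(n))))
# 50 -> 0.01, 500 -> 0.001, 10_000 -> 0.0001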
+    def _count_accurate_lfs(self, mu: np.ndarray) -> int:
+        r"""Count the number of LFs that are estimated to be better than random.
+
+        Return the number of LFs that are estimated to be more accurate than not
+        when not abstaining, i.e., where
+
+        P(\lf = Y) > P(\lf != Y, \lf != -1).
+
+        Parameters
+        ----------
+        mu
+            An [m * k, k] np.ndarray with entries in [0, 1]
+
+        Returns
+        -------
+        int
+            Number of LFs better than random
+        """
+        P = self.P.numpy()
+        cprobs = self._get_conditional_probs(mu)
+        count = 0
+        for i in range(self.m):
+            probs = cprobs[i, 1:] @ P
+            if 2 * np.diagonal(probs).sum() - probs.sum() > 0:
+                count += 1
+        return count
+
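A toy numerical check of the better-than-random criterion above, with made-up conditional probabilities for one LF and a uniform class prior (k = 2):

import numpy as np

P = np.diag([0.5, 0.5])                 # class prior P(Y = y) on the diagonal
cprobs_i = np.array([
    [0.2, 0.4],                         # P(\lf_i = -1 | Y = y)  (abstain row)
    [0.7, 0.1],                         # P(\lf_i = 0  | Y = y)
    [0.1, 0.5],                         # P(\lf_i = 1  | Y = y)
])
probs = cprobs_i[1:] @ P                # joint P(\lf_i = y', Y = y) over non-abstains
p_correct = np.diagonal(probs).sum()    # P(\lf_i = Y)               = 0.35 + 0.25 = 0.60
p_wrong = probs.sum() - p_correct       # P(\lf_i != Y, \lf_i != -1) = 0.05 + 0.05 = 0.10
assert 2 * p_correct - probs.sum() > 0  # so this LF would be counted as accurate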
+    def _break_col_permutation_symmetry(self) -> None:
+        r"""Heuristically choose amongst (possibly) several valid mu values.
+
+        If there are several values of mu that equivalently satisfy the optimization
+        objective, as there often are due to column permutation symmetries, then pick
+        the solution that trusts the user-written LFs most.
+
+        In more detail, suppose that mu satisfies (minimizes) the two loss objectives:
+            1. O = mu @ P @ mu.T
+            2. diag(O) = sum(mu @ P, axis=1)
+        Then any column permutation matrix Z that commutes with P will also equivalently
+        satisfy these objectives, and thus is an equally valid (symmetric) solution.
+        Therefore, we select the solution where the most LFs are estimated to be more
+        accurate than not when not abstaining, i.e., where for the majority of LFs,
+
+        P(\lf = Y) > P(\lf != Y, \lf != -1).
+
+        This is the standard assumption we have made in algorithmic and theoretical
+        work to date. Note however that this is not the only possible heuristic /
+        assumption that we could use, and in practice this may require further
+        iteration here.
+        """
+        mu = self.mu.detach().numpy()
+        P = self.P.numpy()
+        d, k = mu.shape
+
+        # Iterate through the possible permutation matrices and track heuristic scores
+        Zs = []
+        scores = []
+        for idxs in permutations(range(k)):
+            Z = np.eye(k)[:, idxs]
+            Zs.append(Z)
+
+            # If Z and P commute, get heuristic score, else skip
+            if np.allclose(Z @ P, P @ Z):
+                scores.append(self._count_accurate_lfs(mu @ Z))
+            else:
+                scores.append(-1)
+
+        # Set mu according to highest-scoring permutation
+        self.mu.data = torch.Tensor(mu @ Zs[np.argmax(scores)])  # type: ignore
+
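A toy illustration of the column permutation symmetry this heuristic breaks (assumed values, not from this PR): with a uniform prior every permutation matrix commutes with P, so mu and mu @ Z satisfy the objectives equally well, and only the accuracy heuristic distinguishes them.

import numpy as np

k = 2
P = np.eye(k) / k                       # uniform class prior commutes with any permutation
mu = np.array([[0.8, 0.1],              # one LF: P(\lf = y' | Y = y)
               [0.1, 0.8]])
Z = np.eye(k)[:, [1, 0]]                # swap the two label columns
# Both parameter settings reproduce the same second moments O = mu @ P @ mu.T ...
assert np.allclose(mu @ P @ mu.T, (mu @ Z) @ P @ (mu @ Z).T)
# ... but only the unpermuted mu says the LF agrees with Y more often than not.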

    def fit(
        self,
        L_train: np.ndarray,
@@ -816,18 +910,9 @@ def fit(
            # Update learning rate
            self._update_lr_scheduler(epoch)

-        # Clamp learned parameters
-        # Note: If mu_eps is set too high, e.g. in sparse settings where LFs
-        # mostly abstain, this will result in learning conditional probabilities all
-        # equal to mu_eps (and/or 1 - mu_eps)!
-        # Note: Use user-provided value, else default to 1 / n', where n' is n rounded
-        # to the closest power of ten; this rounding is done to make it more obvious
-        # when the parameters have been clamped.
-        if self.train_config.mu_eps is not None:
-            mu_eps = self.train_config.mu_eps
-        else:
-            mu_eps = min(0.01, 1 / 10 ** np.ceil(np.log10(self.n)))
-        self.mu.data = self.mu.clamp(mu_eps, 1 - mu_eps)  # type: ignore
+        # Post-processing operations on mu
+        self._clamp_params()
+        self._break_col_permutation_symmetry()

        # Return model to eval mode
        self.eval()