automl · mfeurer · Dec 6, 2018 · May 3, 2018 · May 8, 2018 · May 9, 2018
diff --git a/autosklearn/ensembles/ensemble_selection.py b/autosklearn/ensembles/ensemble_selection.py
@@ -209,10 +209,23 @@ def _bagging(self, predictions, labels, fraction=0.5, n_bags=20):
         return np.array(order_of_each_bag)
 
     def predict(self, predictions):
-        non_null_weights = (weight for  weight in self.weights_ if weight > 0)
-        for i, weight in enumerate(non_null_weights):
-            predictions[i] *= weight
-        return np.sum(predictions, axis=0)
+        """Return the weighted average of the per-model ``predictions``.
+
+        ``predictions`` holds one entry per model in ``self.weights_`` or
+        one entry per model with a strictly positive weight; any other
+        leading dimension raises ``ValueError``.
+        """
+        predictions = np.asarray(predictions)
+        # Filter once so the size check and the averaging use the same rule.
+        non_null_weights = [w for w in self.weights_ if w > 0]
+        # Case 1: predictions include those of zero-weight models.
+        if predictions.shape[0] == len(self.weights_):
+            return np.average(predictions, axis=0, weights=self.weights_)
+        # Case 2: predictions do not include those of zero-weight models.
+        if predictions.shape[0] == len(non_null_weights):
+            return np.average(predictions, axis=0, weights=non_null_weights)
+        raise ValueError("The dimensions of ensemble predictions"
+                         " and ensemble weights do not match!")
 
     def __str__(self):
         return 'Ensemble Selection:\n\tTrajectory: %s\n\tMembers: %s' \

diff --git a/autosklearn/estimators.py b/autosklearn/estimators.py
@@ -1,5 +1,6 @@
 # -*- encoding: utf-8 -*-
 from sklearn.base import BaseEstimator
+import numpy as np
 
 from autosklearn.automl import AutoMLClassifier, AutoMLRegressor
 from autosklearn.util.backend import create
@@ -486,6 +487,9 @@ def fit(self, X, y,
             raise ValueError("classification with data of type %s is"
                              " not supported" % target_type)
 
+        # Remember the target type for use in predict_proba later.
+        self.target_type = target_type
+
         super().fit(
             X=X,
             y=y,
@@ -527,9 +531,25 @@ def predict_proba(self, X, batch_size=None, n_jobs=1):
             The predicted class probabilities.
 
         """
-        return super().predict_proba(
+        pred_proba = super().predict_proba(
             X, batch_size=batch_size, n_jobs=n_jobs)
 
+        # Check if all probabilities sum up to 1.
+        # Assert only if target type is not multilabel-indicator.
+        if self.target_type not in ['multilabel-indicator']:
+            assert(
+                np.allclose(
+                    np.sum(pred_proba, axis=1),
+                    np.ones_like(pred_proba[:, 0]))
+            ), "prediction probability does not sum up to 1!"
+
+        # Check that all probability values lie between 0 and 1.
+        assert(
+            (pred_proba >= 0).all() and (pred_proba <= 1).all()
+        ), "found prediction probability value outside of [0, 1]!"
+
+        return pred_proba
+
     def _get_automl_class(self):
         return AutoMLClassifier
 

diff --git a/test/test_ensemble_builder/test_ensemble.py b/test/test_ensemble_builder/test_ensemble.py
@@ -6,14 +6,14 @@
 import unittest
 import unittest.mock
 
+from autosklearn.metrics import roc_auc, accuracy
+from autosklearn.ensembles.ensemble_selection import EnsembleSelection
+from autosklearn.ensemble_builder import EnsembleBuilder, Y_VALID, Y_TEST
 import numpy as np
 
 this_directory = os.path.dirname(__file__)
 sys.path.append(this_directory)
 
-from autosklearn.ensemble_builder import EnsembleBuilder, Y_ENSEMBLE, Y_VALID, Y_TEST
-from autosklearn.metrics import roc_auc
-
 
 class BackendMock(object):
 
@@ -260,3 +260,68 @@ def testLimit(self):
 
         # it should try to reduce ensemble_nbest until it also failed at 2
         self.assertEqual(ensbuilder.ensemble_nbest,1)
+
+
+class EnsembleSelectionTest(unittest.TestCase):
+    def testPredict(self):
+        # Test that predict() applies the ensemble weights correctly to the
+        # given per-model predictions. There are two valid cases:
+        # 1) predictions.shape[0] == len(self.weights_): the predictions
+        # include those made by zero-weight models, so each weight is
+        # applied to the predictions of the corresponding model.
+        # 2) predictions.shape[0] == number of non-zero weights: the
+        # predictions exclude those made by zero-weight models, so the
+        # zero entries are dropped from self.weights_ before the remaining
+        # weights are applied.
+        # In any other case, predict() raises a ValueError.
+        ensemble = EnsembleSelection(ensemble_size=3,
+                                     task_type=1,
+                                     metric=accuracy,
+                                     )
+        # Case 1: predictions of shape (3, 2, 2), one (2, 2) array per model.
+        per_model_pred = np.array([
+            [[0.9, 0.1],
+             [0.4, 0.6]],
+            [[0.8, 0.2],
+             [0.3, 0.7]],
+            [[1.0, 0.0],
+             [0.1, 0.9]]
+        ])
+        # One weight per model; all three are non-zero.
+        ensemble.weights_ = [0.7, 0.2, 0.1]
+        pred = ensemble.predict(per_model_pred)
+        truth = np.array([[0.89, 0.11],  # Expected weighted average.
+                          [0.35, 0.65]])
+        self.assertTrue(np.allclose(pred, truth))
+
+        # Case 2: the same three predictions, but four weights (one is zero).
+        per_model_pred = np.array([
+            [[0.9, 0.1],
+             [0.4, 0.6]],
+            [[0.8, 0.2],
+             [0.3, 0.7]],
+            [[1.0, 0.0],
+             [0.1, 0.9]]
+        ])
+        # The third of the four models has a weight of zero.
+        ensemble.weights_ = [0.7, 0.2, 0.0, 0.1]
+        pred = ensemble.predict(per_model_pred)
+        truth = np.array([[0.89, 0.11],
+                          [0.35, 0.65]])
+        self.assertTrue(np.allclose(pred, truth))
+
+        # Error case: the number of predictions matches neither count.
+        per_model_pred = np.array([
+            [[0.9, 0.1],
+             [0.4, 0.6]],
+            [[0.8, 0.2],
+             [0.3, 0.7]],
+            [[1.0, 0.0],
+             [0.1, 0.9]]
+        ])
+        # Four weights of which only two are non-zero: three predictions
+        # match neither count, so predict() must raise a ValueError.
+        ensemble.weights_ = [0.6, 0.0, 0.0, 0.4]
+
+        with self.assertRaises(ValueError):
+            ensemble.predict(per_model_pred)