
Commit ec8835e

hyperopt fix (#257)
Co-authored-by: Gabriel Hurtado <[email protected]>
Co-authored-by: Balazs Kegl <[email protected]>
1 parent 1676d9d commit ec8835e

4 files changed: +61 -7 lines changed

rampwf/hyperopt/hyperopt.py

Lines changed: 9 additions & 4 deletions
@@ -147,7 +147,7 @@ def python_repr(self):
         The string representation of the hyperparameter.
         """
         repr = '{} = Hyperparameter(\n'.format(self.name)
-        repr += '\tdtype={}'.format(str(self.dtype))
+        repr += "\tdtype='{}'".format(str(self.dtype))
         repr += ', default={}'.format(self.default_repr)
         repr += ', values={})\n'.format(self.values_repr)
         return repr
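Note: python_repr generates the Python source that gets written back into the submission's hyperparameter block, so the unquoted form emitted a bare name rather than a string literal. A sketch of the generated line before and after the change, assuming dtype is stored as its string name ('float', as in the one_hyper_kit submission below) and using the logreg_C values from this commit:

# Before the fix (dtype emitted as a bare name, not a string literal):
logreg_C = Hyperparameter(
    dtype=float, default=1.0, values=[0.01, 0.1, 0.9, 1.0])

# After the fix (dtype emitted as a quoted string, matching the original source):
logreg_C = Hyperparameter(
    dtype='float', default=1.0, values=[0.01, 0.1, 0.9, 1.0])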
@@ -341,7 +341,8 @@ def next_hyperparameter_indices(self, df_scores, n_folds):
             next_value_indices = [
                 h.get_index(v) for h, v
                 in zip(self.hyperparameters, next_values)]
-            fold_i = incomplete_folds.iloc[0]['fold_i'] % n_folds
+            # for some reason iloc converts int to float
+            fold_i = int(incomplete_folds.iloc[0]['fold_i']) % n_folds
         # Otherwise select hyperparameter values from those that haven't
         # been selected yet, using also prior
         else:
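Note on the int cast: selecting a whole row with .iloc[0] returns a Series, and pandas upcasts mixed integer/float columns to a common dtype, so an integer fold index can come back as a float (hence the comment above). A minimal standalone illustration, with hypothetical column names rather than the project's actual df_scores:

import pandas as pd

# Hypothetical frame mixing an integer fold index with float scores.
df = pd.DataFrame({'fold_i': [3, 4], 'score': [0.81, 0.79]})
row = df.iloc[0]                # whole-row selection upcasts to a common dtype
print(row['fold_i'])            # 3.0 (float), even though the column dtype is int64
print(int(row['fold_i']) % 2)   # 1 -- casting back to int keeps % well-defined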
@@ -461,8 +462,12 @@ def _save_best_model(self):
         else:
             best_defaults = official_scores.idxmax()
         print('Best hyperparameters: ', best_defaults)
-        for bd, h in zip(best_defaults, self.hyperparameters):
-            h.set_default(bd)
+        try:
+            for bd, h in zip(best_defaults, self.hyperparameters):
+                h.set_default(bd)
+        except(TypeError):
+            # single hyperparameter
+            self.hyperparameters[0].set_default(best_defaults)
         # Overwrite the submission with the best hyperparameter values
         write_hyperparameters(
             self.submission_dir, self.submission_dir,
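Note on the try/except: with several hyperparameters, best_defaults is an iterable of values that zip can walk; with a single hyperparameter, idxmax returns a bare scalar, and zip over a scalar raises TypeError, so the value is applied directly to the only hyperparameter. A minimal illustration of the difference, using a hypothetical stand-in for official_scores:

import pandas as pd

# Hypothetical stand-in: mean official score per hyperparameter value combination.
multi = pd.Series(
    [0.7, 0.8], index=pd.MultiIndex.from_tuples([(0.1, 10), (1.0, 20)]))
single = pd.Series([0.7, 0.8], index=[0.1, 1.0])

print(multi.idxmax())   # (1.0, 20) -- a tuple, so zip() over it works
print(single.idxmax())  # 1.0 -- a bare scalar, so zip() raises TypeError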
Lines changed: 26 additions & 0 deletions (new file)
@@ -0,0 +1,26 @@
from sklearn.linear_model import LogisticRegression
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.base import BaseEstimator
from rampwf.hyperopt import Hyperparameter

# test with only one hyperparameter
# RAMP START HYPERPARAMETERS
logreg_C = Hyperparameter(
    dtype='float', default=1.0, values=[0.01, 0.1, 0.9, 1.0])
# RAMP END HYPERPARAMETERS


class Classifier(BaseEstimator):
    def __init__(self):
        self.clf = Pipeline([
            ('imputer',
             SimpleImputer(strategy='median')),
            ('classifier', LogisticRegression(C=float(logreg_C)))
        ])

    def fit(self, X, y):
        self.clf.fit(X, y)

    def predict_proba(self, X):
        return self.clf.predict_proba(X)
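Note: the RAMP START/END HYPERPARAMETERS markers delimit the block that gets overwritten with the best values, and float(logreg_C) is how the submission reads the current value (the Pipeline above relies on Hyperparameter supporting float() conversion; outside a tuning run this presumably evaluates to the default, 1.0). A hypothetical sketch of what the block could look like after a run that selects 0.1, following the python_repr format patched above:

# RAMP START HYPERPARAMETERS
logreg_C = Hyperparameter(
    dtype='float', default=0.1, values=[0.01, 0.1, 0.9, 1.0])
# RAMP END HYPERPARAMETERS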
Lines changed: 22 additions & 0 deletions (new file)
@@ -0,0 +1,22 @@
import pandas as pd


class FeatureExtractor():
    def __init__(self):
        pass

    def fit(self, X_df, y):
        pass

    def transform(self, X_df):
        X_df_new = pd.concat(
            [X_df.get(['Fare', 'Age', 'SibSp', 'Parch']),
             pd.get_dummies(X_df.Sex, prefix='Sex', drop_first=True),
             pd.get_dummies(X_df.Pclass, prefix='Pclass', drop_first=True),
             pd.get_dummies(
                 X_df.Embarked, prefix='Embarked', drop_first=True)],
            axis=1)

        X_df_new = X_df_new.fillna(-1)
        XX = X_df_new.values
        return XX
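Note: a quick, hypothetical smoke test of the extractor on a two-row Titanic-like frame (column names taken from transform above; not part of the committed tests):

import pandas as pd

X_df = pd.DataFrame({
    'Fare': [7.25, 71.28], 'Age': [22.0, None], 'SibSp': [1, 1],
    'Parch': [0, 0], 'Sex': ['male', 'female'],
    'Pclass': [3, 1], 'Embarked': ['S', 'C']})
fe = FeatureExtractor()
fe.fit(X_df, y=None)
XX = fe.transform(X_df)
print(XX.shape)  # (2, n_features); dummy columns depend on the categories present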

rampwf/hyperopt/tests/test_hyperparameter.py

Lines changed: 4 additions & 3 deletions
@@ -35,10 +35,11 @@ def test_hyperparameter():
     assert str(e.value) == 'Default must be among values.'


-def test_hyperopt():
+@pytest.mark.parametrize("submission", ['starting_kit', 'one_hyper_kit'])
+def test_hyperopt(submission):
     ramp_kit_dir = os.path.join(
         PATH, 'interfaces', 'header_in_files', 'titanic')
-    submission = 'starting_kit'
     run_hyperopt(
-        ramp_kit_dir, ramp_kit_dir, os.path.join(ramp_kit_dir, 'submissions'),
+        ramp_kit_dir, ramp_kit_dir,
+        os.path.join(ramp_kit_dir, 'submissions'),
         submission, 'random', 64, is_cleanup=True)
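Note: the parametrization exercises both the original starting_kit and the new single-hyperparameter kit through the same run_hyperopt path; to run just the new case locally, something like pytest rampwf/hyperopt/tests/test_hyperparameter.py -k one_hyper_kit should select it (hypothetical invocation, not part of the commit).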
