41
41
42
42
import numpy as np
43
43
44
- from neuraxle .base import BaseStep , ExecutionContext , ForceHandleMixin , ExecutionPhase , _HasChildrenMixin , \
45
- LOGGER_FORMAT , DATE_FORMAT
44
+ from neuraxle .base import BaseStep , ExecutionContext , ForceHandleMixin , ExecutionPhase , _HasChildrenMixin
46
45
from neuraxle .data_container import DataContainer
47
46
from neuraxle .hyperparams .space import HyperparameterSamples , HyperparameterSpace
48
47
from neuraxle .metaopt .callbacks import BaseCallback , CallbackList , ScoringCallback
54
53
class HyperparamsRepository (_Observable [Tuple ['HyperparamsRepository' , Trial ]], ABC ):
55
54
"""
56
55
Hyperparams repository that saves hyperparams, and scores for every AutoML trial.
56
+ Cache folder can be changed to do different round numbers.
57
57
58
58
.. seealso::
59
59
:class:`AutoML`,
@@ -66,10 +66,15 @@ class HyperparamsRepository(_Observable[Tuple['HyperparamsRepository', Trial]],
66
66
:class:`~neuraxle.hyperparams.space.HyperparameterSamples`
67
67
"""
68
68
69
- def __init__ (self , hyperparameter_selection_strategy = None , cache_folder = None , best_retrained_model_folder = None ):
69
+ def __init__ (
70
+ self ,
71
+ hyperparameter_selection_strategy : 'BaseHyperparameterSelectionStrategy' = None ,
72
+ cache_folder : str = None ,
73
+ best_retrained_model_folder : str = None ,
74
+ ):
70
75
super ().__init__ ()
71
76
if cache_folder is None :
72
- cache_folder = ' trials'
77
+ cache_folder = os . path . join ( f' { self . __class__ . __name__ } ' , ' trials')
73
78
if best_retrained_model_folder is None :
74
79
best_retrained_model_folder = os .path .join (cache_folder , 'best' )
75
80
self .best_retrained_model_folder = best_retrained_model_folder
@@ -155,7 +160,7 @@ def save_best_model(self, step: BaseStep):
155
160
self ._save_best_model (step , trial_hash )
156
161
return step
157
162
158
- def new_trial (self , auto_ml_container : 'AutoMLContainer' ):
163
+ def new_trial (self , auto_ml_container : 'AutoMLContainer' ) -> Trial :
159
164
"""
160
165
Create a new trial with the best next hyperparams.
161
166
@@ -164,19 +169,16 @@ def new_trial(self, auto_ml_container: 'AutoMLContainer'):
164
169
:return: trial
165
170
"""
166
171
hyperparams = self .hyperparameter_selection_strategy .find_next_best_hyperparams (auto_ml_container )
167
- logger = self ._create_logger_for_trial (auto_ml_container .trial_number )
168
- logger .info ('\n new trial: {}' .format (json .dumps (hyperparams .to_nested_dict (), sort_keys = True , indent = 4 )))
169
172
170
173
trial = Trial (
174
+ trial_number = auto_ml_container .trial_number ,
171
175
hyperparams = hyperparams ,
172
176
save_trial_function = self .save_trial ,
173
- logger = logger ,
174
177
cache_folder = self .cache_folder ,
175
178
main_metric_name = auto_ml_container .main_scoring_metric_name
176
179
)
177
180
return trial
178
181
179
-
180
182
def _get_trial_hash (self , hp_dict ):
181
183
"""
182
184
Hash hyperparams with md5 to create a trial hash.
@@ -187,19 +189,6 @@ def _get_trial_hash(self, hp_dict):
187
189
current_hyperparameters_hash = hashlib .md5 (str .encode (str (hp_dict ))).hexdigest ()
188
190
return current_hyperparameters_hash
189
191
190
- def _create_logger_for_trial (self , trial_number ) -> logging .Logger :
191
-
192
- os .makedirs (self .cache_folder , exist_ok = True )
193
-
194
- logfile_path = os .path .join (self .cache_folder , f"trial_{ trial_number } .log" )
195
- logger_name = f"trial_{ trial_number } "
196
- logger = logging .getLogger (logger_name )
197
- formatter = logging .Formatter (fmt = LOGGER_FORMAT , datefmt = DATE_FORMAT )
198
- file_handler = logging .FileHandler (filename = logfile_path )
199
- file_handler .setFormatter (formatter )
200
- logger .addHandler (file_handler )
201
- return logger
202
-
203
192
204
193
class InMemoryHyperparamsRepository (HyperparamsRepository ):
205
194
"""
@@ -329,14 +318,14 @@ def _save_trial(self, trial: 'Trial'):
329
318
# Sleeping to have a valid time difference between files when reloading them to sort them by creation time:
330
319
time .sleep (0.1 )
331
320
332
- def new_trial (self , auto_ml_container : 'AutoMLContainer' ):
321
+ def new_trial (self , auto_ml_container : 'AutoMLContainer' ) -> Trial :
333
322
"""
334
323
Create new hyperparams trial json file.
335
324
336
325
:param auto_ml_container: auto ml container
337
326
:return: the newly created trial, after it has been saved
338
327
"""
339
- trial = HyperparamsRepository .new_trial (self , auto_ml_container )
328
+ trial : Trial = HyperparamsRepository .new_trial (self , auto_ml_container )
340
329
self ._save_trial (trial )
341
330
342
331
return trial
@@ -346,6 +335,7 @@ def load_all_trials(self, status: 'TRIAL_STATUS' = None) -> 'Trials':
346
335
Load all hyperparameter trials with their corresponding score.
347
336
Reads all the saved trial json files, sorted by creation date.
348
337
338
+ :param status: (optional) filter to select only trials with this status.
349
339
:return: the loaded trials, as a Trials object
350
340
"""
351
341
trials = Trials ()
@@ -370,7 +360,8 @@ def getmtimens(filename):
370
360
if status is None or trial_json ['status' ] == status .value :
371
361
trials .append (Trial .from_json (
372
362
update_trial_function = self .save_trial ,
373
- trial_json = trial_json
363
+ trial_json = trial_json ,
364
+ cache_folder = self .cache_folder
374
365
))
375
366
376
367
return trials
@@ -498,7 +489,14 @@ def __init__(
498
489
hyperparams_repository = InMemoryHyperparamsRepository ()
499
490
self .hyperparams_repository : HyperparamsRepository = hyperparams_repository
500
491
501
- def train (self , pipeline : BaseStep , data_inputs , expected_outputs = None , context : ExecutionContext = None ) -> Trial :
492
+ def train (
493
+ self ,
494
+ pipeline : BaseStep ,
495
+ data_inputs ,
496
+ expected_outputs = None ,
497
+ context : ExecutionContext = None ,
498
+ trial_number = 0
499
+ ) -> Trial :
502
500
"""
503
501
Train pipeline using the validation splitter.
504
502
Track training, and validation metrics for each epoch.
@@ -523,12 +521,12 @@ def train(self, pipeline: BaseStep, data_inputs, expected_outputs=None, context:
523
521
logger = context .logger ,
524
522
hyperparams = pipeline .get_hyperparams (),
525
523
main_metric_name = self .get_main_metric_name (),
526
- save_trial_function = self .hyperparams_repository .save_trial
524
+ save_trial_function = self .hyperparams_repository .save_trial ,
525
+ trial_number = trial_number
527
526
)
528
527
529
528
self .execute_trial (
530
529
pipeline = pipeline ,
531
- trial_number = 1 ,
532
530
repo_trial = repo_trial ,
533
531
context = context ,
534
532
validation_splits = validation_splits ,
@@ -541,7 +539,6 @@ def train(self, pipeline: BaseStep, data_inputs, expected_outputs=None, context:
541
539
def execute_trial (
542
540
self ,
543
541
pipeline : BaseStep ,
544
- trial_number : int ,
545
542
repo_trial : Trial ,
546
543
context : ExecutionContext ,
547
544
validation_splits : List [Tuple [DataContainer , DataContainer ]],
@@ -576,7 +573,7 @@ def execute_trial(
576
573
repo_trial = repo_trial ,
577
574
repo_trial_split_number = repo_trial_split .split_number ,
578
575
validation_splits = validation_splits ,
579
- trial_number = trial_number ,
576
+ trial_number = repo_trial . trial_number ,
580
577
n_trial = n_trial
581
578
)
582
579
@@ -867,7 +864,6 @@ def _attempt_trial(self, trial_number, validation_splits, context: ExecutionCont
867
864
repo_trial_split = self .trainer .execute_trial (
868
865
pipeline = self .pipeline ,
869
866
context = context ,
870
- trial_number = trial_number ,
871
867
repo_trial = repo_trial ,
872
868
validation_splits = validation_splits ,
873
869
n_trial = self .n_trial
@@ -1153,8 +1149,9 @@ class ValidationSplitter(BaseValidationSplitter):
1153
1149
def __init__ (self , test_size : float ):
1154
1150
self .test_size = test_size
1155
1151
1156
- def split (self , data_inputs , current_ids = None , expected_outputs = None , context : ExecutionContext = None ) -> Tuple [
1157
- List , List , List , List ]:
1152
+ def split (
1153
+ self , data_inputs , current_ids = None , expected_outputs = None , context : ExecutionContext = None
1154
+ ) -> Tuple [List , List , List , List ]:
1158
1155
train_data_inputs , train_expected_outputs , train_current_ids , validation_data_inputs , validation_expected_outputs , validation_current_ids = validation_split (
1159
1156
test_size = self .test_size ,
1160
1157
data_inputs = data_inputs ,
0 commit comments