Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
320 changes: 148 additions & 172 deletions autosklearn/automl.py

Large diffs are not rendered by default.

210 changes: 102 additions & 108 deletions autosklearn/smbo.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
from autosklearn.metalearning.mismbo import suggest_via_metalearning
from autosklearn.util.logging_ import get_named_client_logger
from autosklearn.util.parallel import preload_modules
from autosklearn.util.stopwatch import StopWatch

EXCLUDE_META_FEATURES_CLASSIFICATION = {
"Landmark1NN",
Expand Down Expand Up @@ -86,59 +87,67 @@ def _send_warnings_to_log(message, category, filename, lineno, file, line):

# metalearning helpers
def _calculate_metafeatures(
    data_feat_type,
    data_info_task,
    basename,
    x_train,
    y_train,
    stopwatch: StopWatch,
    logger_,
):
    """Compute the meta-features of a dataset and log how long it took.

    Parameters
    ----------
    data_feat_type
        Mapping of column -> feature-type string. A column is treated as
        categorical when its type, lower-cased, is "categorical" or "string".
    data_info_task
        Task constant. It selects which meta-features are excluded and
        whether meta-features are computed at all.
    basename
        Dataset name; forwarded as ``dataset_name`` and used in log messages.
    x_train, y_train
        Training data, forwarded unchanged to
        ``calculate_all_metafeatures_with_labels``.
    stopwatch
        Timer whose ``time(name)`` context manager yields an object exposing
        ``name`` and ``wall_duration``.
    logger_
        Logger receiving progress messages and redirected warnings.

    Returns
    -------
    The object returned by ``calculate_all_metafeatures_with_labels`` with
    every entry whose ``type_`` is not "METAFEATURE" removed, or ``None``
    when ``data_info_task`` is not one of the supported tasks.
    """
    supported_tasks = (
        MULTICLASS_CLASSIFICATION,
        BINARY_CLASSIFICATION,
        MULTILABEL_CLASSIFICATION,
        REGRESSION,
        MULTIOUTPUT_REGRESSION,
    )

    # catch_warnings() restores warnings.showwarning on exit, so the redirect
    # to the logger only applies while the meta-features are computed.
    with warnings.catch_warnings():
        warnings.showwarning = get_send_warnings_to_logger(logger_)

        # == Calculate metafeatures
        with stopwatch.time("Calculate meta-features") as task_timer:
            categorical = {
                col: feat_type.lower() in {"categorical", "string"}
                for col, feat_type in data_feat_type.items()
            }

            EXCLUDE_META_FEATURES = (
                EXCLUDE_META_FEATURES_CLASSIFICATION
                if data_info_task in CLASSIFICATION_TASKS
                else EXCLUDE_META_FEATURES_REGRESSION
            )

            if data_info_task in supported_tasks:
                logger_.info("Start calculating metafeatures for %s", basename)
                result = calculate_all_metafeatures_with_labels(
                    x_train,
                    y_train,
                    categorical=categorical,
                    dataset_name=basename,
                    dont_calculate=EXCLUDE_META_FEATURES,
                    logger=logger_,
                )
                # Iterate over a snapshot of the keys: entries are deleted
                # from the mapping while we walk it.
                for key in list(result.metafeature_values.keys()):
                    if result.metafeature_values[key].type_ != "METAFEATURE":
                        del result.metafeature_values[key]
            else:
                result = None
                logger_.info("Metafeatures not calculated")

        # Logged after the timing context has closed, mirroring the previous
        # stop-then-log order (presumably wall_duration is only final once
        # the timer has stopped -- confirm against StopWatch).
        logger_.info(f"{task_timer.name} took {task_timer.wall_duration:5.2f}sec")
        return result


def _calculate_metafeatures_encoded(
data_feat_type, basename, x_train, y_train, watcher, task, logger_
data_feat_type,
basename,
x_train,
y_train,
stopwatch: StopWatch,
task,
logger_,
):
with warnings.catch_warnings():
warnings.showwarning = get_send_warnings_to_logger(logger_)
Expand All @@ -149,29 +158,25 @@ def _calculate_metafeatures_encoded(
else EXCLUDE_META_FEATURES_REGRESSION
)

task_name = "CalculateMetafeaturesEncoded"
watcher.start_task(task_name)
categorical = {
col: True if feat_type.lower() in {"categorical", "string"} else False
for col, feat_type in data_feat_type.items()
}
with stopwatch.time("Calculate meta-features encoded") as task_timer:
categorical = {
col: True if feat_type.lower() in {"categorical", "string"} else False
for col, feat_type in data_feat_type.items()
}

result = calculate_all_metafeatures_encoded_labels(
x_train,
y_train,
categorical=categorical,
dataset_name=basename,
dont_calculate=EXCLUDE_META_FEATURES,
logger=logger_,
)
for key in list(result.metafeature_values.keys()):
if result.metafeature_values[key].type_ != "METAFEATURE":
del result.metafeature_values[key]
watcher.stop_task(task_name)
logger_.info(
"Calculating Metafeatures (encoded attributes) took %5.2fsec",
watcher.wall_elapsed(task_name),
)
result = calculate_all_metafeatures_encoded_labels(
x_train,
y_train,
categorical=categorical,
dataset_name=basename,
dont_calculate=EXCLUDE_META_FEATURES,
logger=logger_,
)
for key in list(result.metafeature_values.keys()):
if result.metafeature_values[key].type_ != "METAFEATURE":
del result.metafeature_values[key]

logger_.info(f"{task_timer.name} took {task_timer.wall_duration:5.2f}sec")
return result


Expand All @@ -182,12 +187,10 @@ def _get_metalearning_configurations(
configuration_space,
task,
initial_configurations_via_metalearning,
stopwatch: StopWatch,
is_sparse,
watcher,
logger,
):
task_name = "InitialConfigurations"
watcher.start_task(task_name)
try:
metalearning_configurations = suggest_via_metalearning(
meta_base,
Expand All @@ -203,25 +206,8 @@ def _get_metalearning_configurations(
logger.error(str(e))
logger.error(traceback.format_exc())
metalearning_configurations = []
watcher.stop_task(task_name)
return metalearning_configurations


def _print_debug_info_of_init_configuration(
    initial_configurations, basename, time_for_task, logger, watcher
):
    """Log the initial configurations and the time budget left after finding them.

    Parameters
    ----------
    initial_configurations
        Sized iterable of configurations; each one is logged at DEBUG level.
    basename
        Dataset name. It is also the watcher task whose elapsed wall time is
        subtracted from ``time_for_task``.
    time_for_task
        Total time budget, in the same unit ``watcher.wall_elapsed`` returns
        (the messages label it "sec").
    logger
        Logger exposing ``debug`` and ``info``.
    watcher
        Object exposing ``wall_elapsed(task_name)``.
    """
    # Pass values as logging arguments instead of pre-formatting with ``%`` so
    # the string is only built when the record is actually emitted.
    logger.debug("Initial Configurations: (%d)", len(initial_configurations))
    for initial_configuration in initial_configurations:
        logger.debug(initial_configuration)

    logger.debug(
        "Looking for initial configurations took %5.2fsec",
        watcher.wall_elapsed("InitialConfigurations"),
    )

    time_left = time_for_task - watcher.wall_elapsed(basename)
    logger.info(
        "Time left for %s after finding initial configurations: %5.2fsec",
        basename,
        time_left,
    )
return metalearning_configurations


def get_smac_object(
Expand Down Expand Up @@ -259,7 +245,7 @@ def get_smac_object(
)


class AutoMLSMBO(object):
class AutoMLSMBO:
def __init__(
self,
config_space,
Expand All @@ -269,7 +255,7 @@ def __init__(
func_eval_time_limit,
memory_limit,
metric,
watcher,
stopwatch: StopWatch,
n_jobs,
dask_client: dask.distributed.Client,
port: int,
Expand Down Expand Up @@ -319,7 +305,7 @@ def __init__(
self.func_eval_time_limit = int(func_eval_time_limit)
self.memory_limit = memory_limit
self.data_memory_limit = data_memory_limit
self.watcher = watcher
self.stopwatch = stopwatch
self.num_metalearning_cfgs = num_metalearning_cfgs
self.config_file = config_file
self.seed = seed
Expand Down Expand Up @@ -364,24 +350,29 @@ def reset_data_manager(self, max_mem=None):
self.task = self.datamanager.info["task"]

def collect_metalearning_suggestions(self, meta_base):
    """Fetch meta-learning initial configurations and log timing information.

    The lookup is delegated to ``_get_metalearning_configurations`` and timed
    under the stopwatch task "Initial Configurations". Afterwards the
    configurations, the lookup duration and the remaining wall-time budget
    for the dataset are logged.

    Parameters
    ----------
    meta_base
        Meta-learning base forwarded to ``_get_metalearning_configurations``.

    Returns
    -------
    Whatever ``_get_metalearning_configurations`` returned.
    """
    with self.stopwatch.time("Initial Configurations") as task_timer:
        metalearning_configurations = _get_metalearning_configurations(
            meta_base=meta_base,
            basename=self.dataset_name,
            metric=self.metric,
            configuration_space=self.config_space,
            task=self.task,
            is_sparse=self.datamanager.info["is_sparse"],
            initial_configurations_via_metalearning=self.num_metalearning_cfgs,
            stopwatch=self.stopwatch,
            logger=self.logger,
        )

    self.logger.debug(f"Initial Configurations: {len(metalearning_configurations)}")
    for config in metalearning_configurations:
        self.logger.debug(config)

    self.logger.debug(f"{task_timer.name} took {task_timer.wall_duration:5.2f}sec")

    # The remaining budget is measured from the start of the dataset task, so
    # the message names the dataset rather than the timer that just finished.
    time_since_start = self.stopwatch.time_since(self.dataset_name, "start")
    time_left = self.total_walltime_limit - time_since_start
    self.logger.info(
        f"Time left for {self.dataset_name} after finding initial "
        f"configurations: {time_left:5.2f}sec"
    )

    return metalearning_configurations

Expand All @@ -404,7 +395,7 @@ def _calculate_metafeatures_with_limits(self, time_limit):
x_train=self.datamanager.data["X_train"],
y_train=self.datamanager.data["Y_train"],
basename=self.dataset_name,
watcher=self.watcher,
stopwatch=self.stopwatch,
logger_=self.logger,
)
except Exception as e:
Expand All @@ -431,7 +422,7 @@ def _calculate_metafeatures_encoded_with_limits(self, time_limit):
x_train=self.datamanager.data["X_train"],
y_train=self.datamanager.data["Y_train"],
basename=self.dataset_name,
watcher=self.watcher,
stopwatch=self.stopwatch,
logger_=self.logger,
)
except Exception as e:
Expand All @@ -441,7 +432,7 @@ def _calculate_metafeatures_encoded_with_limits(self, time_limit):

def run_smbo(self):

self.watcher.start_task("SMBO")
self.stopwatch.start("SMBO")

# == first things first: load the datamanager
self.reset_data_manager()
Expand Down Expand Up @@ -488,8 +479,9 @@ def run_smbo(self):
)
ta = ExecuteTaFuncWithQueue

startup_time = self.watcher.wall_elapsed(self.dataset_name)
startup_time = self.stopwatch.time_since(self.dataset_name, "start")
total_walltime_limit = self.total_walltime_limit - startup_time - 5

scenario_dict = {
"abort_on_first_run_crash": False,
"save-results-instantly": True,
Expand Down Expand Up @@ -564,6 +556,8 @@ def run_smbo(self):
else:
raise NotImplementedError(type(smac.solver.tae_runner))

self.stopwatch.stop("SMBO")

return self.runhistory, self.trajectory, self._budget_type

def get_metalearning_suggestions(self):
Expand Down
Loading