============================= test session starts ==============================
platform linux -- Python 3.12.3, pytest-8.3.2, pluggy-1.5.0 -- /home/adas/mljar/mljar-supervised/venv/bin/python3
cachedir: .pytest_cache
rootdir: /home/adas/mljar/mljar-supervised
configfile: pytest.ini
plugins: cov-5.0.0
collecting ... collected 1 item
tests/tests_automl/test_data_types.py::AutoMLDataTypesTest::test_category_data_type AutoML directory: automl_tests
The task is binary_classification with evaluation metric logloss
AutoML will use algorithms: ['CatBoost']
AutoML steps: ['simple_algorithms', 'default_algorithms']
Skip simple_algorithms because no parameters were generated.
* Step default_algorithms will try to check up to 1 model
There was an error during 1_Default_CatBoost training.
Please check automl_tests/errors.md for details.
FAILED
=================================== FAILURES ===================================
_________________ AutoMLDataTypesTest.test_category_data_type __________________
self = <tests.tests_automl.test_data_types.AutoMLDataTypesTest testMethod=test_category_data_type>
    def test_category_data_type(self):
        X = np.random.rand(self.rows, 3)
        X = pd.DataFrame(X, columns=[f"f{i}" for i in range(3)])
        y = np.random.randint(0, 2, self.rows)
        X["f1"] = X["f1"].astype("category")
        automl = AutoML(
            results_path=self.automl_dir,
            total_time_limit=1,
            algorithms=["CatBoost"],
            train_ensemble=False,
            explain_level=0,
            start_random_models=1,
        )
>       automl.fit(X, y)
tests/tests_automl/test_data_types.py:34:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
supervised/automl.py:432: in fit
    return self._fit(X, y, sample_weight, cv, sensitive_features)
supervised/base_automl.py:1237: in _fit
    raise e
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = AutoML(algorithms=['CatBoost'], explain_level=0, results_path='automl_tests',
start_random_models=1, total_time_limit=1, train_ensemble=False)
X =            f0        f1        f2
0        0.862396  0.424942  0.392733
1        0.746183  0.040427  0.492273
2        0.934285  0....501938  0.523092  0.459691
248      0.854131  0.828239  0.431476
249      0.856862  0.055555  0.973876
[250 rows x 3 columns]
y = 0      1
1      1
2      0
3      0
4      1
      ..
245    1
246    0
247    0
248    1
249    0
Name: target, Length: 250, dtype: int64
sample_weight = None, cv = None, sensitive_features = None
    def _fit(self, X, y, sample_weight=None, cv=None, sensitive_features=None):
        """Fits the AutoML model with data"""
        if self._fit_level == "finished":
            print(
                "This model has already been fitted. You can use predict methods or select a new 'results_path' for a new 'fit()'."
            )
            return
        # Validate input and build dataframes
        X, y, sample_weight, sensitive_features = self._build_dataframe(
            X, y, sample_weight, sensitive_features
        )

        self.n_rows_in_ = X.shape[0]
        self.n_features_in_ = X.shape[1]
        self.n_classes = len(np.unique(y[~pd.isnull(y)]))

        # Get attributes (__init__ params)
        self._mode = self._get_mode()
        self._ml_task = self._get_ml_task()
        self._results_path = self._get_results_path()
        self._total_time_limit = self._get_total_time_limit()
        self._model_time_limit = self._get_model_time_limit()
        self._algorithms = self._get_algorithms()
        self._train_ensemble = self._get_train_ensemble()
        self._stack_models = self._get_stack_models()
        self._eval_metric = self._get_eval_metric()
        self._validation_strategy = self._get_validation_strategy()
        self._verbose = self._get_verbose()
        self._explain_level = self._get_explain_level()
        self._golden_features = self._get_golden_features()
        self._features_selection = self._get_features_selection()
        self._start_random_models = self._get_start_random_models()
        self._hill_climbing_steps = self._get_hill_climbing_steps()
        self._top_models_to_improve = self._get_top_models_to_improve()
        self._boost_on_errors = self._get_boost_on_errors()
        self._kmeans_features = self._get_kmeans_features()
        self._mix_encoding = self._get_mix_encoding()
        self._max_single_prediction_time = self._get_max_single_prediction_time()
        self._optuna_time_budget = self._get_optuna_time_budget()
        self._optuna_init_params = self._get_optuna_init_params()
        self._optuna_verbose = self._get_optuna_verbose()
        self._n_jobs = self._get_n_jobs()
        self._random_state = self._get_random_state()

        if sensitive_features is not None:
            self._fairness_metric = self._get_fairness_metric()
            self._fairness_threshold = self._get_fairness_threshold()
            self._privileged_groups = self._get_privileged_groups()
            self._underprivileged_groups = self._get_underprivileged_groups()

        self._adjust_validation = False
        self._apply_constraints()
        if not self._adjust_validation:
            # if there is no validation adjustment,
            # then we can apply the stack_models constraints immediately;
            # if there is a validation adjustment,
            # then we will apply the constraints after the adjustment
            self._apply_constraints_stack_models()
        try:
            self.load_progress()
            if self._fit_level == "finished":
                print(
                    "This model has already been fitted. You can use predict methods or select a new 'results_path' for a new 'fit()'."
                )
                return
            self._check_can_load()

            self.verbose_print(f"AutoML directory: {self._results_path}")
            if self._mode == "Optuna":
                ttl = int(len(self._algorithms) * self._optuna_time_budget)
                self.verbose_print("Expected computing time:")
                self.verbose_print(
                    f"Time for tuning with Optuna: len(algorithms) * optuna_time_budget = {int(len(self._algorithms) * self._optuna_time_budget)} seconds"
                )
                self.verbose_print(
                    "There is no time limit for ML model training after Optuna tuning (total_time_limit parameter is ignored)."
                )

            self.verbose_print(
                f"The task is {self._ml_task} with evaluation metric {self._eval_metric}"
            )
            self.verbose_print(f"AutoML will use algorithms: {self._algorithms}")
            if self._stack_models:
                self.verbose_print("AutoML will stack models")
            if self._train_ensemble:
                self.verbose_print("AutoML will ensemble available models")

            self._start_time = time.time()
            if self._time_ctrl is not None:
                self._start_time -= self._time_ctrl.already_spend()
            # Automatic Exploratory Data Analysis
            # I disabled EDA because it won't be supported;
            # I recommend using pandas_profiling or Sweetviz instead.
            # if self._explain_level == 2:
            #     EDA.compute(X, y, os.path.join(self._results_path, "EDA"))
            # Save data
            self._save_data(
                X.copy(deep=False),
                y.copy(deep=False),
                None if sample_weight is None else sample_weight.copy(deep=False),
                cv,
                None
                if sensitive_features is None
                else sensitive_features.copy(deep=False),
            )

            tuner = MljarTuner(
                self._get_tuner_params(
                    self._start_random_models,
                    self._hill_climbing_steps,
                    self._top_models_to_improve,
                ),
                self._algorithms,
                self._ml_task,
                self._eval_metric,
                self._validation_strategy,
                self._explain_level,
                self._data_info,
                self._golden_features,
                self._features_selection,
                self._train_ensemble,
                self._stack_models,
                self._adjust_validation,
                self._boost_on_errors,
                self._kmeans_features,
                self._mix_encoding,
                self._optuna_time_budget,
                self._optuna_init_params,
                self._optuna_verbose,
                self._n_jobs,
                self._random_state,
                self._fairness_metric,
                self._fairness_threshold,
                self._privileged_groups,
                self._underprivileged_groups,
            )
            self.tuner = tuner

            steps = tuner.steps()
            self.verbose_print(
                f'AutoML steps: {[s for s in steps if "update_" not in s]}'
            )
            if self._time_ctrl is None:
                self._time_ctrl = TimeController(
                    self._start_time,
                    self._total_time_limit,
                    self._model_time_limit,
                    steps,
                    self._algorithms,
                )

            self._time_ctrl.log_time(
                "prepare_data",
                "prepare_data",
                "prepare_data",
                time.time() - self._start_time,
            )

            for step in steps:
                self._fit_level = step
                start = time.time()
                # self._time_start[step] = start

                if step in ["stack", "ensemble_stacked"] and not self._stack_models:
                    continue

                if step == "stack":
                    self.prepare_for_stacking()
                if "hill_climbing" in step or step in ["ensemble", "stack"]:
                    if len(self._models) == 0:
                        raise AutoMLException(
                            "No models produced. \nPlease check your data or"
                            " submit a Github issue at https://github.com/mljar/mljar-supervised/issues/new."
                        )

                generated_params = []
                if step in self._all_params:
                    generated_params = self._all_params[step]
                else:
                    generated_params = tuner.generate_params(
                        step,
                        self._models,
                        self._results_path,
                        self._stacked_models,
                        self._total_time_limit,
                    )

                if generated_params is None or not generated_params:
                    if "_update_" not in step:
                        self.verbose_print(
                            f"Skip {step} because no parameters were generated."
                        )
                    continue
                if generated_params:
                    if not self._time_ctrl.enough_time_for_step(self._fit_level):
                        self.verbose_print(f"Skip {step} because of the time limit.")
                        continue
                    else:
                        model_str = "models" if len(generated_params) > 1 else "model"
                        self.verbose_print(
                            f"* Step {step} will try to check up to {len(generated_params)} {model_str}"
                        )

                for params in generated_params:
                    if params.get("status", "") in ["trained", "skipped", "error"]:
                        self.verbose_print(f"{params['name']}: {params['status']}.")
                        continue

                    try:
                        trained = False
                        if "ensemble" in step:
                            trained = self.ensemble_step(
                                is_stacked=params["is_stacked"]
                            )
                        else:
                            trained = self.train_model(params)
                        params["status"] = "trained" if trained else "skipped"
                        params["final_loss"] = self._models[-1].get_final_loss()
                        params["train_time"] = self._models[-1].get_train_time()

                        if (
                            self._adjust_validation
                            and len(self._models) == 1
                            and step == "adjust_validation"
                        ):
                            self._set_adjusted_validation()
                    except NotTrainedException as e:
                        params["status"] = "error"
                        self.verbose_print(
                            params.get("name") + " not trained. " + str(e)
                        )
                    except Exception as e:
                        import traceback

                        self._update_errors_report(
                            params.get("name"), str(e) + "\n" + traceback.format_exc()
                        )
                        params["status"] = "error"

                    self.save_progress(step, generated_params)

            if not self._models:
>               raise AutoMLException("No models produced.")
E               supervised.exceptions.AutoMLException: No models produced.

supervised/base_automl.py:1206: AutoMLException
------------------------------ Captured log call -------------------------------
ERROR supervised.exceptions:exceptions.py:15 No models produced.
=========================== short test summary info ============================
FAILED tests/tests_automl/test_data_types.py::AutoMLDataTypesTest::test_category_data_type
============================== 1 failed in 2.12s ===============================
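
For anyone who wants to reproduce this outside the test suite, here is a minimal standalone sketch of the failing scenario (the `results_path` value and row count are copied from the test; everything else is the public `AutoML` API):

```python
import numpy as np
import pandas as pd

from supervised.automl import AutoML

# Same data shape as the failing test: 3 float features,
# with f1 cast to the pandas "category" dtype, i.e. float-valued categories.
rows = 250
X = pd.DataFrame(np.random.rand(rows, 3), columns=[f"f{i}" for i in range(3)])
X["f1"] = X["f1"].astype("category")
y = np.random.randint(0, 2, rows)

automl = AutoML(
    results_path="automl_tests",
    total_time_limit=1,
    algorithms=["CatBoost"],
    train_ensemble=False,
    explain_level=0,
    start_random_models=1,
)
automl.fit(X, y)  # raises supervised.exceptions.AutoMLException: No models produced.
```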
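
Note that the traceback only carries the aggregate "No models produced." message; the underlying CatBoost exception is appended to `automl_tests/errors.md` by `_update_errors_report` in the `except Exception` branch shown above. A quick way to surface it while debugging (plain Python, not part of the library API):

```python
from pathlib import Path

# Print the per-model error report that AutoML wrote during the failed run.
print(Path("automl_tests/errors.md").read_text())
```

A plausible root cause, to be confirmed against errors.md: the test builds its categorical column out of floats, and CatBoost accepts only integer or string values for categorical features, so float-valued categories tend to fail inside CatBoost unless they are converted (for example, to strings) first.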