Failing test: tests/tests_automl/test_data_types.py::AutoMLDataTypesTest::test_category_data_type #750


Description

@a-szulc

The test fails outright: the only candidate model, 1_Default_CatBoost, errors during training (the underlying error is written to automl_tests/errors.md), and AutoML then raises AutoMLException: No models produced. Full pytest output:
============================= test session starts ==============================
platform linux -- Python 3.12.3, pytest-8.3.2, pluggy-1.5.0 -- /home/adas/mljar/mljar-supervised/venv/bin/python3
cachedir: .pytest_cache
rootdir: /home/adas/mljar/mljar-supervised
configfile: pytest.ini
plugins: cov-5.0.0
collecting ... collected 1 item

tests/tests_automl/test_data_types.py::AutoMLDataTypesTest::test_category_data_type AutoML directory: automl_tests
The task is binary_classification with evaluation metric logloss
AutoML will use algorithms: ['CatBoost']
AutoML steps: ['simple_algorithms', 'default_algorithms']
Skip simple_algorithms because no parameters were generated.
* Step default_algorithms will try to check up to 1 model
There was an error during 1_Default_CatBoost training.
Please check automl_tests/errors.md for details.
FAILED

=================================== FAILURES ===================================
_________________ AutoMLDataTypesTest.test_category_data_type __________________

self = <tests.tests_automl.test_data_types.AutoMLDataTypesTest testMethod=test_category_data_type>

    def test_category_data_type(self):
        X = np.random.rand(self.rows, 3)
        X = pd.DataFrame(X, columns=[f"f{i}" for i in range(3)])
        y = np.random.randint(0, 2, self.rows)
    
        X["f1"] = X["f1"].astype("category")
    
        automl = AutoML(
            results_path=self.automl_dir,
            total_time_limit=1,
            algorithms=["CatBoost"],
            train_ensemble=False,
            explain_level=0,
            start_random_models=1,
        )
>       automl.fit(X, y)

tests/tests_automl/test_data_types.py:34: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
supervised/automl.py:432: in fit
    return self._fit(X, y, sample_weight, cv, sensitive_features)
supervised/base_automl.py:1237: in _fit
    raise e
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = AutoML(algorithms=['CatBoost'], explain_level=0, results_path='automl_tests',
       start_random_models=1, total_time_limit=1, train_ensemble=False)
X =            f0        f1        f2
0    0.862396  0.424942  0.392733
1    0.746183  0.040427  0.492273
2    0.934285  0....501938  0.523092  0.459691
248  0.854131  0.828239  0.431476
249  0.856862  0.055555  0.973876

[250 rows x 3 columns]
y = 0      1
1      1
2      0
3      0
4      1
      ..
245    1
246    0
247    0
248    1
249    0
Name: target, Length: 250, dtype: int64
sample_weight = None, cv = None, sensitive_features = None

    def _fit(self, X, y, sample_weight=None, cv=None, sensitive_features=None):
        """Fits the AutoML model with data"""
        if self._fit_level == "finished":
            print(
                "This model has already been fitted. You can use predict methods or select a new 'results_path' for a new 'fit()'."
            )
            return
        # Validate input and build dataframes
        X, y, sample_weight, sensitive_features = self._build_dataframe(
            X, y, sample_weight, sensitive_features
        )
    
        self.n_rows_in_ = X.shape[0]
        self.n_features_in_ = X.shape[1]
        self.n_classes = len(np.unique(y[~pd.isnull(y)]))
    
        # Get attributes (__init__ params)
        self._mode = self._get_mode()
        self._ml_task = self._get_ml_task()
        self._results_path = self._get_results_path()
        self._total_time_limit = self._get_total_time_limit()
        self._model_time_limit = self._get_model_time_limit()
        self._algorithms = self._get_algorithms()
        self._train_ensemble = self._get_train_ensemble()
        self._stack_models = self._get_stack_models()
        self._eval_metric = self._get_eval_metric()
        self._validation_strategy = self._get_validation_strategy()
        self._verbose = self._get_verbose()
        self._explain_level = self._get_explain_level()
        self._golden_features = self._get_golden_features()
        self._features_selection = self._get_features_selection()
        self._start_random_models = self._get_start_random_models()
        self._hill_climbing_steps = self._get_hill_climbing_steps()
        self._top_models_to_improve = self._get_top_models_to_improve()
        self._boost_on_errors = self._get_boost_on_errors()
        self._kmeans_features = self._get_kmeans_features()
        self._mix_encoding = self._get_mix_encoding()
        self._max_single_prediction_time = self._get_max_single_prediction_time()
        self._optuna_time_budget = self._get_optuna_time_budget()
        self._optuna_init_params = self._get_optuna_init_params()
        self._optuna_verbose = self._get_optuna_verbose()
        self._n_jobs = self._get_n_jobs()
        self._random_state = self._get_random_state()
    
        if sensitive_features is not None:
            self._fairness_metric = self._get_fairness_metric()
            self._fairness_threshold = self._get_fairness_threshold()
            self._privileged_groups = self._get_privileged_groups()
            self._underprivileged_groups = self._get_underprivileged_groups()
    
        self._adjust_validation = False
        self._apply_constraints()
        if not self._adjust_validation:
            # if there is no validation adjustment
            # then we can apply stack_models constraints immediately
            # if there is validation adjustment
            # then we will apply constraints after the adjustment
            self._apply_constraints_stack_models()
    
        try:
            self.load_progress()
            if self._fit_level == "finished":
                print(
                    "This model has already been fitted. You can use predict methods or select a new 'results_path' for a new 'fit()'."
                )
                return
            self._check_can_load()
    
            self.verbose_print(f"AutoML directory: {self._results_path}")
            if self._mode == "Optuna":
                ttl = int(len(self._algorithms) * self._optuna_time_budget)
                self.verbose_print("Expected computing time:")
                self.verbose_print(
                    f"Time for tuning with Optuna: len(algorithms) * optuna_time_budget = {int(len(self._algorithms) * self._optuna_time_budget)} seconds"
                )
                self.verbose_print(
                    f"There is no time limit for ML model training after Optuna tuning (total_time_limit parameter is ignored)."
                )
    
            self.verbose_print(
                f"The task is {self._ml_task} with evaluation metric {self._eval_metric}"
            )
            self.verbose_print(f"AutoML will use algorithms: {self._algorithms}")
            if self._stack_models:
                self.verbose_print("AutoML will stack models")
            if self._train_ensemble:
                self.verbose_print("AutoML will ensemble available models")
    
            self._start_time = time.time()
            if self._time_ctrl is not None:
                self._start_time -= self._time_ctrl.already_spend()
    
            # Automatic Exploratory Data Analysis
            # I disabled EDA because it won't be supported
            # I recommend using pandas_profiling or Sweetviz
            # if self._explain_level == 2:
            #     EDA.compute(X, y, os.path.join(self._results_path, "EDA"))
    
            # Save data
    
            self._save_data(
                X.copy(deep=False),
                y.copy(deep=False),
                None if sample_weight is None else sample_weight.copy(deep=False),
                cv,
                None
                if sensitive_features is None
                else sensitive_features.copy(deep=False),
            )
    
            tuner = MljarTuner(
                self._get_tuner_params(
                    self._start_random_models,
                    self._hill_climbing_steps,
                    self._top_models_to_improve,
                ),
                self._algorithms,
                self._ml_task,
                self._eval_metric,
                self._validation_strategy,
                self._explain_level,
                self._data_info,
                self._golden_features,
                self._features_selection,
                self._train_ensemble,
                self._stack_models,
                self._adjust_validation,
                self._boost_on_errors,
                self._kmeans_features,
                self._mix_encoding,
                self._optuna_time_budget,
                self._optuna_init_params,
                self._optuna_verbose,
                self._n_jobs,
                self._random_state,
                self._fairness_metric,
                self._fairness_threshold,
                self._privileged_groups,
                self._underprivileged_groups,
            )
            self.tuner = tuner
    
            steps = tuner.steps()
            self.verbose_print(
                f'AutoML steps: {[s for s in steps if "update_" not in s]}'
            )
            if self._time_ctrl is None:
                self._time_ctrl = TimeController(
                    self._start_time,
                    self._total_time_limit,
                    self._model_time_limit,
                    steps,
                    self._algorithms,
                )
    
            self._time_ctrl.log_time(
                "prepare_data",
                "prepare_data",
                "prepare_data",
                time.time() - self._start_time,
            )
    
            for step in steps:
                self._fit_level = step
                start = time.time()
                # self._time_start[step] = start
    
                if step in ["stack", "ensemble_stacked"] and not self._stack_models:
                    continue
    
                if step == "stack":
                    self.prepare_for_stacking()
                if "hill_climbing" in step or step in ["ensemble", "stack"]:
                    if len(self._models) == 0:
                        raise AutoMLException(
                            "No models produced. \nPlease check your data or"
                            " submit a Github issue at https://github.com/mljar/mljar-supervised/issues/new."
                        )
    
                generated_params = []
                if step in self._all_params:
                    generated_params = self._all_params[step]
                else:
                    generated_params = tuner.generate_params(
                        step,
                        self._models,
                        self._results_path,
                        self._stacked_models,
                        self._total_time_limit,
                    )
    
                if generated_params is None or not generated_params:
                    if "_update_" not in step:
                        self.verbose_print(
                            f"Skip {step} because no parameters were generated."
                        )
                    continue
                if generated_params:
                    if not self._time_ctrl.enough_time_for_step(self._fit_level):
                        self.verbose_print(f"Skip {step} because of the time limit.")
                        continue
                    else:
                        model_str = "models" if len(generated_params) > 1 else "model"
                        self.verbose_print(
                            f"* Step {step} will try to check up to {len(generated_params)} {model_str}"
                        )
    
                for params in generated_params:
                    if params.get("status", "") in ["trained", "skipped", "error"]:
                        self.verbose_print(f"{params['name']}: {params['status']}.")
                        continue
    
                    try:
                        trained = False
                        if "ensemble" in step:
                            trained = self.ensemble_step(
                                is_stacked=params["is_stacked"]
                            )
                        else:
                            trained = self.train_model(params)
                        params["status"] = "trained" if trained else "skipped"
                        params["final_loss"] = self._models[-1].get_final_loss()
                        params["train_time"] = self._models[-1].get_train_time()
    
                        if (
                            self._adjust_validation
                            and len(self._models) == 1
                            and step == "adjust_validation"
                        ):
                            self._set_adjusted_validation()
    
                    except NotTrainedException as e:
                        params["status"] = "error"
                        self.verbose_print(
                            params.get("name") + " not trained. " + str(e)
                        )
                    except Exception as e:
                        import traceback
    
                        self._update_errors_report(
                            params.get("name"), str(e) + "\n" + traceback.format_exc()
                        )
                        params["status"] = "error"
    
                    self.save_progress(step, generated_params)
    
            if not self._models:
>               raise AutoMLException("No models produced.")
E               supervised.exceptions.AutoMLException: No models produced.

supervised/base_automl.py:1206: AutoMLException
------------------------------ Captured log call -------------------------------
ERROR    supervised.exceptions:exceptions.py:15 No models produced.
=========================== short test summary info ============================
FAILED tests/tests_automl/test_data_types.py::AutoMLDataTypesTest::test_category_data_type
============================== 1 failed in 2.12s ===============================
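For anyone who wants to reproduce this outside pytest, here is a minimal standalone sketch assembled from the test body shown in the traceback. Two things are not visible in the snippet itself and are assumptions marked in comments: rows = 250 (inferred from the captured y, Length: 250) and the documented from supervised.automl import AutoML entry point.

```python
import numpy as np
import pandas as pd

from supervised.automl import AutoML  # documented mljar-supervised import

rows = 250  # assumption: inferred from the captured y (Length: 250, dtype: int64)
X = pd.DataFrame(np.random.rand(rows, 3), columns=[f"f{i}" for i in range(3)])
y = np.random.randint(0, 2, rows)

# The test's distinguishing step: cast a float column to pandas "category"
# dtype, giving CatBoost roughly 250 unique float-valued categories.
X["f1"] = X["f1"].astype("category")

automl = AutoML(
    results_path="automl_tests",  # same directory as the failing test
    total_time_limit=1,
    algorithms=["CatBoost"],
    train_ensemble=False,
    explain_level=0,
    start_random_models=1,
)
automl.fit(X, y)  # ends with AutoMLException: No models produced.
```

The pytest output does not include the actual CatBoost error; it only points at automl_tests/errors.md (written by _update_errors_report, visible in the trace above). A quick post-mortem after the failed run:

```python
# Print whatever AutoML appended to errors.md during the failed run;
# per _update_errors_report it holds the original exception plus traceback.
from pathlib import Path

report = Path("automl_tests") / "errors.md"
print(report.read_text() if report.exists() else "no errors.md written")
```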
