1 change: 1 addition & 0 deletions DESCRIPTION
@@ -264,6 +264,7 @@ Suggests:
testthat,
tgp,
TH.data,
+ tidyr,
tsfeatures,
vdiffr,
wavelets,
2 changes: 2 additions & 0 deletions NEWS.md
@@ -37,6 +37,8 @@ PR: #2638 (@pfistl)
## learners - general

- xgboost: added options 'auto', 'approx' and 'gpu_hist' to param `tree_method` (@albersonmiranda, #2701)
+ - `getFeatureImportance()` now returns a long data.frame with columns `variable` and `importance`.
+   Previously, a wide data.frame was returned with one column per variable (@pat-s, #1755).

## filters - general

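For illustration, a minimal sketch of the new return format, assuming the mlr package and a learner with the `featimp` property (here `classif.rpart` on the bundled `iris.task`); the importance values shown in the comment are placeholders:

```r
library(mlr)

# Any learner that supports feature importance works here.
lrn = makeLearner("classif.rpart")
mod = train(lrn, iris.task)

imp = getFeatureImportance(mod)$res
# Previously: one row, with one numeric column per feature.
# Now: one row per feature, e.g.
#   variable     importance
#   Sepal.Length        ...
#   Sepal.Width         ...
#   Petal.Length        ...
#   Petal.Width         ...
imp
```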
3 changes: 2 additions & 1 deletion R/getFeatureImportance.R
@@ -76,7 +76,8 @@ getFeatureImportance = function(object, ...) {
# convert named vector to data.frame with columns and set NA to 0
imp[is.na(imp)] = 0L
imp = as.data.frame(t(imp))
- rownames(imp) = NULL
+ imp = tidyr::pivot_longer(imp, tidyr::everything(),
+   names_to = "variable", values_to = "importance")

makeS3Obj("FeatureImportance",
res = imp,
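A self-contained sketch of the conversion above, assuming only tidyr is available; the named importance vector and its values are made up for illustration:

```r
library(tidyr)

imp = c(x1 = 0.7, x2 = NA, x3 = 0.1)  # hypothetical named importance vector
imp[is.na(imp)] = 0L                  # missing importances are set to 0
imp = as.data.frame(t(imp))           # 1-row wide data.frame with columns x1, x2, x3
imp = tidyr::pivot_longer(imp, tidyr::everything(),
  names_to = "variable", values_to = "importance")
imp
#> # A tibble: 3 x 2
#>   variable importance
#>   <chr>         <dbl>
#> 1 x1              0.7
#> 2 x2              0
#> 3 x3              0.1
```

Note that `pivot_longer()` returns a tibble, so the long result prints with column types as shown.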
8 changes: 5 additions & 3 deletions tests/testthat/helper_learners_all.R
@@ -215,9 +215,11 @@ testThatLearnerCanCalculateImportance = function(lrn, task, hyperpars) {

mod = train(lrn, task)
feat.imp = getFeatureImportance(mod)$res
- expect_data_frame(feat.imp, types = rep("numeric", getTaskNFeats(task)),
-   any.missing = FALSE, nrows = 1, ncols = getTaskNFeats(task))
- expect_equal(colnames(feat.imp), mod$features)
+ expect_data_frame(feat.imp,
+   types = c("character", "numeric"),
+   any.missing = FALSE, nrows = getTaskNFeats(task),
+   ncols = 2)
+ expect_equal(colnames(feat.imp), c("variable", "importance"))

}

28 changes: 14 additions & 14 deletions tests/testthat/test_base_getFeatureImportance.R
@@ -6,36 +6,36 @@ test_that("getFeatureImportance", {
lrn = makeLearner("classif.randomForest")
mod = train(lrn, binaryclass.task)
feat.imp = getFeatureImportance(mod, type = 2)$res
- expect_data_frame(feat.imp, types = rep("numeric", getTaskNFeats(binaryclass.task)),
-   any.missing = FALSE, nrows = 1, ncols = getTaskNFeats(binaryclass.task))
- expect_equal(colnames(feat.imp), mod$features)
+ expect_data_frame(feat.imp, types = c("character", "numeric"),
+   any.missing = FALSE, nrows = 60, ncols = 2)
+ expect_equal(colnames(feat.imp), c("variable", "importance"))

# type 1 shouldn't
expect_error(getFeatureImportance(mod, type = 1), regexp = ".*importance.*TRUE")

lrn = setHyperPars(lrn, importance = TRUE)
mod = train(lrn, binaryclass.task)
feat.imp = getFeatureImportance(mod, type = 1)$res
- expect_data_frame(feat.imp, types = rep("numeric", getTaskNFeats(binaryclass.task)),
-   any.missing = FALSE, nrows = 1, ncols = getTaskNFeats(binaryclass.task))
- expect_equal(colnames(feat.imp), mod$features)
+ expect_data_frame(feat.imp, types = c("character", "numeric"),
+   any.missing = FALSE, nrows = 60, ncols = 2)
+ expect_equal(colnames(feat.imp), c("variable", "importance"))

# regression learner
lrn = makeLearner("regr.gbm")
mod = train(lrn, regr.task)
feat.imp = getFeatureImportance(mod)$res
- expect_data_frame(feat.imp, types = rep("numeric", getTaskNFeats(regr.task)),
-   any.missing = FALSE, nrows = 1, ncols = getTaskNFeats(regr.task))
- expect_equal(colnames(feat.imp), mod$features)
+ expect_data_frame(feat.imp, types = c("character", "numeric"),
+   any.missing = FALSE, nrows = 13, ncols = 2)
+ expect_equal(colnames(feat.imp), c("variable", "importance"))

# wrapped learner
- lrn = makeFilterWrapper(makeLearner("regr.gbm"), fw.method = "FSelectorRcpp_information.gain", fw.abs = 2,
-   equal = TRUE)
+ lrn = makeFilterWrapper(makeLearner("regr.gbm"),
+   fw.method = "FSelectorRcpp_information.gain", fw.abs = 2, equal = TRUE)
mod = train(lrn, regr.task)
feat.imp = getFeatureImportance(mod)$res
- expect_data_frame(feat.imp, types = rep("numeric", getTaskNFeats(regr.task)),
-   any.missing = FALSE, nrows = 1, ncols = getTaskNFeats(regr.task))
- expect_equal(colnames(feat.imp), mod$features)
+ expect_data_frame(feat.imp, types = c("character", "numeric"),
+   any.missing = FALSE, nrows = 13, ncols = 2)
+ expect_equal(colnames(feat.imp), c("variable", "importance"))

# For learners without the possibility to calculate feature importance a
# meaningful error should be returned
7 changes: 4 additions & 3 deletions tests/testthat/test_featsel_praznik.R
@@ -67,7 +67,8 @@ test_that("FilterWrapper with praznik mutual information, resample", {
mod = train(lrn, binaryclass.task)
feat.imp = getFeatureImportance(mod)$res
expect_data_frame(feat.imp,
-   types = rep("numeric", getTaskNFeats(binaryclass.task)),
-   any.missing = FALSE, nrows = 1, ncols = getTaskNFeats(binaryclass.task))
- expect_equal(colnames(feat.imp), mod$features)
+   types = c("character", "numeric"),
+   any.missing = FALSE, nrows = getTaskNFeats(binaryclass.task),
+   ncols = 2)
+ expect_equal(colnames(feat.imp), c("variable", "importance"))
})