Skip to content

Commit e67cf3f

Browse files
mllgvrodriguezf
authored and committed
Bugfix: Aggregation of probabilities (mlr-org#2579)
1 parent cac9f52 commit e67cf3f

11 files changed

+22
-21
lines changed

NEWS.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
## learners - general
44

55
- `classif.liquidSVM` and `regr.liquidSVM` have been removed because `liquidSVM` has been removed from CRAN.
6+
- fixed a bug that caused an incorrect aggregation of probabilities in some cases. The bug had existed for quite some time and was exposed by the change of `data.table`'s default in `rbindlist()`. See #2578 for more information. (@mllg, #2579)
67

78
# mlr 2.14.0
89

R/BenchmarkResult_operators.R

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -118,14 +118,14 @@ getBMRObjects = function(bmr, task.ids = NULL, learner.ids = NULL, fun, as.df =
118118
return(p)
119119
})
120120
if (as.df) {
121-
xs = setDF(rbindlist(xs, fill = TRUE))
121+
xs = setDF(rbindlist(xs, fill = TRUE, use.names = TRUE))
122122
} else {
123123
xs = setNames(xs, learner.ids)
124124
}
125125
return(xs)
126126
})
127127
if (as.df) {
128-
res = setDF(rbindlist(res, fill = TRUE))
128+
res = setDF(rbindlist(res, fill = TRUE, use.names = TRUE))
129129
} else {
130130
res = setNames(res, task.ids)
131131
if (drop) {
@@ -242,7 +242,7 @@ getBMROptResults = function(bmr, task.ids = NULL, learner.ids = NULL, as.df = FA
242242

243243
if (inherits(x$learner, wrapper.class)) {
244244
xs = lapply(x$extract, fun)
245-
xs = setDF(rbindlist(lapply(seq_along(xs), function(i) cbind(iter = i, xs[[i]])), fill = TRUE))
245+
xs = setDF(rbindlist(lapply(seq_along(xs), function(i) cbind(iter = i, xs[[i]])), fill = TRUE, use.names = TRUE))
246246
} else {
247247
NULL
248248
}

R/ResamplePrediction.R

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,10 @@ makeResamplePrediction = function(instance, preds.test, preds.train, task.desc)
2222
if (any(tenull)) pr.te = preds.test[!tenull] else pr.te = preds.test
2323
if (any(trnull)) pr.tr = preds.train[!trnull] else pr.tr = preds.train
2424

25-
data = setDF(rbind(
26-
rbindlist(lapply(seq_along(pr.te), function(X) cbind(pr.te[[X]]$data, iter = X, set = "test"))),
27-
rbindlist(lapply(seq_along(pr.tr), function(X) cbind(pr.tr[[X]]$data, iter = X, set = "train")))
28-
))
25+
data = setDF(rbindlist(c(
26+
lapply(seq_along(pr.te), function(X) cbind(pr.te[[X]]$data, iter = X, set = "test")),
27+
lapply(seq_along(pr.tr), function(X) cbind(pr.tr[[X]]$data, iter = X, set = "train"))
28+
), use.names = TRUE))
2929

3030
if (!any(tenull) && instance$desc$predict %in% c("test", "both")) {
3131
p1 = preds.test[[1L]]

R/generateCalibration.R

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -115,8 +115,8 @@ generateCalibrationData.list = function(obj, breaks = "Sturges", groups = NULL,
115115
}
116116
list(data = df, proportion = df[, fun(.SD), by = "bin"])
117117
})
118-
data = rbindlist(lapply(out, function(x) x$data), idcol = "Learner")
119-
proportion = rbindlist(lapply(out, function(x) x$proportion), idcol = "Learner")
118+
data = rbindlist(lapply(out, function(x) x$data), idcol = "Learner", use.names = TRUE)
119+
proportion = rbindlist(lapply(out, function(x) x$proportion), idcol = "Learner", use.names = TRUE)
120120
if (length(td$class.levels) == 2L) {
121121
proportion = proportion[, !td$negative, with = FALSE]
122122
data = data[data$Class != td$negative, ]
@@ -166,7 +166,7 @@ generateCalibrationData.list = function(obj, breaks = "Sturges", groups = NULL,
166166
#' names(pred) = c("rpart", "nnet")
167167
#' out = generateCalibrationData(pred, groups = 3)
168168
#' plotCalibration(out)
169-
#'
169+
#'
170170
#' fit = lapply(lrns, train, task = sonar.task)
171171
#' pred = lapply(fit, predict, task = sonar.task)
172172
#' names(pred) = c("rpart", "lda")

R/generatePartialDependence.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@
9797
#' fit = train(lrn, bh.task)
9898
#' pd = generatePartialDependenceData(fit, bh.task, "lstat")
9999
#' plotPartialDependence(pd, data = getTaskData(bh.task))
100-
#'
100+
#'
101101
#' lrn = makeLearner("classif.rpart", predict.type = "prob")
102102
#' fit = train(lrn, iris.task)
103103
#' pd = generatePartialDependenceData(fit, iris.task, "Petal.Width")
@@ -222,7 +222,7 @@ generatePartialDependenceData = function(obj, input, features = NULL,
222222
out = parallelMap(doDerivativeMarginalPrediction, x = features, more.args = args)
223223
}
224224
}
225-
out = rbindlist(out, fill = TRUE)
225+
out = rbindlist(out, fill = TRUE, use.names = TRUE)
226226

227227
if (length(target) == 1L) {
228228
if (!multi.fun) {

R/generateThreshVsPerf.R

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ generateThreshVsPerfData.list = function(obj, measures, gridsize = 100L, aggrega
9797
out = out[[1L]]
9898
colnames(out)[!colnames(out) %in% c("iter", "threshold", "learner")] = mids
9999
} else {
100-
out = setDF(rbindlist(lapply(out, as.data.table), fill = TRUE, idcol = "learner"))
100+
out = setDF(rbindlist(lapply(out, as.data.table), fill = TRUE, idcol = "learner", use.names = TRUE))
101101
colnames(out)[!colnames(out) %in% c("iter", "threshold", "learner")] = mids
102102
}
103103

@@ -242,11 +242,11 @@ plotThreshVsPerf = function(obj, measures = obj$measures,
242242
#' pred = predict(fit, task = sonar.task)
243243
#' roc = generateThreshVsPerfData(pred, list(fpr, tpr))
244244
#' plotROCCurves(roc)
245-
#'
245+
#'
246246
#' r = bootstrapB632plus(lrn, sonar.task, iters = 3)
247247
#' roc_r = generateThreshVsPerfData(r, list(fpr, tpr), aggregate = FALSE)
248248
#' plotROCCurves(roc_r)
249-
#'
249+
#'
250250
#' r2 = crossval(lrn, sonar.task, iters = 3)
251251
#' roc_l = generateThreshVsPerfData(list(boot = r, cv = r2), list(fpr, tpr), aggregate = FALSE)
252252
#' plotROCCurves(roc_l)

R/getNestedTuneResults.R

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,7 @@ getNestedTuneResultsOptPathDf = function(r, trafo = FALSE) {
5252
if (trafo) ops = lapply(ops, trafoOptPath)
5353
op.dfs = lapply(ops, as.data.frame)
5454
op.dfs = setDF(rbindlist(lapply(seq_along(op.dfs), function(i) {
55-
5655
op.dfs[[i]][, "iter"] = i
5756
op.dfs[[i]]
58-
}), fill = TRUE))
57+
}), fill = TRUE, use.names = TRUE))
5958
}

R/listLearners.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ getLearnerTable = function() {
1717
properties = list(row$properties),
1818
note = row$note %??% ""
1919
)
20-
}))
20+
}), use.names = TRUE)
2121

2222
# set learner type (classif, regr, surv, ...)
2323
tab$type = vcapply(stri_split_fixed(tab$id, ".", n = 2L), head, 1L)
@@ -140,7 +140,7 @@ listLearners.character = function(obj = NA_character_, properties = character(0L
140140

141141
tab$package = vcapply(tab$package, collapse)
142142
properties = listLearnerProperties()
143-
tab = cbind(tab, rbindlist(lapply(tab$properties, function(x) setNames(as.list(properties %in% x), properties))))
143+
tab = cbind(tab, rbindlist(lapply(tab$properties, function(x) setNames(as.list(properties %in% x), properties)), use.names = TRUE))
144144
tab$properties = NULL
145145
setnames(tab, "id", "class")
146146
setDF(tab)

R/mergeBenchmarkResults.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ mergeBenchmarkResults = function(bmrs) {
4242
existing.combos = rbindlist(lapply(bmrs, function(bmr) {
4343

4444
getBMRAggrPerformances(bmr, as.df = TRUE)[, c("task.id", "learner.id")]
45-
}))
45+
}), use.names = TRUE)
4646
existing.combos = stri_paste(existing.combos$task.id, existing.combos$learner.id, sep = " - ")
4747
if (!identical(sort(existing.combos), sort(all.combos))) {
4848
dupls = existing.combos[duplicated(existing.combos)]

R/relativeOverfitting.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ estimateRelativeOverfitting.ResamplePrediction = function(predish, measures, tas
6161
pred.train = makePrediction(task$task.desc, row.names(data), data$id, data$truth, predish$predict.type, predish$predict.threshold, data$response, predish$time[i])
6262

6363
estimateRelativeOverfitting(pred.test, measures, task, pred.train = pred.train, iter = i)
64-
}))
64+
}), use.names = TRUE)
6565
}
6666

6767
#' @export

0 commit comments

Comments
 (0)