Commit 7e8242e

Tao Chen authored and Kernel Patches Daemon committed
bpf: Hold the perf callchain entry until used completely
As Alexei noted, the get_perf_callchain() return value may be reused if a task is preempted after the BPF program enters migrate-disable mode. The perf callchain entries form only a small stack of entries, so hold the entry until it has been used completely:

1. get the perf callchain entry
2. BPF use...
3. put the perf callchain entry

Peter also suggested that get_recursion_context() should be used with preemption disabled, so disable preemption on the BPF side.

Signed-off-by: Tao Chen <[email protected]>
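For context, a minimal sketch of the get/use/put lifecycle described above, using the existing callchain helpers that the diff below relies on (get_callchain_entry()/put_callchain_entry()); the wrapper name example_collect_stack() and its error handling are illustrative only, not part of this patch:

static int example_collect_stack(void)
{
        struct perf_callchain_entry *entry;
        int rctx;

        /*
         * 1. get the perf callchain entry; get_recursion_context(),
         *    called inside get_callchain_entry(), expects preemption
         *    to be disabled
         */
        preempt_disable();
        entry = get_callchain_entry(&rctx);
        preempt_enable();
        if (!entry)
                return -EFAULT;

        /*
         * 2. the BPF side uses the entry; it stays reserved even if
         *    the task is preempted while migrate-disabled
         */

        /* 3. put the perf callchain entry once it is no longer needed */
        put_callchain_entry(rctx);

        return 0;
}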
Parent: d1f0532


kernel/bpf/stackmap.c (55 additions, 12 deletions)
@@ -210,13 +210,14 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
 }
 
 static struct perf_callchain_entry *
-get_callchain_entry_for_task(struct task_struct *task, u32 max_depth)
+get_callchain_entry_for_task(int *rctx, struct task_struct *task, u32 max_depth)
 {
 #ifdef CONFIG_STACKTRACE
         struct perf_callchain_entry *entry;
-        int rctx;
 
-        entry = get_callchain_entry(&rctx);
+        preempt_disable();
+        entry = get_callchain_entry(rctx);
+        preempt_enable();
 
         if (!entry)
                 return NULL;
@@ -238,8 +239,6 @@ get_callchain_entry_for_task(struct task_struct *task, u32 max_depth)
                         to[i] = (u64)(from[i]);
         }
 
-        put_callchain_entry(rctx);
-
         return entry;
 #else /* CONFIG_STACKTRACE */
         return NULL;
@@ -320,6 +319,34 @@ static long __bpf_get_stackid(struct bpf_map *map,
         return id;
 }
 
+static struct perf_callchain_entry *
+bpf_get_perf_callchain(int *rctx, struct pt_regs *regs, bool kernel, bool user,
+                       int max_stack, bool crosstask)
+{
+        struct perf_callchain_entry_ctx ctx;
+        struct perf_callchain_entry *entry;
+
+        preempt_disable();
+        entry = get_callchain_entry(rctx);
+        preempt_enable();
+
+        if (unlikely(!entry))
+                return NULL;
+
+        __init_perf_callchain_ctx(&ctx, entry, max_stack, false);
+        if (kernel)
+                __get_perf_callchain_kernel(&ctx, regs);
+        if (user && !crosstask)
+                __get_perf_callchain_user(&ctx, regs);
+
+        return entry;
+}
+
+static void bpf_put_perf_callchain(int rctx)
+{
+        put_callchain_entry(rctx);
+}
+
 BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
            u64, flags)
 {
@@ -328,20 +355,24 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
         struct perf_callchain_entry *trace;
         bool kernel = !user;
         u32 max_depth;
+        int rctx, ret;
 
         if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
                                BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID)))
                 return -EINVAL;
 
         max_depth = stack_map_calculate_max_depth(map->value_size, elem_size, flags);
-        trace = get_perf_callchain(regs, kernel, user, max_depth,
-                                   false, false);
+        trace = bpf_get_perf_callchain(&rctx, regs, kernel, user, max_depth,
+                                       false);
 
         if (unlikely(!trace))
                 /* couldn't fetch the stack trace */
                 return -EFAULT;
 
-        return __bpf_get_stackid(map, trace, flags);
+        ret = __bpf_get_stackid(map, trace, flags);
+        bpf_put_perf_callchain(rctx);
+
+        return ret;
 }
 
 const struct bpf_func_proto bpf_get_stackid_proto = {
@@ -435,6 +466,7 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
         bool kernel = !user;
         int err = -EINVAL;
         u64 *ips;
+        int rctx;
 
         if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
                                BPF_F_USER_BUILD_ID)))
@@ -467,18 +499,26 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
                 trace = trace_in;
                 trace->nr = min_t(u32, trace->nr, max_depth);
         } else if (kernel && task) {
-                trace = get_callchain_entry_for_task(task, max_depth);
+                trace = get_callchain_entry_for_task(&rctx, task, max_depth);
         } else {
-                trace = get_perf_callchain(regs, kernel, user, max_depth,
-                                           crosstask, false);
+                trace = bpf_get_perf_callchain(&rctx, regs, kernel, user, max_depth,
+                                               crosstask);
         }
 
-        if (unlikely(!trace) || trace->nr < skip) {
+        if (unlikely(!trace)) {
                 if (may_fault)
                         rcu_read_unlock();
                 goto err_fault;
         }
 
+        if (trace->nr < skip) {
+                if (may_fault)
+                        rcu_read_unlock();
+                if (!trace_in)
+                        bpf_put_perf_callchain(rctx);
+                goto err_fault;
+        }
+
         trace_nr = trace->nr - skip;
         copy_len = trace_nr * elem_size;
 
@@ -497,6 +537,9 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
         if (may_fault)
                 rcu_read_unlock();
 
+        if (!trace_in)
+                bpf_put_perf_callchain(rctx);
+
         if (user_build_id)
                 stack_map_get_build_id_offset(buf, trace_nr, user, may_fault);
 