From aeb8fe0283d4d3b0f27a87c5f5c938e7324f7d8f Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 31 May 2024 21:45:00 +0200 Subject: bpf: Fix bpf_session_cookie BTF_ID in special_kfunc_set list The bpf_session_cookie is unavailable for !CONFIG_FPROBE as reported by Sebastian [1]. To fix that we remove CONFIG_FPROBE ifdef for session kfuncs, which is fine, because there's filter for session programs. Then based on bpf_trace.o dependency: obj-$(CONFIG_BPF_EVENTS) += bpf_trace.o we add bpf_session_cookie BTF_ID in special_kfunc_set list dependency on CONFIG_BPF_EVENTS. [1] https://lore.kernel.org/bpf/20240531071557.MvfIqkn7@linutronix.de/T/#m71c6d5ec71db2967288cb79acedc15cc5dbfeec5 Reported-by: Sebastian Andrzej Siewior Tested-by: Sebastian Andrzej Siewior Suggested-by: Alexei Starovoitov Fixes: 5c919acef8514 ("bpf: Add support for kprobe session cookie") Signed-off-by: Jiri Olsa Link: https://lore.kernel.org/r/20240531194500.2967187-1-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 4 ++++ kernel/trace/bpf_trace.c | 2 -- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 48f3a9acdef3..36ef8e96787e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -11128,7 +11128,11 @@ BTF_ID(func, bpf_iter_css_task_new) #else BTF_ID_UNUSED #endif +#ifdef CONFIG_BPF_EVENTS BTF_ID(func, bpf_session_cookie) +#else +BTF_ID_UNUSED +#endif static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta) { diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 6249dac61701..d1daeab1bbc1 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -3517,7 +3517,6 @@ static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx) } #endif /* CONFIG_UPROBES */ -#ifdef CONFIG_FPROBE __bpf_kfunc_start_defs(); __bpf_kfunc bool bpf_session_is_return(void) @@ -3566,4 +3565,3 @@ static int __init bpf_kprobe_multi_kfuncs_init(void) } late_initcall(bpf_kprobe_multi_kfuncs_init); -#endif -- cgit From 2317dc2c22cc353b699c7d1db47b2fe91f54055c Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Wed, 29 May 2024 12:19:01 +0200 Subject: bpf, devmap: Remove unnecessary if check in for loop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The iterator variable dst cannot be NULL and the if check can be removed. Remove it and fix the following Coccinelle/coccicheck warning reported by itnull.cocci: ERROR: iterator variable bound on line 762 cannot be NULL Signed-off-by: Thorsten Blum Signed-off-by: Daniel Borkmann Reviewed-by: Toke Høiland-Jørgensen Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/20240529101900.103913-2-thorsten.blum@toblux.com --- kernel/bpf/devmap.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 4e2cdbb5629f..7f3b34452243 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -760,9 +760,6 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb, for (i = 0; i < dtab->n_buckets; i++) { head = dev_map_index_hash(dtab, i); hlist_for_each_entry_safe(dst, next, head, index_hlist) { - if (!dst) - continue; - if (is_ifindex_excluded(excluded_devices, num_excluded, dst->dev->ifindex)) continue; -- cgit From 2884dc7d08d98a89d8d65121524bb7533183a63a Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 2 Jun 2024 11:27:03 -0700 Subject: bpf: Fix a potential use-after-free in bpf_link_free() After commit 1a80dbcb2dba, bpf_link can be freed by link->ops->dealloc_deferred, but the code still tests and uses link->ops->dealloc afterward, which leads to a use-after-free as reported by syzbot. Actually, one of them should be sufficient, so just call one of them instead of both. Also add a WARN_ON() in case of any problematic implementation. Fixes: 1a80dbcb2dba ("bpf: support deferring bpf_link dealloc to after RCU grace period") Reported-by: syzbot+1989ee16d94720836244@syzkaller.appspotmail.com Signed-off-by: Cong Wang Signed-off-by: Daniel Borkmann Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/20240602182703.207276-1-xiyou.wangcong@gmail.com --- kernel/bpf/syscall.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 2222c3ff88e7..f45ed6adc092 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2998,6 +2998,7 @@ static int bpf_obj_get(const union bpf_attr *attr) void bpf_link_init(struct bpf_link *link, enum bpf_link_type type, const struct bpf_link_ops *ops, struct bpf_prog *prog) { + WARN_ON(ops->dealloc && ops->dealloc_deferred); atomic64_set(&link->refcnt, 1); link->type = type; link->id = 0; @@ -3056,16 +3057,17 @@ static void bpf_link_defer_dealloc_mult_rcu_gp(struct rcu_head *rcu) /* bpf_link_free is guaranteed to be called from process context */ static void bpf_link_free(struct bpf_link *link) { + const struct bpf_link_ops *ops = link->ops; bool sleepable = false; bpf_link_free_id(link->id); if (link->prog) { sleepable = link->prog->sleepable; /* detach BPF program, clean up used resources */ - link->ops->release(link); + ops->release(link); bpf_prog_put(link->prog); } - if (link->ops->dealloc_deferred) { + if (ops->dealloc_deferred) { /* schedule BPF link deallocation; if underlying BPF program * is sleepable, we need to first wait for RCU tasks trace * sync, then go through "classic" RCU grace period @@ -3074,9 +3076,8 @@ static void bpf_link_free(struct bpf_link *link) call_rcu_tasks_trace(&link->rcu, bpf_link_defer_dealloc_mult_rcu_gp); else call_rcu(&link->rcu, bpf_link_defer_dealloc_rcu_gp); - } - if (link->ops->dealloc) - link->ops->dealloc(link); + } else if (ops->dealloc) + ops->dealloc(link); } static void bpf_link_put_deferred(struct work_struct *work) -- cgit From 74751ef5c1912ebd3e65c3b65f45587e05ce5d36 Mon Sep 17 00:00:00 2001 From: Haifeng Xu Date: Mon, 13 May 2024 10:39:48 +0000 Subject: perf/core: Fix missing wakeup when waiting for context reference In our production environment, we found many hung tasks which are blocked for more than 18 hours. Their call traces are like this: [346278.191038] __schedule+0x2d8/0x890 [346278.191046] schedule+0x4e/0xb0 [346278.191049] perf_event_free_task+0x220/0x270 [346278.191056] ? init_wait_var_entry+0x50/0x50 [346278.191060] copy_process+0x663/0x18d0 [346278.191068] kernel_clone+0x9d/0x3d0 [346278.191072] __do_sys_clone+0x5d/0x80 [346278.191076] __x64_sys_clone+0x25/0x30 [346278.191079] do_syscall_64+0x5c/0xc0 [346278.191083] ? syscall_exit_to_user_mode+0x27/0x50 [346278.191086] ? do_syscall_64+0x69/0xc0 [346278.191088] ? irqentry_exit_to_user_mode+0x9/0x20 [346278.191092] ? irqentry_exit+0x19/0x30 [346278.191095] ? exc_page_fault+0x89/0x160 [346278.191097] ? asm_exc_page_fault+0x8/0x30 [346278.191102] entry_SYSCALL_64_after_hwframe+0x44/0xae The task was waiting for the refcount become to 1, but from the vmcore, we found the refcount has already been 1. It seems that the task didn't get woken up by perf_event_release_kernel() and got stuck forever. The below scenario may cause the problem. Thread A Thread B ... ... perf_event_free_task perf_event_release_kernel ... acquire event->child_mutex ... get_ctx ... release event->child_mutex acquire ctx->mutex ... perf_free_event (acquire/release event->child_mutex) ... release ctx->mutex wait_var_event acquire ctx->mutex acquire event->child_mutex # move existing events to free_list release event->child_mutex release ctx->mutex put_ctx ... ... In this case, all events of the ctx have been freed, so we couldn't find the ctx in free_list and Thread A will miss the wakeup. It's thus necessary to add a wakeup after dropping the reference. Fixes: 1cf8dfe8a661 ("perf/core: Fix race between close() and fork()") Signed-off-by: Haifeng Xu Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Frederic Weisbecker Acked-by: Mark Rutland Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20240513103948.33570-1-haifeng.xu@shopee.com --- kernel/events/core.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'kernel') diff --git a/kernel/events/core.c b/kernel/events/core.c index f0128c5ff278..8f908f077935 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5384,6 +5384,7 @@ int perf_event_release_kernel(struct perf_event *event) again: mutex_lock(&event->child_mutex); list_for_each_entry(child, &event->child_list, child_list) { + void *var = NULL; /* * Cannot change, child events are not migrated, see the @@ -5424,11 +5425,23 @@ again: * this can't be the last reference. */ put_event(event); + } else { + var = &ctx->refcount; } mutex_unlock(&event->child_mutex); mutex_unlock(&ctx->mutex); put_ctx(ctx); + + if (var) { + /* + * If perf_event_free_task() has deleted all events from the + * ctx while the child_mutex got released above, make sure to + * notify about the preceding put_ctx(). + */ + smp_mb(); /* pairs with wait_var_event() */ + wake_up_var(var); + } goto again; } mutex_unlock(&event->child_mutex); -- cgit