From c69993ecdd4dfde2b7da08b022052a33b203da07 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 23 Oct 2025 15:17:05 +0200 Subject: perf: Support deferred user unwind Add support for deferred userspace unwind to perf. Where perf currently relies on in-place stack unwinding; from NMI context and all that. This moves the userspace part of the unwind to right before the return-to-userspace. This has two distinct benefits, the biggest is that it moves the unwind to a faultable context. It becomes possible to fault in debug info (.eh_frame, SFrame etc.) that might not otherwise be readily available. And secondly, it de-duplicates the user callchain where multiple samples happen during the same kernel entry. To facilitate this the perf interface is extended with a new record type: PERF_RECORD_CALLCHAIN_DEFERRED and two new attribute flags: perf_event_attr::defer_callchain - to request the user unwind be deferred perf_event_attr::defer_output - to request PERF_RECORD_CALLCHAIN_DEFERRED records The existing PERF_RECORD_SAMPLE callchain section gets a new context type: PERF_CONTEXT_USER_DEFERRED After which will come a single entry, denoting the 'cookie' of the deferred callchain that should be attached here, matching the 'cookie' field of the above mentioned PERF_RECORD_CALLCHAIN_DEFERRED. The 'defer_callchain' flag is expected on all events with PERF_SAMPLE_CALLCHAIN. The 'defer_output' flag is expect on the event responsible for collecting side-band events (like mmap, comm etc.). Setting 'defer_output' on multiple events will get you duplicated PERF_RECORD_CALLCHAIN_DEFERRED records. Based on earlier patches by Josh and Steven. Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/20251023150002.GR4067720@noisy.programming.kicks-ass.net --- include/linux/perf_event.h | 2 +- include/linux/unwind_deferred.h | 12 ------------ include/linux/unwind_deferred_types.h | 13 +++++++++++++ 3 files changed, 14 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index fd1d91017b99..9870d768db4c 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1720,7 +1720,7 @@ extern void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct p extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs); extern struct perf_callchain_entry * get_perf_callchain(struct pt_regs *regs, bool kernel, bool user, - u32 max_stack, bool crosstask, bool add_mark); + u32 max_stack, bool crosstask, bool add_mark, u64 defer_cookie); extern int get_callchain_buffers(int max_stack); extern void put_callchain_buffers(void); extern struct perf_callchain_entry *get_callchain_entry(int *rctx); diff --git a/include/linux/unwind_deferred.h b/include/linux/unwind_deferred.h index f4743c8cff4c..bc7ae7d21900 100644 --- a/include/linux/unwind_deferred.h +++ b/include/linux/unwind_deferred.h @@ -6,18 +6,6 @@ #include #include -struct unwind_work; - -typedef void (*unwind_callback_t)(struct unwind_work *work, - struct unwind_stacktrace *trace, - u64 cookie); - -struct unwind_work { - struct list_head list; - unwind_callback_t func; - int bit; -}; - #ifdef CONFIG_UNWIND_USER enum { diff --git a/include/linux/unwind_deferred_types.h b/include/linux/unwind_deferred_types.h index 0a4c8ddbbc57..18fa3932f61c 100644 --- a/include/linux/unwind_deferred_types.h +++ b/include/linux/unwind_deferred_types.h @@ -39,4 +39,17 @@ struct unwind_task_info { union unwind_task_id id; }; +struct unwind_work; +struct unwind_stacktrace; + +typedef void (*unwind_callback_t)(struct unwind_work *work, + struct unwind_stacktrace *trace, + u64 cookie); + +struct unwind_work { + struct list_head list; + unwind_callback_t func; + int bit; +}; + #endif /* _LINUX_UNWIND_USER_DEFERRED_TYPES_H */ -- cgit