Diffstat (limited to 'include/linux/rseq.h')
 include/linux/rseq.h | 214 ++++++++++++++++++++++++++++----------------------
 1 file changed, 122 insertions(+), 92 deletions(-)
diff --git a/include/linux/rseq.h b/include/linux/rseq.h
index 69553e7c14c1..2266f4dc77b6 100644
--- a/include/linux/rseq.h
+++ b/include/linux/rseq.h
@@ -3,134 +3,164 @@
#define _LINUX_RSEQ_H
#ifdef CONFIG_RSEQ
-
-#include <linux/preempt.h>
#include <linux/sched.h>
-#ifdef CONFIG_MEMBARRIER
-# define RSEQ_EVENT_GUARD irq
-#else
-# define RSEQ_EVENT_GUARD preempt
-#endif
-
-/*
- * Map the event mask on the user-space ABI enum rseq_cs_flags
- * for direct mask checks.
- */
-enum rseq_event_mask_bits {
- RSEQ_EVENT_PREEMPT_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT,
- RSEQ_EVENT_SIGNAL_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT,
- RSEQ_EVENT_MIGRATE_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT,
-};
-
-enum rseq_event_mask {
- RSEQ_EVENT_PREEMPT = (1U << RSEQ_EVENT_PREEMPT_BIT),
- RSEQ_EVENT_SIGNAL = (1U << RSEQ_EVENT_SIGNAL_BIT),
- RSEQ_EVENT_MIGRATE = (1U << RSEQ_EVENT_MIGRATE_BIT),
-};
-
-static inline void rseq_set_notify_resume(struct task_struct *t)
-{
- if (t->rseq)
- set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
-}
+#include <uapi/linux/rseq.h>
-void __rseq_handle_notify_resume(struct ksignal *sig, struct pt_regs *regs);
+void __rseq_handle_slowpath(struct pt_regs *regs);
-static inline void rseq_handle_notify_resume(struct ksignal *ksig,
- struct pt_regs *regs)
+/* Invoked from resume_user_mode_work() */
+static inline void rseq_handle_slowpath(struct pt_regs *regs)
{
- if (current->rseq)
- __rseq_handle_notify_resume(ksig, regs);
+ if (IS_ENABLED(CONFIG_GENERIC_ENTRY)) {
+ if (current->rseq.event.slowpath)
+ __rseq_handle_slowpath(regs);
+ } else {
+ /* '&' is intentional to spare one conditional branch */
+ if (current->rseq.event.sched_switch & current->rseq.event.has_rseq)
+ __rseq_handle_slowpath(regs);
+ }
}
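
The '&' above is worth a short illustration. A minimal userspace model (struct ev and the function names below are hypothetical, not the kernel's rseq_event) of why bitwise AND on two flags can spare a conditional branch compared to '&&':

	#include <stdbool.h>
	#include <stdio.h>

	/* Hypothetical stand-ins for the two event flags. */
	struct ev {
		bool sched_switch;
		bool has_rseq;
	};

	/*
	 * '&&' short-circuits: 'has_rseq' may only be evaluated when
	 * 'sched_switch' is true, which typically costs a branch.
	 */
	static bool check_logical(const struct ev *e)
	{
		return e->sched_switch && e->has_rseq;
	}

	/*
	 * '&' evaluates both flags unconditionally: two loads and one
	 * AND, no extra branch. Safe here because both operands are
	 * plain bools without side effects.
	 */
	static bool check_bitwise(const struct ev *e)
	{
		return e->sched_switch & e->has_rseq;
	}

	int main(void)
	{
		struct ev e = { .sched_switch = true, .has_rseq = true };

		printf("%d %d\n", check_logical(&e), check_bitwise(&e));
		return 0;
	}
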
-static inline void rseq_signal_deliver(struct ksignal *ksig,
- struct pt_regs *regs)
-{
- scoped_guard(RSEQ_EVENT_GUARD)
- __set_bit(RSEQ_EVENT_SIGNAL_BIT, &current->rseq_event_mask);
- rseq_handle_notify_resume(ksig, regs);
-}
+void __rseq_signal_deliver(int sig, struct pt_regs *regs);
-/* rseq_preempt() requires preemption to be disabled. */
-static inline void rseq_preempt(struct task_struct *t)
+/*
+ * Invoked from signal delivery to fixup based on the register context before
+ * switching to the signal delivery context.
+ */
+static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs)
{
- __set_bit(RSEQ_EVENT_PREEMPT_BIT, &t->rseq_event_mask);
- rseq_set_notify_resume(t);
+ if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY)) {
+ /* '&' is intentional to spare one conditional branch */
+ if (current->rseq.event.has_rseq & current->rseq.event.user_irq)
+ __rseq_signal_deliver(ksig->sig, regs);
+ } else {
+ if (current->rseq.event.has_rseq)
+ __rseq_signal_deliver(ksig->sig, regs);
+ }
}
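
For orientation, a hedged sketch of where rseq_signal_deliver() sits in an architecture's signal code; handle_signal() and setup_rt_frame() below only illustrate the typical shape and are not verbatim kernel code:

	/* Illustrative only: simplified arch signal delivery path. */
	static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
	{
		/*
		 * Fix up an interrupted rseq critical section first,
		 * while 'regs' still holds the interrupted user context.
		 */
		rseq_signal_deliver(ksig, regs);

		/* ... then rewrite 'regs' to enter the signal handler. */
		setup_rt_frame(ksig, regs);
	}
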
-/* rseq_migrate() requires preemption to be disabled. */
-static inline void rseq_migrate(struct task_struct *t)
+static inline void rseq_raise_notify_resume(struct task_struct *t)
{
- __set_bit(RSEQ_EVENT_MIGRATE_BIT, &t->rseq_event_mask);
- rseq_set_notify_resume(t);
+ set_tsk_thread_flag(t, TIF_RSEQ);
}
-/*
- * If parent process has a registered restartable sequences area, the
- * child inherits. Unregister rseq for a clone with CLONE_VM set.
- */
-static inline void rseq_fork(struct task_struct *t, u64 clone_flags)
+/* Invoked from context switch to force evaluation on exit to user */
+static __always_inline void rseq_sched_switch_event(struct task_struct *t)
{
- if (clone_flags & CLONE_VM) {
- t->rseq = NULL;
- t->rseq_len = 0;
- t->rseq_sig = 0;
- t->rseq_event_mask = 0;
+ struct rseq_event *ev = &t->rseq.event;
+
+ if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY)) {
+ /*
+ * Avoid a boat load of conditionals by using simple logic
+ * to determine whether NOTIFY_RESUME needs to be raised.
+ *
+ * It's required when the CPU or MM CID has changed or
+ * the entry was from user space.
+ */
+ bool raise = (ev->user_irq | ev->ids_changed) & ev->has_rseq;
+
+ if (raise) {
+ ev->sched_switch = true;
+ rseq_raise_notify_resume(t);
+ }
} else {
- t->rseq = current->rseq;
- t->rseq_len = current->rseq_len;
- t->rseq_sig = current->rseq_sig;
- t->rseq_event_mask = current->rseq_event_mask;
+ if (ev->has_rseq) {
+ t->rseq.event.sched_switch = true;
+ rseq_raise_notify_resume(t);
+ }
}
}
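
The raise condition compresses three checks into straight-line boolean math. A small userspace model (event_model and must_raise() are hypothetical names) of the same truth function:

	#include <stdbool.h>
	#include <stdio.h>

	/* Userspace model of the decision in rseq_sched_switch_event(). */
	struct event_model {
		bool user_irq;
		bool ids_changed;
		bool has_rseq;
	};

	static bool must_raise(struct event_model ev)
	{
		/*
		 * One OR plus one AND instead of nested conditionals:
		 * raise when rseq is registered and the entry came from
		 * user space or the CPU/MM CID ids changed.
		 */
		return (ev.user_irq | ev.ids_changed) & ev.has_rseq;
	}

	int main(void)
	{
		struct event_model ev = { .ids_changed = true, .has_rseq = true };

		printf("raise=%d\n", must_raise(ev));	/* raise=1 */
		return 0;
	}
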
-static inline void rseq_execve(struct task_struct *t)
+/*
+ * Invoked from __set_task_cpu() when a task migrates or from
+ * mm_cid_schedin() when the CID changes to enforce an IDs update.
+ *
+ * This does not raise TIF_NOTIFY_RESUME as that happens in
+ * rseq_sched_switch_event().
+ */
+static __always_inline void rseq_sched_set_ids_changed(struct task_struct *t)
{
- t->rseq = NULL;
- t->rseq_len = 0;
- t->rseq_sig = 0;
- t->rseq_event_mask = 0;
+ t->rseq.event.ids_changed = true;
}
-#else
-
-static inline void rseq_set_notify_resume(struct task_struct *t)
-{
-}
-static inline void rseq_handle_notify_resume(struct ksignal *ksig,
- struct pt_regs *regs)
+/* Enforce a full update after RSEQ registration and when execve() failed */
+static inline void rseq_force_update(void)
{
+ if (current->rseq.event.has_rseq) {
+ current->rseq.event.ids_changed = true;
+ current->rseq.event.sched_switch = true;
+ rseq_raise_notify_resume(current);
+ }
}
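
For reference, registration itself happens from user space via the rseq() syscall. A minimal sketch, assuming the kernel UAPI headers and a Linux libc; DEMO_RSEQ_SIG is an arbitrary value for this demo, and on glibc 2.35+ the call will typically fail because the C library has already registered an area for the thread:

	#define _GNU_SOURCE
	#include <linux/rseq.h>		/* UAPI struct rseq */
	#include <stdio.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	/* Arbitrary demo signature; real users pick one fixed value. */
	#define DEMO_RSEQ_SIG	0x53053053

	static __thread struct rseq rs __attribute__((aligned(32)));

	int main(void)
	{
		/* rseq(area, len, flags, sig): flags == 0 registers. */
		long ret = syscall(__NR_rseq, &rs, sizeof(rs), 0, DEMO_RSEQ_SIG);

		if (ret) {
			perror("rseq register"); /* e.g. libc registered already */
			return 1;
		}
		printf("cpu_id=%u\n", rs.cpu_id);
		return 0;
	}
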
-static inline void rseq_signal_deliver(struct ksignal *ksig,
- struct pt_regs *regs)
+
+/*
+ * KVM/HYPERV invoke resume_user_mode_work() before entering guest mode,
+ * which clears TIF_NOTIFY_RESUME on architectures that don't use the
+ * generic TIF bits and therefore can't provide a separate TIF_RSEQ flag.
+ *
+ * To avoid updating user space RSEQ in that case just to do it eventually
+ * again before returning to user space, __rseq_handle_slowpath() does
+ * nothing when invoked with NULL register state.
+ *
+ * After returning from guest mode, before exiting to userspace, hypervisors
+ * must invoke this function to re-raise TIF_NOTIFY_RESUME if necessary.
+ */
+static inline void rseq_virt_userspace_exit(void)
{
+ /*
+ * The generic optimization for deferring RSEQ updates until the next
+ * exit relies on having a dedicated TIF_RSEQ.
+ */
+ if (!IS_ENABLED(CONFIG_HAVE_GENERIC_TIF_BITS) &&
+ current->rseq.event.sched_switch)
+ rseq_raise_notify_resume(current);
}
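
A hedged sketch of the expected call shape in a hypervisor's run path; vcpu_run_ioctl() and enter_guest_mode_loop() are hypothetical stand-ins, not actual KVM code:

	/* Illustrative only: hypervisor vcpu run path. */
	static int vcpu_run_ioctl(struct kvm_vcpu *vcpu)
	{
		/*
		 * Entering guest mode may have consumed TIF_NOTIFY_RESUME
		 * via resume_user_mode_work(NULL) without touching rseq.
		 */
		int ret = enter_guest_mode_loop(vcpu);

		/*
		 * Re-raise the notification so the deferred rseq update
		 * happens on the way out to user space.
		 */
		rseq_virt_userspace_exit();
		return ret;
	}
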
-static inline void rseq_preempt(struct task_struct *t)
+
+static inline void rseq_reset(struct task_struct *t)
{
+ memset(&t->rseq, 0, sizeof(t->rseq));
+ t->rseq.ids.cpu_id = RSEQ_CPU_ID_UNINITIALIZED;
}
-static inline void rseq_migrate(struct task_struct *t)
+
+static inline void rseq_execve(struct task_struct *t)
{
+ rseq_reset(t);
}
+
+/*
+ * If the parent process has a registered restartable sequences area, the
+ * child inherits it. Unregister rseq for a clone with CLONE_VM set.
+ *
+ * On fork, keep the IDs (CPU, MMCID) of the parent, which avoids a fault
+ * on the COW page on exit to user space when the child stays on the same
+ * CPU as the parent. That's obviously not guaranteed, but in overcommit
+ * scenarios it is more likely and optimizes for the fork/exec case without
+ * taking the fault.
+ */
static inline void rseq_fork(struct task_struct *t, u64 clone_flags)
{
-}
-static inline void rseq_execve(struct task_struct *t)
-{
+ if (clone_flags & CLONE_VM)
+ rseq_reset(t);
+ else
+ t->rseq = current->rseq;
}
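
The inheritance is observable from user space: a registered area stays registered across fork(), and the child can read valid IDs without calling rseq() again. A small probe, assuming glibc 2.35+ (struct rseq and __rseq_offset from <sys/rseq.h>) and a compiler providing __builtin_thread_pointer():

	#define _GNU_SOURCE
	#include <stdio.h>
	#include <sys/rseq.h>	/* glibc 2.35+: struct rseq, __rseq_offset */
	#include <sys/wait.h>
	#include <unistd.h>

	/*
	 * The libc-registered area lives at a fixed offset from the
	 * thread pointer.
	 */
	static struct rseq *rseq_area(void)
	{
		return (struct rseq *)((char *)__builtin_thread_pointer() +
				       __rseq_offset);
	}

	int main(void)
	{
		printf("parent: cpu_id=%u\n", rseq_area()->cpu_id);

		if (fork() == 0) {
			/* Registration was inherited, no new rseq() call. */
			printf("child:  cpu_id=%u\n", rseq_area()->cpu_id);
			_exit(0);
		}
		wait(NULL);
		return 0;
	}
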
-#endif
+#else /* CONFIG_RSEQ */
+static inline void rseq_handle_slowpath(struct pt_regs *regs) { }
+static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { }
+static inline void rseq_sched_switch_event(struct task_struct *t) { }
+static inline void rseq_sched_set_ids_changed(struct task_struct *t) { }
+static inline void rseq_force_update(void) { }
+static inline void rseq_virt_userspace_exit(void) { }
+static inline void rseq_fork(struct task_struct *t, u64 clone_flags) { }
+static inline void rseq_execve(struct task_struct *t) { }
+#endif /* !CONFIG_RSEQ */
#ifdef CONFIG_DEBUG_RSEQ
-
void rseq_syscall(struct pt_regs *regs);
-
-#else
-
-static inline void rseq_syscall(struct pt_regs *regs)
-{
-}
-
-#endif
+#else /* CONFIG_DEBUG_RSEQ */
+static inline void rseq_syscall(struct pt_regs *regs) { }
+#endif /* !CONFIG_DEBUG_RSEQ */
#endif /* _LINUX_RSEQ_H */