diff options
Diffstat (limited to 'arch/x86/kernel/fpu')
-rw-r--r-- | arch/x86/kernel/fpu/context.h | 4 | ||||
-rw-r--r-- | arch/x86/kernel/fpu/core.c | 128 | ||||
-rw-r--r-- | arch/x86/kernel/fpu/init.c | 21 | ||||
-rw-r--r-- | arch/x86/kernel/fpu/regset.c | 22 | ||||
-rw-r--r-- | arch/x86/kernel/fpu/signal.c | 29 | ||||
-rw-r--r-- | arch/x86/kernel/fpu/xstate.c | 177 | ||||
-rw-r--r-- | arch/x86/kernel/fpu/xstate.h | 31 |
7 files changed, 251 insertions, 161 deletions
diff --git a/arch/x86/kernel/fpu/context.h b/arch/x86/kernel/fpu/context.h index f6d856bd50bc..10d0a720659c 100644 --- a/arch/x86/kernel/fpu/context.h +++ b/arch/x86/kernel/fpu/context.h @@ -53,7 +53,7 @@ static inline void fpregs_activate(struct fpu *fpu) /* Internal helper for switch_fpu_return() and signal frame setup */ static inline void fpregs_restore_userregs(void) { - struct fpu *fpu = ¤t->thread.fpu; + struct fpu *fpu = x86_task_fpu(current); int cpu = smp_processor_id(); if (WARN_ON_ONCE(current->flags & (PF_KTHREAD | PF_USER_WORKER))) @@ -67,7 +67,7 @@ static inline void fpregs_restore_userregs(void) * If PKRU is enabled, then the PKRU value is already * correct because it was either set in switch_to() or in * flush_thread(). So it is excluded because it might be - * not up to date in current->thread.fpu.xsave state. + * not up to date in current->thread.fpu->xsave state. * * XFD state is handled in restore_fpregs_from_fpstate(). */ diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 91d6341f281f..ea138583dd92 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -11,6 +11,7 @@ #include <asm/fpu/sched.h> #include <asm/fpu/signal.h> #include <asm/fpu/types.h> +#include <asm/msr.h> #include <asm/traps.h> #include <asm/irq_regs.h> @@ -43,14 +44,27 @@ struct fpu_state_config fpu_user_cfg __ro_after_init; */ struct fpstate init_fpstate __ro_after_init; -/* Track in-kernel FPU usage */ -static DEFINE_PER_CPU(bool, in_kernel_fpu); +/* + * Track FPU initialization and kernel-mode usage. 'true' means the FPU is + * initialized and is not currently being used by the kernel: + */ +DEFINE_PER_CPU(bool, kernel_fpu_allowed); /* * Track which context is using the FPU on the CPU: */ DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx); +#ifdef CONFIG_X86_DEBUG_FPU +struct fpu *x86_task_fpu(struct task_struct *task) +{ + if (WARN_ON_ONCE(task->flags & PF_KTHREAD)) + return NULL; + + return (void *)task + sizeof(*task); +} +#endif + /* * Can we use the FPU in kernel mode with the * whole "kernel_fpu_begin/end()" sequence? @@ -61,15 +75,18 @@ bool irq_fpu_usable(void) return false; /* - * In kernel FPU usage already active? This detects any explicitly - * nested usage in task or softirq context, which is unsupported. It - * also detects attempted usage in a hardirq that has interrupted a - * kernel-mode FPU section. + * Return false in the following cases: + * + * - FPU is not yet initialized. This can happen only when the call is + * coming from CPU onlining, for example for microcode checksumming. + * - The kernel is already using the FPU, either because of explicit + * nesting (which should never be done), or because of implicit + * nesting when a hardirq interrupted a kernel-mode FPU section. + * + * The single boolean check below handles both cases: */ - if (this_cpu_read(in_kernel_fpu)) { - WARN_ON_FPU(!in_hardirq()); + if (!this_cpu_read(kernel_fpu_allowed)) return false; - } /* * When not in NMI or hard interrupt context, FPU can be used in: @@ -202,7 +219,7 @@ void fpu_reset_from_exception_fixup(void) #if IS_ENABLED(CONFIG_KVM) static void __fpstate_reset(struct fpstate *fpstate, u64 xfd); -static void fpu_init_guest_permissions(struct fpu_guest *gfpu) +static void fpu_lock_guest_permissions(void) { struct fpu_state_perm *fpuperm; u64 perm; @@ -211,15 +228,13 @@ static void fpu_init_guest_permissions(struct fpu_guest *gfpu) return; spin_lock_irq(¤t->sighand->siglock); - fpuperm = ¤t->group_leader->thread.fpu.guest_perm; + fpuperm = &x86_task_fpu(current->group_leader)->guest_perm; perm = fpuperm->__state_perm; /* First fpstate allocation locks down permissions. */ WRITE_ONCE(fpuperm->__state_perm, perm | FPU_GUEST_PERM_LOCKED); spin_unlock_irq(¤t->sighand->siglock); - - gfpu->perm = perm & ~FPU_GUEST_PERM_LOCKED; } bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu) @@ -240,7 +255,6 @@ bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu) gfpu->fpstate = fpstate; gfpu->xfeatures = fpu_kernel_cfg.default_features; - gfpu->perm = fpu_kernel_cfg.default_features; /* * KVM sets the FP+SSE bits in the XSAVE header when copying FPU state @@ -255,7 +269,7 @@ bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu) if (WARN_ON_ONCE(fpu_user_cfg.default_size > gfpu->uabi_size)) gfpu->uabi_size = fpu_user_cfg.default_size; - fpu_init_guest_permissions(gfpu); + fpu_lock_guest_permissions(); return true; } @@ -263,16 +277,16 @@ EXPORT_SYMBOL_GPL(fpu_alloc_guest_fpstate); void fpu_free_guest_fpstate(struct fpu_guest *gfpu) { - struct fpstate *fps = gfpu->fpstate; + struct fpstate *fpstate = gfpu->fpstate; - if (!fps) + if (!fpstate) return; - if (WARN_ON_ONCE(!fps->is_valloc || !fps->is_guest || fps->in_use)) + if (WARN_ON_ONCE(!fpstate->is_valloc || !fpstate->is_guest || fpstate->in_use)) return; gfpu->fpstate = NULL; - vfree(fps); + vfree(fpstate); } EXPORT_SYMBOL_GPL(fpu_free_guest_fpstate); @@ -323,12 +337,12 @@ EXPORT_SYMBOL_GPL(fpu_update_guest_xfd); */ void fpu_sync_guest_vmexit_xfd_state(void) { - struct fpstate *fps = current->thread.fpu.fpstate; + struct fpstate *fpstate = x86_task_fpu(current)->fpstate; lockdep_assert_irqs_disabled(); if (fpu_state_size_dynamic()) { - rdmsrl(MSR_IA32_XFD, fps->xfd); - __this_cpu_write(xfd_state, fps->xfd); + rdmsrq(MSR_IA32_XFD, fpstate->xfd); + __this_cpu_write(xfd_state, fpstate->xfd); } } EXPORT_SYMBOL_GPL(fpu_sync_guest_vmexit_xfd_state); @@ -337,7 +351,7 @@ EXPORT_SYMBOL_GPL(fpu_sync_guest_vmexit_xfd_state); int fpu_swap_kvm_fpstate(struct fpu_guest *guest_fpu, bool enter_guest) { struct fpstate *guest_fps = guest_fpu->fpstate; - struct fpu *fpu = ¤t->thread.fpu; + struct fpu *fpu = x86_task_fpu(current); struct fpstate *cur_fps = fpu->fpstate; fpregs_lock(); @@ -431,14 +445,15 @@ void kernel_fpu_begin_mask(unsigned int kfpu_mask) fpregs_lock(); WARN_ON_FPU(!irq_fpu_usable()); - WARN_ON_FPU(this_cpu_read(in_kernel_fpu)); - this_cpu_write(in_kernel_fpu, true); + /* Toggle kernel_fpu_allowed to false: */ + WARN_ON_FPU(!this_cpu_read(kernel_fpu_allowed)); + this_cpu_write(kernel_fpu_allowed, false); if (!(current->flags & (PF_KTHREAD | PF_USER_WORKER)) && !test_thread_flag(TIF_NEED_FPU_LOAD)) { set_thread_flag(TIF_NEED_FPU_LOAD); - save_fpregs_to_fpstate(¤t->thread.fpu); + save_fpregs_to_fpstate(x86_task_fpu(current)); } __cpu_invalidate_fpregs_state(); @@ -453,9 +468,10 @@ EXPORT_SYMBOL_GPL(kernel_fpu_begin_mask); void kernel_fpu_end(void) { - WARN_ON_FPU(!this_cpu_read(in_kernel_fpu)); + /* Toggle kernel_fpu_allowed back to true: */ + WARN_ON_FPU(this_cpu_read(kernel_fpu_allowed)); + this_cpu_write(kernel_fpu_allowed, true); - this_cpu_write(in_kernel_fpu, false); if (!irqs_disabled()) fpregs_unlock(); } @@ -467,7 +483,7 @@ EXPORT_SYMBOL_GPL(kernel_fpu_end); */ void fpu_sync_fpstate(struct fpu *fpu) { - WARN_ON_FPU(fpu != ¤t->thread.fpu); + WARN_ON_FPU(fpu != x86_task_fpu(current)); fpregs_lock(); trace_x86_fpu_before_save(fpu); @@ -552,7 +568,7 @@ void fpstate_reset(struct fpu *fpu) static inline void fpu_inherit_perms(struct fpu *dst_fpu) { if (fpu_state_size_dynamic()) { - struct fpu *src_fpu = ¤t->group_leader->thread.fpu; + struct fpu *src_fpu = x86_task_fpu(current->group_leader); spin_lock_irq(¤t->sighand->siglock); /* Fork also inherits the permissions of the parent */ @@ -572,7 +588,7 @@ static int update_fpu_shstk(struct task_struct *dst, unsigned long ssp) if (!ssp) return 0; - xstate = get_xsave_addr(&dst->thread.fpu.fpstate->regs.xsave, + xstate = get_xsave_addr(&x86_task_fpu(dst)->fpstate->regs.xsave, XFEATURE_CET_USER); /* @@ -593,8 +609,16 @@ static int update_fpu_shstk(struct task_struct *dst, unsigned long ssp) int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal, unsigned long ssp) { - struct fpu *src_fpu = ¤t->thread.fpu; - struct fpu *dst_fpu = &dst->thread.fpu; + /* + * We allocate the new FPU structure right after the end of the task struct. + * task allocation size already took this into account. + * + * This is safe because task_struct size is a multiple of cacheline size, + * thus x86_task_fpu() will always be cacheline aligned as well. + */ + struct fpu *dst_fpu = (void *)dst + sizeof(*dst); + + BUILD_BUG_ON(sizeof(*dst) % SMP_CACHE_BYTES != 0); /* The new task's FPU state cannot be valid in the hardware. */ dst_fpu->last_cpu = -1; @@ -657,19 +681,22 @@ int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal, if (update_fpu_shstk(dst, ssp)) return 1; - trace_x86_fpu_copy_src(src_fpu); trace_x86_fpu_copy_dst(dst_fpu); return 0; } /* - * Whitelist the FPU register state embedded into task_struct for hardened - * usercopy. + * While struct fpu is no longer part of struct thread_struct, it is still + * allocated after struct task_struct in the "task_struct" kmem cache. But + * since FPU is expected to be part of struct thread_struct, we have to + * adjust for it here. */ void fpu_thread_struct_whitelist(unsigned long *offset, unsigned long *size) { - *offset = offsetof(struct thread_struct, fpu.__fpstate.regs); + /* The allocation follows struct task_struct. */ + *offset = sizeof(struct task_struct) - offsetof(struct task_struct, thread); + *offset += offsetof(struct fpu, __fpstate.regs); *size = fpu_kernel_cfg.default_size; } @@ -682,11 +709,18 @@ void fpu_thread_struct_whitelist(unsigned long *offset, unsigned long *size) * a state-restore is coming: either an explicit one, * or a reschedule. */ -void fpu__drop(struct fpu *fpu) +void fpu__drop(struct task_struct *tsk) { + struct fpu *fpu; + + if (test_tsk_thread_flag(tsk, TIF_NEED_FPU_LOAD)) + return; + + fpu = x86_task_fpu(tsk); + preempt_disable(); - if (fpu == ¤t->thread.fpu) { + if (fpu == x86_task_fpu(current)) { /* Ignore delayed exceptions from user space */ asm volatile("1: fwait\n" "2:\n" @@ -718,9 +752,9 @@ static inline void restore_fpregs_from_init_fpstate(u64 features_mask) /* * Reset current->fpu memory state to the init values. */ -static void fpu_reset_fpregs(void) +static void fpu_reset_fpstate_regs(void) { - struct fpu *fpu = ¤t->thread.fpu; + struct fpu *fpu = x86_task_fpu(current); fpregs_lock(); __fpu_invalidate_fpregs_state(fpu); @@ -749,11 +783,11 @@ static void fpu_reset_fpregs(void) */ void fpu__clear_user_states(struct fpu *fpu) { - WARN_ON_FPU(fpu != ¤t->thread.fpu); + WARN_ON_FPU(fpu != x86_task_fpu(current)); fpregs_lock(); if (!cpu_feature_enabled(X86_FEATURE_FPU)) { - fpu_reset_fpregs(); + fpu_reset_fpstate_regs(); fpregs_unlock(); return; } @@ -782,8 +816,8 @@ void fpu__clear_user_states(struct fpu *fpu) void fpu_flush_thread(void) { - fpstate_reset(¤t->thread.fpu); - fpu_reset_fpregs(); + fpstate_reset(x86_task_fpu(current)); + fpu_reset_fpstate_regs(); } /* * Load FPU context before returning to userspace. @@ -823,7 +857,7 @@ void fpregs_lock_and_load(void) */ void fpregs_assert_state_consistent(void) { - struct fpu *fpu = ¤t->thread.fpu; + struct fpu *fpu = x86_task_fpu(current); if (test_thread_flag(TIF_NEED_FPU_LOAD)) return; @@ -835,7 +869,7 @@ EXPORT_SYMBOL_GPL(fpregs_assert_state_consistent); void fpregs_mark_activate(void) { - struct fpu *fpu = ¤t->thread.fpu; + struct fpu *fpu = x86_task_fpu(current); fpregs_activate(fpu); fpu->last_cpu = smp_processor_id(); diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 998a08f17e33..99db41bf9fa6 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -38,7 +38,7 @@ static void fpu__init_cpu_generic(void) /* Flush out any pending x87 state: */ #ifdef CONFIG_MATH_EMULATION if (!boot_cpu_has(X86_FEATURE_FPU)) - fpstate_init_soft(¤t->thread.fpu.fpstate->regs.soft); + ; else #endif asm volatile ("fninit"); @@ -51,6 +51,9 @@ void fpu__init_cpu(void) { fpu__init_cpu_generic(); fpu__init_cpu_xstate(); + + /* Start allowing kernel-mode FPU: */ + this_cpu_write(kernel_fpu_allowed, true); } static bool __init fpu__probe_without_cpuid(void) @@ -73,6 +76,8 @@ static bool __init fpu__probe_without_cpuid(void) static void __init fpu__init_system_early_generic(void) { + set_thread_flag(TIF_NEED_FPU_LOAD); + if (!boot_cpu_has(X86_FEATURE_CPUID) && !test_bit(X86_FEATURE_FPU, (unsigned long *)cpu_caps_cleared)) { if (fpu__probe_without_cpuid()) @@ -94,7 +99,6 @@ static void __init fpu__init_system_early_generic(void) * Boot time FPU feature detection code: */ unsigned int mxcsr_feature_mask __ro_after_init = 0xffffffffu; -EXPORT_SYMBOL_GPL(mxcsr_feature_mask); static void __init fpu__init_system_mxcsr(void) { @@ -150,11 +154,13 @@ static void __init fpu__init_task_struct_size(void) { int task_size = sizeof(struct task_struct); + task_size += sizeof(struct fpu); + /* * Subtract off the static size of the register state. * It potentially has a bunch of padding. */ - task_size -= sizeof(current->thread.fpu.__fpstate.regs); + task_size -= sizeof(union fpregs_state); /* * Add back the dynamically-calculated register state @@ -164,14 +170,9 @@ static void __init fpu__init_task_struct_size(void) /* * We dynamically size 'struct fpu', so we require that - * it be at the end of 'thread_struct' and that - * 'thread_struct' be at the end of 'task_struct'. If - * you hit a compile error here, check the structure to - * see if something got added to the end. + * 'state' be at the end of 'it: */ CHECK_MEMBER_AT_END_OF(struct fpu, __fpstate); - CHECK_MEMBER_AT_END_OF(struct thread_struct, fpu); - CHECK_MEMBER_AT_END_OF(struct task_struct, thread); arch_task_struct_size = task_size; } @@ -204,7 +205,6 @@ static void __init fpu__init_system_xstate_size_legacy(void) fpu_kernel_cfg.default_size = size; fpu_user_cfg.max_size = size; fpu_user_cfg.default_size = size; - fpstate_reset(¤t->thread.fpu); } /* @@ -213,7 +213,6 @@ static void __init fpu__init_system_xstate_size_legacy(void) */ void __init fpu__init_system(void) { - fpstate_reset(¤t->thread.fpu); fpu__init_system_early_generic(); /* diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index 887b0b8e21e3..0986c2200adc 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -45,7 +45,7 @@ int regset_xregset_fpregs_active(struct task_struct *target, const struct user_r */ static void sync_fpstate(struct fpu *fpu) { - if (fpu == ¤t->thread.fpu) + if (fpu == x86_task_fpu(current)) fpu_sync_fpstate(fpu); } @@ -63,7 +63,7 @@ static void fpu_force_restore(struct fpu *fpu) * Only stopped child tasks can be used to modify the FPU * state in the fpstate buffer: */ - WARN_ON_FPU(fpu == ¤t->thread.fpu); + WARN_ON_FPU(fpu == x86_task_fpu(current)); __fpu_invalidate_fpregs_state(fpu); } @@ -71,7 +71,7 @@ static void fpu_force_restore(struct fpu *fpu) int xfpregs_get(struct task_struct *target, const struct user_regset *regset, struct membuf to) { - struct fpu *fpu = &target->thread.fpu; + struct fpu *fpu = x86_task_fpu(target); if (!cpu_feature_enabled(X86_FEATURE_FXSR)) return -ENODEV; @@ -91,7 +91,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf) { - struct fpu *fpu = &target->thread.fpu; + struct fpu *fpu = x86_task_fpu(target); struct fxregs_state newstate; int ret; @@ -133,7 +133,7 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset, if (!cpu_feature_enabled(X86_FEATURE_XSAVE)) return -ENODEV; - sync_fpstate(&target->thread.fpu); + sync_fpstate(x86_task_fpu(target)); copy_xstate_to_uabi_buf(to, target, XSTATE_COPY_XSAVE); return 0; @@ -143,7 +143,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf) { - struct fpu *fpu = &target->thread.fpu; + struct fpu *fpu = x86_task_fpu(target); struct xregs_state *tmpbuf = NULL; int ret; @@ -187,7 +187,7 @@ int ssp_active(struct task_struct *target, const struct user_regset *regset) int ssp_get(struct task_struct *target, const struct user_regset *regset, struct membuf to) { - struct fpu *fpu = &target->thread.fpu; + struct fpu *fpu = x86_task_fpu(target); struct cet_user_state *cetregs; if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) || @@ -214,7 +214,7 @@ int ssp_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf) { - struct fpu *fpu = &target->thread.fpu; + struct fpu *fpu = x86_task_fpu(target); struct xregs_state *xsave = &fpu->fpstate->regs.xsave; struct cet_user_state *cetregs; unsigned long user_ssp; @@ -368,7 +368,7 @@ static void __convert_from_fxsr(struct user_i387_ia32_struct *env, void convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) { - __convert_from_fxsr(env, tsk, &tsk->thread.fpu.fpstate->regs.fxsave); + __convert_from_fxsr(env, tsk, &x86_task_fpu(tsk)->fpstate->regs.fxsave); } void convert_to_fxsr(struct fxregs_state *fxsave, @@ -401,7 +401,7 @@ void convert_to_fxsr(struct fxregs_state *fxsave, int fpregs_get(struct task_struct *target, const struct user_regset *regset, struct membuf to) { - struct fpu *fpu = &target->thread.fpu; + struct fpu *fpu = x86_task_fpu(target); struct user_i387_ia32_struct env; struct fxregs_state fxsave, *fx; @@ -433,7 +433,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf) { - struct fpu *fpu = &target->thread.fpu; + struct fpu *fpu = x86_task_fpu(target); struct user_i387_ia32_struct env; int ret; diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 6c69cb28b298..c3ec2512f2bb 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -43,13 +43,13 @@ static inline bool check_xstate_in_sigframe(struct fxregs_state __user *fxbuf, * fpstate layout with out copying the extended state information * in the memory layout. */ - if (__get_user(magic2, (__u32 __user *)(fpstate + current->thread.fpu.fpstate->user_size))) + if (__get_user(magic2, (__u32 __user *)(fpstate + x86_task_fpu(current)->fpstate->user_size))) return false; if (likely(magic2 == FP_XSTATE_MAGIC2)) return true; setfx: - trace_x86_fpu_xstate_check_failed(¤t->thread.fpu); + trace_x86_fpu_xstate_check_failed(x86_task_fpu(current)); /* Set the parameters for fx only state */ fx_sw->magic1 = 0; @@ -64,13 +64,13 @@ setfx: static inline bool save_fsave_header(struct task_struct *tsk, void __user *buf) { if (use_fxsr()) { - struct xregs_state *xsave = &tsk->thread.fpu.fpstate->regs.xsave; + struct xregs_state *xsave = &x86_task_fpu(tsk)->fpstate->regs.xsave; struct user_i387_ia32_struct env; struct _fpstate_32 __user *fp = buf; fpregs_lock(); if (!test_thread_flag(TIF_NEED_FPU_LOAD)) - fxsave(&tsk->thread.fpu.fpstate->regs.fxsave); + fxsave(&x86_task_fpu(tsk)->fpstate->regs.fxsave); fpregs_unlock(); convert_from_fxsr(&env, tsk); @@ -114,7 +114,6 @@ static inline bool save_xstate_epilog(void __user *buf, int ia32_frame, { struct xregs_state __user *x = buf; struct _fpx_sw_bytes sw_bytes = {}; - u32 xfeatures; int err; /* Setup the bytes not touched by the [f]xsave and reserved for SW. */ @@ -128,12 +127,6 @@ static inline bool save_xstate_epilog(void __user *buf, int ia32_frame, (__u32 __user *)(buf + fpstate->user_size)); /* - * Read the xfeatures which we copied (directly from the cpu or - * from the state in task struct) to the user buffers. - */ - err |= __get_user(xfeatures, (__u32 __user *)&x->header.xfeatures); - - /* * For legacy compatible, we always set FP/SSE bits in the bit * vector while saving the state to the user context. This will * enable us capturing any changes(during sigreturn) to @@ -144,9 +137,7 @@ static inline bool save_xstate_epilog(void __user *buf, int ia32_frame, * header as well as change any contents in the memory layout. * xrestore as part of sigreturn will capture all the changes. */ - xfeatures |= XFEATURE_MASK_FPSSE; - - err |= __put_user(xfeatures, (__u32 __user *)&x->header.xfeatures); + err |= set_xfeature_in_sigframe(x, XFEATURE_MASK_FPSSE); return !err; } @@ -184,7 +175,7 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf, u32 pk bool copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size, u32 pkru) { struct task_struct *tsk = current; - struct fpstate *fpstate = tsk->thread.fpu.fpstate; + struct fpstate *fpstate = x86_task_fpu(tsk)->fpstate; bool ia32_fxstate = (buf != buf_fx); int ret; @@ -272,7 +263,7 @@ static int __restore_fpregs_from_user(void __user *buf, u64 ufeatures, */ static bool restore_fpregs_from_user(void __user *buf, u64 xrestore, bool fx_only) { - struct fpu *fpu = ¤t->thread.fpu; + struct fpu *fpu = x86_task_fpu(current); int ret; /* Restore enabled features only. */ @@ -332,7 +323,7 @@ static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx, bool ia32_fxstate) { struct task_struct *tsk = current; - struct fpu *fpu = &tsk->thread.fpu; + struct fpu *fpu = x86_task_fpu(tsk); struct user_i387_ia32_struct env; bool success, fx_only = false; union fpregs_state *fpregs; @@ -452,7 +443,7 @@ static inline unsigned int xstate_sigframe_size(struct fpstate *fpstate) */ bool fpu__restore_sig(void __user *buf, int ia32_frame) { - struct fpu *fpu = ¤t->thread.fpu; + struct fpu *fpu = x86_task_fpu(current); void __user *buf_fx = buf; bool ia32_fxstate = false; bool success = false; @@ -499,7 +490,7 @@ unsigned long fpu__alloc_mathframe(unsigned long sp, int ia32_frame, unsigned long *buf_fx, unsigned long *size) { - unsigned long frame_size = xstate_sigframe_size(current->thread.fpu.fpstate); + unsigned long frame_size = xstate_sigframe_size(x86_task_fpu(current)->fpstate); *buf_fx = sp = round_down(sp - frame_size, 64); if (ia32_frame && use_fxsr()) { diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 6a41d1610d8b..9aa9ac8399ae 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -14,13 +14,15 @@ #include <linux/proc_fs.h> #include <linux/vmalloc.h> #include <linux/coredump.h> +#include <linux/sort.h> #include <asm/fpu/api.h> #include <asm/fpu/regset.h> #include <asm/fpu/signal.h> #include <asm/fpu/xcr.h> -#include <asm/cpuid.h> +#include <asm/cpuid/api.h> +#include <asm/msr.h> #include <asm/tlbflush.h> #include <asm/prctl.h> #include <asm/elf.h> @@ -62,6 +64,7 @@ static const char *xfeature_names[] = "unknown xstate feature", "AMX Tile config", "AMX Tile data", + "APX registers", "unknown xstate feature", }; @@ -80,6 +83,7 @@ static unsigned short xsave_cpuid_features[] __initdata = { [XFEATURE_CET_USER] = X86_FEATURE_SHSTK, [XFEATURE_XTILE_CFG] = X86_FEATURE_AMX_TILE, [XFEATURE_XTILE_DATA] = X86_FEATURE_AMX_TILE, + [XFEATURE_APX] = X86_FEATURE_APX, }; static unsigned int xstate_offsets[XFEATURE_MAX] __ro_after_init = @@ -88,6 +92,31 @@ static unsigned int xstate_sizes[XFEATURE_MAX] __ro_after_init = { [ 0 ... XFEATURE_MAX - 1] = -1}; static unsigned int xstate_flags[XFEATURE_MAX] __ro_after_init; +/* + * Ordering of xstate components in uncompacted format: The xfeature + * number does not necessarily indicate its position in the XSAVE buffer. + * This array defines the traversal order of xstate features. + */ +static unsigned int xfeature_uncompact_order[XFEATURE_MAX] __ro_after_init = + { [ 0 ... XFEATURE_MAX - 1] = -1}; + +static inline unsigned int next_xfeature_order(unsigned int i, u64 mask) +{ + for (; xfeature_uncompact_order[i] != -1; i++) { + if (mask & BIT_ULL(xfeature_uncompact_order[i])) + break; + } + + return i; +} + +/* Iterate xstate features in uncompacted order: */ +#define for_each_extended_xfeature_in_order(i, mask) \ + for (i = 0; \ + i = next_xfeature_order(i, mask), \ + xfeature_uncompact_order[i] != -1; \ + i++) + #define XSTATE_FLAG_SUPERVISOR BIT(0) #define XSTATE_FLAG_ALIGNED64 BIT(1) @@ -199,7 +228,7 @@ void fpu__init_cpu_xstate(void) * MSR_IA32_XSS sets supervisor states managed by XSAVES. */ if (boot_cpu_has(X86_FEATURE_XSAVES)) { - wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() | + wrmsrq(MSR_IA32_XSS, xfeatures_mask_supervisor() | xfeatures_mask_independent()); } } @@ -209,16 +238,20 @@ static bool xfeature_enabled(enum xfeature xfeature) return fpu_kernel_cfg.max_features & BIT_ULL(xfeature); } +static int compare_xstate_offsets(const void *xfeature1, const void *xfeature2) +{ + return xstate_offsets[*(unsigned int *)xfeature1] - + xstate_offsets[*(unsigned int *)xfeature2]; +} + /* * Record the offsets and sizes of various xstates contained - * in the XSAVE state memory layout. + * in the XSAVE state memory layout. Also, create an ordered + * list of xfeatures for handling out-of-order offsets. */ static void __init setup_xstate_cache(void) { - u32 eax, ebx, ecx, edx, i; - /* start at the beginning of the "extended state" */ - unsigned int last_good_offset = offsetof(struct xregs_state, - extended_state_area); + u32 eax, ebx, ecx, edx, xfeature, i = 0; /* * The FP xstates and SSE xstates are legacy states. They are always * in the fixed offsets in the xsave area in either compacted form @@ -232,31 +265,30 @@ static void __init setup_xstate_cache(void) xstate_sizes[XFEATURE_SSE] = sizeof_field(struct fxregs_state, xmm_space); - for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) { - cpuid_count(CPUID_LEAF_XSTATE, i, &eax, &ebx, &ecx, &edx); + for_each_extended_xfeature(xfeature, fpu_kernel_cfg.max_features) { + cpuid_count(CPUID_LEAF_XSTATE, xfeature, &eax, &ebx, &ecx, &edx); - xstate_sizes[i] = eax; - xstate_flags[i] = ecx; + xstate_sizes[xfeature] = eax; + xstate_flags[xfeature] = ecx; /* * If an xfeature is supervisor state, the offset in EBX is * invalid, leave it to -1. */ - if (xfeature_is_supervisor(i)) + if (xfeature_is_supervisor(xfeature)) continue; - xstate_offsets[i] = ebx; + xstate_offsets[xfeature] = ebx; - /* - * In our xstate size checks, we assume that the highest-numbered - * xstate feature has the highest offset in the buffer. Ensure - * it does. - */ - WARN_ONCE(last_good_offset > xstate_offsets[i], - "x86/fpu: misordered xstate at %d\n", last_good_offset); - - last_good_offset = xstate_offsets[i]; + /* Populate the list of xfeatures before sorting */ + xfeature_uncompact_order[i++] = xfeature; } + + /* + * Sort xfeatures by their offsets to support out-of-order + * offsets in the uncompacted format. + */ + sort(xfeature_uncompact_order, i, sizeof(unsigned int), compare_xstate_offsets, NULL); } /* @@ -340,7 +372,8 @@ static __init void os_xrstor_booting(struct xregs_state *xstate) XFEATURE_MASK_BNDCSR | \ XFEATURE_MASK_PASID | \ XFEATURE_MASK_CET_USER | \ - XFEATURE_MASK_XTILE) + XFEATURE_MASK_XTILE | \ + XFEATURE_MASK_APX) /* * setup the xstate image representing the init state @@ -540,6 +573,7 @@ static bool __init check_xstate_against_struct(int nr) case XFEATURE_PASID: return XCHECK_SZ(sz, nr, struct ia32_pasid_state); case XFEATURE_XTILE_CFG: return XCHECK_SZ(sz, nr, struct xtile_cfg); case XFEATURE_CET_USER: return XCHECK_SZ(sz, nr, struct cet_user_state); + case XFEATURE_APX: return XCHECK_SZ(sz, nr, struct apx_state); case XFEATURE_XTILE_DATA: check_xtile_data_against_struct(sz); return true; default: XSTATE_WARN_ON(1, "No structure for xstate: %d\n", nr); @@ -552,13 +586,20 @@ static bool __init check_xstate_against_struct(int nr) static unsigned int xstate_calculate_size(u64 xfeatures, bool compacted) { unsigned int topmost = fls64(xfeatures) - 1; - unsigned int offset = xstate_offsets[topmost]; + unsigned int offset, i; if (topmost <= XFEATURE_SSE) return sizeof(struct xregs_state); - if (compacted) + if (compacted) { offset = xfeature_get_offset(xfeatures, topmost); + } else { + /* Walk through the xfeature order to pick the last */ + for_each_extended_xfeature_in_order(i, xfeatures) + topmost = xfeature_uncompact_order[i]; + offset = xstate_offsets[topmost]; + } + return offset + xstate_sizes[topmost]; } @@ -639,7 +680,7 @@ static unsigned int __init get_xsave_compacted_size(void) return get_compacted_size(); /* Disable independent features. */ - wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor()); + wrmsrq(MSR_IA32_XSS, xfeatures_mask_supervisor()); /* * Ask the hardware what size is required of the buffer. @@ -648,7 +689,7 @@ static unsigned int __init get_xsave_compacted_size(void) size = get_compacted_size(); /* Re-enable independent features so XSAVES will work on them again. */ - wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() | mask); + wrmsrq(MSR_IA32_XSS, xfeatures_mask_supervisor() | mask); return size; } @@ -711,6 +752,8 @@ static int __init init_xstate_size(void) */ static void __init fpu__init_disable_system_xstate(unsigned int legacy_size) { + pr_info("x86/fpu: XSAVE disabled\n"); + fpu_kernel_cfg.max_features = 0; cr4_clear_bits(X86_CR4_OSXSAVE); setup_clear_cpu_cap(X86_FEATURE_XSAVE); @@ -727,7 +770,7 @@ static void __init fpu__init_disable_system_xstate(unsigned int legacy_size) */ init_fpstate.xfd = 0; - fpstate_reset(¤t->thread.fpu); + fpstate_reset(x86_task_fpu(current)); } /* @@ -775,6 +818,17 @@ void __init fpu__init_system_xstate(unsigned int legacy_size) goto out_disable; } + if (fpu_kernel_cfg.max_features & XFEATURE_MASK_APX && + fpu_kernel_cfg.max_features & (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR)) { + /* + * This is a problematic CPU configuration where two + * conflicting state components are both enumerated. + */ + pr_err("x86/fpu: Both APX/MPX present in the CPU's xstate features: 0x%llx.\n", + fpu_kernel_cfg.max_features); + goto out_disable; + } + fpu_kernel_cfg.independent_features = fpu_kernel_cfg.max_features & XFEATURE_MASK_INDEPENDENT; @@ -834,9 +888,6 @@ void __init fpu__init_system_xstate(unsigned int legacy_size) if (err) goto out_disable; - /* Reset the state for the current task */ - fpstate_reset(¤t->thread.fpu); - /* * Update info used for ptrace frames; use standard-format size and no * supervisor xstates: @@ -852,7 +903,7 @@ void __init fpu__init_system_xstate(unsigned int legacy_size) init_fpstate.xfeatures = fpu_kernel_cfg.default_features; if (init_fpstate.size > sizeof(init_fpstate.regs)) { - pr_warn("x86/fpu: init_fpstate buffer too small (%zu < %d), disabling XSAVE\n", + pr_warn("x86/fpu: init_fpstate buffer too small (%zu < %d)\n", sizeof(init_fpstate.regs), init_fpstate.size); goto out_disable; } @@ -864,7 +915,7 @@ void __init fpu__init_system_xstate(unsigned int legacy_size) * xfeatures mask. */ if (xfeatures != fpu_kernel_cfg.max_features) { - pr_err("x86/fpu: xfeatures modified from 0x%016llx to 0x%016llx during init, disabling XSAVE\n", + pr_err("x86/fpu: xfeatures modified from 0x%016llx to 0x%016llx during init\n", xfeatures, fpu_kernel_cfg.max_features); goto out_disable; } @@ -904,12 +955,12 @@ void fpu__resume_cpu(void) * of XSAVES and MSR_IA32_XSS. */ if (cpu_feature_enabled(X86_FEATURE_XSAVES)) { - wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() | + wrmsrq(MSR_IA32_XSS, xfeatures_mask_supervisor() | xfeatures_mask_independent()); } if (fpu_state_size_dynamic()) - wrmsrl(MSR_IA32_XFD, current->thread.fpu.fpstate->xfd); + wrmsrq(MSR_IA32_XFD, x86_task_fpu(current)->fpstate->xfd); } /* @@ -1071,10 +1122,9 @@ void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate, const unsigned int off_mxcsr = offsetof(struct fxregs_state, mxcsr); struct xregs_state *xinit = &init_fpstate.regs.xsave; struct xregs_state *xsave = &fpstate->regs.xsave; + unsigned int zerofrom, i, xfeature; struct xstate_header header; - unsigned int zerofrom; u64 mask; - int i; memset(&header, 0, sizeof(header)); header.xfeatures = xsave->header.xfeatures; @@ -1143,15 +1193,16 @@ void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate, */ mask = header.xfeatures; - for_each_extended_xfeature(i, mask) { + for_each_extended_xfeature_in_order(i, mask) { + xfeature = xfeature_uncompact_order[i]; /* * If there was a feature or alignment gap, zero the space * in the destination buffer. */ - if (zerofrom < xstate_offsets[i]) - membuf_zero(&to, xstate_offsets[i] - zerofrom); + if (zerofrom < xstate_offsets[xfeature]) + membuf_zero(&to, xstate_offsets[xfeature] - zerofrom); - if (i == XFEATURE_PKRU) { + if (xfeature == XFEATURE_PKRU) { struct pkru_state pkru = {0}; /* * PKRU is not necessarily up to date in the @@ -1161,14 +1212,14 @@ void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate, membuf_write(&to, &pkru, sizeof(pkru)); } else { membuf_write(&to, - __raw_xsave_addr(xsave, i), - xstate_sizes[i]); + __raw_xsave_addr(xsave, xfeature), + xstate_sizes[xfeature]); } /* * Keep track of the last copied state in the non-compacted * target buffer for gap zeroing. */ - zerofrom = xstate_offsets[i] + xstate_sizes[i]; + zerofrom = xstate_offsets[xfeature] + xstate_sizes[xfeature]; } out: @@ -1191,8 +1242,8 @@ out: void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk, enum xstate_copy_mode copy_mode) { - __copy_xstate_to_uabi_buf(to, tsk->thread.fpu.fpstate, - tsk->thread.fpu.fpstate->user_xfeatures, + __copy_xstate_to_uabi_buf(to, x86_task_fpu(tsk)->fpstate, + x86_task_fpu(tsk)->fpstate->user_xfeatures, tsk->thread.pkru, copy_mode); } @@ -1332,7 +1383,7 @@ int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u int copy_sigframe_from_user_to_xstate(struct task_struct *tsk, const void __user *ubuf) { - return copy_uabi_to_xstate(tsk->thread.fpu.fpstate, NULL, ubuf, &tsk->thread.pkru); + return copy_uabi_to_xstate(x86_task_fpu(tsk)->fpstate, NULL, ubuf, &tsk->thread.pkru); } static bool validate_independent_components(u64 mask) @@ -1398,9 +1449,9 @@ void xrstors(struct xregs_state *xstate, u64 mask) } #if IS_ENABLED(CONFIG_KVM) -void fpstate_clear_xstate_component(struct fpstate *fps, unsigned int xfeature) +void fpstate_clear_xstate_component(struct fpstate *fpstate, unsigned int xfeature) { - void *addr = get_xsave_addr(&fps->regs.xsave, xfeature); + void *addr = get_xsave_addr(&fpstate->regs.xsave, xfeature); if (addr) memset(addr, 0, xstate_sizes[xfeature]); @@ -1426,7 +1477,7 @@ static bool xstate_op_valid(struct fpstate *fpstate, u64 mask, bool rstor) * The XFD MSR does not match fpstate->xfd. That's invalid when * the passed in fpstate is current's fpstate. */ - if (fpstate->xfd == current->thread.fpu.fpstate->xfd) + if (fpstate->xfd == x86_task_fpu(current)->fpstate->xfd) return false; /* @@ -1503,7 +1554,7 @@ void fpstate_free(struct fpu *fpu) static int fpstate_realloc(u64 xfeatures, unsigned int ksize, unsigned int usize, struct fpu_guest *guest_fpu) { - struct fpu *fpu = ¤t->thread.fpu; + struct fpu *fpu = x86_task_fpu(current); struct fpstate *curfps, *newfps = NULL; unsigned int fpsize; bool in_use; @@ -1596,7 +1647,7 @@ static int __xstate_request_perm(u64 permitted, u64 requested, bool guest) * AVX512. */ bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED); - struct fpu *fpu = ¤t->group_leader->thread.fpu; + struct fpu *fpu = x86_task_fpu(current->group_leader); struct fpu_state_perm *perm; unsigned int ksize, usize; u64 mask; @@ -1606,16 +1657,20 @@ static int __xstate_request_perm(u64 permitted, u64 requested, bool guest) if ((permitted & requested) == requested) return 0; - /* Calculate the resulting kernel state size */ + /* + * Calculate the resulting kernel state size. Note, @permitted also + * contains supervisor xfeatures even though supervisor are always + * permitted for kernel and guest FPUs, and never permitted for user + * FPUs. + */ mask = permitted | requested; - /* Take supervisor states into account on the host */ - if (!guest) - mask |= xfeatures_mask_supervisor(); ksize = xstate_calculate_size(mask, compacted); - /* Calculate the resulting user state size */ - mask &= XFEATURE_MASK_USER_SUPPORTED; - usize = xstate_calculate_size(mask, false); + /* + * Calculate the resulting user state size. Take care not to clobber + * the supervisor xfeatures in the new mask! + */ + usize = xstate_calculate_size(mask & XFEATURE_MASK_USER_SUPPORTED, false); if (!guest) { ret = validate_sigaltstack(usize); @@ -1699,7 +1754,7 @@ int __xfd_enable_feature(u64 xfd_err, struct fpu_guest *guest_fpu) return -EPERM; } - fpu = ¤t->group_leader->thread.fpu; + fpu = x86_task_fpu(current->group_leader); perm = guest_fpu ? &fpu->guest_perm : &fpu->perm; ksize = perm->__state_size; usize = perm->__user_state_size; @@ -1804,7 +1859,7 @@ long fpu_xstate_prctl(int option, unsigned long arg2) */ static void avx512_status(struct seq_file *m, struct task_struct *task) { - unsigned long timestamp = READ_ONCE(task->thread.fpu.avx512_timestamp); + unsigned long timestamp = READ_ONCE(x86_task_fpu(task)->avx512_timestamp); long delta; if (!timestamp) { diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h index 0fd34f53f025..52ce19289989 100644 --- a/arch/x86/kernel/fpu/xstate.h +++ b/arch/x86/kernel/fpu/xstate.h @@ -5,6 +5,7 @@ #include <asm/cpufeature.h> #include <asm/fpu/xstate.h> #include <asm/fpu/xcr.h> +#include <asm/msr.h> #ifdef CONFIG_X86_64 DECLARE_PER_CPU(u64, xfd_state); @@ -22,7 +23,7 @@ static inline void xstate_init_xcomp_bv(struct xregs_state *xsave, u64 mask) static inline u64 xstate_get_group_perm(bool guest) { - struct fpu *fpu = ¤t->group_leader->thread.fpu; + struct fpu *fpu = x86_task_fpu(current->group_leader); struct fpu_state_perm *perm; /* Pairs with WRITE_ONCE() in xstate_request_perm() */ @@ -69,21 +70,31 @@ static inline u64 xfeatures_mask_independent(void) return fpu_kernel_cfg.independent_features; } +static inline int set_xfeature_in_sigframe(struct xregs_state __user *xbuf, u64 mask) +{ + u64 xfeatures; + int err; + + /* Read the xfeatures value already saved in the user buffer */ + err = __get_user(xfeatures, &xbuf->header.xfeatures); + xfeatures |= mask; + err |= __put_user(xfeatures, &xbuf->header.xfeatures); + + return err; +} + /* * Update the value of PKRU register that was already pushed onto the signal frame. */ -static inline int update_pkru_in_sigframe(struct xregs_state __user *buf, u64 mask, u32 pkru) +static inline int update_pkru_in_sigframe(struct xregs_state __user *buf, u32 pkru) { - u64 xstate_bv; int err; if (unlikely(!cpu_feature_enabled(X86_FEATURE_OSPKE))) return 0; /* Mark PKRU as in-use so that it is restored correctly. */ - xstate_bv = (mask & xfeatures_in_use()) | XFEATURE_MASK_PKRU; - - err = __put_user(xstate_bv, &buf->header.xfeatures); + err = set_xfeature_in_sigframe(buf, XFEATURE_MASK_PKRU); if (err) return err; @@ -171,7 +182,7 @@ static inline void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rs #ifdef CONFIG_X86_64 static inline void xfd_set_state(u64 xfd) { - wrmsrl(MSR_IA32_XFD, xfd); + wrmsrq(MSR_IA32_XFD, xfd); __this_cpu_write(xfd_state, xfd); } @@ -288,7 +299,7 @@ static inline int xsave_to_user_sigframe(struct xregs_state __user *buf, u32 pkr * internally, e.g. PKRU. That's user space ABI and also required * to allow the signal handler to modify PKRU. */ - struct fpstate *fpstate = current->thread.fpu.fpstate; + struct fpstate *fpstate = x86_task_fpu(current)->fpstate; u64 mask = fpstate->user_xfeatures; u32 lmask; u32 hmask; @@ -307,7 +318,7 @@ static inline int xsave_to_user_sigframe(struct xregs_state __user *buf, u32 pkr clac(); if (!err) - err = update_pkru_in_sigframe(buf, mask, pkru); + err = update_pkru_in_sigframe(buf, pkru); return err; } @@ -322,7 +333,7 @@ static inline int xrstor_from_user_sigframe(struct xregs_state __user *buf, u64 u32 hmask = mask >> 32; int err; - xfd_validate_state(current->thread.fpu.fpstate, mask, true); + xfd_validate_state(x86_task_fpu(current)->fpstate, mask, true); stac(); XSTATE_OP(XRSTOR, xstate, lmask, hmask, err); |