diff options
Diffstat (limited to 'arch/x86/kernel/traps.c')
-rw-r--r-- | arch/x86/kernel/traps.c | 169 |
1 files changed, 134 insertions, 35 deletions
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 2dbadf347b5f..94c0236963c6 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -68,6 +68,7 @@ #include <asm/vdso.h> #include <asm/tdx.h> #include <asm/cfi.h> +#include <asm/msr.h> #ifdef CONFIG_X86_64 #include <asm/x86_init.h> @@ -94,10 +95,20 @@ __always_inline int is_valid_bugaddr(unsigned long addr) /* * Check for UD1 or UD2, accounting for Address Size Override Prefixes. - * If it's a UD1, get the ModRM byte to pass along to UBSan. + * If it's a UD1, further decode to determine its use: + * + * FineIBT: ea (bad) + * FineIBT: f0 75 f9 lock jne . - 6 + * UBSan{0}: 67 0f b9 00 ud1 (%eax),%eax + * UBSan{10}: 67 0f b9 40 10 ud1 0x10(%eax),%eax + * static_call: 0f b9 cc ud1 %esp,%ecx + * + * Notably UBSAN uses EAX, static_call uses ECX. */ -__always_inline int decode_bug(unsigned long addr, u32 *imm) +__always_inline int decode_bug(unsigned long addr, s32 *imm, int *len) { + unsigned long start = addr; + bool lock = false; u8 v; if (addr < TASK_SIZE_MAX) @@ -106,28 +117,67 @@ __always_inline int decode_bug(unsigned long addr, u32 *imm) v = *(u8 *)(addr++); if (v == INSN_ASOP) v = *(u8 *)(addr++); - if (v != OPCODE_ESCAPE) + + if (v == INSN_LOCK) { + lock = true; + v = *(u8 *)(addr++); + } + + switch (v) { + case 0x70 ... 0x7f: /* Jcc.d8 */ + addr += 1; /* d8 */ + *len = addr - start; + WARN_ON_ONCE(!lock); + return BUG_LOCK; + + case 0xea: + *len = addr - start; + return BUG_EA; + + case OPCODE_ESCAPE: + break; + + default: return BUG_NONE; + } v = *(u8 *)(addr++); - if (v == SECOND_BYTE_OPCODE_UD2) + if (v == SECOND_BYTE_OPCODE_UD2) { + *len = addr - start; return BUG_UD2; + } - if (!IS_ENABLED(CONFIG_UBSAN_TRAP) || v != SECOND_BYTE_OPCODE_UD1) + if (v != SECOND_BYTE_OPCODE_UD1) return BUG_NONE; - /* Retrieve the immediate (type value) for the UBSAN UD1 */ - v = *(u8 *)(addr++); - if (X86_MODRM_RM(v) == 4) - addr++; - *imm = 0; - if (X86_MODRM_MOD(v) == 1) - *imm = *(u8 *)addr; - else if (X86_MODRM_MOD(v) == 2) - *imm = *(u32 *)addr; - else - WARN_ONCE(1, "Unexpected MODRM_MOD: %u\n", X86_MODRM_MOD(v)); + v = *(u8 *)(addr++); /* ModRM */ + + if (X86_MODRM_MOD(v) != 3 && X86_MODRM_RM(v) == 4) + addr++; /* SIB */ + + /* Decode immediate, if present */ + switch (X86_MODRM_MOD(v)) { + case 0: if (X86_MODRM_RM(v) == 5) + addr += 4; /* RIP + disp32 */ + break; + + case 1: *imm = *(s8 *)addr; + addr += 1; + break; + + case 2: *imm = *(s32 *)addr; + addr += 4; + break; + + case 3: break; + } + + /* record instruction length */ + *len = addr - start; + + if (X86_MODRM_REG(v) == 0) /* EAX */ + return BUG_UD1_UBSAN; return BUG_UD1; } @@ -257,11 +307,12 @@ static inline void handle_invalid_op(struct pt_regs *regs) static noinstr bool handle_bug(struct pt_regs *regs) { + unsigned long addr = regs->ip; bool handled = false; - int ud_type; - u32 imm; + int ud_type, ud_len; + s32 ud_imm; - ud_type = decode_bug(regs->ip, &imm); + ud_type = decode_bug(addr, &ud_imm, &ud_len); if (ud_type == BUG_NONE) return handled; @@ -281,15 +332,47 @@ static noinstr bool handle_bug(struct pt_regs *regs) */ if (regs->flags & X86_EFLAGS_IF) raw_local_irq_enable(); - if (ud_type == BUG_UD2) { - if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN || - handle_cfi_failure(regs) == BUG_TRAP_TYPE_WARN) { - regs->ip += LEN_UD2; + + switch (ud_type) { + case BUG_UD2: + if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN) { + handled = true; + break; + } + fallthrough; + + case BUG_EA: + case BUG_LOCK: + if (handle_cfi_failure(regs) == BUG_TRAP_TYPE_WARN) { handled = true; + break; + } + break; + + case BUG_UD1_UBSAN: + if (IS_ENABLED(CONFIG_UBSAN_TRAP)) { + pr_crit("%s at %pS\n", + report_ubsan_failure(regs, ud_imm), + (void *)regs->ip); } - } else if (IS_ENABLED(CONFIG_UBSAN_TRAP)) { - pr_crit("%s at %pS\n", report_ubsan_failure(regs, imm), (void *)regs->ip); + break; + + default: + break; + } + + /* + * When continuing, and regs->ip hasn't changed, move it to the next + * instruction. When not continuing execution, restore the instruction + * pointer. + */ + if (handled) { + if (regs->ip == addr) + regs->ip += ud_len; + } else { + regs->ip = addr; } + if (regs->flags & X86_EFLAGS_IF) raw_local_irq_disable(); instrumentation_end(); @@ -380,6 +463,21 @@ __visible void __noreturn handle_stack_overflow(struct pt_regs *regs, #endif /* + * Prevent the compiler and/or objtool from marking the !CONFIG_X86_ESPFIX64 + * version of exc_double_fault() as noreturn. Otherwise the noreturn mismatch + * between configs triggers objtool warnings. + * + * This is a temporary hack until we have compiler or plugin support for + * annotating noreturns. + */ +#ifdef CONFIG_X86_ESPFIX64 +#define always_true() true +#else +bool always_true(void); +bool __weak always_true(void) { return true; } +#endif + +/* * Runs on an IST stack for x86_64 and on a special task stack for x86_32. * * On x86_64, this is more or less a normal kernel entry. Notwithstanding the @@ -514,7 +612,8 @@ DEFINE_IDTENTRY_DF(exc_double_fault) pr_emerg("PANIC: double fault, error_code: 0x%lx\n", error_code); die("double fault", regs, error_code); - panic("Machine halted."); + if (always_true()) + panic("Machine halted."); instrumentation_end(); } @@ -651,7 +750,7 @@ static bool try_fixup_enqcmd_gp(void) if (current->pasid_activated) return false; - wrmsrl(MSR_IA32_PASID, pasid | MSR_IA32_PASID_VALID); + wrmsrq(MSR_IA32_PASID, pasid | MSR_IA32_PASID_VALID); current->pasid_activated = 1; return true; @@ -784,16 +883,16 @@ static void do_int3_user(struct pt_regs *regs) DEFINE_IDTENTRY_RAW(exc_int3) { /* - * poke_int3_handler() is completely self contained code; it does (and + * smp_text_poke_int3_handler() is completely self contained code; it does (and * must) *NOT* call out to anything, lest it hits upon yet another * INT3. */ - if (poke_int3_handler(regs)) + if (smp_text_poke_int3_handler(regs)) return; /* * irqentry_enter_from_user_mode() uses static_branch_{,un}likely() - * and therefore can trigger INT3, hence poke_int3_handler() must + * and therefore can trigger INT3, hence smp_text_poke_int3_handler() must * be done before. If the entry came from kernel mode, then use * nmi_enter() because the INT3 could have been hit in any context * including NMI. @@ -1022,9 +1121,9 @@ static noinstr void exc_debug_kernel(struct pt_regs *regs, unsigned long dr6) */ unsigned long debugctl; - rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); + rdmsrq(MSR_IA32_DEBUGCTLMSR, debugctl); debugctl |= DEBUGCTLMSR_BTF; - wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); + wrmsrq(MSR_IA32_DEBUGCTLMSR, debugctl); } /* @@ -1197,7 +1296,7 @@ DEFINE_IDTENTRY_RAW(exc_debug) static void math_error(struct pt_regs *regs, int trapnr) { struct task_struct *task = current; - struct fpu *fpu = &task->thread.fpu; + struct fpu *fpu = x86_task_fpu(task); int si_code; char *str = (trapnr == X86_TRAP_MF) ? "fpu exception" : "simd exception"; @@ -1288,11 +1387,11 @@ static bool handle_xfd_event(struct pt_regs *regs) if (!IS_ENABLED(CONFIG_X86_64) || !cpu_feature_enabled(X86_FEATURE_XFD)) return false; - rdmsrl(MSR_IA32_XFD_ERR, xfd_err); + rdmsrq(MSR_IA32_XFD_ERR, xfd_err); if (!xfd_err) return false; - wrmsrl(MSR_IA32_XFD_ERR, 0); + wrmsrq(MSR_IA32_XFD_ERR, 0); /* Die if that happens in kernel space */ if (WARN_ON(!user_mode(regs))) |