-rw-r--r--  Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst | 8
-rw-r--r--  Documentation/admin-guide/kdump/kdump.rst | 4
-rw-r--r--  Documentation/admin-guide/kernel-parameters.txt | 11
-rw-r--r--  Documentation/arch/x86/usb-legacy-support.rst | 11
-rw-r--r--  Makefile | 3
-rw-r--r--  arch/arm/include/asm/io.h | 2
-rw-r--r--  arch/arm/mm/ioremap.c | 2
-rw-r--r--  arch/arm/mm/nommu.c | 2
-rw-r--r--  arch/riscv/include/asm/io.h | 2
-rw-r--r--  arch/um/kernel/um_arch.c | 11
-rw-r--r--  arch/x86/Kconfig | 202
-rw-r--r--  arch/x86/Kconfig.cpu | 105
-rw-r--r--  arch/x86/Kconfig.cpufeatures | 201
-rw-r--r--  arch/x86/Makefile | 51
-rw-r--r--  arch/x86/Makefile_32.cpu | 5
-rw-r--r--  arch/x86/boot/boot.h | 4
-rw-r--r--  arch/x86/boot/compressed/misc.c | 14
-rw-r--r--  arch/x86/boot/cpucheck.c | 3
-rw-r--r--  arch/x86/boot/cpuflags.c | 1
-rw-r--r--  arch/x86/boot/mkcpustr.c | 3
-rw-r--r--  arch/x86/configs/xen.config | 2
-rw-r--r--  arch/x86/crypto/aesni-intel_asm.S | 2
-rw-r--r--  arch/x86/crypto/camellia-aesni-avx-asm_64.S | 7
-rw-r--r--  arch/x86/crypto/camellia-aesni-avx2-asm_64.S | 1
-rw-r--r--  arch/x86/crypto/camellia-x86_64-asm_64.S | 9
-rw-r--r--  arch/x86/crypto/serpent-avx-x86_64-asm_64.S | 7
-rw-r--r--  arch/x86/crypto/twofish-x86_64-asm_64-3way.S | 5
-rw-r--r--  arch/x86/crypto/twofish-x86_64-asm_64.S | 5
-rw-r--r--  arch/x86/entry/Makefile | 8
-rw-r--r--  arch/x86/entry/calling.h | 1
-rw-r--r--  arch/x86/entry/common.c | 524
-rw-r--r--  arch/x86/entry/entry.S | 6
-rw-r--r--  arch/x86/entry/entry_32.S | 4
-rw-r--r--  arch/x86/entry/entry_64.S | 11
-rw-r--r--  arch/x86/entry/entry_64_compat.S | 4
-rw-r--r--  arch/x86/entry/entry_64_fred.S | 1
-rw-r--r--  arch/x86/entry/syscall_32.c | 332
-rw-r--r--  arch/x86/entry/syscall_64.c | 111
-rw-r--r--  arch/x86/entry/syscall_x32.c | 25
-rw-r--r--  arch/x86/entry/syscalls/syscall_32.tbl | 2
-rw-r--r--  arch/x86/entry/vdso/Makefile | 1
-rw-r--r--  arch/x86/entry/vdso/extable.h | 2
-rw-r--r--  arch/x86/entry/vdso/vma.c | 3
-rw-r--r--  arch/x86/events/core.c | 2
-rw-r--r--  arch/x86/events/intel/core.c | 35
-rw-r--r--  arch/x86/events/intel/p4.c | 7
-rw-r--r--  arch/x86/events/intel/p6.c | 26
-rw-r--r--  arch/x86/events/perf_event.h | 19
-rw-r--r--  arch/x86/hyperv/mmu.c | 1
-rw-r--r--  arch/x86/include/asm/Kbuild | 1
-rw-r--r--  arch/x86/include/asm/alternative.h | 45
-rw-r--r--  arch/x86/include/asm/amd_nb.h | 1
-rw-r--r--  arch/x86/include/asm/amd_node.h | 24
-rw-r--r--  arch/x86/include/asm/apic.h | 4
-rw-r--r--  arch/x86/include/asm/arch_hweight.h | 14
-rw-r--r--  arch/x86/include/asm/asm-prototypes.h | 4
-rw-r--r--  arch/x86/include/asm/asm.h | 21
-rw-r--r--  arch/x86/include/asm/atomic.h | 14
-rw-r--r--  arch/x86/include/asm/atomic64_32.h | 98
-rw-r--r--  arch/x86/include/asm/atomic64_64.h | 14
-rw-r--r--  arch/x86/include/asm/bitops.h | 14
-rw-r--r--  arch/x86/include/asm/boot.h | 2
-rw-r--r--  arch/x86/include/asm/bug.h | 8
-rw-r--r--  arch/x86/include/asm/cfi.h | 26
-rw-r--r--  arch/x86/include/asm/cmpxchg.h | 24
-rw-r--r--  arch/x86/include/asm/cmpxchg_32.h | 6
-rw-r--r--  arch/x86/include/asm/cmpxchg_64.h | 4
-rw-r--r--  arch/x86/include/asm/cpu.h | 15
-rw-r--r--  arch/x86/include/asm/cpu_device_id.h | 130
-rw-r--r--  arch/x86/include/asm/cpufeature.h | 81
-rw-r--r--  arch/x86/include/asm/cpufeatures.h | 9
-rw-r--r--  arch/x86/include/asm/cpuid.h | 216
-rw-r--r--  arch/x86/include/asm/cpuid/api.h | 210
-rw-r--r--  arch/x86/include/asm/cpuid/types.h | 32
-rw-r--r--  arch/x86/include/asm/cpumask.h | 4
-rw-r--r--  arch/x86/include/asm/current.h | 40
-rw-r--r--  arch/x86/include/asm/desc.h | 1
-rw-r--r--  arch/x86/include/asm/desc_defs.h | 4
-rw-r--r--  arch/x86/include/asm/disabled-features.h | 161
-rw-r--r--  arch/x86/include/asm/dwarf2.h | 2
-rw-r--r--  arch/x86/include/asm/elf.h | 5
-rw-r--r--  arch/x86/include/asm/fixmap.h | 4
-rw-r--r--  arch/x86/include/asm/frame.h | 10
-rw-r--r--  arch/x86/include/asm/fred.h | 4
-rw-r--r--  arch/x86/include/asm/fsgsbase.h | 4
-rw-r--r--  arch/x86/include/asm/ftrace.h | 24
-rw-r--r--  arch/x86/include/asm/hardirq.h | 4
-rw-r--r--  arch/x86/include/asm/hw_irq.h | 4
-rw-r--r--  arch/x86/include/asm/ibt.h | 25
-rw-r--r--  arch/x86/include/asm/idtentry.h | 6
-rw-r--r--  arch/x86/include/asm/init.h | 4
-rw-r--r--  arch/x86/include/asm/inst.h | 2
-rw-r--r--  arch/x86/include/asm/intel-family.h | 50
-rw-r--r--  arch/x86/include/asm/io.h | 3
-rw-r--r--  arch/x86/include/asm/irq_stack.h | 12
-rw-r--r--  arch/x86/include/asm/irqflags.h | 10
-rw-r--r--  arch/x86/include/asm/jump_label.h | 4
-rw-r--r--  arch/x86/include/asm/kasan.h | 2
-rw-r--r--  arch/x86/include/asm/kexec.h | 62
-rw-r--r--  arch/x86/include/asm/linkage.h | 24
-rw-r--r--  arch/x86/include/asm/mem_encrypt.h | 4
-rw-r--r--  arch/x86/include/asm/mmu.h | 12
-rw-r--r--  arch/x86/include/asm/mmu_context.h | 10
-rw-r--r--  arch/x86/include/asm/mshyperv.h | 18
-rw-r--r--  arch/x86/include/asm/msr-index.h | 2
-rw-r--r--  arch/x86/include/asm/msr.h | 4
-rw-r--r--  arch/x86/include/asm/nmi.h | 2
-rw-r--r--  arch/x86/include/asm/nops.h | 2
-rw-r--r--  arch/x86/include/asm/nospec-branch.h | 17
-rw-r--r--  arch/x86/include/asm/orc_types.h | 4
-rw-r--r--  arch/x86/include/asm/page.h | 4
-rw-r--r--  arch/x86/include/asm/page_32.h | 4
-rw-r--r--  arch/x86/include/asm/page_32_types.h | 8
-rw-r--r--  arch/x86/include/asm/page_64.h | 9
-rw-r--r--  arch/x86/include/asm/page_64_types.h | 2
-rw-r--r--  arch/x86/include/asm/page_types.h | 4
-rw-r--r--  arch/x86/include/asm/paravirt.h | 19
-rw-r--r--  arch/x86/include/asm/paravirt_types.h | 20
-rw-r--r--  arch/x86/include/asm/percpu.h | 76
-rw-r--r--  arch/x86/include/asm/pgalloc.h | 5
-rw-r--r--  arch/x86/include/asm/pgtable-2level_types.h | 4
-rw-r--r--  arch/x86/include/asm/pgtable-3level_types.h | 4
-rw-r--r--  arch/x86/include/asm/pgtable-invert.h | 4
-rw-r--r--  arch/x86/include/asm/pgtable.h | 12
-rw-r--r--  arch/x86/include/asm/pgtable_32.h | 4
-rw-r--r--  arch/x86/include/asm/pgtable_32_areas.h | 2
-rw-r--r--  arch/x86/include/asm/pgtable_64.h | 6
-rw-r--r--  arch/x86/include/asm/pgtable_64_types.h | 4
-rw-r--r--  arch/x86/include/asm/pgtable_types.h | 12
-rw-r--r--  arch/x86/include/asm/preempt.h | 25
-rw-r--r--  arch/x86/include/asm/processor.h | 66
-rw-r--r--  arch/x86/include/asm/prom.h | 4
-rw-r--r--  arch/x86/include/asm/proto.h | 3
-rw-r--r--  arch/x86/include/asm/pti.h | 4
-rw-r--r--  arch/x86/include/asm/ptrace.h | 4
-rw-r--r--  arch/x86/include/asm/purgatory.h | 4
-rw-r--r--  arch/x86/include/asm/pvclock-abi.h | 4
-rw-r--r--  arch/x86/include/asm/realmode.h | 4
-rw-r--r--  arch/x86/include/asm/required-features.h | 105
-rw-r--r--  arch/x86/include/asm/rmwcc.h | 2
-rw-r--r--  arch/x86/include/asm/runtime-const.h | 13
-rw-r--r--  arch/x86/include/asm/segment.h | 8
-rw-r--r--  arch/x86/include/asm/set_memory.h | 2
-rw-r--r--  arch/x86/include/asm/setup.h | 7
-rw-r--r--  arch/x86/include/asm/setup_data.h | 4
-rw-r--r--  arch/x86/include/asm/shared/tdx.h | 4
-rw-r--r--  arch/x86/include/asm/shstk.h | 4
-rw-r--r--  arch/x86/include/asm/signal.h | 8
-rw-r--r--  arch/x86/include/asm/smap.h | 6
-rw-r--r--  arch/x86/include/asm/smp.h | 24
-rw-r--r--  arch/x86/include/asm/special_insns.h | 22
-rw-r--r--  arch/x86/include/asm/sta2x11.h | 13
-rw-r--r--  arch/x86/include/asm/stackprotector.h | 36
-rw-r--r--  arch/x86/include/asm/string_64.h | 2
-rw-r--r--  arch/x86/include/asm/tdx.h | 4
-rw-r--r--  arch/x86/include/asm/thread_info.h | 12
-rw-r--r--  arch/x86/include/asm/tlb.h | 138
-rw-r--r--  arch/x86/include/asm/tlbbatch.h | 5
-rw-r--r--  arch/x86/include/asm/tlbflush.h | 72
-rw-r--r--  arch/x86/include/asm/traps.h | 2
-rw-r--r--  arch/x86/include/asm/unwind_hints.h | 4
-rw-r--r--  arch/x86/include/asm/vdso/getrandom.h | 4
-rw-r--r--  arch/x86/include/asm/vdso/gettimeofday.h | 4
-rw-r--r--  arch/x86/include/asm/vdso/processor.h | 4
-rw-r--r--  arch/x86/include/asm/vdso/vsyscall.h | 4
-rw-r--r--  arch/x86/include/asm/vermagic.h | 4
-rw-r--r--  arch/x86/include/asm/xen/interface.h | 10
-rw-r--r--  arch/x86/include/asm/xen/interface_32.h | 4
-rw-r--r--  arch/x86/include/asm/xen/interface_64.h | 4
-rw-r--r--  arch/x86/include/uapi/asm/bootparam.h | 4
-rw-r--r--  arch/x86/include/uapi/asm/e820.h | 4
-rw-r--r--  arch/x86/include/uapi/asm/ldt.h | 4
-rw-r--r--  arch/x86/include/uapi/asm/msr.h | 4
-rw-r--r--  arch/x86/include/uapi/asm/ptrace-abi.h | 6
-rw-r--r--  arch/x86/include/uapi/asm/ptrace.h | 4
-rw-r--r--  arch/x86/include/uapi/asm/setup_data.h | 4
-rw-r--r--  arch/x86/include/uapi/asm/signal.h | 8
-rw-r--r--  arch/x86/kernel/Makefile | 2
-rw-r--r--  arch/x86/kernel/acpi/cstate.c | 19
-rw-r--r--  arch/x86/kernel/acpi/madt_playdead.S | 1
-rw-r--r--  arch/x86/kernel/acpi/madt_wakeup.c | 73
-rw-r--r--  arch/x86/kernel/acpi/wakeup_64.S | 1
-rw-r--r--  arch/x86/kernel/alternative.c | 645
-rw-r--r--  arch/x86/kernel/amd_nb.c | 1
-rw-r--r--  arch/x86/kernel/amd_node.c | 149
-rw-r--r--  arch/x86/kernel/apic/Makefile | 3
-rw-r--r--  arch/x86/kernel/apic/apic.c | 7
-rw-r--r--  arch/x86/kernel/apic/bigsmp_32.c | 105
-rw-r--r--  arch/x86/kernel/apic/ipi.c | 30
-rw-r--r--  arch/x86/kernel/apic/local.h | 13
-rw-r--r--  arch/x86/kernel/apic/probe_32.c | 29
-rw-r--r--  arch/x86/kernel/asm-offsets.c | 5
-rw-r--r--  arch/x86/kernel/asm-offsets_64.c | 6
-rw-r--r--  arch/x86/kernel/callthunks.c | 13
-rw-r--r--  arch/x86/kernel/cfi.c | 26
-rw-r--r--  arch/x86/kernel/cpu/amd.c | 30
-rw-r--r--  arch/x86/kernel/cpu/cacheinfo.c | 31
-rw-r--r--  arch/x86/kernel/cpu/common.c | 237
-rw-r--r--  arch/x86/kernel/cpu/cpu.h | 8
-rw-r--r--  arch/x86/kernel/cpu/cpuid-deps.c | 35
-rw-r--r--  arch/x86/kernel/cpu/debugfs.c | 4
-rw-r--r--  arch/x86/kernel/cpu/hygon.c | 16
-rw-r--r--  arch/x86/kernel/cpu/intel.c | 297
-rw-r--r--  arch/x86/kernel/cpu/match.c | 30
-rw-r--r--  arch/x86/kernel/cpu/microcode/intel.c | 2
-rw-r--r--  arch/x86/kernel/cpu/mtrr/generic.c | 17
-rw-r--r--  arch/x86/kernel/cpu/mtrr/if.c | 6
-rw-r--r--  arch/x86/kernel/crash.c | 2
-rw-r--r--  arch/x86/kernel/dumpstack_32.c | 4
-rw-r--r--  arch/x86/kernel/dumpstack_64.c | 2
-rw-r--r--  arch/x86/kernel/early_printk.c | 49
-rw-r--r--  arch/x86/kernel/ftrace.c | 30
-rw-r--r--  arch/x86/kernel/ftrace_64.S | 5
-rw-r--r--  arch/x86/kernel/head64.c | 2
-rw-r--r--  arch/x86/kernel/head_64.S | 24
-rw-r--r--  arch/x86/kernel/ioport.c | 2
-rw-r--r--  arch/x86/kernel/irq.c | 5
-rw-r--r--  arch/x86/kernel/irq_32.c | 51
-rw-r--r--  arch/x86/kernel/irq_64.c | 8
-rw-r--r--  arch/x86/kernel/irqflags.S | 1
-rw-r--r--  arch/x86/kernel/kprobes/core.c | 11
-rw-r--r--  arch/x86/kernel/kvm.c | 1
-rw-r--r--  arch/x86/kernel/module.c | 81
-rw-r--r--  arch/x86/kernel/nmi.c | 42
-rw-r--r--  arch/x86/kernel/paravirt.c | 48
-rw-r--r--  arch/x86/kernel/process.c | 10
-rw-r--r--  arch/x86/kernel/process_32.c | 11
-rw-r--r--  arch/x86/kernel/process_64.c | 31
-rw-r--r--  arch/x86/kernel/reboot.c | 12
-rw-r--r--  arch/x86/kernel/setup.c | 66
-rw-r--r--  arch/x86/kernel/setup_percpu.c | 15
-rw-r--r--  arch/x86/kernel/signal_32.c | 62
-rw-r--r--  arch/x86/kernel/smpboot.c | 92
-rw-r--r--  arch/x86/kernel/traps.c | 132
-rw-r--r--  arch/x86/kernel/tsc_msr.c | 2
-rw-r--r--  arch/x86/kernel/verify_cpu.S | 4
-rw-r--r--  arch/x86/kernel/vmlinux.lds.S | 43
-rw-r--r--  arch/x86/kvm/vmx/vmx_ops.h | 18
-rw-r--r--  arch/x86/lib/Makefile | 5
-rw-r--r--  arch/x86/lib/bhi.S | 147
-rw-r--r--  arch/x86/lib/clear_page_64.S | 9
-rw-r--r--  arch/x86/lib/cmpxchg8b_emu.S | 2
-rw-r--r--  arch/x86/lib/copy_page_64.S | 3
-rw-r--r--  arch/x86/lib/copy_user_64.S | 3
-rw-r--r--  arch/x86/lib/copy_user_uncached_64.S | 2
-rw-r--r--  arch/x86/lib/getuser.S | 16
-rw-r--r--  arch/x86/lib/hweight.S | 3
-rw-r--r--  arch/x86/lib/memmove_64.S | 3
-rw-r--r--  arch/x86/lib/memset_64.S | 3
-rw-r--r--  arch/x86/lib/msr-reg.S | 3
-rw-r--r--  arch/x86/lib/putuser.S | 9
-rw-r--r--  arch/x86/lib/retpoline.S | 3
-rw-r--r--  arch/x86/math-emu/control_w.h | 2
-rw-r--r--  arch/x86/math-emu/exception.h | 6
-rw-r--r--  arch/x86/math-emu/fpu_emu.h | 6
-rw-r--r--  arch/x86/math-emu/status_w.h | 6
-rw-r--r--  arch/x86/mm/ident_map.c | 14
-rw-r--r--  arch/x86/mm/init.c | 9
-rw-r--r--  arch/x86/mm/init_32.c | 9
-rw-r--r--  arch/x86/mm/ioremap.c | 8
-rw-r--r--  arch/x86/mm/kaslr.c | 10
-rw-r--r--  arch/x86/mm/mem_encrypt_boot.S | 1
-rw-r--r--  arch/x86/mm/mmap.c | 9
-rw-r--r--  arch/x86/mm/pat/cpa-test.c | 2
-rw-r--r--  arch/x86/mm/pat/memtype.c | 6
-rw-r--r--  arch/x86/mm/pat/set_memory.c | 243
-rw-r--r--  arch/x86/mm/pgtable.c | 54
-rw-r--r--  arch/x86/mm/tlb.c | 429
-rw-r--r--  arch/x86/net/bpf_jit_comp.c | 34
-rw-r--r--  arch/x86/pci/Makefile | 2
-rw-r--r--  arch/x86/pci/sta2x11-fixup.c | 233
-rw-r--r--  arch/x86/platform/pvh/head.S | 14
-rw-r--r--  arch/x86/power/hibernate_asm_64.S | 2
-rw-r--r--  arch/x86/realmode/rm/realmode.h | 4
-rw-r--r--  arch/x86/realmode/rm/wakeup.h | 2
-rwxr-xr-x  arch/x86/tools/cpufeaturemasks.awk | 88
-rw-r--r--  arch/x86/tools/relocs.c | 147
-rw-r--r--  arch/x86/xen/Kconfig | 2
-rw-r--r--  arch/x86/xen/enlighten_pv.c | 69
-rw-r--r--  arch/x86/xen/mmu_pv.c | 1
-rw-r--r--  arch/x86/xen/smp_pv.c | 2
-rw-r--r--  arch/x86/xen/xen-asm.S | 5
-rw-r--r--  arch/x86/xen/xen-head.S | 12
-rw-r--r--  drivers/acpi/processor_idle.c | 2
-rw-r--r--  drivers/cpufreq/intel_pstate.c | 16
-rw-r--r--  drivers/idle/intel_idle.c | 14
-rw-r--r--  drivers/misc/mei/Kconfig | 2
-rw-r--r--  drivers/pci/Kconfig | 6
-rw-r--r--  drivers/platform/x86/amd/hsmp/Kconfig | 2
-rw-r--r--  drivers/platform/x86/amd/hsmp/acpi.c | 7
-rw-r--r--  drivers/platform/x86/amd/hsmp/hsmp.c | 1
-rw-r--r--  drivers/platform/x86/amd/hsmp/hsmp.h | 3
-rw-r--r--  drivers/platform/x86/amd/hsmp/plat.c | 36
-rw-r--r--  drivers/powercap/intel_rapl_common.c | 2
-rw-r--r--  drivers/staging/media/atomisp/include/linux/atomisp_platform.h | 4
-rw-r--r--  drivers/thermal/intel/intel_tcc.c | 2
-rw-r--r--  drivers/virt/coco/sev-guest/sev-guest.c | 1
-rw-r--r--  include/acpi/processor.h | 5
-rw-r--r--  include/asm-generic/sections.h | 2
-rw-r--r--  include/asm-generic/vmlinux.lds.h | 48
-rw-r--r--  include/linux/acpi.h | 1
-rw-r--r--  include/linux/cfi.h | 2
-rw-r--r--  include/linux/compiler.h | 10
-rw-r--r--  include/linux/execmem.h | 31
-rw-r--r--  include/linux/mod_devicetable.h | 2
-rw-r--r--  include/linux/module.h | 16
-rw-r--r--  include/linux/moduleloader.h | 4
-rw-r--r--  include/linux/objtool.h | 4
-rw-r--r--  include/linux/percpu-defs.h | 17
-rw-r--r--  include/linux/preempt.h | 1
-rw-r--r--  include/linux/sizes.h | 8
-rw-r--r--  include/linux/vm_event_item.h | 2
-rw-r--r--  init/Kconfig | 5
-rw-r--r--  kernel/bpf/verifier.c | 4
-rw-r--r--  kernel/cfi.c | 4
-rw-r--r--  kernel/iomem.c | 5
-rw-r--r--  kernel/kallsyms.c | 12
-rw-r--r--  kernel/module/main.c | 81
-rw-r--r--  kernel/module/strict_rwx.c | 9
-rw-r--r--  kernel/sysctl.c | 56
-rw-r--r--  kernel/trace/bpf_trace.c | 21
-rw-r--r--  lib/atomic64_test.c | 2
-rw-r--r--  lib/zstd/common/portability_macros.h | 2
-rw-r--r--  mm/execmem.c | 39
-rw-r--r--  mm/percpu.c | 4
-rw-r--r--  mm/vmstat.c | 2
-rwxr-xr-x  scripts/gcc-x86_32-has-stack-protector.sh | 8
-rwxr-xr-x  scripts/gcc-x86_64-has-stack-protector.sh | 4
-rw-r--r--  scripts/gdb/linux/cpus.py | 2
-rw-r--r--  scripts/kallsyms.c | 72
-rwxr-xr-x  scripts/link-vmlinux.sh | 4
-rwxr-xr-x  scripts/min-tool-version.sh | 4
-rw-r--r--  tools/arch/x86/include/asm/asm.h | 8
-rw-r--r--  tools/arch/x86/include/asm/cpufeatures.h | 8
-rw-r--r--  tools/arch/x86/include/asm/disabled-features.h | 161
-rw-r--r--  tools/arch/x86/include/asm/msr-index.h | 2
-rw-r--r--  tools/arch/x86/include/asm/nops.h | 2
-rw-r--r--  tools/arch/x86/include/asm/orc_types.h | 4
-rw-r--r--  tools/arch/x86/include/asm/pvclock-abi.h | 4
-rw-r--r--  tools/arch/x86/include/asm/required-features.h | 105
-rw-r--r--  tools/objtool/arch/x86/decode.c | 1
-rw-r--r--  tools/objtool/check.c | 12
-rw-r--r--  tools/objtool/noreturns.h | 2
-rwxr-xr-x  tools/perf/check-headers.sh | 2
-rw-r--r--  tools/power/x86/turbostat/turbostat.c | 2
-rw-r--r--  tools/testing/selftests/x86/lam.c | 151
346 files changed, 5002 insertions, 4671 deletions
diff --git a/Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst b/Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst
index 0585d02b9a6c..ad15417d39f9 100644
--- a/Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst
+++ b/Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst
@@ -29,14 +29,6 @@ Below is the list of affected Intel processors [#f1]_:
RAPTORLAKE_S 06_BFH
=================== ============
-As an exception to this table, Intel Xeon E family parts ALDERLAKE(06_97H) and
-RAPTORLAKE(06_B7H) codenamed Catlow are not affected. They are reported as
-vulnerable in Linux because they share the same family/model with an affected
-part. Unlike their affected counterparts, they do not enumerate RFDS_CLEAR or
-CPUID.HYBRID. This information could be used to distinguish between the
-affected and unaffected parts, but it is deemed not worth adding complexity as
-the reporting is fixed automatically when these parts enumerate RFDS_NO.
-
Mitigation
==========
Intel released a microcode update that enables software to clear sensitive
diff --git a/Documentation/admin-guide/kdump/kdump.rst b/Documentation/admin-guide/kdump/kdump.rst
index 5376890adbeb..1f7f14c6e184 100644
--- a/Documentation/admin-guide/kdump/kdump.rst
+++ b/Documentation/admin-guide/kdump/kdump.rst
@@ -180,10 +180,6 @@ Dump-capture kernel config options (Arch Dependent, i386 and x86_64)
1) On i386, enable high memory support under "Processor type and
features"::
- CONFIG_HIGHMEM64G=y
-
- or::
-
CONFIG_HIGHMEM4G
2) With CONFIG_SMP=y, usually nr_cpus=1 need specified on the kernel
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 60f98de36f82..866427da6add 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -416,10 +416,6 @@
Format: { quiet (default) | verbose | debug }
Change the amount of debugging information output
when initialising the APIC and IO-APIC components.
- For X86-32, this can also be used to specify an APIC
- driver name.
- Format: apic=driver_name
- Examples: apic=bigsmp
apic_extnmi= [APIC,X86,EARLY] External NMI delivery setting
Format: { bsp (default) | all | none }
@@ -7679,13 +7675,6 @@
16 - SIGBUS faults
Example: user_debug=31
- userpte=
- [X86,EARLY] Flags controlling user PTE allocations.
-
- nohigh = do not allocate PTE pages in
- HIGHMEM regardless of setting
- of CONFIG_HIGHPTE.
-
vdso= [X86,SH,SPARC]
On X86_32, this is an alias for vdso32=. Otherwise:
diff --git a/Documentation/arch/x86/usb-legacy-support.rst b/Documentation/arch/x86/usb-legacy-support.rst
index e01c08b7c981..b17bf122270a 100644
--- a/Documentation/arch/x86/usb-legacy-support.rst
+++ b/Documentation/arch/x86/usb-legacy-support.rst
@@ -20,11 +20,7 @@ It has several drawbacks, though:
features (wheel, extra buttons, touchpad mode) of the real PS/2 mouse may
not be available.
-2) If CONFIG_HIGHMEM64G is enabled, the PS/2 mouse emulation can cause
- system crashes, because the SMM BIOS is not expecting to be in PAE mode.
- The Intel E7505 is a typical machine where this happens.
-
-3) If AMD64 64-bit mode is enabled, again system crashes often happen,
+2) If AMD64 64-bit mode is enabled, again system crashes often happen,
because the SMM BIOS isn't expecting the CPU to be in 64-bit mode. The
BIOS manufacturers only test with Windows, and Windows doesn't do 64-bit
yet.
@@ -38,11 +34,6 @@ Problem 1)
compiled-in, too.
Problem 2)
- can currently only be solved by either disabling HIGHMEM64G
- in the kernel config or USB Legacy support in the BIOS. A BIOS update
- could help, but so far no such update exists.
-
-Problem 3)
is usually fixed by a BIOS update. Check the board
manufacturers web site. If an update is not available, disable USB
Legacy support in the BIOS. If this alone doesn't help, try also adding
diff --git a/Makefile b/Makefile
index 8b6764d44a61..d138b17b8840 100644
--- a/Makefile
+++ b/Makefile
@@ -1014,6 +1014,9 @@ CC_FLAGS_CFI := -fsanitize=kcfi
ifdef CONFIG_CFI_ICALL_NORMALIZE_INTEGERS
CC_FLAGS_CFI += -fsanitize-cfi-icall-experimental-normalize-integers
endif
+ifdef CONFIG_FINEIBT_BHI
+ CC_FLAGS_CFI += -fsanitize-kcfi-arity
+endif
ifdef CONFIG_RUST
# Always pass -Zsanitizer-cfi-normalize-integers as CONFIG_RUST selects
# CONFIG_CFI_ICALL_NORMALIZE_INTEGERS.
diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
index 1815748f5d2a..bae5edf348ef 100644
--- a/arch/arm/include/asm/io.h
+++ b/arch/arm/include/asm/io.h
@@ -381,7 +381,7 @@ void __iomem *ioremap_wc(resource_size_t res_cookie, size_t size);
void iounmap(volatile void __iomem *io_addr);
#define iounmap iounmap
-void *arch_memremap_wb(phys_addr_t phys_addr, size_t size);
+void *arch_memremap_wb(phys_addr_t phys_addr, size_t size, unsigned long flags);
#define arch_memremap_wb arch_memremap_wb
/*
diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c
index 89f1c97f3079..748698e91a4b 100644
--- a/arch/arm/mm/ioremap.c
+++ b/arch/arm/mm/ioremap.c
@@ -436,7 +436,7 @@ void __arm_iomem_set_ro(void __iomem *ptr, size_t size)
set_memory_ro((unsigned long)ptr, PAGE_ALIGN(size) / PAGE_SIZE);
}
-void *arch_memremap_wb(phys_addr_t phys_addr, size_t size)
+void *arch_memremap_wb(phys_addr_t phys_addr, size_t size, unsigned long flags)
{
return (__force void *)arch_ioremap_caller(phys_addr, size,
MT_MEMORY_RW,
diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c
index 1a8f6914ee59..d638cc87807e 100644
--- a/arch/arm/mm/nommu.c
+++ b/arch/arm/mm/nommu.c
@@ -248,7 +248,7 @@ void __iomem *pci_remap_cfgspace(resource_size_t res_cookie, size_t size)
EXPORT_SYMBOL_GPL(pci_remap_cfgspace);
#endif
-void *arch_memremap_wb(phys_addr_t phys_addr, size_t size)
+void *arch_memremap_wb(phys_addr_t phys_addr, size_t size, unsigned long flags)
{
return (void *)phys_addr;
}
diff --git a/arch/riscv/include/asm/io.h b/arch/riscv/include/asm/io.h
index 1c5c641075d2..0257f4aa7ff4 100644
--- a/arch/riscv/include/asm/io.h
+++ b/arch/riscv/include/asm/io.h
@@ -136,7 +136,7 @@ __io_writes_outs(outs, u64, q, __io_pbr(), __io_paw())
#include <asm-generic/io.h>
#ifdef CONFIG_MMU
-#define arch_memremap_wb(addr, size) \
+#define arch_memremap_wb(addr, size, flags) \
((__force void *)ioremap_prot((addr), (size), _PAGE_KERNEL))
#endif
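All four arch_memremap_wb() implementations above grow a flags argument. A hedged sketch of the caller side, assuming the generic mapper in kernel/iomem.c (also touched by this series) simply forwards its memremap() flags:

    /* Sketch only; error handling and page_is_ram() checks elided. */
    static void *demo_ram_remap(resource_size_t offset, size_t size,
    			    unsigned long flags)
    {
    	return arch_memremap_wb(offset, size, flags);
    }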
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index 79ea97d4797e..8be91974e786 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -440,25 +440,24 @@ void __init arch_cpu_finalize_init(void)
os_check_bugs();
}
-void apply_seal_endbr(s32 *start, s32 *end, struct module *mod)
+void apply_seal_endbr(s32 *start, s32 *end)
{
}
-void apply_retpolines(s32 *start, s32 *end, struct module *mod)
+void apply_retpolines(s32 *start, s32 *end)
{
}
-void apply_returns(s32 *start, s32 *end, struct module *mod)
+void apply_returns(s32 *start, s32 *end)
{
}
void apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
- s32 *start_cfi, s32 *end_cfi, struct module *mod)
+ s32 *start_cfi, s32 *end_cfi)
{
}
-void apply_alternatives(struct alt_instr *start, struct alt_instr *end,
- struct module *mod)
+void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
{
}
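These UML stubs track an interface change: the trailing struct module pointer is dropped from the apply_*() helpers. A hedged sketch of a caller after the change (the real adjustment lives in arch/x86/kernel/module.c, which this series also touches):

    /* Section start/end still come from the ELF section headers;
     * only the 'mod' argument goes away. */
    if (alt)
    	apply_alternatives((struct alt_instr *)alt->sh_addr,
    			   (struct alt_instr *)(alt->sh_addr + alt->sh_size));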
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0e27ebd7e36a..98bd4935280c 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -85,6 +85,7 @@ config X86
select ARCH_HAS_DMA_OPS if GART_IOMMU || XEN
select ARCH_HAS_EARLY_DEBUG if KGDB
select ARCH_HAS_ELF_RANDOMIZE
+ select ARCH_HAS_EXECMEM_ROX if X86_64
select ARCH_HAS_FAST_MULTIPLIER
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
@@ -132,7 +133,7 @@ config X86
select ARCH_SUPPORTS_AUTOFDO_CLANG
select ARCH_SUPPORTS_PROPELLER_CLANG if X86_64
select ARCH_USE_BUILTIN_BSWAP
- select ARCH_USE_CMPXCHG_LOCKREF if X86_CMPXCHG64
+ select ARCH_USE_CMPXCHG_LOCKREF if X86_CX8
select ARCH_USE_MEMTEST
select ARCH_USE_QUEUED_RWLOCKS
select ARCH_USE_QUEUED_SPINLOCKS
@@ -232,7 +233,7 @@ config X86
select HAVE_SAMPLE_FTRACE_DIRECT_MULTI if X86_64
select HAVE_EBPF_JIT
select HAVE_EFFICIENT_UNALIGNED_ACCESS
- select HAVE_EISA
+ select HAVE_EISA if X86_32
select HAVE_EXIT_THREAD
select HAVE_GUP_FAST
select HAVE_FENTRY if X86_64 || DYNAMIC_FTRACE
@@ -277,7 +278,7 @@ config X86
select HAVE_PCI
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
- select MMU_GATHER_RCU_TABLE_FREE if PARAVIRT
+ select MMU_GATHER_RCU_TABLE_FREE
select MMU_GATHER_MERGE_VMAS
select HAVE_POSIX_CPU_TIMERS_TASK_WORK
select HAVE_REGS_AND_STACK_ACCESS_API
@@ -285,7 +286,7 @@ config X86
select HAVE_FUNCTION_ARG_ACCESS_API
select HAVE_SETUP_PER_CPU_AREA
select HAVE_SOFTIRQ_ON_OWN_STACK
- select HAVE_STACKPROTECTOR if CC_HAS_SANE_STACKPROTECTOR
+ select HAVE_STACKPROTECTOR
select HAVE_STACK_VALIDATION if HAVE_OBJTOOL
select HAVE_STATIC_CALL
select HAVE_STATIC_CALL_INLINE if HAVE_OBJTOOL
@@ -426,15 +427,6 @@ config PGTABLE_LEVELS
default 3 if X86_PAE
default 2
-config CC_HAS_SANE_STACKPROTECTOR
- bool
- default $(success,$(srctree)/scripts/gcc-x86_64-has-stack-protector.sh $(CC) $(CLANG_FLAGS)) if 64BIT
- default $(success,$(srctree)/scripts/gcc-x86_32-has-stack-protector.sh $(CC) $(CLANG_FLAGS))
- help
- We have to make sure stack protector is unconditionally disabled if
- the compiler produces broken code or if it does not let us control
- the segment on 32-bit kernels.
-
menu "Processor type and features"
config SMP
@@ -530,12 +522,6 @@ config X86_FRED
ring transitions and exception/interrupt handling if the
system supports it.
-config X86_BIGSMP
- bool "Support for big SMP systems with more than 8 CPUs"
- depends on SMP && X86_32
- help
- This option is needed for the systems that have more than 8 CPUs.
-
config X86_EXTENDED_PLATFORM
bool "Support for extended (non-PC) x86 platforms"
default y
@@ -553,13 +539,12 @@ config X86_EXTENDED_PLATFORM
AMD Elan
RDC R-321x SoC
SGI 320/540 (Visual Workstation)
- STA2X11-based (e.g. Northville)
- Moorestown MID devices
64-bit platforms (CONFIG_64BIT=y):
Numascale NumaChip
ScaleMP vSMP
SGI Ultraviolet
+ Merrifield/Moorefield MID devices
If you have one of these systems, or if you want to build a
generic distribution kernel, say Y here - otherwise say N.
@@ -604,8 +589,31 @@ config X86_UV
This option is needed in order to support SGI Ultraviolet systems.
If you don't have one of these, you should say N here.
-# Following is an alphabetically sorted list of 32 bit extended platforms
-# Please maintain the alphabetic order if and when there are additions
+config X86_INTEL_MID
+ bool "Intel Z34xx/Z35xx MID platform support"
+ depends on X86_EXTENDED_PLATFORM
+ depends on X86_PLATFORM_DEVICES
+ depends on PCI
+ depends on X86_64 || (EXPERT && PCI_GOANY)
+ depends on X86_IO_APIC
+ select I2C
+ select DW_APB_TIMER
+ select INTEL_SCU_PCI
+ help
+ Select to build a kernel capable of supporting 64-bit Intel MID
+ (Mobile Internet Device) platform systems which do not have
+ the PCI legacy interfaces.
+
+ The only supported devices are the 22nm Merrifield (Z34xx)
+ and Moorefield (Z35xx) SoC used in the Intel Edison board and
+ a small number of Android devices such as the Asus Zenfone 2,
+ Asus FonePad 8 and Dell Venue 7.
+
+ If you are building for a PC class system or non-MID tablet
+ SoCs like Bay Trail (Z36xx/Z37xx), say N here.
+
+ Intel MID platforms are based on an Intel processor and chipset which
+ consume less power than most of the x86 derivatives.
config X86_GOLDFISH
bool "Goldfish (Virtual Platform)"
@@ -615,6 +623,9 @@ config X86_GOLDFISH
for Android development. Unless you are building for the Android
Goldfish emulator say N here.
+# Following is an alphabetically sorted list of 32 bit extended platforms
+# Please maintain the alphabetic order if and when there are additions
+
config X86_INTEL_CE
bool "CE4100 TV platform"
depends on PCI
@@ -630,24 +641,6 @@ config X86_INTEL_CE
This option compiles in support for the CE4100 SOC for settop
boxes and media devices.
-config X86_INTEL_MID
- bool "Intel MID platform support"
- depends on X86_EXTENDED_PLATFORM
- depends on X86_PLATFORM_DEVICES
- depends on PCI
- depends on X86_64 || (PCI_GOANY && X86_32)
- depends on X86_IO_APIC
- select I2C
- select DW_APB_TIMER
- select INTEL_SCU_PCI
- help
- Select to build a kernel capable of supporting Intel MID (Mobile
- Internet Device) platform systems which do not have the PCI legacy
- interfaces. If you are building for a PC class system say N here.
-
- Intel MID platforms are based on an Intel processor and chipset which
- consume less power than most of the x86 derivatives.
-
config X86_INTEL_QUARK
bool "Intel Quark platform support"
depends on X86_32
@@ -729,18 +722,6 @@ config X86_RDC321X
as R-8610-(G).
If you don't have one of these chips, you should say N here.
-config X86_32_NON_STANDARD
- bool "Support non-standard 32-bit SMP architectures"
- depends on X86_32 && SMP
- depends on X86_EXTENDED_PLATFORM
- help
- This option compiles in the bigsmp and STA2X11 default
- subarchitectures. It is intended for a generic binary
- kernel. If you select them all, kernel will probe it one by
- one and will fallback to default.
-
-# Alphabetically sorted list of Non standard 32 bit platforms
-
config X86_SUPPORTS_MEMORY_FAILURE
def_bool y
# MCE code calls memory_failure():
@@ -750,19 +731,6 @@ config X86_SUPPORTS_MEMORY_FAILURE
depends on X86_64 || !SPARSEMEM
select ARCH_SUPPORTS_MEMORY_FAILURE
-config STA2X11
- bool "STA2X11 Companion Chip Support"
- depends on X86_32_NON_STANDARD && PCI
- select SWIOTLB
- select MFD_STA2X11
- select GPIOLIB
- help
- This adds support for boards based on the STA2X11 IO-Hub,
- a.k.a. "ConneXt". The chip is used in place of the standard
- PC chipset, so all "standard" peripherals are missing. If this
- option is selected the kernel will still be able to boot on
- standard PC machines.
-
config X86_32_IRIS
tristate "Eurobraille/Iris poweroff module"
depends on X86_32
@@ -1012,8 +980,7 @@ config NR_CPUS_RANGE_BEGIN
config NR_CPUS_RANGE_END
int
depends on X86_32
- default 64 if SMP && X86_BIGSMP
- default 8 if SMP && !X86_BIGSMP
+ default 8 if SMP
default 1 if !SMP
config NR_CPUS_RANGE_END
@@ -1026,7 +993,6 @@ config NR_CPUS_RANGE_END
config NR_CPUS_DEFAULT
int
depends on X86_32
- default 32 if X86_BIGSMP
default 8 if SMP
default 1 if !SMP
@@ -1102,7 +1068,7 @@ config UP_LATE_INIT
config X86_UP_APIC
bool "Local APIC support on uniprocessors" if !PCI_MSI
default PCI_MSI
- depends on X86_32 && !SMP && !X86_32_NON_STANDARD
+ depends on X86_32 && !SMP
help
A local APIC (Advanced Programmable Interrupt Controller) is an
integrated interrupt controller in the CPU. If you have a single-CPU
@@ -1127,7 +1093,7 @@ config X86_UP_IOAPIC
config X86_LOCAL_APIC
def_bool y
- depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC || PCI_MSI
+ depends on X86_64 || SMP || X86_UP_APIC || PCI_MSI
select IRQ_DOMAIN_HIERARCHY
config ACPI_MADT_WAKEUP
@@ -1396,15 +1362,11 @@ config X86_CPUID
with major 203 and minors 0 to 31 for /dev/cpu/0/cpuid to
/dev/cpu/31/cpuid.
-choice
- prompt "High Memory Support"
- default HIGHMEM4G
+config HIGHMEM4G
+ bool "High Memory Support"
depends on X86_32
-
-config NOHIGHMEM
- bool "off"
help
- Linux can use up to 64 Gigabytes of physical memory on x86 systems.
+ Linux can use up to 4 Gigabytes of physical memory on x86 systems.
However, the address space of 32-bit x86 processors is only 4
Gigabytes large. That means that, if you have a large amount of
physical memory, not all of it can be "permanently mapped" by the
@@ -1420,38 +1382,9 @@ config NOHIGHMEM
possible.
If the machine has between 1 and 4 Gigabytes physical RAM, then
- answer "4GB" here.
-
- If more than 4 Gigabytes is used then answer "64GB" here. This
- selection turns Intel PAE (Physical Address Extension) mode on.
- PAE implements 3-level paging on IA32 processors. PAE is fully
- supported by Linux, PAE mode is implemented on all recent Intel
- processors (Pentium Pro and better). NOTE: If you say "64GB" here,
- then the kernel will not boot on CPUs that don't support PAE!
-
- The actual amount of total physical memory will either be
- auto detected or can be forced by using a kernel command line option
- such as "mem=256M". (Try "man bootparam" or see the documentation of
- your boot loader (lilo or loadlin) about how to pass options to the
- kernel at boot time.)
-
- If unsure, say "off".
-
-config HIGHMEM4G
- bool "4GB"
- help
- Select this if you have a 32-bit processor and between 1 and 4
- gigabytes of physical RAM.
-
-config HIGHMEM64G
- bool "64GB"
- depends on X86_HAVE_PAE
- select X86_PAE
- help
- Select this if you have a 32-bit processor and more than 4
- gigabytes of physical RAM.
+ answer "Y" here.
-endchoice
+ If unsure, say N.
choice
prompt "Memory split" if EXPERT
@@ -1497,14 +1430,12 @@ config PAGE_OFFSET
depends on X86_32
config HIGHMEM
- def_bool y
- depends on X86_32 && (HIGHMEM64G || HIGHMEM4G)
+ def_bool HIGHMEM4G
config X86_PAE
bool "PAE (Physical Address Extension) Support"
depends on X86_32 && X86_HAVE_PAE
select PHYS_ADDR_T_64BIT
- select SWIOTLB
help
PAE is required for NX support, and furthermore enables
larger swapspace support for non-overcommit purposes. It
@@ -1574,8 +1505,7 @@ config AMD_MEM_ENCRYPT
config NUMA
bool "NUMA Memory Allocation and Scheduler Support"
depends on SMP
- depends on X86_64 || (X86_32 && HIGHMEM64G && X86_BIGSMP)
- default y if X86_BIGSMP
+ depends on X86_64
select USE_PERCPU_NUMA_NODE_ID
select OF_NUMA if OF
help
@@ -1588,9 +1518,6 @@ config NUMA
For 64-bit this is recommended if the system is Intel Core i7
(or later), AMD Opteron, or EM64T NUMA.
- For 32-bit this is only needed if you boot a 32-bit
- kernel on a 64-bit NUMA platform.
-
Otherwise, you should say N.
config AMD_NUMA
@@ -1629,7 +1556,7 @@ config ARCH_FLATMEM_ENABLE
config ARCH_SPARSEMEM_ENABLE
def_bool y
- depends on X86_64 || NUMA || X86_32 || X86_32_NON_STANDARD
+ depends on X86_64 || NUMA || X86_32
select SPARSEMEM_STATIC if X86_32
select SPARSEMEM_VMEMMAP_ENABLE if X86_64
@@ -1675,15 +1602,6 @@ config X86_PMEM_LEGACY
Say Y if unsure.
-config HIGHPTE
- bool "Allocate 3rd-level pagetables from highmem"
- depends on HIGHMEM
- help
- The VM uses one page table entry for each page of physical memory.
- For systems with a lot of RAM, this can be wasteful of precious
- low memory. Setting this option will put user-space page table
- entries in high memory.
-
config X86_CHECK_BIOS_CORRUPTION
bool "Check for low memory corruption"
help
@@ -2451,18 +2369,20 @@ config CC_HAS_NAMED_AS
def_bool $(success,echo 'int __seg_fs fs; int __seg_gs gs;' | $(CC) -x c - -S -o /dev/null)
depends on CC_IS_GCC
+#
+# -fsanitize=kernel-address (KASAN) and -fsanitize=thread (KCSAN)
+# are incompatible with named address spaces with GCC < 13.3
+# (see GCC PR sanitizer/111736 and also PR sanitizer/115172).
+#
+
config CC_HAS_NAMED_AS_FIXED_SANITIZERS
- def_bool CC_IS_GCC && GCC_VERSION >= 130300
+ def_bool y
+ depends on !(KASAN || KCSAN) || GCC_VERSION >= 130300
+ depends on !(UBSAN_BOOL && KASAN) || GCC_VERSION >= 140200
config USE_X86_SEG_SUPPORT
- def_bool y
- depends on CC_HAS_NAMED_AS
- #
- # -fsanitize=kernel-address (KASAN) and -fsanitize=thread
- # (KCSAN) are incompatible with named address spaces with
- # GCC < 13.3 - see GCC PR sanitizer/111736.
- #
- depends on !(KASAN || KCSAN) || CC_HAS_NAMED_AS_FIXED_SANITIZERS
+ def_bool CC_HAS_NAMED_AS
+ depends on CC_HAS_NAMED_AS_FIXED_SANITIZERS
config CC_HAS_SLS
def_bool $(cc-option,-mharden-sls=all)
@@ -2473,6 +2393,10 @@ config CC_HAS_RETURN_THUNK
config CC_HAS_ENTRY_PADDING
def_bool $(cc-option,-fpatchable-function-entry=16,16)
+config CC_HAS_KCFI_ARITY
+ def_bool $(cc-option,-fsanitize=kcfi -fsanitize-kcfi-arity)
+ depends on CC_IS_CLANG && !RUST
+
config FUNCTION_PADDING_CFI
int
default 59 if FUNCTION_ALIGNMENT_64B
@@ -2498,6 +2422,10 @@ config FINEIBT
depends on X86_KERNEL_IBT && CFI_CLANG && MITIGATION_RETPOLINE
select CALL_PADDING
+config FINEIBT_BHI
+ def_bool y
+ depends on FINEIBT && CC_HAS_KCFI_ARITY
+
config HAVE_CALL_THUNKS
def_bool y
depends on CC_HAS_ENTRY_PADDING && MITIGATION_RETHUNK && OBJTOOL
@@ -3202,4 +3130,6 @@ config HAVE_ATOMIC_IOMAP
source "arch/x86/kvm/Kconfig"
+source "arch/x86/Kconfig.cpufeatures"
+
source "arch/x86/Kconfig.assembler"
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 2a7279d80460..753b8763abae 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -1,9 +1,9 @@
# SPDX-License-Identifier: GPL-2.0
# Put here option for CPU selection and depending optimization
choice
- prompt "Processor family"
- default M686 if X86_32
- default GENERIC_CPU if X86_64
+ prompt "x86-32 Processor family"
+ depends on X86_32
+ default M686
help
This is the processor type of your CPU. This information is
used for optimizing purposes. In order to compile a kernel
@@ -31,7 +31,6 @@ choice
- "Pentium-4" for the Intel Pentium 4 or P4-based Celeron.
- "K6" for the AMD K6, K6-II and K6-III (aka K6-3D).
- "Athlon" for the AMD K7 family (Athlon/Duron/Thunderbird).
- - "Opteron/Athlon64/Hammer/K8" for all K8 and newer AMD CPUs.
- "Crusoe" for the Transmeta Crusoe series.
- "Efficeon" for the Transmeta Efficeon series.
- "Winchip-C6" for original IDT Winchip.
@@ -42,13 +41,10 @@ choice
- "CyrixIII/VIA C3" for VIA Cyrix III or VIA C3.
- "VIA C3-2" for VIA C3-2 "Nehemiah" (model 9 and above).
- "VIA C7" for VIA C7.
- - "Intel P4" for the Pentium 4/Netburst microarchitecture.
- - "Core 2/newer Xeon" for all core2 and newer Intel CPUs.
- "Intel Atom" for the Atom-microarchitecture CPUs.
- - "Generic-x86-64" for a kernel which runs on any x86-64 CPU.
See each option's help text for additional details. If you don't know
- what to do, choose "486".
+ what to do, choose "Pentium-Pro".
config M486SX
bool "486SX"
@@ -114,11 +110,11 @@ config MPENTIUMIII
extensions.
config MPENTIUMM
- bool "Pentium M"
+ bool "Pentium M/Pentium Dual Core/Core Solo/Core Duo"
depends on X86_32
help
Select this for Intel Pentium M (not Pentium-4 M)
- notebook chips.
+ "Merom" Core Solo/Duo notebook chips
config MPENTIUM4
bool "Pentium-4/Celeron(P4-based)/Pentium-4 M/older Xeon"
@@ -139,22 +135,10 @@ config MPENTIUM4
-Mobile Pentium 4
-Mobile Pentium 4 M
-Extreme Edition (Gallatin)
- -Prescott
- -Prescott 2M
- -Cedar Mill
- -Presler
- -Smithfiled
Xeons (Intel Xeon, Xeon MP, Xeon LV, Xeon MV) corename:
-Foster
-Prestonia
-Gallatin
- -Nocona
- -Irwindale
- -Cranford
- -Potomac
- -Paxville
- -Dempsey
-
config MK6
bool "K6/K6-II/K6-III"
@@ -172,13 +156,6 @@ config MK7
some extended instructions, and passes appropriate optimization
flags to GCC.
-config MK8
- bool "Opteron/Athlon64/Hammer/K8"
- help
- Select this for an AMD Opteron or Athlon64 Hammer-family processor.
- Enables use of some extended instructions, and passes appropriate
- optimization flags to GCC.
-
config MCRUSOE
bool "Crusoe"
depends on X86_32
@@ -258,42 +235,14 @@ config MVIAC7
Select this for a VIA C7. Selecting this uses the correct cache
shift and tells gcc to treat the CPU as a 686.
-config MPSC
- bool "Intel P4 / older Netburst based Xeon"
- depends on X86_64
- help
- Optimize for Intel Pentium 4, Pentium D and older Nocona/Dempsey
- Xeon CPUs with Intel 64bit which is compatible with x86-64.
- Note that the latest Xeons (Xeon 51xx and 53xx) are not based on the
- Netburst core and shouldn't use this option. You can distinguish them
- using the cpu family field
- in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one.
-
-config MCORE2
- bool "Core 2/newer Xeon"
- help
-
- Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and
- 53xx) CPUs. You can distinguish newer from older Xeons by the CPU
- family in /proc/cpuinfo. Newer ones have 6 and older ones 15
- (not a typo)
-
config MATOM
bool "Intel Atom"
help
-
Select this for the Intel Atom platform. Intel Atom CPUs have an
in-order pipelining architecture and thus can benefit from
accordingly optimized code. Use a recent GCC with specific Atom
support in order to fully benefit from selecting this option.
-config GENERIC_CPU
- bool "Generic-x86-64"
- depends on X86_64
- help
- Generic x86-64 CPU.
- Run equally well on all x86-64 CPUs.
-
endchoice
config X86_GENERIC
@@ -317,8 +266,8 @@ config X86_INTERNODE_CACHE_SHIFT
config X86_L1_CACHE_SHIFT
int
- default "7" if MPENTIUM4 || MPSC
- default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
+ default "7" if MPENTIUM4
+ default "6" if MK7 || MPENTIUMM || MATOM || MVIAC7 || X86_GENERIC || X86_64
default "4" if MELAN || M486SX || M486 || MGEODEGX1
default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
@@ -336,51 +285,35 @@ config X86_ALIGNMENT_16
config X86_INTEL_USERCOPY
def_bool y
- depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2
+ depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK7 || MEFFICEON
config X86_USE_PPRO_CHECKSUM
def_bool y
- depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM
-
-#
-# P6_NOPs are a relatively minor optimization that require a family >=
-# 6 processor, except that it is broken on certain VIA chips.
-# Furthermore, AMD chips prefer a totally different sequence of NOPs
-# (which work on all CPUs). In addition, it looks like Virtual PC
-# does not understand them.
-#
-# As a result, disallow these if we're not compiling for X86_64 (these
-# NOPs do work on all x86-64 capable chips); the list of processors in
-# the right-hand clause are the cores that benefit from this optimization.
-#
-config X86_P6_NOP
- def_bool y
- depends on X86_64
- depends on (MCORE2 || MPENTIUM4 || MPSC)
+ depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MATOM
config X86_TSC
def_bool y
- depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) || X86_64
+ depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MATOM) || X86_64
config X86_HAVE_PAE
def_bool y
- depends on MCRUSOE || MEFFICEON || MCYRIXIII || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC7 || MCORE2 || MATOM || X86_64
+ depends on MCRUSOE || MEFFICEON || MCYRIXIII || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC7 || MATOM || X86_64
-config X86_CMPXCHG64
+config X86_CX8
def_bool y
- depends on X86_HAVE_PAE || M586TSC || M586MMX || MK6 || MK7
+ depends on X86_HAVE_PAE || M586TSC || M586MMX || MK6 || MK7 || MGEODEGX1 || MGEODE_LX
# this should be set for all -march=.. options where the compiler
# generates cmov.
config X86_CMOV
def_bool y
- depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX)
+ depends on (MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || MATOM || MGEODE_LX || X86_64)
config X86_MINIMUM_CPU_FAMILY
int
default "64" if X86_64
- default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCORE2 || MK7 || MK8)
- default "5" if X86_32 && X86_CMPXCHG64
+ default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MK7)
+ default "5" if X86_32 && X86_CX8
default "4"
config X86_DEBUGCTLMSR
@@ -401,6 +334,10 @@ menuconfig PROCESSOR_SELECT
This lets you choose what x86 vendor support code your kernel
will include.
+config BROADCAST_TLB_FLUSH
+ def_bool y
+ depends on CPU_SUP_AMD && 64BIT
+
config CPU_SUP_INTEL
default y
bool "Support Intel processors" if PROCESSOR_SELECT
diff --git a/arch/x86/Kconfig.cpufeatures b/arch/x86/Kconfig.cpufeatures
new file mode 100644
index 000000000000..e12d5b7e39a2
--- /dev/null
+++ b/arch/x86/Kconfig.cpufeatures
@@ -0,0 +1,201 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# x86 feature bits (see arch/x86/include/asm/cpufeatures.h) that are
+# either REQUIRED to be enabled, or DISABLED (always ignored) for this
+# particular compile-time configuration. The tests for these features
+# are turned into compile-time constants via the generated
+# <asm/cpufeaturemasks.h>.
+#
+# The naming of these variables *must* match asm/cpufeatures.h, e.g.,
+# X86_FEATURE_ALWAYS <==> X86_REQUIRED_FEATURE_ALWAYS
+# X86_FEATURE_FRED <==> X86_DISABLED_FEATURE_FRED
+#
+# And these REQUIRED and DISABLED config options are manipulated in an
+# AWK script as the following example:
+#
+#                           +----------------------+
+#                           |    X86_FRED = y ?    |
+#                           +----------------------+
+#                               /            \
+#                            Y /              \ N
+#  +-------------------------------------+      +-------------------------------+
+#  | X86_DISABLED_FEATURE_FRED undefined |      | X86_DISABLED_FEATURE_FRED = y |
+#  +-------------------------------------+      +-------------------------------+
+#                                                               |
+#                                                               |
+#  +-------------------------------------------+                |
+#  | X86_FEATURE_FRED: feature word 12, bit 17 | ---->          |
+#  +-------------------------------------------+                |
+#                                                               |
+#                                                               |
+#                                      +-------------------------------+
+#                                      | set bit 17 of DISABLED_MASK12 |
+#                                      +-------------------------------+
+#
+
+config X86_REQUIRED_FEATURE_ALWAYS
+ def_bool y
+
+config X86_REQUIRED_FEATURE_NOPL
+ def_bool y
+ depends on X86_64 || X86_P6_NOP
+
+config X86_REQUIRED_FEATURE_CX8
+ def_bool y
+ depends on X86_CX8
+
+# this should be set for all -march=.. options where the compiler
+# generates cmov.
+config X86_REQUIRED_FEATURE_CMOV
+ def_bool y
+ depends on X86_CMOV
+
+# this should be set for all -march= options where the compiler
+# generates movbe.
+config X86_REQUIRED_FEATURE_MOVBE
+ def_bool y
+ depends on MATOM
+
+config X86_REQUIRED_FEATURE_CPUID
+ def_bool y
+ depends on X86_64
+
+config X86_REQUIRED_FEATURE_UP
+ def_bool y
+ depends on !SMP
+
+config X86_REQUIRED_FEATURE_FPU
+ def_bool y
+ depends on !MATH_EMULATION
+
+config X86_REQUIRED_FEATURE_PAE
+ def_bool y
+ depends on X86_64 || X86_PAE
+
+config X86_REQUIRED_FEATURE_PSE
+ def_bool y
+ depends on X86_64 && !PARAVIRT_XXL
+
+config X86_REQUIRED_FEATURE_PGE
+ def_bool y
+ depends on X86_64 && !PARAVIRT_XXL
+
+config X86_REQUIRED_FEATURE_MSR
+ def_bool y
+ depends on X86_64
+
+config X86_REQUIRED_FEATURE_FXSR
+ def_bool y
+ depends on X86_64
+
+config X86_REQUIRED_FEATURE_XMM
+ def_bool y
+ depends on X86_64
+
+config X86_REQUIRED_FEATURE_XMM2
+ def_bool y
+ depends on X86_64
+
+config X86_REQUIRED_FEATURE_LM
+ def_bool y
+ depends on X86_64
+
+config X86_DISABLED_FEATURE_UMIP
+ def_bool y
+ depends on !X86_UMIP
+
+config X86_DISABLED_FEATURE_VME
+ def_bool y
+ depends on X86_64
+
+config X86_DISABLED_FEATURE_K6_MTRR
+ def_bool y
+ depends on X86_64
+
+config X86_DISABLED_FEATURE_CYRIX_ARR
+ def_bool y
+ depends on X86_64
+
+config X86_DISABLED_FEATURE_CENTAUR_MCR
+ def_bool y
+ depends on X86_64
+
+config X86_DISABLED_FEATURE_PCID
+ def_bool y
+ depends on !X86_64
+
+config X86_DISABLED_FEATURE_PKU
+ def_bool y
+ depends on !X86_INTEL_MEMORY_PROTECTION_KEYS
+
+config X86_DISABLED_FEATURE_OSPKE
+ def_bool y
+ depends on !X86_INTEL_MEMORY_PROTECTION_KEYS
+
+config X86_DISABLED_FEATURE_LA57
+ def_bool y
+ depends on !X86_5LEVEL
+
+config X86_DISABLED_FEATURE_PTI
+ def_bool y
+ depends on !MITIGATION_PAGE_TABLE_ISOLATION
+
+config X86_DISABLED_FEATURE_RETPOLINE
+ def_bool y
+ depends on !MITIGATION_RETPOLINE
+
+config X86_DISABLED_FEATURE_RETPOLINE_LFENCE
+ def_bool y
+ depends on !MITIGATION_RETPOLINE
+
+config X86_DISABLED_FEATURE_RETHUNK
+ def_bool y
+ depends on !MITIGATION_RETHUNK
+
+config X86_DISABLED_FEATURE_UNRET
+ def_bool y
+ depends on !MITIGATION_UNRET_ENTRY
+
+config X86_DISABLED_FEATURE_CALL_DEPTH
+ def_bool y
+ depends on !MITIGATION_CALL_DEPTH_TRACKING
+
+config X86_DISABLED_FEATURE_LAM
+ def_bool y
+ depends on !ADDRESS_MASKING
+
+config X86_DISABLED_FEATURE_ENQCMD
+ def_bool y
+ depends on !INTEL_IOMMU_SVM
+
+config X86_DISABLED_FEATURE_SGX
+ def_bool y
+ depends on !X86_SGX
+
+config X86_DISABLED_FEATURE_XENPV
+ def_bool y
+ depends on !XEN_PV
+
+config X86_DISABLED_FEATURE_TDX_GUEST
+ def_bool y
+ depends on !INTEL_TDX_GUEST
+
+config X86_DISABLED_FEATURE_USER_SHSTK
+ def_bool y
+ depends on !X86_USER_SHADOW_STACK
+
+config X86_DISABLED_FEATURE_IBT
+ def_bool y
+ depends on !X86_KERNEL_IBT
+
+config X86_DISABLED_FEATURE_FRED
+ def_bool y
+ depends on !X86_FRED
+
+config X86_DISABLED_FEATURE_SEV_SNP
+ def_bool y
+ depends on !KVM_AMD_SEV
+
+config X86_DISABLED_FEATURE_INVLPGB
+ def_bool y
+ depends on !BROADCAST_TLB_FLUSH
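The purpose of these REQUIRED/DISABLED options is to let runtime feature tests collapse into compile-time constants through the generated <asm/cpufeaturemasks.h>. A hedged usage sketch (the constant-folding behaviour is the assumption here; the feature flag and function exist in the tree):

    /* With X86_DISABLED_FEATURE_FRED=y the generated disabled mask has the
     * FRED bit set, so this test is expected to fold to constant false and
     * the call to be dropped entirely at build time. */
    if (cpu_feature_enabled(X86_FEATURE_FRED))
    	cpu_init_fred_exceptions();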
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 3ba7e185924e..0fc7e8fd1a2e 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -142,14 +142,7 @@ ifeq ($(CONFIG_X86_32),y)
KBUILD_CFLAGS += -ffreestanding
endif
- ifeq ($(CONFIG_STACKPROTECTOR),y)
- ifeq ($(CONFIG_SMP),y)
- KBUILD_CFLAGS += -mstack-protector-guard-reg=fs \
- -mstack-protector-guard-symbol=__ref_stack_chk_guard
- else
- KBUILD_CFLAGS += -mstack-protector-guard=global
- endif
- endif
+ percpu_seg := fs
else
BITS := 64
UTS_MACHINE := x86_64
@@ -180,25 +173,24 @@ else
# Use -mskip-rax-setup if supported.
KBUILD_CFLAGS += $(call cc-option,-mskip-rax-setup)
- # FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu)
- cflags-$(CONFIG_MK8) += -march=k8
- cflags-$(CONFIG_MPSC) += -march=nocona
- cflags-$(CONFIG_MCORE2) += -march=core2
- cflags-$(CONFIG_MATOM) += -march=atom
- cflags-$(CONFIG_GENERIC_CPU) += -mtune=generic
- KBUILD_CFLAGS += $(cflags-y)
-
- rustflags-$(CONFIG_MK8) += -Ctarget-cpu=k8
- rustflags-$(CONFIG_MPSC) += -Ctarget-cpu=nocona
- rustflags-$(CONFIG_MCORE2) += -Ctarget-cpu=core2
- rustflags-$(CONFIG_MATOM) += -Ctarget-cpu=atom
- rustflags-$(CONFIG_GENERIC_CPU) += -Ztune-cpu=generic
- KBUILD_RUSTFLAGS += $(rustflags-y)
+ KBUILD_CFLAGS += -march=x86-64 -mtune=generic
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=x86-64 -Ztune-cpu=generic
KBUILD_CFLAGS += -mno-red-zone
KBUILD_CFLAGS += -mcmodel=kernel
KBUILD_RUSTFLAGS += -Cno-redzone=y
KBUILD_RUSTFLAGS += -Ccode-model=kernel
+
+ percpu_seg := gs
+endif
+
+ifeq ($(CONFIG_STACKPROTECTOR),y)
+ ifeq ($(CONFIG_SMP),y)
+ KBUILD_CFLAGS += -mstack-protector-guard-reg=$(percpu_seg)
+ KBUILD_CFLAGS += -mstack-protector-guard-symbol=__ref_stack_chk_guard
+ else
+ KBUILD_CFLAGS += -mstack-protector-guard=global
+ endif
endif
#
@@ -279,6 +271,21 @@ archheaders:
$(Q)$(MAKE) $(build)=arch/x86/entry/syscalls all
###
+# <asm/cpufeaturemasks.h> header generation
+
+cpufeaturemasks.hdr := arch/x86/include/generated/asm/cpufeaturemasks.h
+cpufeaturemasks.awk := $(srctree)/arch/x86/tools/cpufeaturemasks.awk
+cpufeatures_hdr := $(srctree)/arch/x86/include/asm/cpufeatures.h
+targets += $(cpufeaturemasks.hdr)
+quiet_cmd_gen_featuremasks = GEN $@
+ cmd_gen_featuremasks = $(AWK) -f $(cpufeaturemasks.awk) $(cpufeatures_hdr) $(KCONFIG_CONFIG) > $@
+
+$(cpufeaturemasks.hdr): $(cpufeaturemasks.awk) $(cpufeatures_hdr) $(KCONFIG_CONFIG) FORCE
+ $(shell mkdir -p $(dir $@))
+ $(call if_changed,gen_featuremasks)
+archprepare: $(cpufeaturemasks.hdr)
+
+###
# Kernel objects
libs-y += arch/x86/lib/
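The unified stack-protector block above now derives the guard segment from $(percpu_seg), i.e. %fs on 32-bit and %gs on 64-bit. A conceptual C-level sketch of what the two -mstack-protector-guard-* options request; the prologue/epilogue shown in comments is illustrative, not literal kernel code:

    extern unsigned long __ref_stack_chk_guard;	/* symbol named in the Makefile */

    void demo(void)
    {
    	char buf[64];
    	/* prologue: canary = <fs|gs>:__ref_stack_chk_guard */
    	demo_fill(buf, sizeof(buf));		/* hypothetical callee */
    	/* epilogue: if (canary != <fs|gs>:__ref_stack_chk_guard)
    	 *                  __stack_chk_fail(); */
    }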
diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu
index 94834c4b5e5e..af7de9a42752 100644
--- a/arch/x86/Makefile_32.cpu
+++ b/arch/x86/Makefile_32.cpu
@@ -24,7 +24,6 @@ cflags-$(CONFIG_MK6) += -march=k6
# Please note, that patches that add -march=athlon-xp and friends are pointless.
# They make zero difference whatsosever to performance at this time.
cflags-$(CONFIG_MK7) += -march=athlon
-cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8,-march=athlon)
cflags-$(CONFIG_MCRUSOE) += -march=i686 $(align)
cflags-$(CONFIG_MEFFICEON) += -march=i686 $(call tune,pentium3) $(align)
cflags-$(CONFIG_MWINCHIPC6) += $(call cc-option,-march=winchip-c6,-march=i586)
@@ -32,9 +31,7 @@ cflags-$(CONFIG_MWINCHIP3D) += $(call cc-option,-march=winchip2,-march=i586)
cflags-$(CONFIG_MCYRIXIII) += $(call cc-option,-march=c3,-march=i486) $(align)
cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686)
cflags-$(CONFIG_MVIAC7) += -march=i686
-cflags-$(CONFIG_MCORE2) += -march=i686 $(call tune,core2)
-cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom,$(call cc-option,-march=core2,-march=i686)) \
- $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic))
+cflags-$(CONFIG_MATOM) += -march=atom
# AMD Elan support
cflags-$(CONFIG_MELAN) += -march=i486
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index 0f24f7ebec9b..38f17a1e1e36 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -16,7 +16,7 @@
#define STACK_SIZE 1024 /* Minimum number of bytes for stack */
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/stdarg.h>
#include <linux/types.h>
@@ -327,6 +327,6 @@ void probe_cards(int unsafe);
/* video-vesa.c */
void vesa_store_edid(void);
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* BOOT_BOOT_H */
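The __ASSEMBLY__ to __ASSEMBLER__ switch in boot.h (and in many headers later in this diff) moves to a macro the compiler predefines by itself when preprocessing assembly input, so nothing has to pass -D__ASSEMBLY__ by hand. A minimal illustration:

    #ifndef __ASSEMBLER__		/* predefined by gcc/clang for .S files */
    struct c_only_type {		/* visible to C, hidden from the assembler */
    	int field;
    };
    #endif /* __ASSEMBLER__ */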
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 0d37420cad02..1cdcd4aaf395 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -235,7 +235,7 @@ static void handle_relocations(void *output, unsigned long output_len,
/*
* Process relocations: 32 bit relocations first then 64 bit after.
- * Three sets of binary relocations are added to the end of the kernel
+ * Two sets of binary relocations are added to the end of the kernel
* before compression. Each relocation table entry is the kernel
* address of the location which needs to be updated stored as a
* 32-bit value which is sign extended to 64 bits.
@@ -245,8 +245,6 @@ static void handle_relocations(void *output, unsigned long output_len,
* kernel bits...
* 0 - zero terminator for 64 bit relocations
* 64 bit relocation repeated
- * 0 - zero terminator for inverse 32 bit relocations
- * 32 bit inverse relocation repeated
* 0 - zero terminator for 32 bit relocations
* 32 bit relocation repeated
*
@@ -263,16 +261,6 @@ static void handle_relocations(void *output, unsigned long output_len,
*(uint32_t *)ptr += delta;
}
#ifdef CONFIG_X86_64
- while (*--reloc) {
- long extended = *reloc;
- extended += map;
-
- ptr = (unsigned long)extended;
- if (ptr < min_addr || ptr > max_addr)
- error("inverse 32-bit relocation outside of kernel!\n");
-
- *(int32_t *)ptr -= delta;
- }
for (reloc--; *reloc; reloc--) {
long extended = *reloc;
extended += map;
diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c
index 0aae4d4ed615..f82de8de5dc6 100644
--- a/arch/x86/boot/cpucheck.c
+++ b/arch/x86/boot/cpucheck.c
@@ -22,10 +22,11 @@
# include "boot.h"
#endif
#include <linux/types.h>
+#include <asm/cpufeaturemasks.h>
#include <asm/intel-family.h>
#include <asm/processor-flags.h>
-#include <asm/required-features.h>
#include <asm/msr-index.h>
+
#include "string.h"
#include "msr.h"
diff --git a/arch/x86/boot/cpuflags.c b/arch/x86/boot/cpuflags.c
index d75237ba7ce9..0cabdacb2a2f 100644
--- a/arch/x86/boot/cpuflags.c
+++ b/arch/x86/boot/cpuflags.c
@@ -3,7 +3,6 @@
#include "bitops.h"
#include <asm/processor-flags.h>
-#include <asm/required-features.h>
#include <asm/msr-index.h>
#include "cpuflags.h"
diff --git a/arch/x86/boot/mkcpustr.c b/arch/x86/boot/mkcpustr.c
index da0ccc5de538..22d730b227e3 100644
--- a/arch/x86/boot/mkcpustr.c
+++ b/arch/x86/boot/mkcpustr.c
@@ -12,8 +12,6 @@
#include <stdio.h>
-#include "../include/asm/required-features.h"
-#include "../include/asm/disabled-features.h"
#include "../include/asm/cpufeatures.h"
#include "../include/asm/vmxfeatures.h"
#include "../kernel/cpu/capflags.c"
@@ -23,6 +21,7 @@ int main(void)
int i, j;
const char *str;
+ printf("#include <asm/cpufeaturemasks.h>\n\n");
printf("static const char x86_cap_strs[] =\n");
for (i = 0; i < NCAPINTS; i++) {
diff --git a/arch/x86/configs/xen.config b/arch/x86/configs/xen.config
index 581296255b39..d5d091e03bd3 100644
--- a/arch/x86/configs/xen.config
+++ b/arch/x86/configs/xen.config
@@ -1,6 +1,4 @@
# global x86 required specific stuff
-# On 32-bit HIGHMEM4G is not allowed
-CONFIG_HIGHMEM64G=y
CONFIG_64BIT=y
# These enable us to allow some of the
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index eb153eff9331..b37881bb9f15 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -17,6 +17,7 @@
*/
#include <linux/linkage.h>
+#include <linux/objtool.h>
#include <asm/frame.h>
#define STATE1 %xmm0
@@ -1071,6 +1072,7 @@ SYM_FUNC_END(_aesni_inc)
* size_t len, u8 *iv)
*/
SYM_FUNC_START(aesni_ctr_enc)
+ ANNOTATE_NOENDBR
FRAME_BEGIN
cmp $16, LEN
jb .Lctr_enc_just_ret
diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
index 646477a13e11..1dfef28c1266 100644
--- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
@@ -16,6 +16,7 @@
*/
#include <linux/linkage.h>
+#include <linux/cfi_types.h>
#include <asm/frame.h>
#define CAMELLIA_TABLE_BYTE_LEN 272
@@ -882,7 +883,7 @@ SYM_FUNC_START_LOCAL(__camellia_dec_blk16)
jmp .Ldec_max24;
SYM_FUNC_END(__camellia_dec_blk16)
-SYM_FUNC_START(camellia_ecb_enc_16way)
+SYM_TYPED_FUNC_START(camellia_ecb_enc_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst (16 blocks)
@@ -907,7 +908,7 @@ SYM_FUNC_START(camellia_ecb_enc_16way)
RET;
SYM_FUNC_END(camellia_ecb_enc_16way)
-SYM_FUNC_START(camellia_ecb_dec_16way)
+SYM_TYPED_FUNC_START(camellia_ecb_dec_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst (16 blocks)
@@ -937,7 +938,7 @@ SYM_FUNC_START(camellia_ecb_dec_16way)
RET;
SYM_FUNC_END(camellia_ecb_dec_16way)
-SYM_FUNC_START(camellia_cbc_dec_16way)
+SYM_TYPED_FUNC_START(camellia_cbc_dec_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst (16 blocks)
diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
index a0eb94e53b1b..b1c9b9450555 100644
--- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
@@ -6,6 +6,7 @@
*/
#include <linux/linkage.h>
+#include <linux/cfi_types.h>
#include <asm/frame.h>
#define CAMELLIA_TABLE_BYTE_LEN 272
diff --git a/arch/x86/crypto/camellia-x86_64-asm_64.S b/arch/x86/crypto/camellia-x86_64-asm_64.S
index 816b6bb8bded..824cb94de6c2 100644
--- a/arch/x86/crypto/camellia-x86_64-asm_64.S
+++ b/arch/x86/crypto/camellia-x86_64-asm_64.S
@@ -6,6 +6,7 @@
*/
#include <linux/linkage.h>
+#include <linux/cfi_types.h>
.file "camellia-x86_64-asm_64.S"
.text
@@ -177,7 +178,7 @@
bswapq RAB0; \
movq RAB0, 4*2(RIO);
-SYM_FUNC_START(__camellia_enc_blk)
+SYM_TYPED_FUNC_START(__camellia_enc_blk)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -224,7 +225,7 @@ SYM_FUNC_START(__camellia_enc_blk)
RET;
SYM_FUNC_END(__camellia_enc_blk)
-SYM_FUNC_START(camellia_dec_blk)
+SYM_TYPED_FUNC_START(camellia_dec_blk)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -411,7 +412,7 @@ SYM_FUNC_END(camellia_dec_blk)
bswapq RAB1; \
movq RAB1, 12*2(RIO);
-SYM_FUNC_START(__camellia_enc_blk_2way)
+SYM_TYPED_FUNC_START(__camellia_enc_blk_2way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -460,7 +461,7 @@ SYM_FUNC_START(__camellia_enc_blk_2way)
RET;
SYM_FUNC_END(__camellia_enc_blk_2way)
-SYM_FUNC_START(camellia_dec_blk_2way)
+SYM_TYPED_FUNC_START(camellia_dec_blk_2way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
diff --git a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
index 97e283621851..84e47f7f6188 100644
--- a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
@@ -9,6 +9,7 @@
*/
#include <linux/linkage.h>
+#include <linux/cfi_types.h>
#include <asm/frame.h>
#include "glue_helper-asm-avx.S"
@@ -656,7 +657,7 @@ SYM_FUNC_START_LOCAL(__serpent_dec_blk8_avx)
RET;
SYM_FUNC_END(__serpent_dec_blk8_avx)
-SYM_FUNC_START(serpent_ecb_enc_8way_avx)
+SYM_TYPED_FUNC_START(serpent_ecb_enc_8way_avx)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -674,7 +675,7 @@ SYM_FUNC_START(serpent_ecb_enc_8way_avx)
RET;
SYM_FUNC_END(serpent_ecb_enc_8way_avx)
-SYM_FUNC_START(serpent_ecb_dec_8way_avx)
+SYM_TYPED_FUNC_START(serpent_ecb_dec_8way_avx)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -692,7 +693,7 @@ SYM_FUNC_START(serpent_ecb_dec_8way_avx)
RET;
SYM_FUNC_END(serpent_ecb_dec_8way_avx)
-SYM_FUNC_START(serpent_cbc_dec_8way_avx)
+SYM_TYPED_FUNC_START(serpent_cbc_dec_8way_avx)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
diff --git a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
index d2288bf38a8a..071e90e7f0d8 100644
--- a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
+++ b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
@@ -6,6 +6,7 @@
*/
#include <linux/linkage.h>
+#include <linux/cfi_types.h>
.file "twofish-x86_64-asm-3way.S"
.text
@@ -220,7 +221,7 @@
rorq $32, RAB2; \
outunpack3(mov, RIO, 2, RAB, 2);
-SYM_FUNC_START(__twofish_enc_blk_3way)
+SYM_TYPED_FUNC_START(__twofish_enc_blk_3way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -269,7 +270,7 @@ SYM_FUNC_START(__twofish_enc_blk_3way)
RET;
SYM_FUNC_END(__twofish_enc_blk_3way)
-SYM_FUNC_START(twofish_dec_blk_3way)
+SYM_TYPED_FUNC_START(twofish_dec_blk_3way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
diff --git a/arch/x86/crypto/twofish-x86_64-asm_64.S b/arch/x86/crypto/twofish-x86_64-asm_64.S
index 775af290cd19..e08b4ba07b93 100644
--- a/arch/x86/crypto/twofish-x86_64-asm_64.S
+++ b/arch/x86/crypto/twofish-x86_64-asm_64.S
@@ -8,6 +8,7 @@
.text
#include <linux/linkage.h>
+#include <linux/cfi_types.h>
#include <asm/asm-offsets.h>
#define a_offset 0
@@ -202,7 +203,7 @@
xor %r8d, d ## D;\
ror $1, d ## D;
-SYM_FUNC_START(twofish_enc_blk)
+SYM_TYPED_FUNC_START(twofish_enc_blk)
pushq R1
/* %rdi contains the ctx address */
@@ -255,7 +256,7 @@ SYM_FUNC_START(twofish_enc_blk)
RET
SYM_FUNC_END(twofish_enc_blk)
-SYM_FUNC_START(twofish_dec_blk)
+SYM_TYPED_FUNC_START(twofish_dec_blk)
pushq R1
/* %rdi contains the ctx address */
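All of the SYM_TYPED_FUNC_START conversions in the crypto files above serve the same purpose: these assembly routines are called indirectly through function pointers in the C glue code, and with kernel CFI (CONFIG_CFI_CLANG) every indirect call verifies a type hash emitted in front of the target. SYM_TYPED_FUNC_START generates that hash for an assembly symbol; plain SYM_FUNC_START does not, so an indirect call to it would trap. A rough user-space analogue of the indirect-call pattern that kCFI polices, with hypothetical names and no real cipher:

#include <stdint.h>
#include <stdio.h>

/* Stand-in for the prototype the crypto glue expects of the asm routine. */
typedef void (*ecb_fn)(const void *ctx, uint8_t *dst, const uint8_t *src);

static void fake_ecb_enc_16way(const void *ctx, uint8_t *dst, const uint8_t *src)
{
	(void)ctx;
	dst[0] = src[0] ^ 0xAA;		/* placeholder "cipher" */
}

int main(void)
{
	uint8_t in[16] = { 1 }, out[16];
	ecb_fn fn = fake_ecb_enc_16way;	/* indirect call through a typed pointer */

	fn(NULL, out, in);
	printf("%02x\n", out[0]);
	return 0;
}

Under kCFI the compiler checks, at the call through fn, that the callee carries the hash for this exact prototype; the typed asm annotations are what make the hand-written cipher entry points pass that check.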
diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile
index ce1cc1622385..72cae8e0ce85 100644
--- a/arch/x86/entry/Makefile
+++ b/arch/x86/entry/Makefile
@@ -7,12 +7,13 @@ KASAN_SANITIZE := n
UBSAN_SANITIZE := n
KCOV_INSTRUMENT := n
-CFLAGS_REMOVE_common.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_syscall_32.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_syscall_64.o = $(CC_FLAGS_FTRACE)
-CFLAGS_common.o += -fno-stack-protector
+CFLAGS_syscall_32.o += -fno-stack-protector
+CFLAGS_syscall_64.o += -fno-stack-protector
obj-y := entry.o entry_$(BITS).o syscall_$(BITS).o
-obj-y += common.o
obj-y += vdso/
obj-y += vsyscall/
@@ -23,4 +24,3 @@ CFLAGS_REMOVE_entry_fred.o += -pg $(CC_FLAGS_FTRACE)
obj-$(CONFIG_X86_FRED) += entry_64_fred.o entry_fred.o
obj-$(CONFIG_IA32_EMULATION) += entry_64_compat.o syscall_32.o
-obj-$(CONFIG_X86_X32_ABI) += syscall_x32.o
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index ea81770629ee..cb0911c5dc5d 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -431,6 +431,7 @@ For 32-bit we have the following conventions - kernel is built with
/* rdi: arg1 ... normal C conventions. rax is saved/restored. */
.macro THUNK name, func
SYM_FUNC_START(\name)
+ ANNOTATE_NOENDBR
pushq %rbp
movq %rsp, %rbp
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
deleted file mode 100644
index 14db5b85114c..000000000000
--- a/arch/x86/entry/common.c
+++ /dev/null
@@ -1,524 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * common.c - C code for kernel entry and exit
- * Copyright (c) 2015 Andrew Lutomirski
- *
- * Based on asm and ptrace code by many authors. The code here originated
- * in ptrace.c and signal.c.
- */
-
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/sched/task_stack.h>
-#include <linux/entry-common.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/errno.h>
-#include <linux/ptrace.h>
-#include <linux/export.h>
-#include <linux/nospec.h>
-#include <linux/syscalls.h>
-#include <linux/uaccess.h>
-#include <linux/init.h>
-
-#ifdef CONFIG_XEN_PV
-#include <xen/xen-ops.h>
-#include <xen/events.h>
-#endif
-
-#include <asm/apic.h>
-#include <asm/desc.h>
-#include <asm/traps.h>
-#include <asm/vdso.h>
-#include <asm/cpufeature.h>
-#include <asm/fpu/api.h>
-#include <asm/nospec-branch.h>
-#include <asm/io_bitmap.h>
-#include <asm/syscall.h>
-#include <asm/irq_stack.h>
-
-#ifdef CONFIG_X86_64
-
-static __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr)
-{
- /*
- * Convert negative numbers to very high and thus out of range
- * numbers for comparisons.
- */
- unsigned int unr = nr;
-
- if (likely(unr < NR_syscalls)) {
- unr = array_index_nospec(unr, NR_syscalls);
- regs->ax = x64_sys_call(regs, unr);
- return true;
- }
- return false;
-}
-
-static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
-{
- /*
- * Adjust the starting offset of the table, and convert numbers
- * < __X32_SYSCALL_BIT to very high and thus out of range
- * numbers for comparisons.
- */
- unsigned int xnr = nr - __X32_SYSCALL_BIT;
-
- if (IS_ENABLED(CONFIG_X86_X32_ABI) && likely(xnr < X32_NR_syscalls)) {
- xnr = array_index_nospec(xnr, X32_NR_syscalls);
- regs->ax = x32_sys_call(regs, xnr);
- return true;
- }
- return false;
-}
-
-/* Returns true to return using SYSRET, or false to use IRET */
-__visible noinstr bool do_syscall_64(struct pt_regs *regs, int nr)
-{
- add_random_kstack_offset();
- nr = syscall_enter_from_user_mode(regs, nr);
-
- instrumentation_begin();
-
- if (!do_syscall_x64(regs, nr) && !do_syscall_x32(regs, nr) && nr != -1) {
- /* Invalid system call, but still a system call. */
- regs->ax = __x64_sys_ni_syscall(regs);
- }
-
- instrumentation_end();
- syscall_exit_to_user_mode(regs);
-
- /*
- * Check that the register state is valid for using SYSRET to exit
- * to userspace. Otherwise use the slower but fully capable IRET
- * exit path.
- */
-
- /* XEN PV guests always use the IRET path */
- if (cpu_feature_enabled(X86_FEATURE_XENPV))
- return false;
-
- /* SYSRET requires RCX == RIP and R11 == EFLAGS */
- if (unlikely(regs->cx != regs->ip || regs->r11 != regs->flags))
- return false;
-
- /* CS and SS must match the values set in MSR_STAR */
- if (unlikely(regs->cs != __USER_CS || regs->ss != __USER_DS))
- return false;
-
- /*
- * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
- * in kernel space. This essentially lets the user take over
- * the kernel, since userspace controls RSP.
- *
- * TASK_SIZE_MAX covers all user-accessible addresses other than
- * the deprecated vsyscall page.
- */
- if (unlikely(regs->ip >= TASK_SIZE_MAX))
- return false;
-
- /*
- * SYSRET cannot restore RF. It can restore TF, but unlike IRET,
- * restoring TF results in a trap from userspace immediately after
- * SYSRET.
- */
- if (unlikely(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF)))
- return false;
-
- /* Use SYSRET to exit to userspace */
- return true;
-}
-#endif
-
-#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
-static __always_inline int syscall_32_enter(struct pt_regs *regs)
-{
- if (IS_ENABLED(CONFIG_IA32_EMULATION))
- current_thread_info()->status |= TS_COMPAT;
-
- return (int)regs->orig_ax;
-}
-
-#ifdef CONFIG_IA32_EMULATION
-bool __ia32_enabled __ro_after_init = !IS_ENABLED(CONFIG_IA32_EMULATION_DEFAULT_DISABLED);
-
-static int ia32_emulation_override_cmdline(char *arg)
-{
- return kstrtobool(arg, &__ia32_enabled);
-}
-early_param("ia32_emulation", ia32_emulation_override_cmdline);
-#endif
-
-/*
- * Invoke a 32-bit syscall. Called with IRQs on in CT_STATE_KERNEL.
- */
-static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, int nr)
-{
- /*
- * Convert negative numbers to very high and thus out of range
- * numbers for comparisons.
- */
- unsigned int unr = nr;
-
- if (likely(unr < IA32_NR_syscalls)) {
- unr = array_index_nospec(unr, IA32_NR_syscalls);
- regs->ax = ia32_sys_call(regs, unr);
- } else if (nr != -1) {
- regs->ax = __ia32_sys_ni_syscall(regs);
- }
-}
-
-#ifdef CONFIG_IA32_EMULATION
-static __always_inline bool int80_is_external(void)
-{
- const unsigned int offs = (0x80 / 32) * 0x10;
- const u32 bit = BIT(0x80 % 32);
-
- /* The local APIC on XENPV guests is fake */
- if (cpu_feature_enabled(X86_FEATURE_XENPV))
- return false;
-
- /*
- * If vector 0x80 is set in the APIC ISR then this is an external
- * interrupt. Either from broken hardware or injected by a VMM.
- *
- * Note: In guest mode this is only valid for secure guests where
- * the secure module fully controls the vAPIC exposed to the guest.
- */
- return apic_read(APIC_ISR + offs) & bit;
-}
-
-/**
- * do_int80_emulation - 32-bit legacy syscall C entry from asm
- * @regs: syscall arguments in struct pt_args on the stack.
- *
- * This entry point can be used by 32-bit and 64-bit programs to perform
- * 32-bit system calls. Instances of INT $0x80 can be found inline in
- * various programs and libraries. It is also used by the vDSO's
- * __kernel_vsyscall fallback for hardware that doesn't support a faster
- * entry method. Restarted 32-bit system calls also fall back to INT
- * $0x80 regardless of what instruction was originally used to do the
- * system call.
- *
- * This is considered a slow path. It is not used by most libc
- * implementations on modern hardware except during process startup.
- *
- * The arguments for the INT $0x80 based syscall are on stack in the
- * pt_regs structure:
- * eax: system call number
- * ebx, ecx, edx, esi, edi, ebp: arg1 - arg 6
- */
-__visible noinstr void do_int80_emulation(struct pt_regs *regs)
-{
- int nr;
-
- /* Kernel does not use INT $0x80! */
- if (unlikely(!user_mode(regs))) {
- irqentry_enter(regs);
- instrumentation_begin();
- panic("Unexpected external interrupt 0x80\n");
- }
-
- /*
- * Establish kernel context for instrumentation, including for
- * int80_is_external() below which calls into the APIC driver.
- * Identical for soft and external interrupts.
- */
- enter_from_user_mode(regs);
-
- instrumentation_begin();
- add_random_kstack_offset();
-
- /* Validate that this is a soft interrupt to the extent possible */
- if (unlikely(int80_is_external()))
- panic("Unexpected external interrupt 0x80\n");
-
- /*
- * The low level idtentry code pushed -1 into regs::orig_ax
- * and regs::ax contains the syscall number.
- *
- * User tracing code (ptrace or signal handlers) might assume
- * that the regs::orig_ax contains a 32-bit number on invoking
- * a 32-bit syscall.
- *
- * Establish the syscall convention by saving the 32bit truncated
- * syscall number in regs::orig_ax and by invalidating regs::ax.
- */
- regs->orig_ax = regs->ax & GENMASK(31, 0);
- regs->ax = -ENOSYS;
-
- nr = syscall_32_enter(regs);
-
- local_irq_enable();
- nr = syscall_enter_from_user_mode_work(regs, nr);
- do_syscall_32_irqs_on(regs, nr);
-
- instrumentation_end();
- syscall_exit_to_user_mode(regs);
-}
-
-#ifdef CONFIG_X86_FRED
-/*
- * A FRED-specific INT80 handler is warranted for the follwing reasons:
- *
- * 1) As INT instructions and hardware interrupts are separate event
- * types, FRED does not preclude the use of vector 0x80 for external
- * interrupts. As a result, the FRED setup code does not reserve
- * vector 0x80 and calling int80_is_external() is not merely
- * suboptimal but actively incorrect: it could cause a system call
- * to be incorrectly ignored.
- *
- * 2) It is called only for handling vector 0x80 of event type
- * EVENT_TYPE_SWINT and will never be called to handle any external
- * interrupt (event type EVENT_TYPE_EXTINT).
- *
- * 3) FRED has separate entry flows depending on if the event came from
- * user space or kernel space, and because the kernel does not use
- * INT insns, the FRED kernel entry handler fred_entry_from_kernel()
- * falls through to fred_bad_type() if the event type is
- * EVENT_TYPE_SWINT, i.e., INT insns. So if the kernel is handling
- * an INT insn, it can only be from a user level.
- *
- * 4) int80_emulation() does a CLEAR_BRANCH_HISTORY. While FRED will
- * likely take a different approach if it is ever needed: it
- * probably belongs in either fred_intx()/ fred_other() or
- * asm_fred_entrypoint_user(), depending on if this ought to be done
- * for all entries from userspace or only system
- * calls.
- *
- * 5) INT $0x80 is the fast path for 32-bit system calls under FRED.
- */
-DEFINE_FREDENTRY_RAW(int80_emulation)
-{
- int nr;
-
- enter_from_user_mode(regs);
-
- instrumentation_begin();
- add_random_kstack_offset();
-
- /*
- * FRED pushed 0 into regs::orig_ax and regs::ax contains the
- * syscall number.
- *
- * User tracing code (ptrace or signal handlers) might assume
- * that the regs::orig_ax contains a 32-bit number on invoking
- * a 32-bit syscall.
- *
- * Establish the syscall convention by saving the 32bit truncated
- * syscall number in regs::orig_ax and by invalidating regs::ax.
- */
- regs->orig_ax = regs->ax & GENMASK(31, 0);
- regs->ax = -ENOSYS;
-
- nr = syscall_32_enter(regs);
-
- local_irq_enable();
- nr = syscall_enter_from_user_mode_work(regs, nr);
- do_syscall_32_irqs_on(regs, nr);
-
- instrumentation_end();
- syscall_exit_to_user_mode(regs);
-}
-#endif
-#else /* CONFIG_IA32_EMULATION */
-
-/* Handles int $0x80 on a 32bit kernel */
-__visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
-{
- int nr = syscall_32_enter(regs);
-
- add_random_kstack_offset();
- /*
- * Subtlety here: if ptrace pokes something larger than 2^31-1 into
- * orig_ax, the int return value truncates it. This matches
- * the semantics of syscall_get_nr().
- */
- nr = syscall_enter_from_user_mode(regs, nr);
- instrumentation_begin();
-
- do_syscall_32_irqs_on(regs, nr);
-
- instrumentation_end();
- syscall_exit_to_user_mode(regs);
-}
-#endif /* !CONFIG_IA32_EMULATION */
-
-static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
-{
- int nr = syscall_32_enter(regs);
- int res;
-
- add_random_kstack_offset();
- /*
- * This cannot use syscall_enter_from_user_mode() as it has to
- * fetch EBP before invoking any of the syscall entry work
- * functions.
- */
- syscall_enter_from_user_mode_prepare(regs);
-
- instrumentation_begin();
- /* Fetch EBP from where the vDSO stashed it. */
- if (IS_ENABLED(CONFIG_X86_64)) {
- /*
- * Micro-optimization: the pointer we're following is
- * explicitly 32 bits, so it can't be out of range.
- */
- res = __get_user(*(u32 *)&regs->bp,
- (u32 __user __force *)(unsigned long)(u32)regs->sp);
- } else {
- res = get_user(*(u32 *)&regs->bp,
- (u32 __user __force *)(unsigned long)(u32)regs->sp);
- }
-
- if (res) {
- /* User code screwed up. */
- regs->ax = -EFAULT;
-
- local_irq_disable();
- instrumentation_end();
- irqentry_exit_to_user_mode(regs);
- return false;
- }
-
- nr = syscall_enter_from_user_mode_work(regs, nr);
-
- /* Now this is just like a normal syscall. */
- do_syscall_32_irqs_on(regs, nr);
-
- instrumentation_end();
- syscall_exit_to_user_mode(regs);
- return true;
-}
-
-/* Returns true to return using SYSEXIT/SYSRETL, or false to use IRET */
-__visible noinstr bool do_fast_syscall_32(struct pt_regs *regs)
-{
- /*
- * Called using the internal vDSO SYSENTER/SYSCALL32 calling
- * convention. Adjust regs so it looks like we entered using int80.
- */
- unsigned long landing_pad = (unsigned long)current->mm->context.vdso +
- vdso_image_32.sym_int80_landing_pad;
-
- /*
- * SYSENTER loses EIP, and even SYSCALL32 needs us to skip forward
- * so that 'regs->ip -= 2' lands back on an int $0x80 instruction.
- * Fix it up.
- */
- regs->ip = landing_pad;
-
- /* Invoke the syscall. If it failed, keep it simple: use IRET. */
- if (!__do_fast_syscall_32(regs))
- return false;
-
- /*
- * Check that the register state is valid for using SYSRETL/SYSEXIT
- * to exit to userspace. Otherwise use the slower but fully capable
- * IRET exit path.
- */
-
- /* XEN PV guests always use the IRET path */
- if (cpu_feature_enabled(X86_FEATURE_XENPV))
- return false;
-
- /* EIP must point to the VDSO landing pad */
- if (unlikely(regs->ip != landing_pad))
- return false;
-
- /* CS and SS must match the values set in MSR_STAR */
- if (unlikely(regs->cs != __USER32_CS || regs->ss != __USER_DS))
- return false;
-
- /* If the TF, RF, or VM flags are set, use IRET */
- if (unlikely(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF | X86_EFLAGS_VM)))
- return false;
-
- /* Use SYSRETL/SYSEXIT to exit to userspace */
- return true;
-}
-
-/* Returns true to return using SYSEXIT/SYSRETL, or false to use IRET */
-__visible noinstr bool do_SYSENTER_32(struct pt_regs *regs)
-{
- /* SYSENTER loses RSP, but the vDSO saved it in RBP. */
- regs->sp = regs->bp;
-
- /* SYSENTER clobbers EFLAGS.IF. Assume it was set in usermode. */
- regs->flags |= X86_EFLAGS_IF;
-
- return do_fast_syscall_32(regs);
-}
-#endif
-
-SYSCALL_DEFINE0(ni_syscall)
-{
- return -ENOSYS;
-}
-
-#ifdef CONFIG_XEN_PV
-#ifndef CONFIG_PREEMPTION
-/*
- * Some hypercalls issued by the toolstack can take many 10s of
- * seconds. Allow tasks running hypercalls via the privcmd driver to
- * be voluntarily preempted even if full kernel preemption is
- * disabled.
- *
- * Such preemptible hypercalls are bracketed by
- * xen_preemptible_hcall_begin() and xen_preemptible_hcall_end()
- * calls.
- */
-DEFINE_PER_CPU(bool, xen_in_preemptible_hcall);
-EXPORT_SYMBOL_GPL(xen_in_preemptible_hcall);
-
-/*
- * In case of scheduling the flag must be cleared and restored after
- * returning from schedule as the task might move to a different CPU.
- */
-static __always_inline bool get_and_clear_inhcall(void)
-{
- bool inhcall = __this_cpu_read(xen_in_preemptible_hcall);
-
- __this_cpu_write(xen_in_preemptible_hcall, false);
- return inhcall;
-}
-
-static __always_inline void restore_inhcall(bool inhcall)
-{
- __this_cpu_write(xen_in_preemptible_hcall, inhcall);
-}
-#else
-static __always_inline bool get_and_clear_inhcall(void) { return false; }
-static __always_inline void restore_inhcall(bool inhcall) { }
-#endif
-
-static void __xen_pv_evtchn_do_upcall(struct pt_regs *regs)
-{
- struct pt_regs *old_regs = set_irq_regs(regs);
-
- inc_irq_stat(irq_hv_callback_count);
-
- xen_evtchn_do_upcall();
-
- set_irq_regs(old_regs);
-}
-
-__visible noinstr void xen_pv_evtchn_do_upcall(struct pt_regs *regs)
-{
- irqentry_state_t state = irqentry_enter(regs);
- bool inhcall;
-
- instrumentation_begin();
- run_sysvec_on_irqstack_cond(__xen_pv_evtchn_do_upcall, regs);
-
- inhcall = get_and_clear_inhcall();
- if (inhcall && !WARN_ON_ONCE(state.exit_rcu)) {
- irqentry_exit_cond_resched();
- instrumentation_end();
- restore_inhcall(inhcall);
- } else {
- instrumentation_end();
- irqentry_exit(regs, state);
- }
-}
-#endif /* CONFIG_XEN_PV */
diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S
index b7ea3e8e9ecc..d3caa31240ed 100644
--- a/arch/x86/entry/entry.S
+++ b/arch/x86/entry/entry.S
@@ -5,6 +5,7 @@
#include <linux/export.h>
#include <linux/linkage.h>
+#include <linux/objtool.h>
#include <asm/msr-index.h>
#include <asm/unwind_hints.h>
#include <asm/segment.h>
@@ -17,6 +18,7 @@
.pushsection .noinstr.text, "ax"
SYM_FUNC_START(entry_ibpb)
+ ANNOTATE_NOENDBR
movl $MSR_IA32_PRED_CMD, %ecx
movl $PRED_CMD_IBPB, %eax
xorl %edx, %edx
@@ -52,7 +54,6 @@ EXPORT_SYMBOL_GPL(mds_verw_sel);
THUNK warn_thunk_thunk, __warn_thunk
-#ifndef CONFIG_X86_64
/*
* Clang's implementation of TLS stack cookies requires the variable in
* question to be a TLS variable. If the variable happens to be defined as an
@@ -63,7 +64,6 @@ THUNK warn_thunk_thunk, __warn_thunk
* entirely in the C code, and use an alias emitted by the linker script
* instead.
*/
-#ifdef CONFIG_STACKPROTECTOR
+#if defined(CONFIG_STACKPROTECTOR) && defined(CONFIG_SMP)
EXPORT_SYMBOL(__ref_stack_chk_guard);
#endif
-#endif
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 20be5758c2d2..92c0b4a94e0a 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -1153,7 +1153,7 @@ SYM_CODE_START(asm_exc_nmi)
* is using the thread stack right now, so it's safe for us to use it.
*/
movl %esp, %ebx
- movl PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %esp
+ movl PER_CPU_VAR(cpu_current_top_of_stack), %esp
call exc_nmi
movl %ebx, %esp
@@ -1217,7 +1217,7 @@ SYM_CODE_START(rewind_stack_and_make_dead)
/* Prevent any naive code from trying to unwind to our caller. */
xorl %ebp, %ebp
- movl PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %esi
+ movl PER_CPU_VAR(cpu_current_top_of_stack), %esi
leal -TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%esi), %esp
call make_task_dead
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index f52dbe0ad93c..f40bdf97d390 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -92,7 +92,7 @@ SYM_CODE_START(entry_SYSCALL_64)
/* tss.sp2 is scratch space. */
movq %rsp, PER_CPU_VAR(cpu_tss_rw + TSS_sp2)
SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
- movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp
+ movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
SYM_INNER_LABEL(entry_SYSCALL_64_safe_stack, SYM_L_GLOBAL)
ANNOTATE_NOENDBR
@@ -175,6 +175,7 @@ SYM_CODE_END(entry_SYSCALL_64)
*/
.pushsection .text, "ax"
SYM_FUNC_START(__switch_to_asm)
+ ANNOTATE_NOENDBR
/*
* Save callee-saved registers
* This must match the order in inactive_task_frame
@@ -192,7 +193,7 @@ SYM_FUNC_START(__switch_to_asm)
#ifdef CONFIG_STACKPROTECTOR
movq TASK_stack_canary(%rsi), %rbx
- movq %rbx, PER_CPU_VAR(fixed_percpu_data + FIXED_stack_canary)
+ movq %rbx, PER_CPU_VAR(__stack_chk_guard)
#endif
/*
@@ -742,6 +743,7 @@ _ASM_NOKPROBE(common_interrupt_return)
* Is in entry.text as it shouldn't be instrumented.
*/
SYM_FUNC_START(asm_load_gs_index)
+ ANNOTATE_NOENDBR
FRAME_BEGIN
swapgs
.Lgs_change:
@@ -1166,7 +1168,7 @@ SYM_CODE_START(asm_exc_nmi)
FENCE_SWAPGS_USER_ENTRY
SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx
movq %rsp, %rdx
- movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp
+ movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
UNWIND_HINT_IRET_REGS base=%rdx offset=8
pushq 5*8(%rdx) /* pt_regs->ss */
pushq 4*8(%rdx) /* pt_regs->rsp */
@@ -1484,7 +1486,7 @@ SYM_CODE_START_NOALIGN(rewind_stack_and_make_dead)
/* Prevent any naive code from trying to unwind to our caller. */
xorl %ebp, %ebp
- movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rax
+ movq PER_CPU_VAR(cpu_current_top_of_stack), %rax
leaq -PTREGS_SIZE(%rax), %rsp
UNWIND_HINT_REGS
@@ -1526,6 +1528,7 @@ SYM_CODE_END(rewind_stack_and_make_dead)
* refactored in the future if needed.
*/
SYM_FUNC_START(clear_bhb_loop)
+ ANNOTATE_NOENDBR
push %rbp
mov %rsp, %rbp
movl $5, %ecx
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index ed0a5f2dc129..a45e1125fc6c 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -57,7 +57,7 @@ SYM_CODE_START(entry_SYSENTER_compat)
SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
popq %rax
- movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp
+ movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
/* Construct struct pt_regs on stack */
pushq $__USER_DS /* pt_regs->ss */
@@ -193,7 +193,7 @@ SYM_CODE_START(entry_SYSCALL_compat)
SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
/* Switch to the kernel stack */
- movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp
+ movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL)
ANNOTATE_NOENDBR
diff --git a/arch/x86/entry/entry_64_fred.S b/arch/x86/entry/entry_64_fred.S
index a02bc6f3d2e6..29c5c32c16c3 100644
--- a/arch/x86/entry/entry_64_fred.S
+++ b/arch/x86/entry/entry_64_fred.S
@@ -58,6 +58,7 @@ SYM_CODE_END(asm_fred_entrypoint_kernel)
#if IS_ENABLED(CONFIG_KVM_INTEL)
SYM_FUNC_START(asm_fred_entry_from_kvm)
+ ANNOTATE_NOENDBR
push %rbp
mov %rsp, %rbp
diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c
index 8cc9950d7104..2b15ea17bb7c 100644
--- a/arch/x86/entry/syscall_32.c
+++ b/arch/x86/entry/syscall_32.c
@@ -1,10 +1,16 @@
-// SPDX-License-Identifier: GPL-2.0
-/* System call table for i386. */
+// SPDX-License-Identifier: GPL-2.0-only
+/* 32-bit system call dispatch */
#include <linux/linkage.h>
#include <linux/sys.h>
#include <linux/cache.h>
#include <linux/syscalls.h>
+#include <linux/entry-common.h>
+#include <linux/nospec.h>
+#include <linux/uaccess.h>
+#include <asm/apic.h>
+#include <asm/traps.h>
+#include <asm/cpufeature.h>
#include <asm/syscall.h>
#ifdef CONFIG_IA32_EMULATION
@@ -41,4 +47,324 @@ long ia32_sys_call(const struct pt_regs *regs, unsigned int nr)
#include <asm/syscalls_32.h>
default: return __ia32_sys_ni_syscall(regs);
}
-};
+}
+
+static __always_inline int syscall_32_enter(struct pt_regs *regs)
+{
+ if (IS_ENABLED(CONFIG_IA32_EMULATION))
+ current_thread_info()->status |= TS_COMPAT;
+
+ return (int)regs->orig_ax;
+}
+
+#ifdef CONFIG_IA32_EMULATION
+bool __ia32_enabled __ro_after_init = !IS_ENABLED(CONFIG_IA32_EMULATION_DEFAULT_DISABLED);
+
+static int __init ia32_emulation_override_cmdline(char *arg)
+{
+ return kstrtobool(arg, &__ia32_enabled);
+}
+early_param("ia32_emulation", ia32_emulation_override_cmdline);
+#endif
+
+/*
+ * Invoke a 32-bit syscall. Called with IRQs on in CT_STATE_KERNEL.
+ */
+static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, int nr)
+{
+ /*
+ * Convert negative numbers to very high and thus out of range
+ * numbers for comparisons.
+ */
+ unsigned int unr = nr;
+
+ if (likely(unr < IA32_NR_syscalls)) {
+ unr = array_index_nospec(unr, IA32_NR_syscalls);
+ regs->ax = ia32_sys_call(regs, unr);
+ } else if (nr != -1) {
+ regs->ax = __ia32_sys_ni_syscall(regs);
+ }
+}
+
+#ifdef CONFIG_IA32_EMULATION
+static __always_inline bool int80_is_external(void)
+{
+ const unsigned int offs = (0x80 / 32) * 0x10;
+ const u32 bit = BIT(0x80 % 32);
+
+ /* The local APIC on XENPV guests is fake */
+ if (cpu_feature_enabled(X86_FEATURE_XENPV))
+ return false;
+
+ /*
+ * If vector 0x80 is set in the APIC ISR then this is an external
+ * interrupt. Either from broken hardware or injected by a VMM.
+ *
+ * Note: In guest mode this is only valid for secure guests where
+ * the secure module fully controls the vAPIC exposed to the guest.
+ */
+ return apic_read(APIC_ISR + offs) & bit;
+}
+
+/**
+ * do_int80_emulation - 32-bit legacy syscall C entry from asm
+ * @regs: syscall arguments in struct pt_regs on the stack.
+ *
+ * This entry point can be used by 32-bit and 64-bit programs to perform
+ * 32-bit system calls. Instances of INT $0x80 can be found inline in
+ * various programs and libraries. It is also used by the vDSO's
+ * __kernel_vsyscall fallback for hardware that doesn't support a faster
+ * entry method. Restarted 32-bit system calls also fall back to INT
+ * $0x80 regardless of what instruction was originally used to do the
+ * system call.
+ *
+ * This is considered a slow path. It is not used by most libc
+ * implementations on modern hardware except during process startup.
+ *
+ * The arguments for the INT $0x80 based syscall are on stack in the
+ * pt_regs structure:
+ * eax: system call number
+ * ebx, ecx, edx, esi, edi, ebp: arg1 - arg6
+ */
+__visible noinstr void do_int80_emulation(struct pt_regs *regs)
+{
+ int nr;
+
+ /* Kernel does not use INT $0x80! */
+ if (unlikely(!user_mode(regs))) {
+ irqentry_enter(regs);
+ instrumentation_begin();
+ panic("Unexpected external interrupt 0x80\n");
+ }
+
+ /*
+ * Establish kernel context for instrumentation, including for
+ * int80_is_external() below which calls into the APIC driver.
+ * Identical for soft and external interrupts.
+ */
+ enter_from_user_mode(regs);
+
+ instrumentation_begin();
+ add_random_kstack_offset();
+
+ /* Validate that this is a soft interrupt to the extent possible */
+ if (unlikely(int80_is_external()))
+ panic("Unexpected external interrupt 0x80\n");
+
+ /*
+ * The low level idtentry code pushed -1 into regs::orig_ax
+ * and regs::ax contains the syscall number.
+ *
+ * User tracing code (ptrace or signal handlers) might assume
+ * that the regs::orig_ax contains a 32-bit number on invoking
+ * a 32-bit syscall.
+ *
+ * Establish the syscall convention by saving the 32-bit truncated
+ * syscall number in regs::orig_ax and by invalidating regs::ax.
+ */
+ regs->orig_ax = regs->ax & GENMASK(31, 0);
+ regs->ax = -ENOSYS;
+
+ nr = syscall_32_enter(regs);
+
+ local_irq_enable();
+ nr = syscall_enter_from_user_mode_work(regs, nr);
+ do_syscall_32_irqs_on(regs, nr);
+
+ instrumentation_end();
+ syscall_exit_to_user_mode(regs);
+}
+
+#ifdef CONFIG_X86_FRED
+/*
+ * A FRED-specific INT80 handler is warranted for the following reasons:
+ *
+ * 1) As INT instructions and hardware interrupts are separate event
+ * types, FRED does not preclude the use of vector 0x80 for external
+ * interrupts. As a result, the FRED setup code does not reserve
+ * vector 0x80 and calling int80_is_external() is not merely
+ * suboptimal but actively incorrect: it could cause a system call
+ * to be incorrectly ignored.
+ *
+ * 2) It is called only for handling vector 0x80 of event type
+ * EVENT_TYPE_SWINT and will never be called to handle any external
+ * interrupt (event type EVENT_TYPE_EXTINT).
+ *
+ * 3) FRED has separate entry flows depending on whether the event came from
+ * user space or kernel space, and because the kernel does not use
+ * INT insns, the FRED kernel entry handler fred_entry_from_kernel()
+ * falls through to fred_bad_type() if the event type is
+ * EVENT_TYPE_SWINT, i.e., INT insns. So if the kernel is handling
+ * an INT insn, it can only be from a user level.
+ *
+ * 4) int80_emulation() does a CLEAR_BRANCH_HISTORY. If FRED ever needs
+ *    it, it will likely take a different approach: the clearing probably
+ *    belongs in either fred_intx()/fred_other() or
+ *    asm_fred_entrypoint_user(), depending on whether it ought to be
+ *    done for all entries from userspace or only for system calls.
+ *
+ * 5) INT $0x80 is the fast path for 32-bit system calls under FRED.
+ */
+DEFINE_FREDENTRY_RAW(int80_emulation)
+{
+ int nr;
+
+ enter_from_user_mode(regs);
+
+ instrumentation_begin();
+ add_random_kstack_offset();
+
+ /*
+ * FRED pushed 0 into regs::orig_ax and regs::ax contains the
+ * syscall number.
+ *
+ * User tracing code (ptrace or signal handlers) might assume
+ * that the regs::orig_ax contains a 32-bit number on invoking
+ * a 32-bit syscall.
+ *
+ * Establish the syscall convention by saving the 32-bit truncated
+ * syscall number in regs::orig_ax and by invalidating regs::ax.
+ */
+ regs->orig_ax = regs->ax & GENMASK(31, 0);
+ regs->ax = -ENOSYS;
+
+ nr = syscall_32_enter(regs);
+
+ local_irq_enable();
+ nr = syscall_enter_from_user_mode_work(regs, nr);
+ do_syscall_32_irqs_on(regs, nr);
+
+ instrumentation_end();
+ syscall_exit_to_user_mode(regs);
+}
+#endif /* CONFIG_X86_FRED */
+
+#else /* CONFIG_IA32_EMULATION */
+
+/* Handles int $0x80 on a 32-bit kernel */
+__visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
+{
+ int nr = syscall_32_enter(regs);
+
+ add_random_kstack_offset();
+ /*
+ * Subtlety here: if ptrace pokes something larger than 2^31-1 into
+ * orig_ax, the int return value truncates it. This matches
+ * the semantics of syscall_get_nr().
+ */
+ nr = syscall_enter_from_user_mode(regs, nr);
+ instrumentation_begin();
+
+ do_syscall_32_irqs_on(regs, nr);
+
+ instrumentation_end();
+ syscall_exit_to_user_mode(regs);
+}
+#endif /* !CONFIG_IA32_EMULATION */
+
+static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
+{
+ int nr = syscall_32_enter(regs);
+ int res;
+
+ add_random_kstack_offset();
+ /*
+ * This cannot use syscall_enter_from_user_mode() as it has to
+ * fetch EBP before invoking any of the syscall entry work
+ * functions.
+ */
+ syscall_enter_from_user_mode_prepare(regs);
+
+ instrumentation_begin();
+ /* Fetch EBP from where the vDSO stashed it. */
+ if (IS_ENABLED(CONFIG_X86_64)) {
+ /*
+ * Micro-optimization: the pointer we're following is
+ * explicitly 32 bits, so it can't be out of range.
+ */
+ res = __get_user(*(u32 *)&regs->bp,
+ (u32 __user __force *)(unsigned long)(u32)regs->sp);
+ } else {
+ res = get_user(*(u32 *)&regs->bp,
+ (u32 __user __force *)(unsigned long)(u32)regs->sp);
+ }
+
+ if (res) {
+ /* User code screwed up. */
+ regs->ax = -EFAULT;
+
+ local_irq_disable();
+ instrumentation_end();
+ irqentry_exit_to_user_mode(regs);
+ return false;
+ }
+
+ nr = syscall_enter_from_user_mode_work(regs, nr);
+
+ /* Now this is just like a normal syscall. */
+ do_syscall_32_irqs_on(regs, nr);
+
+ instrumentation_end();
+ syscall_exit_to_user_mode(regs);
+ return true;
+}
+
+/* Returns true to return using SYSEXIT/SYSRETL, or false to use IRET */
+__visible noinstr bool do_fast_syscall_32(struct pt_regs *regs)
+{
+ /*
+ * Called using the internal vDSO SYSENTER/SYSCALL32 calling
+ * convention. Adjust regs so it looks like we entered using int80.
+ */
+ unsigned long landing_pad = (unsigned long)current->mm->context.vdso +
+ vdso_image_32.sym_int80_landing_pad;
+
+ /*
+ * SYSENTER loses EIP, and even SYSCALL32 needs us to skip forward
+ * so that 'regs->ip -= 2' lands back on an int $0x80 instruction.
+ * Fix it up.
+ */
+ regs->ip = landing_pad;
+
+ /* Invoke the syscall. If it failed, keep it simple: use IRET. */
+ if (!__do_fast_syscall_32(regs))
+ return false;
+
+ /*
+ * Check that the register state is valid for using SYSRETL/SYSEXIT
+ * to exit to userspace. Otherwise use the slower but fully capable
+ * IRET exit path.
+ */
+
+ /* XEN PV guests always use the IRET path */
+ if (cpu_feature_enabled(X86_FEATURE_XENPV))
+ return false;
+
+ /* EIP must point to the VDSO landing pad */
+ if (unlikely(regs->ip != landing_pad))
+ return false;
+
+ /* CS and SS must match the values set in MSR_STAR */
+ if (unlikely(regs->cs != __USER32_CS || regs->ss != __USER_DS))
+ return false;
+
+ /* If the TF, RF, or VM flags are set, use IRET */
+ if (unlikely(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF | X86_EFLAGS_VM)))
+ return false;
+
+ /* Use SYSRETL/SYSEXIT to exit to userspace */
+ return true;
+}
+
+/* Returns true to return using SYSEXIT/SYSRETL, or false to use IRET */
+__visible noinstr bool do_SYSENTER_32(struct pt_regs *regs)
+{
+ /* SYSENTER loses RSP, but the vDSO saved it in RBP. */
+ regs->sp = regs->bp;
+
+ /* SYSENTER clobbers EFLAGS.IF. Assume it was set in usermode. */
+ regs->flags |= X86_EFLAGS_IF;
+
+ return do_fast_syscall_32(regs);
+}
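The offs/bit arithmetic in int80_is_external() reflects the APIC ISR layout: 256 bits of in-service state exposed as eight 32-bit registers spaced 0x10 bytes apart. A small stand-alone sketch of the mapping, with no APIC access and only the index math:

#include <stdio.h>

/*
 * Map an interrupt vector to its APIC ISR register offset and bit mask,
 * the same computation int80_is_external() does for vector 0x80.
 */
static void isr_slot(unsigned int vector, unsigned int *offs, unsigned int *bit)
{
	*offs = (vector / 32) * 0x10;
	*bit  = 1u << (vector % 32);
}

int main(void)
{
	unsigned int offs, bit;

	isr_slot(0x80, &offs, &bit);
	/* Prints: vector 0x80 -> APIC_ISR + 0x40, mask 0x00000001 */
	printf("vector 0x80 -> APIC_ISR + 0x%x, mask 0x%08x\n", offs, bit);
	return 0;
}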
diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
index ba8354424860..b6e68ea98b83 100644
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -1,15 +1,20 @@
-// SPDX-License-Identifier: GPL-2.0
-/* System call table for x86-64. */
+// SPDX-License-Identifier: GPL-2.0-only
+/* 64-bit system call dispatch */
#include <linux/linkage.h>
#include <linux/sys.h>
#include <linux/cache.h>
#include <linux/syscalls.h>
+#include <linux/entry-common.h>
+#include <linux/nospec.h>
#include <asm/syscall.h>
#define __SYSCALL(nr, sym) extern long __x64_##sym(const struct pt_regs *);
#define __SYSCALL_NORETURN(nr, sym) extern long __noreturn __x64_##sym(const struct pt_regs *);
#include <asm/syscalls_64.h>
+#ifdef CONFIG_X86_X32_ABI
+#include <asm/syscalls_x32.h>
+#endif
#undef __SYSCALL
#undef __SYSCALL_NORETURN
@@ -33,4 +38,104 @@ long x64_sys_call(const struct pt_regs *regs, unsigned int nr)
#include <asm/syscalls_64.h>
default: return __x64_sys_ni_syscall(regs);
}
-};
+}
+
+#ifdef CONFIG_X86_X32_ABI
+long x32_sys_call(const struct pt_regs *regs, unsigned int nr)
+{
+ switch (nr) {
+ #include <asm/syscalls_x32.h>
+ default: return __x64_sys_ni_syscall(regs);
+ }
+}
+#endif
+
+static __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr)
+{
+ /*
+ * Convert negative numbers to very high and thus out of range
+ * numbers for comparisons.
+ */
+ unsigned int unr = nr;
+
+ if (likely(unr < NR_syscalls)) {
+ unr = array_index_nospec(unr, NR_syscalls);
+ regs->ax = x64_sys_call(regs, unr);
+ return true;
+ }
+ return false;
+}
+
+static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
+{
+ /*
+ * Adjust the starting offset of the table, and convert numbers
+ * < __X32_SYSCALL_BIT to very high and thus out of range
+ * numbers for comparisons.
+ */
+ unsigned int xnr = nr - __X32_SYSCALL_BIT;
+
+ if (IS_ENABLED(CONFIG_X86_X32_ABI) && likely(xnr < X32_NR_syscalls)) {
+ xnr = array_index_nospec(xnr, X32_NR_syscalls);
+ regs->ax = x32_sys_call(regs, xnr);
+ return true;
+ }
+ return false;
+}
+
+/* Returns true to return using SYSRET, or false to use IRET */
+__visible noinstr bool do_syscall_64(struct pt_regs *regs, int nr)
+{
+ add_random_kstack_offset();
+ nr = syscall_enter_from_user_mode(regs, nr);
+
+ instrumentation_begin();
+
+ if (!do_syscall_x64(regs, nr) && !do_syscall_x32(regs, nr) && nr != -1) {
+ /* Invalid system call, but still a system call. */
+ regs->ax = __x64_sys_ni_syscall(regs);
+ }
+
+ instrumentation_end();
+ syscall_exit_to_user_mode(regs);
+
+ /*
+ * Check that the register state is valid for using SYSRET to exit
+ * to userspace. Otherwise use the slower but fully capable IRET
+ * exit path.
+ */
+
+ /* XEN PV guests always use the IRET path */
+ if (cpu_feature_enabled(X86_FEATURE_XENPV))
+ return false;
+
+ /* SYSRET requires RCX == RIP and R11 == EFLAGS */
+ if (unlikely(regs->cx != regs->ip || regs->r11 != regs->flags))
+ return false;
+
+ /* CS and SS must match the values set in MSR_STAR */
+ if (unlikely(regs->cs != __USER_CS || regs->ss != __USER_DS))
+ return false;
+
+ /*
+ * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
+ * in kernel space. This essentially lets the user take over
+ * the kernel, since userspace controls RSP.
+ *
+ * TASK_SIZE_MAX covers all user-accessible addresses other than
+ * the deprecated vsyscall page.
+ */
+ if (unlikely(regs->ip >= TASK_SIZE_MAX))
+ return false;
+
+ /*
+ * SYSRET cannot restore RF. It can restore TF, but unlike IRET,
+ * restoring TF results in a trap from userspace immediately after
+ * SYSRET.
+ */
+ if (unlikely(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF)))
+ return false;
+
+ /* Use SYSRET to exit to userspace */
+ return true;
+}
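do_syscall_x64() and do_syscall_x32() both lean on the trick spelled out in their comments: storing the signed syscall number into an unsigned variable turns every negative value into a huge one, so a single unsigned comparison rejects both negative and out-of-range numbers, and array_index_nospec() then clamps the index so the CPU cannot speculate past the bound. A user-space sketch of the architectural behaviour only; the stand-in clamp below has none of the real macro's branchless speculation barrier:

#include <stdio.h>

#define NR_FAKE_SYSCALLS 64u

/*
 * Architectural stand-in for array_index_nospec(): returns idx when it is
 * below size and 0 otherwise.  The real macro computes this with a mask
 * rather than a branch so mispredicted speculation cannot use a bad index.
 */
static unsigned int clamp_index(unsigned int idx, unsigned int size)
{
	return idx < size ? idx : 0;
}

static int dispatch(int nr)
{
	unsigned int unr = nr;	/* -1 becomes 0xffffffff, i.e. out of range */

	if (unr < NR_FAKE_SYSCALLS) {
		unr = clamp_index(unr, NR_FAKE_SYSCALLS);
		return (int)unr;	/* would index the syscall table here */
	}
	return -38;			/* -ENOSYS */
}

int main(void)
{
	/* Prints: 3 -38 -38 */
	printf("%d %d %d\n", dispatch(3), dispatch(-1), dispatch(1000));
	return 0;
}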
diff --git a/arch/x86/entry/syscall_x32.c b/arch/x86/entry/syscall_x32.c
deleted file mode 100644
index fb77908f44f3..000000000000
--- a/arch/x86/entry/syscall_x32.c
+++ /dev/null
@@ -1,25 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* System call table for x32 ABI. */
-
-#include <linux/linkage.h>
-#include <linux/sys.h>
-#include <linux/cache.h>
-#include <linux/syscalls.h>
-#include <asm/syscall.h>
-
-#define __SYSCALL(nr, sym) extern long __x64_##sym(const struct pt_regs *);
-#define __SYSCALL_NORETURN(nr, sym) extern long __noreturn __x64_##sym(const struct pt_regs *);
-#include <asm/syscalls_x32.h>
-#undef __SYSCALL
-
-#undef __SYSCALL_NORETURN
-#define __SYSCALL_NORETURN __SYSCALL
-
-#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs);
-long x32_sys_call(const struct pt_regs *regs, unsigned int nr)
-{
- switch (nr) {
- #include <asm/syscalls_x32.h>
- default: return __x64_sys_ni_syscall(regs);
- }
-};
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 3f0ec87d5db4..ac007ea00979 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -396,7 +396,7 @@
381 i386 pkey_alloc sys_pkey_alloc
382 i386 pkey_free sys_pkey_free
383 i386 statx sys_statx
-384 i386 arch_prctl sys_arch_prctl compat_sys_arch_prctl
+384 i386 arch_prctl sys_arch_prctl
385 i386 io_pgetevents sys_io_pgetevents_time32 compat_sys_io_pgetevents
386 i386 rseq sys_rseq
393 i386 semget sys_semget
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index c9216ac4fb1e..bf9f4f63e1b4 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -133,6 +133,7 @@ KBUILD_CFLAGS_32 += -fno-stack-protector
KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls)
KBUILD_CFLAGS_32 += -fno-omit-frame-pointer
KBUILD_CFLAGS_32 += -DDISABLE_BRANCH_PROFILING
+KBUILD_CFLAGS_32 += -DBUILD_VDSO
ifdef CONFIG_MITIGATION_RETPOLINE
ifneq ($(RETPOLINE_VDSO_CFLAGS),)
diff --git a/arch/x86/entry/vdso/extable.h b/arch/x86/entry/vdso/extable.h
index b56f6b012941..baba612b832c 100644
--- a/arch/x86/entry/vdso/extable.h
+++ b/arch/x86/entry/vdso/extable.h
@@ -7,7 +7,7 @@
* vDSO uses a dedicated handler the addresses are relative to the overall
* exception table, not each individual entry.
*/
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#define _ASM_VDSO_EXTABLE_HANDLE(from, to) \
ASM_VDSO_EXTABLE_HANDLE from to
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 39e6efc1a9ca..bfc7cabf4017 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -48,8 +48,7 @@ int __init init_vdso_image(const struct vdso_image *image)
apply_alternatives((struct alt_instr *)(image->data + image->alt),
(struct alt_instr *)(image->data + image->alt +
- image->alt_len),
- NULL);
+ image->alt_len));
return 0;
}
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 833478ffbbf5..6866cc5acb0b 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2849,7 +2849,7 @@ static bool is_uprobe_at_func_entry(struct pt_regs *regs)
return true;
/* endbr64 (64-bit only) */
- if (user_64bit_mode(regs) && is_endbr(*(u32 *)auprobe->insn))
+ if (user_64bit_mode(regs) && is_endbr((u32 *)auprobe->insn))
return true;
return false;
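The is_endbr() change above passes the address of the instruction bytes instead of a pre-loaded value; conceptually the helper asks whether a function entry starts with the 4-byte ENDBR64 opcode (f3 0f 1e fa). A hedged user-space sketch of that check on a little-endian host; the kernel helper also recognises the "poisoned" ENDBR that objtool rewrites, which is ignored here:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/*
 * ENDBR64 is the byte sequence f3 0f 1e fa, which reads as 0xfa1e0ff3
 * when loaded as a little-endian u32.
 */
static int looks_like_endbr64(const void *addr)
{
	uint32_t insn;

	memcpy(&insn, addr, sizeof(insn));	/* avoid an unaligned deref */
	return insn == 0xfa1e0ff3u;
}

int main(void)
{
	const uint8_t entry[] = { 0xf3, 0x0f, 0x1e, 0xfa, 0x55 };

	printf("%d\n", looks_like_endbr64(entry));	/* prints 1 */
	return 0;
}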
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index dc38dec244c1..1ac39611fea8 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -4735,9 +4735,9 @@ static int adl_hw_config(struct perf_event *event)
return -EOPNOTSUPP;
}
-static enum hybrid_cpu_type adl_get_hybrid_cpu_type(void)
+static enum intel_cpu_type adl_get_hybrid_cpu_type(void)
{
- return HYBRID_INTEL_CORE;
+ return INTEL_CPU_TYPE_CORE;
}
static inline bool erratum_hsw11(struct perf_event *event)
@@ -5082,7 +5082,8 @@ static void intel_pmu_check_hybrid_pmus(struct x86_hybrid_pmu *pmu)
static struct x86_hybrid_pmu *find_hybrid_pmu_for_cpu(void)
{
- u8 cpu_type = get_this_hybrid_cpu_type();
+ struct cpuinfo_x86 *c = &cpu_data(smp_processor_id());
+ enum intel_cpu_type cpu_type = c->topo.intel_type;
int i;
/*
@@ -5091,7 +5092,7 @@ static struct x86_hybrid_pmu *find_hybrid_pmu_for_cpu(void)
* on it. There should be a fixup function provided for these
* troublesome CPUs (->get_hybrid_cpu_type).
*/
- if (cpu_type == HYBRID_INTEL_NONE) {
+ if (cpu_type == INTEL_CPU_TYPE_UNKNOWN) {
if (x86_pmu.get_hybrid_cpu_type)
cpu_type = x86_pmu.get_hybrid_cpu_type();
else
@@ -5108,16 +5109,16 @@ static struct x86_hybrid_pmu *find_hybrid_pmu_for_cpu(void)
enum hybrid_pmu_type pmu_type = x86_pmu.hybrid_pmu[i].pmu_type;
u32 native_id;
- if (cpu_type == HYBRID_INTEL_CORE && pmu_type == hybrid_big)
+ if (cpu_type == INTEL_CPU_TYPE_CORE && pmu_type == hybrid_big)
return &x86_pmu.hybrid_pmu[i];
- if (cpu_type == HYBRID_INTEL_ATOM) {
+ if (cpu_type == INTEL_CPU_TYPE_ATOM) {
if (x86_pmu.num_hybrid_pmus == 2 && pmu_type == hybrid_small)
return &x86_pmu.hybrid_pmu[i];
- native_id = get_this_hybrid_cpu_native_id();
- if (native_id == skt_native_id && pmu_type == hybrid_small)
+ native_id = c->topo.intel_native_model_id;
+ if (native_id == INTEL_ATOM_SKT_NATIVE_ID && pmu_type == hybrid_small)
return &x86_pmu.hybrid_pmu[i];
- if (native_id == cmt_native_id && pmu_type == hybrid_tiny)
+ if (native_id == INTEL_ATOM_CMT_NATIVE_ID && pmu_type == hybrid_tiny)
return &x86_pmu.hybrid_pmu[i];
}
}
@@ -6583,15 +6584,21 @@ __init int intel_pmu_init(void)
char *name;
struct x86_hybrid_pmu *pmu;
+ /* Architectural Perfmon was introduced starting with Core "Yonah" */
if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
switch (boot_cpu_data.x86) {
- case 0x6:
- return p6_pmu_init();
- case 0xb:
+ case 6:
+ if (boot_cpu_data.x86_vfm < INTEL_CORE_YONAH)
+ return p6_pmu_init();
+ break;
+ case 11:
return knc_pmu_init();
- case 0xf:
+ case 15:
return p4_pmu_init();
}
+
+ pr_cont("unsupported CPU family %d model %d ",
+ boot_cpu_data.x86, boot_cpu_data.x86_model);
return -ENODEV;
}
@@ -6739,7 +6746,7 @@ __init int intel_pmu_init(void)
case INTEL_ATOM_SILVERMONT_D:
case INTEL_ATOM_SILVERMONT_MID:
case INTEL_ATOM_AIRMONT:
- case INTEL_ATOM_AIRMONT_MID:
+ case INTEL_ATOM_SILVERMONT_MID2:
memcpy(hw_cache_event_ids, slm_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs,
diff --git a/arch/x86/events/intel/p4.c b/arch/x86/events/intel/p4.c
index 844bc4fc4724..fb726c6fc6e7 100644
--- a/arch/x86/events/intel/p4.c
+++ b/arch/x86/events/intel/p4.c
@@ -10,6 +10,7 @@
#include <linux/perf_event.h>
#include <asm/perf_event_p4.h>
+#include <asm/cpu_device_id.h>
#include <asm/hardirq.h>
#include <asm/apic.h>
@@ -732,9 +733,9 @@ static bool p4_event_match_cpu_model(unsigned int event_idx)
{
/* INSTR_COMPLETED event only exist for model 3, 4, 6 (Prescott) */
if (event_idx == P4_EVENT_INSTR_COMPLETED) {
- if (boot_cpu_data.x86_model != 3 &&
- boot_cpu_data.x86_model != 4 &&
- boot_cpu_data.x86_model != 6)
+ if (boot_cpu_data.x86_vfm != INTEL_P4_PRESCOTT &&
+ boot_cpu_data.x86_vfm != INTEL_P4_PRESCOTT_2M &&
+ boot_cpu_data.x86_vfm != INTEL_P4_CEDARMILL)
return false;
}
diff --git a/arch/x86/events/intel/p6.c b/arch/x86/events/intel/p6.c
index a6cffb4f4ef5..65b45e9d7016 100644
--- a/arch/x86/events/intel/p6.c
+++ b/arch/x86/events/intel/p6.c
@@ -2,6 +2,8 @@
#include <linux/perf_event.h>
#include <linux/types.h>
+#include <asm/cpu_device_id.h>
+
#include "../perf_event.h"
/*
@@ -248,30 +250,8 @@ __init int p6_pmu_init(void)
{
x86_pmu = p6_pmu;
- switch (boot_cpu_data.x86_model) {
- case 1: /* Pentium Pro */
+ if (boot_cpu_data.x86_vfm == INTEL_PENTIUM_PRO)
x86_add_quirk(p6_pmu_rdpmc_quirk);
- break;
-
- case 3: /* Pentium II - Klamath */
- case 5: /* Pentium II - Deschutes */
- case 6: /* Pentium II - Mendocino */
- break;
-
- case 7: /* Pentium III - Katmai */
- case 8: /* Pentium III - Coppermine */
- case 10: /* Pentium III Xeon */
- case 11: /* Pentium III - Tualatin */
- break;
-
- case 9: /* Pentium M - Banias */
- case 13: /* Pentium M - Dothan */
- break;
-
- default:
- pr_cont("unsupported p6 CPU model %d ", boot_cpu_data.x86_model);
- return -ENODEV;
- }
memcpy(hw_cache_event_ids, p6_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 8e5a4c3c5b95..2c0ce0e9545e 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -674,18 +674,6 @@ enum {
#define PERF_PEBS_DATA_SOURCE_GRT_MAX 0x10
#define PERF_PEBS_DATA_SOURCE_GRT_MASK (PERF_PEBS_DATA_SOURCE_GRT_MAX - 1)
-/*
- * CPUID.1AH.EAX[31:0] uniquely identifies the microarchitecture
- * of the core. Bits 31-24 indicates its core type (Core or Atom)
- * and Bits [23:0] indicates the native model ID of the core.
- * Core type and native model ID are defined in below enumerations.
- */
-enum hybrid_cpu_type {
- HYBRID_INTEL_NONE,
- HYBRID_INTEL_ATOM = 0x20,
- HYBRID_INTEL_CORE = 0x40,
-};
-
#define X86_HYBRID_PMU_ATOM_IDX 0
#define X86_HYBRID_PMU_CORE_IDX 1
#define X86_HYBRID_PMU_TINY_IDX 2
@@ -702,11 +690,6 @@ enum hybrid_pmu_type {
hybrid_big_small_tiny = hybrid_big | hybrid_small_tiny,
};
-enum atom_native_id {
- cmt_native_id = 0x2, /* Crestmont */
- skt_native_id = 0x3, /* Skymont */
-};
-
struct x86_hybrid_pmu {
struct pmu pmu;
const char *name;
@@ -993,7 +976,7 @@ struct x86_pmu {
*/
int num_hybrid_pmus;
struct x86_hybrid_pmu *hybrid_pmu;
- enum hybrid_cpu_type (*get_hybrid_cpu_type) (void);
+ enum intel_cpu_type (*get_hybrid_cpu_type) (void);
};
struct x86_perf_task_context_opt {
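The enum removed from perf_event.h documented how CPUID.1AH:EAX is split: bits 31:24 carry the core type (0x20 Atom, 0x40 Core) and bits 23:0 the native model ID; the same information is now consumed through c->topo.intel_type and c->topo.intel_native_model_id. The decode itself amounts to a shift and a mask, shown here on an example value only:

#include <stdint.h>
#include <stdio.h>

/* Layout per the removed comment: type in bits 31:24, native ID in 23:0. */
struct hybrid_id {
	uint8_t  core_type;
	uint32_t native_model_id;
};

static struct hybrid_id decode_leaf_1a_eax(uint32_t eax)
{
	struct hybrid_id id = {
		.core_type       = eax >> 24,
		.native_model_id = eax & 0xffffff,
	};
	return id;
}

int main(void)
{
	/* Example value only: an Atom core reporting native model ID 3. */
	struct hybrid_id id = decode_leaf_1a_eax(0x20000003);

	printf("type=0x%02x native=%u\n", id.core_type, id.native_model_id);
	return 0;
}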
diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c
index cc8c3bd0e7c2..1f7c3082a36d 100644
--- a/arch/x86/hyperv/mmu.c
+++ b/arch/x86/hyperv/mmu.c
@@ -239,5 +239,4 @@ void hyperv_setup_mmu_ops(void)
pr_info("Using hypercall for remote TLB flush\n");
pv_ops.mmu.flush_tlb_multi = hyperv_flush_tlb_multi;
- pv_ops.mmu.tlb_remove_table = tlb_remove_table;
}
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index 58f4ddecc5fa..4566000e15c4 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -8,6 +8,7 @@ generated-y += syscalls_x32.h
generated-y += unistd_32_ia32.h
generated-y += unistd_64_x32.h
generated-y += xen-hypercalls.h
+generated-y += cpufeaturemasks.h
generic-y += early_ioremap.h
generic-y += fprobe.h
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 3b3d3aa19acd..4a37a8bd87fd 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -15,7 +15,7 @@
#define ALT_DIRECT_CALL(feature) ((ALT_FLAG_DIRECT_CALL << ALT_FLAGS_SHIFT) | (feature))
#define ALT_CALL_ALWAYS ALT_DIRECT_CALL(X86_FEATURE_ALWAYS)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/stddef.h>
@@ -87,20 +87,19 @@ extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
* instructions were patched in already:
*/
extern int alternatives_patched;
-struct module;
extern void alternative_instructions(void);
-extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end,
- struct module *mod);
-extern void apply_retpolines(s32 *start, s32 *end, struct module *mod);
-extern void apply_returns(s32 *start, s32 *end, struct module *mod);
-extern void apply_seal_endbr(s32 *start, s32 *end, struct module *mod);
+extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
+extern void apply_retpolines(s32 *start, s32 *end);
+extern void apply_returns(s32 *start, s32 *end);
+extern void apply_seal_endbr(s32 *start, s32 *end);
extern void apply_fineibt(s32 *start_retpoline, s32 *end_retpoine,
- s32 *start_cfi, s32 *end_cfi, struct module *mod);
+ s32 *start_cfi, s32 *end_cfi);
+
+struct module;
struct callthunk_sites {
s32 *call_start, *call_end;
- struct alt_instr *alt_start, *alt_end;
};
#ifdef CONFIG_CALL_THUNKS
@@ -237,10 +236,12 @@ static inline int alternatives_text_reserved(void *start, void *end)
* references: i.e., if used for a function, it would add the PLT
* suffix.
*/
-#define alternative_call(oldfunc, newfunc, ft_flags, output, input...) \
+#define alternative_call(oldfunc, newfunc, ft_flags, output, input, clobbers...) \
asm_inline volatile(ALTERNATIVE("call %c[old]", "call %c[new]", ft_flags) \
: ALT_OUTPUT_SP(output) \
- : [old] "i" (oldfunc), [new] "i" (newfunc), ## input)
+ : [old] "i" (oldfunc), [new] "i" (newfunc) \
+ COMMA(input) \
+ : clobbers)
/*
* Like alternative_call, but there are two features and respective functions.
@@ -249,24 +250,14 @@ static inline int alternatives_text_reserved(void *start, void *end)
* Otherwise, old function is used.
*/
#define alternative_call_2(oldfunc, newfunc1, ft_flags1, newfunc2, ft_flags2, \
- output, input...) \
+ output, input, clobbers...) \
asm_inline volatile(ALTERNATIVE_2("call %c[old]", "call %c[new1]", ft_flags1, \
"call %c[new2]", ft_flags2) \
: ALT_OUTPUT_SP(output) \
: [old] "i" (oldfunc), [new1] "i" (newfunc1), \
- [new2] "i" (newfunc2), ## input)
-
-/*
- * use this macro(s) if you need more than one output parameter
- * in alternative_io
- */
-#define ASM_OUTPUT2(a...) a
-
-/*
- * use this macro if you need clobbers but no inputs in
- * alternative_{input,io,call}()
- */
-#define ASM_NO_INPUT_CLOBBER(clbr...) "i" (0) : clbr
+ [new2] "i" (newfunc2) \
+ COMMA(input) \
+ : clobbers)
#define ALT_OUTPUT_SP(...) ASM_CALL_CONSTRAINT, ## __VA_ARGS__
@@ -286,7 +277,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
void BUG_func(void);
void nop_func(void);
-#else /* __ASSEMBLY__ */
+#else /* __ASSEMBLER__ */
#ifdef CONFIG_SMP
.macro LOCK_PREFIX
@@ -369,6 +360,6 @@ void nop_func(void);
ALTERNATIVE_2 oldinstr, newinstr_no, X86_FEATURE_ALWAYS, \
newinstr_yes, ft_flags
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_ALTERNATIVE_H */
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index 4c4efb93045e..adfa0854cf2d 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -27,7 +27,6 @@ struct amd_l3_cache {
};
struct amd_northbridge {
- struct pci_dev *root;
struct pci_dev *misc;
struct pci_dev *link;
struct amd_l3_cache l3_cache;
diff --git a/arch/x86/include/asm/amd_node.h b/arch/x86/include/asm/amd_node.h
index 113ad3e8ee40..23fe617898a8 100644
--- a/arch/x86/include/asm/amd_node.h
+++ b/arch/x86/include/asm/amd_node.h
@@ -30,7 +30,31 @@ static inline u16 amd_num_nodes(void)
return topology_amd_nodes_per_pkg() * topology_max_packages();
}
+#ifdef CONFIG_AMD_NODE
int __must_check amd_smn_read(u16 node, u32 address, u32 *value);
int __must_check amd_smn_write(u16 node, u32 address, u32 value);
+/* Should only be used by the HSMP driver. */
+int __must_check amd_smn_hsmp_rdwr(u16 node, u32 address, u32 *value, bool write);
+#else
+static inline int __must_check amd_smn_read(u16 node, u32 address, u32 *value) { return -ENODEV; }
+static inline int __must_check amd_smn_write(u16 node, u32 address, u32 value) { return -ENODEV; }
+
+static inline int __must_check amd_smn_hsmp_rdwr(u16 node, u32 address, u32 *value, bool write)
+{
+ return -ENODEV;
+}
+#endif /* CONFIG_AMD_NODE */
+
+/* helper for use with read_poll_timeout */
+static inline int smn_read_register(u32 reg)
+{
+ int data, rc;
+
+ rc = amd_smn_read(0, reg, &data);
+ if (rc)
+ return rc;
+
+ return data;
+}
#endif /*_ASM_X86_AMD_NODE_H_*/
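smn_read_register() folds the SMN error code and the register value into a single int precisely so it can serve as the op argument of read_poll_timeout(). A sketch of how a caller might poll a status bit, in kernel context; the register address and ready bit are invented for illustration:

#include <linux/bits.h>
#include <linux/iopoll.h>
#include <asm/amd_node.h>

/* MY_SMN_STATUS and MY_SMN_READY are hypothetical placeholders. */
#define MY_SMN_STATUS	0x3b10534
#define MY_SMN_READY	BIT(0)

static int wait_for_smn_ready(void)
{
	int data, ret;

	/* Stop early on SMN errors (negative values) or once the bit sets. */
	ret = read_poll_timeout(smn_read_register, data,
				data < 0 || (data & MY_SMN_READY),
				100, 100000, false, MY_SMN_STATUS);
	if (ret)
		return ret;		/* -ETIMEDOUT */

	return data < 0 ? data : 0;	/* propagate SMN read failures */
}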
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index f21ff1932699..c903d358405d 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -99,8 +99,8 @@ static inline void native_apic_mem_write(u32 reg, u32 v)
volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg);
alternative_io("movl %0, %1", "xchgl %0, %1", X86_BUG_11AP,
- ASM_OUTPUT2("=r" (v), "=m" (*addr)),
- ASM_OUTPUT2("0" (v), "m" (*addr)));
+ ASM_OUTPUT("=r" (v), "=m" (*addr)),
+ ASM_INPUT("0" (v), "m" (*addr)));
}
static inline u32 native_apic_mem_read(u32 reg)
diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h
index ba88edd0d58b..b5982b94bdba 100644
--- a/arch/x86/include/asm/arch_hweight.h
+++ b/arch/x86/include/asm/arch_hweight.h
@@ -16,9 +16,10 @@ static __always_inline unsigned int __arch_hweight32(unsigned int w)
{
unsigned int res;
- asm (ALTERNATIVE("call __sw_hweight32", "popcntl %1, %0", X86_FEATURE_POPCNT)
- : "="REG_OUT (res)
- : REG_IN (w));
+ asm_inline (ALTERNATIVE("call __sw_hweight32",
+ "popcntl %[val], %[cnt]", X86_FEATURE_POPCNT)
+ : [cnt] "=" REG_OUT (res), ASM_CALL_CONSTRAINT
+ : [val] REG_IN (w));
return res;
}
@@ -44,9 +45,10 @@ static __always_inline unsigned long __arch_hweight64(__u64 w)
{
unsigned long res;
- asm (ALTERNATIVE("call __sw_hweight64", "popcntq %1, %0", X86_FEATURE_POPCNT)
- : "="REG_OUT (res)
- : REG_IN (w));
+ asm_inline (ALTERNATIVE("call __sw_hweight64",
+ "popcntq %[val], %[cnt]", X86_FEATURE_POPCNT)
+ : [cnt] "=" REG_OUT (res), ASM_CALL_CONSTRAINT
+ : [val] REG_IN (w));
return res;
}
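__arch_hweight32() patches in a single popcntl when X86_FEATURE_POPCNT is present and otherwise calls out to __sw_hweight32. The fallback is a parallel (SWAR) bit count; the sketch below is in the same style, not the kernel's exact implementation:

#include <stdint.h>
#include <stdio.h>

/* Parallel population count: sum bits in pairs, nibbles, then bytes. */
static unsigned int sw_hweight32(uint32_t w)
{
	w -= (w >> 1) & 0x55555555;
	w  = (w & 0x33333333) + ((w >> 2) & 0x33333333);
	w  = (w + (w >> 4)) & 0x0f0f0f0f;
	return (w * 0x01010101) >> 24;
}

int main(void)
{
	uint32_t v = 0xdeadbeef;

	/* Both print 24. */
	printf("%u %u\n", sw_hweight32(v), __builtin_popcount(v));
	return 0;
}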
diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
index 3674006e3974..11c6fecc3ad7 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -16,10 +16,10 @@
#include <asm/gsseg.h>
#include <asm/nospec-branch.h>
-#ifndef CONFIG_X86_CMPXCHG64
+#ifndef CONFIG_X86_CX8
extern void cmpxchg8b_emu(void);
#endif
-#if defined(__GENKSYMS__) && defined(CONFIG_STACKPROTECTOR)
+#ifdef CONFIG_STACKPROTECTOR
extern unsigned long __ref_stack_chk_guard;
#endif
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 2bec0c89a95c..cc2881576c2c 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -2,7 +2,7 @@
#ifndef _ASM_X86_ASM_H
#define _ASM_X86_ASM_H
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
# define __ASM_FORM(x, ...) x,## __VA_ARGS__
# define __ASM_FORM_RAW(x, ...) x,## __VA_ARGS__
# define __ASM_FORM_COMMA(x, ...) x,## __VA_ARGS__,
@@ -113,7 +113,7 @@
#endif
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#ifndef __pic__
static __always_inline __pure void *rip_rel_ptr(void *p)
{
@@ -144,7 +144,7 @@ static __always_inline __pure void *rip_rel_ptr(void *p)
# include <asm/extable_fixup_types.h>
/* Exception table entry */
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
# define _ASM_EXTABLE_TYPE(from, to, type) \
.pushsection "__ex_table","a" ; \
@@ -164,7 +164,7 @@ static __always_inline __pure void *rip_rel_ptr(void *p)
# define _ASM_NOKPROBE(entry)
# endif
-#else /* ! __ASSEMBLY__ */
+#else /* ! __ASSEMBLER__ */
# define DEFINE_EXTABLE_TYPE_REG \
".macro extable_type_reg type:req reg:req\n" \
@@ -213,6 +213,17 @@ static __always_inline __pure void *rip_rel_ptr(void *p)
/* For C file, we already have NOKPROBE_SYMBOL macro */
+/* Insert a comma if args are non-empty */
+#define COMMA(x...) __COMMA(x)
+#define __COMMA(...) , ##__VA_ARGS__
+
+/*
+ * Combine multiple asm inline constraint args into a single arg for passing to
+ * another macro.
+ */
+#define ASM_OUTPUT(x...) x
+#define ASM_INPUT(x...) x
+
/*
* This output constraint should be used for any inline asm which has a "call"
* instruction. Otherwise the asm may be inserted before the frame pointer
@@ -221,7 +232,7 @@ static __always_inline __pure void *rip_rel_ptr(void *p)
*/
register unsigned long current_stack_pointer asm(_ASM_SP);
#define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer)
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#define _ASM_EXTABLE(from, to) \
_ASM_EXTABLE_TYPE(from, to, EX_TYPE_DEFAULT)
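The new COMMA(), ASM_OUTPUT() and ASM_INPUT() helpers exist so that a comma-separated constraint list can travel through a single macro parameter without the preprocessor splitting it into several arguments. A hypothetical wrapper (not from this patch) showing what the two bundlers buy:

	#include <linux/types.h>
	#include <asm/asm.h>

	/* Forward whole constraint lists through single macro slots. */
	#define EXAMPLE_OP(insn, outputs, inputs, clobbers...)		\
		asm volatile(insn					\
			     : outputs					\
			     : inputs					\
			     : clobbers)

	static inline u64 example_mul32(u32 a, u32 b)
	{
		u32 lo, hi;

		/* MULL writes EDX:EAX, so there are two outputs and two inputs. */
		EXAMPLE_OP("mull %[rhs]",
			   ASM_OUTPUT("=a" (lo), "=d" (hi)),
			   ASM_INPUT("0" (a), [rhs] "rm" (b)),
			   "cc");

		return ((u64)hi << 32) | lo;
	}

Without the ASM_OUTPUT()/ASM_INPUT() wrappers, each comma in those constraint lists would start a new EXAMPLE_OP() argument.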
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 55b4d24356ea..75743f1dfd4e 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -30,14 +30,14 @@ static __always_inline void arch_atomic_set(atomic_t *v, int i)
static __always_inline void arch_atomic_add(int i, atomic_t *v)
{
- asm volatile(LOCK_PREFIX "addl %1,%0"
+ asm_inline volatile(LOCK_PREFIX "addl %1, %0"
: "+m" (v->counter)
: "ir" (i) : "memory");
}
static __always_inline void arch_atomic_sub(int i, atomic_t *v)
{
- asm volatile(LOCK_PREFIX "subl %1,%0"
+ asm_inline volatile(LOCK_PREFIX "subl %1, %0"
: "+m" (v->counter)
: "ir" (i) : "memory");
}
@@ -50,14 +50,14 @@ static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v)
static __always_inline void arch_atomic_inc(atomic_t *v)
{
- asm volatile(LOCK_PREFIX "incl %0"
+ asm_inline volatile(LOCK_PREFIX "incl %0"
: "+m" (v->counter) :: "memory");
}
#define arch_atomic_inc arch_atomic_inc
static __always_inline void arch_atomic_dec(atomic_t *v)
{
- asm volatile(LOCK_PREFIX "decl %0"
+ asm_inline volatile(LOCK_PREFIX "decl %0"
: "+m" (v->counter) :: "memory");
}
#define arch_atomic_dec arch_atomic_dec
@@ -116,7 +116,7 @@ static __always_inline int arch_atomic_xchg(atomic_t *v, int new)
static __always_inline void arch_atomic_and(int i, atomic_t *v)
{
- asm volatile(LOCK_PREFIX "andl %1,%0"
+ asm_inline volatile(LOCK_PREFIX "andl %1, %0"
: "+m" (v->counter)
: "ir" (i)
: "memory");
@@ -134,7 +134,7 @@ static __always_inline int arch_atomic_fetch_and(int i, atomic_t *v)
static __always_inline void arch_atomic_or(int i, atomic_t *v)
{
- asm volatile(LOCK_PREFIX "orl %1,%0"
+ asm_inline volatile(LOCK_PREFIX "orl %1, %0"
: "+m" (v->counter)
: "ir" (i)
: "memory");
@@ -152,7 +152,7 @@ static __always_inline int arch_atomic_fetch_or(int i, atomic_t *v)
static __always_inline void arch_atomic_xor(int i, atomic_t *v)
{
- asm volatile(LOCK_PREFIX "xorl %1,%0"
+ asm_inline volatile(LOCK_PREFIX "xorl %1, %0"
: "+m" (v->counter)
: "ir" (i)
: "memory");
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index 6c6e9b9f98a4..ab838205c1c6 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -48,17 +48,20 @@ static __always_inline s64 arch_atomic64_read_nonatomic(const atomic64_t *v)
ATOMIC64_EXPORT(atomic64_##sym)
#endif
-#ifdef CONFIG_X86_CMPXCHG64
-#define __alternative_atomic64(f, g, out, in...) \
- asm volatile("call %c[func]" \
+#ifdef CONFIG_X86_CX8
+#define __alternative_atomic64(f, g, out, in, clobbers...) \
+ asm volatile("call %c[func]" \
: ALT_OUTPUT_SP(out) \
- : [func] "i" (atomic64_##g##_cx8), ## in)
+ : [func] "i" (atomic64_##g##_cx8) \
+ COMMA(in) \
+ : clobbers)
#define ATOMIC64_DECL(sym) ATOMIC64_DECL_ONE(sym##_cx8)
#else
-#define __alternative_atomic64(f, g, out, in...) \
- alternative_call(atomic64_##f##_386, atomic64_##g##_cx8, \
- X86_FEATURE_CX8, ASM_OUTPUT2(out), ## in)
+#define __alternative_atomic64(f, g, out, in, clobbers...) \
+ alternative_call(atomic64_##f##_386, atomic64_##g##_cx8, \
+ X86_FEATURE_CX8, ASM_OUTPUT(out), \
+ ASM_INPUT(in), clobbers)
#define ATOMIC64_DECL(sym) ATOMIC64_DECL_ONE(sym##_cx8); \
ATOMIC64_DECL_ONE(sym##_386)
@@ -69,8 +72,8 @@ ATOMIC64_DECL_ONE(inc_386);
ATOMIC64_DECL_ONE(dec_386);
#endif
-#define alternative_atomic64(f, out, in...) \
- __alternative_atomic64(f, f, ASM_OUTPUT2(out), ## in)
+#define alternative_atomic64(f, out, in, clobbers...) \
+ __alternative_atomic64(f, f, ASM_OUTPUT(out), ASM_INPUT(in), clobbers)
ATOMIC64_DECL(read);
ATOMIC64_DECL(set);
@@ -105,9 +108,10 @@ static __always_inline s64 arch_atomic64_xchg(atomic64_t *v, s64 n)
s64 o;
unsigned high = (unsigned)(n >> 32);
unsigned low = (unsigned)n;
- alternative_atomic64(xchg, "=&A" (o),
- "S" (v), "b" (low), "c" (high)
- : "memory");
+ alternative_atomic64(xchg,
+ "=&A" (o),
+ ASM_INPUT("S" (v), "b" (low), "c" (high)),
+ "memory");
return o;
}
#define arch_atomic64_xchg arch_atomic64_xchg
@@ -116,23 +120,25 @@ static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i)
{
unsigned high = (unsigned)(i >> 32);
unsigned low = (unsigned)i;
- alternative_atomic64(set, /* no output */,
- "S" (v), "b" (low), "c" (high)
- : "eax", "edx", "memory");
+ alternative_atomic64(set,
+ /* no output */,
+ ASM_INPUT("S" (v), "b" (low), "c" (high)),
+ "eax", "edx", "memory");
}
static __always_inline s64 arch_atomic64_read(const atomic64_t *v)
{
s64 r;
- alternative_atomic64(read, "=&A" (r), "c" (v) : "memory");
+ alternative_atomic64(read, "=&A" (r), "c" (v), "memory");
return r;
}
static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v)
{
alternative_atomic64(add_return,
- ASM_OUTPUT2("+A" (i), "+c" (v)),
- ASM_NO_INPUT_CLOBBER("memory"));
+ ASM_OUTPUT("+A" (i), "+c" (v)),
+ /* no input */,
+ "memory");
return i;
}
#define arch_atomic64_add_return arch_atomic64_add_return
@@ -140,8 +146,9 @@ static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v)
static __always_inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v)
{
alternative_atomic64(sub_return,
- ASM_OUTPUT2("+A" (i), "+c" (v)),
- ASM_NO_INPUT_CLOBBER("memory"));
+ ASM_OUTPUT("+A" (i), "+c" (v)),
+ /* no input */,
+ "memory");
return i;
}
#define arch_atomic64_sub_return arch_atomic64_sub_return
@@ -149,8 +156,10 @@ static __always_inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v)
static __always_inline s64 arch_atomic64_inc_return(atomic64_t *v)
{
s64 a;
- alternative_atomic64(inc_return, "=&A" (a),
- "S" (v) : "memory", "ecx");
+ alternative_atomic64(inc_return,
+ "=&A" (a),
+ "S" (v),
+ "memory", "ecx");
return a;
}
#define arch_atomic64_inc_return arch_atomic64_inc_return
@@ -158,8 +167,10 @@ static __always_inline s64 arch_atomic64_inc_return(atomic64_t *v)
static __always_inline s64 arch_atomic64_dec_return(atomic64_t *v)
{
s64 a;
- alternative_atomic64(dec_return, "=&A" (a),
- "S" (v) : "memory", "ecx");
+ alternative_atomic64(dec_return,
+ "=&A" (a),
+ "S" (v),
+ "memory", "ecx");
return a;
}
#define arch_atomic64_dec_return arch_atomic64_dec_return
@@ -167,28 +178,34 @@ static __always_inline s64 arch_atomic64_dec_return(atomic64_t *v)
static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v)
{
__alternative_atomic64(add, add_return,
- ASM_OUTPUT2("+A" (i), "+c" (v)),
- ASM_NO_INPUT_CLOBBER("memory"));
+ ASM_OUTPUT("+A" (i), "+c" (v)),
+ /* no input */,
+ "memory");
}
static __always_inline void arch_atomic64_sub(s64 i, atomic64_t *v)
{
__alternative_atomic64(sub, sub_return,
- ASM_OUTPUT2("+A" (i), "+c" (v)),
- ASM_NO_INPUT_CLOBBER("memory"));
+ ASM_OUTPUT("+A" (i), "+c" (v)),
+ /* no input */,
+ "memory");
}
static __always_inline void arch_atomic64_inc(atomic64_t *v)
{
- __alternative_atomic64(inc, inc_return, /* no output */,
- "S" (v) : "memory", "eax", "ecx", "edx");
+ __alternative_atomic64(inc, inc_return,
+ /* no output */,
+ "S" (v),
+ "memory", "eax", "ecx", "edx");
}
#define arch_atomic64_inc arch_atomic64_inc
static __always_inline void arch_atomic64_dec(atomic64_t *v)
{
- __alternative_atomic64(dec, dec_return, /* no output */,
- "S" (v) : "memory", "eax", "ecx", "edx");
+ __alternative_atomic64(dec, dec_return,
+ /* no output */,
+ "S" (v),
+ "memory", "eax", "ecx", "edx");
}
#define arch_atomic64_dec arch_atomic64_dec
@@ -197,8 +214,9 @@ static __always_inline int arch_atomic64_add_unless(atomic64_t *v, s64 a, s64 u)
unsigned low = (unsigned)u;
unsigned high = (unsigned)(u >> 32);
alternative_atomic64(add_unless,
- ASM_OUTPUT2("+A" (a), "+c" (low), "+D" (high)),
- "S" (v) : "memory");
+ ASM_OUTPUT("+A" (a), "+c" (low), "+D" (high)),
+ "S" (v),
+ "memory");
return (int)a;
}
#define arch_atomic64_add_unless arch_atomic64_add_unless
@@ -206,8 +224,10 @@ static __always_inline int arch_atomic64_add_unless(atomic64_t *v, s64 a, s64 u)
static __always_inline int arch_atomic64_inc_not_zero(atomic64_t *v)
{
int r;
- alternative_atomic64(inc_not_zero, "=&a" (r),
- "S" (v) : "ecx", "edx", "memory");
+ alternative_atomic64(inc_not_zero,
+ "=&a" (r),
+ "S" (v),
+ "ecx", "edx", "memory");
return r;
}
#define arch_atomic64_inc_not_zero arch_atomic64_inc_not_zero
@@ -215,8 +235,10 @@ static __always_inline int arch_atomic64_inc_not_zero(atomic64_t *v)
static __always_inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
{
s64 r;
- alternative_atomic64(dec_if_positive, "=&A" (r),
- "S" (v) : "ecx", "memory");
+ alternative_atomic64(dec_if_positive,
+ "=&A" (r),
+ "S" (v),
+ "ecx", "memory");
return r;
}
#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
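The alternative_atomic64()/__alternative_atomic64() calling convention changes here from "inputs and clobbers jammed into one trailing vararg" to three explicit slots: output, input (bundled with ASM_INPUT() when there is more than one constraint), then clobbers. Purely as an illustration, the arch_atomic64_xchg() call above expands in the CONFIG_X86_CX8 case to roughly the following (ALT_OUTPUT_SP() details omitted):

	asm volatile("call %c[func]"
		     : "=&A" (o)				/* ASM_OUTPUT(out) */
		     : [func] "i" (atomic64_xchg_cx8),		/* fixed input */
		       "S" (v), "b" (low), "c" (high)		/* ASM_INPUT(in), appended via COMMA() */
		     : "memory");				/* trailing clobbers */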
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index ae12acae5b06..87b496325b5b 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -22,14 +22,14 @@ static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i)
static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v)
{
- asm volatile(LOCK_PREFIX "addq %1,%0"
+ asm_inline volatile(LOCK_PREFIX "addq %1, %0"
: "=m" (v->counter)
: "er" (i), "m" (v->counter) : "memory");
}
static __always_inline void arch_atomic64_sub(s64 i, atomic64_t *v)
{
- asm volatile(LOCK_PREFIX "subq %1,%0"
+ asm_inline volatile(LOCK_PREFIX "subq %1, %0"
: "=m" (v->counter)
: "er" (i), "m" (v->counter) : "memory");
}
@@ -42,7 +42,7 @@ static __always_inline bool arch_atomic64_sub_and_test(s64 i, atomic64_t *v)
static __always_inline void arch_atomic64_inc(atomic64_t *v)
{
- asm volatile(LOCK_PREFIX "incq %0"
+ asm_inline volatile(LOCK_PREFIX "incq %0"
: "=m" (v->counter)
: "m" (v->counter) : "memory");
}
@@ -50,7 +50,7 @@ static __always_inline void arch_atomic64_inc(atomic64_t *v)
static __always_inline void arch_atomic64_dec(atomic64_t *v)
{
- asm volatile(LOCK_PREFIX "decq %0"
+ asm_inline volatile(LOCK_PREFIX "decq %0"
: "=m" (v->counter)
: "m" (v->counter) : "memory");
}
@@ -110,7 +110,7 @@ static __always_inline s64 arch_atomic64_xchg(atomic64_t *v, s64 new)
static __always_inline void arch_atomic64_and(s64 i, atomic64_t *v)
{
- asm volatile(LOCK_PREFIX "andq %1,%0"
+ asm_inline volatile(LOCK_PREFIX "andq %1, %0"
: "+m" (v->counter)
: "er" (i)
: "memory");
@@ -128,7 +128,7 @@ static __always_inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v)
static __always_inline void arch_atomic64_or(s64 i, atomic64_t *v)
{
- asm volatile(LOCK_PREFIX "orq %1,%0"
+ asm_inline volatile(LOCK_PREFIX "orq %1, %0"
: "+m" (v->counter)
: "er" (i)
: "memory");
@@ -146,7 +146,7 @@ static __always_inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v)
static __always_inline void arch_atomic64_xor(s64 i, atomic64_t *v)
{
- asm volatile(LOCK_PREFIX "xorq %1,%0"
+ asm_inline volatile(LOCK_PREFIX "xorq %1, %0"
: "+m" (v->counter)
: "er" (i)
: "memory");
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index b96d45944c59..100413aff640 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -52,12 +52,12 @@ static __always_inline void
arch_set_bit(long nr, volatile unsigned long *addr)
{
if (__builtin_constant_p(nr)) {
- asm volatile(LOCK_PREFIX "orb %b1,%0"
+ asm_inline volatile(LOCK_PREFIX "orb %b1,%0"
: CONST_MASK_ADDR(nr, addr)
: "iq" (CONST_MASK(nr))
: "memory");
} else {
- asm volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0"
+ asm_inline volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0"
: : RLONG_ADDR(addr), "Ir" (nr) : "memory");
}
}
@@ -72,11 +72,11 @@ static __always_inline void
arch_clear_bit(long nr, volatile unsigned long *addr)
{
if (__builtin_constant_p(nr)) {
- asm volatile(LOCK_PREFIX "andb %b1,%0"
+ asm_inline volatile(LOCK_PREFIX "andb %b1,%0"
: CONST_MASK_ADDR(nr, addr)
: "iq" (~CONST_MASK(nr)));
} else {
- asm volatile(LOCK_PREFIX __ASM_SIZE(btr) " %1,%0"
+ asm_inline volatile(LOCK_PREFIX __ASM_SIZE(btr) " %1,%0"
: : RLONG_ADDR(addr), "Ir" (nr) : "memory");
}
}
@@ -98,7 +98,7 @@ static __always_inline bool arch_xor_unlock_is_negative_byte(unsigned long mask,
volatile unsigned long *addr)
{
bool negative;
- asm volatile(LOCK_PREFIX "xorb %2,%1"
+ asm_inline volatile(LOCK_PREFIX "xorb %2,%1"
CC_SET(s)
: CC_OUT(s) (negative), WBYTE_ADDR(addr)
: "iq" ((char)mask) : "memory");
@@ -122,11 +122,11 @@ static __always_inline void
arch_change_bit(long nr, volatile unsigned long *addr)
{
if (__builtin_constant_p(nr)) {
- asm volatile(LOCK_PREFIX "xorb %b1,%0"
+ asm_inline volatile(LOCK_PREFIX "xorb %b1,%0"
: CONST_MASK_ADDR(nr, addr)
: "iq" (CONST_MASK(nr)));
} else {
- asm volatile(LOCK_PREFIX __ASM_SIZE(btc) " %1,%0"
+ asm_inline volatile(LOCK_PREFIX __ASM_SIZE(btc) " %1,%0"
: : RLONG_ADDR(addr), "Ir" (nr) : "memory");
}
}
diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h
index 3e5b111e619d..3f02ff6d333d 100644
--- a/arch/x86/include/asm/boot.h
+++ b/arch/x86/include/asm/boot.h
@@ -74,7 +74,7 @@
# define BOOT_STACK_SIZE 0x1000
#endif
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
extern unsigned int output_len;
extern const unsigned long kernel_text_size;
extern const unsigned long kernel_total_size;
diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
index e85ac0c7c039..f0e9acf72547 100644
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -17,13 +17,17 @@
* In clang we have UD1s reporting UBSAN failures on X86, 64 and 32bit.
*/
#define INSN_ASOP 0x67
+#define INSN_LOCK 0xf0
#define OPCODE_ESCAPE 0x0f
#define SECOND_BYTE_OPCODE_UD1 0xb9
#define SECOND_BYTE_OPCODE_UD2 0x0b
#define BUG_NONE 0xffff
-#define BUG_UD1 0xfffe
-#define BUG_UD2 0xfffd
+#define BUG_UD2 0xfffe
+#define BUG_UD1 0xfffd
+#define BUG_UD1_UBSAN 0xfffc
+#define BUG_EA 0xffea
+#define BUG_LOCK 0xfff0
#ifdef CONFIG_GENERIC_BUG
diff --git a/arch/x86/include/asm/cfi.h b/arch/x86/include/asm/cfi.h
index 31d19c815f99..3e51ba459154 100644
--- a/arch/x86/include/asm/cfi.h
+++ b/arch/x86/include/asm/cfi.h
@@ -101,6 +101,16 @@ enum cfi_mode {
extern enum cfi_mode cfi_mode;
+#ifdef CONFIG_FINEIBT_BHI
+extern bool cfi_bhi;
+#else
+#define cfi_bhi (0)
+#endif
+
+typedef u8 bhi_thunk[32];
+extern bhi_thunk __bhi_args[];
+extern bhi_thunk __bhi_args_end[];
+
struct pt_regs;
#ifdef CONFIG_CFI_CLANG
@@ -125,6 +135,18 @@ static inline int cfi_get_offset(void)
#define cfi_get_offset cfi_get_offset
extern u32 cfi_get_func_hash(void *func);
+extern int cfi_get_func_arity(void *func);
+
+#ifdef CONFIG_FINEIBT
+extern bool decode_fineibt_insn(struct pt_regs *regs, unsigned long *target, u32 *type);
+#else
+static inline bool
+decode_fineibt_insn(struct pt_regs *regs, unsigned long *target, u32 *type)
+{
+ return false;
+}
+
+#endif
#else
static inline enum bug_trap_type handle_cfi_failure(struct pt_regs *regs)
@@ -137,6 +159,10 @@ static inline u32 cfi_get_func_hash(void *func)
{
return 0;
}
+static inline int cfi_get_func_arity(void *func)
+{
+ return 0;
+}
#endif /* CONFIG_CFI_CLANG */
#if HAS_KERNEL_IBT == 1
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index fd8afc1f5f6b..b61f32c3459f 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -44,22 +44,22 @@ extern void __add_wrong_size(void)
__typeof__ (*(ptr)) __ret = (arg); \
switch (sizeof(*(ptr))) { \
case __X86_CASE_B: \
- asm volatile (lock #op "b %b0, %1\n" \
+ asm_inline volatile (lock #op "b %b0, %1" \
: "+q" (__ret), "+m" (*(ptr)) \
: : "memory", "cc"); \
break; \
case __X86_CASE_W: \
- asm volatile (lock #op "w %w0, %1\n" \
+ asm_inline volatile (lock #op "w %w0, %1" \
: "+r" (__ret), "+m" (*(ptr)) \
: : "memory", "cc"); \
break; \
case __X86_CASE_L: \
- asm volatile (lock #op "l %0, %1\n" \
+ asm_inline volatile (lock #op "l %0, %1" \
: "+r" (__ret), "+m" (*(ptr)) \
: : "memory", "cc"); \
break; \
case __X86_CASE_Q: \
- asm volatile (lock #op "q %q0, %1\n" \
+ asm_inline volatile (lock #op "q %q0, %1" \
: "+r" (__ret), "+m" (*(ptr)) \
: : "memory", "cc"); \
break; \
@@ -91,7 +91,7 @@ extern void __add_wrong_size(void)
case __X86_CASE_B: \
{ \
volatile u8 *__ptr = (volatile u8 *)(ptr); \
- asm volatile(lock "cmpxchgb %2,%1" \
+ asm_inline volatile(lock "cmpxchgb %2, %1" \
: "=a" (__ret), "+m" (*__ptr) \
: "q" (__new), "0" (__old) \
: "memory"); \
@@ -100,7 +100,7 @@ extern void __add_wrong_size(void)
case __X86_CASE_W: \
{ \
volatile u16 *__ptr = (volatile u16 *)(ptr); \
- asm volatile(lock "cmpxchgw %2,%1" \
+ asm_inline volatile(lock "cmpxchgw %2, %1" \
: "=a" (__ret), "+m" (*__ptr) \
: "r" (__new), "0" (__old) \
: "memory"); \
@@ -109,7 +109,7 @@ extern void __add_wrong_size(void)
case __X86_CASE_L: \
{ \
volatile u32 *__ptr = (volatile u32 *)(ptr); \
- asm volatile(lock "cmpxchgl %2,%1" \
+ asm_inline volatile(lock "cmpxchgl %2, %1" \
: "=a" (__ret), "+m" (*__ptr) \
: "r" (__new), "0" (__old) \
: "memory"); \
@@ -118,7 +118,7 @@ extern void __add_wrong_size(void)
case __X86_CASE_Q: \
{ \
volatile u64 *__ptr = (volatile u64 *)(ptr); \
- asm volatile(lock "cmpxchgq %2,%1" \
+ asm_inline volatile(lock "cmpxchgq %2, %1" \
: "=a" (__ret), "+m" (*__ptr) \
: "r" (__new), "0" (__old) \
: "memory"); \
@@ -165,7 +165,7 @@ extern void __add_wrong_size(void)
case __X86_CASE_B: \
{ \
volatile u8 *__ptr = (volatile u8 *)(_ptr); \
- asm volatile(lock "cmpxchgb %[new], %[ptr]" \
+ asm_inline volatile(lock "cmpxchgb %[new], %[ptr]" \
CC_SET(z) \
: CC_OUT(z) (success), \
[ptr] "+m" (*__ptr), \
@@ -177,7 +177,7 @@ extern void __add_wrong_size(void)
case __X86_CASE_W: \
{ \
volatile u16 *__ptr = (volatile u16 *)(_ptr); \
- asm volatile(lock "cmpxchgw %[new], %[ptr]" \
+ asm_inline volatile(lock "cmpxchgw %[new], %[ptr]" \
CC_SET(z) \
: CC_OUT(z) (success), \
[ptr] "+m" (*__ptr), \
@@ -189,7 +189,7 @@ extern void __add_wrong_size(void)
case __X86_CASE_L: \
{ \
volatile u32 *__ptr = (volatile u32 *)(_ptr); \
- asm volatile(lock "cmpxchgl %[new], %[ptr]" \
+ asm_inline volatile(lock "cmpxchgl %[new], %[ptr]" \
CC_SET(z) \
: CC_OUT(z) (success), \
[ptr] "+m" (*__ptr), \
@@ -201,7 +201,7 @@ extern void __add_wrong_size(void)
case __X86_CASE_Q: \
{ \
volatile u64 *__ptr = (volatile u64 *)(_ptr); \
- asm volatile(lock "cmpxchgq %[new], %[ptr]" \
+ asm_inline volatile(lock "cmpxchgq %[new], %[ptr]" \
CC_SET(z) \
: CC_OUT(z) (success), \
[ptr] "+m" (*__ptr), \
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
index 8806c646d452..371f7906019e 100644
--- a/arch/x86/include/asm/cmpxchg_32.h
+++ b/arch/x86/include/asm/cmpxchg_32.h
@@ -19,7 +19,7 @@ union __u64_halves {
union __u64_halves o = { .full = (_old), }, \
n = { .full = (_new), }; \
\
- asm volatile(_lock "cmpxchg8b %[ptr]" \
+ asm_inline volatile(_lock "cmpxchg8b %[ptr]" \
: [ptr] "+m" (*(_ptr)), \
"+a" (o.low), "+d" (o.high) \
: "b" (n.low), "c" (n.high) \
@@ -45,7 +45,7 @@ static __always_inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new
n = { .full = (_new), }; \
bool ret; \
\
- asm volatile(_lock "cmpxchg8b %[ptr]" \
+ asm_inline volatile(_lock "cmpxchg8b %[ptr]" \
CC_SET(e) \
: CC_OUT(e) (ret), \
[ptr] "+m" (*(_ptr)), \
@@ -69,7 +69,7 @@ static __always_inline bool __try_cmpxchg64_local(volatile u64 *ptr, u64 *oldp,
return __arch_try_cmpxchg64(ptr, oldp, new,);
}
-#ifdef CONFIG_X86_CMPXCHG64
+#ifdef CONFIG_X86_CX8
#define arch_cmpxchg64 __cmpxchg64
diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h
index 5e241306db26..71d1e72ed879 100644
--- a/arch/x86/include/asm/cmpxchg_64.h
+++ b/arch/x86/include/asm/cmpxchg_64.h
@@ -38,7 +38,7 @@ union __u128_halves {
union __u128_halves o = { .full = (_old), }, \
n = { .full = (_new), }; \
\
- asm volatile(_lock "cmpxchg16b %[ptr]" \
+ asm_inline volatile(_lock "cmpxchg16b %[ptr]" \
: [ptr] "+m" (*(_ptr)), \
"+a" (o.low), "+d" (o.high) \
: "b" (n.low), "c" (n.high) \
@@ -65,7 +65,7 @@ static __always_inline u128 arch_cmpxchg128_local(volatile u128 *ptr, u128 old,
n = { .full = (_new), }; \
bool ret; \
\
- asm volatile(_lock "cmpxchg16b %[ptr]" \
+ asm_inline volatile(_lock "cmpxchg16b %[ptr]" \
CC_SET(e) \
: CC_OUT(e) (ret), \
[ptr] "+m" (*(_ptr)), \
diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index 98eced5084ca..ad235dda1ded 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -12,7 +12,6 @@
#ifndef CONFIG_SMP
#define cpu_physical_id(cpu) boot_cpu_physical_apicid
#define cpu_acpi_id(cpu) 0
-#define safe_smp_processor_id() 0
#endif /* CONFIG_SMP */
#ifdef CONFIG_HOTPLUG_CPU
@@ -50,20 +49,6 @@ static inline void split_lock_init(void) {}
static inline void bus_lock_init(void) {}
#endif
-#ifdef CONFIG_CPU_SUP_INTEL
-u8 get_this_hybrid_cpu_type(void);
-u32 get_this_hybrid_cpu_native_id(void);
-#else
-static inline u8 get_this_hybrid_cpu_type(void)
-{
- return 0;
-}
-
-static inline u32 get_this_hybrid_cpu_native_id(void)
-{
- return 0;
-}
-#endif
#ifdef CONFIG_IA32_FEAT_CTL
void init_ia32_feat_ctl(struct cpuinfo_x86 *c);
#else
diff --git a/arch/x86/include/asm/cpu_device_id.h b/arch/x86/include/asm/cpu_device_id.h
index ba32e0f44cba..6be777a06944 100644
--- a/arch/x86/include/asm/cpu_device_id.h
+++ b/arch/x86/include/asm/cpu_device_id.h
@@ -57,7 +57,7 @@
#define X86_CPU_ID_FLAG_ENTRY_VALID BIT(0)
/**
- * X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE - Base macro for CPU matching
+ * X86_MATCH_CPU - Base macro for CPU matching
* @_vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY
* The name is expanded to X86_VENDOR_@_vendor
* @_family: The family number or X86_FAMILY_ANY
@@ -74,47 +74,18 @@
* into another macro at the usage site for good reasons, then please
* start this local macro with X86_MATCH to allow easy grepping.
*/
-#define X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(_vendor, _family, _model, \
- _steppings, _feature, _data) { \
- .vendor = X86_VENDOR_##_vendor, \
- .family = _family, \
- .model = _model, \
- .steppings = _steppings, \
- .feature = _feature, \
- .flags = X86_CPU_ID_FLAG_ENTRY_VALID, \
- .driver_data = (unsigned long) _data \
-}
-
-#define X86_MATCH_VENDORID_FAM_MODEL_STEPPINGS_FEATURE(_vendor, _family, _model, \
- _steppings, _feature, _data) { \
+#define X86_MATCH_CPU(_vendor, _family, _model, _steppings, _feature, _type, _data) { \
.vendor = _vendor, \
.family = _family, \
.model = _model, \
.steppings = _steppings, \
.feature = _feature, \
.flags = X86_CPU_ID_FLAG_ENTRY_VALID, \
+ .type = _type, \
.driver_data = (unsigned long) _data \
}
/**
- * X86_MATCH_VENDOR_FAM_MODEL_FEATURE - Macro for CPU matching
- * @_vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY
- * The name is expanded to X86_VENDOR_@_vendor
- * @_family: The family number or X86_FAMILY_ANY
- * @_model: The model number, model constant or X86_MODEL_ANY
- * @_feature: A X86_FEATURE bit or X86_FEATURE_ANY
- * @_data: Driver specific data or NULL. The internal storage
- * format is unsigned long. The supplied value, pointer
- * etc. is casted to unsigned long internally.
- *
- * The steppings arguments of X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE() is
- * set to wildcards.
- */
-#define X86_MATCH_VENDOR_FAM_MODEL_FEATURE(vendor, family, model, feature, data) \
- X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(vendor, family, model, \
- X86_STEPPING_ANY, feature, data)
-
-/**
* X86_MATCH_VENDOR_FAM_FEATURE - Macro for matching vendor, family and CPU feature
* @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY
* The name is expanded to X86_VENDOR_@vendor
@@ -123,13 +94,10 @@
* @data: Driver specific data or NULL. The internal storage
* format is unsigned long. The supplied value, pointer
* etc. is casted to unsigned long internally.
- *
- * All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are
- * set to wildcards.
*/
-#define X86_MATCH_VENDOR_FAM_FEATURE(vendor, family, feature, data) \
- X86_MATCH_VENDOR_FAM_MODEL_FEATURE(vendor, family, \
- X86_MODEL_ANY, feature, data)
+#define X86_MATCH_VENDOR_FAM_FEATURE(vendor, family, feature, data) \
+ X86_MATCH_CPU(X86_VENDOR_##vendor, family, X86_MODEL_ANY, \
+ X86_STEPPING_ANY, feature, X86_CPU_TYPE_ANY, data)
/**
* X86_MATCH_VENDOR_FEATURE - Macro for matching vendor and CPU feature
@@ -139,12 +107,10 @@
* @data: Driver specific data or NULL. The internal storage
* format is unsigned long. The supplied value, pointer
* etc. is casted to unsigned long internally.
- *
- * All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are
- * set to wildcards.
*/
-#define X86_MATCH_VENDOR_FEATURE(vendor, feature, data) \
- X86_MATCH_VENDOR_FAM_FEATURE(vendor, X86_FAMILY_ANY, feature, data)
+#define X86_MATCH_VENDOR_FEATURE(vendor, feature, data) \
+ X86_MATCH_CPU(X86_VENDOR_##vendor, X86_FAMILY_ANY, X86_MODEL_ANY, \
+ X86_STEPPING_ANY, feature, X86_CPU_TYPE_ANY, data)
/**
* X86_MATCH_FEATURE - Macro for matching a CPU feature
@@ -152,12 +118,10 @@
* @data: Driver specific data or NULL. The internal storage
* format is unsigned long. The supplied value, pointer
* etc. is casted to unsigned long internally.
- *
- * All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are
- * set to wildcards.
*/
-#define X86_MATCH_FEATURE(feature, data) \
- X86_MATCH_VENDOR_FEATURE(ANY, feature, data)
+#define X86_MATCH_FEATURE(feature, data) \
+ X86_MATCH_CPU(X86_VENDOR_ANY, X86_FAMILY_ANY, X86_MODEL_ANY, \
+ X86_STEPPING_ANY, feature, X86_CPU_TYPE_ANY, data)
/**
* X86_MATCH_VENDOR_FAM_MODEL - Match vendor, family and model
@@ -168,13 +132,10 @@
* @data: Driver specific data or NULL. The internal storage
* format is unsigned long. The supplied value, pointer
* etc. is casted to unsigned long internally.
- *
- * All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are
- * set to wildcards.
*/
-#define X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, data) \
- X86_MATCH_VENDOR_FAM_MODEL_FEATURE(vendor, family, model, \
- X86_FEATURE_ANY, data)
+#define X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, data) \
+ X86_MATCH_CPU(X86_VENDOR_##vendor, family, model, X86_STEPPING_ANY, \
+ X86_FEATURE_ANY, X86_CPU_TYPE_ANY, data)
/**
* X86_MATCH_VENDOR_FAM - Match vendor and family
@@ -184,12 +145,10 @@
* @data: Driver specific data or NULL. The internal storage
* format is unsigned long. The supplied value, pointer
* etc. is casted to unsigned long internally.
- *
- * All other missing arguments to X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are
- * set of wildcards.
*/
-#define X86_MATCH_VENDOR_FAM(vendor, family, data) \
- X86_MATCH_VENDOR_FAM_MODEL(vendor, family, X86_MODEL_ANY, data)
+#define X86_MATCH_VENDOR_FAM(vendor, family, data) \
+ X86_MATCH_CPU(X86_VENDOR_##vendor, family, X86_MODEL_ANY, \
+ X86_STEPPING_ANY, X86_FEATURE_ANY, X86_CPU_TYPE_ANY, data)
/**
* X86_MATCH_VFM - Match encoded vendor/family/model
@@ -197,34 +156,26 @@
* @data: Driver specific data or NULL. The internal storage
* format is unsigned long. The supplied value, pointer
* etc. is cast to unsigned long internally.
- *
- * Stepping and feature are set to wildcards
*/
-#define X86_MATCH_VFM(vfm, data) \
- X86_MATCH_VENDORID_FAM_MODEL_STEPPINGS_FEATURE( \
- VFM_VENDOR(vfm), \
- VFM_FAMILY(vfm), \
- VFM_MODEL(vfm), \
- X86_STEPPING_ANY, X86_FEATURE_ANY, data)
+#define X86_MATCH_VFM(vfm, data) \
+ X86_MATCH_CPU(VFM_VENDOR(vfm), VFM_FAMILY(vfm), VFM_MODEL(vfm), \
+ X86_STEPPING_ANY, X86_FEATURE_ANY, X86_CPU_TYPE_ANY, data)
#define __X86_STEPPINGS(mins, maxs) GENMASK(maxs, mins)
/**
- * X86_MATCH_VFM_STEPPINGS - Match encoded vendor/family/model/stepping
+ * X86_MATCH_VFM_STEPS - Match encoded vendor/family/model and steppings
+ * range.
* @vfm: Encoded 8-bits each for vendor, family, model
- * @steppings: Bitmask of steppings to match
+ * @min_step: Lowest stepping number to match
+ * @max_step: Highest stepping number to match
* @data: Driver specific data or NULL. The internal storage
* format is unsigned long. The supplied value, pointer
* etc. is cast to unsigned long internally.
- *
- * feature is set to wildcard
*/
-#define X86_MATCH_VFM_STEPS(vfm, min_step, max_step, data) \
- X86_MATCH_VENDORID_FAM_MODEL_STEPPINGS_FEATURE( \
- VFM_VENDOR(vfm), \
- VFM_FAMILY(vfm), \
- VFM_MODEL(vfm), \
- __X86_STEPPINGS(min_step, max_step), \
- X86_FEATURE_ANY, data)
+#define X86_MATCH_VFM_STEPS(vfm, min_step, max_step, data) \
+ X86_MATCH_CPU(VFM_VENDOR(vfm), VFM_FAMILY(vfm), VFM_MODEL(vfm), \
+ __X86_STEPPINGS(min_step, max_step), X86_FEATURE_ANY, \
+ X86_CPU_TYPE_ANY, data)
/**
* X86_MATCH_VFM_FEATURE - Match encoded vendor/family/model/feature
@@ -233,15 +184,22 @@
* @data: Driver specific data or NULL. The internal storage
* format is unsigned long. The supplied value, pointer
* etc. is cast to unsigned long internally.
- *
- * Steppings is set to wildcard
*/
-#define X86_MATCH_VFM_FEATURE(vfm, feature, data) \
- X86_MATCH_VENDORID_FAM_MODEL_STEPPINGS_FEATURE( \
- VFM_VENDOR(vfm), \
- VFM_FAMILY(vfm), \
- VFM_MODEL(vfm), \
- X86_STEPPING_ANY, feature, data)
+#define X86_MATCH_VFM_FEATURE(vfm, feature, data) \
+ X86_MATCH_CPU(VFM_VENDOR(vfm), VFM_FAMILY(vfm), VFM_MODEL(vfm), \
+ X86_STEPPING_ANY, feature, X86_CPU_TYPE_ANY, data)
+
+/**
+ * X86_MATCH_VFM_CPU_TYPE - Match encoded vendor/family/model/type
+ * @vfm: Encoded 8-bits each for vendor, family, model
+ * @type: CPU type e.g. P-core, E-core
+ * @data: Driver specific data or NULL. The internal storage
+ * format is unsigned long. The supplied value, pointer
+ * etc. is cast to unsigned long internally.
+ */
+#define X86_MATCH_VFM_CPU_TYPE(vfm, type, data) \
+ X86_MATCH_CPU(VFM_VENDOR(vfm), VFM_FAMILY(vfm), VFM_MODEL(vfm), \
+ X86_STEPPING_ANY, X86_FEATURE_ANY, type, data)
extern const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match);
extern bool x86_match_min_microcode_rev(const struct x86_cpu_id *table);
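All of the public match macros now funnel into X86_MATCH_CPU(), which simply gains a CPU-type field (wildcarded as X86_CPU_TYPE_ANY by the existing wrappers); driver tables keep their familiar shape. A hypothetical table, with constants chosen purely for illustration and not taken from this series:

	#include <linux/errno.h>
	#include <asm/cpu_device_id.h>
	#include <asm/intel-family.h>

	static const int example_quirk_data = 1;

	static const struct x86_cpu_id example_cpu_ids[] = {
		X86_MATCH_VFM(INTEL_SKYLAKE_X, &example_quirk_data),
		X86_MATCH_VENDOR_FAM(AMD, 0x19, NULL),
		{}
	};

	static int example_probe(void)
	{
		const struct x86_cpu_id *id = x86_match_cpu(example_cpu_ids);

		if (!id)
			return -ENODEV;

		/* ->driver_data holds whatever pointer/value the entry stored. */
		return 0;
	}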
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index de1ad09fe8d7..893cbca37fe9 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -4,11 +4,12 @@
#include <asm/processor.h>
-#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
+#if defined(__KERNEL__) && !defined(__ASSEMBLER__)
#include <asm/asm.h>
#include <linux/bitops.h>
#include <asm/alternative.h>
+#include <asm/cpufeaturemasks.h>
enum cpuid_leafs
{
@@ -37,92 +38,19 @@ enum cpuid_leafs
NR_CPUID_WORDS,
};
-#define X86_CAP_FMT_NUM "%d:%d"
-#define x86_cap_flag_num(flag) ((flag) >> 5), ((flag) & 31)
-
extern const char * const x86_cap_flags[NCAPINTS*32];
extern const char * const x86_power_flags[32];
-#define X86_CAP_FMT "%s"
-#define x86_cap_flag(flag) x86_cap_flags[flag]
/*
* In order to save room, we index into this array by doing
* X86_BUG_<name> - NCAPINTS*32.
*/
extern const char * const x86_bug_flags[NBUGINTS*32];
+#define x86_bug_flag(flag) x86_bug_flags[flag]
#define test_cpu_cap(c, bit) \
arch_test_bit(bit, (unsigned long *)((c)->x86_capability))
-/*
- * There are 32 bits/features in each mask word. The high bits
- * (selected with (bit>>5) give us the word number and the low 5
- * bits give us the bit/feature number inside the word.
- * (1UL<<((bit)&31) gives us a mask for the feature_bit so we can
- * see if it is set in the mask word.
- */
-#define CHECK_BIT_IN_MASK_WORD(maskname, word, bit) \
- (((bit)>>5)==(word) && (1UL<<((bit)&31) & maskname##word ))
-
-/*
- * {REQUIRED,DISABLED}_MASK_CHECK below may seem duplicated with the
- * following BUILD_BUG_ON_ZERO() check but when NCAPINTS gets changed, all
- * header macros which use NCAPINTS need to be changed. The duplicated macro
- * use causes the compiler to issue errors for all headers so that all usage
- * sites can be corrected.
- */
-#define REQUIRED_MASK_BIT_SET(feature_bit) \
- ( CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 0, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 1, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 2, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 3, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 4, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 5, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 6, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 7, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 8, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 9, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 10, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 11, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 12, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 13, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 14, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 15, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 16, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 19, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 20, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 21, feature_bit) || \
- REQUIRED_MASK_CHECK || \
- BUILD_BUG_ON_ZERO(NCAPINTS != 22))
-
-#define DISABLED_MASK_BIT_SET(feature_bit) \
- ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 1, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 2, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 3, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 4, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 5, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 6, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 7, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 8, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 9, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 10, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 11, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 12, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 13, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 14, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 15, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 16, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 19, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 20, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 21, feature_bit) || \
- DISABLED_MASK_CHECK || \
- BUILD_BUG_ON_ZERO(NCAPINTS != 22))
-
#define cpu_has(c, bit) \
(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
test_cpu_cap(c, bit))
@@ -149,6 +77,7 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
extern void setup_clear_cpu_cap(unsigned int bit);
extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
+void check_cpufeature_deps(struct cpuinfo_x86 *c);
#define setup_force_cpu_cap(bit) do { \
\
@@ -208,5 +137,5 @@ t_no:
#define CPU_FEATURE_TYPEVAL boot_cpu_data.x86_vendor, boot_cpu_data.x86, \
boot_cpu_data.x86_model
-#endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */
+#endif /* defined(__KERNEL__) && !defined(__ASSEMBLER__) */
#endif /* _ASM_X86_CPUFEATURE_H */
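Feature consumers are unaffected by this mask cleanup: cpu_has() still short-circuits on REQUIRED_MASK_BIT_SET(), so those masks presumably now come from the newly included <asm/cpufeaturemasks.h> rather than the removed hand-maintained per-word tables. A usage sketch (unchanged by the patch):

	#include <asm/cpufeature.h>

	static bool example_use_popcnt(void)
	{
		/*
		 * Compiles away to 0 when Kconfig disables the feature;
		 * otherwise becomes a runtime, alternatives-patched test.
		 */
		return cpu_feature_enabled(X86_FEATURE_POPCNT);
	}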
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 508c0dad116b..c0462be0c5f6 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -2,14 +2,6 @@
#ifndef _ASM_X86_CPUFEATURES_H
#define _ASM_X86_CPUFEATURES_H
-#ifndef _ASM_X86_REQUIRED_FEATURES_H
-#include <asm/required-features.h>
-#endif
-
-#ifndef _ASM_X86_DISABLED_FEATURES_H
-#include <asm/disabled-features.h>
-#endif
-
/*
* Defines x86 CPU feature bits
*/
@@ -338,6 +330,7 @@
#define X86_FEATURE_CLZERO (13*32+ 0) /* "clzero" CLZERO instruction */
#define X86_FEATURE_IRPERF (13*32+ 1) /* "irperf" Instructions Retired Count */
#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* "xsaveerptr" Always save/restore FP error pointers */
+#define X86_FEATURE_INVLPGB (13*32+ 3) /* INVLPGB and TLBSYNC instructions supported */
#define X86_FEATURE_RDPRU (13*32+ 4) /* "rdpru" Read processor register at user level */
#define X86_FEATURE_WBNOINVD (13*32+ 9) /* "wbnoinvd" WBNOINVD instruction */
#define X86_FEATURE_AMD_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */
diff --git a/arch/x86/include/asm/cpuid.h b/arch/x86/include/asm/cpuid.h
index b2b9b4ef3dae..d5749b25fa10 100644
--- a/arch/x86/include/asm/cpuid.h
+++ b/arch/x86/include/asm/cpuid.h
@@ -1,222 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * CPUID-related helpers/definitions
- */
#ifndef _ASM_X86_CPUID_H
#define _ASM_X86_CPUID_H
-#include <linux/types.h>
-
-#include <asm/string.h>
-
-struct cpuid_regs {
- u32 eax, ebx, ecx, edx;
-};
-
-enum cpuid_regs_idx {
- CPUID_EAX = 0,
- CPUID_EBX,
- CPUID_ECX,
- CPUID_EDX,
-};
-
-#define CPUID_LEAF_MWAIT 0x5
-#define CPUID_LEAF_DCA 0x9
-#define CPUID_LEAF_XSTATE 0x0d
-#define CPUID_LEAF_TSC 0x15
-#define CPUID_LEAF_FREQ 0x16
-#define CPUID_LEAF_TILE 0x1d
-
-#ifdef CONFIG_X86_32
-bool have_cpuid_p(void);
-#else
-static inline bool have_cpuid_p(void)
-{
- return true;
-}
-#endif
-static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
-{
- /* ecx is often an input as well as an output. */
- asm volatile("cpuid"
- : "=a" (*eax),
- "=b" (*ebx),
- "=c" (*ecx),
- "=d" (*edx)
- : "0" (*eax), "2" (*ecx)
- : "memory");
-}
-
-#define native_cpuid_reg(reg) \
-static inline unsigned int native_cpuid_##reg(unsigned int op) \
-{ \
- unsigned int eax = op, ebx, ecx = 0, edx; \
- \
- native_cpuid(&eax, &ebx, &ecx, &edx); \
- \
- return reg; \
-}
-
-/*
- * Native CPUID functions returning a single datum.
- */
-native_cpuid_reg(eax)
-native_cpuid_reg(ebx)
-native_cpuid_reg(ecx)
-native_cpuid_reg(edx)
-
-#ifdef CONFIG_PARAVIRT_XXL
-#include <asm/paravirt.h>
-#else
-#define __cpuid native_cpuid
-#endif
-
-/*
- * Generic CPUID function
- * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
- * resulting in stale register contents being returned.
- */
-static inline void cpuid(unsigned int op,
- unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
-{
- *eax = op;
- *ecx = 0;
- __cpuid(eax, ebx, ecx, edx);
-}
-
-/* Some CPUID calls want 'count' to be placed in ecx */
-static inline void cpuid_count(unsigned int op, int count,
- unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
-{
- *eax = op;
- *ecx = count;
- __cpuid(eax, ebx, ecx, edx);
-}
-
-/*
- * CPUID functions returning a single datum
- */
-static inline unsigned int cpuid_eax(unsigned int op)
-{
- unsigned int eax, ebx, ecx, edx;
-
- cpuid(op, &eax, &ebx, &ecx, &edx);
-
- return eax;
-}
-
-static inline unsigned int cpuid_ebx(unsigned int op)
-{
- unsigned int eax, ebx, ecx, edx;
-
- cpuid(op, &eax, &ebx, &ecx, &edx);
-
- return ebx;
-}
-
-static inline unsigned int cpuid_ecx(unsigned int op)
-{
- unsigned int eax, ebx, ecx, edx;
-
- cpuid(op, &eax, &ebx, &ecx, &edx);
-
- return ecx;
-}
-
-static inline unsigned int cpuid_edx(unsigned int op)
-{
- unsigned int eax, ebx, ecx, edx;
-
- cpuid(op, &eax, &ebx, &ecx, &edx);
-
- return edx;
-}
-
-static inline void __cpuid_read(unsigned int leaf, unsigned int subleaf, u32 *regs)
-{
- regs[CPUID_EAX] = leaf;
- regs[CPUID_ECX] = subleaf;
- __cpuid(regs + CPUID_EAX, regs + CPUID_EBX, regs + CPUID_ECX, regs + CPUID_EDX);
-}
-
-#define cpuid_subleaf(leaf, subleaf, regs) { \
- static_assert(sizeof(*(regs)) == 16); \
- __cpuid_read(leaf, subleaf, (u32 *)(regs)); \
-}
-
-#define cpuid_leaf(leaf, regs) { \
- static_assert(sizeof(*(regs)) == 16); \
- __cpuid_read(leaf, 0, (u32 *)(regs)); \
-}
-
-static inline void __cpuid_read_reg(unsigned int leaf, unsigned int subleaf,
- enum cpuid_regs_idx regidx, u32 *reg)
-{
- u32 regs[4];
-
- __cpuid_read(leaf, subleaf, regs);
- *reg = regs[regidx];
-}
-
-#define cpuid_subleaf_reg(leaf, subleaf, regidx, reg) { \
- static_assert(sizeof(*(reg)) == 4); \
- __cpuid_read_reg(leaf, subleaf, regidx, (u32 *)(reg)); \
-}
-
-#define cpuid_leaf_reg(leaf, regidx, reg) { \
- static_assert(sizeof(*(reg)) == 4); \
- __cpuid_read_reg(leaf, 0, regidx, (u32 *)(reg)); \
-}
-
-static __always_inline bool cpuid_function_is_indexed(u32 function)
-{
- switch (function) {
- case 4:
- case 7:
- case 0xb:
- case 0xd:
- case 0xf:
- case 0x10:
- case 0x12:
- case 0x14:
- case 0x17:
- case 0x18:
- case 0x1d:
- case 0x1e:
- case 0x1f:
- case 0x24:
- case 0x8000001d:
- return true;
- }
-
- return false;
-}
-
-#define for_each_possible_hypervisor_cpuid_base(function) \
- for (function = 0x40000000; function < 0x40010000; function += 0x100)
-
-static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves)
-{
- uint32_t base, eax, signature[3];
-
- for_each_possible_hypervisor_cpuid_base(base) {
- cpuid(base, &eax, &signature[0], &signature[1], &signature[2]);
-
- /*
- * This must not compile to "call memcmp" because it's called
- * from PVH early boot code before instrumentation is set up
- * and memcmp() itself may be instrumented.
- */
- if (!__builtin_memcmp(sig, signature, 12) &&
- (leaves == 0 || ((eax - base) >= leaves)))
- return base;
- }
-
- return 0;
-}
+#include <asm/cpuid/api.h>
#endif /* _ASM_X86_CPUID_H */
diff --git a/arch/x86/include/asm/cpuid/api.h b/arch/x86/include/asm/cpuid/api.h
new file mode 100644
index 000000000000..9c180c9cc58e
--- /dev/null
+++ b/arch/x86/include/asm/cpuid/api.h
@@ -0,0 +1,210 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_CPUID_API_H
+#define _ASM_X86_CPUID_API_H
+
+#include <asm/cpuid/types.h>
+
+#include <linux/build_bug.h>
+#include <linux/types.h>
+
+#include <asm/string.h>
+
+/*
+ * Raw CPUID accessors:
+ */
+
+#ifdef CONFIG_X86_32
+bool have_cpuid_p(void);
+#else
+static inline bool have_cpuid_p(void)
+{
+ return true;
+}
+#endif
+
+static inline void native_cpuid(u32 *eax, u32 *ebx,
+ u32 *ecx, u32 *edx)
+{
+ /* ecx is often an input as well as an output. */
+ asm volatile("cpuid"
+ : "=a" (*eax),
+ "=b" (*ebx),
+ "=c" (*ecx),
+ "=d" (*edx)
+ : "0" (*eax), "2" (*ecx)
+ : "memory");
+}
+
+#define NATIVE_CPUID_REG(reg) \
+static inline u32 native_cpuid_##reg(u32 op) \
+{ \
+ u32 eax = op, ebx, ecx = 0, edx; \
+ \
+ native_cpuid(&eax, &ebx, &ecx, &edx); \
+ \
+ return reg; \
+}
+
+/*
+ * Native CPUID functions returning a single datum:
+ */
+NATIVE_CPUID_REG(eax)
+NATIVE_CPUID_REG(ebx)
+NATIVE_CPUID_REG(ecx)
+NATIVE_CPUID_REG(edx)
+
+#ifdef CONFIG_PARAVIRT_XXL
+# include <asm/paravirt.h>
+#else
+# define __cpuid native_cpuid
+#endif
+
+/*
+ * Generic CPUID function
+ *
+ * Clear ECX since some CPUs (Cyrix MII) do not set or clear ECX
+ * resulting in stale register contents being returned.
+ */
+static inline void cpuid(u32 op,
+ u32 *eax, u32 *ebx,
+ u32 *ecx, u32 *edx)
+{
+ *eax = op;
+ *ecx = 0;
+ __cpuid(eax, ebx, ecx, edx);
+}
+
+/* Some CPUID calls want 'count' to be placed in ECX */
+static inline void cpuid_count(u32 op, int count,
+ u32 *eax, u32 *ebx,
+ u32 *ecx, u32 *edx)
+{
+ *eax = op;
+ *ecx = count;
+ __cpuid(eax, ebx, ecx, edx);
+}
+
+/*
+ * CPUID functions returning a single datum:
+ */
+
+static inline u32 cpuid_eax(u32 op)
+{
+ u32 eax, ebx, ecx, edx;
+
+ cpuid(op, &eax, &ebx, &ecx, &edx);
+
+ return eax;
+}
+
+static inline u32 cpuid_ebx(u32 op)
+{
+ u32 eax, ebx, ecx, edx;
+
+ cpuid(op, &eax, &ebx, &ecx, &edx);
+
+ return ebx;
+}
+
+static inline u32 cpuid_ecx(u32 op)
+{
+ u32 eax, ebx, ecx, edx;
+
+ cpuid(op, &eax, &ebx, &ecx, &edx);
+
+ return ecx;
+}
+
+static inline u32 cpuid_edx(u32 op)
+{
+ u32 eax, ebx, ecx, edx;
+
+ cpuid(op, &eax, &ebx, &ecx, &edx);
+
+ return edx;
+}
+
+static inline void __cpuid_read(u32 leaf, u32 subleaf, u32 *regs)
+{
+ regs[CPUID_EAX] = leaf;
+ regs[CPUID_ECX] = subleaf;
+ __cpuid(regs + CPUID_EAX, regs + CPUID_EBX, regs + CPUID_ECX, regs + CPUID_EDX);
+}
+
+#define cpuid_subleaf(leaf, subleaf, regs) { \
+ static_assert(sizeof(*(regs)) == 16); \
+ __cpuid_read(leaf, subleaf, (u32 *)(regs)); \
+}
+
+#define cpuid_leaf(leaf, regs) { \
+ static_assert(sizeof(*(regs)) == 16); \
+ __cpuid_read(leaf, 0, (u32 *)(regs)); \
+}
+
+static inline void __cpuid_read_reg(u32 leaf, u32 subleaf,
+ enum cpuid_regs_idx regidx, u32 *reg)
+{
+ u32 regs[4];
+
+ __cpuid_read(leaf, subleaf, regs);
+ *reg = regs[regidx];
+}
+
+#define cpuid_subleaf_reg(leaf, subleaf, regidx, reg) { \
+ static_assert(sizeof(*(reg)) == 4); \
+ __cpuid_read_reg(leaf, subleaf, regidx, (u32 *)(reg)); \
+}
+
+#define cpuid_leaf_reg(leaf, regidx, reg) { \
+ static_assert(sizeof(*(reg)) == 4); \
+ __cpuid_read_reg(leaf, 0, regidx, (u32 *)(reg)); \
+}
+
+static __always_inline bool cpuid_function_is_indexed(u32 function)
+{
+ switch (function) {
+ case 4:
+ case 7:
+ case 0xb:
+ case 0xd:
+ case 0xf:
+ case 0x10:
+ case 0x12:
+ case 0x14:
+ case 0x17:
+ case 0x18:
+ case 0x1d:
+ case 0x1e:
+ case 0x1f:
+ case 0x24:
+ case 0x8000001d:
+ return true;
+ }
+
+ return false;
+}
+
+#define for_each_possible_hypervisor_cpuid_base(function) \
+ for (function = 0x40000000; function < 0x40010000; function += 0x100)
+
+static inline u32 hypervisor_cpuid_base(const char *sig, u32 leaves)
+{
+ u32 base, eax, signature[3];
+
+ for_each_possible_hypervisor_cpuid_base(base) {
+ cpuid(base, &eax, &signature[0], &signature[1], &signature[2]);
+
+ /*
+ * This must not compile to "call memcmp" because it's called
+ * from PVH early boot code before instrumentation is set up
+ * and memcmp() itself may be instrumented.
+ */
+ if (!__builtin_memcmp(sig, signature, 12) &&
+ (leaves == 0 || ((eax - base) >= leaves)))
+ return base;
+ }
+
+ return 0;
+}
+
+#endif /* _ASM_X86_CPUID_API_H */
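Apart from the unsigned int to u32 cleanup, the helpers moved here are unchanged; they remain the raw accessors, with cpu_feature_enabled() still being the normal way to test features. A short sketch exercising them (the leaf and bit choices are illustrative only):

	#include <linux/bits.h>
	#include <asm/cpuid.h>

	static inline u32 example_max_std_leaf(void)
	{
		struct cpuid_regs regs;

		cpuid_leaf(0x0, &regs);		/* leaf 0: EAX = highest standard leaf */

		return regs.eax;
	}

	static inline bool example_has_smep(void)
	{
		u32 eax, ebx, ecx, edx;

		cpuid_count(0x7, 0, &eax, &ebx, &ecx, &edx);

		return ebx & BIT(7);		/* CPUID.(EAX=7,ECX=0):EBX[7] == SMEP */
	}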
diff --git a/arch/x86/include/asm/cpuid/types.h b/arch/x86/include/asm/cpuid/types.h
new file mode 100644
index 000000000000..8582e27e836d
--- /dev/null
+++ b/arch/x86/include/asm/cpuid/types.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_CPUID_TYPES_H
+#define _ASM_X86_CPUID_TYPES_H
+
+#include <linux/types.h>
+
+/*
+ * Types for raw CPUID access:
+ */
+
+struct cpuid_regs {
+ u32 eax;
+ u32 ebx;
+ u32 ecx;
+ u32 edx;
+};
+
+enum cpuid_regs_idx {
+ CPUID_EAX = 0,
+ CPUID_EBX,
+ CPUID_ECX,
+ CPUID_EDX,
+};
+
+#define CPUID_LEAF_MWAIT 0x05
+#define CPUID_LEAF_DCA 0x09
+#define CPUID_LEAF_XSTATE 0x0d
+#define CPUID_LEAF_TSC 0x15
+#define CPUID_LEAF_FREQ 0x16
+#define CPUID_LEAF_TILE 0x1d
+
+#endif /* _ASM_X86_CPUID_TYPES_H */
diff --git a/arch/x86/include/asm/cpumask.h b/arch/x86/include/asm/cpumask.h
index 4acfd57de8f1..70f6b60ad67b 100644
--- a/arch/x86/include/asm/cpumask.h
+++ b/arch/x86/include/asm/cpumask.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_CPUMASK_H
#define _ASM_X86_CPUMASK_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/cpumask.h>
extern void setup_cpu_local_masks(void);
@@ -34,5 +34,5 @@ static __always_inline void arch_cpumask_clear_cpu(int cpu, struct cpumask *dstp
#define arch_cpu_is_offline(cpu) unlikely(!arch_cpu_online(cpu))
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_CPUMASK_H */
diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h
index bf5953883ec3..cc4a3f725b37 100644
--- a/arch/x86/include/asm/current.h
+++ b/arch/x86/include/asm/current.h
@@ -5,52 +5,28 @@
#include <linux/build_bug.h>
#include <linux/compiler.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/cache.h>
#include <asm/percpu.h>
struct task_struct;
-struct pcpu_hot {
- union {
- struct {
- struct task_struct *current_task;
- int preempt_count;
- int cpu_number;
-#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING
- u64 call_depth;
-#endif
- unsigned long top_of_stack;
- void *hardirq_stack_ptr;
- u16 softirq_pending;
-#ifdef CONFIG_X86_64
- bool hardirq_stack_inuse;
-#else
- void *softirq_stack_ptr;
-#endif
- };
- u8 pad[64];
- };
-};
-static_assert(sizeof(struct pcpu_hot) == 64);
-
-DECLARE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot);
-
-/* const-qualified alias to pcpu_hot, aliased by linker. */
-DECLARE_PER_CPU_ALIGNED(const struct pcpu_hot __percpu_seg_override,
- const_pcpu_hot);
+DECLARE_PER_CPU_CACHE_HOT(struct task_struct *, current_task);
+/* const-qualified alias provided by the linker. */
+DECLARE_PER_CPU_CACHE_HOT(struct task_struct * const __percpu_seg_override,
+ const_current_task);
static __always_inline struct task_struct *get_current(void)
{
if (IS_ENABLED(CONFIG_USE_X86_SEG_SUPPORT))
- return this_cpu_read_const(const_pcpu_hot.current_task);
+ return this_cpu_read_const(const_current_task);
- return this_cpu_read_stable(pcpu_hot.current_task);
+ return this_cpu_read_stable(current_task);
}
#define current get_current()
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_CURRENT_H */
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 62dc9f59ea76..ec95fe44fa3a 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -46,7 +46,6 @@ struct gdt_page {
} __attribute__((aligned(PAGE_SIZE)));
DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page);
-DECLARE_INIT_PER_CPU(gdt_page);
/* Provide the original GDT */
static inline struct desc_struct *get_cpu_gdt_rw(unsigned int cpu)
diff --git a/arch/x86/include/asm/desc_defs.h b/arch/x86/include/asm/desc_defs.h
index d440a65af8f3..7e6b9314758a 100644
--- a/arch/x86/include/asm/desc_defs.h
+++ b/arch/x86/include/asm/desc_defs.h
@@ -58,7 +58,7 @@
#define DESC_USER (_DESC_DPL(3))
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
@@ -166,7 +166,7 @@ struct desc_ptr {
unsigned long address;
} __attribute__((packed)) ;
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
/* Boot IDT definitions */
#define BOOT_IDT_ENTRIES 32
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
deleted file mode 100644
index c492bdc97b05..000000000000
--- a/arch/x86/include/asm/disabled-features.h
+++ /dev/null
@@ -1,161 +0,0 @@
-#ifndef _ASM_X86_DISABLED_FEATURES_H
-#define _ASM_X86_DISABLED_FEATURES_H
-
-/* These features, although they might be available in a CPU
- * will not be used because the compile options to support
- * them are not present.
- *
- * This code allows them to be checked and disabled at
- * compile time without an explicit #ifdef. Use
- * cpu_feature_enabled().
- */
-
-#ifdef CONFIG_X86_UMIP
-# define DISABLE_UMIP 0
-#else
-# define DISABLE_UMIP (1<<(X86_FEATURE_UMIP & 31))
-#endif
-
-#ifdef CONFIG_X86_64
-# define DISABLE_VME (1<<(X86_FEATURE_VME & 31))
-# define DISABLE_K6_MTRR (1<<(X86_FEATURE_K6_MTRR & 31))
-# define DISABLE_CYRIX_ARR (1<<(X86_FEATURE_CYRIX_ARR & 31))
-# define DISABLE_CENTAUR_MCR (1<<(X86_FEATURE_CENTAUR_MCR & 31))
-# define DISABLE_PCID 0
-#else
-# define DISABLE_VME 0
-# define DISABLE_K6_MTRR 0
-# define DISABLE_CYRIX_ARR 0
-# define DISABLE_CENTAUR_MCR 0
-# define DISABLE_PCID (1<<(X86_FEATURE_PCID & 31))
-#endif /* CONFIG_X86_64 */
-
-#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
-# define DISABLE_PKU 0
-# define DISABLE_OSPKE 0
-#else
-# define DISABLE_PKU (1<<(X86_FEATURE_PKU & 31))
-# define DISABLE_OSPKE (1<<(X86_FEATURE_OSPKE & 31))
-#endif /* CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS */
-
-#ifdef CONFIG_X86_5LEVEL
-# define DISABLE_LA57 0
-#else
-# define DISABLE_LA57 (1<<(X86_FEATURE_LA57 & 31))
-#endif
-
-#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
-# define DISABLE_PTI 0
-#else
-# define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31))
-#endif
-
-#ifdef CONFIG_MITIGATION_RETPOLINE
-# define DISABLE_RETPOLINE 0
-#else
-# define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \
- (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)))
-#endif
-
-#ifdef CONFIG_MITIGATION_RETHUNK
-# define DISABLE_RETHUNK 0
-#else
-# define DISABLE_RETHUNK (1 << (X86_FEATURE_RETHUNK & 31))
-#endif
-
-#ifdef CONFIG_MITIGATION_UNRET_ENTRY
-# define DISABLE_UNRET 0
-#else
-# define DISABLE_UNRET (1 << (X86_FEATURE_UNRET & 31))
-#endif
-
-#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING
-# define DISABLE_CALL_DEPTH_TRACKING 0
-#else
-# define DISABLE_CALL_DEPTH_TRACKING (1 << (X86_FEATURE_CALL_DEPTH & 31))
-#endif
-
-#ifdef CONFIG_ADDRESS_MASKING
-# define DISABLE_LAM 0
-#else
-# define DISABLE_LAM (1 << (X86_FEATURE_LAM & 31))
-#endif
-
-#ifdef CONFIG_INTEL_IOMMU_SVM
-# define DISABLE_ENQCMD 0
-#else
-# define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31))
-#endif
-
-#ifdef CONFIG_X86_SGX
-# define DISABLE_SGX 0
-#else
-# define DISABLE_SGX (1 << (X86_FEATURE_SGX & 31))
-#endif
-
-#ifdef CONFIG_XEN_PV
-# define DISABLE_XENPV 0
-#else
-# define DISABLE_XENPV (1 << (X86_FEATURE_XENPV & 31))
-#endif
-
-#ifdef CONFIG_INTEL_TDX_GUEST
-# define DISABLE_TDX_GUEST 0
-#else
-# define DISABLE_TDX_GUEST (1 << (X86_FEATURE_TDX_GUEST & 31))
-#endif
-
-#ifdef CONFIG_X86_USER_SHADOW_STACK
-#define DISABLE_USER_SHSTK 0
-#else
-#define DISABLE_USER_SHSTK (1 << (X86_FEATURE_USER_SHSTK & 31))
-#endif
-
-#ifdef CONFIG_X86_KERNEL_IBT
-#define DISABLE_IBT 0
-#else
-#define DISABLE_IBT (1 << (X86_FEATURE_IBT & 31))
-#endif
-
-#ifdef CONFIG_X86_FRED
-# define DISABLE_FRED 0
-#else
-# define DISABLE_FRED (1 << (X86_FEATURE_FRED & 31))
-#endif
-
-#ifdef CONFIG_KVM_AMD_SEV
-#define DISABLE_SEV_SNP 0
-#else
-#define DISABLE_SEV_SNP (1 << (X86_FEATURE_SEV_SNP & 31))
-#endif
-
-/*
- * Make sure to add features to the correct mask
- */
-#define DISABLED_MASK0 (DISABLE_VME)
-#define DISABLED_MASK1 0
-#define DISABLED_MASK2 0
-#define DISABLED_MASK3 (DISABLE_CYRIX_ARR|DISABLE_CENTAUR_MCR|DISABLE_K6_MTRR)
-#define DISABLED_MASK4 (DISABLE_PCID)
-#define DISABLED_MASK5 0
-#define DISABLED_MASK6 0
-#define DISABLED_MASK7 (DISABLE_PTI)
-#define DISABLED_MASK8 (DISABLE_XENPV|DISABLE_TDX_GUEST)
-#define DISABLED_MASK9 (DISABLE_SGX)
-#define DISABLED_MASK10 0
-#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET| \
- DISABLE_CALL_DEPTH_TRACKING|DISABLE_USER_SHSTK)
-#define DISABLED_MASK12 (DISABLE_FRED|DISABLE_LAM)
-#define DISABLED_MASK13 0
-#define DISABLED_MASK14 0
-#define DISABLED_MASK15 0
-#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP| \
- DISABLE_ENQCMD)
-#define DISABLED_MASK17 0
-#define DISABLED_MASK18 (DISABLE_IBT)
-#define DISABLED_MASK19 (DISABLE_SEV_SNP)
-#define DISABLED_MASK20 0
-#define DISABLED_MASK21 0
-#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22)
-
-#endif /* _ASM_X86_DISABLED_FEATURES_H */
diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h
index 430fca13bb56..302e11b15da8 100644
--- a/arch/x86/include/asm/dwarf2.h
+++ b/arch/x86/include/asm/dwarf2.h
@@ -2,7 +2,7 @@
#ifndef _ASM_X86_DWARF2_H
#define _ASM_X86_DWARF2_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#warning "asm/dwarf2.h should be only included in pure assembly files"
#endif
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 1fb83d47711f..128602612eca 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -54,8 +54,9 @@ typedef struct user_i387_struct elf_fpregset_t;
#define R_X86_64_GLOB_DAT 6 /* Create GOT entry */
#define R_X86_64_JUMP_SLOT 7 /* Create PLT entry */
#define R_X86_64_RELATIVE 8 /* Adjust by program base */
-#define R_X86_64_GOTPCREL 9 /* 32 bit signed pc relative
- offset to GOT */
+#define R_X86_64_GOTPCREL 9 /* 32 bit signed pc relative offset to GOT */
+#define R_X86_64_GOTPCRELX 41
+#define R_X86_64_REX_GOTPCRELX 42
#define R_X86_64_32 10 /* Direct 32 bit zero extended */
#define R_X86_64_32S 11 /* Direct 32 bit sign extended */
#define R_X86_64_16 12 /* Direct 16 bit zero extended */
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index d0dcefb5cc59..4519c9f35ba0 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -31,7 +31,7 @@
/* fixmap starts downwards from the 507th entry in level2_fixmap_pgt */
#define FIXMAP_PMD_TOP 507
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/kernel.h>
#include <asm/apicdef.h>
#include <asm/page.h>
@@ -196,5 +196,5 @@ void __init *early_memremap_decrypted_wp(resource_size_t phys_addr,
void __early_set_fixmap(enum fixed_addresses idx,
phys_addr_t phys, pgprot_t flags);
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* _ASM_X86_FIXMAP_H */
diff --git a/arch/x86/include/asm/frame.h b/arch/x86/include/asm/frame.h
index fb42659f6e98..0ab65073c1cc 100644
--- a/arch/x86/include/asm/frame.h
+++ b/arch/x86/include/asm/frame.h
@@ -11,7 +11,7 @@
#ifdef CONFIG_FRAME_POINTER
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
.macro FRAME_BEGIN
push %_ASM_BP
@@ -51,7 +51,7 @@
.endm
#endif /* CONFIG_X86_64 */
-#else /* !__ASSEMBLY__ */
+#else /* !__ASSEMBLER__ */
#define FRAME_BEGIN \
"push %" _ASM_BP "\n" \
@@ -82,18 +82,18 @@ static inline unsigned long encode_frame_pointer(struct pt_regs *regs)
#endif /* CONFIG_X86_64 */
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#define FRAME_OFFSET __ASM_SEL(4, 8)
#else /* !CONFIG_FRAME_POINTER */
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
.macro ENCODE_FRAME_POINTER ptregs_offset=0
.endm
-#else /* !__ASSEMBLY */
+#else /* !__ASSEMBLER__ */
#define ENCODE_FRAME_POINTER
diff --git a/arch/x86/include/asm/fred.h b/arch/x86/include/asm/fred.h
index 25ca00bd70e8..2a29e5216881 100644
--- a/arch/x86/include/asm/fred.h
+++ b/arch/x86/include/asm/fred.h
@@ -32,7 +32,7 @@
#define FRED_CONFIG_INT_STKLVL(l) (_AT(unsigned long, l) << 9)
#define FRED_CONFIG_ENTRYPOINT(p) _AT(unsigned long, (p))
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#ifdef CONFIG_X86_FRED
#include <linux/kernel.h>
@@ -113,6 +113,6 @@ static inline void fred_entry_from_kvm(unsigned int type, unsigned int vector) {
static inline void fred_sync_rsp0(unsigned long rsp0) { }
static inline void fred_update_rsp0(void) { }
#endif /* CONFIG_X86_FRED */
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* ASM_X86_FRED_H */
diff --git a/arch/x86/include/asm/fsgsbase.h b/arch/x86/include/asm/fsgsbase.h
index 9e7e8ca8e299..02f239569b93 100644
--- a/arch/x86/include/asm/fsgsbase.h
+++ b/arch/x86/include/asm/fsgsbase.h
@@ -2,7 +2,7 @@
#ifndef _ASM_FSGSBASE_H
#define _ASM_FSGSBASE_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#ifdef CONFIG_X86_64
@@ -80,6 +80,6 @@ extern unsigned long x86_fsgsbase_read_task(struct task_struct *task,
#endif /* CONFIG_X86_64 */
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_FSGSBASE_H */
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index f9cb4d07df58..93156ac4ffe0 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -22,7 +22,7 @@
#define ARCH_SUPPORTS_FTRACE_OPS 1
#endif
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
extern void __fentry__(void);
static inline unsigned long ftrace_call_adjust(unsigned long addr)
@@ -36,21 +36,9 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
static inline unsigned long arch_ftrace_get_symaddr(unsigned long fentry_ip)
{
-#ifdef CONFIG_X86_KERNEL_IBT
- u32 instr;
-
- /* We want to be extra safe in case entry ip is on the page edge,
- * but otherwise we need to avoid get_kernel_nofault()'s overhead.
- */
- if ((fentry_ip & ~PAGE_MASK) < ENDBR_INSN_SIZE) {
- if (get_kernel_nofault(instr, (u32 *)(fentry_ip - ENDBR_INSN_SIZE)))
- return fentry_ip;
- } else {
- instr = *(u32 *)(fentry_ip - ENDBR_INSN_SIZE);
- }
- if (is_endbr(instr))
+ if (is_endbr((void*)(fentry_ip - ENDBR_INSN_SIZE)))
fentry_ip -= ENDBR_INSN_SIZE;
-#endif
+
return fentry_ip;
}
#define ftrace_get_symaddr(fentry_ip) arch_ftrace_get_symaddr(fentry_ip)
@@ -118,11 +106,11 @@ struct dyn_arch_ftrace {
};
#endif /* CONFIG_DYNAMIC_FTRACE */
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* CONFIG_FUNCTION_TRACER */
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
void prepare_ftrace_return(unsigned long ip, unsigned long *parent,
unsigned long frame_pointer);
@@ -166,6 +154,6 @@ static inline bool arch_trace_is_compat_syscall(struct pt_regs *regs)
}
#endif /* CONFIG_FTRACE_SYSCALLS && CONFIG_IA32_EMULATION */
#endif /* !COMPILE_OFFSETS */
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* _ASM_X86_FTRACE_H */
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 6ffa8b75f4cd..f00c09ffe6a9 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -3,7 +3,6 @@
#define _ASM_X86_HARDIRQ_H
#include <linux/threads.h>
-#include <asm/current.h>
typedef struct {
#if IS_ENABLED(CONFIG_KVM_INTEL)
@@ -66,7 +65,8 @@ extern u64 arch_irq_stat_cpu(unsigned int cpu);
extern u64 arch_irq_stat(void);
#define arch_irq_stat arch_irq_stat
-#define local_softirq_pending_ref pcpu_hot.softirq_pending
+DECLARE_PER_CPU_CACHE_HOT(u16, __softirq_pending);
+#define local_softirq_pending_ref __softirq_pending
#if IS_ENABLED(CONFIG_KVM_INTEL)
/*
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index edebf1020e04..162ebd73a698 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -16,7 +16,7 @@
#include <asm/irq_vectors.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/percpu.h>
#include <linux/profile.h>
@@ -128,6 +128,6 @@ extern char spurious_entries_start[];
typedef struct irq_desc* vector_irq_t[NR_VECTORS];
DECLARE_PER_CPU(vector_irq_t, vector_irq);
-#endif /* !ASSEMBLY_ */
+#endif /* !__ASSEMBLER__ */
#endif /* _ASM_X86_HW_IRQ_H */
diff --git a/arch/x86/include/asm/ibt.h b/arch/x86/include/asm/ibt.h
index 1e59581d500c..28d845257303 100644
--- a/arch/x86/include/asm/ibt.h
+++ b/arch/x86/include/asm/ibt.h
@@ -21,7 +21,7 @@
#define HAS_KERNEL_IBT 1
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#ifdef CONFIG_X86_64
#define ASM_ENDBR "endbr64\n\t"
@@ -41,7 +41,7 @@
_ASM_PTR fname "\n\t" \
".popsection\n\t"
-static inline __attribute_const__ u32 gen_endbr(void)
+static __always_inline __attribute_const__ u32 gen_endbr(void)
{
u32 endbr;
@@ -56,7 +56,7 @@ static inline __attribute_const__ u32 gen_endbr(void)
return endbr;
}
-static inline __attribute_const__ u32 gen_endbr_poison(void)
+static __always_inline __attribute_const__ u32 gen_endbr_poison(void)
{
/*
* 4 byte NOP that isn't NOP4 (in fact it is OSP NOP3), such that it
@@ -65,19 +65,24 @@ static inline __attribute_const__ u32 gen_endbr_poison(void)
return 0x001f0f66; /* osp nopl (%rax) */
}
-static inline bool is_endbr(u32 val)
+static inline bool __is_endbr(u32 val)
{
if (val == gen_endbr_poison())
return true;
+ /* See cfi_fineibt_bhi_preamble() */
+ if (IS_ENABLED(CONFIG_FINEIBT_BHI) && val == 0x001f0ff5)
+ return true;
+
val &= ~0x01000000U; /* ENDBR32 -> ENDBR64 */
return val == gen_endbr();
}
+extern __noendbr bool is_endbr(u32 *val);
extern __noendbr u64 ibt_save(bool disable);
extern __noendbr void ibt_restore(u64 save);
-#else /* __ASSEMBLY__ */
+#else /* __ASSEMBLER__ */
#ifdef CONFIG_X86_64
#define ENDBR endbr64
@@ -85,29 +90,29 @@ extern __noendbr void ibt_restore(u64 save);
#define ENDBR endbr32
#endif
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#else /* !IBT */
#define HAS_KERNEL_IBT 0
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#define ASM_ENDBR
#define IBT_NOSEAL(name)
#define __noendbr
-static inline bool is_endbr(u32 val) { return false; }
+static inline bool is_endbr(u32 *val) { return false; }
static inline u64 ibt_save(bool disable) { return 0; }
static inline void ibt_restore(u64 save) { }
-#else /* __ASSEMBLY__ */
+#else /* __ASSEMBLER__ */
#define ENDBR
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* CONFIG_X86_KERNEL_IBT */
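Editor's note: the reworked ibt.h keeps the pure value comparison in __is_endbr() and adds an out-of-line is_endbr() that takes a pointer. A small user-space sketch of the value check only, built from the endbr64/endbr32 byte encodings and the poison constant visible in the hunk above; the FineIBT-BHI case and the kernel's pointer handling are deliberately left out:

  #include <stdio.h>
  #include <stdint.h>
  #include <stdbool.h>

  /* Little-endian u32 views: endbr64 = F3 0F 1E FA, endbr32 = F3 0F 1E FB. */
  #define ENDBR64_U32  0xfa1e0ff3u
  #define ENDBR_POISON 0x001f0f66u  /* "osp nopl (%rax)", used to overwrite ENDBR */

  /* Value check in the style of __is_endbr(): accept the poison value and
   * fold ENDBR32 into ENDBR64 by clearing bit 24. */
  static bool is_endbr_val(uint32_t val)
  {
      if (val == ENDBR_POISON)
          return true;
      val &= ~0x01000000u;        /* ...FB -> ...FA */
      return val == ENDBR64_U32;
  }

  int main(void)
  {
      uint32_t endbr32 = 0xfb1e0ff3u;
      printf("endbr64:%d endbr32:%d poison:%d nops:%d\n",
             is_endbr_val(ENDBR64_U32), is_endbr_val(endbr32),
             is_endbr_val(ENDBR_POISON), is_endbr_val(0x90909090u));
      return 0;
  }
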
diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index ad5c68f0509d..a4ec27c67988 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -7,7 +7,7 @@
#define IDT_ALIGN (8 * (1 + HAS_KERNEL_IBT))
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/entry-common.h>
#include <linux/hardirq.h>
@@ -474,7 +474,7 @@ static inline void fred_install_sysvec(unsigned int vector, const idtentry_t fun
idt_install_sysvec(vector, asm_##function); \
}
-#else /* !__ASSEMBLY__ */
+#else /* !__ASSEMBLER__ */
/*
* The ASM variants for DECLARE_IDTENTRY*() which emit the ASM entry stubs.
@@ -579,7 +579,7 @@ SYM_CODE_START(spurious_entries_start)
SYM_CODE_END(spurious_entries_start)
#endif
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
/*
* The actual entry points. Note that DECLARE_IDTENTRY*() serves two
diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
index 0e82ebc5d1e1..8b1b1abcef15 100644
--- a/arch/x86/include/asm/init.h
+++ b/arch/x86/include/asm/init.h
@@ -2,7 +2,11 @@
#ifndef _ASM_X86_INIT_H
#define _ASM_X86_INIT_H
+#if defined(CONFIG_CC_IS_CLANG) && CONFIG_CLANG_VERSION < 170000
+#define __head __section(".head.text") __no_sanitize_undefined __no_stack_protector
+#else
#define __head __section(".head.text") __no_sanitize_undefined
+#endif
struct x86_mapping_info {
void *(*alloc_pgt_page)(void *); /* allocate buf for page table */
diff --git a/arch/x86/include/asm/inst.h b/arch/x86/include/asm/inst.h
index 438ccd4f3cc4..e48a00b3311d 100644
--- a/arch/x86/include/asm/inst.h
+++ b/arch/x86/include/asm/inst.h
@@ -6,7 +6,7 @@
#ifndef X86_ASM_INST_H
#define X86_ASM_INST_H
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#define REG_NUM_INVALID 100
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 6d7b04ffc5fd..3a97a7eefb51 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -45,7 +45,18 @@
/* Wildcard match so X86_MATCH_VFM(ANY) works */
#define INTEL_ANY IFM(X86_FAMILY_ANY, X86_MODEL_ANY)
+/* Family 5 */
+#define INTEL_FAM5_START IFM(5, 0x00) /* Notational marker, also P5 A-step */
+#define INTEL_PENTIUM_75 IFM(5, 0x02) /* P54C */
+#define INTEL_PENTIUM_MMX IFM(5, 0x04) /* P55C */
+#define INTEL_QUARK_X1000 IFM(5, 0x09) /* Quark X1000 SoC */
+
+/* Family 6 */
#define INTEL_PENTIUM_PRO IFM(6, 0x01)
+#define INTEL_PENTIUM_II_KLAMATH IFM(6, 0x03)
+#define INTEL_PENTIUM_III_DESCHUTES IFM(6, 0x05)
+#define INTEL_PENTIUM_III_TUALATIN IFM(6, 0x0B)
+#define INTEL_PENTIUM_M_DOTHAN IFM(6, 0x0D)
#define INTEL_CORE_YONAH IFM(6, 0x0E)
@@ -110,9 +121,9 @@
#define INTEL_SAPPHIRERAPIDS_X IFM(6, 0x8F) /* Golden Cove */
-#define INTEL_EMERALDRAPIDS_X IFM(6, 0xCF)
+#define INTEL_EMERALDRAPIDS_X IFM(6, 0xCF) /* Raptor Cove */
-#define INTEL_GRANITERAPIDS_X IFM(6, 0xAD)
+#define INTEL_GRANITERAPIDS_X IFM(6, 0xAD) /* Redwood Cove */
#define INTEL_GRANITERAPIDS_D IFM(6, 0xAE)
/* "Hybrid" Processors (P-Core/E-Core) */
@@ -126,16 +137,16 @@
#define INTEL_RAPTORLAKE_P IFM(6, 0xBA)
#define INTEL_RAPTORLAKE_S IFM(6, 0xBF)
-#define INTEL_METEORLAKE IFM(6, 0xAC)
+#define INTEL_METEORLAKE IFM(6, 0xAC) /* Redwood Cove / Crestmont */
#define INTEL_METEORLAKE_L IFM(6, 0xAA)
-#define INTEL_ARROWLAKE_H IFM(6, 0xC5)
+#define INTEL_ARROWLAKE_H IFM(6, 0xC5) /* Lion Cove / Skymont */
#define INTEL_ARROWLAKE IFM(6, 0xC6)
#define INTEL_ARROWLAKE_U IFM(6, 0xB5)
-#define INTEL_LUNARLAKE_M IFM(6, 0xBD)
+#define INTEL_LUNARLAKE_M IFM(6, 0xBD) /* Lion Cove / Skymont */
-#define INTEL_PANTHERLAKE_L IFM(6, 0xCC)
+#define INTEL_PANTHERLAKE_L IFM(6, 0xCC) /* Cougar Cove / Crestmont */
/* "Small Core" Processors (Atom/E-Core) */
@@ -149,9 +160,9 @@
#define INTEL_ATOM_SILVERMONT IFM(6, 0x37) /* Bay Trail, Valleyview */
#define INTEL_ATOM_SILVERMONT_D IFM(6, 0x4D) /* Avaton, Rangely */
#define INTEL_ATOM_SILVERMONT_MID IFM(6, 0x4A) /* Merriefield */
+#define INTEL_ATOM_SILVERMONT_MID2 IFM(6, 0x5A) /* Anniedale */
#define INTEL_ATOM_AIRMONT IFM(6, 0x4C) /* Cherry Trail, Braswell */
-#define INTEL_ATOM_AIRMONT_MID IFM(6, 0x5A) /* Moorefield */
#define INTEL_ATOM_AIRMONT_NP IFM(6, 0x75) /* Lightning Mountain */
#define INTEL_ATOM_GOLDMONT IFM(6, 0x5C) /* Apollo Lake */
@@ -176,16 +187,35 @@
#define INTEL_XEON_PHI_KNL IFM(6, 0x57) /* Knights Landing */
#define INTEL_XEON_PHI_KNM IFM(6, 0x85) /* Knights Mill */
-/* Family 5 */
-#define INTEL_QUARK_X1000 IFM(5, 0x09) /* Quark X1000 SoC */
+/* Notational marker denoting the last Family 6 model */
+#define INTEL_FAM6_LAST IFM(6, 0xFF)
+
+/* Family 15 - NetBurst */
+#define INTEL_P4_WILLAMETTE IFM(15, 0x01) /* Also Xeon Foster */
+#define INTEL_P4_PRESCOTT IFM(15, 0x03)
+#define INTEL_P4_PRESCOTT_2M IFM(15, 0x04)
+#define INTEL_P4_CEDARMILL IFM(15, 0x06) /* Also Xeon Dempsey */
/* Family 19 */
#define INTEL_PANTHERCOVE_X IFM(19, 0x01) /* Diamond Rapids */
-/* CPU core types */
+/*
+ * Intel CPU core types
+ *
+ * CPUID.1AH.EAX[31:0] uniquely identifies the microarchitecture
+ * of the core. Bits [31:24] indicate its core type (Core or Atom)
+ * and bits [23:0] indicate the native model ID of the core.
+ * Core type and native model ID are defined in the enumerations below.
+ */
enum intel_cpu_type {
+ INTEL_CPU_TYPE_UNKNOWN,
INTEL_CPU_TYPE_ATOM = 0x20,
INTEL_CPU_TYPE_CORE = 0x40,
};
+enum intel_native_id {
+ INTEL_ATOM_CMT_NATIVE_ID = 0x2, /* Crestmont */
+ INTEL_ATOM_SKT_NATIVE_ID = 0x3, /* Skymont */
+};
+
#endif /* _ASM_X86_INTEL_FAMILY_H */
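Editor's note: the reshuffled intel-family.h leans on IFM() packing family and model into one ordered value, which is what lets markers such as INTEL_FAM5_START and INTEL_FAM6_LAST act as range bounds. A hedged sketch of that idea with an illustrative field layout; the authoritative VFM encoding lives in cpu_device_id.h, not here:

  #include <stdio.h>
  #include <stdint.h>

  /* Illustrative packing only: model in the low byte, family above it,
   * vendor above that. */
  #define VENDOR_INTEL 0
  #define MAKE_VFM(vendor, family, model) \
      (((uint32_t)(vendor) << 16) | ((uint32_t)(family) << 8) | (uint32_t)(model))
  #define IFM(family, model) MAKE_VFM(VENDOR_INTEL, (family), (model))

  #define INTEL_PENTIUM_MMX   IFM(5,  0x04)
  #define INTEL_P4_WILLAMETTE IFM(15, 0x01)

  int main(void)
  {
      uint32_t vfm = INTEL_P4_WILLAMETTE;

      printf("vfm=0x%06x family=%u model=0x%02x\n",
             vfm, (vfm >> 8) & 0xff, vfm & 0xff);
      /* Packing makes family/model pairs totally ordered, so range markers work. */
      printf("P5-class sorts before NetBurst: %d\n",
             INTEL_PENTIUM_MMX < INTEL_P4_WILLAMETTE);
      return 0;
  }
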
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index ed580c7f9d0a..1a0dc2b2bf5b 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -175,6 +175,9 @@ extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, un
extern void __iomem *ioremap_encrypted(resource_size_t phys_addr, unsigned long size);
#define ioremap_encrypted ioremap_encrypted
+void *arch_memremap_wb(phys_addr_t phys_addr, size_t size, unsigned long flags);
+#define arch_memremap_wb arch_memremap_wb
+
/**
* ioremap - map bus memory into CPU space
* @offset: bus address of the memory
diff --git a/arch/x86/include/asm/irq_stack.h b/arch/x86/include/asm/irq_stack.h
index 562a547c29a5..735c3a491f60 100644
--- a/arch/x86/include/asm/irq_stack.h
+++ b/arch/x86/include/asm/irq_stack.h
@@ -116,7 +116,7 @@
ASM_CALL_ARG2
#define call_on_irqstack(func, asm_call, argconstr...) \
- call_on_stack(__this_cpu_read(pcpu_hot.hardirq_stack_ptr), \
+ call_on_stack(__this_cpu_read(hardirq_stack_ptr), \
func, asm_call, argconstr)
/* Macros to assert type correctness for run_*_on_irqstack macros */
@@ -135,7 +135,7 @@
* User mode entry and interrupt on the irq stack do not \
* switch stacks. If from user mode the task stack is empty. \
*/ \
- if (user_mode(regs) || __this_cpu_read(pcpu_hot.hardirq_stack_inuse)) { \
+ if (user_mode(regs) || __this_cpu_read(hardirq_stack_inuse)) { \
irq_enter_rcu(); \
func(c_args); \
irq_exit_rcu(); \
@@ -146,9 +146,9 @@
* places. Invoke the stack switch macro with the call \
* sequence which matches the above direct invocation. \
*/ \
- __this_cpu_write(pcpu_hot.hardirq_stack_inuse, true); \
+ __this_cpu_write(hardirq_stack_inuse, true); \
call_on_irqstack(func, asm_call, constr); \
- __this_cpu_write(pcpu_hot.hardirq_stack_inuse, false); \
+ __this_cpu_write(hardirq_stack_inuse, false); \
} \
}
@@ -212,9 +212,9 @@
*/
#define do_softirq_own_stack() \
{ \
- __this_cpu_write(pcpu_hot.hardirq_stack_inuse, true); \
+ __this_cpu_write(hardirq_stack_inuse, true); \
call_on_irqstack(__do_softirq, ASM_CALL_ARG0); \
- __this_cpu_write(pcpu_hot.hardirq_stack_inuse, false); \
+ __this_cpu_write(hardirq_stack_inuse, false); \
}
#endif
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index cf7fc2b8e3ce..abb8374c9ff7 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -4,7 +4,7 @@
#include <asm/processor-flags.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/nospec-branch.h>
@@ -79,7 +79,7 @@ static __always_inline void native_local_irq_restore(unsigned long flags)
#ifdef CONFIG_PARAVIRT_XXL
#include <asm/paravirt.h>
#else
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
static __always_inline unsigned long arch_local_save_flags(void)
@@ -133,10 +133,10 @@ static __always_inline unsigned long arch_local_irq_save(void)
#endif
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* CONFIG_PARAVIRT_XXL */
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
static __always_inline int arch_irqs_disabled_flags(unsigned long flags)
{
return !(flags & X86_EFLAGS_IF);
@@ -154,6 +154,6 @@ static __always_inline void arch_local_irq_restore(unsigned long flags)
if (!arch_irqs_disabled_flags(flags))
arch_local_irq_enable();
}
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index 3f1c1d6c0da1..61dd1dee7812 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -7,7 +7,7 @@
#include <asm/asm.h>
#include <asm/nops.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/stringify.h>
#include <linux/types.h>
@@ -55,6 +55,6 @@ l_yes:
extern int arch_jump_entry_size(struct jump_entry *entry);
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif
diff --git a/arch/x86/include/asm/kasan.h b/arch/x86/include/asm/kasan.h
index de75306b932e..d7e33c7f096b 100644
--- a/arch/x86/include/asm/kasan.h
+++ b/arch/x86/include/asm/kasan.h
@@ -23,7 +23,7 @@
(1ULL << (__VIRTUAL_MASK_SHIFT - \
KASAN_SHADOW_SCALE_SHIFT)))
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#ifdef CONFIG_KASAN
void __init kasan_early_init(void);
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 8ad187462b68..5432457d2338 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -13,11 +13,12 @@
# define KEXEC_CONTROL_PAGE_SIZE 4096
# define KEXEC_CONTROL_CODE_MAX_SIZE 2048
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/string.h>
#include <linux/kernel.h>
+#include <asm/asm.h>
#include <asm/page.h>
#include <asm/ptrace.h>
@@ -71,41 +72,32 @@ static inline void crash_setup_regs(struct pt_regs *newregs,
if (oldregs) {
memcpy(newregs, oldregs, sizeof(*newregs));
} else {
+ asm volatile("mov %%" _ASM_BX ",%0" : "=m"(newregs->bx));
+ asm volatile("mov %%" _ASM_CX ",%0" : "=m"(newregs->cx));
+ asm volatile("mov %%" _ASM_DX ",%0" : "=m"(newregs->dx));
+ asm volatile("mov %%" _ASM_SI ",%0" : "=m"(newregs->si));
+ asm volatile("mov %%" _ASM_DI ",%0" : "=m"(newregs->di));
+ asm volatile("mov %%" _ASM_BP ",%0" : "=m"(newregs->bp));
+ asm volatile("mov %%" _ASM_AX ",%0" : "=m"(newregs->ax));
+ asm volatile("mov %%" _ASM_SP ",%0" : "=m"(newregs->sp));
+#ifdef CONFIG_X86_64
+ asm volatile("mov %%r8,%0" : "=m"(newregs->r8));
+ asm volatile("mov %%r9,%0" : "=m"(newregs->r9));
+ asm volatile("mov %%r10,%0" : "=m"(newregs->r10));
+ asm volatile("mov %%r11,%0" : "=m"(newregs->r11));
+ asm volatile("mov %%r12,%0" : "=m"(newregs->r12));
+ asm volatile("mov %%r13,%0" : "=m"(newregs->r13));
+ asm volatile("mov %%r14,%0" : "=m"(newregs->r14));
+ asm volatile("mov %%r15,%0" : "=m"(newregs->r15));
+#endif
+ asm volatile("mov %%ss,%k0" : "=a"(newregs->ss));
+ asm volatile("mov %%cs,%k0" : "=a"(newregs->cs));
#ifdef CONFIG_X86_32
- asm volatile("movl %%ebx,%0" : "=m"(newregs->bx));
- asm volatile("movl %%ecx,%0" : "=m"(newregs->cx));
- asm volatile("movl %%edx,%0" : "=m"(newregs->dx));
- asm volatile("movl %%esi,%0" : "=m"(newregs->si));
- asm volatile("movl %%edi,%0" : "=m"(newregs->di));
- asm volatile("movl %%ebp,%0" : "=m"(newregs->bp));
- asm volatile("movl %%eax,%0" : "=m"(newregs->ax));
- asm volatile("movl %%esp,%0" : "=m"(newregs->sp));
- asm volatile("movl %%ss, %%eax;" :"=a"(newregs->ss));
- asm volatile("movl %%cs, %%eax;" :"=a"(newregs->cs));
- asm volatile("movl %%ds, %%eax;" :"=a"(newregs->ds));
- asm volatile("movl %%es, %%eax;" :"=a"(newregs->es));
- asm volatile("pushfl; popl %0" :"=m"(newregs->flags));
-#else
- asm volatile("movq %%rbx,%0" : "=m"(newregs->bx));
- asm volatile("movq %%rcx,%0" : "=m"(newregs->cx));
- asm volatile("movq %%rdx,%0" : "=m"(newregs->dx));
- asm volatile("movq %%rsi,%0" : "=m"(newregs->si));
- asm volatile("movq %%rdi,%0" : "=m"(newregs->di));
- asm volatile("movq %%rbp,%0" : "=m"(newregs->bp));
- asm volatile("movq %%rax,%0" : "=m"(newregs->ax));
- asm volatile("movq %%rsp,%0" : "=m"(newregs->sp));
- asm volatile("movq %%r8,%0" : "=m"(newregs->r8));
- asm volatile("movq %%r9,%0" : "=m"(newregs->r9));
- asm volatile("movq %%r10,%0" : "=m"(newregs->r10));
- asm volatile("movq %%r11,%0" : "=m"(newregs->r11));
- asm volatile("movq %%r12,%0" : "=m"(newregs->r12));
- asm volatile("movq %%r13,%0" : "=m"(newregs->r13));
- asm volatile("movq %%r14,%0" : "=m"(newregs->r14));
- asm volatile("movq %%r15,%0" : "=m"(newregs->r15));
- asm volatile("movl %%ss, %%eax;" :"=a"(newregs->ss));
- asm volatile("movl %%cs, %%eax;" :"=a"(newregs->cs));
- asm volatile("pushfq; popq %0" :"=m"(newregs->flags));
+ asm volatile("mov %%ds,%k0" : "=a"(newregs->ds));
+ asm volatile("mov %%es,%k0" : "=a"(newregs->es));
#endif
+ asm volatile("pushf\n\t"
+ "pop %0" : "=m"(newregs->flags));
newregs->ip = _THIS_IP_;
}
}
@@ -225,6 +217,6 @@ unsigned int arch_crash_get_elfcorehdr_size(void);
#define crash_get_elfcorehdr_size arch_crash_get_elfcorehdr_size
#endif
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_KEXEC_H */
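Editor's note: the unified crash_setup_regs() above replaces the duplicated movl/movq blocks with one sequence built from the word-size register names provided by asm.h. A standalone, x86-only sketch of the same preprocessor trick, with local stand-ins for the kernel's _ASM_* macros:

  #include <stdio.h>

  /* Local stand-ins for asm.h's _ASM_AX/_ASM_SP: pick the register name
   * that matches the word size at compile time. */
  #ifdef __x86_64__
  # define _ASM_AX "rax"
  # define _ASM_SP "rsp"
  #else
  # define _ASM_AX "eax"
  # define _ASM_SP "esp"
  #endif

  int main(void)
  {
      unsigned long ax, sp, flags;

      /* One mov per register, width selected by string pasting, as in the
       * rewritten crash_setup_regs(). */
      asm volatile("mov %%" _ASM_AX ", %0" : "=m"(ax));
      asm volatile("mov %%" _ASM_SP ", %0" : "=m"(sp));
      asm volatile("pushf\n\t"
                   "pop %0" : "=m"(flags));

      printf("ax=%#lx sp=%#lx flags=%#lx\n", ax, sp, flags);
      return 0;
  }
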
diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
index dc31b13b87a0..b51d8a4673f5 100644
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@@ -38,7 +38,7 @@
#define ASM_FUNC_ALIGN __stringify(__FUNC_ALIGN)
#define SYM_F_ALIGN __FUNC_ALIGN
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#if defined(CONFIG_MITIGATION_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
#define RET jmp __x86_return_thunk
@@ -50,7 +50,7 @@
#endif
#endif /* CONFIG_MITIGATION_RETPOLINE */
-#else /* __ASSEMBLY__ */
+#else /* __ASSEMBLER__ */
#if defined(CONFIG_MITIGATION_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
#define ASM_RET "jmp __x86_return_thunk\n\t"
@@ -62,7 +62,7 @@
#endif
#endif /* CONFIG_MITIGATION_RETPOLINE */
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
/*
* Depending on -fpatchable-function-entry=N,N usage (CONFIG_CALL_PADDING) the
@@ -119,33 +119,27 @@
/* SYM_FUNC_START -- use for global functions */
#define SYM_FUNC_START(name) \
- SYM_START(name, SYM_L_GLOBAL, SYM_F_ALIGN) \
- ENDBR
+ SYM_START(name, SYM_L_GLOBAL, SYM_F_ALIGN)
/* SYM_FUNC_START_NOALIGN -- use for global functions, w/o alignment */
#define SYM_FUNC_START_NOALIGN(name) \
- SYM_START(name, SYM_L_GLOBAL, SYM_A_NONE) \
- ENDBR
+ SYM_START(name, SYM_L_GLOBAL, SYM_A_NONE)
/* SYM_FUNC_START_LOCAL -- use for local functions */
#define SYM_FUNC_START_LOCAL(name) \
- SYM_START(name, SYM_L_LOCAL, SYM_F_ALIGN) \
- ENDBR
+ SYM_START(name, SYM_L_LOCAL, SYM_F_ALIGN)
/* SYM_FUNC_START_LOCAL_NOALIGN -- use for local functions, w/o alignment */
#define SYM_FUNC_START_LOCAL_NOALIGN(name) \
- SYM_START(name, SYM_L_LOCAL, SYM_A_NONE) \
- ENDBR
+ SYM_START(name, SYM_L_LOCAL, SYM_A_NONE)
/* SYM_FUNC_START_WEAK -- use for weak functions */
#define SYM_FUNC_START_WEAK(name) \
- SYM_START(name, SYM_L_WEAK, SYM_F_ALIGN) \
- ENDBR
+ SYM_START(name, SYM_L_WEAK, SYM_F_ALIGN)
/* SYM_FUNC_START_WEAK_NOALIGN -- use for weak functions, w/o alignment */
#define SYM_FUNC_START_WEAK_NOALIGN(name) \
- SYM_START(name, SYM_L_WEAK, SYM_A_NONE) \
- ENDBR
+ SYM_START(name, SYM_L_WEAK, SYM_A_NONE)
#endif /* _ASM_X86_LINKAGE_H */
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index f922b682b9b4..1530ee301dfe 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -10,7 +10,7 @@
#ifndef __X86_MEM_ENCRYPT_H__
#define __X86_MEM_ENCRYPT_H__
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/init.h>
#include <linux/cc_platform.h>
@@ -114,6 +114,6 @@ void add_encrypt_protection_map(void);
extern char __start_bss_decrypted[], __end_bss_decrypted[], __start_bss_decrypted_unused[];
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __X86_MEM_ENCRYPT_H__ */
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 3b496cdcb74b..8b8055a8eb9e 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -69,6 +69,18 @@ typedef struct {
u16 pkey_allocation_map;
s16 execute_only_pkey;
#endif
+
+#ifdef CONFIG_BROADCAST_TLB_FLUSH
+ /*
+ * The global ASID will be a non-zero value when the process has
+ * the same ASID across all CPUs, allowing it to make use of
+ * hardware-assisted remote TLB invalidation like AMD INVLPGB.
+ */
+ u16 global_asid;
+
+ /* The process is transitioning to a new global ASID number. */
+ bool asid_transition;
+#endif
} mm_context_t;
#define INIT_MM_CONTEXT(mm) \
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 795fdd53bd0a..2398058b6e83 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -2,7 +2,6 @@
#ifndef _ASM_X86_MMU_CONTEXT_H
#define _ASM_X86_MMU_CONTEXT_H
-#include <asm/desc.h>
#include <linux/atomic.h>
#include <linux/mm_types.h>
#include <linux/pkeys.h>
@@ -13,6 +12,7 @@
#include <asm/paravirt.h>
#include <asm/debugreg.h>
#include <asm/gsseg.h>
+#include <asm/desc.h>
extern atomic64_t last_mm_ctx_id;
@@ -139,6 +139,11 @@ static inline void mm_reset_untag_mask(struct mm_struct *mm)
#define enter_lazy_tlb enter_lazy_tlb
extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
+#define mm_init_global_asid mm_init_global_asid
+extern void mm_init_global_asid(struct mm_struct *mm);
+
+extern void mm_free_global_asid(struct mm_struct *mm);
+
/*
* Init a new mm. Used on mm copies, like at fork()
* and on mm's that are brand-new, like at execve().
@@ -161,6 +166,8 @@ static inline int init_new_context(struct task_struct *tsk,
mm->context.execute_only_pkey = -1;
}
#endif
+
+ mm_init_global_asid(mm);
mm_reset_untag_mask(mm);
init_new_context_ldt(mm);
return 0;
@@ -170,6 +177,7 @@ static inline int init_new_context(struct task_struct *tsk,
static inline void destroy_context(struct mm_struct *mm)
{
destroy_context_ldt(mm);
+ mm_free_global_asid(mm);
}
extern void switch_mm(struct mm_struct *prev, struct mm_struct *next,
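Editor's note: mm_init_global_asid() and mm_free_global_asid() bracket the mm lifetime so that a non-zero global ASID can later enable broadcast TLB invalidation (see the mmu.h hunk earlier). A toy sketch of the zero-means-unassigned convention only; the allocator below is a made-up counter, not the kernel's INVLPGB machinery:

  #include <stdio.h>
  #include <stdint.h>

  /* Zero means "no global ASID yet", matching the comment added to mm_context_t. */
  struct mm_ctx { uint16_t global_asid; };

  static uint16_t next_asid = 1;   /* toy allocator, illustrative only */

  static void mm_init_global_asid(struct mm_ctx *ctx)   { ctx->global_asid = 0; }
  static void mm_assign_global_asid(struct mm_ctx *ctx)
  {
      if (!ctx->global_asid)
          ctx->global_asid = next_asid++;
  }
  static void mm_free_global_asid(struct mm_ctx *ctx)   { ctx->global_asid = 0; }

  int main(void)
  {
      struct mm_ctx mm;

      mm_init_global_asid(&mm);
      printf("before: %u\n", mm.global_asid);
      mm_assign_global_asid(&mm);
      printf("after:  %u\n", mm.global_asid);
      mm_free_global_asid(&mm);
      return 0;
  }
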
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index f91ab1e75f9f..5e6193dbc97e 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -77,11 +77,11 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
return hv_tdx_hypercall(control, input_address, output_address);
if (hv_isolation_type_snp() && !hyperv_paravisor_present) {
- __asm__ __volatile__("mov %4, %%r8\n"
+ __asm__ __volatile__("mov %[output_address], %%r8\n"
"vmmcall"
: "=a" (hv_status), ASM_CALL_CONSTRAINT,
"+c" (control), "+d" (input_address)
- : "r" (output_address)
+ : [output_address] "r" (output_address)
: "cc", "memory", "r8", "r9", "r10", "r11");
return hv_status;
}
@@ -89,12 +89,12 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
if (!hv_hypercall_pg)
return U64_MAX;
- __asm__ __volatile__("mov %4, %%r8\n"
+ __asm__ __volatile__("mov %[output_address], %%r8\n"
CALL_NOSPEC
: "=a" (hv_status), ASM_CALL_CONSTRAINT,
"+c" (control), "+d" (input_address)
- : "r" (output_address),
- THUNK_TARGET(hv_hypercall_pg)
+ : [output_address] "r" (output_address),
+ THUNK_TARGET(hv_hypercall_pg)
: "cc", "memory", "r8", "r9", "r10", "r11");
#else
u32 input_address_hi = upper_32_bits(input_address);
@@ -187,18 +187,18 @@ static inline u64 _hv_do_fast_hypercall16(u64 control, u64 input1, u64 input2)
return hv_tdx_hypercall(control, input1, input2);
if (hv_isolation_type_snp() && !hyperv_paravisor_present) {
- __asm__ __volatile__("mov %4, %%r8\n"
+ __asm__ __volatile__("mov %[input2], %%r8\n"
"vmmcall"
: "=a" (hv_status), ASM_CALL_CONSTRAINT,
"+c" (control), "+d" (input1)
- : "r" (input2)
+ : [input2] "r" (input2)
: "cc", "r8", "r9", "r10", "r11");
} else {
- __asm__ __volatile__("mov %4, %%r8\n"
+ __asm__ __volatile__("mov %[input2], %%r8\n"
CALL_NOSPEC
: "=a" (hv_status), ASM_CALL_CONSTRAINT,
"+c" (control), "+d" (input1)
- : "r" (input2),
+ : [input2] "r" (input2),
THUNK_TARGET(hv_hypercall_pg)
: "cc", "r8", "r9", "r10", "r11");
}
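Editor's note: the mshyperv.h hunks switch the hypercall asm from the positional operand %4 to the named operand %[output_address]. A generic illustration of named versus positional operands in GNU C extended asm (x86-only, and nothing to do with the actual hypercall ABI):

  #include <stdio.h>

  /* Same addition written twice: positional operands, then named ones.
   * With named operands, reordering the constraint list can no longer
   * silently change which value a template reference points at. */
  static unsigned long add_positional(unsigned long a, unsigned long b)
  {
      unsigned long out;
      asm("lea (%1,%2), %0" : "=r"(out) : "r"(a), "r"(b));
      return out;
  }

  static unsigned long add_named(unsigned long a, unsigned long b)
  {
      unsigned long out;
      asm("lea (%[x],%[y]), %[res]"
          : [res] "=r"(out)
          : [x] "r"(a), [y] "r"(b));
      return out;
  }

  int main(void)
  {
      printf("%lu %lu\n", add_positional(2, 40), add_named(2, 40));
      return 0;
  }
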
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 72765b2fe0d8..1aacd6b68fab 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -25,6 +25,7 @@
#define _EFER_SVME 12 /* Enable virtualization */
#define _EFER_LMSLE 13 /* Long Mode Segment Limit Enable */
#define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */
+#define _EFER_TCE 15 /* Enable Translation Cache Extensions */
#define _EFER_AUTOIBRS 21 /* Enable Automatic IBRS */
#define EFER_SCE (1<<_EFER_SCE)
@@ -34,6 +35,7 @@
#define EFER_SVME (1<<_EFER_SVME)
#define EFER_LMSLE (1<<_EFER_LMSLE)
#define EFER_FFXSR (1<<_EFER_FFXSR)
+#define EFER_TCE (1<<_EFER_TCE)
#define EFER_AUTOIBRS (1<<_EFER_AUTOIBRS)
/*
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 001853541f1e..9397a319d165 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -4,7 +4,7 @@
#include "msr-index.h"
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/asm.h>
#include <asm/errno.h>
@@ -397,5 +397,5 @@ static inline int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8])
return wrmsr_safe_regs(regs);
}
#endif /* CONFIG_SMP */
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_MSR_H */
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index 41a0ebb699ec..f677382093f3 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -56,6 +56,8 @@ int __register_nmi_handler(unsigned int, struct nmiaction *);
void unregister_nmi_handler(unsigned int, const char *);
+void set_emergency_nmi_handler(unsigned int type, nmi_handler_t handler);
+
void stop_nmi(void);
void restart_nmi(void);
void local_touch_nmi(void);
diff --git a/arch/x86/include/asm/nops.h b/arch/x86/include/asm/nops.h
index 1c1b7550fa55..cd94221d8335 100644
--- a/arch/x86/include/asm/nops.h
+++ b/arch/x86/include/asm/nops.h
@@ -82,7 +82,7 @@
#define ASM_NOP7 _ASM_BYTES(BYTES_NOP7)
#define ASM_NOP8 _ASM_BYTES(BYTES_NOP8)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
extern const unsigned char * const x86_nops[];
#endif
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index aee26bb8230f..804b66a7686a 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -12,7 +12,6 @@
#include <asm/msr-index.h>
#include <asm/unwind_hints.h>
#include <asm/percpu.h>
-#include <asm/current.h>
/*
* Call depth tracking for Intel SKL CPUs to address the RSB underflow
@@ -78,21 +77,21 @@
#include <asm/asm-offsets.h>
#define CREDIT_CALL_DEPTH \
- movq $-1, PER_CPU_VAR(pcpu_hot + X86_call_depth);
+ movq $-1, PER_CPU_VAR(__x86_call_depth);
#define RESET_CALL_DEPTH \
xor %eax, %eax; \
bts $63, %rax; \
- movq %rax, PER_CPU_VAR(pcpu_hot + X86_call_depth);
+ movq %rax, PER_CPU_VAR(__x86_call_depth);
#define RESET_CALL_DEPTH_FROM_CALL \
movb $0xfc, %al; \
shl $56, %rax; \
- movq %rax, PER_CPU_VAR(pcpu_hot + X86_call_depth); \
+ movq %rax, PER_CPU_VAR(__x86_call_depth); \
CALL_THUNKS_DEBUG_INC_CALLS
#define INCREMENT_CALL_DEPTH \
- sarq $5, PER_CPU_VAR(pcpu_hot + X86_call_depth); \
+ sarq $5, PER_CPU_VAR(__x86_call_depth); \
CALL_THUNKS_DEBUG_INC_CALLS
#else
@@ -177,7 +176,7 @@
add $(BITS_PER_LONG/8), %_ASM_SP; \
lfence;
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
/*
* (ab)use RETPOLINE_SAFE on RET to annotate away 'bare' RET instructions
@@ -335,7 +334,7 @@
#define CLEAR_BRANCH_HISTORY_VMEXIT
#endif
-#else /* __ASSEMBLY__ */
+#else /* __ASSEMBLER__ */
typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE];
extern retpoline_thunk_t __x86_indirect_thunk_array[];
@@ -387,6 +386,8 @@ extern void call_depth_return_thunk(void);
__stringify(INCREMENT_CALL_DEPTH), \
X86_FEATURE_CALL_DEPTH)
+DECLARE_PER_CPU_CACHE_HOT(u64, __x86_call_depth);
+
#ifdef CONFIG_CALL_THUNKS_DEBUG
DECLARE_PER_CPU(u64, __x86_call_count);
DECLARE_PER_CPU(u64, __x86_ret_count);
@@ -602,6 +603,6 @@ static __always_inline void mds_idle_clear_cpu_buffers(void)
mds_clear_cpu_buffers();
}
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_NOSPEC_BRANCH_H_ */
diff --git a/arch/x86/include/asm/orc_types.h b/arch/x86/include/asm/orc_types.h
index 46d7e06763c9..e0125afa53fb 100644
--- a/arch/x86/include/asm/orc_types.h
+++ b/arch/x86/include/asm/orc_types.h
@@ -45,7 +45,7 @@
#define ORC_TYPE_REGS 3
#define ORC_TYPE_REGS_PARTIAL 4
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/byteorder.h>
/*
@@ -73,6 +73,6 @@ struct orc_entry {
#endif
} __packed;
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ORC_TYPES_H */
diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index c9fe207916f4..9265f2fca99a 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -14,7 +14,7 @@
#include <asm/page_32.h>
#endif /* CONFIG_X86_64 */
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
struct page;
@@ -84,7 +84,7 @@ static __always_inline u64 __is_canonical_address(u64 vaddr, u8 vaddr_bits)
return __canonical_address(vaddr, vaddr_bits) == vaddr;
}
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#include <asm-generic/memory_model.h>
#include <asm-generic/getorder.h>
diff --git a/arch/x86/include/asm/page_32.h b/arch/x86/include/asm/page_32.h
index 580d71aca65a..0c623706cb7e 100644
--- a/arch/x86/include/asm/page_32.h
+++ b/arch/x86/include/asm/page_32.h
@@ -4,7 +4,7 @@
#include <asm/page_32_types.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#define __phys_addr_nodebug(x) ((x) - PAGE_OFFSET)
#ifdef CONFIG_DEBUG_VIRTUAL
@@ -26,6 +26,6 @@ static inline void copy_page(void *to, void *from)
{
memcpy(to, from, PAGE_SIZE);
}
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* _ASM_X86_PAGE_32_H */
diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h
index faf9cc1c14bb..a9b62e0e6f79 100644
--- a/arch/x86/include/asm/page_32_types.h
+++ b/arch/x86/include/asm/page_32_types.h
@@ -11,8 +11,8 @@
* a virtual address space of one gigabyte, which limits the
* amount of physical memory you can use to about 950MB.
*
- * If you want more physical memory than this then see the CONFIG_HIGHMEM4G
- * and CONFIG_HIGHMEM64G options in the kernel configuration.
+ * If you want more physical memory than this then see the CONFIG_VMSPLIT_2G
+ * and CONFIG_HIGHMEM4G options in the kernel configuration.
*/
#define __PAGE_OFFSET_BASE _AC(CONFIG_PAGE_OFFSET, UL)
#define __PAGE_OFFSET __PAGE_OFFSET_BASE
@@ -63,7 +63,7 @@
*/
#define KERNEL_IMAGE_SIZE (512 * 1024 * 1024)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/*
* This much address space is reserved for vmalloc() and iomap()
@@ -75,6 +75,6 @@ extern int sysctl_legacy_va_layout;
extern void find_low_pfn_range(void);
extern void setup_bootmem_allocator(void);
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* _ASM_X86_PAGE_32_DEFS_H */
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index d63576608ce7..d3aab6f4e59a 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -4,7 +4,7 @@
#include <asm/page_64_types.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
@@ -55,11 +55,12 @@ static inline void clear_page(void *page)
clear_page_rep, X86_FEATURE_REP_GOOD,
clear_page_erms, X86_FEATURE_ERMS,
"=D" (page),
- "D" (page)
- : "cc", "memory", "rax", "rcx");
+ "D" (page),
+ "cc", "memory", "rax", "rcx");
}
void copy_page(void *to, void *from);
+KCFI_REFERENCE(copy_page);
#ifdef CONFIG_X86_5LEVEL
/*
@@ -94,7 +95,7 @@ static __always_inline unsigned long task_size_max(void)
}
#endif /* CONFIG_X86_5LEVEL */
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#ifdef CONFIG_X86_VSYSCALL_EMULATION
# define __HAVE_ARCH_GATE_AREA 1
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 06ef25411d62..1faa8f88850a 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -2,7 +2,7 @@
#ifndef _ASM_X86_PAGE_64_DEFS_H
#define _ASM_X86_PAGE_64_DEFS_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/kaslr.h>
#endif
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index 974688973cf6..9f77bf03d747 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -43,7 +43,7 @@
#define IOREMAP_MAX_ORDER (PMD_SHIFT)
#endif /* CONFIG_X86_64 */
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#ifdef CONFIG_DYNAMIC_PHYSICAL_MASK
extern phys_addr_t physical_mask;
@@ -66,6 +66,6 @@ bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn);
extern void initmem_init(void);
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* _ASM_X86_PAGE_DEFS_H */
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 041aff51eb50..bed346bfac89 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -6,7 +6,7 @@
#include <asm/paravirt_types.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
struct mm_struct;
#endif
@@ -15,7 +15,7 @@ struct mm_struct;
#include <asm/asm.h>
#include <asm/nospec-branch.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/bug.h>
#include <linux/types.h>
#include <linux/cpumask.h>
@@ -91,11 +91,6 @@ static inline void __flush_tlb_multi(const struct cpumask *cpumask,
PVOP_VCALL2(mmu.flush_tlb_multi, cpumask, info);
}
-static inline void paravirt_tlb_remove_table(struct mmu_gather *tlb, void *table)
-{
- PVOP_VCALL2(mmu.tlb_remove_table, tlb, table);
-}
-
static inline void paravirt_arch_exit_mmap(struct mm_struct *mm)
{
PVOP_VCALL1(mmu.exit_mmap, mm);
@@ -720,7 +715,7 @@ static __always_inline unsigned long arch_local_irq_save(void)
extern void default_banner(void);
void native_pv_lock_init(void) __init;
-#else /* __ASSEMBLY__ */
+#else /* __ASSEMBLER__ */
#ifdef CONFIG_X86_64
#ifdef CONFIG_PARAVIRT_XXL
@@ -740,18 +735,18 @@ void native_pv_lock_init(void) __init;
#endif /* CONFIG_PARAVIRT_XXL */
#endif /* CONFIG_X86_64 */
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#else /* CONFIG_PARAVIRT */
# define default_banner x86_init_noop
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
static inline void native_pv_lock_init(void)
{
}
#endif
#endif /* !CONFIG_PARAVIRT */
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#ifndef CONFIG_PARAVIRT_XXL
static inline void paravirt_enter_mmap(struct mm_struct *mm)
{
@@ -769,5 +764,5 @@ static inline void paravirt_set_cap(void)
{
}
#endif
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_PARAVIRT_H */
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index fea56b04f436..62912023b46f 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -4,7 +4,7 @@
#ifdef CONFIG_PARAVIRT
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
#include <asm/desc_defs.h>
@@ -134,8 +134,6 @@ struct pv_mmu_ops {
void (*flush_tlb_multi)(const struct cpumask *cpus,
const struct flush_tlb_info *info);
- void (*tlb_remove_table)(struct mmu_gather *tlb, void *table);
-
/* Hook for intercepting the destruction of an mm_struct. */
void (*exit_mmap)(struct mm_struct *mm);
void (*notify_page_enc_status_changed)(unsigned long pfn, int npages, bool enc);
@@ -242,9 +240,17 @@ extern struct paravirt_patch_template pv_ops;
#define paravirt_ptr(op) [paravirt_opptr] "m" (pv_ops.op)
-int paravirt_disable_iospace(void);
-
-/* This generates an indirect call based on the operation type number. */
+/*
+ * This generates an indirect call based on the operation type number.
+ *
+ * Since alternatives run after enabling CET/IBT -- the latter setting/clearing
+ * capabilities and the former requiring all capabilities being finalized --
+ * these indirect calls are subject to IBT and the paravirt stubs should have
+ * ENDBR on.
+ *
+ * OTOH since this is effectively a __nocfi indirect call, the paravirt stubs
+ * don't need to bother with CFI prefixes.
+ */
#define PARAVIRT_CALL \
ANNOTATE_RETPOLINE_SAFE \
"call *%[paravirt_opptr];"
@@ -519,7 +525,7 @@ unsigned long pv_native_read_cr2(void);
#define paravirt_nop ((void *)nop_func)
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#define ALT_NOT_XEN ALT_NOT(X86_FEATURE_XENPV)
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 08f5f61690b7..105db2d33c7b 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -10,7 +10,7 @@
# define __percpu_rel
#endif
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#ifdef CONFIG_SMP
# define __percpu %__percpu_seg:
@@ -20,14 +20,9 @@
#define PER_CPU_VAR(var) __percpu(var)__percpu_rel
-#ifdef CONFIG_X86_64_SMP
-# define INIT_PER_CPU_VAR(var) init_per_cpu__##var
-#else
-# define INIT_PER_CPU_VAR(var) var
-#endif
-
#else /* !__ASSEMBLY__: */
+#include <linux/args.h>
#include <linux/build_bug.h>
#include <linux/stringify.h>
#include <asm/asm.h>
@@ -41,12 +36,7 @@
# define __seg_fs __attribute__((address_space(__seg_fs)))
#endif
-#ifdef CONFIG_X86_64
-# define __percpu_seg_override __seg_gs
-#else
-# define __percpu_seg_override __seg_fs
-#endif
-
+#define __percpu_seg_override CONCATENATE(__seg_, __percpu_seg)
#define __percpu_prefix ""
#else /* !CONFIG_CC_HAS_NAMED_AS: */
@@ -98,22 +88,6 @@
#define __force_percpu_arg(x) __force_percpu_prefix "%" #x
/*
- * Initialized pointers to per-CPU variables needed for the boot
- * processor need to use these macros to get the proper address
- * offset from __per_cpu_load on SMP.
- *
- * There also must be an entry in vmlinux_64.lds.S
- */
-#define DECLARE_INIT_PER_CPU(var) \
- extern typeof(var) init_per_cpu_var(var)
-
-#ifdef CONFIG_X86_64_SMP
-# define init_per_cpu_var(var) init_per_cpu__##var
-#else
-# define init_per_cpu_var(var) var
-#endif
-
-/*
* For arch-specific code, we can use direct single-insn ops (they
* don't give an lvalue though).
*/
@@ -128,15 +102,10 @@
#define __pcpu_cast_4(val) ((u32)(((unsigned long) val) & 0xffffffff))
#define __pcpu_cast_8(val) ((u64)(val))
-#define __pcpu_op1_1(op, dst) op "b " dst
-#define __pcpu_op1_2(op, dst) op "w " dst
-#define __pcpu_op1_4(op, dst) op "l " dst
-#define __pcpu_op1_8(op, dst) op "q " dst
-
-#define __pcpu_op2_1(op, src, dst) op "b " src ", " dst
-#define __pcpu_op2_2(op, src, dst) op "w " src ", " dst
-#define __pcpu_op2_4(op, src, dst) op "l " src ", " dst
-#define __pcpu_op2_8(op, src, dst) op "q " src ", " dst
+#define __pcpu_op_1(op) op "b "
+#define __pcpu_op_2(op) op "w "
+#define __pcpu_op_4(op) op "l "
+#define __pcpu_op_8(op) op "q "
#define __pcpu_reg_1(mod, x) mod "q" (x)
#define __pcpu_reg_2(mod, x) mod "r" (x)
@@ -168,7 +137,8 @@ do { \
({ \
__pcpu_type_##size pfo_val__; \
\
- asm qual (__pcpu_op2_##size("mov", __percpu_arg([var]), "%[val]") \
+ asm qual (__pcpu_op_##size("mov") \
+ __percpu_arg([var]) ", %[val]" \
: [val] __pcpu_reg_##size("=", pfo_val__) \
: [var] "m" (__my_cpu_var(_var))); \
\
@@ -184,7 +154,8 @@ do { \
pto_tmp__ = (_val); \
(void)pto_tmp__; \
} \
- asm qual(__pcpu_op2_##size("mov", "%[val]", __percpu_arg([var])) \
+ asm qual (__pcpu_op_##size("mov") "%[val], " \
+ __percpu_arg([var]) \
: [var] "=m" (__my_cpu_var(_var)) \
: [val] __pcpu_reg_imm_##size(pto_val__)); \
} while (0)
@@ -201,7 +172,8 @@ do { \
({ \
__pcpu_type_##size pfo_val__; \
\
- asm(__pcpu_op2_##size("mov", __force_percpu_arg(a[var]), "%[val]") \
+ asm(__pcpu_op_##size("mov") \
+ __force_percpu_arg(a[var]) ", %[val]" \
: [val] __pcpu_reg_##size("=", pfo_val__) \
: [var] "i" (&(_var))); \
\
@@ -210,7 +182,7 @@ do { \
#define percpu_unary_op(size, qual, op, _var) \
({ \
- asm qual (__pcpu_op1_##size(op, __percpu_arg([var])) \
+ asm qual (__pcpu_op_##size(op) __percpu_arg([var]) \
: [var] "+m" (__my_cpu_var(_var))); \
})
@@ -223,7 +195,7 @@ do { \
pto_tmp__ = (_val); \
(void)pto_tmp__; \
} \
- asm qual(__pcpu_op2_##size(op, "%[val]", __percpu_arg([var])) \
+ asm qual (__pcpu_op_##size(op) "%[val], " __percpu_arg([var]) \
: [var] "+m" (__my_cpu_var(_var)) \
: [val] __pcpu_reg_imm_##size(pto_val__)); \
} while (0)
@@ -259,8 +231,8 @@ do { \
({ \
__pcpu_type_##size paro_tmp__ = __pcpu_cast_##size(_val); \
\
- asm qual (__pcpu_op2_##size("xadd", "%[tmp]", \
- __percpu_arg([var])) \
+ asm qual (__pcpu_op_##size("xadd") "%[tmp], " \
+ __percpu_arg([var]) \
: [tmp] __pcpu_reg_##size("+", paro_tmp__), \
[var] "+m" (__my_cpu_var(_var)) \
: : "memory"); \
@@ -303,8 +275,8 @@ do { \
__pcpu_type_##size pco_old__ = __pcpu_cast_##size(_oval); \
__pcpu_type_##size pco_new__ = __pcpu_cast_##size(_nval); \
\
- asm qual (__pcpu_op2_##size("cmpxchg", "%[nval]", \
- __percpu_arg([var])) \
+ asm qual (__pcpu_op_##size("cmpxchg") "%[nval], " \
+ __percpu_arg([var]) \
: [oval] "+a" (pco_old__), \
[var] "+m" (__my_cpu_var(_var)) \
: [nval] __pcpu_reg_##size(, pco_new__) \
@@ -320,8 +292,8 @@ do { \
__pcpu_type_##size pco_old__ = *pco_oval__; \
__pcpu_type_##size pco_new__ = __pcpu_cast_##size(_nval); \
\
- asm qual (__pcpu_op2_##size("cmpxchg", "%[nval]", \
- __percpu_arg([var])) \
+ asm qual (__pcpu_op_##size("cmpxchg") "%[nval], " \
+ __percpu_arg([var]) \
CC_SET(z) \
: CC_OUT(z) (success), \
[oval] "+a" (pco_old__), \
@@ -579,7 +551,7 @@ do { \
* it is accessed while this_cpu_read_stable() allows the value to be cached.
* this_cpu_read_stable() is more efficient and can be used if its value
* is guaranteed to be valid across CPUs. The current users include
- * pcpu_hot.current_task and pcpu_hot.top_of_stack, both of which are
+ * current_task and cpu_current_top_of_stack, both of which are
* actually per-thread variables implemented as per-CPU variables and
* thus stable for the duration of the respective task.
*/
@@ -614,9 +586,9 @@ do { \
#include <asm-generic/percpu.h>
/* We can use this directly for local CPU (faster). */
-DECLARE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off);
+DECLARE_PER_CPU_CACHE_HOT(unsigned long, this_cpu_off);
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#ifdef CONFIG_SMP
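Editor's note: the percpu.h rework drops the two-operand __pcpu_op2_* helpers in favour of __pcpu_op_*() macros that only paste the size suffix, leaving operand order to the call site. A user-space, x86-64-only sketch of that token-pasting pattern, with ordinary globals standing in for per-CPU variables:

  #include <stdio.h>

  /* Size-suffix pasting in the style of the reworked __pcpu_op_##size():
   * the macro supplies only the "l"/"q" suffix, the call site writes the
   * operands itself. x86-64 only: the "q" form assumes 64-bit registers. */
  #define __op_4(op) op "l "
  #define __op_8(op) op "q "

  static unsigned int  var32 = 5;
  static unsigned long var64 = 7;

  int main(void)
  {
      unsigned int  v32;
      unsigned long v64;

      asm(__op_4("mov") "%[src], %[dst]"
          : [dst] "=r"(v32) : [src] "m"(var32));
      asm(__op_8("mov") "%[src], %[dst]"
          : [dst] "=r"(v64) : [src] "m"(var64));

      printf("%u %lu\n", v32, v64);
      return 0;
  }
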
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index dd4841231bb9..a33147520044 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -29,11 +29,6 @@ static inline void paravirt_release_pud(unsigned long pfn) {}
static inline void paravirt_release_p4d(unsigned long pfn) {}
#endif
-/*
- * Flags to use when allocating a user page table page.
- */
-extern gfp_t __userpte_alloc_gfp;
-
#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
/*
* Instead of one PGD, we acquire two PGDs. Being order-1, it is
diff --git a/arch/x86/include/asm/pgtable-2level_types.h b/arch/x86/include/asm/pgtable-2level_types.h
index 4a12c276b181..66425424ce91 100644
--- a/arch/x86/include/asm/pgtable-2level_types.h
+++ b/arch/x86/include/asm/pgtable-2level_types.h
@@ -2,7 +2,7 @@
#ifndef _ASM_X86_PGTABLE_2LEVEL_DEFS_H
#define _ASM_X86_PGTABLE_2LEVEL_DEFS_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
typedef unsigned long pteval_t;
@@ -16,7 +16,7 @@ typedef union {
pteval_t pte;
pteval_t pte_low;
} pte_t;
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#define SHARED_KERNEL_PMD 0
diff --git a/arch/x86/include/asm/pgtable-3level_types.h b/arch/x86/include/asm/pgtable-3level_types.h
index 80911349519e..9d5b257d44e3 100644
--- a/arch/x86/include/asm/pgtable-3level_types.h
+++ b/arch/x86/include/asm/pgtable-3level_types.h
@@ -2,7 +2,7 @@
#ifndef _ASM_X86_PGTABLE_3LEVEL_DEFS_H
#define _ASM_X86_PGTABLE_3LEVEL_DEFS_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
typedef u64 pteval_t;
@@ -25,7 +25,7 @@ typedef union {
};
pmdval_t pmd;
} pmd_t;
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#define SHARED_KERNEL_PMD (!static_cpu_has(X86_FEATURE_PTI))
diff --git a/arch/x86/include/asm/pgtable-invert.h b/arch/x86/include/asm/pgtable-invert.h
index a0c1525f1b6f..e12e52ae8083 100644
--- a/arch/x86/include/asm/pgtable-invert.h
+++ b/arch/x86/include/asm/pgtable-invert.h
@@ -2,7 +2,7 @@
#ifndef _ASM_PGTABLE_INVERT_H
#define _ASM_PGTABLE_INVERT_H 1
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/*
* A clear pte value is special, and doesn't get inverted.
@@ -36,6 +36,6 @@ static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask)
return val;
}
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 593f10aabd45..7bd6bd6df4a1 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -15,7 +15,7 @@
cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS))) \
: (prot))
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/spinlock.h>
#include <asm/x86_init.h>
#include <asm/pkru.h>
@@ -973,7 +973,7 @@ static inline pgd_t pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd)
}
#endif /* CONFIG_MITIGATION_PAGE_TABLE_ISOLATION */
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#ifdef CONFIG_X86_32
@@ -982,7 +982,7 @@ static inline pgd_t pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd)
# include <asm/pgtable_64.h>
#endif
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/mm_types.h>
#include <linux/mmdebug.h>
#include <linux/log2.h>
@@ -1233,12 +1233,12 @@ static inline int pgd_none(pgd_t pgd)
}
#endif /* CONFIG_PGTABLE_LEVELS > 4 */
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#define KERNEL_PGD_BOUNDARY pgd_index(PAGE_OFFSET)
#define KERNEL_PGD_PTRS (PTRS_PER_PGD - KERNEL_PGD_BOUNDARY)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
extern int direct_gbpages;
void init_mem_mapping(void);
@@ -1812,6 +1812,6 @@ bool arch_is_platform_page(u64 paddr);
WARN_ON_ONCE(pgd_present(*pgdp) && !pgd_same(*pgdp, pgd)); \
set_pgd(pgdp, pgd); \
})
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_PGTABLE_H */
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index 7d4ad8907297..b612cc57a4d3 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -13,7 +13,7 @@
* This file contains the functions and defines necessary to modify and use
* the i386 page table tree.
*/
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/processor.h>
#include <linux/threads.h>
#include <asm/paravirt.h>
@@ -45,7 +45,7 @@ do { \
flush_tlb_one_kernel((vaddr)); \
} while (0)
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
/*
* This is used to calculate the .brk reservation for initial pagetables.
diff --git a/arch/x86/include/asm/pgtable_32_areas.h b/arch/x86/include/asm/pgtable_32_areas.h
index b6355416a15a..921148b42967 100644
--- a/arch/x86/include/asm/pgtable_32_areas.h
+++ b/arch/x86/include/asm/pgtable_32_areas.h
@@ -13,7 +13,7 @@
*/
#define VMALLOC_OFFSET (8 * 1024 * 1024)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
extern bool __vmalloc_start_set; /* set once high_memory is set */
#endif
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index d1426b64c1b9..b89f8f1194a9 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -5,7 +5,7 @@
#include <linux/const.h>
#include <asm/pgtable_64_types.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/*
* This file contains the functions and defines necessary to modify and use
@@ -270,7 +270,7 @@ static inline bool gup_fast_permitted(unsigned long start, unsigned long end)
#include <asm/pgtable-invert.h>
-#else /* __ASSEMBLY__ */
+#else /* __ASSEMBLER__ */
#define l4_index(x) (((x) >> 39) & 511)
#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
@@ -291,5 +291,5 @@ L3_START_KERNEL = pud_index(__START_KERNEL_map)
i = i + 1 ; \
.endr
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_PGTABLE_64_H */
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index ec68f8369bdc..5bb782d856f2 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -4,7 +4,7 @@
#include <asm/sparsemem.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
#include <asm/kaslr.h>
@@ -44,7 +44,7 @@ static inline bool pgtable_l5_enabled(void)
extern unsigned int pgdir_shift;
extern unsigned int ptrs_per_p4d;
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#define SHARED_KERNEL_PMD 0
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 4b804531b03c..b2ed8198d5cd 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -33,6 +33,7 @@
#define _PAGE_BIT_CPA_TEST _PAGE_BIT_SOFTW1
#define _PAGE_BIT_UFFD_WP _PAGE_BIT_SOFTW2 /* userfaultfd wrprotected */
#define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_SOFTW3 /* software dirty tracking */
+#define _PAGE_BIT_KERNEL_4K _PAGE_BIT_SOFTW3 /* page must not be converted to large */
#define _PAGE_BIT_DEVMAP _PAGE_BIT_SOFTW4
#ifdef CONFIG_X86_64
@@ -64,6 +65,7 @@
#define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE)
#define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL)
#define _PAGE_CPA_TEST (_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST)
+#define _PAGE_KERNEL_4K (_AT(pteval_t, 1) << _PAGE_BIT_KERNEL_4K)
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
#define _PAGE_PKEY_BIT0 (_AT(pteval_t, 1) << _PAGE_BIT_PKEY_BIT0)
#define _PAGE_PKEY_BIT1 (_AT(pteval_t, 1) << _PAGE_BIT_PKEY_BIT1)
@@ -164,7 +166,7 @@
* to have the WB mode at index 0 (all bits clear). This is the default
* right now and likely would break too much if changed.
*/
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
enum page_cache_mode {
_PAGE_CACHE_MODE_WB = 0,
_PAGE_CACHE_MODE_WC = 1,
@@ -239,7 +241,7 @@ enum page_cache_mode {
#define __PAGE_KERNEL_IO_NOCACHE __PAGE_KERNEL_NOCACHE
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#define __PAGE_KERNEL_ENC (__PAGE_KERNEL | _ENC)
#define __PAGE_KERNEL_ENC_WP (__PAGE_KERNEL_WP | _ENC)
@@ -262,7 +264,7 @@ enum page_cache_mode {
#define PAGE_KERNEL_IO __pgprot_mask(__PAGE_KERNEL_IO)
#define PAGE_KERNEL_IO_NOCACHE __pgprot_mask(__PAGE_KERNEL_IO_NOCACHE)
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
/*
* early identity mapping pte attrib macros.
@@ -281,7 +283,7 @@ enum page_cache_mode {
# include <asm/pgtable_64_types.h>
#endif
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
@@ -580,6 +582,6 @@ extern int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn,
unsigned long page_flags);
extern int __init kernel_unmap_pages_in_pgd(pgd_t *pgd, unsigned long address,
unsigned long numpages);
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* _ASM_X86_PGTABLE_DEFS_H */
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index 919909d8cb77..578441db09f0 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -4,10 +4,11 @@
#include <asm/rmwcc.h>
#include <asm/percpu.h>
-#include <asm/current.h>
#include <linux/static_call_types.h>
+DECLARE_PER_CPU_CACHE_HOT(int, __preempt_count);
+
/* We use the MSB mostly because its available */
#define PREEMPT_NEED_RESCHED 0x80000000
@@ -23,18 +24,18 @@
*/
static __always_inline int preempt_count(void)
{
- return raw_cpu_read_4(pcpu_hot.preempt_count) & ~PREEMPT_NEED_RESCHED;
+ return raw_cpu_read_4(__preempt_count) & ~PREEMPT_NEED_RESCHED;
}
static __always_inline void preempt_count_set(int pc)
{
int old, new;
- old = raw_cpu_read_4(pcpu_hot.preempt_count);
+ old = raw_cpu_read_4(__preempt_count);
do {
new = (old & PREEMPT_NEED_RESCHED) |
(pc & ~PREEMPT_NEED_RESCHED);
- } while (!raw_cpu_try_cmpxchg_4(pcpu_hot.preempt_count, &old, new));
+ } while (!raw_cpu_try_cmpxchg_4(__preempt_count, &old, new));
}
/*
@@ -43,7 +44,7 @@ static __always_inline void preempt_count_set(int pc)
#define init_task_preempt_count(p) do { } while (0)
#define init_idle_preempt_count(p, cpu) do { \
- per_cpu(pcpu_hot.preempt_count, (cpu)) = PREEMPT_DISABLED; \
+ per_cpu(__preempt_count, (cpu)) = PREEMPT_DISABLED; \
} while (0)
/*
@@ -57,17 +58,17 @@ static __always_inline void preempt_count_set(int pc)
static __always_inline void set_preempt_need_resched(void)
{
- raw_cpu_and_4(pcpu_hot.preempt_count, ~PREEMPT_NEED_RESCHED);
+ raw_cpu_and_4(__preempt_count, ~PREEMPT_NEED_RESCHED);
}
static __always_inline void clear_preempt_need_resched(void)
{
- raw_cpu_or_4(pcpu_hot.preempt_count, PREEMPT_NEED_RESCHED);
+ raw_cpu_or_4(__preempt_count, PREEMPT_NEED_RESCHED);
}
static __always_inline bool test_preempt_need_resched(void)
{
- return !(raw_cpu_read_4(pcpu_hot.preempt_count) & PREEMPT_NEED_RESCHED);
+ return !(raw_cpu_read_4(__preempt_count) & PREEMPT_NEED_RESCHED);
}
/*
@@ -76,12 +77,12 @@ static __always_inline bool test_preempt_need_resched(void)
static __always_inline void __preempt_count_add(int val)
{
- raw_cpu_add_4(pcpu_hot.preempt_count, val);
+ raw_cpu_add_4(__preempt_count, val);
}
static __always_inline void __preempt_count_sub(int val)
{
- raw_cpu_add_4(pcpu_hot.preempt_count, -val);
+ raw_cpu_add_4(__preempt_count, -val);
}
/*
@@ -91,7 +92,7 @@ static __always_inline void __preempt_count_sub(int val)
*/
static __always_inline bool __preempt_count_dec_and_test(void)
{
- return GEN_UNARY_RMWcc("decl", __my_cpu_var(pcpu_hot.preempt_count), e,
+ return GEN_UNARY_RMWcc("decl", __my_cpu_var(__preempt_count), e,
__percpu_arg([var]));
}
@@ -100,7 +101,7 @@ static __always_inline bool __preempt_count_dec_and_test(void)
*/
static __always_inline bool should_resched(int preempt_offset)
{
- return unlikely(raw_cpu_read_4(pcpu_hot.preempt_count) == preempt_offset);
+ return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset);
}
#ifdef CONFIG_PREEMPTION
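Editor's note on the preempt.h hunk above: the per-CPU preempt count keeps the "need resched" state in the most significant bit, inverted, so a single decrement-and-test can check "count reached zero and a reschedule is pending" in one instruction. Below is a minimal user-space sketch of that bit layout (it mirrors the macros above but is illustrative only, not kernel code):

/*
 * Sketch of the inverted PREEMPT_NEED_RESCHED bit: the bit being CLEAR
 * means a reschedule is wanted, so "count == 0 && need_resched" is
 * simply "the whole word is zero".
 */
#include <stdio.h>
#include <stdbool.h>

#define PREEMPT_NEED_RESCHED	0x80000000u

static unsigned int pc = PREEMPT_NEED_RESCHED;	/* no resched needed */

static unsigned int preempt_count(void)	{ return pc & ~PREEMPT_NEED_RESCHED; }
static void set_preempt_need_resched(void)	{ pc &= ~PREEMPT_NEED_RESCHED; }
static bool should_resched(void)		{ return pc == 0; }

int main(void)
{
	pc += 1;			/* preempt_disable()  */
	set_preempt_need_resched();	/* resched requested  */
	pc -= 1;			/* preempt_enable()   */
	printf("count=%u should_resched=%d\n", preempt_count(), should_resched());
	return 0;
}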
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index c0cd10182e90..7a3918308a36 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -60,18 +60,13 @@ struct vm86;
# define ARCH_MIN_MMSTRUCT_ALIGN 0
#endif
-enum tlb_infos {
- ENTRIES,
- NR_INFO
-};
-
-extern u16 __read_mostly tlb_lli_4k[NR_INFO];
-extern u16 __read_mostly tlb_lli_2m[NR_INFO];
-extern u16 __read_mostly tlb_lli_4m[NR_INFO];
-extern u16 __read_mostly tlb_lld_4k[NR_INFO];
-extern u16 __read_mostly tlb_lld_2m[NR_INFO];
-extern u16 __read_mostly tlb_lld_4m[NR_INFO];
-extern u16 __read_mostly tlb_lld_1g[NR_INFO];
+extern u16 __read_mostly tlb_lli_4k;
+extern u16 __read_mostly tlb_lli_2m;
+extern u16 __read_mostly tlb_lli_4m;
+extern u16 __read_mostly tlb_lld_4k;
+extern u16 __read_mostly tlb_lld_2m;
+extern u16 __read_mostly tlb_lld_4m;
+extern u16 __read_mostly tlb_lld_1g;
/*
* CPU type and hardware bug flags. Kept separately for each CPU.
@@ -234,7 +229,7 @@ static inline unsigned long long l1tf_pfn_limit(void)
void init_cpu_devs(void);
void get_cpu_vendor(struct cpuinfo_x86 *c);
extern void early_cpu_init(void);
-extern void identify_secondary_cpu(struct cpuinfo_x86 *);
+extern void identify_secondary_cpu(unsigned int cpu);
extern void print_cpu_info(struct cpuinfo_x86 *);
void print_cpu_msr(struct cpuinfo_x86 *);
@@ -420,37 +415,33 @@ struct irq_stack {
char stack[IRQ_STACK_SIZE];
} __aligned(IRQ_STACK_SIZE);
+DECLARE_PER_CPU_CACHE_HOT(struct irq_stack *, hardirq_stack_ptr);
#ifdef CONFIG_X86_64
-struct fixed_percpu_data {
- /*
- * GCC hardcodes the stack canary as %gs:40. Since the
- * irq_stack is the object at %gs:0, we reserve the bottom
- * 48 bytes of the irq stack for the canary.
- *
- * Once we are willing to require -mstack-protector-guard-symbol=
- * support for x86_64 stackprotector, we can get rid of this.
- */
- char gs_base[40];
- unsigned long stack_canary;
-};
+DECLARE_PER_CPU_CACHE_HOT(bool, hardirq_stack_inuse);
+#else
+DECLARE_PER_CPU_CACHE_HOT(struct irq_stack *, softirq_stack_ptr);
+#endif
-DECLARE_PER_CPU_FIRST(struct fixed_percpu_data, fixed_percpu_data) __visible;
-DECLARE_INIT_PER_CPU(fixed_percpu_data);
+DECLARE_PER_CPU_CACHE_HOT(unsigned long, cpu_current_top_of_stack);
+/* const-qualified alias provided by the linker. */
+DECLARE_PER_CPU_CACHE_HOT(const unsigned long __percpu_seg_override,
+ const_cpu_current_top_of_stack);
+#ifdef CONFIG_X86_64
static inline unsigned long cpu_kernelmode_gs_base(int cpu)
{
- return (unsigned long)per_cpu(fixed_percpu_data.gs_base, cpu);
+#ifdef CONFIG_SMP
+ return per_cpu_offset(cpu);
+#else
+ return 0;
+#endif
}
extern asmlinkage void entry_SYSCALL32_ignore(void);
/* Save actual FS/GS selectors and bases to current->thread */
void current_save_fsgs(void);
-#else /* X86_64 */
-#ifdef CONFIG_STACKPROTECTOR
-DECLARE_PER_CPU(unsigned long, __stack_chk_guard);
-#endif
-#endif /* !X86_64 */
+#endif /* X86_64 */
struct perf_event;
@@ -561,9 +552,9 @@ static __always_inline unsigned long current_top_of_stack(void)
* entry trampoline.
*/
if (IS_ENABLED(CONFIG_USE_X86_SEG_SUPPORT))
- return this_cpu_read_const(const_pcpu_hot.top_of_stack);
+ return this_cpu_read_const(const_cpu_current_top_of_stack);
- return this_cpu_read_stable(pcpu_hot.top_of_stack);
+ return this_cpu_read_stable(cpu_current_top_of_stack);
}
static __always_inline bool on_thread_stack(void)
@@ -668,8 +659,6 @@ static __always_inline void prefetchw(const void *x)
.sysenter_cs = __KERNEL_CS, \
}
-#define KSTK_ESP(task) (task_pt_regs(task)->sp)
-
#else
extern unsigned long __top_init_kernel_stack[];
@@ -677,8 +666,6 @@ extern unsigned long __top_init_kernel_stack[];
.sp = (unsigned long)&__top_init_kernel_stack, \
}
-extern unsigned long KSTK_ESP(struct task_struct *task);
-
#endif /* CONFIG_X86_64 */
extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
@@ -692,6 +679,7 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
#define TASK_UNMAPPED_BASE __TASK_UNMAPPED_BASE(TASK_SIZE_LOW)
#define KSTK_EIP(task) (task_pt_regs(task)->ip)
+#define KSTK_ESP(task) (task_pt_regs(task)->sp)
/* Get/set a process' ability to use the timestamp counter instruction */
#define GET_TSC_CTL(adr) get_tsc_mode((adr))
diff --git a/arch/x86/include/asm/prom.h b/arch/x86/include/asm/prom.h
index 365798cb4408..5d0dbab85264 100644
--- a/arch/x86/include/asm/prom.h
+++ b/arch/x86/include/asm/prom.h
@@ -8,7 +8,7 @@
#ifndef _ASM_X86_PROM_H
#define _ASM_X86_PROM_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/of.h>
#include <linux/types.h>
@@ -33,5 +33,5 @@ static inline void x86_flattree_get_config(void) { }
extern char cmd_line[COMMAND_LINE_SIZE];
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index 484f4f0131a5..05224a695872 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -15,7 +15,6 @@ void entry_SYSCALL_64(void);
void entry_SYSCALL_64_safe_stack(void);
void entry_SYSRETQ_unsafe_stack(void);
void entry_SYSRETQ_end(void);
-long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2);
#endif
#ifdef CONFIG_X86_32
@@ -41,6 +40,6 @@ void x86_configure_nx(void);
extern int reboot_force;
-long do_arch_prctl_common(int option, unsigned long arg2);
+long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2);
#endif /* _ASM_X86_PROTO_H */
diff --git a/arch/x86/include/asm/pti.h b/arch/x86/include/asm/pti.h
index ab167c96b9ab..88d0a1ab1f77 100644
--- a/arch/x86/include/asm/pti.h
+++ b/arch/x86/include/asm/pti.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_PTI_H
#define _ASM_X86_PTI_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
extern void pti_init(void);
@@ -11,5 +11,5 @@ extern void pti_finalize(void);
static inline void pti_check_boottime_disable(void) { }
#endif
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_PTI_H */
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 5a83fbd9bc0b..50f75467f73d 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -6,7 +6,7 @@
#include <asm/page_types.h>
#include <uapi/asm/ptrace.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#ifdef __i386__
struct pt_regs {
@@ -469,5 +469,5 @@ extern int do_set_thread_area(struct task_struct *p, int idx,
# define do_set_thread_area_64(p, s, t) (0)
#endif
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* _ASM_X86_PTRACE_H */
diff --git a/arch/x86/include/asm/purgatory.h b/arch/x86/include/asm/purgatory.h
index 5528e9325049..2fee5e9f1ccc 100644
--- a/arch/x86/include/asm/purgatory.h
+++ b/arch/x86/include/asm/purgatory.h
@@ -2,10 +2,10 @@
#ifndef _ASM_X86_PURGATORY_H
#define _ASM_X86_PURGATORY_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/purgatory.h>
extern void purgatory(void);
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_PURGATORY_H */
diff --git a/arch/x86/include/asm/pvclock-abi.h b/arch/x86/include/asm/pvclock-abi.h
index 1436226efe3e..b9fece5fc96d 100644
--- a/arch/x86/include/asm/pvclock-abi.h
+++ b/arch/x86/include/asm/pvclock-abi.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_PVCLOCK_ABI_H
#define _ASM_X86_PVCLOCK_ABI_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/*
* These structs MUST NOT be changed.
@@ -44,5 +44,5 @@ struct pvclock_wall_clock {
#define PVCLOCK_GUEST_STOPPED (1 << 1)
/* PVCLOCK_COUNTS_FROM_ZERO broke ABI and can't be used anymore. */
#define PVCLOCK_COUNTS_FROM_ZERO (1 << 2)
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_PVCLOCK_ABI_H */
diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h
index 87e5482acd0d..f607081a022a 100644
--- a/arch/x86/include/asm/realmode.h
+++ b/arch/x86/include/asm/realmode.h
@@ -9,7 +9,7 @@
#define TH_FLAGS_SME_ACTIVE_BIT 0
#define TH_FLAGS_SME_ACTIVE BIT(TH_FLAGS_SME_ACTIVE_BIT)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
#include <asm/io.h>
@@ -95,6 +95,6 @@ void reserve_real_mode(void);
void load_trampoline_pgtable(void);
void init_real_mode(void);
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ARCH_X86_REALMODE_H */
diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
deleted file mode 100644
index e9187ddd3d1f..000000000000
--- a/arch/x86/include/asm/required-features.h
+++ /dev/null
@@ -1,105 +0,0 @@
-#ifndef _ASM_X86_REQUIRED_FEATURES_H
-#define _ASM_X86_REQUIRED_FEATURES_H
-
-/* Define minimum CPUID feature set for kernel These bits are checked
- really early to actually display a visible error message before the
- kernel dies. Make sure to assign features to the proper mask!
-
- Some requirements that are not in CPUID yet are also in the
- CONFIG_X86_MINIMUM_CPU_FAMILY which is checked too.
-
- The real information is in arch/x86/Kconfig.cpu, this just converts
- the CONFIGs into a bitmask */
-
-#ifndef CONFIG_MATH_EMULATION
-# define NEED_FPU (1<<(X86_FEATURE_FPU & 31))
-#else
-# define NEED_FPU 0
-#endif
-
-#if defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64)
-# define NEED_PAE (1<<(X86_FEATURE_PAE & 31))
-#else
-# define NEED_PAE 0
-#endif
-
-#ifdef CONFIG_X86_CMPXCHG64
-# define NEED_CX8 (1<<(X86_FEATURE_CX8 & 31))
-#else
-# define NEED_CX8 0
-#endif
-
-#if defined(CONFIG_X86_CMOV) || defined(CONFIG_X86_64)
-# define NEED_CMOV (1<<(X86_FEATURE_CMOV & 31))
-#else
-# define NEED_CMOV 0
-#endif
-
-# define NEED_3DNOW 0
-
-#if defined(CONFIG_X86_P6_NOP) || defined(CONFIG_X86_64)
-# define NEED_NOPL (1<<(X86_FEATURE_NOPL & 31))
-#else
-# define NEED_NOPL 0
-#endif
-
-#ifdef CONFIG_MATOM
-# define NEED_MOVBE (1<<(X86_FEATURE_MOVBE & 31))
-#else
-# define NEED_MOVBE 0
-#endif
-
-#ifdef CONFIG_X86_64
-#ifdef CONFIG_PARAVIRT_XXL
-/* Paravirtualized systems may not have PSE or PGE available */
-#define NEED_PSE 0
-#define NEED_PGE 0
-#else
-#define NEED_PSE (1<<(X86_FEATURE_PSE) & 31)
-#define NEED_PGE (1<<(X86_FEATURE_PGE) & 31)
-#endif
-#define NEED_MSR (1<<(X86_FEATURE_MSR & 31))
-#define NEED_FXSR (1<<(X86_FEATURE_FXSR & 31))
-#define NEED_XMM (1<<(X86_FEATURE_XMM & 31))
-#define NEED_XMM2 (1<<(X86_FEATURE_XMM2 & 31))
-#define NEED_LM (1<<(X86_FEATURE_LM & 31))
-#else
-#define NEED_PSE 0
-#define NEED_MSR 0
-#define NEED_PGE 0
-#define NEED_FXSR 0
-#define NEED_XMM 0
-#define NEED_XMM2 0
-#define NEED_LM 0
-#endif
-
-#define REQUIRED_MASK0 (NEED_FPU|NEED_PSE|NEED_MSR|NEED_PAE|\
- NEED_CX8|NEED_PGE|NEED_FXSR|NEED_CMOV|\
- NEED_XMM|NEED_XMM2)
-#define SSE_MASK (NEED_XMM|NEED_XMM2)
-
-#define REQUIRED_MASK1 (NEED_LM|NEED_3DNOW)
-
-#define REQUIRED_MASK2 0
-#define REQUIRED_MASK3 (NEED_NOPL)
-#define REQUIRED_MASK4 (NEED_MOVBE)
-#define REQUIRED_MASK5 0
-#define REQUIRED_MASK6 0
-#define REQUIRED_MASK7 0
-#define REQUIRED_MASK8 0
-#define REQUIRED_MASK9 0
-#define REQUIRED_MASK10 0
-#define REQUIRED_MASK11 0
-#define REQUIRED_MASK12 0
-#define REQUIRED_MASK13 0
-#define REQUIRED_MASK14 0
-#define REQUIRED_MASK15 0
-#define REQUIRED_MASK16 0
-#define REQUIRED_MASK17 0
-#define REQUIRED_MASK18 0
-#define REQUIRED_MASK19 0
-#define REQUIRED_MASK20 0
-#define REQUIRED_MASK21 0
-#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22)
-
-#endif /* _ASM_X86_REQUIRED_FEATURES_H */
diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h
index 363266cbcada..3821ee3fae35 100644
--- a/arch/x86/include/asm/rmwcc.h
+++ b/arch/x86/include/asm/rmwcc.h
@@ -29,7 +29,7 @@ cc_label: c = true; \
#define __GEN_RMWcc(fullop, _var, cc, clobbers, ...) \
({ \
bool c; \
- asm volatile (fullop CC_SET(cc) \
+ asm_inline volatile (fullop CC_SET(cc) \
: [var] "+m" (_var), CC_OUT(cc) (c) \
: __VA_ARGS__ : clobbers); \
c; \
diff --git a/arch/x86/include/asm/runtime-const.h b/arch/x86/include/asm/runtime-const.h
index 6652ebddfd02..8d983cfd06ea 100644
--- a/arch/x86/include/asm/runtime-const.h
+++ b/arch/x86/include/asm/runtime-const.h
@@ -2,6 +2,18 @@
#ifndef _ASM_RUNTIME_CONST_H
#define _ASM_RUNTIME_CONST_H
+#ifdef __ASSEMBLY__
+
+.macro RUNTIME_CONST_PTR sym reg
+ movq $0x0123456789abcdef, %\reg
+ 1:
+ .pushsection runtime_ptr_\sym, "a"
+ .long 1b - 8 - .
+ .popsection
+.endm
+
+#else /* __ASSEMBLY__ */
+
#define runtime_const_ptr(sym) ({ \
typeof(sym) __ret; \
asm_inline("mov %1,%0\n1:\n" \
@@ -58,4 +70,5 @@ static inline void runtime_const_fixup(void (*fn)(void *, unsigned long),
}
}
+#endif /* __ASSEMBLY__ */
#endif
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index 9d6411c65920..77d8f49b92bd 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -233,7 +233,7 @@
#define VDSO_CPUNODE_BITS 12
#define VDSO_CPUNODE_MASK 0xfff
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/* Helper functions to store/load CPU and node numbers */
@@ -265,7 +265,7 @@ static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node)
*node = (p >> VDSO_CPUNODE_BITS);
}
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#ifdef __KERNEL__
@@ -286,7 +286,7 @@ static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node)
*/
#define XEN_EARLY_IDT_HANDLER_SIZE (8 + ENDBR_INSN_SIZE)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
extern const char early_idt_handler_array[NUM_EXCEPTION_VECTORS][EARLY_IDT_HANDLER_SIZE];
extern void early_ignore_irq(void);
@@ -350,7 +350,7 @@ static inline void __loadsegment_fs(unsigned short value)
#define savesegment(seg, value) \
asm("mov %%" #seg ",%0":"=r" (value) : : "memory")
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* __KERNEL__ */
#endif /* _ASM_X86_SEGMENT_H */
diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h
index cc62ef70ccc0..8d9f1c9aaa4c 100644
--- a/arch/x86/include/asm/set_memory.h
+++ b/arch/x86/include/asm/set_memory.h
@@ -2,7 +2,6 @@
#ifndef _ASM_X86_SET_MEMORY_H
#define _ASM_X86_SET_MEMORY_H
-#include <linux/mm.h>
#include <asm/page.h>
#include <asm-generic/set_memory.h>
@@ -38,7 +37,6 @@ int set_memory_rox(unsigned long addr, int numpages);
* The caller is required to take care of these.
*/
-int __set_memory_prot(unsigned long addr, int numpages, pgprot_t prot);
int _set_memory_uc(unsigned long addr, int numpages);
int _set_memory_wc(unsigned long addr, int numpages);
int _set_memory_wt(unsigned long addr, int numpages);
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index 85f4fde3515c..ad9212df0ec0 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -27,7 +27,7 @@
#define OLD_CL_ADDRESS 0x020 /* Relative to real mode data */
#define NEW_CL_POINTER 0x228 /* Relative to real mode data */
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/cache.h>
#include <asm/bootparam.h>
@@ -46,6 +46,7 @@ void setup_bios_corruption_check(void);
void early_platform_quirks(void);
extern unsigned long saved_video_mode;
+extern unsigned long acpi_realmode_flags;
extern void reserve_standard_io_resources(void);
extern void i386_reserve_resources(void);
@@ -141,7 +142,7 @@ extern bool builtin_cmdline_added __ro_after_init;
#define builtin_cmdline_added 0
#endif
-#else /* __ASSEMBLY */
+#else /* __ASSEMBLER__ */
.macro __RESERVE_BRK name, size
.pushsection .bss..brk, "aw"
@@ -153,6 +154,6 @@ SYM_DATA_END(__brk_\name)
#define RESERVE_BRK(name, size) __RESERVE_BRK name, size
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_SETUP_H */
diff --git a/arch/x86/include/asm/setup_data.h b/arch/x86/include/asm/setup_data.h
index 77c51111a893..7bb16f843c93 100644
--- a/arch/x86/include/asm/setup_data.h
+++ b/arch/x86/include/asm/setup_data.h
@@ -4,7 +4,7 @@
#include <uapi/asm/setup_data.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
struct pci_setup_rom {
struct setup_data data;
@@ -27,6 +27,6 @@ struct efi_setup_data {
u64 reserved[8];
};
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_SETUP_DATA_H */
diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h
index fcbbef484a78..a28ff6b14145 100644
--- a/arch/x86/include/asm/shared/tdx.h
+++ b/arch/x86/include/asm/shared/tdx.h
@@ -106,7 +106,7 @@
#define TDX_PS_1G 2
#define TDX_PS_NR (TDX_PS_1G + 1)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/compiler_attributes.h>
@@ -177,5 +177,5 @@ static __always_inline u64 hcall_func(u64 exit_reason)
return exit_reason;
}
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* _ASM_X86_SHARED_TDX_H */
diff --git a/arch/x86/include/asm/shstk.h b/arch/x86/include/asm/shstk.h
index 4cb77e004615..ba6f2fe43848 100644
--- a/arch/x86/include/asm/shstk.h
+++ b/arch/x86/include/asm/shstk.h
@@ -2,7 +2,7 @@
#ifndef _ASM_X86_SHSTK_H
#define _ASM_X86_SHSTK_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
struct task_struct;
@@ -37,6 +37,6 @@ static inline int shstk_update_last_frame(unsigned long val) { return 0; }
static inline bool shstk_is_enabled(void) { return false; }
#endif /* CONFIG_X86_USER_SHADOW_STACK */
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_SHSTK_H */
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
index 4a4043ca6493..c72d46175374 100644
--- a/arch/x86/include/asm/signal.h
+++ b/arch/x86/include/asm/signal.h
@@ -2,7 +2,7 @@
#ifndef _ASM_X86_SIGNAL_H
#define _ASM_X86_SIGNAL_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/linkage.h>
/* Most things should be clean enough to redefine this at will, if care
@@ -28,9 +28,9 @@ typedef struct {
#define SA_IA32_ABI 0x02000000u
#define SA_X32_ABI 0x01000000u
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#include <uapi/asm/signal.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#define __ARCH_HAS_SA_RESTORER
@@ -101,5 +101,5 @@ struct pt_regs;
#endif /* !__i386__ */
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_SIGNAL_H */
diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h
index 2de1e5a75c57..daea94c2993c 100644
--- a/arch/x86/include/asm/smap.h
+++ b/arch/x86/include/asm/smap.h
@@ -13,7 +13,7 @@
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#define ASM_CLAC \
ALTERNATIVE "", "clac", X86_FEATURE_SMAP
@@ -21,7 +21,7 @@
#define ASM_STAC \
ALTERNATIVE "", "stac", X86_FEATURE_SMAP
-#else /* __ASSEMBLY__ */
+#else /* __ASSEMBLER__ */
static __always_inline void clac(void)
{
@@ -61,6 +61,6 @@ static __always_inline void smap_restore(unsigned long flags)
#define ASM_STAC \
ALTERNATIVE("", "stac", X86_FEATURE_SMAP)
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_SMAP_H */
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index ca073f40698f..0c1c68039d6f 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -1,12 +1,13 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_SMP_H
#define _ASM_X86_SMP_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/cpumask.h>
+#include <linux/thread_info.h>
#include <asm/cpumask.h>
-#include <asm/current.h>
-#include <asm/thread_info.h>
+
+DECLARE_PER_CPU_CACHE_HOT(int, cpu_number);
DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map);
@@ -114,13 +115,12 @@ void wbinvd_on_cpu(int cpu);
int wbinvd_on_all_cpus(void);
void smp_kick_mwait_play_dead(void);
+void __noreturn mwait_play_dead(unsigned int eax_hint);
void native_smp_send_reschedule(int cpu);
void native_send_call_func_ipi(const struct cpumask *mask);
void native_send_call_func_single_ipi(int cpu);
-void smp_store_cpu_info(int id);
-
asmlinkage __visible void smp_reboot_interrupt(void);
__visible void smp_reschedule_interrupt(struct pt_regs *regs);
__visible void smp_call_function_interrupt(struct pt_regs *regs);
@@ -133,14 +133,8 @@ __visible void smp_call_function_single_interrupt(struct pt_regs *r);
* This function is needed by all SMP systems. It must _always_ be valid
* from the initial startup.
*/
-#define raw_smp_processor_id() this_cpu_read(pcpu_hot.cpu_number)
-#define __smp_processor_id() __this_cpu_read(pcpu_hot.cpu_number)
-
-#ifdef CONFIG_X86_32
-extern int safe_smp_processor_id(void);
-#else
-# define safe_smp_processor_id() smp_processor_id()
-#endif
+#define raw_smp_processor_id() this_cpu_read(cpu_number)
+#define __smp_processor_id() __this_cpu_read(cpu_number)
static inline struct cpumask *cpu_llc_shared_mask(int cpu)
{
@@ -164,6 +158,8 @@ static inline struct cpumask *cpu_llc_shared_mask(int cpu)
{
return (struct cpumask *)cpumask_of(0);
}
+
+static inline void __noreturn mwait_play_dead(unsigned int eax_hint) { BUG(); }
#endif /* CONFIG_SMP */
#ifdef CONFIG_DEBUG_NMI_SELFTEST
@@ -175,7 +171,7 @@ extern void nmi_selftest(void);
extern unsigned int smpboot_control;
extern unsigned long apic_mmio_base;
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
/* Control bits for startup_64 */
#define STARTUP_READ_APICID 0x80000000
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 03e7c2d49559..6266d6b9e0b8 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -42,14 +42,14 @@ static __always_inline void native_write_cr2(unsigned long val)
asm volatile("mov %0,%%cr2": : "r" (val) : "memory");
}
-static inline unsigned long __native_read_cr3(void)
+static __always_inline unsigned long __native_read_cr3(void)
{
unsigned long val;
asm volatile("mov %%cr3,%0\n\t" : "=r" (val) : __FORCE_ORDER);
return val;
}
-static inline void native_write_cr3(unsigned long val)
+static __always_inline void native_write_cr3(unsigned long val)
{
asm volatile("mov %0,%%cr3": : "r" (val) : "memory");
}
@@ -176,9 +176,8 @@ static __always_inline void clflush(volatile void *__p)
static inline void clflushopt(volatile void *__p)
{
- alternative_io(".byte 0x3e; clflush %0",
- ".byte 0x66; clflush %0",
- X86_FEATURE_CLFLUSHOPT,
+ alternative_io("ds clflush %0",
+ "clflushopt %0", X86_FEATURE_CLFLUSHOPT,
"+m" (*(volatile char __force *)__p));
}
@@ -186,14 +185,11 @@ static inline void clwb(volatile void *__p)
{
volatile struct { char x[64]; } *p = __p;
- asm volatile(ALTERNATIVE_2(
- ".byte 0x3e; clflush (%[pax])",
- ".byte 0x66; clflush (%[pax])", /* clflushopt (%%rax) */
- X86_FEATURE_CLFLUSHOPT,
- ".byte 0x66, 0x0f, 0xae, 0x30", /* clwb (%%rax) */
- X86_FEATURE_CLWB)
- : [p] "+m" (*p)
- : [pax] "a" (p));
+ asm_inline volatile(ALTERNATIVE_2(
+ "ds clflush %0",
+ "clflushopt %0", X86_FEATURE_CLFLUSHOPT,
+ "clwb %0", X86_FEATURE_CLWB)
+ : "+m" (*p));
}
#ifdef CONFIG_X86_USER_SHADOW_STACK
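Editor's note on the rewritten clwb()/clflushopt() alternatives above: ALTERNATIVE_2() resolves, at patch time, to CLWB when available, otherwise CLFLUSHOPT, otherwise the DS-prefixed plain CLFLUSH. The following user-space sketch reproduces that preference order with raw CPUID leaf 7 feature bits; the bit positions are the architectural CLFLUSHOPT/CLWB bits, the printed strings are made-up labels for the demo:

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;

	if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
		ebx = 0;

	if (ebx & (1u << 24))		/* CPUID.(7,0):EBX bit 24: CLWB */
		puts("cache-line writeback via: clwb");
	else if (ebx & (1u << 23))	/* CPUID.(7,0):EBX bit 23: CLFLUSHOPT */
		puts("cache-line writeback via: clflushopt");
	else
		puts("cache-line writeback via: ds clflush");
	return 0;
}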
diff --git a/arch/x86/include/asm/sta2x11.h b/arch/x86/include/asm/sta2x11.h
deleted file mode 100644
index e0975e9c4f47..000000000000
--- a/arch/x86/include/asm/sta2x11.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Header file for STMicroelectronics ConneXt (STA2X11) IOHub
- */
-#ifndef __ASM_STA2X11_H
-#define __ASM_STA2X11_H
-
-#include <linux/pci.h>
-
-/* This needs to be called from the MFD to configure its sub-devices */
-struct sta2x11_instance *sta2x11_get_instance(struct pci_dev *pdev);
-
-#endif /* __ASM_STA2X11_H */
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index 00473a650f51..cd761b14eb02 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -2,26 +2,10 @@
/*
* GCC stack protector support.
*
- * Stack protector works by putting predefined pattern at the start of
+ * Stack protector works by putting a predefined pattern at the start of
* the stack frame and verifying that it hasn't been overwritten when
- * returning from the function. The pattern is called stack canary
- * and unfortunately gcc historically required it to be at a fixed offset
- * from the percpu segment base. On x86_64, the offset is 40 bytes.
- *
- * The same segment is shared by percpu area and stack canary. On
- * x86_64, percpu symbols are zero based and %gs (64-bit) points to the
- * base of percpu area. The first occupant of the percpu area is always
- * fixed_percpu_data which contains stack_canary at the appropriate
- * offset. On x86_32, the stack canary is just a regular percpu
- * variable.
- *
- * Putting percpu data in %fs on 32-bit is a minor optimization compared to
- * using %gs. Since 32-bit userspace normally has %fs == 0, we are likely
- * to load 0 into %fs on exit to usermode, whereas with percpu data in
- * %gs, we are likely to load a non-null %gs on return to user mode.
- *
- * Once we are willing to require GCC 8.1 or better for 64-bit stackprotector
- * support, we can remove some of this complexity.
+ * returning from the function. The pattern is called the stack canary
+ * and is a unique value for each task.
*/
#ifndef _ASM_STACKPROTECTOR_H
@@ -36,6 +20,8 @@
#include <linux/sched.h>
+DECLARE_PER_CPU_CACHE_HOT(unsigned long, __stack_chk_guard);
+
/*
* Initialize the stackprotector canary value.
*
@@ -51,25 +37,13 @@ static __always_inline void boot_init_stack_canary(void)
{
unsigned long canary = get_random_canary();
-#ifdef CONFIG_X86_64
- BUILD_BUG_ON(offsetof(struct fixed_percpu_data, stack_canary) != 40);
-#endif
-
current->stack_canary = canary;
-#ifdef CONFIG_X86_64
- this_cpu_write(fixed_percpu_data.stack_canary, canary);
-#else
this_cpu_write(__stack_chk_guard, canary);
-#endif
}
static inline void cpu_init_stack_canary(int cpu, struct task_struct *idle)
{
-#ifdef CONFIG_X86_64
- per_cpu(fixed_percpu_data.stack_canary, cpu) = idle->stack_canary;
-#else
per_cpu(__stack_chk_guard, cpu) = idle->stack_canary;
-#endif
}
#else /* STACKPROTECTOR */
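Editor's note on the simplified comment above: the canary is now just a per-task value, stored in a regular per-CPU variable and verified on function return. A user-space sketch of the check the compiler emits is shown below; the variable name and values here are invented for the demo:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static unsigned long demo_stack_chk_guard = 0xdeadbeefcafef00dUL;

static void copy_name(const char *src)
{
	unsigned long canary = demo_stack_chk_guard;	/* prologue: stash guard */
	char buf[16];

	strncpy(buf, src, sizeof(buf) - 1);
	buf[sizeof(buf) - 1] = '\0';

	if (canary != demo_stack_chk_guard) {		/* epilogue: verify guard */
		fprintf(stderr, "stack smashing detected\n");
		abort();
	}
	printf("%s\n", buf);
}

int main(void)
{
	copy_name("hello");
	return 0;
}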
diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index 9d0b324eab21..79e9695dc13e 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -21,6 +21,7 @@ extern void *__memcpy(void *to, const void *from, size_t len);
#define __HAVE_ARCH_MEMSET
void *memset(void *s, int c, size_t n);
void *__memset(void *s, int c, size_t n);
+KCFI_REFERENCE(__memset);
/*
* KMSAN needs to instrument as much code as possible. Use C versions of
@@ -70,6 +71,7 @@ static inline void *memset64(uint64_t *s, uint64_t v, size_t n)
#define __HAVE_ARCH_MEMMOVE
void *memmove(void *dest, const void *src, size_t count);
void *__memmove(void *dest, const void *src, size_t count);
+KCFI_REFERENCE(__memmove);
int memcmp(const void *cs, const void *ct, size_t count);
size_t strlen(const char *s);
diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
index b4b16dafd55e..65394aa9b49f 100644
--- a/arch/x86/include/asm/tdx.h
+++ b/arch/x86/include/asm/tdx.h
@@ -30,7 +30,7 @@
#define TDX_SUCCESS 0ULL
#define TDX_RND_NO_ENTROPY 0x8000020300000000ULL
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <uapi/asm/mce.h>
@@ -126,5 +126,5 @@ static inline int tdx_enable(void) { return -ENODEV; }
static inline const char *tdx_dump_mce_info(struct mce *m) { return NULL; }
#endif /* CONFIG_INTEL_TDX_HOST */
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* _ASM_X86_TDX_H */
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index a55c214f3ba6..9282465eea21 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -54,7 +54,7 @@
* - this struct should fit entirely inside of one cache line
* - this struct shares the supervisor stack pages
*/
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
struct task_struct;
#include <asm/cpufeature.h>
#include <linux/atomic.h>
@@ -73,7 +73,7 @@ struct thread_info {
.flags = 0, \
}
-#else /* !__ASSEMBLY__ */
+#else /* !__ASSEMBLER__ */
#include <asm/asm-offsets.h>
@@ -161,7 +161,7 @@ struct thread_info {
*
* preempt_count needs to be 1 initially, until the scheduler is functional.
*/
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/*
* Walks up the stack frames to make sure that the specified object is
@@ -213,7 +213,7 @@ static inline int arch_within_stack_frames(const void * const stack,
#endif
}
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
/*
* Thread-synchronous status.
@@ -224,7 +224,7 @@ static inline int arch_within_stack_frames(const void * const stack,
*/
#define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#ifdef CONFIG_COMPAT
#define TS_I386_REGS_POKED 0x0004 /* regs poked by 32-bit ptracer */
@@ -242,6 +242,6 @@ static inline int arch_within_stack_frames(const void * const stack,
extern void arch_setup_new_exec(void);
#define arch_setup_new_exec arch_setup_new_exec
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* _ASM_X86_THREAD_INFO_H */
diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h
index 77f52bc1578a..866ea78ba156 100644
--- a/arch/x86/include/asm/tlb.h
+++ b/arch/x86/include/asm/tlb.h
@@ -6,6 +6,9 @@
static inline void tlb_flush(struct mmu_gather *tlb);
#include <asm-generic/tlb.h>
+#include <linux/kernel.h>
+#include <vdso/bits.h>
+#include <vdso/page.h>
static inline void tlb_flush(struct mmu_gather *tlb)
{
@@ -25,4 +28,139 @@ static inline void invlpg(unsigned long addr)
asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
}
+enum addr_stride {
+ PTE_STRIDE = 0,
+ PMD_STRIDE = 1
+};
+
+/*
+ * INVLPGB can be targeted by virtual address, PCID, ASID, or any combination
+ * of the three. For example:
+ * - FLAG_VA | FLAG_INCLUDE_GLOBAL: invalidate all TLB entries at the address
+ * - FLAG_PCID: invalidate all TLB entries matching the PCID
+ *
+ * The first is used to invalidate (kernel) mappings at a particular
+ * address across all processes.
+ *
+ * The latter invalidates all TLB entries matching a PCID.
+ */
+#define INVLPGB_FLAG_VA BIT(0)
+#define INVLPGB_FLAG_PCID BIT(1)
+#define INVLPGB_FLAG_ASID BIT(2)
+#define INVLPGB_FLAG_INCLUDE_GLOBAL BIT(3)
+#define INVLPGB_FLAG_FINAL_ONLY BIT(4)
+#define INVLPGB_FLAG_INCLUDE_NESTED BIT(5)
+
+/* The implied mode when all bits are clear: */
+#define INVLPGB_MODE_ALL_NONGLOBALS 0UL
+
+#ifdef CONFIG_BROADCAST_TLB_FLUSH
+/*
+ * INVLPGB does broadcast TLB invalidation across all the CPUs in the system.
+ *
+ * The INVLPGB instruction is weakly ordered, and a batch of invalidations can
+ * be done in a parallel fashion.
+ *
+ * The instruction takes the number of extra pages to invalidate, beyond the
+ * first page, while __invlpgb gets the more human readable number of pages to
+ * invalidate.
+ *
+ * The bits in rax[0:2] determine respectively which components of the address
+ * (VA, PCID, ASID) get compared when flushing. If none of these bits are set, *any*
+ * address in the specified range matches.
+ *
+ * Since it is desired to only flush TLB entries for the ASID that is executing
+ * the instruction (a host/hypervisor or a guest), the ASID valid bit should
+ * always be set. On a host/hypervisor, the hardware will use the ASID value
+ * specified in EDX[15:0] (which should be 0). On a guest, the hardware will
+ * use the actual ASID value of the guest.
+ *
+ * TLBSYNC is used to ensure that pending INVLPGB invalidations initiated from
+ * this CPU have completed.
+ */
+static inline void __invlpgb(unsigned long asid, unsigned long pcid,
+ unsigned long addr, u16 nr_pages,
+ enum addr_stride stride, u8 flags)
+{
+ u64 rax = addr | flags | INVLPGB_FLAG_ASID;
+ u32 ecx = (stride << 31) | (nr_pages - 1);
+ u32 edx = (pcid << 16) | asid;
+
+ /* The low bits in rax are for flags. Verify addr is clean. */
+ VM_WARN_ON_ONCE(addr & ~PAGE_MASK);
+
+ /* INVLPGB; supported in binutils >= 2.36. */
+ asm volatile(".byte 0x0f, 0x01, 0xfe" :: "a" (rax), "c" (ecx), "d" (edx));
+}
+
+static inline void __invlpgb_all(unsigned long asid, unsigned long pcid, u8 flags)
+{
+ __invlpgb(asid, pcid, 0, 1, 0, flags);
+}
+
+static inline void __tlbsync(void)
+{
+ /*
+ * TLBSYNC waits for INVLPGB instructions originating on the same CPU
+	 * to have completed. Warn if the task has been migrated, since it
+	 * might then not be waiting on all the INVLPGBs issued during this
+	 * TLB invalidation sequence.
+ */
+ cant_migrate();
+
+	/* TLBSYNC: supported in binutils >= 2.36. */
+ asm volatile(".byte 0x0f, 0x01, 0xff" ::: "memory");
+}
+#else
+/* Some compilers (I'm looking at you clang!) simply can't do DCE */
+static inline void __invlpgb(unsigned long asid, unsigned long pcid,
+ unsigned long addr, u16 nr_pages,
+ enum addr_stride s, u8 flags) { }
+static inline void __invlpgb_all(unsigned long asid, unsigned long pcid, u8 flags) { }
+static inline void __tlbsync(void) { }
+#endif
+
+static inline void invlpgb_flush_user_nr_nosync(unsigned long pcid,
+ unsigned long addr,
+ u16 nr, bool stride)
+{
+ enum addr_stride str = stride ? PMD_STRIDE : PTE_STRIDE;
+ u8 flags = INVLPGB_FLAG_PCID | INVLPGB_FLAG_VA;
+
+ __invlpgb(0, pcid, addr, nr, str, flags);
+}
+
+/* Flush all mappings for a given PCID, not including globals. */
+static inline void invlpgb_flush_single_pcid_nosync(unsigned long pcid)
+{
+ __invlpgb_all(0, pcid, INVLPGB_FLAG_PCID);
+}
+
+/* Flush all mappings, including globals, for all PCIDs. */
+static inline void invlpgb_flush_all(void)
+{
+ /*
+ * TLBSYNC at the end needs to make sure all flushes done on the
+	 * current CPU have been executed system-wide. Therefore, make
+	 * sure nothing gets migrated in between; disabling preemption
+	 * is sufficient for that and cheaper than disabling migration.
+ */
+ guard(preempt)();
+ __invlpgb_all(0, 0, INVLPGB_FLAG_INCLUDE_GLOBAL);
+ __tlbsync();
+}
+
+/* Flush addr, including globals, for all PCIDs. */
+static inline void invlpgb_flush_addr_nosync(unsigned long addr, u16 nr)
+{
+ __invlpgb(0, 0, addr, nr, PTE_STRIDE, INVLPGB_FLAG_INCLUDE_GLOBAL);
+}
+
+/* Flush all mappings for all PCIDs except globals. */
+static inline void invlpgb_flush_all_nonglobals(void)
+{
+ guard(preempt)();
+ __invlpgb_all(0, 0, INVLPGB_MODE_ALL_NONGLOBALS);
+ __tlbsync();
+}
#endif /* _ASM_X86_TLB_H */
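Editor's note on the INVLPGB helpers above: one request is encoded into three registers, with RAX carrying the page-aligned address plus the flag bits, ECX the stride selector and the extra-page count, and EDX the PCID/ASID pair. The user-space sketch below mirrors that encoding for a hypothetical 8-page, PCID-targeted flush (the values are arbitrary and nothing here executes the instruction):

#include <stdio.h>
#include <stdint.h>

#define INVLPGB_FLAG_VA		(1u << 0)
#define INVLPGB_FLAG_PCID	(1u << 1)
#define INVLPGB_FLAG_ASID	(1u << 2)

int main(void)
{
	uint64_t addr = 0x7f0000000000ULL;	/* page-aligned start address */
	uint16_t pcid = 5, asid = 0, nr_pages = 8;
	unsigned int pmd_stride = 0;		/* 0 = PTE stride, 1 = PMD stride */

	uint64_t rax = addr | INVLPGB_FLAG_VA | INVLPGB_FLAG_PCID | INVLPGB_FLAG_ASID;
	uint32_t ecx = ((uint32_t)pmd_stride << 31) | (nr_pages - 1);
	uint32_t edx = ((uint32_t)pcid << 16) | asid;

	printf("rax=%#llx ecx=%#x edx=%#x\n",
	       (unsigned long long)rax, ecx, edx);
	return 0;
}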
diff --git a/arch/x86/include/asm/tlbbatch.h b/arch/x86/include/asm/tlbbatch.h
index 1ad56eb3e8a8..80aaf64ff25f 100644
--- a/arch/x86/include/asm/tlbbatch.h
+++ b/arch/x86/include/asm/tlbbatch.h
@@ -10,6 +10,11 @@ struct arch_tlbflush_unmap_batch {
* the PFNs being flushed..
*/
struct cpumask cpumask;
+ /*
+ * Set if pages were unmapped from any MM, even one that does not
+ * have active CPUs in its cpumask.
+ */
+ bool unmapped_pages;
};
#endif /* _ARCH_X86_TLBBATCH_H */
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 02fc2aa06e9e..a9af8759de34 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -6,6 +6,7 @@
#include <linux/mmu_notifier.h>
#include <linux/sched.h>
+#include <asm/barrier.h>
#include <asm/processor.h>
#include <asm/cpufeature.h>
#include <asm/special_insns.h>
@@ -183,6 +184,9 @@ static inline void cr4_init_shadow(void)
extern unsigned long mmu_cr4_features;
extern u32 *trampoline_cr4_features;
+/* How many pages can be invalidated with one INVLPGB. */
+extern u16 invlpgb_count_max;
+
extern void initialize_tlbstate_and_flush(void);
/*
@@ -231,6 +235,71 @@ void flush_tlb_one_kernel(unsigned long addr);
void flush_tlb_multi(const struct cpumask *cpumask,
const struct flush_tlb_info *info);
+static inline bool is_dyn_asid(u16 asid)
+{
+ return asid < TLB_NR_DYN_ASIDS;
+}
+
+static inline bool is_global_asid(u16 asid)
+{
+ return !is_dyn_asid(asid);
+}
+
+#ifdef CONFIG_BROADCAST_TLB_FLUSH
+static inline u16 mm_global_asid(struct mm_struct *mm)
+{
+ u16 asid;
+
+ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB))
+ return 0;
+
+ asid = smp_load_acquire(&mm->context.global_asid);
+
+ /* mm->context.global_asid is either 0, or a global ASID */
+ VM_WARN_ON_ONCE(asid && is_dyn_asid(asid));
+
+ return asid;
+}
+
+static inline void mm_init_global_asid(struct mm_struct *mm)
+{
+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) {
+ mm->context.global_asid = 0;
+ mm->context.asid_transition = false;
+ }
+}
+
+static inline void mm_assign_global_asid(struct mm_struct *mm, u16 asid)
+{
+ /*
+ * Notably flush_tlb_mm_range() -> broadcast_tlb_flush() ->
+ * finish_asid_transition() needs to observe asid_transition = true
+ * once it observes global_asid.
+ */
+ mm->context.asid_transition = true;
+ smp_store_release(&mm->context.global_asid, asid);
+}
+
+static inline void mm_clear_asid_transition(struct mm_struct *mm)
+{
+ WRITE_ONCE(mm->context.asid_transition, false);
+}
+
+static inline bool mm_in_asid_transition(struct mm_struct *mm)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB))
+ return false;
+
+ return mm && READ_ONCE(mm->context.asid_transition);
+}
+#else
+static inline u16 mm_global_asid(struct mm_struct *mm) { return 0; }
+static inline void mm_init_global_asid(struct mm_struct *mm) { }
+static inline void mm_assign_global_asid(struct mm_struct *mm, u16 asid) { }
+static inline void mm_clear_asid_transition(struct mm_struct *mm) { }
+static inline bool mm_in_asid_transition(struct mm_struct *mm) { return false; }
+#endif /* CONFIG_BROADCAST_TLB_FLUSH */
+
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#endif
@@ -242,7 +311,7 @@ void flush_tlb_multi(const struct cpumask *cpumask,
flush_tlb_mm_range((vma)->vm_mm, start, end, \
((vma)->vm_flags & VM_HUGETLB) \
? huge_page_shift(hstate_vma(vma)) \
- : PAGE_SHIFT, false)
+ : PAGE_SHIFT, true)
extern void flush_tlb_all(void);
extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
@@ -284,6 +353,7 @@ static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *b
{
inc_mm_tlb_gen(mm);
cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
+ batch->unmapped_pages = true;
mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
}
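Editor's note on the asid_transition/global_asid pairing above: it relies on release/acquire ordering. The writer publishes the ASID with smp_store_release() only after setting the transition flag, so any reader that observes a non-zero ASID through smp_load_acquire() also observes the flag. A user-space sketch of the same pattern with C11 atomics follows (the structure and names are simplified stand-ins, not the kernel types):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct mm_ctx {
	_Atomic unsigned short global_asid;
	bool asid_transition;
};

static void assign_global_asid(struct mm_ctx *ctx, unsigned short asid)
{
	ctx->asid_transition = true;				/* set flag first   */
	atomic_store_explicit(&ctx->global_asid, asid,
			      memory_order_release);		/* then publish ASID */
}

static unsigned short read_global_asid(struct mm_ctx *ctx)
{
	return atomic_load_explicit(&ctx->global_asid,
				    memory_order_acquire);	/* pairs with release */
}

int main(void)
{
	struct mm_ctx ctx = { 0, false };

	assign_global_asid(&ctx, 42);
	if (read_global_asid(&ctx))
		printf("transition flag visible: %d\n", ctx.asid_transition);
	return 0;
}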
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 1f1deaecd364..869b88061801 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -35,8 +35,6 @@ static inline int get_si_code(unsigned long condition)
return TRAP_BRKPT;
}
-extern int panic_on_unrecovered_nmi;
-
void math_emulate(struct math_emu_info *);
bool fault_in_kernel_space(unsigned long address);
diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h
index 85cc57cb6539..8f4579c5a6f8 100644
--- a/arch/x86/include/asm/unwind_hints.h
+++ b/arch/x86/include/asm/unwind_hints.h
@@ -5,7 +5,7 @@
#include "orc_types.h"
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
.macro UNWIND_HINT_END_OF_STACK
UNWIND_HINT type=UNWIND_HINT_TYPE_END_OF_STACK
@@ -88,6 +88,6 @@
#define UNWIND_HINT_RESTORE \
UNWIND_HINT(UNWIND_HINT_TYPE_RESTORE, 0, 0, 0)
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_UNWIND_HINTS_H */
diff --git a/arch/x86/include/asm/vdso/getrandom.h b/arch/x86/include/asm/vdso/getrandom.h
index 2bf9c0e970c3..785f8edcb9c9 100644
--- a/arch/x86/include/asm/vdso/getrandom.h
+++ b/arch/x86/include/asm/vdso/getrandom.h
@@ -5,7 +5,7 @@
#ifndef __ASM_VDSO_GETRANDOM_H
#define __ASM_VDSO_GETRANDOM_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/unistd.h>
@@ -37,6 +37,6 @@ static __always_inline const struct vdso_rng_data *__arch_get_vdso_rng_data(void
return &vdso_rng_data;
}
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* __ASM_VDSO_GETRANDOM_H */
diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h
index 375a34b0f365..428f3f4c2235 100644
--- a/arch/x86/include/asm/vdso/gettimeofday.h
+++ b/arch/x86/include/asm/vdso/gettimeofday.h
@@ -10,7 +10,7 @@
#ifndef __ASM_VDSO_GETTIMEOFDAY_H
#define __ASM_VDSO_GETTIMEOFDAY_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <uapi/linux/time.h>
#include <asm/vgtod.h>
@@ -350,6 +350,6 @@ static __always_inline u64 vdso_calc_ns(const struct vdso_data *vd, u64 cycles,
}
#define vdso_calc_ns vdso_calc_ns
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* __ASM_VDSO_GETTIMEOFDAY_H */
diff --git a/arch/x86/include/asm/vdso/processor.h b/arch/x86/include/asm/vdso/processor.h
index 2cbce97d29ea..c9b2ba7a9ec4 100644
--- a/arch/x86/include/asm/vdso/processor.h
+++ b/arch/x86/include/asm/vdso/processor.h
@@ -5,7 +5,7 @@
#ifndef __ASM_VDSO_PROCESSOR_H
#define __ASM_VDSO_PROCESSOR_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
static __always_inline void rep_nop(void)
@@ -22,6 +22,6 @@ struct getcpu_cache;
notrace long __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused);
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_VDSO_PROCESSOR_H */
diff --git a/arch/x86/include/asm/vdso/vsyscall.h b/arch/x86/include/asm/vdso/vsyscall.h
index 37b4a70559a8..72aedebb7648 100644
--- a/arch/x86/include/asm/vdso/vsyscall.h
+++ b/arch/x86/include/asm/vdso/vsyscall.h
@@ -9,7 +9,7 @@
#define VDSO_PAGE_PVCLOCK_OFFSET 0
#define VDSO_PAGE_HVCLOCK_OFFSET 1
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <vdso/datapage.h>
#include <asm/vgtod.h>
@@ -36,6 +36,6 @@ struct vdso_rng_data *__x86_get_k_vdso_rng_data(void)
/* The asm-generic header needs to be included after the definitions above */
#include <asm-generic/vdso/vsyscall.h>
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* __ASM_VDSO_VSYSCALL_H */
diff --git a/arch/x86/include/asm/vermagic.h b/arch/x86/include/asm/vermagic.h
index 75884d2cdec3..5d471253c755 100644
--- a/arch/x86/include/asm/vermagic.h
+++ b/arch/x86/include/asm/vermagic.h
@@ -15,8 +15,6 @@
#define MODULE_PROC_FAMILY "586TSC "
#elif defined CONFIG_M586MMX
#define MODULE_PROC_FAMILY "586MMX "
-#elif defined CONFIG_MCORE2
-#define MODULE_PROC_FAMILY "CORE2 "
#elif defined CONFIG_MATOM
#define MODULE_PROC_FAMILY "ATOM "
#elif defined CONFIG_M686
@@ -33,8 +31,6 @@
#define MODULE_PROC_FAMILY "K6 "
#elif defined CONFIG_MK7
#define MODULE_PROC_FAMILY "K7 "
-#elif defined CONFIG_MK8
-#define MODULE_PROC_FAMILY "K8 "
#elif defined CONFIG_MELAN
#define MODULE_PROC_FAMILY "ELAN "
#elif defined CONFIG_MCRUSOE
diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h
index baca0b00ef76..a078a2b0f032 100644
--- a/arch/x86/include/asm/xen/interface.h
+++ b/arch/x86/include/asm/xen/interface.h
@@ -72,7 +72,7 @@
#endif
#endif
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/* Explicitly size integers that represent pfns in the public interface
* with Xen so that on ARM we can have one ABI that works for 32 and 64
* bit guests. */
@@ -137,7 +137,7 @@ DEFINE_GUEST_HANDLE(xen_ulong_t);
#define TI_SET_DPL(_ti, _dpl) ((_ti)->flags |= (_dpl))
#define TI_SET_IF(_ti, _if) ((_ti)->flags |= ((!!(_if))<<2))
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
struct trap_info {
uint8_t vector; /* exception vector */
uint8_t flags; /* 0-3: privilege level; 4: clear event enable? */
@@ -186,7 +186,7 @@ struct arch_shared_info {
uint32_t wc_sec_hi;
#endif
};
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#ifdef CONFIG_X86_32
#include <asm/xen/interface_32.h>
@@ -196,7 +196,7 @@ struct arch_shared_info {
#include <asm/pvclock-abi.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/*
* The following is all CPU context. Note that the fpu_ctxt block is filled
* in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used.
@@ -376,7 +376,7 @@ struct xen_pmu_arch {
} c;
};
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
/*
* Prefix forces emulation of some non-trapping instructions.
diff --git a/arch/x86/include/asm/xen/interface_32.h b/arch/x86/include/asm/xen/interface_32.h
index dc40578abded..74d9768a9cf7 100644
--- a/arch/x86/include/asm/xen/interface_32.h
+++ b/arch/x86/include/asm/xen/interface_32.h
@@ -44,7 +44,7 @@
*/
#define __HYPERVISOR_VIRT_START 0xF5800000
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
struct cpu_user_regs {
uint32_t ebx;
@@ -85,7 +85,7 @@ typedef struct xen_callback xen_callback_t;
#define XEN_CALLBACK(__cs, __eip) \
((struct xen_callback){ .cs = (__cs), .eip = (unsigned long)(__eip) })
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
/*
diff --git a/arch/x86/include/asm/xen/interface_64.h b/arch/x86/include/asm/xen/interface_64.h
index c10f279aae93..38a19edb81a3 100644
--- a/arch/x86/include/asm/xen/interface_64.h
+++ b/arch/x86/include/asm/xen/interface_64.h
@@ -77,7 +77,7 @@
#define VGCF_in_syscall (1<<_VGCF_in_syscall)
#define VGCF_IN_SYSCALL VGCF_in_syscall
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
struct iret_context {
/* Top of stack (%rsp at point of hypercall). */
@@ -143,7 +143,7 @@ typedef unsigned long xen_callback_t;
#define XEN_CALLBACK(__cs, __rip) \
((unsigned long)(__rip))
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* _ASM_X86_XEN_INTERFACE_64_H */
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index 9b82eebd7add..dafbf581c515 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -26,7 +26,7 @@
#define XLF_5LEVEL_ENABLED (1<<6)
#define XLF_MEM_ENCRYPTION (1<<7)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
#include <linux/screen_info.h>
@@ -210,6 +210,6 @@ enum x86_hardware_subarch {
X86_NR_SUBARCHS,
};
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_BOOTPARAM_H */
diff --git a/arch/x86/include/uapi/asm/e820.h b/arch/x86/include/uapi/asm/e820.h
index 2f491efe3a12..55bc66867156 100644
--- a/arch/x86/include/uapi/asm/e820.h
+++ b/arch/x86/include/uapi/asm/e820.h
@@ -54,7 +54,7 @@
*/
#define E820_RESERVED_KERN 128
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
struct e820entry {
__u64 addr; /* start of memory segment */
@@ -76,7 +76,7 @@ struct e820map {
#define BIOS_ROM_BASE 0xffe00000
#define BIOS_ROM_END 0xffffffff
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _UAPI_ASM_X86_E820_H */
diff --git a/arch/x86/include/uapi/asm/ldt.h b/arch/x86/include/uapi/asm/ldt.h
index d62ac5db093b..a82c039d8e6a 100644
--- a/arch/x86/include/uapi/asm/ldt.h
+++ b/arch/x86/include/uapi/asm/ldt.h
@@ -12,7 +12,7 @@
/* The size of each LDT entry. */
#define LDT_ENTRY_SIZE 8
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/*
* Note on 64bit base and limit is ignored and you cannot set DS/ES/CS
* not to the default values if you still want to do syscalls. This
@@ -44,5 +44,5 @@ struct user_desc {
#define MODIFY_LDT_CONTENTS_STACK 1
#define MODIFY_LDT_CONTENTS_CODE 2
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* _ASM_X86_LDT_H */
diff --git a/arch/x86/include/uapi/asm/msr.h b/arch/x86/include/uapi/asm/msr.h
index e7516b402a00..4b8917ca28fe 100644
--- a/arch/x86/include/uapi/asm/msr.h
+++ b/arch/x86/include/uapi/asm/msr.h
@@ -2,7 +2,7 @@
#ifndef _UAPI_ASM_X86_MSR_H
#define _UAPI_ASM_X86_MSR_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
#include <linux/ioctl.h>
@@ -10,5 +10,5 @@
#define X86_IOC_RDMSR_REGS _IOWR('c', 0xA0, __u32[8])
#define X86_IOC_WRMSR_REGS _IOWR('c', 0xA1, __u32[8])
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _UAPI_ASM_X86_MSR_H */
diff --git a/arch/x86/include/uapi/asm/ptrace-abi.h b/arch/x86/include/uapi/asm/ptrace-abi.h
index 16074b9c93bb..5823584dea13 100644
--- a/arch/x86/include/uapi/asm/ptrace-abi.h
+++ b/arch/x86/include/uapi/asm/ptrace-abi.h
@@ -25,7 +25,7 @@
#else /* __i386__ */
-#if defined(__ASSEMBLY__) || defined(__FRAME_OFFSETS)
+#if defined(__ASSEMBLER__) || defined(__FRAME_OFFSETS)
/*
* C ABI says these regs are callee-preserved. They aren't saved on kernel entry
* unless syscall needs a complete, fully filled "struct pt_regs".
@@ -57,7 +57,7 @@
#define EFLAGS 144
#define RSP 152
#define SS 160
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
/* top of stack page */
#define FRAME_SIZE 168
@@ -87,7 +87,7 @@
#define PTRACE_SINGLEBLOCK 33 /* resume execution until next branch */
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
#endif
diff --git a/arch/x86/include/uapi/asm/ptrace.h b/arch/x86/include/uapi/asm/ptrace.h
index 85165c0edafc..e0b5b4f6226b 100644
--- a/arch/x86/include/uapi/asm/ptrace.h
+++ b/arch/x86/include/uapi/asm/ptrace.h
@@ -7,7 +7,7 @@
#include <asm/processor-flags.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#ifdef __i386__
/* this struct defines the way the registers are stored on the
@@ -81,6 +81,6 @@ struct pt_regs {
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* _UAPI_ASM_X86_PTRACE_H */
diff --git a/arch/x86/include/uapi/asm/setup_data.h b/arch/x86/include/uapi/asm/setup_data.h
index b111b0c18544..50c45ead4e7c 100644
--- a/arch/x86/include/uapi/asm/setup_data.h
+++ b/arch/x86/include/uapi/asm/setup_data.h
@@ -18,7 +18,7 @@
#define SETUP_INDIRECT (1<<31)
#define SETUP_TYPE_MAX (SETUP_ENUM_MAX | SETUP_INDIRECT)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
@@ -78,6 +78,6 @@ struct ima_setup_data {
__u64 size;
} __attribute__((packed));
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _UAPI_ASM_X86_SETUP_DATA_H */
diff --git a/arch/x86/include/uapi/asm/signal.h b/arch/x86/include/uapi/asm/signal.h
index f777346450ec..1067efabf18b 100644
--- a/arch/x86/include/uapi/asm/signal.h
+++ b/arch/x86/include/uapi/asm/signal.h
@@ -2,7 +2,7 @@
#ifndef _UAPI_ASM_X86_SIGNAL_H
#define _UAPI_ASM_X86_SIGNAL_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
#include <linux/compiler.h>
@@ -16,7 +16,7 @@ struct siginfo;
typedef unsigned long sigset_t;
#endif /* __KERNEL__ */
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#define SIGHUP 1
@@ -68,7 +68,7 @@ typedef unsigned long sigset_t;
#include <asm-generic/signal-defs.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
# ifndef __KERNEL__
@@ -106,6 +106,6 @@ typedef struct sigaltstack {
__kernel_size_t ss_size;
} stack_t;
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _UAPI_ASM_X86_SIGNAL_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index b43eb7e384eb..84cfa179802c 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -44,6 +44,8 @@ KCOV_INSTRUMENT_unwind_orc.o := n
KCOV_INSTRUMENT_unwind_frame.o := n
KCOV_INSTRUMENT_unwind_guess.o := n
+CFLAGS_head32.o := -fno-stack-protector
+CFLAGS_head64.o := -fno-stack-protector
CFLAGS_irq.o := -I $(src)/../include/asm/trace
obj-y += head_$(BITS).o
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index 5854f0b8f0f1..d5ac34186555 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -13,9 +13,11 @@
#include <linux/sched.h>
#include <acpi/processor.h>
+#include <asm/cpu_device_id.h>
#include <asm/cpuid.h>
#include <asm/mwait.h>
#include <asm/special_insns.h>
+#include <asm/smp.h>
/*
* Initialize bm_flags based on the CPU cache properties
@@ -47,12 +49,11 @@ void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags,
/*
* On all recent Intel platforms, ARB_DISABLE is a nop.
* So, set bm_control to zero to indicate that ARB_DISABLE
- * is not required while entering C3 type state on
- * P4, Core and beyond CPUs
+ * is not required while entering C3 type state.
*/
if (c->x86_vendor == X86_VENDOR_INTEL &&
- (c->x86 > 0xf || (c->x86 == 6 && c->x86_model >= 0x0f)))
- flags->bm_control = 0;
+ (c->x86 > 15 || (c->x86_vfm >= INTEL_CORE2_MEROM && c->x86_vfm <= INTEL_FAM6_LAST)))
+ flags->bm_control = 0;
if (c->x86_vendor == X86_VENDOR_CENTAUR) {
if (c->x86 > 6 || (c->x86 == 6 && c->x86_model == 0x0f &&
@@ -205,6 +206,16 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu,
}
EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe);
+void __noreturn acpi_processor_ffh_play_dead(struct acpi_processor_cx *cx)
+{
+ unsigned int cpu = smp_processor_id();
+ struct cstate_entry *percpu_entry;
+
+ percpu_entry = per_cpu_ptr(cpu_cstate_entry, cpu);
+ mwait_play_dead(percpu_entry->states[cx->index].eax);
+}
+EXPORT_SYMBOL_GPL(acpi_processor_ffh_play_dead);
+
void __cpuidle acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cx)
{
unsigned int cpu = smp_processor_id();
diff --git a/arch/x86/kernel/acpi/madt_playdead.S b/arch/x86/kernel/acpi/madt_playdead.S
index 4e498d28cdc8..aefb9cb583ad 100644
--- a/arch/x86/kernel/acpi/madt_playdead.S
+++ b/arch/x86/kernel/acpi/madt_playdead.S
@@ -14,6 +14,7 @@
* rsi: PGD of the identity mapping
*/
SYM_FUNC_START(asm_acpi_mp_play_dead)
+ ANNOTATE_NOENDBR
/* Turn off global entries. Following CR3 write will flush them. */
movq %cr4, %rdx
andq $~(X86_CR4_PGE), %rdx
diff --git a/arch/x86/kernel/acpi/madt_wakeup.c b/arch/x86/kernel/acpi/madt_wakeup.c
index d5ef6215583b..f36f28405dcc 100644
--- a/arch/x86/kernel/acpi/madt_wakeup.c
+++ b/arch/x86/kernel/acpi/madt_wakeup.c
@@ -70,58 +70,6 @@ static void __init free_pgt_page(void *pgt, void *dummy)
return memblock_free(pgt, PAGE_SIZE);
}
-/*
- * Make sure asm_acpi_mp_play_dead() is present in the identity mapping at
- * the same place as in the kernel page tables. asm_acpi_mp_play_dead() switches
- * to the identity mapping and the function has be present at the same spot in
- * the virtual address space before and after switching page tables.
- */
-static int __init init_transition_pgtable(pgd_t *pgd)
-{
- pgprot_t prot = PAGE_KERNEL_EXEC_NOENC;
- unsigned long vaddr, paddr;
- p4d_t *p4d;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
-
- vaddr = (unsigned long)asm_acpi_mp_play_dead;
- pgd += pgd_index(vaddr);
- if (!pgd_present(*pgd)) {
- p4d = (p4d_t *)alloc_pgt_page(NULL);
- if (!p4d)
- return -ENOMEM;
- set_pgd(pgd, __pgd(__pa(p4d) | _KERNPG_TABLE));
- }
- p4d = p4d_offset(pgd, vaddr);
- if (!p4d_present(*p4d)) {
- pud = (pud_t *)alloc_pgt_page(NULL);
- if (!pud)
- return -ENOMEM;
- set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE));
- }
- pud = pud_offset(p4d, vaddr);
- if (!pud_present(*pud)) {
- pmd = (pmd_t *)alloc_pgt_page(NULL);
- if (!pmd)
- return -ENOMEM;
- set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
- }
- pmd = pmd_offset(pud, vaddr);
- if (!pmd_present(*pmd)) {
- pte = (pte_t *)alloc_pgt_page(NULL);
- if (!pte)
- return -ENOMEM;
- set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE));
- }
- pte = pte_offset_kernel(pmd, vaddr);
-
- paddr = __pa(vaddr);
- set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, prot));
-
- return 0;
-}
-
static int __init acpi_mp_setup_reset(u64 reset_vector)
{
struct x86_mapping_info info = {
@@ -130,6 +78,7 @@ static int __init acpi_mp_setup_reset(u64 reset_vector)
.page_flag = __PAGE_KERNEL_LARGE_EXEC,
.kernpg_flag = _KERNPG_TABLE_NOENC,
};
+ unsigned long mstart, mend;
pgd_t *pgd;
pgd = alloc_pgt_page(NULL);
@@ -137,8 +86,6 @@ static int __init acpi_mp_setup_reset(u64 reset_vector)
return -ENOMEM;
for (int i = 0; i < nr_pfn_mapped; i++) {
- unsigned long mstart, mend;
-
mstart = pfn_mapped[i].start << PAGE_SHIFT;
mend = pfn_mapped[i].end << PAGE_SHIFT;
if (kernel_ident_mapping_init(&info, pgd, mstart, mend)) {
@@ -147,14 +94,24 @@ static int __init acpi_mp_setup_reset(u64 reset_vector)
}
}
- if (kernel_ident_mapping_init(&info, pgd,
- PAGE_ALIGN_DOWN(reset_vector),
- PAGE_ALIGN(reset_vector + 1))) {
+ mstart = PAGE_ALIGN_DOWN(reset_vector);
+ mend = mstart + PAGE_SIZE;
+ if (kernel_ident_mapping_init(&info, pgd, mstart, mend)) {
kernel_ident_mapping_free(&info, pgd);
return -ENOMEM;
}
- if (init_transition_pgtable(pgd)) {
+ /*
+ * Make sure asm_acpi_mp_play_dead() is present in the identity mapping
+ * at the same place as in the kernel page tables.
+ * asm_acpi_mp_play_dead() switches to the identity mapping and the
+ * function must be present at the same spot in the virtual address space
+ * before and after switching page tables.
+ */
+ info.offset = __START_KERNEL_map - phys_base;
+ mstart = PAGE_ALIGN_DOWN(__pa(asm_acpi_mp_play_dead));
+ mend = mstart + PAGE_SIZE;
+ if (kernel_ident_mapping_init(&info, pgd, mstart, mend)) {
kernel_ident_mapping_free(&info, pgd);
return -ENOMEM;
}
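A quick sanity check of the info.offset trick in the hunk above, as a standalone sketch with invented constants: for kernel text, __pa(va) == va - __START_KERNEL_map + phys_base, so installing the page at pa + (__START_KERNEL_map - phys_base) recreates the original virtual address of asm_acpi_mp_play_dead() inside the transition page tables.

#include <stdio.h>

int main(void)
{
	/* All values below are assumptions for the sake of the example. */
	unsigned long start_kernel_map = 0xffffffff80000000UL;
	unsigned long phys_base        = 0x01000000UL;
	unsigned long func_va          = 0xffffffff81234000UL;

	unsigned long pa     = func_va - start_kernel_map + phys_base;
	unsigned long offset = start_kernel_map - phys_base;

	/* kernel_ident_mapping_init() maps [pa, pa + PAGE_SIZE) at pa + offset. */
	printf("identity-mapped VA %#lx vs. kernel VA %#lx: %s\n",
	       pa + offset, func_va,
	       pa + offset == func_va ? "match" : "mismatch");
	return 0;
}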
diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S
index b200a193beeb..04f561f75e99 100644
--- a/arch/x86/kernel/acpi/wakeup_64.S
+++ b/arch/x86/kernel/acpi/wakeup_64.S
@@ -17,6 +17,7 @@
* Hooray, we are in Long 64-bit mode (but still running in low memory)
*/
SYM_FUNC_START(wakeup_long64)
+ ANNOTATE_NOENDBR
movq saved_magic(%rip), %rax
movq $0x123456789abcdef0, %rdx
cmpq %rdx, %rax
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index c71b575bf229..bf82c6f7d690 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -392,10 +392,8 @@ EXPORT_SYMBOL(BUG_func);
* Rewrite the "call BUG_func" replacement to point to the target of the
* indirect pv_ops call "call *disp(%ip)".
*/
-static int alt_replace_call(u8 *instr, u8 *insn_buff, struct alt_instr *a,
- struct module *mod)
+static int alt_replace_call(u8 *instr, u8 *insn_buff, struct alt_instr *a)
{
- u8 *wr_instr = module_writable_address(mod, instr);
void *target, *bug = &BUG_func;
s32 disp;
@@ -405,14 +403,14 @@ static int alt_replace_call(u8 *instr, u8 *insn_buff, struct alt_instr *a,
}
if (a->instrlen != 6 ||
- wr_instr[0] != CALL_RIP_REL_OPCODE ||
- wr_instr[1] != CALL_RIP_REL_MODRM) {
+ instr[0] != CALL_RIP_REL_OPCODE ||
+ instr[1] != CALL_RIP_REL_MODRM) {
pr_err("ALT_FLAG_DIRECT_CALL set for unrecognized indirect call\n");
BUG();
}
/* Skip CALL_RIP_REL_OPCODE and CALL_RIP_REL_MODRM */
- disp = *(s32 *)(wr_instr + 2);
+ disp = *(s32 *)(instr + 2);
#ifdef CONFIG_X86_64
/* ff 15 00 00 00 00 call *0x0(%rip) */
/* target address is stored at "next instruction + disp". */
@@ -450,8 +448,7 @@ static inline u8 * instr_va(struct alt_instr *i)
* to refetch changed I$ lines.
*/
void __init_or_module noinline apply_alternatives(struct alt_instr *start,
- struct alt_instr *end,
- struct module *mod)
+ struct alt_instr *end)
{
u8 insn_buff[MAX_PATCH_LEN];
u8 *instr, *replacement;
@@ -480,7 +477,6 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
*/
for (a = start; a < end; a++) {
int insn_buff_sz = 0;
- u8 *wr_instr, *wr_replacement;
/*
* In case of nested ALTERNATIVE()s the outer alternative might
@@ -494,11 +490,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
}
instr = instr_va(a);
- wr_instr = module_writable_address(mod, instr);
-
replacement = (u8 *)&a->repl_offset + a->repl_offset;
- wr_replacement = module_writable_address(mod, replacement);
-
BUG_ON(a->instrlen > sizeof(insn_buff));
BUG_ON(a->cpuid >= (NCAPINTS + NBUGINTS) * 32);
@@ -509,9 +501,9 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
* patch if feature is *NOT* present.
*/
if (!boot_cpu_has(a->cpuid) == !(a->flags & ALT_FLAG_NOT)) {
- memcpy(insn_buff, wr_instr, a->instrlen);
+ memcpy(insn_buff, instr, a->instrlen);
optimize_nops(instr, insn_buff, a->instrlen);
- text_poke_early(wr_instr, insn_buff, a->instrlen);
+ text_poke_early(instr, insn_buff, a->instrlen);
continue;
}
@@ -521,12 +513,11 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
instr, instr, a->instrlen,
replacement, a->replacementlen, a->flags);
- memcpy(insn_buff, wr_replacement, a->replacementlen);
+ memcpy(insn_buff, replacement, a->replacementlen);
insn_buff_sz = a->replacementlen;
if (a->flags & ALT_FLAG_DIRECT_CALL) {
- insn_buff_sz = alt_replace_call(instr, insn_buff, a,
- mod);
+ insn_buff_sz = alt_replace_call(instr, insn_buff, a);
if (insn_buff_sz < 0)
continue;
}
@@ -536,11 +527,11 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
apply_relocation(insn_buff, instr, a->instrlen, replacement, a->replacementlen);
- DUMP_BYTES(ALT, wr_instr, a->instrlen, "%px: old_insn: ", instr);
+ DUMP_BYTES(ALT, instr, a->instrlen, "%px: old_insn: ", instr);
DUMP_BYTES(ALT, replacement, a->replacementlen, "%px: rpl_insn: ", replacement);
DUMP_BYTES(ALT, insn_buff, insn_buff_sz, "%px: final_insn: ", instr);
- text_poke_early(wr_instr, insn_buff, insn_buff_sz);
+ text_poke_early(instr, insn_buff, insn_buff_sz);
}
kasan_enable_current();
@@ -731,20 +722,18 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes)
/*
* Generated by 'objtool --retpoline'.
*/
-void __init_or_module noinline apply_retpolines(s32 *start, s32 *end,
- struct module *mod)
+void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
{
s32 *s;
for (s = start; s < end; s++) {
void *addr = (void *)s + *s;
- void *wr_addr = module_writable_address(mod, addr);
struct insn insn;
int len, ret;
u8 bytes[16];
u8 op1, op2;
- ret = insn_decode_kernel(&insn, wr_addr);
+ ret = insn_decode_kernel(&insn, addr);
if (WARN_ON_ONCE(ret < 0))
continue;
@@ -752,6 +741,11 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end,
op2 = insn.opcode.bytes[1];
switch (op1) {
+ case 0x70 ... 0x7f: /* Jcc.d8 */
+ /* See cfi_paranoid. */
+ WARN_ON_ONCE(cfi_mode != CFI_FINEIBT);
+ continue;
+
case CALL_INSN_OPCODE:
case JMP32_INSN_OPCODE:
break;
@@ -772,9 +766,9 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end,
len = patch_retpoline(addr, &insn, bytes);
if (len == insn.length) {
optimize_nops(addr, bytes, len);
- DUMP_BYTES(RETPOLINE, ((u8*)wr_addr), len, "%px: orig: ", addr);
+ DUMP_BYTES(RETPOLINE, ((u8*)addr), len, "%px: orig: ", addr);
DUMP_BYTES(RETPOLINE, ((u8*)bytes), len, "%px: repl: ", addr);
- text_poke_early(wr_addr, bytes, len);
+ text_poke_early(addr, bytes, len);
}
}
}
@@ -810,8 +804,7 @@ static int patch_return(void *addr, struct insn *insn, u8 *bytes)
return i;
}
-void __init_or_module noinline apply_returns(s32 *start, s32 *end,
- struct module *mod)
+void __init_or_module noinline apply_returns(s32 *start, s32 *end)
{
s32 *s;
@@ -820,13 +813,12 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end,
for (s = start; s < end; s++) {
void *dest = NULL, *addr = (void *)s + *s;
- void *wr_addr = module_writable_address(mod, addr);
struct insn insn;
int len, ret;
u8 bytes[16];
u8 op;
- ret = insn_decode_kernel(&insn, wr_addr);
+ ret = insn_decode_kernel(&insn, addr);
if (WARN_ON_ONCE(ret < 0))
continue;
@@ -846,41 +838,59 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end,
len = patch_return(addr, &insn, bytes);
if (len == insn.length) {
- DUMP_BYTES(RET, ((u8*)wr_addr), len, "%px: orig: ", addr);
+ DUMP_BYTES(RET, ((u8*)addr), len, "%px: orig: ", addr);
DUMP_BYTES(RET, ((u8*)bytes), len, "%px: repl: ", addr);
- text_poke_early(wr_addr, bytes, len);
+ text_poke_early(addr, bytes, len);
}
}
}
-#else
-void __init_or_module noinline apply_returns(s32 *start, s32 *end,
- struct module *mod) { }
-#endif /* CONFIG_MITIGATION_RETHUNK */
+#else /* !CONFIG_MITIGATION_RETHUNK: */
+void __init_or_module noinline apply_returns(s32 *start, s32 *end) { }
+#endif /* !CONFIG_MITIGATION_RETHUNK */
#else /* !CONFIG_MITIGATION_RETPOLINE || !CONFIG_OBJTOOL */
-void __init_or_module noinline apply_retpolines(s32 *start, s32 *end,
- struct module *mod) { }
-void __init_or_module noinline apply_returns(s32 *start, s32 *end,
- struct module *mod) { }
+void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { }
+void __init_or_module noinline apply_returns(s32 *start, s32 *end) { }
-#endif /* CONFIG_MITIGATION_RETPOLINE && CONFIG_OBJTOOL */
+#endif /* !CONFIG_MITIGATION_RETPOLINE || !CONFIG_OBJTOOL */
#ifdef CONFIG_X86_KERNEL_IBT
-static void poison_cfi(void *addr, void *wr_addr);
+__noendbr bool is_endbr(u32 *val)
+{
+ u32 endbr;
+
+ __get_kernel_nofault(&endbr, val, u32, Efault);
+ return __is_endbr(endbr);
+
+Efault:
+ return false;
+}
-static void __init_or_module poison_endbr(void *addr, void *wr_addr, bool warn)
+#ifdef CONFIG_FINEIBT
+
+static __noendbr bool exact_endbr(u32 *val)
{
- u32 endbr, poison = gen_endbr_poison();
+ u32 endbr;
- if (WARN_ON_ONCE(get_kernel_nofault(endbr, wr_addr)))
- return;
+ __get_kernel_nofault(&endbr, val, u32, Efault);
+ return endbr == gen_endbr();
+
+Efault:
+ return false;
+}
- if (!is_endbr(endbr)) {
- WARN_ON_ONCE(warn);
+#endif
+
+static void poison_cfi(void *addr);
+
+static void __init_or_module poison_endbr(void *addr)
+{
+ u32 poison = gen_endbr_poison();
+
+ if (WARN_ON_ONCE(!is_endbr(addr)))
return;
- }
DPRINTK(ENDBR, "ENDBR at: %pS (%px)", addr, addr);
@@ -889,7 +899,7 @@ static void __init_or_module poison_endbr(void *addr, void *wr_addr, bool warn)
*/
DUMP_BYTES(ENDBR, ((u8*)addr), 4, "%px: orig: ", addr);
DUMP_BYTES(ENDBR, ((u8*)&poison), 4, "%px: repl: ", addr);
- text_poke_early(wr_addr, &poison, 4);
+ text_poke_early(addr, &poison, 4);
}
/*
@@ -898,36 +908,39 @@ static void __init_or_module poison_endbr(void *addr, void *wr_addr, bool warn)
* Seal the functions for indirect calls by clobbering the ENDBR instructions
* and the kCFI hash value.
*/
-void __init_or_module noinline apply_seal_endbr(s32 *start, s32 *end, struct module *mod)
+void __init_or_module noinline apply_seal_endbr(s32 *start, s32 *end)
{
s32 *s;
for (s = start; s < end; s++) {
void *addr = (void *)s + *s;
- void *wr_addr = module_writable_address(mod, addr);
- poison_endbr(addr, wr_addr, true);
+ poison_endbr(addr);
if (IS_ENABLED(CONFIG_FINEIBT))
- poison_cfi(addr - 16, wr_addr - 16);
+ poison_cfi(addr - 16);
}
}
-#else
+#else /* !CONFIG_X86_KERNEL_IBT: */
-void __init_or_module apply_seal_endbr(s32 *start, s32 *end, struct module *mod) { }
+void __init_or_module apply_seal_endbr(s32 *start, s32 *end) { }
-#endif /* CONFIG_X86_KERNEL_IBT */
+#endif /* !CONFIG_X86_KERNEL_IBT */
#ifdef CONFIG_CFI_AUTO_DEFAULT
-#define __CFI_DEFAULT CFI_AUTO
+# define __CFI_DEFAULT CFI_AUTO
#elif defined(CONFIG_CFI_CLANG)
-#define __CFI_DEFAULT CFI_KCFI
+# define __CFI_DEFAULT CFI_KCFI
#else
-#define __CFI_DEFAULT CFI_OFF
+# define __CFI_DEFAULT CFI_OFF
#endif
enum cfi_mode cfi_mode __ro_after_init = __CFI_DEFAULT;
+#ifdef CONFIG_FINEIBT_BHI
+bool cfi_bhi __ro_after_init = false;
+#endif
+
#ifdef CONFIG_CFI_CLANG
struct bpf_insn;
@@ -935,11 +948,7 @@ struct bpf_insn;
extern unsigned int __bpf_prog_runX(const void *ctx,
const struct bpf_insn *insn);
-/*
- * Force a reference to the external symbol so the compiler generates
- * __kcfi_typid.
- */
-__ADDRESSABLE(__bpf_prog_runX);
+KCFI_REFERENCE(__bpf_prog_runX);
/* u32 __ro_after_init cfi_bpf_hash = __kcfi_typeid___bpf_prog_runX; */
asm (
@@ -956,7 +965,7 @@ asm (
/* Must match bpf_callback_t */
extern u64 __bpf_callback_fn(u64, u64, u64, u64, u64);
-__ADDRESSABLE(__bpf_callback_fn);
+KCFI_REFERENCE(__bpf_callback_fn);
/* u32 __ro_after_init cfi_bpf_subprog_hash = __kcfi_typeid___bpf_callback_fn; */
asm (
@@ -991,6 +1000,21 @@ u32 cfi_get_func_hash(void *func)
return hash;
}
+
+int cfi_get_func_arity(void *func)
+{
+ bhi_thunk *target;
+ s32 disp;
+
+ if (cfi_mode != CFI_FINEIBT && !cfi_bhi)
+ return 0;
+
+ if (get_kernel_nofault(disp, func - 4))
+ return 0;
+
+ target = func + disp;
+ return target - __bhi_args;
+}
#endif
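cfi_get_func_arity() above recovers the argument count purely from pointer arithmetic: the rel32 stored four bytes before the function entry resolves, relative to the entry, to one slot of the __bhi_args[] thunk array, and subtracting the array base yields the index. A minimal userspace sketch of that idea (the slot size here is an assumption, not the kernel's layout):

#include <stdio.h>

typedef struct { unsigned char bytes[32]; } bhi_thunk;	/* size assumed */

static bhi_thunk thunks[8];				/* stands in for __bhi_args[] */

/* Pointer subtraction is scaled by sizeof(bhi_thunk), so it yields the index. */
static int thunk_to_arity(const bhi_thunk *target)
{
	return (int)(target - thunks);
}

int main(void)
{
	printf("arity = %d\n", thunk_to_arity(&thunks[3]));	/* prints 3 */
	return 0;
}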
#ifdef CONFIG_FINEIBT
@@ -998,6 +1022,8 @@ u32 cfi_get_func_hash(void *func)
static bool cfi_rand __ro_after_init = true;
static u32 cfi_seed __ro_after_init;
+static bool cfi_paranoid __ro_after_init = false;
+
/*
* Re-hash the CFI hash with a boot-time seed while making sure the result is
* not a valid ENDBR instruction.
@@ -1005,7 +1031,7 @@ static u32 cfi_seed __ro_after_init;
static u32 cfi_rehash(u32 hash)
{
hash ^= cfi_seed;
- while (unlikely(is_endbr(hash) || is_endbr(-hash))) {
+ while (unlikely(__is_endbr(hash) || __is_endbr(-hash))) {
bool lsb = hash & 1;
hash >>= 1;
if (lsb)
@@ -1037,6 +1063,25 @@ static __init int cfi_parse_cmdline(char *str)
cfi_mode = CFI_FINEIBT;
} else if (!strcmp(str, "norand")) {
cfi_rand = false;
+ } else if (!strcmp(str, "warn")) {
+ pr_alert("CFI mismatch non-fatal!\n");
+ cfi_warn = true;
+ } else if (!strcmp(str, "paranoid")) {
+ if (cfi_mode == CFI_FINEIBT) {
+ cfi_paranoid = true;
+ } else {
+ pr_err("Ignoring paranoid; depends on fineibt.\n");
+ }
+ } else if (!strcmp(str, "bhi")) {
+#ifdef CONFIG_FINEIBT_BHI
+ if (cfi_mode == CFI_FINEIBT) {
+ cfi_bhi = true;
+ } else {
+ pr_err("Ignoring bhi; depends on fineibt.\n");
+ }
+#else
+ pr_err("Ignoring bhi; depends on FINEIBT_BHI=y.\n");
+#endif
} else {
pr_err("Ignoring unknown cfi option (%s).", str);
}
@@ -1054,9 +1099,9 @@ early_param("cfi", cfi_parse_cmdline);
* __cfi_\func: __cfi_\func:
* movl $0x12345678,%eax // 5 endbr64 // 4
* nop subl $0x12345678,%r10d // 7
- * nop jz 1f // 2
- * nop ud2 // 2
- * nop 1: nop // 1
+ * nop jne __cfi_\func+6 // 2
+ * nop nop3 // 3
+ * nop
* nop
* nop
* nop
@@ -1068,34 +1113,53 @@ early_param("cfi", cfi_parse_cmdline);
*
* caller: caller:
* movl $(-0x12345678),%r10d // 6 movl $0x12345678,%r10d // 6
- * addl $-15(%r11),%r10d // 4 sub $16,%r11 // 4
+ * addl $-15(%r11),%r10d // 4 lea -0x10(%r11),%r11 // 4
* je 1f // 2 nop4 // 4
* ud2 // 2
- * 1: call __x86_indirect_thunk_r11 // 5 call *%r11; nop2; // 5
+ * 1: cs call __x86_indirect_thunk_r11 // 6 call *%r11; nop3; // 6
*
*/
-asm( ".pushsection .rodata \n"
- "fineibt_preamble_start: \n"
- " endbr64 \n"
- " subl $0x12345678, %r10d \n"
- " je fineibt_preamble_end \n"
- " ud2 \n"
- " nop \n"
- "fineibt_preamble_end: \n"
+/*
+ * <fineibt_preamble_start>:
+ * 0: f3 0f 1e fa endbr64
+ * 4: 41 81 <ea> 78 56 34 12 sub $0x12345678, %r10d
+ * b: 75 f9 jne 6 <fineibt_preamble_start+0x6>
+ * d: 0f 1f 00 nopl (%rax)
+ *
+ * Note that the JNE target is the 0xEA byte inside the SUB; this decodes as
+ * (bad) on x86_64 and raises #UD.
+ */
+asm( ".pushsection .rodata \n"
+ "fineibt_preamble_start: \n"
+ " endbr64 \n"
+ " subl $0x12345678, %r10d \n"
+ "fineibt_preamble_bhi: \n"
+ " jne fineibt_preamble_start+6 \n"
+ ASM_NOP3
+ "fineibt_preamble_end: \n"
".popsection\n"
);
extern u8 fineibt_preamble_start[];
+extern u8 fineibt_preamble_bhi[];
extern u8 fineibt_preamble_end[];
#define fineibt_preamble_size (fineibt_preamble_end - fineibt_preamble_start)
+#define fineibt_preamble_bhi (fineibt_preamble_bhi - fineibt_preamble_start)
+#define fineibt_preamble_ud 6
#define fineibt_preamble_hash 7
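The fineibt_preamble_bhi define above uses a small trick worth spelling out: because macro expansion is not recursive, the name keeps referring to the asm label inside its own definition, while every later use of it expands to a byte offset. A standalone illustration, not kernel code:

#include <stdio.h>

static unsigned char blob_start[] = { 0x90, 0x90, 0x90, 0x90 };
static unsigned char *blob_marker = blob_start + 2;	/* stands in for an asm label */

/* From here on, "blob_marker" means "offset of the label from blob_start". */
#define blob_marker (blob_marker - blob_start)

int main(void)
{
	printf("marker offset = %td\n", blob_marker);	/* prints 2 */
	return 0;
}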
+/*
+ * <fineibt_caller_start>:
+ * 0: 41 ba 78 56 34 12 mov $0x12345678, %r10d
+ * 6: 4d 8d 5b f0 lea -0x10(%r11), %r11
+ * a: 0f 1f 40 00 nopl 0x0(%rax)
+ */
asm( ".pushsection .rodata \n"
"fineibt_caller_start: \n"
" movl $0x12345678, %r10d \n"
- " sub $16, %r11 \n"
+ " lea -0x10(%r11), %r11 \n"
ASM_NOP4
"fineibt_caller_end: \n"
".popsection \n"
@@ -1109,13 +1173,62 @@ extern u8 fineibt_caller_end[];
#define fineibt_caller_jmp (fineibt_caller_size - 2)
-static u32 decode_preamble_hash(void *addr)
+/*
+ * Since FineIBT does hash validation on the callee side it is prone to
+ * circumvention attacks where a 'naked' ENDBR instruction exists that
+ * is not part of the fineibt_preamble sequence.
+ *
+ * Notably the x86 entry points must be ENDBR and equally cannot be
+ * fineibt_preamble.
+ *
+ * The fineibt_paranoid caller sequence adds additional caller side
+ * hash validation. This stops such circumvention attacks dead, but at the cost
+ * of adding a load.
+ *
+ * <fineibt_paranoid_start>:
+ * 0: 41 ba 78 56 34 12 mov $0x12345678, %r10d
+ * 6: 45 3b 53 f7 cmp -0x9(%r11), %r10d
+ * a: 4d 8d 5b <f0> lea -0x10(%r11), %r11
+ * e: 75 fd jne d <fineibt_paranoid_start+0xd>
+ * 10: 41 ff d3 call *%r11
+ * 13: 90 nop
+ *
+ * Notably LEA does not modify flags and can be reordered with the CMP,
+ * avoiding a dependency. Again, a non-taken (backwards) branch is used for
+ * the failure case, abusing the LEA's 0xf0 displacement byte as a LOCK
+ * prefix for the Jcc.d8, causing #UD.
+ */
+asm( ".pushsection .rodata \n"
+ "fineibt_paranoid_start: \n"
+ " movl $0x12345678, %r10d \n"
+ " cmpl -9(%r11), %r10d \n"
+ " lea -0x10(%r11), %r11 \n"
+ " jne fineibt_paranoid_start+0xd \n"
+ "fineibt_paranoid_ind: \n"
+ " call *%r11 \n"
+ " nop \n"
+ "fineibt_paranoid_end: \n"
+ ".popsection \n"
+);
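A small self-contained check of the branch arithmetic behind the fineibt_paranoid sequence above (illustration only; the bytes are copied from the comment): the JNE at offset 0xe encodes rel8 0xfd, so the taken branch lands on offset 0xd, the 0xf0 displacement byte of the LEA, and f0 75 fd then decodes as a LOCK-prefixed Jcc.d8, which raises #UD.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const uint8_t seq[] = {
		0x41, 0xba, 0x78, 0x56, 0x34, 0x12,	/* mov  $0x12345678, %r10d */
		0x45, 0x3b, 0x53, 0xf7,			/* cmp  -0x9(%r11), %r10d  */
		0x4d, 0x8d, 0x5b, 0xf0,			/* lea  -0x10(%r11), %r11  */
		0x75, 0xfd,				/* jne  . - 1              */
		0x41, 0xff, 0xd3,			/* call *%r11              */
		0x90,					/* nop                     */
	};
	unsigned int jcc = 0xe;
	int rel8 = (int8_t)seq[jcc + 1];
	unsigned int target = jcc + 2 + rel8;	/* rel8 is relative to the next insn */

	printf("jne target = %#x, byte there = %#x\n", target, seq[target]);
	return 0;
}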
+
+extern u8 fineibt_paranoid_start[];
+extern u8 fineibt_paranoid_ind[];
+extern u8 fineibt_paranoid_end[];
+
+#define fineibt_paranoid_size (fineibt_paranoid_end - fineibt_paranoid_start)
+#define fineibt_paranoid_ind (fineibt_paranoid_ind - fineibt_paranoid_start)
+#define fineibt_paranoid_ud 0xd
+
+static u32 decode_preamble_hash(void *addr, int *reg)
{
u8 *p = addr;
- /* b8 78 56 34 12 mov $0x12345678,%eax */
- if (p[0] == 0xb8)
+ /* b8+reg 78 56 34 12 movl $0x12345678,\reg */
+ if (p[0] >= 0xb8 && p[0] < 0xc0) {
+ if (reg)
+ *reg = p[0] - 0xb8;
return *(u32 *)(addr + 1);
+ }
return 0; /* invalid hash value */
}
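The widened check in decode_preamble_hash() above relies on a basic encoding fact: opcodes 0xb8 through 0xbf are the single-byte "movl $imm32, %r32" forms with the destination register packed into the opcode, and the 32-bit kCFI hash follows immediately. A runnable userspace sketch of that decode, for illustration only:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t decode_hash(const uint8_t *p, int *reg)
{
	uint32_t hash;

	if (p[0] < 0xb8 || p[0] >= 0xc0)
		return 0;			/* not a movl $imm32 preamble */

	if (reg)
		*reg = p[0] - 0xb8;		/* 0 = eax, 1 = ecx, ... */

	memcpy(&hash, p + 1, sizeof(hash));	/* native-endian load, like *(u32 *)(addr + 1) */
	return hash;
}

int main(void)
{
	const uint8_t preamble[] = { 0xbf, 0x78, 0x56, 0x34, 0x12 };	/* movl $0x12345678, %edi */
	int reg = -1;
	uint32_t hash = decode_hash(preamble, &reg);

	printf("reg=%d hash=%#x\n", reg, hash);		/* reg=7 (edi), hash=0x12345678 */
	return 0;
}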
@@ -1124,11 +1237,11 @@ static u32 decode_caller_hash(void *addr)
{
u8 *p = addr;
- /* 41 ba 78 56 34 12 mov $0x12345678,%r10d */
+ /* 41 ba 88 a9 cb ed mov $(-0x12345678),%r10d */
if (p[0] == 0x41 && p[1] == 0xba)
return -*(u32 *)(addr + 2);
- /* e8 0c 78 56 34 12 jmp.d8 +12 */
+ /* e8 0c 88 a9 cb ed jmp.d8 +12 */
if (p[0] == JMP8_INSN_OPCODE && p[1] == fineibt_caller_jmp)
return -*(u32 *)(addr + 2);
@@ -1136,7 +1249,7 @@ static u32 decode_caller_hash(void *addr)
}
/* .retpoline_sites */
-static int cfi_disable_callers(s32 *start, s32 *end, struct module *mod)
+static int cfi_disable_callers(s32 *start, s32 *end)
{
/*
* Disable kCFI by patching in a JMP.d8, this leaves the hash immediate
@@ -1148,23 +1261,20 @@ static int cfi_disable_callers(s32 *start, s32 *end, struct module *mod)
for (s = start; s < end; s++) {
void *addr = (void *)s + *s;
- void *wr_addr;
u32 hash;
addr -= fineibt_caller_size;
- wr_addr = module_writable_address(mod, addr);
- hash = decode_caller_hash(wr_addr);
-
+ hash = decode_caller_hash(addr);
if (!hash) /* nocfi callers */
continue;
- text_poke_early(wr_addr, jmp, 2);
+ text_poke_early(addr, jmp, 2);
}
return 0;
}
-static int cfi_enable_callers(s32 *start, s32 *end, struct module *mod)
+static int cfi_enable_callers(s32 *start, s32 *end)
{
/*
* Re-enable kCFI, undo what cfi_disable_callers() did.
@@ -1174,126 +1284,212 @@ static int cfi_enable_callers(s32 *start, s32 *end, struct module *mod)
for (s = start; s < end; s++) {
void *addr = (void *)s + *s;
- void *wr_addr;
u32 hash;
addr -= fineibt_caller_size;
- wr_addr = module_writable_address(mod, addr);
- hash = decode_caller_hash(wr_addr);
+ hash = decode_caller_hash(addr);
if (!hash) /* nocfi callers */
continue;
- text_poke_early(wr_addr, mov, 2);
+ text_poke_early(addr, mov, 2);
}
return 0;
}
/* .cfi_sites */
-static int cfi_rand_preamble(s32 *start, s32 *end, struct module *mod)
+static int cfi_rand_preamble(s32 *start, s32 *end)
{
s32 *s;
for (s = start; s < end; s++) {
void *addr = (void *)s + *s;
- void *wr_addr = module_writable_address(mod, addr);
u32 hash;
- hash = decode_preamble_hash(wr_addr);
+ hash = decode_preamble_hash(addr, NULL);
if (WARN(!hash, "no CFI hash found at: %pS %px %*ph\n",
addr, addr, 5, addr))
return -EINVAL;
hash = cfi_rehash(hash);
- text_poke_early(wr_addr + 1, &hash, 4);
+ text_poke_early(addr + 1, &hash, 4);
}
return 0;
}
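cfi_rand_preamble() above feeds every kCFI hash through cfi_rehash(), whose loop keeps mutating the value until neither it nor its negation could be mistaken for an ENDBR64 encoding. A hedged, runnable approximation of that loop; the tail of the kernel loop is truncated in this hunk, so a plain rotate-right stands in for it here as an assumption:

#include <stdint.h>
#include <stdio.h>

/* ENDBR64 is f3 0f 1e fa, i.e. 0xfa1e0ff3 read as a little-endian u32. */
static int looks_like_endbr(uint32_t val)
{
	return val == 0xfa1e0ff3u;
}

static uint32_t rehash(uint32_t hash, uint32_t seed)
{
	hash ^= seed;
	while (looks_like_endbr(hash) || looks_like_endbr(-hash)) {
		uint32_t lsb = hash & 1;

		hash >>= 1;
		if (lsb)
			hash |= 0x80000000u;	/* rotate right by one (assumed step) */
	}
	return hash;
}

int main(void)
{
	printf("%#x\n", rehash(0x12345678u, 0xdeadbeefu));
	return 0;
}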
-static int cfi_rewrite_preamble(s32 *start, s32 *end, struct module *mod)
+static void cfi_fineibt_bhi_preamble(void *addr, int arity)
+{
+ if (!arity)
+ return;
+
+ if (!cfi_warn && arity == 1) {
+ /*
+ * Crazy scheme to allow arity-1 inline:
+ *
+ * __cfi_foo:
+ * 0: f3 0f 1e fa endbr64
+ * 4: 41 81 <ea> 78 56 34 12 sub 0x12345678, %r10d
+ * b: 49 0f 45 fa cmovne %r10, %rdi
+ * f: 75 f5 jne __cfi_foo+6
+ * 11: 0f 1f 00 nopl (%rax)
+ *
+ * Code that directly calls foo()+0 decodes the tail end as:
+ *
+ * foo:
+ * 0: f5 cmc
+ * 1: 0f 1f 00 nopl (%rax)
+ *
+ * which clobbers CF, but does not affect anything ABI
+ * wise.
+ *
+ * Notably, this scheme is incompatible with permissive CFI
+ * because the CMOVNE will already have clobbered RDI on a hash
+ * mismatch, so execution cannot simply continue past the check.
+ */
+ const u8 magic[9] = {
+ 0x49, 0x0f, 0x45, 0xfa,
+ 0x75, 0xf5,
+ BYTES_NOP3,
+ };
+
+ text_poke_early(addr + fineibt_preamble_bhi, magic, 9);
+
+ return;
+ }
+
+ text_poke_early(addr + fineibt_preamble_bhi,
+ text_gen_insn(CALL_INSN_OPCODE,
+ addr + fineibt_preamble_bhi,
+ __bhi_args[arity]),
+ CALL_INSN_SIZE);
+}
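When the arity-1 inline trick above does not apply, cfi_fineibt_bhi_preamble() plants a 5-byte CALL to __bhi_args[arity]. The displacement arithmetic is the standard x86 one, sketched below in userspace form; this is not the kernel's text_gen_insn(), and the addresses are invented for the example:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define CALL_INSN_SIZE 5

/* A rel32 CALL stores dest - (address_of_call + 5). */
static void emit_call(uint8_t *buf, uint64_t insn_addr, uint64_t dest)
{
	int32_t rel = (int32_t)(dest - (insn_addr + CALL_INSN_SIZE));

	buf[0] = 0xe8;			/* CALL rel32 */
	memcpy(buf + 1, &rel, sizeof(rel));
}

int main(void)
{
	uint8_t buf[CALL_INSN_SIZE];

	emit_call(buf, 0xffffffff81000010ULL, 0xffffffff81200000ULL);
	printf("e8 %02x %02x %02x %02x\n", buf[1], buf[2], buf[3], buf[4]);
	return 0;
}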
+
+static int cfi_rewrite_preamble(s32 *start, s32 *end)
{
s32 *s;
for (s = start; s < end; s++) {
void *addr = (void *)s + *s;
- void *wr_addr = module_writable_address(mod, addr);
+ int arity;
u32 hash;
- hash = decode_preamble_hash(wr_addr);
+ /*
+ * When the function doesn't start with ENDBR the compiler will
+ * have determined there are no indirect calls to it and we
+ * don't need no CFI either.
+ */
+ if (!is_endbr(addr + 16))
+ continue;
+
+ hash = decode_preamble_hash(addr, &arity);
if (WARN(!hash, "no CFI hash found at: %pS %px %*ph\n",
addr, addr, 5, addr))
return -EINVAL;
- text_poke_early(wr_addr, fineibt_preamble_start, fineibt_preamble_size);
- WARN_ON(*(u32 *)(wr_addr + fineibt_preamble_hash) != 0x12345678);
- text_poke_early(wr_addr + fineibt_preamble_hash, &hash, 4);
+ text_poke_early(addr, fineibt_preamble_start, fineibt_preamble_size);
+ WARN_ON(*(u32 *)(addr + fineibt_preamble_hash) != 0x12345678);
+ text_poke_early(addr + fineibt_preamble_hash, &hash, 4);
+
+ WARN_ONCE(!IS_ENABLED(CONFIG_FINEIBT_BHI) && arity,
+ "kCFI preamble has wrong register at: %pS %*ph\n",
+ addr, 5, addr);
+
+ if (cfi_bhi)
+ cfi_fineibt_bhi_preamble(addr, arity);
}
return 0;
}
-static void cfi_rewrite_endbr(s32 *start, s32 *end, struct module *mod)
+static void cfi_rewrite_endbr(s32 *start, s32 *end)
{
s32 *s;
for (s = start; s < end; s++) {
void *addr = (void *)s + *s;
- void *wr_addr = module_writable_address(mod, addr);
- poison_endbr(addr + 16, wr_addr + 16, false);
+ if (!exact_endbr(addr + 16))
+ continue;
+
+ poison_endbr(addr + 16);
}
}
/* .retpoline_sites */
-static int cfi_rand_callers(s32 *start, s32 *end, struct module *mod)
+static int cfi_rand_callers(s32 *start, s32 *end)
{
s32 *s;
for (s = start; s < end; s++) {
void *addr = (void *)s + *s;
- void *wr_addr;
u32 hash;
addr -= fineibt_caller_size;
- wr_addr = module_writable_address(mod, addr);
- hash = decode_caller_hash(wr_addr);
+ hash = decode_caller_hash(addr);
if (hash) {
hash = -cfi_rehash(hash);
- text_poke_early(wr_addr + 2, &hash, 4);
+ text_poke_early(addr + 2, &hash, 4);
}
}
return 0;
}
-static int cfi_rewrite_callers(s32 *start, s32 *end, struct module *mod)
+static int cfi_rewrite_callers(s32 *start, s32 *end)
{
s32 *s;
+ BUG_ON(fineibt_paranoid_size != 20);
+
for (s = start; s < end; s++) {
void *addr = (void *)s + *s;
- void *wr_addr;
+ struct insn insn;
+ u8 bytes[20];
u32 hash;
+ int ret;
+ u8 op;
addr -= fineibt_caller_size;
- wr_addr = module_writable_address(mod, addr);
- hash = decode_caller_hash(wr_addr);
- if (hash) {
- text_poke_early(wr_addr, fineibt_caller_start, fineibt_caller_size);
- WARN_ON(*(u32 *)(wr_addr + fineibt_caller_hash) != 0x12345678);
- text_poke_early(wr_addr + fineibt_caller_hash, &hash, 4);
+ hash = decode_caller_hash(addr);
+ if (!hash)
+ continue;
+
+ if (!cfi_paranoid) {
+ text_poke_early(addr, fineibt_caller_start, fineibt_caller_size);
+ WARN_ON(*(u32 *)(addr + fineibt_caller_hash) != 0x12345678);
+ text_poke_early(addr + fineibt_caller_hash, &hash, 4);
+ /* rely on apply_retpolines() */
+ continue;
+ }
+
+ /* cfi_paranoid */
+ ret = insn_decode_kernel(&insn, addr + fineibt_caller_size);
+ if (WARN_ON_ONCE(ret < 0))
+ continue;
+
+ op = insn.opcode.bytes[0];
+ if (op != CALL_INSN_OPCODE && op != JMP32_INSN_OPCODE) {
+ WARN_ON_ONCE(1);
+ continue;
}
- /* rely on apply_retpolines() */
+
+ memcpy(bytes, fineibt_paranoid_start, fineibt_paranoid_size);
+ memcpy(bytes + fineibt_caller_hash, &hash, 4);
+
+ ret = emit_indirect(op, 11, bytes + fineibt_paranoid_ind);
+ if (WARN_ON_ONCE(ret != 3))
+ continue;
+
+ text_poke_early(addr, bytes, fineibt_paranoid_size);
}
return 0;
}
static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
- s32 *start_cfi, s32 *end_cfi, struct module *mod)
+ s32 *start_cfi, s32 *end_cfi, bool builtin)
{
- bool builtin = mod ? false : true;
int ret;
if (WARN_ONCE(fineibt_preamble_size != 16,
@@ -1302,8 +1498,15 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
if (cfi_mode == CFI_AUTO) {
cfi_mode = CFI_KCFI;
- if (HAS_KERNEL_IBT && cpu_feature_enabled(X86_FEATURE_IBT))
+ if (HAS_KERNEL_IBT && cpu_feature_enabled(X86_FEATURE_IBT)) {
+ /*
+ * FRED has much saner context on exception entry and
+ * is less easy to take advantage of.
+ */
+ if (!cpu_feature_enabled(X86_FEATURE_FRED))
+ cfi_paranoid = true;
cfi_mode = CFI_FINEIBT;
+ }
}
/*
@@ -1311,7 +1514,7 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
* rewrite them. This disables all CFI. If this succeeds but any of the
* later stages fails, we're without CFI.
*/
- ret = cfi_disable_callers(start_retpoline, end_retpoline, mod);
+ ret = cfi_disable_callers(start_retpoline, end_retpoline);
if (ret)
goto err;
@@ -1322,11 +1525,11 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
cfi_bpf_subprog_hash = cfi_rehash(cfi_bpf_subprog_hash);
}
- ret = cfi_rand_preamble(start_cfi, end_cfi, mod);
+ ret = cfi_rand_preamble(start_cfi, end_cfi);
if (ret)
goto err;
- ret = cfi_rand_callers(start_retpoline, end_retpoline, mod);
+ ret = cfi_rand_callers(start_retpoline, end_retpoline);
if (ret)
goto err;
}
@@ -1338,7 +1541,7 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
return;
case CFI_KCFI:
- ret = cfi_enable_callers(start_retpoline, end_retpoline, mod);
+ ret = cfi_enable_callers(start_retpoline, end_retpoline);
if (ret)
goto err;
@@ -1348,20 +1551,23 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
case CFI_FINEIBT:
/* place the FineIBT preamble at func()-16 */
- ret = cfi_rewrite_preamble(start_cfi, end_cfi, mod);
+ ret = cfi_rewrite_preamble(start_cfi, end_cfi);
if (ret)
goto err;
/* rewrite the callers to target func()-16 */
- ret = cfi_rewrite_callers(start_retpoline, end_retpoline, mod);
+ ret = cfi_rewrite_callers(start_retpoline, end_retpoline);
if (ret)
goto err;
/* now that nobody targets func()+0, remove ENDBR there */
- cfi_rewrite_endbr(start_cfi, end_cfi, mod);
+ cfi_rewrite_endbr(start_cfi, end_cfi);
- if (builtin)
- pr_info("Using FineIBT CFI\n");
+ if (builtin) {
+ pr_info("Using %sFineIBT%s CFI\n",
+ cfi_paranoid ? "paranoid " : "",
+ cfi_bhi ? "+BHI" : "");
+ }
return;
default:
@@ -1377,11 +1583,25 @@ static inline void poison_hash(void *addr)
*(u32 *)addr = 0;
}
-static void poison_cfi(void *addr, void *wr_addr)
+static void poison_cfi(void *addr)
{
+ /*
+ * Compilers manage to be inconsistent with ENDBR vs __cfi prefixes,
+ * some (static) functions for which they can determine the address
+ * is never taken do not get a __cfi prefix, but *DO* get an ENDBR.
+ *
+ * As such, these functions will get sealed, but we need to be careful
+ * to not unconditionally scribble the previous function.
+ */
switch (cfi_mode) {
case CFI_FINEIBT:
/*
+ * FineIBT prefix should start with an ENDBR.
+ */
+ if (!is_endbr(addr))
+ break;
+
+ /*
* __cfi_\func:
* osp nopl (%rax)
* subl $0, %r10d
@@ -1389,17 +1609,23 @@ static void poison_cfi(void *addr, void *wr_addr)
* ud2
* 1: nop
*/
- poison_endbr(addr, wr_addr, false);
- poison_hash(wr_addr + fineibt_preamble_hash);
+ poison_endbr(addr);
+ poison_hash(addr + fineibt_preamble_hash);
break;
case CFI_KCFI:
/*
+ * kCFI prefix should start with a valid hash.
+ */
+ if (!decode_preamble_hash(addr, NULL))
+ break;
+
+ /*
* __cfi_\func:
* movl $0, %eax
* .skip 11, 0x90
*/
- poison_hash(wr_addr + 1);
+ poison_hash(addr + 1);
break;
default:
@@ -1407,24 +1633,135 @@ static void poison_cfi(void *addr, void *wr_addr)
}
}
-#else
+/*
+ * When regs->ip points to a 0xEA byte in the FineIBT preamble,
+ * return true and fill out target and type.
+ *
+ * We validate the preamble by checking for the ENDBR instruction relative
+ * to the 0xEA byte.
+ */
+static bool decode_fineibt_preamble(struct pt_regs *regs, unsigned long *target, u32 *type)
+{
+ unsigned long addr = regs->ip - fineibt_preamble_ud;
+ u32 hash;
+
+ if (!exact_endbr((void *)addr))
+ return false;
+
+ *target = addr + fineibt_preamble_size;
+
+ __get_kernel_nofault(&hash, addr + fineibt_preamble_hash, u32, Efault);
+ *type = (u32)regs->r10 + hash;
+
+ /*
+ * Since regs->ip points to the middle of an instruction; it cannot
+ * continue with the normal fixup.
+ */
+ regs->ip = *target;
+
+ return true;
+
+Efault:
+ return false;
+}
+
+/*
+ * regs->ip points to one of the UD2 in __bhi_args[].
+ */
+static bool decode_fineibt_bhi(struct pt_regs *regs, unsigned long *target, u32 *type)
+{
+ unsigned long addr;
+ u32 hash;
+
+ if (!cfi_bhi)
+ return false;
+
+ if (regs->ip < (unsigned long)__bhi_args ||
+ regs->ip >= (unsigned long)__bhi_args_end)
+ return false;
+
+ /*
+ * Fetch the return address from the stack, this points to the
+ * FineIBT preamble. Since the CALL instruction is in the last 5
+ * bytes of the preamble, the return address is in fact the target
+ * address.
+ */
+ __get_kernel_nofault(&addr, regs->sp, unsigned long, Efault);
+ *target = addr;
+
+ addr -= fineibt_preamble_size;
+ if (!exact_endbr((void *)addr))
+ return false;
+
+ __get_kernel_nofault(&hash, addr + fineibt_preamble_hash, u32, Efault);
+ *type = (u32)regs->r10 + hash;
+
+ /*
+ * The UD2 sites are constructed with a RET immediately following,
+ * as such the non-fatal case can use the regular fixup.
+ */
+ return true;
+
+Efault:
+ return false;
+}
+
+/*
+ * regs->ip points to a LOCK Jcc.d8 instruction from the fineibt_paranoid_start[]
+ * sequence.
+ */
+static bool decode_fineibt_paranoid(struct pt_regs *regs, unsigned long *target, u32 *type)
+{
+ unsigned long addr = regs->ip - fineibt_paranoid_ud;
+ u32 hash;
+
+ if (!cfi_paranoid || !is_cfi_trap(addr + fineibt_caller_size - LEN_UD2))
+ return false;
+
+ __get_kernel_nofault(&hash, addr + fineibt_caller_hash, u32, Efault);
+ *target = regs->r11 + fineibt_preamble_size;
+ *type = regs->r10;
+
+ /*
+ * Since the trapping instruction is the exact, but LOCK prefixed,
+ * Jcc.d8 that got us here, the normal fixup will work.
+ */
+ return true;
+
+Efault:
+ return false;
+}
+
+bool decode_fineibt_insn(struct pt_regs *regs, unsigned long *target, u32 *type)
+{
+ if (decode_fineibt_paranoid(regs, target, type))
+ return true;
+
+ if (decode_fineibt_bhi(regs, target, type))
+ return true;
+
+ return decode_fineibt_preamble(regs, target, type);
+}
+
+#else /* !CONFIG_FINEIBT: */
static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
- s32 *start_cfi, s32 *end_cfi, struct module *mod)
+ s32 *start_cfi, s32 *end_cfi, bool builtin)
{
}
#ifdef CONFIG_X86_KERNEL_IBT
-static void poison_cfi(void *addr, void *wr_addr) { }
+static void poison_cfi(void *addr) { }
#endif
-#endif
+#endif /* !CONFIG_FINEIBT */
void apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
- s32 *start_cfi, s32 *end_cfi, struct module *mod)
+ s32 *start_cfi, s32 *end_cfi)
{
return __apply_fineibt(start_retpoline, end_retpoline,
- start_cfi, end_cfi, mod);
+ start_cfi, end_cfi,
+ /* .builtin = */ false);
}
#ifdef CONFIG_SMP
@@ -1721,27 +2058,27 @@ void __init alternative_instructions(void)
paravirt_set_cap();
__apply_fineibt(__retpoline_sites, __retpoline_sites_end,
- __cfi_sites, __cfi_sites_end, NULL);
+ __cfi_sites, __cfi_sites_end, true);
/*
* Rewrite the retpolines, must be done before alternatives since
* those can rewrite the retpoline thunks.
*/
- apply_retpolines(__retpoline_sites, __retpoline_sites_end, NULL);
- apply_returns(__return_sites, __return_sites_end, NULL);
-
- apply_alternatives(__alt_instructions, __alt_instructions_end, NULL);
+ apply_retpolines(__retpoline_sites, __retpoline_sites_end);
+ apply_returns(__return_sites, __return_sites_end);
/*
- * Now all calls are established. Apply the call thunks if
- * required.
+ * Adjust all CALL instructions to point to func()-10, including
+ * those in .altinstr_replacement.
*/
callthunks_patch_builtin_calls();
+ apply_alternatives(__alt_instructions, __alt_instructions_end);
+
/*
* Seal all functions that do not have their address taken.
*/
- apply_seal_endbr(__ibt_endbr_seal, __ibt_endbr_seal_end, NULL);
+ apply_seal_endbr(__ibt_endbr_seal, __ibt_endbr_seal_end);
#ifdef CONFIG_SMP
/* Patch to UP if other cpus not imminent. */
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 67e773744edb..6d12a9b69432 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -73,7 +73,6 @@ static int amd_cache_northbridges(void)
amd_northbridges.nb = nb;
for (i = 0; i < amd_northbridges.num; i++) {
- node_to_amd_nb(i)->root = amd_node_get_root(i);
node_to_amd_nb(i)->misc = amd_node_get_func(i, 3);
/*
diff --git a/arch/x86/kernel/amd_node.c b/arch/x86/kernel/amd_node.c
index d2ec7fd555c5..b670fa85c61b 100644
--- a/arch/x86/kernel/amd_node.c
+++ b/arch/x86/kernel/amd_node.c
@@ -8,6 +8,7 @@
* Author: Yazen Ghannam <Yazen.Ghannam@amd.com>
*/
+#include <linux/debugfs.h>
#include <asm/amd_node.h>
/*
@@ -93,10 +94,14 @@ static struct pci_dev **amd_roots;
/* Protect the PCI config register pairs used for SMN. */
static DEFINE_MUTEX(smn_mutex);
+static bool smn_exclusive;
#define SMN_INDEX_OFFSET 0x60
#define SMN_DATA_OFFSET 0x64
+#define HSMP_INDEX_OFFSET 0xc4
+#define HSMP_DATA_OFFSET 0xc8
+
/*
* SMN accesses may fail in ways that are difficult to detect here in the called
* functions amd_smn_read() and amd_smn_write(). Therefore, callers must do
@@ -146,6 +151,9 @@ static int __amd_smn_rw(u8 i_off, u8 d_off, u16 node, u32 address, u32 *value, b
if (!root)
return err;
+ if (!smn_exclusive)
+ return err;
+
guard(mutex)(&smn_mutex);
err = pci_write_config_dword(root, i_off, address);
@@ -179,6 +187,93 @@ int __must_check amd_smn_write(u16 node, u32 address, u32 value)
}
EXPORT_SYMBOL_GPL(amd_smn_write);
+int __must_check amd_smn_hsmp_rdwr(u16 node, u32 address, u32 *value, bool write)
+{
+ return __amd_smn_rw(HSMP_INDEX_OFFSET, HSMP_DATA_OFFSET, node, address, value, write);
+}
+EXPORT_SYMBOL_GPL(amd_smn_hsmp_rdwr);
+
+static struct dentry *debugfs_dir;
+static u16 debug_node;
+static u32 debug_address;
+
+static ssize_t smn_node_write(struct file *file, const char __user *userbuf,
+ size_t count, loff_t *ppos)
+{
+ u16 node;
+ int ret;
+
+ ret = kstrtou16_from_user(userbuf, count, 0, &node);
+ if (ret)
+ return ret;
+
+ if (node >= amd_num_nodes())
+ return -ENODEV;
+
+ debug_node = node;
+ return count;
+}
+
+static int smn_node_show(struct seq_file *m, void *v)
+{
+ seq_printf(m, "0x%08x\n", debug_node);
+ return 0;
+}
+
+static ssize_t smn_address_write(struct file *file, const char __user *userbuf,
+ size_t count, loff_t *ppos)
+{
+ int ret;
+
+ ret = kstrtouint_from_user(userbuf, count, 0, &debug_address);
+ if (ret)
+ return ret;
+
+ return count;
+}
+
+static int smn_address_show(struct seq_file *m, void *v)
+{
+ seq_printf(m, "0x%08x\n", debug_address);
+ return 0;
+}
+
+static int smn_value_show(struct seq_file *m, void *v)
+{
+ u32 val;
+ int ret;
+
+ ret = amd_smn_read(debug_node, debug_address, &val);
+ if (ret)
+ return ret;
+
+ seq_printf(m, "0x%08x\n", val);
+ return 0;
+}
+
+static ssize_t smn_value_write(struct file *file, const char __user *userbuf,
+ size_t count, loff_t *ppos)
+{
+ u32 val;
+ int ret;
+
+ ret = kstrtouint_from_user(userbuf, count, 0, &val);
+ if (ret)
+ return ret;
+
+ add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
+
+ ret = amd_smn_write(debug_node, debug_address, val);
+ if (ret)
+ return ret;
+
+ return count;
+}
+
+DEFINE_SHOW_STORE_ATTRIBUTE(smn_node);
+DEFINE_SHOW_STORE_ATTRIBUTE(smn_address);
+DEFINE_SHOW_STORE_ATTRIBUTE(smn_value);
+
static int amd_cache_roots(void)
{
u16 node, num_nodes = amd_num_nodes();
@@ -193,6 +288,48 @@ static int amd_cache_roots(void)
return 0;
}
+static int reserve_root_config_spaces(void)
+{
+ struct pci_dev *root = NULL;
+ struct pci_bus *bus = NULL;
+
+ while ((bus = pci_find_next_bus(bus))) {
+ /* Root device is Device 0 Function 0 on each Primary Bus. */
+ root = pci_get_slot(bus, 0);
+ if (!root)
+ continue;
+
+ if (root->vendor != PCI_VENDOR_ID_AMD &&
+ root->vendor != PCI_VENDOR_ID_HYGON)
+ continue;
+
+ pci_dbg(root, "Reserving PCI config space\n");
+
+ /*
+ * There are a few SMN index/data pairs and other registers
+ * that shouldn't be accessed by user space.
+ * So reserve the entire PCI config space for simplicity rather
+ * than covering specific registers piecemeal.
+ */
+ if (!pci_request_config_region_exclusive(root, 0, PCI_CFG_SPACE_SIZE, NULL)) {
+ pci_err(root, "Failed to reserve config space\n");
+ return -EEXIST;
+ }
+ }
+
+ smn_exclusive = true;
+ return 0;
+}
+
+static bool enable_dfs;
+
+static int __init amd_smn_enable_dfs(char *str)
+{
+ enable_dfs = true;
+ return 1;
+}
+__setup("amd_smn_debugfs_enable", amd_smn_enable_dfs);
+
static int __init amd_smn_init(void)
{
int err;
@@ -209,6 +346,18 @@ static int __init amd_smn_init(void)
if (err)
return err;
+ err = reserve_root_config_spaces();
+ if (err)
+ return err;
+
+ if (enable_dfs) {
+ debugfs_dir = debugfs_create_dir("amd_smn", arch_debugfs_dir);
+
+ debugfs_create_file("node", 0600, debugfs_dir, NULL, &smn_node_fops);
+ debugfs_create_file("address", 0600, debugfs_dir, NULL, &smn_address_fops);
+ debugfs_create_file("value", 0600, debugfs_dir, NULL, &smn_value_fops);
+ }
+
return 0;
}
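A hedged usage sketch for the debugfs interface registered above. The paths assume arch_debugfs_dir shows up as /sys/kernel/debug/x86, that the kernel was booted with amd_smn_debugfs_enable, and that the program runs as root; the register address is an arbitrary placeholder. Only a read is performed, since writing the value file taints the kernel.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int write_str(const char *path, const char *s)
{
	int fd = open(path, O_WRONLY);
	ssize_t ret;

	if (fd < 0) {
		perror(path);
		return -1;
	}
	ret = write(fd, s, strlen(s));
	close(fd);
	return ret < 0 ? -1 : 0;
}

int main(void)
{
	char buf[32] = "";
	int fd;

	if (write_str("/sys/kernel/debug/x86/amd_smn/node", "0") ||
	    write_str("/sys/kernel/debug/x86/amd_smn/address", "0x0"))
		return 1;

	fd = open("/sys/kernel/debug/x86/amd_smn/value", O_RDONLY);
	if (fd < 0 || read(fd, buf, sizeof(buf) - 1) < 0) {
		perror("value");
		return 1;
	}
	printf("SMN value: %s", buf);
	close(fd);
	return 0;
}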
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index 3bf0487cf3b7..52d1808ee360 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -23,8 +23,5 @@ obj-$(CONFIG_X86_X2APIC) += x2apic_cluster.o
obj-y += apic_flat_64.o
endif
-# APIC probe will depend on the listing order here
-obj-$(CONFIG_X86_BIGSMP) += bigsmp_32.o
-
# For 32bit, probe_32 need to be listed last
obj-$(CONFIG_X86_LOCAL_APIC) += probe_$(BITS).o
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index e893dc6f11c1..62584a347931 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1371,8 +1371,6 @@ void __init apic_intr_mode_init(void)
x86_64_probe_apic();
- x86_32_install_bigsmp();
-
if (x86_platform.apic_post_init)
x86_platform.apic_post_init();
@@ -1674,7 +1672,6 @@ static __init void apic_read_boot_cpu_id(bool x2apic)
boot_cpu_apic_version = GET_APIC_VERSION(apic_read(APIC_LVR));
}
topology_register_boot_apic(boot_cpu_physical_apicid);
- x86_32_probe_bigsmp_early();
}
#ifdef CONFIG_X86_X2APIC
@@ -2014,8 +2011,8 @@ static bool __init detect_init_APIC(void)
case X86_VENDOR_HYGON:
break;
case X86_VENDOR_INTEL:
- if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 ||
- (boot_cpu_data.x86 == 5 && boot_cpu_has(X86_FEATURE_APIC)))
+ if ((boot_cpu_data.x86 == 5 && boot_cpu_has(X86_FEATURE_APIC)) ||
+ boot_cpu_data.x86_vfm >= INTEL_PENTIUM_PRO)
break;
goto no_apic;
default:
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
deleted file mode 100644
index 9285d500d5b4..000000000000
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ /dev/null
@@ -1,105 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * APIC driver for "bigsmp" xAPIC machines with more than 8 virtual CPUs.
- *
- * Drives the local APIC in "clustered mode".
- */
-#include <linux/cpumask.h>
-#include <linux/dmi.h>
-#include <linux/smp.h>
-
-#include <asm/apic.h>
-#include <asm/io_apic.h>
-
-#include "local.h"
-
-static u32 bigsmp_get_apic_id(u32 x)
-{
- return (x >> 24) & 0xFF;
-}
-
-static void bigsmp_send_IPI_allbutself(int vector)
-{
- default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector);
-}
-
-static void bigsmp_send_IPI_all(int vector)
-{
- default_send_IPI_mask_sequence_phys(cpu_online_mask, vector);
-}
-
-static int dmi_bigsmp; /* can be set by dmi scanners */
-
-static int hp_ht_bigsmp(const struct dmi_system_id *d)
-{
- printk(KERN_NOTICE "%s detected: force use of apic=bigsmp\n", d->ident);
- dmi_bigsmp = 1;
-
- return 0;
-}
-
-
-static const struct dmi_system_id bigsmp_dmi_table[] = {
- { hp_ht_bigsmp, "HP ProLiant DL760 G2",
- { DMI_MATCH(DMI_BIOS_VENDOR, "HP"),
- DMI_MATCH(DMI_BIOS_VERSION, "P44-"),
- }
- },
-
- { hp_ht_bigsmp, "HP ProLiant DL740",
- { DMI_MATCH(DMI_BIOS_VENDOR, "HP"),
- DMI_MATCH(DMI_BIOS_VERSION, "P47-"),
- }
- },
- { } /* NULL entry stops DMI scanning */
-};
-
-static int probe_bigsmp(void)
-{
- return dmi_check_system(bigsmp_dmi_table);
-}
-
-static struct apic apic_bigsmp __ro_after_init = {
-
- .name = "bigsmp",
- .probe = probe_bigsmp,
-
- .dest_mode_logical = false,
-
- .disable_esr = 1,
-
- .cpu_present_to_apicid = default_cpu_present_to_apicid,
-
- .max_apic_id = 0xFE,
- .get_apic_id = bigsmp_get_apic_id,
-
- .calc_dest_apicid = apic_default_calc_apicid,
-
- .send_IPI = default_send_IPI_single_phys,
- .send_IPI_mask = default_send_IPI_mask_sequence_phys,
- .send_IPI_mask_allbutself = NULL,
- .send_IPI_allbutself = bigsmp_send_IPI_allbutself,
- .send_IPI_all = bigsmp_send_IPI_all,
- .send_IPI_self = default_send_IPI_self,
-
- .read = native_apic_mem_read,
- .write = native_apic_mem_write,
- .eoi = native_apic_mem_eoi,
- .icr_read = native_apic_icr_read,
- .icr_write = native_apic_icr_write,
- .wait_icr_idle = apic_mem_wait_icr_idle,
- .safe_wait_icr_idle = apic_mem_wait_icr_idle_timeout,
-};
-
-bool __init apic_bigsmp_possible(bool cmdline_override)
-{
- return apic == &apic_bigsmp || !cmdline_override;
-}
-
-void __init apic_bigsmp_force(void)
-{
- if (apic != &apic_bigsmp)
- apic_install_driver(&apic_bigsmp);
-}
-
-apic_driver(apic_bigsmp);
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index 5da693d633b7..23025a3a1db4 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -287,34 +287,4 @@ void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector)
__default_send_IPI_dest_field(mask, vector, APIC_DEST_LOGICAL);
local_irq_restore(flags);
}
-
-#ifdef CONFIG_SMP
-static int convert_apicid_to_cpu(u32 apic_id)
-{
- int i;
-
- for_each_possible_cpu(i) {
- if (per_cpu(x86_cpu_to_apicid, i) == apic_id)
- return i;
- }
- return -1;
-}
-
-int safe_smp_processor_id(void)
-{
- u32 apicid;
- int cpuid;
-
- if (!boot_cpu_has(X86_FEATURE_APIC))
- return 0;
-
- apicid = read_apic_id();
- if (apicid == BAD_APICID)
- return 0;
-
- cpuid = convert_apicid_to_cpu(apicid);
-
- return cpuid >= 0 ? cpuid : 0;
-}
-#endif
#endif
diff --git a/arch/x86/kernel/apic/local.h b/arch/x86/kernel/apic/local.h
index 842fe28496be..bdcf609eb283 100644
--- a/arch/x86/kernel/apic/local.h
+++ b/arch/x86/kernel/apic/local.h
@@ -65,17 +65,4 @@ void default_send_IPI_self(int vector);
void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, int vector);
void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask, int vector);
void default_send_IPI_mask_logical(const struct cpumask *mask, int vector);
-void x86_32_probe_bigsmp_early(void);
-void x86_32_install_bigsmp(void);
-#else
-static inline void x86_32_probe_bigsmp_early(void) { }
-static inline void x86_32_install_bigsmp(void) { }
-#endif
-
-#ifdef CONFIG_X86_BIGSMP
-bool apic_bigsmp_possible(bool cmdline_selected);
-void apic_bigsmp_force(void);
-#else
-static inline bool apic_bigsmp_possible(bool cmdline_selected) { return false; };
-static inline void apic_bigsmp_force(void) { }
#endif
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index f75ee345c02d..87bc9e7ca5d6 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -93,35 +93,6 @@ static int __init parse_apic(char *arg)
}
early_param("apic", parse_apic);
-void __init x86_32_probe_bigsmp_early(void)
-{
- if (nr_cpu_ids <= 8 || xen_pv_domain())
- return;
-
- if (IS_ENABLED(CONFIG_X86_BIGSMP)) {
- switch (boot_cpu_data.x86_vendor) {
- case X86_VENDOR_INTEL:
- if (!APIC_XAPIC(boot_cpu_apic_version))
- break;
- /* P4 and above */
- fallthrough;
- case X86_VENDOR_HYGON:
- case X86_VENDOR_AMD:
- if (apic_bigsmp_possible(cmdline_apic))
- return;
- break;
- }
- }
- pr_info("Limiting to 8 possible CPUs\n");
- set_nr_cpu_ids(8);
-}
-
-void __init x86_32_install_bigsmp(void)
-{
- if (nr_cpu_ids > 8 && !xen_pv_domain())
- apic_bigsmp_force();
-}
-
void __init x86_32_probe_apic(void)
{
if (!cmdline_apic) {
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index a98020bf31bb..ad4ea6fb3b6c 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -107,11 +107,6 @@ static void __used common(void)
OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
OFFSET(TSS_sp2, tss_struct, x86_tss.sp2);
- OFFSET(X86_top_of_stack, pcpu_hot, top_of_stack);
- OFFSET(X86_current_task, pcpu_hot, current_task);
-#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING
- OFFSET(X86_call_depth, pcpu_hot, call_depth);
-#endif
#if IS_ENABLED(CONFIG_CRYPTO_ARIA_AESNI_AVX_X86_64)
/* Offset for fields in aria_ctx */
BLANK();
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index bb65371ea9df..590b6cd0eac0 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -54,11 +54,5 @@ int main(void)
BLANK();
#undef ENTRY
- BLANK();
-
-#ifdef CONFIG_STACKPROTECTOR
- OFFSET(FIXED_stack_canary, fixed_percpu_data, stack_canary);
- BLANK();
-#endif
return 0;
}
diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c
index 8418a892d195..25ae54250112 100644
--- a/arch/x86/kernel/callthunks.c
+++ b/arch/x86/kernel/callthunks.c
@@ -240,21 +240,10 @@ patch_call_sites(s32 *start, s32 *end, const struct core_text *ct)
}
static __init_or_module void
-patch_alt_call_sites(struct alt_instr *start, struct alt_instr *end,
- const struct core_text *ct)
-{
- struct alt_instr *a;
-
- for (a = start; a < end; a++)
- patch_call((void *)&a->instr_offset + a->instr_offset, ct);
-}
-
-static __init_or_module void
callthunks_setup(struct callthunk_sites *cs, const struct core_text *ct)
{
prdbg("Patching call sites %s\n", ct->name);
patch_call_sites(cs->call_start, cs->call_end, ct);
- patch_alt_call_sites(cs->alt_start, cs->alt_end, ct);
prdbg("Patching call sites done%s\n", ct->name);
}
@@ -263,8 +252,6 @@ void __init callthunks_patch_builtin_calls(void)
struct callthunk_sites cs = {
.call_start = __call_sites,
.call_end = __call_sites_end,
- .alt_start = __alt_instructions,
- .alt_end = __alt_instructions_end
};
if (!cpu_feature_enabled(X86_FEATURE_CALL_DEPTH))
diff --git a/arch/x86/kernel/cfi.c b/arch/x86/kernel/cfi.c
index e6bf78fac146..77086cf565ec 100644
--- a/arch/x86/kernel/cfi.c
+++ b/arch/x86/kernel/cfi.c
@@ -67,16 +67,30 @@ static bool decode_cfi_insn(struct pt_regs *regs, unsigned long *target,
*/
enum bug_trap_type handle_cfi_failure(struct pt_regs *regs)
{
- unsigned long target;
+ unsigned long target, addr = regs->ip;
u32 type;
- if (!is_cfi_trap(regs->ip))
- return BUG_TRAP_TYPE_NONE;
+ switch (cfi_mode) {
+ case CFI_KCFI:
+ if (!is_cfi_trap(addr))
+ return BUG_TRAP_TYPE_NONE;
+
+ if (!decode_cfi_insn(regs, &target, &type))
+ return report_cfi_failure_noaddr(regs, addr);
+
+ break;
- if (!decode_cfi_insn(regs, &target, &type))
- return report_cfi_failure_noaddr(regs, regs->ip);
+ case CFI_FINEIBT:
+ if (!decode_fineibt_insn(regs, &target, &type))
+ return BUG_TRAP_TYPE_NONE;
+
+ break;
+
+ default:
+ return BUG_TRAP_TYPE_NONE;
+ }
- return report_cfi_failure(regs, regs->ip, &target, type);
+ return report_cfi_failure(regs, addr, &target, type);
}
/*
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 54194f5995de..79569f72b8ee 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -29,6 +29,8 @@
#include "cpu.h"
+u16 invlpgb_count_max __ro_after_init;
+
static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p)
{
u32 gprs[8] = { 0 };
@@ -632,7 +634,7 @@ static void init_amd_k8(struct cpuinfo_x86 *c)
* (model = 0x14) and later actually support it.
* (AMD Erratum #110, docId: 25759).
*/
- if (c->x86_model < 0x14 && cpu_has(c, X86_FEATURE_LAHF_LM)) {
+ if (c->x86_model < 0x14 && cpu_has(c, X86_FEATURE_LAHF_LM) && !cpu_has(c, X86_FEATURE_HYPERVISOR)) {
clear_cpu_cap(c, X86_FEATURE_LAHF_LM);
if (!rdmsrl_amd_safe(0xc001100d, &value)) {
value &= ~BIT_64(32);
@@ -1073,6 +1075,10 @@ static void init_amd(struct cpuinfo_x86 *c)
/* AMD CPUs don't need fencing after x2APIC/TSC_DEADLINE MSR writes. */
clear_cpu_cap(c, X86_FEATURE_APIC_MSRS_FENCE);
+
+ /* Enable Translation Cache Extension */
+ if (cpu_has(c, X86_FEATURE_TCE))
+ msr_set_bit(MSR_EFER, _EFER_TCE);
}
#ifdef CONFIG_X86_32
@@ -1105,8 +1111,8 @@ static void cpu_detect_tlb_amd(struct cpuinfo_x86 *c)
cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
- tlb_lld_4k[ENTRIES] = (ebx >> 16) & mask;
- tlb_lli_4k[ENTRIES] = ebx & mask;
+ tlb_lld_4k = (ebx >> 16) & mask;
+ tlb_lli_4k = ebx & mask;
/*
* K8 doesn't have 2M/4M entries in the L2 TLB so read out the L1 TLB
@@ -1119,26 +1125,30 @@ static void cpu_detect_tlb_amd(struct cpuinfo_x86 *c)
/* Handle DTLB 2M and 4M sizes, fall back to L1 if L2 is disabled */
if (!((eax >> 16) & mask))
- tlb_lld_2m[ENTRIES] = (cpuid_eax(0x80000005) >> 16) & 0xff;
+ tlb_lld_2m = (cpuid_eax(0x80000005) >> 16) & 0xff;
else
- tlb_lld_2m[ENTRIES] = (eax >> 16) & mask;
+ tlb_lld_2m = (eax >> 16) & mask;
/* a 4M entry uses two 2M entries */
- tlb_lld_4m[ENTRIES] = tlb_lld_2m[ENTRIES] >> 1;
+ tlb_lld_4m = tlb_lld_2m >> 1;
/* Handle ITLB 2M and 4M sizes, fall back to L1 if L2 is disabled */
if (!(eax & mask)) {
/* Erratum 658 */
if (c->x86 == 0x15 && c->x86_model <= 0x1f) {
- tlb_lli_2m[ENTRIES] = 1024;
+ tlb_lli_2m = 1024;
} else {
cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
- tlb_lli_2m[ENTRIES] = eax & 0xff;
+ tlb_lli_2m = eax & 0xff;
}
} else
- tlb_lli_2m[ENTRIES] = eax & mask;
+ tlb_lli_2m = eax & mask;
+
+ tlb_lli_4m = tlb_lli_2m >> 1;
- tlb_lli_4m[ENTRIES] = tlb_lli_2m[ENTRIES] >> 1;
+ /* Max number of pages INVLPGB can invalidate in one shot */
+ if (cpu_has(c, X86_FEATURE_INVLPGB))
+ invlpgb_count_max = (cpuid_edx(0x80000008) & 0xffff) + 1;
}
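The invlpgb_count_max computation above reads CPUID leaf 0x80000008 and adds one to the low 16 bits of EDX; the field is only meaningful when the INVLPGB feature bit is set. The same field can be inspected from userspace with GCC/Clang's <cpuid.h>, for example:

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(0x80000008, &eax, &ebx, &ecx, &edx)) {
		fprintf(stderr, "CPUID leaf 0x80000008 not available\n");
		return 1;
	}

	/* EDX[15:0] is the count field; the hunk above derives the max as field + 1. */
	printf("invlpgb_count_max = %u\n", (edx & 0xffff) + 1);
	return 0;
}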
static const struct cpu_dev amd_cpu_dev = {
diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c
index a6c6bccfa8b8..b3a520959b51 100644
--- a/arch/x86/kernel/cpu/cacheinfo.c
+++ b/arch/x86/kernel/cpu/cacheinfo.c
@@ -8,21 +8,19 @@
* Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD.
*/
-#include <linux/slab.h>
#include <linux/cacheinfo.h>
+#include <linux/capability.h>
#include <linux/cpu.h>
#include <linux/cpuhotplug.h>
-#include <linux/sched.h>
-#include <linux/capability.h>
-#include <linux/sysfs.h>
#include <linux/pci.h>
#include <linux/stop_machine.h>
+#include <linux/sysfs.h>
-#include <asm/cpufeature.h>
-#include <asm/cacheinfo.h>
#include <asm/amd_nb.h>
-#include <asm/smp.h>
+#include <asm/cacheinfo.h>
+#include <asm/cpufeature.h>
#include <asm/mtrr.h>
+#include <asm/smp.h>
#include <asm/tlbflush.h>
#include "cpu.h"
@@ -31,7 +29,6 @@
#define LVL_1_DATA 2
#define LVL_2 3
#define LVL_3 4
-#define LVL_TRACE 5
/* Shared last level cache maps */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
@@ -96,10 +93,6 @@ static const struct _cache_table cache_table[] =
{ 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */
{ 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */
{ 0x68, LVL_1_DATA, 32 }, /* 4-way set assoc, sectored cache, 64 byte line size */
- { 0x70, LVL_TRACE, 12 }, /* 8-way set assoc */
- { 0x71, LVL_TRACE, 16 }, /* 8-way set assoc */
- { 0x72, LVL_TRACE, 32 }, /* 8-way set assoc */
- { 0x73, LVL_TRACE, 64 }, /* 8-way set assoc */
{ 0x78, LVL_2, MB(1) }, /* 4-way set assoc, 64 byte line size */
{ 0x79, LVL_2, 128 }, /* 8-way set assoc, sectored cache, 64 byte line size */
{ 0x7a, LVL_2, 256 }, /* 8-way set assoc, sectored cache, 64 byte line size */
@@ -787,19 +780,13 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c)
}
}
}
- /*
- * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
- * trace cache
- */
- if ((!ci->num_leaves || c->x86 == 15) && c->cpuid_level > 1) {
+
+ /* Don't use CPUID(2) if CPUID(4) is supported. */
+ if (!ci->num_leaves && c->cpuid_level > 1) {
/* supports eax=2 call */
int j, n;
unsigned int regs[4];
unsigned char *dp = (unsigned char *)regs;
- int only_trace = 0;
-
- if (ci->num_leaves && c->x86 == 15)
- only_trace = 1;
/* Number of times to iterate */
n = cpuid_eax(2) & 0xFF;
@@ -820,8 +807,6 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c)
/* look up this descriptor in the table */
while (cache_table[k].descriptor != 0) {
if (cache_table[k].descriptor == des) {
- if (only_trace && cache_table[k].cache_type != LVL_TRACE)
- break;
switch (cache_table[k].cache_type) {
case LVL_1_INST:
l1i += cache_table[k].size;
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 7cce91b19fb2..73565168fc19 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -667,8 +667,8 @@ static void filter_cpuid_features(struct cpuinfo_x86 *c, bool warn)
if (!warn)
continue;
- pr_warn("CPU: CPU feature " X86_CAP_FMT " disabled, no CPUID level 0x%x\n",
- x86_cap_flag(df->feature), df->level);
+ pr_warn("CPU: CPU feature %s disabled, no CPUID level 0x%x\n",
+ x86_cap_flags[df->feature], df->level);
}
}
@@ -846,13 +846,13 @@ void cpu_detect_cache_sizes(struct cpuinfo_x86 *c)
c->x86_cache_size = l2size;
}
-u16 __read_mostly tlb_lli_4k[NR_INFO];
-u16 __read_mostly tlb_lli_2m[NR_INFO];
-u16 __read_mostly tlb_lli_4m[NR_INFO];
-u16 __read_mostly tlb_lld_4k[NR_INFO];
-u16 __read_mostly tlb_lld_2m[NR_INFO];
-u16 __read_mostly tlb_lld_4m[NR_INFO];
-u16 __read_mostly tlb_lld_1g[NR_INFO];
+u16 __read_mostly tlb_lli_4k;
+u16 __read_mostly tlb_lli_2m;
+u16 __read_mostly tlb_lli_4m;
+u16 __read_mostly tlb_lld_4k;
+u16 __read_mostly tlb_lld_2m;
+u16 __read_mostly tlb_lld_4m;
+u16 __read_mostly tlb_lld_1g;
static void cpu_detect_tlb(struct cpuinfo_x86 *c)
{
@@ -860,12 +860,10 @@ static void cpu_detect_tlb(struct cpuinfo_x86 *c)
this_cpu->c_detect_tlb(c);
pr_info("Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n",
- tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES],
- tlb_lli_4m[ENTRIES]);
+ tlb_lli_4k, tlb_lli_2m, tlb_lli_4m);
pr_info("Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d, 1GB %d\n",
- tlb_lld_4k[ENTRIES], tlb_lld_2m[ENTRIES],
- tlb_lld_4m[ENTRIES], tlb_lld_1g[ENTRIES]);
+ tlb_lld_4k, tlb_lld_2m, tlb_lld_4m, tlb_lld_1g);
}
void get_cpu_vendor(struct cpuinfo_x86 *c)
@@ -1164,7 +1162,7 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
VULNWL_INTEL(INTEL_CORE_YONAH, NO_SSB),
- VULNWL_INTEL(INTEL_ATOM_AIRMONT_MID, NO_SSB | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | MSBDS_ONLY),
+ VULNWL_INTEL(INTEL_ATOM_SILVERMONT_MID2,NO_SSB | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | MSBDS_ONLY),
VULNWL_INTEL(INTEL_ATOM_AIRMONT_NP, NO_SSB | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
VULNWL_INTEL(INTEL_ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
@@ -1205,6 +1203,9 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
#define VULNBL_INTEL_STEPS(vfm, max_stepping, issues) \
X86_MATCH_VFM_STEPS(vfm, X86_STEP_MIN, max_stepping, issues)
+#define VULNBL_INTEL_TYPE(vfm, cpu_type, issues) \
+ X86_MATCH_VFM_CPU_TYPE(vfm, INTEL_CPU_TYPE_##cpu_type, issues)
+
#define VULNBL_AMD(family, blacklist) \
VULNBL(AMD, family, X86_MODEL_ANY, blacklist)
@@ -1253,9 +1254,9 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
VULNBL_INTEL_STEPS(INTEL_TIGERLAKE, X86_STEP_MAX, GDS),
VULNBL_INTEL_STEPS(INTEL_LAKEFIELD, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED),
VULNBL_INTEL_STEPS(INTEL_ROCKETLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS),
- VULNBL_INTEL_STEPS(INTEL_ALDERLAKE, X86_STEP_MAX, RFDS),
+ VULNBL_INTEL_TYPE(INTEL_ALDERLAKE, ATOM, RFDS),
VULNBL_INTEL_STEPS(INTEL_ALDERLAKE_L, X86_STEP_MAX, RFDS),
- VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE, X86_STEP_MAX, RFDS),
+ VULNBL_INTEL_TYPE(INTEL_RAPTORLAKE, ATOM, RFDS),
VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE_P, X86_STEP_MAX, RFDS),
VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE_S, X86_STEP_MAX, RFDS),
VULNBL_INTEL_STEPS(INTEL_ATOM_GRACEMONT, X86_STEP_MAX, RFDS),
@@ -1479,15 +1480,96 @@ static void detect_nopl(void)
#endif
}
+static inline bool parse_set_clear_cpuid(char *arg, bool set)
+{
+ char *opt;
+ int taint = 0;
+
+ while (arg) {
+ bool found __maybe_unused = false;
+ unsigned int bit;
+
+ opt = strsep(&arg, ",");
+
+ /*
+ * Handle naked numbers first for feature flags which don't
+ * have names. It doesn't make sense for a bug not to have a
+ * name so don't handle bug flags here.
+ */
+ if (!kstrtouint(opt, 10, &bit)) {
+ if (bit < NCAPINTS * 32) {
+
+ if (set) {
+ pr_warn("setcpuid: force-enabling CPU feature flag:");
+ setup_force_cpu_cap(bit);
+ } else {
+ pr_warn("clearcpuid: force-disabling CPU feature flag:");
+ setup_clear_cpu_cap(bit);
+ }
+ /* empty-string, i.e., ""-defined feature flags */
+ if (!x86_cap_flags[bit])
+ pr_cont(" %d:%d\n", bit >> 5, bit & 31);
+ else
+ pr_cont(" %s\n", x86_cap_flags[bit]);
+
+ taint++;
+ }
+ /*
+ * The assumption is that there are no feature names with only
+ * numbers in the name thus go to the next argument.
+ */
+ continue;
+ }
+
+ for (bit = 0; bit < 32 * (NCAPINTS + NBUGINTS); bit++) {
+ const char *flag;
+ const char *kind;
+
+ if (bit < 32 * NCAPINTS) {
+ flag = x86_cap_flags[bit];
+ kind = "feature";
+ } else {
+ kind = "bug";
+ flag = x86_bug_flags[bit - (32 * NCAPINTS)];
+ }
+
+ if (!flag)
+ continue;
+
+ if (strcmp(flag, opt))
+ continue;
+
+ if (set) {
+ pr_warn("setcpuid: force-enabling CPU %s flag: %s\n",
+ kind, flag);
+ setup_force_cpu_cap(bit);
+ } else {
+ pr_warn("clearcpuid: force-disabling CPU %s flag: %s\n",
+ kind, flag);
+ setup_clear_cpu_cap(bit);
+ }
+ taint++;
+ found = true;
+ break;
+ }
+
+ if (!found)
+ pr_warn("%s: unknown CPU flag: %s", set ? "setcpuid" : "clearcpuid", opt);
+ }
+
+ return taint;
+}
+
+
/*
* We parse cpu parameters early because fpu__init_system() is executed
* before parse_early_param().
*/
static void __init cpu_parse_early_param(void)
{
+ bool cpuid_taint = false;
char arg[128];
- char *argptr = arg, *opt;
- int arglen, taint = 0;
+ int arglen;
#ifdef CONFIG_X86_32
if (cmdline_find_option_bool(boot_command_line, "no387"))
@@ -1519,61 +1601,17 @@ static void __init cpu_parse_early_param(void)
setup_clear_cpu_cap(X86_FEATURE_FRED);
arglen = cmdline_find_option(boot_command_line, "clearcpuid", arg, sizeof(arg));
- if (arglen <= 0)
- return;
-
- pr_info("Clearing CPUID bits:");
-
- while (argptr) {
- bool found __maybe_unused = false;
- unsigned int bit;
-
- opt = strsep(&argptr, ",");
-
- /*
- * Handle naked numbers first for feature flags which don't
- * have names.
- */
- if (!kstrtouint(opt, 10, &bit)) {
- if (bit < NCAPINTS * 32) {
-
- /* empty-string, i.e., ""-defined feature flags */
- if (!x86_cap_flags[bit])
- pr_cont(" " X86_CAP_FMT_NUM, x86_cap_flag_num(bit));
- else
- pr_cont(" " X86_CAP_FMT, x86_cap_flag(bit));
-
- setup_clear_cpu_cap(bit);
- taint++;
- }
- /*
- * The assumption is that there are no feature names with only
- * numbers in the name thus go to the next argument.
- */
- continue;
- }
-
- for (bit = 0; bit < 32 * NCAPINTS; bit++) {
- if (!x86_cap_flag(bit))
- continue;
-
- if (strcmp(x86_cap_flag(bit), opt))
- continue;
+ if (arglen > 0)
+ cpuid_taint |= parse_set_clear_cpuid(arg, false);
- pr_cont(" %s", opt);
- setup_clear_cpu_cap(bit);
- taint++;
- found = true;
- break;
- }
+ arglen = cmdline_find_option(boot_command_line, "setcpuid", arg, sizeof(arg));
+ if (arglen > 0)
+ cpuid_taint |= parse_set_clear_cpuid(arg, true);
- if (!found)
- pr_cont(" (unknown: %s)", opt);
- }
- pr_cont("\n");
-
- if (taint)
+ if (cpuid_taint) {
+ pr_warn("!!! setcpuid=/clearcpuid= in use, this is for TESTING ONLY, may break things horribly. Tainting kernel.\n");
add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
+ }
}
/*
@@ -1610,6 +1648,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
c->cpu_index = 0;
filter_cpuid_features(c, false);
+ check_cpufeature_deps(c);
if (this_cpu->c_bsp_init)
this_cpu->c_bsp_init(c);
@@ -1870,6 +1909,9 @@ static void identify_cpu(struct cpuinfo_x86 *c)
/* Filter out anything that depends on CPUID levels we don't have */
filter_cpuid_features(c, true);
+ /* Check for unmet dependencies based on the CPUID dependency table */
+ check_cpufeature_deps(c);
+
/* If the model name is still unset, do table lookup. */
if (!c->x86_model_id[0]) {
const char *p;
@@ -1962,9 +2004,15 @@ static __init void identify_boot_cpu(void)
lkgs_init();
}
-void identify_secondary_cpu(struct cpuinfo_x86 *c)
+void identify_secondary_cpu(unsigned int cpu)
{
- BUG_ON(c == &boot_cpu_data);
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
+
+ /* Copy boot_cpu_data only on the first bringup */
+ if (!c->initialized)
+ *c = boot_cpu_data;
+ c->cpu_index = cpu;
+
identify_cpu(c);
#ifdef CONFIG_X86_32
enable_sep_cpu();
@@ -1975,6 +2023,7 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c)
update_gds_msr();
tsx_ap_init();
+ c->initialized = true;
}
void print_cpu_info(struct cpuinfo_x86 *c)
@@ -2005,27 +2054,40 @@ void print_cpu_info(struct cpuinfo_x86 *c)
}
/*
- * clearcpuid= was already parsed in cpu_parse_early_param(). This dummy
- * function prevents it from becoming an environment variable for init.
+ * clearcpuid= and setcpuid= were already parsed in cpu_parse_early_param().
+ * These dummy functions prevent them from becoming an environment variable for
+ * init.
*/
+
static __init int setup_clearcpuid(char *arg)
{
return 1;
}
__setup("clearcpuid=", setup_clearcpuid);
-DEFINE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot) = {
- .current_task = &init_task,
- .preempt_count = INIT_PREEMPT_COUNT,
- .top_of_stack = TOP_OF_INIT_STACK,
-};
-EXPORT_PER_CPU_SYMBOL(pcpu_hot);
-EXPORT_PER_CPU_SYMBOL(const_pcpu_hot);
+static __init int setup_setcpuid(char *arg)
+{
+ return 1;
+}
+__setup("setcpuid=", setup_setcpuid);
+
+DEFINE_PER_CPU_CACHE_HOT(struct task_struct *, current_task) = &init_task;
+EXPORT_PER_CPU_SYMBOL(current_task);
+EXPORT_PER_CPU_SYMBOL(const_current_task);
+
+DEFINE_PER_CPU_CACHE_HOT(int, __preempt_count) = INIT_PREEMPT_COUNT;
+EXPORT_PER_CPU_SYMBOL(__preempt_count);
+
+DEFINE_PER_CPU_CACHE_HOT(unsigned long, cpu_current_top_of_stack) = TOP_OF_INIT_STACK;
#ifdef CONFIG_X86_64
-DEFINE_PER_CPU_FIRST(struct fixed_percpu_data,
- fixed_percpu_data) __aligned(PAGE_SIZE) __visible;
-EXPORT_PER_CPU_SYMBOL_GPL(fixed_percpu_data);
+/*
+ * Note: Do not make this dependent on CONFIG_MITIGATION_CALL_DEPTH_TRACKING
+ * so that this space is reserved in the hot cache section even when the
+ * mitigation is disabled.
+ */
+DEFINE_PER_CPU_CACHE_HOT(u64, __x86_call_depth);
+EXPORT_PER_CPU_SYMBOL(__x86_call_depth);
static void wrmsrl_cstar(unsigned long val)
{
@@ -2089,18 +2151,15 @@ void syscall_init(void)
if (!cpu_feature_enabled(X86_FEATURE_FRED))
idt_syscall_init();
}
-
-#else /* CONFIG_X86_64 */
+#endif /* CONFIG_X86_64 */
#ifdef CONFIG_STACKPROTECTOR
-DEFINE_PER_CPU(unsigned long, __stack_chk_guard);
+DEFINE_PER_CPU_CACHE_HOT(unsigned long, __stack_chk_guard);
#ifndef CONFIG_SMP
EXPORT_PER_CPU_SYMBOL(__stack_chk_guard);
#endif
#endif
-#endif /* CONFIG_X86_64 */
-
/*
* Clear all 6 debug registers:
*/
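
The parse_set_clear_cpuid() hunk above accepts either feature names or naked bit numbers for setcpuid=/clearcpuid= and taints the kernel whenever a flag is forced. The following standalone userspace sketch models only that name-or-number parsing shape; the flag table, parse_flags() and demo values are invented for illustration and are not the kernel's x86_cap_flags[] data.

/*
 * Standalone sketch of the name-or-number parsing behind setcpuid= and
 * clearcpuid=.  demo_flags[] is invented, not the kernel flag table.
 */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static const char *demo_flags[] = { "fpu", "vme", "de", "pse", NULL };

static int parse_flags(char *arg, bool set)
{
	const char *what = set ? "setcpuid" : "clearcpuid";
	int taint = 0;

	for (char *opt = strtok(arg, ","); opt; opt = strtok(NULL, ",")) {
		char *end;
		unsigned long bit = strtoul(opt, &end, 10);
		bool found = false;

		/* Naked numbers first, for flags that have no name */
		if (*opt && !*end) {
			printf("%s: forcing flag %lu:%lu\n", what, bit / 32, bit % 32);
			taint++;
			continue;
		}

		/* Otherwise look the name up in the flag table */
		for (int i = 0; demo_flags[i]; i++) {
			if (!strcmp(demo_flags[i], opt)) {
				printf("%s: forcing flag %s\n", what, opt);
				taint++;
				found = true;
				break;
			}
		}
		if (!found)
			printf("%s: unknown flag: %s\n", what, opt);
	}
	return taint;
}

int main(void)
{
	char arg[] = "vme,42,bogus";

	if (parse_flags(arg, true))
		puts("would taint the kernel (TAINT_CPU_OUT_OF_SPEC)");
	return 0;
}
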
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 1beccefbaff9..51deb60a9d26 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -33,14 +33,6 @@ struct cpu_dev {
#endif
};
-struct _tlb_table {
- unsigned char descriptor;
- char tlb_type;
- unsigned int entries;
- /* unsigned int ways; */
- char info[128];
-};
-
#define cpu_dev_register(cpu_devX) \
static const struct cpu_dev *const __cpu_dev_##cpu_devX __used \
__section(".x86_cpu_dev.init") = \
diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
index df838e3bdbe0..a2fbea0be535 100644
--- a/arch/x86/kernel/cpu/cpuid-deps.c
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -147,3 +147,38 @@ void setup_clear_cpu_cap(unsigned int feature)
{
do_clear_cpu_cap(NULL, feature);
}
+
+/*
+ * Return the feature "name" if available, otherwise return
+ * the X86_FEATURE_* numerals to make it easier to identify
+ * the feature.
+ */
+static const char *x86_feature_name(unsigned int feature, char *buf)
+{
+ if (x86_cap_flags[feature])
+ return x86_cap_flags[feature];
+
+ snprintf(buf, 16, "%d*32+%2d", feature / 32, feature % 32);
+
+ return buf;
+}
+
+void check_cpufeature_deps(struct cpuinfo_x86 *c)
+{
+ char feature_buf[16], depends_buf[16];
+ const struct cpuid_dep *d;
+
+ for (d = cpuid_deps; d->feature; d++) {
+ if (cpu_has(c, d->feature) && !cpu_has(c, d->depends)) {
+ /*
+ * Only warn about the first unmet dependency on the
+ * first CPU where it is encountered to avoid spamming
+ * the kernel log.
+ */
+ pr_warn_once("x86 CPU feature dependency check failure: CPU%d has '%s' enabled but '%s' disabled. Kernel might be fine, but no guarantees.\n",
+ smp_processor_id(),
+ x86_feature_name(d->feature, feature_buf),
+ x86_feature_name(d->depends, depends_buf));
+ }
+ }
+}
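
check_cpufeature_deps() above walks the existing cpuid_deps[] table and warns once per unmet dependency. Here is a self-contained toy model of that table walk; the FEAT_* identifiers, bitmap and table contents are invented purely for illustration.

/* Toy model of a feature-dependency table walk; IDs and table are invented. */
#include <stdbool.h>
#include <stdio.h>

enum { FEAT_XSAVE, FEAT_AVX, FEAT_AVX2, FEAT_MAX };

struct dep { int feature; int depends; };

static const struct dep deps[] = {
	{ FEAT_AVX,  FEAT_XSAVE },
	{ FEAT_AVX2, FEAT_AVX   },
	{ -1, -1 },			/* table terminator */
};

static bool has[FEAT_MAX];

static void check_deps(void)
{
	for (const struct dep *d = deps; d->feature >= 0; d++) {
		if (has[d->feature] && !has[d->depends])
			printf("dependency check failure: feature %d enabled but %d disabled\n",
			       d->feature, d->depends);
	}
}

int main(void)
{
	has[FEAT_AVX2] = true;	/* forced on without AVX/XSAVE */
	check_deps();		/* prints one warning */
	return 0;
}
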
diff --git a/arch/x86/kernel/cpu/debugfs.c b/arch/x86/kernel/cpu/debugfs.c
index cacfd3f6abef..1976fef2dfe5 100644
--- a/arch/x86/kernel/cpu/debugfs.c
+++ b/arch/x86/kernel/cpu/debugfs.c
@@ -16,8 +16,8 @@ static int cpu_debug_show(struct seq_file *m, void *p)
if (!c->initialized)
return 0;
- seq_printf(m, "initial_apicid: %x\n", c->topo.initial_apicid);
- seq_printf(m, "apicid: %x\n", c->topo.apicid);
+ seq_printf(m, "initial_apicid: 0x%x\n", c->topo.initial_apicid);
+ seq_printf(m, "apicid: 0x%x\n", c->topo.apicid);
seq_printf(m, "pkg_id: %u\n", c->topo.pkg_id);
seq_printf(m, "die_id: %u\n", c->topo.die_id);
seq_printf(m, "cu_id: %u\n", c->topo.cu_id);
diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c
index c5191b06f9f2..6af4a4a90a52 100644
--- a/arch/x86/kernel/cpu/hygon.c
+++ b/arch/x86/kernel/cpu/hygon.c
@@ -240,26 +240,26 @@ static void cpu_detect_tlb_hygon(struct cpuinfo_x86 *c)
cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
- tlb_lld_4k[ENTRIES] = (ebx >> 16) & mask;
- tlb_lli_4k[ENTRIES] = ebx & mask;
+ tlb_lld_4k = (ebx >> 16) & mask;
+ tlb_lli_4k = ebx & mask;
/* Handle DTLB 2M and 4M sizes, fall back to L1 if L2 is disabled */
if (!((eax >> 16) & mask))
- tlb_lld_2m[ENTRIES] = (cpuid_eax(0x80000005) >> 16) & 0xff;
+ tlb_lld_2m = (cpuid_eax(0x80000005) >> 16) & 0xff;
else
- tlb_lld_2m[ENTRIES] = (eax >> 16) & mask;
+ tlb_lld_2m = (eax >> 16) & mask;
/* a 4M entry uses two 2M entries */
- tlb_lld_4m[ENTRIES] = tlb_lld_2m[ENTRIES] >> 1;
+ tlb_lld_4m = tlb_lld_2m >> 1;
/* Handle ITLB 2M and 4M sizes, fall back to L1 if L2 is disabled */
if (!(eax & mask)) {
cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
- tlb_lli_2m[ENTRIES] = eax & 0xff;
+ tlb_lli_2m = eax & 0xff;
} else
- tlb_lli_2m[ENTRIES] = eax & mask;
+ tlb_lli_2m = eax & mask;
- tlb_lli_4m[ENTRIES] = tlb_lli_2m[ENTRIES] >> 1;
+ tlb_lli_4m = tlb_lli_2m >> 1;
}
static const struct cpu_dev hygon_cpu_dev = {
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 134368a3f4b1..4cbb2e69bea1 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -1,40 +1,31 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/kernel.h>
-#include <linux/pgtable.h>
-#include <linux/string.h>
#include <linux/bitops.h>
-#include <linux/smp.h>
-#include <linux/sched.h>
-#include <linux/sched/clock.h>
-#include <linux/thread_info.h>
#include <linux/init.h>
-#include <linux/uaccess.h>
+#include <linux/kernel.h>
+#include <linux/minmax.h>
+#include <linux/smp.h>
+#include <linux/string.h>
+
+#ifdef CONFIG_X86_64
+#include <linux/topology.h>
+#endif
-#include <asm/cpufeature.h>
-#include <asm/msr.h>
#include <asm/bugs.h>
+#include <asm/cpu_device_id.h>
+#include <asm/cpufeature.h>
#include <asm/cpu.h>
+#include <asm/hwcap2.h>
#include <asm/intel-family.h>
#include <asm/microcode.h>
-#include <asm/hwcap2.h>
-#include <asm/elf.h>
-#include <asm/cpu_device_id.h>
-#include <asm/resctrl.h>
+#include <asm/msr.h>
#include <asm/numa.h>
+#include <asm/resctrl.h>
#include <asm/thermal.h>
-
-#ifdef CONFIG_X86_64
-#include <linux/topology.h>
-#endif
+#include <asm/uaccess.h>
#include "cpu.h"
-#ifdef CONFIG_X86_LOCAL_APIC
-#include <asm/mpspec.h>
-#include <asm/apic.h>
-#endif
-
/*
* Processors which have self-snooping capability can handle conflicting
* memory type across CPUs by snooping its own cache. However, there exists
@@ -195,7 +186,7 @@ void intel_unlock_cpuid_leafs(struct cpuinfo_x86 *c)
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
return;
- if (c->x86 < 6 || (c->x86 == 6 && c->x86_model < 0xd))
+ if (c->x86_vfm < INTEL_PENTIUM_M_DOTHAN)
return;
/*
@@ -210,10 +201,6 @@ static void early_init_intel(struct cpuinfo_x86 *c)
{
u64 misc_enable;
- if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
- (c->x86 == 0x6 && c->x86_model >= 0x0e))
- set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
-
if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64))
c->microcode = intel_get_microcode_revision();
@@ -256,8 +243,8 @@ static void early_init_intel(struct cpuinfo_x86 *c)
#endif
/* CPUID workaround for 0F33/0F34 CPU */
- if (c->x86 == 0xF && c->x86_model == 0x3
- && (c->x86_stepping == 0x3 || c->x86_stepping == 0x4))
+ if (c->x86_vfm == INTEL_P4_PRESCOTT &&
+ (c->x86_stepping == 0x3 || c->x86_stepping == 0x4))
c->x86_phys_bits = 36;
/*
@@ -266,10 +253,16 @@ static void early_init_intel(struct cpuinfo_x86 *c)
*
* It is also reliable across cores and sockets. (but not across
* cabinets - we turn it off in that case explicitly.)
+ *
+ * Use a model-specific check for some older CPUs that have invariant
+ * TSC but may not report it architecturally via 8000_0007.
*/
if (c->x86_power & (1 << 8)) {
set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
+ } else if ((c->x86_vfm >= INTEL_P4_PRESCOTT && c->x86_vfm <= INTEL_P4_WILLAMETTE) ||
+ (c->x86_vfm >= INTEL_CORE_YONAH && c->x86_vfm <= INTEL_IVYBRIDGE)) {
+ set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
}
/* Penwell and Cloverview have the TSC which doesn't sleep on S3 */
@@ -298,12 +291,19 @@ static void early_init_intel(struct cpuinfo_x86 *c)
clear_cpu_cap(c, X86_FEATURE_PAT);
/*
- * If fast string is not enabled in IA32_MISC_ENABLE for any reason,
- * clear the fast string and enhanced fast string CPU capabilities.
+ * Modern CPUs are generally expected to have a sane fast string
+ * implementation. However, BIOSes typically have a knob to tweak
+ * the architectural MISC_ENABLE.FAST_STRING enable bit.
+ *
+ * Adhere to the preference and program the Linux-defined fast
+ * string flag and enhanced fast string capabilities accordingly.
*/
- if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) {
+ if (c->x86_vfm >= INTEL_PENTIUM_M_DOTHAN) {
rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
- if (!(misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING)) {
+ if (misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING) {
+ /* X86_FEATURE_ERMS is set based on CPUID */
+ set_cpu_cap(c, X86_FEATURE_REP_GOOD);
+ } else {
pr_info("Disabled fast string operations\n");
setup_clear_cpu_cap(X86_FEATURE_REP_GOOD);
setup_clear_cpu_cap(X86_FEATURE_ERMS);
@@ -350,9 +350,7 @@ static void bsp_init_intel(struct cpuinfo_x86 *c)
int ppro_with_ram_bug(void)
{
/* Uses data from early_cpu_detect now */
- if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
- boot_cpu_data.x86 == 6 &&
- boot_cpu_data.x86_model == 1 &&
+ if (boot_cpu_data.x86_vfm == INTEL_PENTIUM_PRO &&
boot_cpu_data.x86_stepping < 8) {
pr_info("Pentium Pro with Errata#50 detected. Taking evasive action.\n");
return 1;
@@ -369,9 +367,8 @@ static void intel_smp_check(struct cpuinfo_x86 *c)
/*
* Mask B, Pentium, but not Pentium MMX
*/
- if (c->x86 == 5 &&
- c->x86_stepping >= 1 && c->x86_stepping <= 4 &&
- c->x86_model <= 3) {
+ if (c->x86_vfm >= INTEL_FAM5_START && c->x86_vfm < INTEL_PENTIUM_MMX &&
+ c->x86_stepping >= 1 && c->x86_stepping <= 4) {
/*
* Remember we have B step Pentia with bugs
*/
@@ -398,7 +395,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
* The Quark is also family 5, but does not have the same bug.
*/
clear_cpu_bug(c, X86_BUG_F00F);
- if (c->x86 == 5 && c->x86_model < 9) {
+ if (c->x86_vfm >= INTEL_FAM5_START && c->x86_vfm < INTEL_QUARK_X1000) {
static int f00f_workaround_enabled;
set_cpu_bug(c, X86_BUG_F00F);
@@ -413,7 +410,8 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until
* model 3 mask 3
*/
- if ((c->x86<<8 | c->x86_model<<4 | c->x86_stepping) < 0x633)
+ if ((c->x86_vfm == INTEL_PENTIUM_II_KLAMATH && c->x86_stepping < 3) ||
+ c->x86_vfm < INTEL_PENTIUM_II_KLAMATH)
clear_cpu_cap(c, X86_FEATURE_SEP);
/*
@@ -431,7 +429,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
* P4 Xeon erratum 037 workaround.
* Hardware prefetcher may cause stale data to be loaded into the cache.
*/
- if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_stepping == 1)) {
+ if (c->x86_vfm == INTEL_P4_WILLAMETTE && c->x86_stepping == 1) {
if (msr_set_bit(MSR_IA32_MISC_ENABLE,
MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT) > 0) {
pr_info("CPU: C0 stepping P4 Xeon detected.\n");
@@ -445,27 +443,20 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
* integrated APIC (see 11AP erratum in "Pentium Processor
* Specification Update").
*/
- if (boot_cpu_has(X86_FEATURE_APIC) && (c->x86<<8 | c->x86_model<<4) == 0x520 &&
+ if (boot_cpu_has(X86_FEATURE_APIC) && c->x86_vfm == INTEL_PENTIUM_75 &&
(c->x86_stepping < 0x6 || c->x86_stepping == 0xb))
set_cpu_bug(c, X86_BUG_11AP);
-
#ifdef CONFIG_X86_INTEL_USERCOPY
/*
- * Set up the preferred alignment for movsl bulk memory moves
+ * MOVSL bulk memory moves can be slow when source and dest are not
+ * both 8-byte aligned. PII/PIII only like MOVSL with 8-byte alignment.
+ *
+ * Set the preferred alignment for Pentium Pro and newer processors, as
+ * it has only been tested on these.
*/
- switch (c->x86) {
- case 4: /* 486: untested */
- break;
- case 5: /* Old Pentia: untested */
- break;
- case 6: /* PII/PIII only like movsl with 8-byte alignment */
+ if (c->x86_vfm >= INTEL_PENTIUM_PRO)
movsl_mask.mask = 7;
- break;
- case 15: /* P4 is OK down to 8-byte alignment */
- movsl_mask.mask = 7;
- break;
- }
#endif
intel_smp_check(c);
@@ -563,8 +554,6 @@ static void init_intel(struct cpuinfo_x86 *c)
#ifdef CONFIG_X86_64
if (c->x86 == 15)
c->x86_cache_alignment = c->x86_clflush_size * 2;
- if (c->x86 == 6)
- set_cpu_cap(c, X86_FEATURE_REP_GOOD);
#else
/*
* Names for the Pentium II/Celeron processors
@@ -622,14 +611,14 @@ static unsigned int intel_size_cache(struct cpuinfo_x86 *c, unsigned int size)
* to determine which, so we use a boottime override
* for the 512kb model, and assume 256 otherwise.
*/
- if ((c->x86 == 6) && (c->x86_model == 11) && (size == 0))
+ if (c->x86_vfm == INTEL_PENTIUM_III_TUALATIN && size == 0)
size = 256;
/*
* Intel Quark SoC X1000 contains a 4-way set associative
* 16K cache with a 16 byte cache line and 256 lines per tag
*/
- if ((c->x86 == 5) && (c->x86_model == 9))
+ if (c->x86_vfm == INTEL_QUARK_X1000)
size = 16;
return size;
}
@@ -667,50 +656,58 @@ static unsigned int intel_size_cache(struct cpuinfo_x86 *c, unsigned int size)
*/
#define TLB_0x63_2M_4M_ENTRIES 32
+struct _tlb_table {
+ unsigned char descriptor;
+ char tlb_type;
+ unsigned int entries;
+};
+
static const struct _tlb_table intel_tlb_table[] = {
- { 0x01, TLB_INST_4K, 32, " TLB_INST 4 KByte pages, 4-way set associative" },
- { 0x02, TLB_INST_4M, 2, " TLB_INST 4 MByte pages, full associative" },
- { 0x03, TLB_DATA_4K, 64, " TLB_DATA 4 KByte pages, 4-way set associative" },
- { 0x04, TLB_DATA_4M, 8, " TLB_DATA 4 MByte pages, 4-way set associative" },
- { 0x05, TLB_DATA_4M, 32, " TLB_DATA 4 MByte pages, 4-way set associative" },
- { 0x0b, TLB_INST_4M, 4, " TLB_INST 4 MByte pages, 4-way set associative" },
- { 0x4f, TLB_INST_4K, 32, " TLB_INST 4 KByte pages" },
- { 0x50, TLB_INST_ALL, 64, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" },
- { 0x51, TLB_INST_ALL, 128, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" },
- { 0x52, TLB_INST_ALL, 256, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" },
- { 0x55, TLB_INST_2M_4M, 7, " TLB_INST 2-MByte or 4-MByte pages, fully associative" },
- { 0x56, TLB_DATA0_4M, 16, " TLB_DATA0 4 MByte pages, 4-way set associative" },
- { 0x57, TLB_DATA0_4K, 16, " TLB_DATA0 4 KByte pages, 4-way associative" },
- { 0x59, TLB_DATA0_4K, 16, " TLB_DATA0 4 KByte pages, fully associative" },
- { 0x5a, TLB_DATA0_2M_4M, 32, " TLB_DATA0 2-MByte or 4 MByte pages, 4-way set associative" },
- { 0x5b, TLB_DATA_4K_4M, 64, " TLB_DATA 4 KByte and 4 MByte pages" },
- { 0x5c, TLB_DATA_4K_4M, 128, " TLB_DATA 4 KByte and 4 MByte pages" },
- { 0x5d, TLB_DATA_4K_4M, 256, " TLB_DATA 4 KByte and 4 MByte pages" },
- { 0x61, TLB_INST_4K, 48, " TLB_INST 4 KByte pages, full associative" },
- { 0x63, TLB_DATA_1G_2M_4M, 4, " TLB_DATA 1 GByte pages, 4-way set associative"
- " (plus 32 entries TLB_DATA 2 MByte or 4 MByte pages, not encoded here)" },
- { 0x6b, TLB_DATA_4K, 256, " TLB_DATA 4 KByte pages, 8-way associative" },
- { 0x6c, TLB_DATA_2M_4M, 128, " TLB_DATA 2 MByte or 4 MByte pages, 8-way associative" },
- { 0x6d, TLB_DATA_1G, 16, " TLB_DATA 1 GByte pages, fully associative" },
- { 0x76, TLB_INST_2M_4M, 8, " TLB_INST 2-MByte or 4-MByte pages, fully associative" },
- { 0xb0, TLB_INST_4K, 128, " TLB_INST 4 KByte pages, 4-way set associative" },
- { 0xb1, TLB_INST_2M_4M, 4, " TLB_INST 2M pages, 4-way, 8 entries or 4M pages, 4-way entries" },
- { 0xb2, TLB_INST_4K, 64, " TLB_INST 4KByte pages, 4-way set associative" },
- { 0xb3, TLB_DATA_4K, 128, " TLB_DATA 4 KByte pages, 4-way set associative" },
- { 0xb4, TLB_DATA_4K, 256, " TLB_DATA 4 KByte pages, 4-way associative" },
- { 0xb5, TLB_INST_4K, 64, " TLB_INST 4 KByte pages, 8-way set associative" },
- { 0xb6, TLB_INST_4K, 128, " TLB_INST 4 KByte pages, 8-way set associative" },
- { 0xba, TLB_DATA_4K, 64, " TLB_DATA 4 KByte pages, 4-way associative" },
- { 0xc0, TLB_DATA_4K_4M, 8, " TLB_DATA 4 KByte and 4 MByte pages, 4-way associative" },
- { 0xc1, STLB_4K_2M, 1024, " STLB 4 KByte and 2 MByte pages, 8-way associative" },
- { 0xc2, TLB_DATA_2M_4M, 16, " TLB_DATA 2 MByte/4MByte pages, 4-way associative" },
- { 0xca, STLB_4K, 512, " STLB 4 KByte pages, 4-way associative" },
+ { 0x01, TLB_INST_4K, 32}, /* TLB_INST 4 KByte pages, 4-way set associative */
+ { 0x02, TLB_INST_4M, 2}, /* TLB_INST 4 MByte pages, full associative */
+ { 0x03, TLB_DATA_4K, 64}, /* TLB_DATA 4 KByte pages, 4-way set associative */
+ { 0x04, TLB_DATA_4M, 8}, /* TLB_DATA 4 MByte pages, 4-way set associative */
+ { 0x05, TLB_DATA_4M, 32}, /* TLB_DATA 4 MByte pages, 4-way set associative */
+ { 0x0b, TLB_INST_4M, 4}, /* TLB_INST 4 MByte pages, 4-way set associative */
+ { 0x4f, TLB_INST_4K, 32}, /* TLB_INST 4 KByte pages */
+ { 0x50, TLB_INST_ALL, 64}, /* TLB_INST 4 KByte and 2-MByte or 4-MByte pages */
+ { 0x51, TLB_INST_ALL, 128}, /* TLB_INST 4 KByte and 2-MByte or 4-MByte pages */
+ { 0x52, TLB_INST_ALL, 256}, /* TLB_INST 4 KByte and 2-MByte or 4-MByte pages */
+ { 0x55, TLB_INST_2M_4M, 7}, /* TLB_INST 2-MByte or 4-MByte pages, fully associative */
+ { 0x56, TLB_DATA0_4M, 16}, /* TLB_DATA0 4 MByte pages, 4-way set associative */
+ { 0x57, TLB_DATA0_4K, 16}, /* TLB_DATA0 4 KByte pages, 4-way associative */
+ { 0x59, TLB_DATA0_4K, 16}, /* TLB_DATA0 4 KByte pages, fully associative */
+ { 0x5a, TLB_DATA0_2M_4M, 32}, /* TLB_DATA0 2-MByte or 4 MByte pages, 4-way set associative */
+ { 0x5b, TLB_DATA_4K_4M, 64}, /* TLB_DATA 4 KByte and 4 MByte pages */
+ { 0x5c, TLB_DATA_4K_4M, 128}, /* TLB_DATA 4 KByte and 4 MByte pages */
+ { 0x5d, TLB_DATA_4K_4M, 256}, /* TLB_DATA 4 KByte and 4 MByte pages */
+ { 0x61, TLB_INST_4K, 48}, /* TLB_INST 4 KByte pages, full associative */
+ { 0x63, TLB_DATA_1G_2M_4M, 4}, /* TLB_DATA 1 GByte pages, 4-way set associative
+ * (plus 32 entries TLB_DATA 2 MByte or 4 MByte pages, not encoded here) */
+ { 0x6b, TLB_DATA_4K, 256}, /* TLB_DATA 4 KByte pages, 8-way associative */
+ { 0x6c, TLB_DATA_2M_4M, 128}, /* TLB_DATA 2 MByte or 4 MByte pages, 8-way associative */
+ { 0x6d, TLB_DATA_1G, 16}, /* TLB_DATA 1 GByte pages, fully associative */
+ { 0x76, TLB_INST_2M_4M, 8}, /* TLB_INST 2-MByte or 4-MByte pages, fully associative */
+ { 0xb0, TLB_INST_4K, 128}, /* TLB_INST 4 KByte pages, 4-way set associative */
+ { 0xb1, TLB_INST_2M_4M, 4}, /* TLB_INST 2M pages, 4-way, 8 entries or 4M pages, 4-way entries */
+ { 0xb2, TLB_INST_4K, 64}, /* TLB_INST 4KByte pages, 4-way set associative */
+ { 0xb3, TLB_DATA_4K, 128}, /* TLB_DATA 4 KByte pages, 4-way set associative */
+ { 0xb4, TLB_DATA_4K, 256}, /* TLB_DATA 4 KByte pages, 4-way associative */
+ { 0xb5, TLB_INST_4K, 64}, /* TLB_INST 4 KByte pages, 8-way set associative */
+ { 0xb6, TLB_INST_4K, 128}, /* TLB_INST 4 KByte pages, 8-way set associative */
+ { 0xba, TLB_DATA_4K, 64}, /* TLB_DATA 4 KByte pages, 4-way associative */
+ { 0xc0, TLB_DATA_4K_4M, 8}, /* TLB_DATA 4 KByte and 4 MByte pages, 4-way associative */
+ { 0xc1, STLB_4K_2M, 1024}, /* STLB 4 KByte and 2 MByte pages, 8-way associative */
+ { 0xc2, TLB_DATA_2M_4M, 16}, /* TLB_DATA 2 MByte/4MByte pages, 4-way associative */
+ { 0xca, STLB_4K, 512}, /* STLB 4 KByte pages, 4-way associative */
{ 0x00, 0, 0 }
};
static void intel_tlb_lookup(const unsigned char desc)
{
+ unsigned int entries;
unsigned char k;
+
if (desc == 0)
return;
@@ -722,81 +719,58 @@ static void intel_tlb_lookup(const unsigned char desc)
if (intel_tlb_table[k].tlb_type == 0)
return;
+ entries = intel_tlb_table[k].entries;
switch (intel_tlb_table[k].tlb_type) {
case STLB_4K:
- if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries)
- tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries;
- if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries)
- tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries;
+ tlb_lli_4k = max(tlb_lli_4k, entries);
+ tlb_lld_4k = max(tlb_lld_4k, entries);
break;
case STLB_4K_2M:
- if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries)
- tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries;
- if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries)
- tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries;
- if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries)
- tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries;
- if (tlb_lld_2m[ENTRIES] < intel_tlb_table[k].entries)
- tlb_lld_2m[ENTRIES] = intel_tlb_table[k].entries;
- if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries)
- tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries;
- if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries)
- tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries;
+ tlb_lli_4k = max(tlb_lli_4k, entries);
+ tlb_lld_4k = max(tlb_lld_4k, entries);
+ tlb_lli_2m = max(tlb_lli_2m, entries);
+ tlb_lld_2m = max(tlb_lld_2m, entries);
+ tlb_lli_4m = max(tlb_lli_4m, entries);
+ tlb_lld_4m = max(tlb_lld_4m, entries);
break;
case TLB_INST_ALL:
- if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries)
- tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries;
- if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries)
- tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries;
- if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries)
- tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries;
+ tlb_lli_4k = max(tlb_lli_4k, entries);
+ tlb_lli_2m = max(tlb_lli_2m, entries);
+ tlb_lli_4m = max(tlb_lli_4m, entries);
break;
case TLB_INST_4K:
- if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries)
- tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries;
+ tlb_lli_4k = max(tlb_lli_4k, entries);
break;
case TLB_INST_4M:
- if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries)
- tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries;
+ tlb_lli_4m = max(tlb_lli_4m, entries);
break;
case TLB_INST_2M_4M:
- if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries)
- tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries;
- if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries)
- tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries;
+ tlb_lli_2m = max(tlb_lli_2m, entries);
+ tlb_lli_4m = max(tlb_lli_4m, entries);
break;
case TLB_DATA_4K:
case TLB_DATA0_4K:
- if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries)
- tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries;
+ tlb_lld_4k = max(tlb_lld_4k, entries);
break;
case TLB_DATA_4M:
case TLB_DATA0_4M:
- if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries)
- tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries;
+ tlb_lld_4m = max(tlb_lld_4m, entries);
break;
case TLB_DATA_2M_4M:
case TLB_DATA0_2M_4M:
- if (tlb_lld_2m[ENTRIES] < intel_tlb_table[k].entries)
- tlb_lld_2m[ENTRIES] = intel_tlb_table[k].entries;
- if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries)
- tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries;
+ tlb_lld_2m = max(tlb_lld_2m, entries);
+ tlb_lld_4m = max(tlb_lld_4m, entries);
break;
case TLB_DATA_4K_4M:
- if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries)
- tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries;
- if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries)
- tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries;
+ tlb_lld_4k = max(tlb_lld_4k, entries);
+ tlb_lld_4m = max(tlb_lld_4m, entries);
break;
case TLB_DATA_1G_2M_4M:
- if (tlb_lld_2m[ENTRIES] < TLB_0x63_2M_4M_ENTRIES)
- tlb_lld_2m[ENTRIES] = TLB_0x63_2M_4M_ENTRIES;
- if (tlb_lld_4m[ENTRIES] < TLB_0x63_2M_4M_ENTRIES)
- tlb_lld_4m[ENTRIES] = TLB_0x63_2M_4M_ENTRIES;
+ tlb_lld_2m = max(tlb_lld_2m, TLB_0x63_2M_4M_ENTRIES);
+ tlb_lld_4m = max(tlb_lld_4m, TLB_0x63_2M_4M_ENTRIES);
fallthrough;
case TLB_DATA_1G:
- if (tlb_lld_1g[ENTRIES] < intel_tlb_table[k].entries)
- tlb_lld_1g[ENTRIES] = intel_tlb_table[k].entries;
+ tlb_lld_1g = max(tlb_lld_1g, entries);
break;
}
}
@@ -891,34 +865,3 @@ static const struct cpu_dev intel_cpu_dev = {
};
cpu_dev_register(intel_cpu_dev);
-
-#define X86_HYBRID_CPU_TYPE_ID_SHIFT 24
-
-/**
- * get_this_hybrid_cpu_type() - Get the type of this hybrid CPU
- *
- * Returns the CPU type [31:24] (i.e., Atom or Core) of a CPU in
- * a hybrid processor. If the processor is not hybrid, returns 0.
- */
-u8 get_this_hybrid_cpu_type(void)
-{
- if (!cpu_feature_enabled(X86_FEATURE_HYBRID_CPU))
- return 0;
-
- return cpuid_eax(0x0000001a) >> X86_HYBRID_CPU_TYPE_ID_SHIFT;
-}
-
-/**
- * get_this_hybrid_cpu_native_id() - Get the native id of this hybrid CPU
- *
- * Returns the uarch native ID [23:0] of a CPU in a hybrid processor.
- * If the processor is not hybrid, returns 0.
- */
-u32 get_this_hybrid_cpu_native_id(void)
-{
- if (!cpu_feature_enabled(X86_FEATURE_HYBRID_CPU))
- return 0;
-
- return cpuid_eax(0x0000001a) &
- (BIT_ULL(X86_HYBRID_CPU_TYPE_ID_SHIFT) - 1);
-}
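
With the per-page-size TLB counts now plain scalars, intel_tlb_lookup() above reduces each descriptor to a few max() updates. The standalone sketch below shows that descriptor-table accumulation pattern with a reduced, invented subset of descriptors and a naive max() macro (the kernel's max() is type-checked).

/* Reduced sketch of descriptor -> max(entries) accumulation; subset only. */
#include <stdio.h>

enum tlb_type { TLB_NONE, TLB_INST_4K, TLB_DATA_4K };

struct tlb_entry { unsigned char desc; enum tlb_type type; unsigned int entries; };

static const struct tlb_entry table[] = {
	{ 0x01, TLB_INST_4K, 32 },
	{ 0x03, TLB_DATA_4K, 64 },
	{ 0x00, TLB_NONE,     0 },	/* terminator */
};

static unsigned int tlb_lli_4k, tlb_lld_4k;

#define max(a, b) ((a) > (b) ? (a) : (b))	/* naive, double-evaluates */

static void tlb_lookup(unsigned char desc)
{
	for (int k = 0; table[k].desc; k++) {
		if (table[k].desc != desc)
			continue;
		switch (table[k].type) {
		case TLB_INST_4K:
			tlb_lli_4k = max(tlb_lli_4k, table[k].entries);
			break;
		case TLB_DATA_4K:
			tlb_lld_4k = max(tlb_lld_4k, table[k].entries);
			break;
		default:
			break;
		}
		return;
	}
}

int main(void)
{
	tlb_lookup(0x01);
	tlb_lookup(0x03);
	printf("iTLB 4K: %u, dTLB 4K: %u\n", tlb_lli_4k, tlb_lld_4k);
	return 0;
}
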
diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c
index 4f3c65429f82..6af1e8baeb0f 100644
--- a/arch/x86/kernel/cpu/match.c
+++ b/arch/x86/kernel/cpu/match.c
@@ -6,6 +6,34 @@
#include <linux/slab.h>
/**
+ * x86_match_vendor_cpu_type - helper function to match the hardware defined
+ * cpu-type for a single entry in the x86_cpu_id
+ * table. Note, this function does not match the
+ * generic cpu-types TOPO_CPU_TYPE_EFFICIENCY and
+ * TOPO_CPU_TYPE_PERFORMANCE.
+ * @c: Pointer to the cpuinfo_x86 structure of the CPU to match.
+ * @m: Pointer to the x86_cpu_id entry to match against.
+ *
+ * Return: true if the cpu-type matches, false otherwise.
+ */
+static bool x86_match_vendor_cpu_type(struct cpuinfo_x86 *c, const struct x86_cpu_id *m)
+{
+ if (m->type == X86_CPU_TYPE_ANY)
+ return true;
+
+ /* Hybrid CPUs are special, they are assumed to match all cpu-types */
+ if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU))
+ return true;
+
+ if (c->x86_vendor == X86_VENDOR_INTEL)
+ return m->type == c->topo.intel_type;
+ if (c->x86_vendor == X86_VENDOR_AMD)
+ return m->type == c->topo.amd_type;
+
+ return false;
+}
+
+/**
* x86_match_cpu - match current CPU against an array of x86_cpu_ids
* @match: Pointer to array of x86_cpu_ids. Last entry terminated with
* {}.
@@ -50,6 +78,8 @@ const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match)
continue;
if (m->feature != X86_FEATURE_ANY && !cpu_has(c, m->feature))
continue;
+ if (!x86_match_vendor_cpu_type(c, m))
+ continue;
return m;
}
return NULL;
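
x86_match_vendor_cpu_type() adds a cpu-type dimension to the match above: X86_CPU_TYPE_ANY matches everything, hybrid parts are treated as matching all types, and otherwise the table entry's type is compared against the detected per-vendor type. A standalone sketch of that decision follows; the TYPE_* constants and struct are invented for illustration.

/* Illustrative model of the cpu-type match; constants are invented. */
#include <stdbool.h>
#include <stdio.h>

enum { TYPE_ANY = 0, TYPE_ATOM = 0x20, TYPE_CORE = 0x40 };

struct cpu {
	bool hybrid;	/* hybrid parts match every type */
	int  type;	/* detected hardware cpu-type */
};

static bool match_cpu_type(const struct cpu *c, int wanted)
{
	if (wanted == TYPE_ANY)
		return true;
	if (c->hybrid)
		return true;
	return c->type == wanted;
}

int main(void)
{
	struct cpu ecore = { .hybrid = false, .type = TYPE_ATOM };

	printf("%d %d %d\n",
	       match_cpu_type(&ecore, TYPE_ANY),	/* 1 */
	       match_cpu_type(&ecore, TYPE_ATOM),	/* 1 */
	       match_cpu_type(&ecore, TYPE_CORE));	/* 0 */
	return 0;
}
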
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index f3d534807d91..819199bc0119 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -74,7 +74,7 @@ void intel_collect_cpu_info(struct cpu_signature *sig)
sig->pf = 0;
sig->rev = intel_get_microcode_revision();
- if (x86_model(sig->sig) >= 5 || x86_family(sig->sig) > 6) {
+ if (IFM(x86_family(sig->sig), x86_model(sig->sig)) >= INTEL_PENTIUM_III_DESCHUTES) {
unsigned int val[2];
/* get processor flags from MSR 0x17 */
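
The IFM() comparison above works because family and model are packed into one integer, so a single ">= INTEL_PENTIUM_III_DESCHUTES" compare covers "family 6, model >= 5" as well as every later family. The sketch below hand-writes a simplified two-field packing (vendor omitted) purely to illustrate the range-compare idea; the macro names and values are not the kernel's.

/* Hand-written illustration of family/model packing for range compares. */
#include <stdio.h>

#define PACK_FM(family, model)	(((family) << 8) | (model))

#define PENTIUM_III_DESCHUTES	PACK_FM(6, 5)	/* illustrative value only */

static int at_least_deschutes(unsigned int family, unsigned int model)
{
	return PACK_FM(family, model) >= PENTIUM_III_DESCHUTES;
}

int main(void)
{
	printf("%d\n", at_least_deschutes(6, 3));	/* 0: older model */
	printf("%d\n", at_least_deschutes(6, 8));	/* 1: newer model */
	printf("%d\n", at_least_deschutes(15, 2));	/* 1: any later family */
	return 0;
}
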
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 2fdfda2b60e4..e2c6b471d230 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -9,9 +9,11 @@
#include <linux/io.h>
#include <linux/mm.h>
#include <linux/cc_platform.h>
+#include <linux/string_choices.h>
#include <asm/processor-flags.h>
#include <asm/cacheinfo.h>
#include <asm/cpufeature.h>
+#include <asm/cpu_device_id.h>
#include <asm/hypervisor.h>
#include <asm/mshyperv.h>
#include <asm/tlbflush.h>
@@ -646,10 +648,10 @@ static void __init print_mtrr_state(void)
pr_info("MTRR default type: %s\n",
mtrr_attrib_to_str(mtrr_state.def_type));
if (mtrr_state.have_fixed) {
- pr_info("MTRR fixed ranges %sabled:\n",
- ((mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED) &&
- (mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED)) ?
- "en" : "dis");
+ pr_info("MTRR fixed ranges %s:\n",
+ str_enabled_disabled(
+ (mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED) &&
+ (mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED)));
print_fixed(0x00000, 0x10000, mtrr_state.fixed_ranges + 0);
for (i = 0; i < 2; ++i)
print_fixed(0x80000 + i * 0x20000, 0x04000,
@@ -661,8 +663,8 @@ static void __init print_mtrr_state(void)
/* tail */
print_fixed_last();
}
- pr_info("MTRR variable ranges %sabled:\n",
- mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED ? "en" : "dis");
+ pr_info("MTRR variable ranges %s:\n",
+ str_enabled_disabled(mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED));
high_width = (boot_cpu_data.x86_phys_bits - (32 - PAGE_SHIFT) + 3) / 4;
for (i = 0; i < num_var_ranges; ++i) {
@@ -1025,8 +1027,7 @@ int generic_validate_add_page(unsigned long base, unsigned long size,
* For Intel PPro stepping <= 7
* must be 4 MiB aligned and not touch 0x70000000 -> 0x7003FFFF
*/
- if (mtrr_if == &generic_mtrr_ops && boot_cpu_data.x86 == 6 &&
- boot_cpu_data.x86_model == 1 &&
+ if (mtrr_if == &generic_mtrr_ops && boot_cpu_data.x86_vfm == INTEL_PENTIUM_PRO &&
boot_cpu_data.x86_stepping <= 7) {
if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) {
pr_warn("mtrr: base(0x%lx000) is not 4 MiB aligned\n", base);
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c
index a5c506f6da7f..4049235b1bfe 100644
--- a/arch/x86/kernel/cpu/mtrr/if.c
+++ b/arch/x86/kernel/cpu/mtrr/if.c
@@ -99,7 +99,6 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos)
char *ptr;
char line[LINE_SIZE];
int length;
- size_t linelen;
memset(line, 0, LINE_SIZE);
@@ -108,9 +107,8 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos)
if (length < 0)
return length;
- linelen = strlen(line);
- ptr = line + linelen - 1;
- if (linelen && *ptr == '\n')
+ ptr = line + length - 1;
+ if (length && *ptr == '\n')
*ptr = '\0';
if (!strncmp(line, "disable=", 8)) {
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 340af8155658..0be61c45400c 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -140,7 +140,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
x86_platform.guest.enc_kexec_begin();
x86_platform.guest.enc_kexec_finish();
- crash_save_cpu(regs, safe_smp_processor_id());
+ crash_save_cpu(regs, smp_processor_id());
}
#if defined(CONFIG_KEXEC_FILE) || defined(CONFIG_CRASH_HOTPLUG)
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index b4905d5173fd..722fd712e1cf 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -37,7 +37,7 @@ const char *stack_type_name(enum stack_type type)
static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info)
{
- unsigned long *begin = (unsigned long *)this_cpu_read(pcpu_hot.hardirq_stack_ptr);
+ unsigned long *begin = (unsigned long *)this_cpu_read(hardirq_stack_ptr);
unsigned long *end = begin + (THREAD_SIZE / sizeof(long));
/*
@@ -62,7 +62,7 @@ static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info)
static bool in_softirq_stack(unsigned long *stack, struct stack_info *info)
{
- unsigned long *begin = (unsigned long *)this_cpu_read(pcpu_hot.softirq_stack_ptr);
+ unsigned long *begin = (unsigned long *)this_cpu_read(softirq_stack_ptr);
unsigned long *end = begin + (THREAD_SIZE / sizeof(long));
/*
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index f05339fee778..6c5defd6569a 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -134,7 +134,7 @@ static __always_inline bool in_exception_stack(unsigned long *stack, struct stac
static __always_inline bool in_irq_stack(unsigned long *stack, struct stack_info *info)
{
- unsigned long *end = (unsigned long *)this_cpu_read(pcpu_hot.hardirq_stack_ptr);
+ unsigned long *end = (unsigned long *)this_cpu_read(hardirq_stack_ptr);
unsigned long *begin;
/*
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 44f937015e1e..fc1714bad045 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -19,6 +19,7 @@
#include <linux/usb/ehci_def.h>
#include <linux/usb/xhci-dbgp.h>
#include <asm/pci_x86.h>
+#include <linux/static_call.h>
/* Simple VGA output */
#define VGABASE (__ISA_IO_base + 0xb8000)
@@ -94,26 +95,28 @@ static unsigned long early_serial_base = 0x3f8; /* ttyS0 */
#define DLL 0 /* Divisor Latch Low */
#define DLH 1 /* Divisor latch High */
-static unsigned int io_serial_in(unsigned long addr, int offset)
+static __noendbr unsigned int io_serial_in(unsigned long addr, int offset)
{
return inb(addr + offset);
}
+ANNOTATE_NOENDBR_SYM(io_serial_in);
-static void io_serial_out(unsigned long addr, int offset, int value)
+static __noendbr void io_serial_out(unsigned long addr, int offset, int value)
{
outb(value, addr + offset);
}
+ANNOTATE_NOENDBR_SYM(io_serial_out);
-static unsigned int (*serial_in)(unsigned long addr, int offset) = io_serial_in;
-static void (*serial_out)(unsigned long addr, int offset, int value) = io_serial_out;
+DEFINE_STATIC_CALL(serial_in, io_serial_in);
+DEFINE_STATIC_CALL(serial_out, io_serial_out);
static int early_serial_putc(unsigned char ch)
{
unsigned timeout = 0xffff;
- while ((serial_in(early_serial_base, LSR) & XMTRDY) == 0 && --timeout)
+ while ((static_call(serial_in)(early_serial_base, LSR) & XMTRDY) == 0 && --timeout)
cpu_relax();
- serial_out(early_serial_base, TXR, ch);
+ static_call(serial_out)(early_serial_base, TXR, ch);
return timeout ? 0 : -1;
}
@@ -131,16 +134,16 @@ static __init void early_serial_hw_init(unsigned divisor)
{
unsigned char c;
- serial_out(early_serial_base, LCR, 0x3); /* 8n1 */
- serial_out(early_serial_base, IER, 0); /* no interrupt */
- serial_out(early_serial_base, FCR, 0); /* no fifo */
- serial_out(early_serial_base, MCR, 0x3); /* DTR + RTS */
+ static_call(serial_out)(early_serial_base, LCR, 0x3); /* 8n1 */
+ static_call(serial_out)(early_serial_base, IER, 0); /* no interrupt */
+ static_call(serial_out)(early_serial_base, FCR, 0); /* no fifo */
+ static_call(serial_out)(early_serial_base, MCR, 0x3); /* DTR + RTS */
- c = serial_in(early_serial_base, LCR);
- serial_out(early_serial_base, LCR, c | DLAB);
- serial_out(early_serial_base, DLL, divisor & 0xff);
- serial_out(early_serial_base, DLH, (divisor >> 8) & 0xff);
- serial_out(early_serial_base, LCR, c & ~DLAB);
+ c = static_call(serial_in)(early_serial_base, LCR);
+ static_call(serial_out)(early_serial_base, LCR, c | DLAB);
+ static_call(serial_out)(early_serial_base, DLL, divisor & 0xff);
+ static_call(serial_out)(early_serial_base, DLH, (divisor >> 8) & 0xff);
+ static_call(serial_out)(early_serial_base, LCR, c & ~DLAB);
}
#define DEFAULT_BAUD 9600
@@ -183,28 +186,26 @@ static __init void early_serial_init(char *s)
/* Convert from baud to divisor value */
divisor = 115200 / baud;
- /* These will always be IO based ports */
- serial_in = io_serial_in;
- serial_out = io_serial_out;
-
/* Set up the HW */
early_serial_hw_init(divisor);
}
#ifdef CONFIG_PCI
-static void mem32_serial_out(unsigned long addr, int offset, int value)
+static __noendbr void mem32_serial_out(unsigned long addr, int offset, int value)
{
u32 __iomem *vaddr = (u32 __iomem *)addr;
/* shift implied by pointer type */
writel(value, vaddr + offset);
}
+ANNOTATE_NOENDBR_SYM(mem32_serial_out);
-static unsigned int mem32_serial_in(unsigned long addr, int offset)
+static __noendbr unsigned int mem32_serial_in(unsigned long addr, int offset)
{
u32 __iomem *vaddr = (u32 __iomem *)addr;
/* shift implied by pointer type */
return readl(vaddr + offset);
}
+ANNOTATE_NOENDBR_SYM(mem32_serial_in);
/*
* early_pci_serial_init()
@@ -278,15 +279,13 @@ static __init void early_pci_serial_init(char *s)
*/
if ((bar0 & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_IO) {
/* it is IO mapped */
- serial_in = io_serial_in;
- serial_out = io_serial_out;
early_serial_base = bar0 & PCI_BASE_ADDRESS_IO_MASK;
write_pci_config(bus, slot, func, PCI_COMMAND,
cmdreg|PCI_COMMAND_IO);
} else {
/* It is memory mapped - assume 32-bit alignment */
- serial_in = mem32_serial_in;
- serial_out = mem32_serial_out;
+ static_call_update(serial_in, mem32_serial_in);
+ static_call_update(serial_out, mem32_serial_out);
/* WARNING! assuming the address is always in the first 4G */
early_serial_base =
(unsigned long)early_ioremap(bar0 & PCI_BASE_ADDRESS_MEM_MASK, 0x10);
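
The early-serial accessors above move from plain function pointers to static calls, so the common case becomes a direct call that is re-patched by static_call_update() instead of an indirect branch. The kernel-style sketch below reuses only the static_call primitives already used in the hunk; it is not a standalone program, and the demo_* names are placeholders rather than the early_printk symbols.

/*
 * Kernel-style sketch of the static_call pattern; builds only in kernel
 * context.  demo_* names are placeholders for illustration.
 */
#include <linux/static_call.h>

static unsigned int demo_io_read(unsigned long addr, int offset)
{
	return 0;	/* stand-in for inb(addr + offset) */
}

static unsigned int demo_mmio_read(unsigned long addr, int offset)
{
	return 0;	/* stand-in for readl() on a mapped BAR */
}

/* Default target: calls demo_io_read() directly until retargeted */
DEFINE_STATIC_CALL(demo_read, demo_io_read);

static void demo_probe(bool mmio)
{
	if (mmio)
		static_call_update(demo_read, demo_mmio_read);
}

static unsigned int demo_use(unsigned long base)
{
	/* Emitted as a direct call, patched when the target changes */
	return static_call(demo_read)(base, 0);
}
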
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 166bc0ea3bdf..cace6e8d7cc7 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -118,13 +118,10 @@ ftrace_modify_code_direct(unsigned long ip, const char *old_code,
return ret;
/* replace the text with the new text */
- if (ftrace_poke_late) {
+ if (ftrace_poke_late)
text_poke_queue((void *)ip, new_code, MCOUNT_INSN_SIZE, NULL);
- } else {
- mutex_lock(&text_mutex);
- text_poke((void *)ip, new_code, MCOUNT_INSN_SIZE);
- mutex_unlock(&text_mutex);
- }
+ else
+ text_poke_early((void *)ip, new_code, MCOUNT_INSN_SIZE);
return 0;
}
@@ -321,7 +318,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
unsigned const char op_ref[] = { 0x48, 0x8b, 0x15 };
unsigned const char retq[] = { RET_INSN_OPCODE, INT3_INSN_OPCODE };
union ftrace_op_code_union op_ptr;
- void *ret;
+ int ret;
if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) {
start_offset = (unsigned long)ftrace_regs_caller;
@@ -352,15 +349,15 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
npages = DIV_ROUND_UP(*tramp_size, PAGE_SIZE);
/* Copy ftrace_caller onto the trampoline memory */
- ret = text_poke_copy(trampoline, (void *)start_offset, size);
- if (WARN_ON(!ret))
+ ret = copy_from_kernel_nofault(trampoline, (void *)start_offset, size);
+ if (WARN_ON(ret < 0))
goto fail;
ip = trampoline + size;
if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
__text_gen_insn(ip, JMP32_INSN_OPCODE, ip, x86_return_thunk, JMP32_INSN_SIZE);
else
- text_poke_copy(ip, retq, sizeof(retq));
+ memcpy(ip, retq, sizeof(retq));
/* No need to test direct calls on created trampolines */
if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) {
@@ -368,7 +365,8 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
ip = trampoline + (jmp_offset - start_offset);
if (WARN_ON(*(char *)ip != 0x75))
goto fail;
- if (!text_poke_copy(ip, x86_nops[2], 2))
+ ret = copy_from_kernel_nofault(ip, x86_nops[2], 2);
+ if (ret < 0)
goto fail;
}
@@ -381,7 +379,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
*/
ptr = (unsigned long *)(trampoline + size + RET_SIZE);
- text_poke_copy(ptr, &ops, sizeof(unsigned long));
+ *ptr = (unsigned long)ops;
op_offset -= start_offset;
memcpy(&op_ptr, trampoline + op_offset, OP_REF_SIZE);
@@ -397,7 +395,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
op_ptr.offset = offset;
/* put in the new offset to the ftrace_ops */
- text_poke_copy(trampoline + op_offset, &op_ptr, OP_REF_SIZE);
+ memcpy(trampoline + op_offset, &op_ptr, OP_REF_SIZE);
/* put in the call to the function */
mutex_lock(&text_mutex);
@@ -407,9 +405,9 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
* the depth accounting before the call already.
*/
dest = ftrace_ops_get_func(ops);
- text_poke_copy_locked(trampoline + call_offset,
- text_gen_insn(CALL_INSN_OPCODE, trampoline + call_offset, dest),
- CALL_INSN_SIZE, false);
+ memcpy(trampoline + call_offset,
+ text_gen_insn(CALL_INSN_OPCODE, trampoline + call_offset, dest),
+ CALL_INSN_SIZE);
mutex_unlock(&text_mutex);
/* ALLOC_TRAMP flags lets us know we created it */
diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
index d51647228596..367da3638167 100644
--- a/arch/x86/kernel/ftrace_64.S
+++ b/arch/x86/kernel/ftrace_64.S
@@ -146,12 +146,14 @@ SYM_FUNC_END(ftrace_stub_graph)
#ifdef CONFIG_DYNAMIC_FTRACE
SYM_FUNC_START(__fentry__)
+ ANNOTATE_NOENDBR
CALL_DEPTH_ACCOUNT
RET
SYM_FUNC_END(__fentry__)
EXPORT_SYMBOL(__fentry__)
SYM_FUNC_START(ftrace_caller)
+ ANNOTATE_NOENDBR
/* save_mcount_regs fills in first two parameters */
save_mcount_regs
@@ -197,6 +199,7 @@ SYM_FUNC_END(ftrace_caller);
STACK_FRAME_NON_STANDARD_FP(ftrace_caller)
SYM_FUNC_START(ftrace_regs_caller)
+ ANNOTATE_NOENDBR
/* Save the current flags before any operations that can change them */
pushfq
@@ -310,6 +313,7 @@ SYM_FUNC_END(ftrace_regs_caller)
STACK_FRAME_NON_STANDARD_FP(ftrace_regs_caller)
SYM_FUNC_START(ftrace_stub_direct_tramp)
+ ANNOTATE_NOENDBR
CALL_DEPTH_ACCOUNT
RET
SYM_FUNC_END(ftrace_stub_direct_tramp)
@@ -317,6 +321,7 @@ SYM_FUNC_END(ftrace_stub_direct_tramp)
#else /* ! CONFIG_DYNAMIC_FTRACE */
SYM_FUNC_START(__fentry__)
+ ANNOTATE_NOENDBR
CALL_DEPTH_ACCOUNT
cmpq $ftrace_stub, ftrace_trace_function
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 368157a7f6d2..fa9b6339975f 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -565,7 +565,7 @@ void early_setup_idt(void)
*/
void __head startup_64_setup_gdt_idt(void)
{
- struct desc_struct *gdt = (void *)(__force unsigned long)init_per_cpu_var(gdt_page.gdt);
+ struct desc_struct *gdt = (void *)(__force unsigned long)gdt_page.gdt;
void *handler = NULL;
struct desc_ptr startup_gdt_descr = {
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 31345e0ba006..fefe2a25cf02 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -61,11 +61,14 @@ SYM_CODE_START_NOALIGN(startup_64)
/* Set up the stack for verify_cpu() */
leaq __top_init_kernel_stack(%rip), %rsp
- /* Setup GSBASE to allow stack canary access for C code */
+ /*
+ * Set up GSBASE.
+ * Note that on SMP the boot CPU uses the init data section until
+ * the per-CPU areas are set up.
+ */
movl $MSR_GS_BASE, %ecx
- leaq INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx
- movl %edx, %eax
- shrq $32, %rdx
+ xorl %eax, %eax
+ xorl %edx, %edx
wrmsr
call startup_64_setup_gdt_idt
@@ -319,7 +322,7 @@ SYM_INNER_LABEL(common_startup_64, SYM_L_LOCAL)
*
* RDX contains the per-cpu offset
*/
- movq pcpu_hot + X86_current_task(%rdx), %rax
+ movq current_task(%rdx), %rax
movq TASK_threadsp(%rax), %rsp
/*
@@ -359,17 +362,12 @@ SYM_INNER_LABEL(common_startup_64, SYM_L_LOCAL)
movl %eax,%fs
movl %eax,%gs
- /* Set up %gs.
- *
- * The base of %gs always points to fixed_percpu_data. If the
- * stack protector canary is enabled, it is located at %gs:40.
+ /*
+ * Set up GSBASE.
* Note that, on SMP, the boot cpu uses init data section until
* the per cpu areas are set up.
*/
movl $MSR_GS_BASE,%ecx
-#ifndef CONFIG_SMP
- leaq INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx
-#endif
movl %edx, %eax
shrq $32, %rdx
wrmsr
@@ -435,7 +433,7 @@ SYM_CODE_START(soft_restart_cpu)
UNWIND_HINT_END_OF_STACK
/* Find the idle task stack */
- movq PER_CPU_VAR(pcpu_hot + X86_current_task), %rcx
+ movq PER_CPU_VAR(current_task), %rcx
movq TASK_threadsp(%rcx), %rsp
jmp .Ljump_to_C_code
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index e2fab3ceb09f..6290dd120f5e 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -144,7 +144,7 @@ long ksys_ioperm(unsigned long from, unsigned long num, int turn_on)
* Update the sequence number to force a TSS update on return to
* user mode.
*/
- iobm->sequence = atomic64_add_return(1, &io_bitmap_sequence);
+ iobm->sequence = atomic64_inc_return(&io_bitmap_sequence);
return 0;
}
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index feca4f20b06a..81f9b78e0f7b 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -33,6 +33,11 @@
DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
EXPORT_PER_CPU_SYMBOL(irq_stat);
+DEFINE_PER_CPU_CACHE_HOT(u16, __softirq_pending);
+EXPORT_PER_CPU_SYMBOL(__softirq_pending);
+
+DEFINE_PER_CPU_CACHE_HOT(struct irq_stack *, hardirq_stack_ptr);
+
atomic_t irq_err_count;
/*
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index dc1049c01f9b..c7a5d2960d57 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -29,12 +29,9 @@
int sysctl_panic_on_stackoverflow __read_mostly;
/* Debugging check for stack overflow: is there less than 1KB free? */
-static int check_stack_overflow(void)
+static bool check_stack_overflow(void)
{
- long sp;
-
- __asm__ __volatile__("andl %%esp,%0" :
- "=r" (sp) : "0" (THREAD_SIZE - 1));
+ unsigned long sp = current_stack_pointer & (THREAD_SIZE - 1);
return sp < (sizeof(struct thread_info) + STACK_WARN);
}
@@ -48,18 +45,19 @@ static void print_stack_overflow(void)
}
#else
-static inline int check_stack_overflow(void) { return 0; }
+static inline bool check_stack_overflow(void) { return false; }
static inline void print_stack_overflow(void) { }
#endif
+DEFINE_PER_CPU_CACHE_HOT(struct irq_stack *, softirq_stack_ptr);
+
static void call_on_stack(void *func, void *stack)
{
- asm volatile("xchgl %%ebx,%%esp \n"
+ asm volatile("xchgl %[sp], %%esp\n"
CALL_NOSPEC
- "movl %%ebx,%%esp \n"
- : "=b" (stack)
- : "0" (stack),
- [thunk_target] "D"(func)
+ "movl %[sp], %%esp"
+ : [sp] "+b" (stack)
+ : [thunk_target] "D" (func)
: "memory", "cc", "edx", "ecx", "eax");
}
@@ -68,13 +66,13 @@ static inline void *current_stack(void)
return (void *)(current_stack_pointer & ~(THREAD_SIZE - 1));
}
-static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
+static inline bool execute_on_irq_stack(bool overflow, struct irq_desc *desc)
{
struct irq_stack *curstk, *irqstk;
- u32 *isp, *prev_esp, arg1;
+ u32 *isp, *prev_esp;
curstk = (struct irq_stack *) current_stack();
- irqstk = __this_cpu_read(pcpu_hot.hardirq_stack_ptr);
+ irqstk = __this_cpu_read(hardirq_stack_ptr);
/*
* this is where we switch to the IRQ stack. However, if we are
@@ -83,7 +81,7 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
* current stack (which is the irq stack already after all)
*/
if (unlikely(curstk == irqstk))
- return 0;
+ return false;
isp = (u32 *) ((char *)irqstk + sizeof(*irqstk));
@@ -94,14 +92,13 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
if (unlikely(overflow))
call_on_stack(print_stack_overflow, isp);
- asm volatile("xchgl %%ebx,%%esp \n"
+ asm volatile("xchgl %[sp], %%esp\n"
CALL_NOSPEC
- "movl %%ebx,%%esp \n"
- : "=a" (arg1), "=b" (isp)
- : "0" (desc), "1" (isp),
- [thunk_target] "D" (desc->handle_irq)
- : "memory", "cc", "ecx");
- return 1;
+ "movl %[sp], %%esp"
+ : "+a" (desc), [sp] "+b" (isp)
+ : [thunk_target] "D" (desc->handle_irq)
+ : "memory", "cc", "edx", "ecx");
+ return true;
}
/*
@@ -112,7 +109,7 @@ int irq_init_percpu_irqstack(unsigned int cpu)
int node = cpu_to_node(cpu);
struct page *ph, *ps;
- if (per_cpu(pcpu_hot.hardirq_stack_ptr, cpu))
+ if (per_cpu(hardirq_stack_ptr, cpu))
return 0;
ph = alloc_pages_node(node, THREADINFO_GFP, THREAD_SIZE_ORDER);
@@ -124,8 +121,8 @@ int irq_init_percpu_irqstack(unsigned int cpu)
return -ENOMEM;
}
- per_cpu(pcpu_hot.hardirq_stack_ptr, cpu) = page_address(ph);
- per_cpu(pcpu_hot.softirq_stack_ptr, cpu) = page_address(ps);
+ per_cpu(hardirq_stack_ptr, cpu) = page_address(ph);
+ per_cpu(softirq_stack_ptr, cpu) = page_address(ps);
return 0;
}
@@ -135,7 +132,7 @@ void do_softirq_own_stack(void)
struct irq_stack *irqstk;
u32 *isp, *prev_esp;
- irqstk = __this_cpu_read(pcpu_hot.softirq_stack_ptr);
+ irqstk = __this_cpu_read(softirq_stack_ptr);
/* build the stack frame on the softirq stack */
isp = (u32 *) ((char *)irqstk + sizeof(*irqstk));
@@ -150,7 +147,7 @@ void do_softirq_own_stack(void)
void __handle_irq(struct irq_desc *desc, struct pt_regs *regs)
{
- int overflow = check_stack_overflow();
+ bool overflow = check_stack_overflow();
if (user_mode(regs) || !execute_on_irq_stack(overflow, desc)) {
if (unlikely(overflow))
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index ade0043ce56e..ca78dce39361 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -26,8 +26,8 @@
#include <asm/io_apic.h>
#include <asm/apic.h>
+DEFINE_PER_CPU_CACHE_HOT(bool, hardirq_stack_inuse);
DEFINE_PER_CPU_PAGE_ALIGNED(struct irq_stack, irq_stack_backing_store) __visible;
-DECLARE_INIT_PER_CPU(irq_stack_backing_store);
#ifdef CONFIG_VMAP_STACK
/*
@@ -51,7 +51,7 @@ static int map_irq_stack(unsigned int cpu)
return -ENOMEM;
/* Store actual TOS to avoid adjustment in the hotpath */
- per_cpu(pcpu_hot.hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE - 8;
+ per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE - 8;
return 0;
}
#else
@@ -64,14 +64,14 @@ static int map_irq_stack(unsigned int cpu)
void *va = per_cpu_ptr(&irq_stack_backing_store, cpu);
/* Store actual TOS to avoid adjustment in the hotpath */
- per_cpu(pcpu_hot.hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE - 8;
+ per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE - 8;
return 0;
}
#endif
int irq_init_percpu_irqstack(unsigned int cpu)
{
- if (per_cpu(pcpu_hot.hardirq_stack_ptr, cpu))
+ if (per_cpu(hardirq_stack_ptr, cpu))
return 0;
return map_irq_stack(cpu);
}
diff --git a/arch/x86/kernel/irqflags.S b/arch/x86/kernel/irqflags.S
index 7f542a7799cb..fdabd5dda154 100644
--- a/arch/x86/kernel/irqflags.S
+++ b/arch/x86/kernel/irqflags.S
@@ -9,6 +9,7 @@
*/
.pushsection .noinstr.text, "ax"
SYM_FUNC_START(native_save_fl)
+ ENDBR
pushf
pop %_ASM_AX
RET
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 72e6a45e7ec2..09608fd93687 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -373,16 +373,7 @@ out:
kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long addr, unsigned long offset,
bool *on_func_entry)
{
- u32 insn;
-
- /*
- * Since 'addr' is not guaranteed to be safe to access, use
- * copy_from_kernel_nofault() to read the instruction:
- */
- if (copy_from_kernel_nofault(&insn, (void *)addr, sizeof(u32)))
- return NULL;
-
- if (is_endbr(insn)) {
+ if (is_endbr((u32 *)addr)) {
*on_func_entry = !offset || offset == 4;
if (*on_func_entry)
offset = 4;
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 7a422a6c5983..3be9b3342c67 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -838,7 +838,6 @@ static void __init kvm_guest_init(void)
#ifdef CONFIG_SMP
if (pv_tlb_flush_supported()) {
pv_ops.mmu.flush_tlb_multi = kvm_flush_tlb_multi;
- pv_ops.mmu.tlb_remove_table = tlb_remove_table;
pr_info("KVM setup pv remote TLB flush\n");
}
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 8984abd91c00..a7998f351701 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -19,6 +19,7 @@
#include <linux/jump_label.h>
#include <linux/random.h>
#include <linux/memory.h>
+#include <linux/stackprotector.h>
#include <asm/text-patching.h>
#include <asm/page.h>
@@ -130,6 +131,20 @@ static int __write_relocate_add(Elf64_Shdr *sechdrs,
goto overflow;
size = 4;
break;
+#if defined(CONFIG_STACKPROTECTOR) && \
+ defined(CONFIG_CC_IS_CLANG) && CONFIG_CLANG_VERSION < 170000
+ case R_X86_64_REX_GOTPCRELX: {
+ static unsigned long __percpu *const addr = &__stack_chk_guard;
+
+ if (sym->st_value != (u64)addr) {
+ pr_err("%s: Unsupported GOTPCREL relocation\n", me->name);
+ return -ENOEXEC;
+ }
+
+ val = (u64)&addr + rel[i].r_addend;
+ fallthrough;
+ }
+#endif
case R_X86_64_PC32:
case R_X86_64_PLT32:
val -= (u64)loc;
@@ -146,21 +161,18 @@ static int __write_relocate_add(Elf64_Shdr *sechdrs,
}
if (apply) {
- void *wr_loc = module_writable_address(me, loc);
-
- if (memcmp(wr_loc, &zero, size)) {
+ if (memcmp(loc, &zero, size)) {
pr_err("x86/modules: Invalid relocation target, existing value is nonzero for type %d, loc %p, val %Lx\n",
(int)ELF64_R_TYPE(rel[i].r_info), loc, val);
return -ENOEXEC;
}
- write(wr_loc, &val, size);
+ write(loc, &val, size);
} else {
if (memcmp(loc, &val, size)) {
pr_warn("x86/modules: Invalid relocation target, existing value does not match expected value for type %d, loc %p, val %Lx\n",
(int)ELF64_R_TYPE(rel[i].r_info), loc, val);
return -ENOEXEC;
}
- /* FIXME: needs care for ROX module allocations */
write(loc, &zero, size);
}
}
@@ -227,7 +239,7 @@ int module_finalize(const Elf_Ehdr *hdr,
const Elf_Shdr *sechdrs,
struct module *me)
{
- const Elf_Shdr *s, *alt = NULL,
+ const Elf_Shdr *s, *alt = NULL, *locks = NULL,
*orc = NULL, *orc_ip = NULL,
*retpolines = NULL, *returns = NULL, *ibt_endbr = NULL,
*calls = NULL, *cfi = NULL;
@@ -236,6 +248,8 @@ int module_finalize(const Elf_Ehdr *hdr,
for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
if (!strcmp(".altinstructions", secstrings + s->sh_name))
alt = s;
+ if (!strcmp(".smp_locks", secstrings + s->sh_name))
+ locks = s;
if (!strcmp(".orc_unwind", secstrings + s->sh_name))
orc = s;
if (!strcmp(".orc_unwind_ip", secstrings + s->sh_name))
@@ -266,60 +280,33 @@ int module_finalize(const Elf_Ehdr *hdr,
csize = cfi->sh_size;
}
- apply_fineibt(rseg, rseg + rsize, cseg, cseg + csize, me);
+ apply_fineibt(rseg, rseg + rsize, cseg, cseg + csize);
}
if (retpolines) {
void *rseg = (void *)retpolines->sh_addr;
- apply_retpolines(rseg, rseg + retpolines->sh_size, me);
+ apply_retpolines(rseg, rseg + retpolines->sh_size);
}
if (returns) {
void *rseg = (void *)returns->sh_addr;
- apply_returns(rseg, rseg + returns->sh_size, me);
- }
- if (alt) {
- /* patch .altinstructions */
- void *aseg = (void *)alt->sh_addr;
- apply_alternatives(aseg, aseg + alt->sh_size, me);
+ apply_returns(rseg, rseg + returns->sh_size);
}
- if (calls || alt) {
+ if (calls) {
struct callthunk_sites cs = {};
- if (calls) {
- cs.call_start = (void *)calls->sh_addr;
- cs.call_end = (void *)calls->sh_addr + calls->sh_size;
- }
-
- if (alt) {
- cs.alt_start = (void *)alt->sh_addr;
- cs.alt_end = (void *)alt->sh_addr + alt->sh_size;
- }
+ cs.call_start = (void *)calls->sh_addr;
+ cs.call_end = (void *)calls->sh_addr + calls->sh_size;
callthunks_patch_module_calls(&cs, me);
}
+ if (alt) {
+ /* patch .altinstructions */
+ void *aseg = (void *)alt->sh_addr;
+ apply_alternatives(aseg, aseg + alt->sh_size);
+ }
if (ibt_endbr) {
void *iseg = (void *)ibt_endbr->sh_addr;
- apply_seal_endbr(iseg, iseg + ibt_endbr->sh_size, me);
+ apply_seal_endbr(iseg, iseg + ibt_endbr->sh_size);
}
-
- if (orc && orc_ip)
- unwind_module_init(me, (void *)orc_ip->sh_addr, orc_ip->sh_size,
- (void *)orc->sh_addr, orc->sh_size);
-
- return 0;
-}
-
-int module_post_finalize(const Elf_Ehdr *hdr,
- const Elf_Shdr *sechdrs,
- struct module *me)
-{
- const Elf_Shdr *s, *locks = NULL;
- char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
-
- for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
- if (!strcmp(".smp_locks", secstrings + s->sh_name))
- locks = s;
- }
-
if (locks) {
void *lseg = (void *)locks->sh_addr;
void *text = me->mem[MOD_TEXT].base;
@@ -329,6 +316,10 @@ int module_post_finalize(const Elf_Ehdr *hdr,
text, text_end);
}
+ if (orc && orc_ip)
+ unwind_module_init(me, (void *)orc_ip->sh_addr, orc_ip->sh_size,
+ (void *)orc->sh_addr, orc->sh_size);
+
return 0;
}
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index ed163c8c8604..9a95d00f1423 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -40,8 +40,12 @@
#define CREATE_TRACE_POINTS
#include <trace/events/nmi.h>
+/*
+ * An emergency handler can be set in any context including NMI
+ */
struct nmi_desc {
raw_spinlock_t lock;
+ nmi_handler_t emerg_handler;
struct list_head head;
};
@@ -132,9 +136,22 @@ static void nmi_check_duration(struct nmiaction *action, u64 duration)
static int nmi_handle(unsigned int type, struct pt_regs *regs)
{
struct nmi_desc *desc = nmi_to_desc(type);
+ nmi_handler_t ehandler;
struct nmiaction *a;
int handled=0;
+ /*
+ * Call the emergency handler, if set
+ *
+ * For the crash_nmi_callback() emergency handler, it returns on the
+ * crashing CPU so that it can complete the remaining necessary crash
+ * actions as soon as possible. The other handlers in the linked list
+ * won't need to be run.
+ */
+ ehandler = desc->emerg_handler;
+ if (ehandler)
+ return ehandler(type, regs);
+
rcu_read_lock();
/*
@@ -224,6 +241,31 @@ void unregister_nmi_handler(unsigned int type, const char *name)
}
EXPORT_SYMBOL_GPL(unregister_nmi_handler);
+/**
+ * set_emergency_nmi_handler - Set emergency handler
+ * @type: NMI type
+ * @handler: the emergency handler to be stored
+ *
+ * Set an emergency NMI handler which, if set, will preempt all the other
+ * handlers in the linked list. Passing a NULL handler clears it. Concurrent
+ * calls to this function are not expected; if they do happen, the system is
+ * already broken beyond repair.
+ */
+void set_emergency_nmi_handler(unsigned int type, nmi_handler_t handler)
+{
+ struct nmi_desc *desc = nmi_to_desc(type);
+
+ if (WARN_ON_ONCE(desc->emerg_handler == handler))
+ return;
+ desc->emerg_handler = handler;
+
+ /*
+ * Ensure the emergency handler is visible to other CPUs before
+ * function return
+ */
+ smp_wmb();
+}
+
static void
pci_serr_error(unsigned char reason, struct pt_regs *regs)
{
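As an aside on the new API: a minimal usage sketch (not part of the patch) of installing and clearing the emergency handler, mirroring what nmi_shootdown_cpus() does in the reboot.c hunk further down; example_crash_nmi() and the install/clear wrappers are hypothetical names:

    /* Hypothetical handler; must have the nmi_handler_t signature. */
    static int example_crash_nmi(unsigned int cmd, struct pt_regs *regs)
    {
            /* Runs instead of the whole handler list for NMI_LOCAL. */
            return NMI_HANDLED;
    }

    static void example_install(void)
    {
            /* Preempts every registered NMI_LOCAL handler from now on. */
            set_emergency_nmi_handler(NMI_LOCAL, example_crash_nmi);
    }

    static void example_clear(void)
    {
            /* Passing NULL clears the emergency handler again. */
            set_emergency_nmi_handler(NMI_LOCAL, NULL);
    }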
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 1ccaa3397a67..97925632c28e 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -59,21 +59,6 @@ void __init native_pv_lock_init(void)
static_branch_enable(&virt_spin_lock_key);
}
-#ifndef CONFIG_PT_RECLAIM
-static void native_tlb_remove_table(struct mmu_gather *tlb, void *table)
-{
- struct ptdesc *ptdesc = (struct ptdesc *)table;
-
- pagetable_dtor(ptdesc);
- tlb_remove_page(tlb, ptdesc_page(ptdesc));
-}
-#else
-static void native_tlb_remove_table(struct mmu_gather *tlb, void *table)
-{
- tlb_remove_table(tlb, table);
-}
-#endif
-
struct static_key paravirt_steal_enabled;
struct static_key paravirt_steal_rq_enabled;
@@ -90,30 +75,20 @@ void paravirt_set_sched_clock(u64 (*func)(void))
static_call_update(pv_sched_clock, func);
}
-/* These are in entry.S */
-static struct resource reserve_ioports = {
- .start = 0,
- .end = IO_SPACE_LIMIT,
- .name = "paravirt-ioport",
- .flags = IORESOURCE_IO | IORESOURCE_BUSY,
-};
+#ifdef CONFIG_PARAVIRT_XXL
+static noinstr void pv_native_write_cr2(unsigned long val)
+{
+ native_write_cr2(val);
+}
-/*
- * Reserve the whole legacy IO space to prevent any legacy drivers
- * from wasting time probing for their hardware. This is a fairly
- * brute-force approach to disabling all non-virtual drivers.
- *
- * Note that this must be called very early to have any effect.
- */
-int paravirt_disable_iospace(void)
+static noinstr unsigned long pv_native_read_cr3(void)
{
- return request_resource(&ioport_resource, &reserve_ioports);
+ return __native_read_cr3();
}
-#ifdef CONFIG_PARAVIRT_XXL
-static noinstr void pv_native_write_cr2(unsigned long val)
+static noinstr void pv_native_write_cr3(unsigned long cr3)
{
- native_write_cr2(val);
+ native_write_cr3(cr3);
}
static noinstr unsigned long pv_native_get_debugreg(int regno)
@@ -195,7 +170,6 @@ struct paravirt_patch_template pv_ops = {
.mmu.flush_tlb_kernel = native_flush_tlb_global,
.mmu.flush_tlb_one_user = native_flush_tlb_one_user,
.mmu.flush_tlb_multi = native_flush_tlb_multi,
- .mmu.tlb_remove_table = native_tlb_remove_table,
.mmu.exit_mmap = paravirt_nop,
.mmu.notify_page_enc_status_changed = paravirt_nop,
@@ -203,8 +177,8 @@ struct paravirt_patch_template pv_ops = {
#ifdef CONFIG_PARAVIRT_XXL
.mmu.read_cr2 = __PV_IS_CALLEE_SAVE(pv_native_read_cr2),
.mmu.write_cr2 = pv_native_write_cr2,
- .mmu.read_cr3 = __native_read_cr3,
- .mmu.write_cr3 = native_write_cr3,
+ .mmu.read_cr3 = pv_native_read_cr3,
+ .mmu.write_cr3 = pv_native_write_cr3,
.mmu.pgd_alloc = __paravirt_pgd_alloc,
.mmu.pgd_free = paravirt_nop,
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 6da6769d7254..5452237fabd4 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -1043,7 +1043,7 @@ unsigned long __get_wchan(struct task_struct *p)
return addr;
}
-long do_arch_prctl_common(int option, unsigned long arg2)
+SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
{
switch (option) {
case ARCH_GET_CPUID:
@@ -1058,5 +1058,13 @@ long do_arch_prctl_common(int option, unsigned long arg2)
return fpu_xstate_prctl(option, arg2);
}
+ if (!in_ia32_syscall())
+ return do_arch_prctl_64(current, option, arg2);
+
return -EINVAL;
}
+
+SYSCALL_DEFINE0(ni_syscall)
+{
+ return -ENOSYS;
+}
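For context, a hypothetical user-space sketch exercising the consolidated arch_prctl() entry point above; ARCH_GET_CPUID comes from the uapi header <asm/prctl.h>, everything else is illustrative:

    #include <stdio.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <asm/prctl.h>          /* ARCH_GET_CPUID */

    int main(void)
    {
            /* Query the task's CPUID-faulting setting via the common path. */
            long ret = syscall(SYS_arch_prctl, ARCH_GET_CPUID, 0);

            printf("arch_prctl(ARCH_GET_CPUID) = %ld\n", ret);
            return ret < 0;
    }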
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 0917c7f25720..4636ef359973 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -190,13 +190,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
arch_end_context_switch(next_p);
/*
- * Reload esp0 and pcpu_hot.top_of_stack. This changes
+ * Reload esp0 and cpu_current_top_of_stack. This changes
* current_thread_info(). Refresh the SYSENTER configuration in
* case prev or next is vm86.
*/
update_task_stack(next_p);
refresh_sysenter_cs(next);
- this_cpu_write(pcpu_hot.top_of_stack,
+ this_cpu_write(cpu_current_top_of_stack,
(unsigned long)task_stack_page(next_p) +
THREAD_SIZE);
@@ -206,7 +206,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
if (prev->gs | next->gs)
loadsegment(gs, next->gs);
- raw_cpu_write(pcpu_hot.current_task, next_p);
+ raw_cpu_write(current_task, next_p);
switch_fpu_finish(next_p);
@@ -215,8 +215,3 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
return prev_p;
}
-
-SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
-{
- return do_arch_prctl_common(option, arg2);
-}
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 226472332a70..7196ca7048be 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -614,7 +614,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
int cpu = smp_processor_id();
WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
- this_cpu_read(pcpu_hot.hardirq_stack_inuse));
+ this_cpu_read(hardirq_stack_inuse));
if (!test_tsk_thread_flag(prev_p, TIF_NEED_FPU_LOAD))
switch_fpu_prepare(prev_p, cpu);
@@ -668,8 +668,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
/*
* Switch the PDA and FPU contexts.
*/
- raw_cpu_write(pcpu_hot.current_task, next_p);
- raw_cpu_write(pcpu_hot.top_of_stack, task_top_of_stack(next_p));
+ raw_cpu_write(current_task, next_p);
+ raw_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p));
switch_fpu_finish(next_p);
@@ -942,7 +942,7 @@ long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
case ARCH_MAP_VDSO_X32:
return prctl_map_vdso(&vdso_image_x32, arg2);
# endif
-# if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
+# ifdef CONFIG_IA32_EMULATION
case ARCH_MAP_VDSO_32:
return prctl_map_vdso(&vdso_image_32, arg2);
# endif
@@ -979,26 +979,3 @@ long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
return ret;
}
-
-SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
-{
- long ret;
-
- ret = do_arch_prctl_64(current, option, arg2);
- if (ret == -EINVAL)
- ret = do_arch_prctl_common(option, arg2);
-
- return ret;
-}
-
-#ifdef CONFIG_IA32_EMULATION
-COMPAT_SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
-{
- return do_arch_prctl_common(option, arg2);
-}
-#endif
-
-unsigned long KSTK_ESP(struct task_struct *task)
-{
- return task_pt_regs(task)->sp;
-}
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index dc1dd3f3e67f..964f6b0a3d68 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -921,20 +921,16 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
return;
/* Make a note of crashing cpu. Will be used in NMI callback. */
- crashing_cpu = safe_smp_processor_id();
+ crashing_cpu = smp_processor_id();
shootdown_callback = callback;
atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
- /* Would it be better to replace the trap vector here? */
- if (register_nmi_handler(NMI_LOCAL, crash_nmi_callback,
- NMI_FLAG_FIRST, "crash"))
- return; /* Return what? */
+
/*
- * Ensure the new callback function is set before sending
- * out the NMI
+ * Set emergency handler to preempt other handlers.
*/
- wmb();
+ set_emergency_nmi_handler(NMI_LOCAL, crash_nmi_callback);
apic_send_IPI_allbutself(NMI_VECTOR);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index cebee310e200..9f8ff3aad4f4 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -56,6 +56,9 @@
#include <asm/unwind.h>
#include <asm/vsyscall.h>
#include <linux/vmalloc.h>
+#if defined(CONFIG_X86_LOCAL_APIC)
+#include <asm/nmi.h>
+#endif
/*
* max_low_pfn_mapped: highest directly mapped pfn < 4 GB
@@ -146,6 +149,69 @@ static size_t ima_kexec_buffer_size;
/* Boot loader ID and version as integers, for the benefit of proc_dointvec */
int bootloader_type, bootloader_version;
+static const struct ctl_table x86_sysctl_table[] = {
+ {
+ .procname = "panic_on_unrecovered_nmi",
+ .data = &panic_on_unrecovered_nmi,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "panic_on_io_nmi",
+ .data = &panic_on_io_nmi,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "bootloader_type",
+ .data = &bootloader_type,
+ .maxlen = sizeof(int),
+ .mode = 0444,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "bootloader_version",
+ .data = &bootloader_version,
+ .maxlen = sizeof(int),
+ .mode = 0444,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "io_delay_type",
+ .data = &io_delay_type,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#if defined(CONFIG_X86_LOCAL_APIC)
+ {
+ .procname = "unknown_nmi_panic",
+ .data = &unknown_nmi_panic,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif
+#if defined(CONFIG_ACPI_SLEEP)
+ {
+ .procname = "acpi_video_flags",
+ .data = &acpi_realmode_flags,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ },
+#endif
+};
+
+static int __init init_x86_sysctl(void)
+{
+ register_sysctl_init("kernel", x86_sysctl_table);
+ return 0;
+}
+arch_initcall(init_x86_sysctl);
+
/*
* Setup options
*/
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index b30d6e180df7..bfa48e7a32a2 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -23,18 +23,13 @@
#include <asm/cpumask.h>
#include <asm/cpu.h>
-#ifdef CONFIG_X86_64
-#define BOOT_PERCPU_OFFSET ((unsigned long)__per_cpu_load)
-#else
-#define BOOT_PERCPU_OFFSET 0
-#endif
+DEFINE_PER_CPU_CACHE_HOT(int, cpu_number);
+EXPORT_PER_CPU_SYMBOL(cpu_number);
-DEFINE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET;
+DEFINE_PER_CPU_CACHE_HOT(unsigned long, this_cpu_off);
EXPORT_PER_CPU_SYMBOL(this_cpu_off);
-unsigned long __per_cpu_offset[NR_CPUS] __ro_after_init = {
- [0 ... NR_CPUS-1] = BOOT_PERCPU_OFFSET,
-};
+unsigned long __per_cpu_offset[NR_CPUS] __ro_after_init;
EXPORT_SYMBOL(__per_cpu_offset);
/*
@@ -169,7 +164,7 @@ void __init setup_per_cpu_areas(void)
for_each_possible_cpu(cpu) {
per_cpu_offset(cpu) = delta + pcpu_unit_offsets[cpu];
per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
- per_cpu(pcpu_hot.cpu_number, cpu) = cpu;
+ per_cpu(cpu_number, cpu) = cpu;
setup_percpu_segment(cpu);
/*
* Copy data used in early init routines from the
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index ef654530bf5a..98123ff10506 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -33,25 +33,55 @@
#include <asm/smap.h>
#include <asm/gsseg.h>
+/*
+ * The first GDT descriptor is reserved as 'NULL descriptor'. As bits 0
+ * and 1 of a segment selector, i.e., the RPL bits, are NOT used to index
+ * GDT, selector values 0~3 all point to the NULL descriptor, thus values
+ * 0, 1, 2 and 3 are all valid NULL selector values.
+ *
+ * However IRET zeros ES, FS, GS, and DS segment registers if any of them
+ * is found to have any nonzero NULL selector value, which can be used by
+ * userspace in pre-FRED systems to spot any interrupt/exception by loading
+ * a nonzero NULL selector and waiting for it to become zero. Before FRED
+ * there was nothing software could do to prevent such an information leak.
+ *
+ * ERETU, the only legit instruction to return to userspace from kernel
+ * under FRED, by design does NOT zero any segment register, avoiding this
+ * problematic behavior.
+ *
+ * As such, leave NULL selector values 0~3 unchanged.
+ */
+static inline u16 fixup_rpl(u16 sel)
+{
+ return sel <= 3 ? sel : sel | 3;
+}
+
#ifdef CONFIG_IA32_EMULATION
#include <asm/unistd_32_ia32.h>
static inline void reload_segments(struct sigcontext_32 *sc)
{
- unsigned int cur;
+ u16 cur;
+ /*
+ * Reload fs and gs if they have changed in the signal
+ * handler. This does not handle long fs/gs base changes in
+ * the handler, but does not clobber them at least in the
+ * normal case.
+ */
savesegment(gs, cur);
- if ((sc->gs | 0x03) != cur)
- load_gs_index(sc->gs | 0x03);
+ if (fixup_rpl(sc->gs) != cur)
+ load_gs_index(fixup_rpl(sc->gs));
savesegment(fs, cur);
- if ((sc->fs | 0x03) != cur)
- loadsegment(fs, sc->fs | 0x03);
+ if (fixup_rpl(sc->fs) != cur)
+ loadsegment(fs, fixup_rpl(sc->fs));
+
savesegment(ds, cur);
- if ((sc->ds | 0x03) != cur)
- loadsegment(ds, sc->ds | 0x03);
+ if (fixup_rpl(sc->ds) != cur)
+ loadsegment(ds, fixup_rpl(sc->ds));
savesegment(es, cur);
- if ((sc->es | 0x03) != cur)
- loadsegment(es, sc->es | 0x03);
+ if (fixup_rpl(sc->es) != cur)
+ loadsegment(es, fixup_rpl(sc->es));
}
#define sigset32_t compat_sigset_t
@@ -105,18 +135,12 @@ static bool ia32_restore_sigcontext(struct pt_regs *regs,
regs->orig_ax = -1;
#ifdef CONFIG_IA32_EMULATION
- /*
- * Reload fs and gs if they have changed in the signal
- * handler. This does not handle long fs/gs base changes in
- * the handler, but does not clobber them at least in the
- * normal case.
- */
reload_segments(&sc);
#else
- loadsegment(gs, sc.gs);
- regs->fs = sc.fs;
- regs->es = sc.es;
- regs->ds = sc.ds;
+ loadsegment(gs, fixup_rpl(sc.gs));
+ regs->fs = fixup_rpl(sc.fs);
+ regs->es = fixup_rpl(sc.es);
+ regs->ds = fixup_rpl(sc.ds);
#endif
return fpu__restore_sig(compat_ptr(sc.fpstate), 1);
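A few worked values for the fixup_rpl() helper added above (illustrative only):

    /*
     * fixup_rpl(0) == 0, fixup_rpl(3) == 3      NULL selectors 0..3 are kept
     * fixup_rpl(0x28) == 0x2b                   real selector forced to RPL 3
     * fixup_rpl(0x2b) == 0x2b                   already RPL 3, unchanged
     */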
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index c10850ae6f09..d6cf1e23c2a3 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -190,7 +190,7 @@ static void ap_starting(void)
apic_ap_setup();
/* Save the processor parameters. */
- smp_store_cpu_info(cpuid);
+ identify_secondary_cpu(cpuid);
/*
* The topology information must be up to date before
@@ -215,7 +215,7 @@ static void ap_calibrate_delay(void)
{
/*
* Calibrate the delay loop and update loops_per_jiffy in cpu_data.
- * smp_store_cpu_info() stored a value that is close but not as
+ * identify_secondary_cpu() stored a value that is close but not as
* accurate as the value just calculated.
*
* As this is invoked after the TSC synchronization check,
@@ -229,7 +229,7 @@ static void ap_calibrate_delay(void)
/*
* Activate a secondary processor.
*/
-static void notrace start_secondary(void *unused)
+static void notrace __noendbr start_secondary(void *unused)
{
/*
* Don't put *anything* except direct CPU state initialization
@@ -314,26 +314,7 @@ static void notrace start_secondary(void *unused)
wmb();
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
-
-/*
- * The bootstrap kernel entry code has set these up. Save them for
- * a given CPU
- */
-void smp_store_cpu_info(int id)
-{
- struct cpuinfo_x86 *c = &cpu_data(id);
-
- /* Copy boot_cpu_data only on the first bringup */
- if (!c->initialized)
- *c = boot_cpu_data;
- c->cpu_index = id;
- /*
- * During boot time, CPU0 has this setup already. Save the info when
- * bringing up an AP.
- */
- identify_secondary_cpu(c);
- c->initialized = true;
-}
+ANNOTATE_NOENDBR_SYM(start_secondary);
static bool
topology_same_node(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
@@ -654,10 +635,9 @@ static void impress_friends(void)
* But that slows boot and resume on modern processors, which include
* many cores and don't require that delay.
*
- * Cmdline "init_cpu_udelay=" is available to over-ride this delay.
- * Modern processor families are quirked to remove the delay entirely.
+ * Cmdline "cpu_init_udelay=" is available to override this delay.
*/
-#define UDELAY_10MS_DEFAULT 10000
+#define UDELAY_10MS_LEGACY 10000
static unsigned int init_udelay = UINT_MAX;
@@ -669,21 +649,21 @@ static int __init cpu_init_udelay(char *str)
}
early_param("cpu_init_udelay", cpu_init_udelay);
-static void __init smp_quirk_init_udelay(void)
+static void __init smp_set_init_udelay(void)
{
/* if cmdline changed it from default, leave it alone */
if (init_udelay != UINT_MAX)
return;
/* if modern processor, use no delay */
- if (((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 == 6)) ||
- ((boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) && (boot_cpu_data.x86 >= 0x18)) ||
- ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && (boot_cpu_data.x86 >= 0xF))) {
+ if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && boot_cpu_data.x86_vfm >= INTEL_PENTIUM_PRO) ||
+ (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON && boot_cpu_data.x86 >= 0x18) ||
+ (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && boot_cpu_data.x86 >= 0xF)) {
init_udelay = 0;
return;
}
/* else, use legacy delay */
- init_udelay = UDELAY_10MS_DEFAULT;
+ init_udelay = UDELAY_10MS_LEGACY;
}
/*
@@ -841,7 +821,7 @@ int common_cpu_up(unsigned int cpu, struct task_struct *idle)
/* Just in case we booted with a single CPU. */
alternatives_enable_smp();
- per_cpu(pcpu_hot.current_task, cpu) = idle;
+ per_cpu(current_task, cpu) = idle;
cpu_init_stack_canary(cpu, idle);
/* Initialize the interrupt stack(s) */
@@ -851,7 +831,7 @@ int common_cpu_up(unsigned int cpu, struct task_struct *idle)
#ifdef CONFIG_X86_32
/* Stack for startup_32 can be just as for start_secondary onwards */
- per_cpu(pcpu_hot.top_of_stack, cpu) = task_top_of_stack(idle);
+ per_cpu(cpu_current_top_of_stack, cpu) = task_top_of_stack(idle);
#endif
return 0;
}
@@ -1094,7 +1074,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
uv_system_init();
- smp_quirk_init_udelay();
+ smp_set_init_udelay();
speculative_store_bypass_ht_init();
@@ -1262,43 +1242,9 @@ void play_dead_common(void)
* We need to flush the caches before going to sleep, lest we have
* dirty data in our caches when we come back up.
*/
-static inline void mwait_play_dead(void)
+void __noreturn mwait_play_dead(unsigned int eax_hint)
{
struct mwait_cpu_dead *md = this_cpu_ptr(&mwait_cpu_dead);
- unsigned int eax, ebx, ecx, edx;
- unsigned int highest_cstate = 0;
- unsigned int highest_subcstate = 0;
- int i;
-
- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
- boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
- return;
- if (!this_cpu_has(X86_FEATURE_MWAIT))
- return;
- if (!this_cpu_has(X86_FEATURE_CLFLUSH))
- return;
-
- eax = CPUID_LEAF_MWAIT;
- ecx = 0;
- native_cpuid(&eax, &ebx, &ecx, &edx);
-
- /*
- * eax will be 0 if EDX enumeration is not valid.
- * Initialized below to cstate, sub_cstate value when EDX is valid.
- */
- if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED)) {
- eax = 0;
- } else {
- edx >>= MWAIT_SUBSTATE_SIZE;
- for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) {
- if (edx & MWAIT_SUBSTATE_MASK) {
- highest_cstate = i;
- highest_subcstate = edx & MWAIT_SUBSTATE_MASK;
- }
- }
- eax = (highest_cstate << MWAIT_SUBSTATE_SIZE) |
- (highest_subcstate - 1);
- }
/* Set up state for the kexec() hack below */
md->status = CPUDEAD_MWAIT_WAIT;
@@ -1319,7 +1265,7 @@ static inline void mwait_play_dead(void)
mb();
__monitor(md, 0, 0);
mb();
- __mwait(eax, 0);
+ __mwait(eax_hint, 0);
if (READ_ONCE(md->control) == CPUDEAD_MWAIT_KEXEC_HLT) {
/*
@@ -1391,9 +1337,9 @@ void native_play_dead(void)
play_dead_common();
tboot_shutdown(TB_SHUTDOWN_WFS);
- mwait_play_dead();
- if (cpuidle_play_dead())
- hlt_play_dead();
+ /* Below returns only on error. */
+ cpuidle_play_dead();
+ hlt_play_dead();
}
#else /* ... !CONFIG_HOTPLUG_CPU */
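A quick reminder of the hint encoding the removed block computed, now supplied by the caller through eax_hint (worked example based on the deleted code above):

    /*
     * With MWAIT_SUBSTATE_SIZE == 4, a deepest C-state of 2 with sub-state 1
     * encodes as eax_hint = (2 << 4) | (1 - 1) = 0x20.
     */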
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 5e3e036e6e53..9f88b8a78e50 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -94,10 +94,20 @@ __always_inline int is_valid_bugaddr(unsigned long addr)
/*
* Check for UD1 or UD2, accounting for Address Size Override Prefixes.
- * If it's a UD1, get the ModRM byte to pass along to UBSan.
+ * If it's a UD1, further decode to determine its use:
+ *
+ * FineIBT: ea (bad)
+ * FineIBT: f0 75 f9 lock jne . - 6
+ * UBSan{0}: 67 0f b9 00 ud1 (%eax),%eax
+ * UBSan{10}: 67 0f b9 40 10 ud1 0x10(%eax),%eax
+ * static_call: 0f b9 cc ud1 %esp,%ecx
+ *
+ * Notably UBSAN uses EAX, static_call uses ECX.
*/
-__always_inline int decode_bug(unsigned long addr, u32 *imm)
+__always_inline int decode_bug(unsigned long addr, s32 *imm, int *len)
{
+ unsigned long start = addr;
+ bool lock = false;
u8 v;
if (addr < TASK_SIZE_MAX)
@@ -106,28 +116,67 @@ __always_inline int decode_bug(unsigned long addr, u32 *imm)
v = *(u8 *)(addr++);
if (v == INSN_ASOP)
v = *(u8 *)(addr++);
- if (v != OPCODE_ESCAPE)
+
+ if (v == INSN_LOCK) {
+ lock = true;
+ v = *(u8 *)(addr++);
+ }
+
+ switch (v) {
+ case 0x70 ... 0x7f: /* Jcc.d8 */
+ addr += 1; /* d8 */
+ *len = addr - start;
+ WARN_ON_ONCE(!lock);
+ return BUG_LOCK;
+
+ case 0xea:
+ *len = addr - start;
+ return BUG_EA;
+
+ case OPCODE_ESCAPE:
+ break;
+
+ default:
return BUG_NONE;
+ }
v = *(u8 *)(addr++);
- if (v == SECOND_BYTE_OPCODE_UD2)
+ if (v == SECOND_BYTE_OPCODE_UD2) {
+ *len = addr - start;
return BUG_UD2;
+ }
- if (!IS_ENABLED(CONFIG_UBSAN_TRAP) || v != SECOND_BYTE_OPCODE_UD1)
+ if (v != SECOND_BYTE_OPCODE_UD1)
return BUG_NONE;
- /* Retrieve the immediate (type value) for the UBSAN UD1 */
- v = *(u8 *)(addr++);
- if (X86_MODRM_RM(v) == 4)
- addr++;
-
*imm = 0;
- if (X86_MODRM_MOD(v) == 1)
- *imm = *(u8 *)addr;
- else if (X86_MODRM_MOD(v) == 2)
- *imm = *(u32 *)addr;
- else
- WARN_ONCE(1, "Unexpected MODRM_MOD: %u\n", X86_MODRM_MOD(v));
+ v = *(u8 *)(addr++); /* ModRM */
+
+ if (X86_MODRM_MOD(v) != 3 && X86_MODRM_RM(v) == 4)
+ addr++; /* SIB */
+
+ /* Decode immediate, if present */
+ switch (X86_MODRM_MOD(v)) {
+ case 0: if (X86_MODRM_RM(v) == 5)
+ addr += 4; /* RIP + disp32 */
+ break;
+
+ case 1: *imm = *(s8 *)addr;
+ addr += 1;
+ break;
+
+ case 2: *imm = *(s32 *)addr;
+ addr += 4;
+ break;
+
+ case 3: break;
+ }
+
+ /* record instruction length */
+ *len = addr - start;
+
+ if (X86_MODRM_REG(v) == 0) /* EAX */
+ return BUG_UD1_UBSAN;
return BUG_UD1;
}
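A worked example for the decode_bug() rewrite above, using the UBSan row from the comment table (illustrative):

    /*
     * "67 0f b9 40 10": 0x67 is the ASOP prefix and is skipped, 0x0f 0xb9 is
     * UD1, ModRM 0x40 has MOD=1, REG=0 (EAX) and RM=0, so a signed 8-bit
     * immediate follows: *imm = 0x10, *len = 5, return value BUG_UD1_UBSAN.
     */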
@@ -257,11 +306,12 @@ static inline void handle_invalid_op(struct pt_regs *regs)
static noinstr bool handle_bug(struct pt_regs *regs)
{
+ unsigned long addr = regs->ip;
bool handled = false;
- int ud_type;
- u32 imm;
+ int ud_type, ud_len;
+ s32 ud_imm;
- ud_type = decode_bug(regs->ip, &imm);
+ ud_type = decode_bug(addr, &ud_imm, &ud_len);
if (ud_type == BUG_NONE)
return handled;
@@ -281,15 +331,47 @@ static noinstr bool handle_bug(struct pt_regs *regs)
*/
if (regs->flags & X86_EFLAGS_IF)
raw_local_irq_enable();
- if (ud_type == BUG_UD2) {
- if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN ||
- handle_cfi_failure(regs) == BUG_TRAP_TYPE_WARN) {
- regs->ip += LEN_UD2;
+
+ switch (ud_type) {
+ case BUG_UD2:
+ if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN) {
handled = true;
+ break;
}
- } else if (IS_ENABLED(CONFIG_UBSAN_TRAP)) {
- pr_crit("%s at %pS\n", report_ubsan_failure(regs, imm), (void *)regs->ip);
+ fallthrough;
+
+ case BUG_EA:
+ case BUG_LOCK:
+ if (handle_cfi_failure(regs) == BUG_TRAP_TYPE_WARN) {
+ handled = true;
+ break;
+ }
+ break;
+
+ case BUG_UD1_UBSAN:
+ if (IS_ENABLED(CONFIG_UBSAN_TRAP)) {
+ pr_crit("%s at %pS\n",
+ report_ubsan_failure(regs, ud_imm),
+ (void *)regs->ip);
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ /*
+ * When continuing, and regs->ip hasn't changed, move it to the next
+ * instruction. When not continuing execution, restore the instruction
+ * pointer.
+ */
+ if (handled) {
+ if (regs->ip == addr)
+ regs->ip += ud_len;
+ } else {
+ regs->ip = addr;
}
+
if (regs->flags & X86_EFLAGS_IF)
raw_local_irq_disable();
instrumentation_end();
diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c
index deeb02825670..48e6cc1cb017 100644
--- a/arch/x86/kernel/tsc_msr.c
+++ b/arch/x86/kernel/tsc_msr.c
@@ -152,7 +152,7 @@ static const struct x86_cpu_id tsc_msr_cpu_ids[] = {
X86_MATCH_VFM(INTEL_ATOM_SILVERMONT, &freq_desc_byt),
X86_MATCH_VFM(INTEL_ATOM_SILVERMONT_MID, &freq_desc_tng),
X86_MATCH_VFM(INTEL_ATOM_AIRMONT, &freq_desc_cht),
- X86_MATCH_VFM(INTEL_ATOM_AIRMONT_MID, &freq_desc_ann),
+ X86_MATCH_VFM(INTEL_ATOM_SILVERMONT_MID2, &freq_desc_ann),
X86_MATCH_VFM(INTEL_ATOM_AIRMONT_NP, &freq_desc_lgm),
{}
};
diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S
index 1258a5872d12..37ad43792452 100644
--- a/arch/x86/kernel/verify_cpu.S
+++ b/arch/x86/kernel/verify_cpu.S
@@ -29,8 +29,12 @@
*/
#include <asm/cpufeatures.h>
+#include <asm/cpufeaturemasks.h>
#include <asm/msr-index.h>
+#define SSE_MASK \
+ (REQUIRED_MASK0 & ((1<<(X86_FEATURE_XMM & 31)) | (1<<(X86_FEATURE_XMM2 & 31))))
+
SYM_FUNC_START_LOCAL(verify_cpu)
pushf # Save caller passed flags
push $0 # Kill any dangerous flags
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 0deb4887d6e9..ccdc45e5b759 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -43,7 +43,8 @@ ENTRY(phys_startup_64)
#endif
jiffies = jiffies_64;
-const_pcpu_hot = pcpu_hot;
+const_current_task = current_task;
+const_cpu_current_top_of_stack = cpu_current_top_of_stack;
#if defined(CONFIG_X86_64)
/*
@@ -112,12 +113,6 @@ ASSERT(__relocate_kernel_end - __relocate_kernel_start <= KEXEC_CONTROL_CODE_MAX
PHDRS {
text PT_LOAD FLAGS(5); /* R_E */
data PT_LOAD FLAGS(6); /* RW_ */
-#ifdef CONFIG_X86_64
-#ifdef CONFIG_SMP
- percpu PT_LOAD FLAGS(6); /* RW_ */
-#endif
- init PT_LOAD FLAGS(7); /* RWE */
-#endif
note PT_NOTE FLAGS(0); /* ___ */
}
@@ -193,6 +188,8 @@ SECTIONS
PAGE_ALIGNED_DATA(PAGE_SIZE)
+ CACHE_HOT_DATA(L1_CACHE_BYTES)
+
CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES)
DATA_DATA
@@ -216,21 +213,7 @@ SECTIONS
__init_begin = .; /* paired with __init_end */
}
-#if defined(CONFIG_X86_64) && defined(CONFIG_SMP)
- /*
- * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the
- * output PHDR, so the next output section - .init.text - should
- * start another segment - init.
- */
- PERCPU_VADDR(INTERNODE_CACHE_BYTES, 0, :percpu)
- ASSERT(SIZEOF(.data..percpu) < CONFIG_PHYSICAL_START,
- "per-CPU data too large - increase CONFIG_PHYSICAL_START")
-#endif
-
INIT_TEXT_SECTION(PAGE_SIZE)
-#ifdef CONFIG_X86_64
- :init
-#endif
/*
* Section for code used exclusively before alternatives are run. All
@@ -347,9 +330,8 @@ SECTIONS
EXIT_DATA
}
-#if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP)
- PERCPU_SECTION(INTERNODE_CACHE_BYTES)
-#endif
+ PERCPU_SECTION(L1_CACHE_BYTES)
+ ASSERT(__per_cpu_hot_end - __per_cpu_hot_start <= 64, "percpu cache hot data too large")
RUNTIME_CONST_VARIABLES
RUNTIME_CONST(ptr, USER_PTR_MAX)
@@ -493,19 +475,6 @@ SECTIONS
PROVIDE(__ref_stack_chk_guard = __stack_chk_guard);
#ifdef CONFIG_X86_64
-/*
- * Per-cpu symbols which need to be offset from __per_cpu_load
- * for the boot processor.
- */
-#define INIT_PER_CPU(x) init_per_cpu__##x = ABSOLUTE(x) + __per_cpu_load
-INIT_PER_CPU(gdt_page);
-INIT_PER_CPU(fixed_percpu_data);
-INIT_PER_CPU(irq_stack_backing_store);
-
-#ifdef CONFIG_SMP
-. = ASSERT((fixed_percpu_data == 0),
- "fixed_percpu_data is not at start of per-cpu area");
-#endif
#ifdef CONFIG_MITIGATION_UNRET_ENTRY
. = ASSERT((retbleed_return_thunk & 0x3f) == 0, "retbleed_return_thunk not cacheline-aligned");
diff --git a/arch/x86/kvm/vmx/vmx_ops.h b/arch/x86/kvm/vmx/vmx_ops.h
index 633c87e2fd92..96677576c836 100644
--- a/arch/x86/kvm/vmx/vmx_ops.h
+++ b/arch/x86/kvm/vmx/vmx_ops.h
@@ -118,7 +118,7 @@ do_exception:
#else /* !CONFIG_CC_HAS_ASM_GOTO_OUTPUT */
- asm volatile("1: vmread %2, %1\n\t"
+ asm volatile("1: vmread %[field], %[output]\n\t"
".byte 0x3e\n\t" /* branch taken hint */
"ja 3f\n\t"
@@ -127,24 +127,26 @@ do_exception:
* @field, and bounce through the trampoline to preserve
* volatile registers.
*/
- "xorl %k1, %k1\n\t"
+ "xorl %k[output], %k[output]\n\t"
"2:\n\t"
- "push %1\n\t"
- "push %2\n\t"
+ "push %[output]\n\t"
+ "push %[field]\n\t"
"call vmread_error_trampoline\n\t"
/*
* Unwind the stack. Note, the trampoline zeros out the
* memory for @fault so that the result is '0' on error.
*/
- "pop %2\n\t"
- "pop %1\n\t"
+ "pop %[field]\n\t"
+ "pop %[output]\n\t"
"3:\n\t"
/* VMREAD faulted. As above, except push '1' for @fault. */
- _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_ONE_REG, %1)
+ _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_ONE_REG, %[output])
- : ASM_CALL_CONSTRAINT, "=&r"(value) : "r"(field) : "cc");
+ : ASM_CALL_CONSTRAINT, [output] "=&r" (value)
+ : [field] "r" (field)
+ : "cc");
return value;
#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */
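For readers less familiar with the named-operand form the hunk above switches to, a self-contained sketch (not from this patch):

    static inline unsigned long copy_reg(unsigned long src)
    {
            unsigned long dst;

            /* %[out]/%[in] refer to the bracketed names instead of %0/%1. */
            asm("mov %[in], %[out]" : [out] "=r" (dst) : [in] "r" (src));
            return dst;
    }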
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 8a59c61624c2..64ccecedc9f8 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -56,7 +56,7 @@ ifeq ($(CONFIG_X86_32),y)
lib-y += string_32.o
lib-y += memmove_32.o
lib-y += cmpxchg8b_emu.o
-ifneq ($(CONFIG_X86_CMPXCHG64),y)
+ifneq ($(CONFIG_X86_CX8),y)
lib-y += atomic64_386_32.o
endif
else
@@ -66,5 +66,6 @@ endif
lib-y += clear_page_64.o copy_page_64.o
lib-y += memmove_64.o memset_64.o
lib-y += copy_user_64.o copy_user_uncached_64.o
- lib-y += cmpxchg16b_emu.o
+ lib-y += cmpxchg16b_emu.o
+ lib-y += bhi.o
endif
diff --git a/arch/x86/lib/bhi.S b/arch/x86/lib/bhi.S
new file mode 100644
index 000000000000..58891681261b
--- /dev/null
+++ b/arch/x86/lib/bhi.S
@@ -0,0 +1,147 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/linkage.h>
+#include <asm/unwind_hints.h>
+#include <asm/nospec-branch.h>
+
+/*
+ * Notably, the FineIBT preamble calling these will have ZF set and r10 zero.
+ *
+ * The very last element is in fact larger than 32 bytes, but since it's the
+ * last element, this does not matter.
+ *
+ * There are 2 #UD sites, located between 0,1-2,3 and 4,5-6,7, such that they
+ * can be reached using Jcc.d8; these elements (1 and 5) have sufficiently
+ * big alignment holes for this to not stagger the array.
+ */
+
+.pushsection .noinstr.text, "ax"
+
+ .align 32
+SYM_CODE_START(__bhi_args)
+
+#ifdef CONFIG_FINEIBT_BHI
+
+ .align 32
+SYM_INNER_LABEL(__bhi_args_0, SYM_L_LOCAL)
+ ANNOTATE_NOENDBR
+ UNWIND_HINT_FUNC
+ jne .Lud_1
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
+
+ .align 32
+SYM_INNER_LABEL(__bhi_args_1, SYM_L_LOCAL)
+ ANNOTATE_NOENDBR
+ UNWIND_HINT_FUNC
+ jne .Lud_1
+ cmovne %r10, %rdi
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
+
+ .align 8
+ ANNOTATE_REACHABLE
+.Lud_1: ud2
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
+
+ .align 32
+SYM_INNER_LABEL(__bhi_args_2, SYM_L_LOCAL)
+ ANNOTATE_NOENDBR
+ UNWIND_HINT_FUNC
+ jne .Lud_1
+ cmovne %r10, %rdi
+ cmovne %r10, %rsi
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
+
+ .align 32
+SYM_INNER_LABEL(__bhi_args_3, SYM_L_LOCAL)
+ ANNOTATE_NOENDBR
+ UNWIND_HINT_FUNC
+ jne .Lud_1
+ cmovne %r10, %rdi
+ cmovne %r10, %rsi
+ cmovne %r10, %rdx
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
+
+ .align 32
+SYM_INNER_LABEL(__bhi_args_4, SYM_L_LOCAL)
+ ANNOTATE_NOENDBR
+ UNWIND_HINT_FUNC
+ jne .Lud_2
+ cmovne %r10, %rdi
+ cmovne %r10, %rsi
+ cmovne %r10, %rdx
+ cmovne %r10, %rcx
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
+
+ .align 32
+SYM_INNER_LABEL(__bhi_args_5, SYM_L_LOCAL)
+ ANNOTATE_NOENDBR
+ UNWIND_HINT_FUNC
+ jne .Lud_2
+ cmovne %r10, %rdi
+ cmovne %r10, %rsi
+ cmovne %r10, %rdx
+ cmovne %r10, %rcx
+ cmovne %r10, %r8
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
+
+ .align 8
+ ANNOTATE_REACHABLE
+.Lud_2: ud2
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
+
+ .align 32
+SYM_INNER_LABEL(__bhi_args_6, SYM_L_LOCAL)
+ ANNOTATE_NOENDBR
+ UNWIND_HINT_FUNC
+ jne .Lud_2
+ cmovne %r10, %rdi
+ cmovne %r10, %rsi
+ cmovne %r10, %rdx
+ cmovne %r10, %rcx
+ cmovne %r10, %r8
+ cmovne %r10, %r9
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
+
+ .align 32
+SYM_INNER_LABEL(__bhi_args_7, SYM_L_LOCAL)
+ ANNOTATE_NOENDBR
+ UNWIND_HINT_FUNC
+ jne .Lud_2
+ cmovne %r10, %rdi
+ cmovne %r10, %rsi
+ cmovne %r10, %rdx
+ cmovne %r10, %rcx
+ cmovne %r10, %r8
+ cmovne %r10, %r9
+ cmovne %r10, %rsp
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
+
+#endif /* CONFIG_FINEIBT_BHI */
+
+ .align 32
+SYM_INNER_LABEL(__bhi_args_end, SYM_L_GLOBAL)
+ ANNOTATE_NOENDBR
+ nop /* Work around toolchain+objtool quirk */
+SYM_CODE_END(__bhi_args)
+
+.popsection
diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S
index 2760a15fbc00..a508e4a8c66a 100644
--- a/arch/x86/lib/clear_page_64.S
+++ b/arch/x86/lib/clear_page_64.S
@@ -1,6 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0-only */
#include <linux/export.h>
#include <linux/linkage.h>
+#include <linux/cfi_types.h>
+#include <linux/objtool.h>
#include <asm/asm.h>
/*
@@ -14,7 +16,7 @@
* Zero a page.
* %rdi - page
*/
-SYM_FUNC_START(clear_page_rep)
+SYM_TYPED_FUNC_START(clear_page_rep)
movl $4096/8,%ecx
xorl %eax,%eax
rep stosq
@@ -22,7 +24,7 @@ SYM_FUNC_START(clear_page_rep)
SYM_FUNC_END(clear_page_rep)
EXPORT_SYMBOL_GPL(clear_page_rep)
-SYM_FUNC_START(clear_page_orig)
+SYM_TYPED_FUNC_START(clear_page_orig)
xorl %eax,%eax
movl $4096/64,%ecx
.p2align 4
@@ -44,7 +46,7 @@ SYM_FUNC_START(clear_page_orig)
SYM_FUNC_END(clear_page_orig)
EXPORT_SYMBOL_GPL(clear_page_orig)
-SYM_FUNC_START(clear_page_erms)
+SYM_TYPED_FUNC_START(clear_page_erms)
movl $4096,%ecx
xorl %eax,%eax
rep stosb
@@ -63,6 +65,7 @@ EXPORT_SYMBOL_GPL(clear_page_erms)
* rcx: uncleared bytes or 0 if successful.
*/
SYM_FUNC_START(rep_stos_alternative)
+ ANNOTATE_NOENDBR
cmpq $64,%rcx
jae .Lunrolled
diff --git a/arch/x86/lib/cmpxchg8b_emu.S b/arch/x86/lib/cmpxchg8b_emu.S
index 1c96be769adc..d4bb24347ff8 100644
--- a/arch/x86/lib/cmpxchg8b_emu.S
+++ b/arch/x86/lib/cmpxchg8b_emu.S
@@ -7,7 +7,7 @@
.text
-#ifndef CONFIG_X86_CMPXCHG64
+#ifndef CONFIG_X86_CX8
/*
* Emulate 'cmpxchg8b (%esi)' on UP
diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S
index d6ae793d08fa..d8e87fedc20d 100644
--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S
@@ -3,6 +3,7 @@
#include <linux/export.h>
#include <linux/linkage.h>
+#include <linux/cfi_types.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
@@ -13,7 +14,7 @@
* prefetch distance based on SMP/UP.
*/
ALIGN
-SYM_FUNC_START(copy_page)
+SYM_TYPED_FUNC_START(copy_page)
ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD
movl $4096/8, %ecx
rep movsq
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index fc9fb5d06174..aa8c341b2441 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -8,6 +8,8 @@
#include <linux/export.h>
#include <linux/linkage.h>
+#include <linux/cfi_types.h>
+#include <linux/objtool.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
#include <asm/asm.h>
@@ -30,6 +32,7 @@
* it simpler for us, we can clobber rsi/rdi and rax freely.
*/
SYM_FUNC_START(rep_movs_alternative)
+ ANNOTATE_NOENDBR
cmpq $64,%rcx
jae .Llarge
diff --git a/arch/x86/lib/copy_user_uncached_64.S b/arch/x86/lib/copy_user_uncached_64.S
index 2918e36eece2..18350b343c2a 100644
--- a/arch/x86/lib/copy_user_uncached_64.S
+++ b/arch/x86/lib/copy_user_uncached_64.S
@@ -5,6 +5,7 @@
#include <linux/export.h>
#include <linux/linkage.h>
+#include <linux/objtool.h>
#include <asm/asm.h>
/*
@@ -27,6 +28,7 @@
* rax uncopied bytes or 0 if successful.
*/
SYM_FUNC_START(__copy_user_nocache)
+ ANNOTATE_NOENDBR
/* If destination is not 7-byte aligned, we'll have to align it */
testb $7,%dil
jne .Lalign
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index 89ecd57c9d42..9d5654b8a72a 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -28,22 +28,20 @@
#include <linux/export.h>
#include <linux/linkage.h>
+#include <linux/objtool.h>
#include <asm/page_types.h>
#include <asm/errno.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/asm.h>
#include <asm/smap.h>
+#include <asm/runtime-const.h>
#define ASM_BARRIER_NOSPEC ALTERNATIVE "", "lfence", X86_FEATURE_LFENCE_RDTSC
.macro check_range size:req
.if IS_ENABLED(CONFIG_X86_64)
- movq $0x0123456789abcdef,%rdx
- 1:
- .pushsection runtime_ptr_USER_PTR_MAX,"a"
- .long 1b - 8 - .
- .popsection
+ RUNTIME_CONST_PTR USER_PTR_MAX, rdx
cmp %rdx, %rax
cmova %rdx, %rax
.else
@@ -62,6 +60,7 @@
.text
SYM_FUNC_START(__get_user_1)
+ ANNOTATE_NOENDBR
check_range size=1
ASM_STAC
UACCESS movzbl (%_ASM_AX),%edx
@@ -72,6 +71,7 @@ SYM_FUNC_END(__get_user_1)
EXPORT_SYMBOL(__get_user_1)
SYM_FUNC_START(__get_user_2)
+ ANNOTATE_NOENDBR
check_range size=2
ASM_STAC
UACCESS movzwl (%_ASM_AX),%edx
@@ -82,6 +82,7 @@ SYM_FUNC_END(__get_user_2)
EXPORT_SYMBOL(__get_user_2)
SYM_FUNC_START(__get_user_4)
+ ANNOTATE_NOENDBR
check_range size=4
ASM_STAC
UACCESS movl (%_ASM_AX),%edx
@@ -92,6 +93,7 @@ SYM_FUNC_END(__get_user_4)
EXPORT_SYMBOL(__get_user_4)
SYM_FUNC_START(__get_user_8)
+ ANNOTATE_NOENDBR
#ifndef CONFIG_X86_64
xor %ecx,%ecx
#endif
@@ -111,6 +113,7 @@ EXPORT_SYMBOL(__get_user_8)
/* .. and the same for __get_user, just without the range checks */
SYM_FUNC_START(__get_user_nocheck_1)
+ ANNOTATE_NOENDBR
ASM_STAC
ASM_BARRIER_NOSPEC
UACCESS movzbl (%_ASM_AX),%edx
@@ -121,6 +124,7 @@ SYM_FUNC_END(__get_user_nocheck_1)
EXPORT_SYMBOL(__get_user_nocheck_1)
SYM_FUNC_START(__get_user_nocheck_2)
+ ANNOTATE_NOENDBR
ASM_STAC
ASM_BARRIER_NOSPEC
UACCESS movzwl (%_ASM_AX),%edx
@@ -131,6 +135,7 @@ SYM_FUNC_END(__get_user_nocheck_2)
EXPORT_SYMBOL(__get_user_nocheck_2)
SYM_FUNC_START(__get_user_nocheck_4)
+ ANNOTATE_NOENDBR
ASM_STAC
ASM_BARRIER_NOSPEC
UACCESS movl (%_ASM_AX),%edx
@@ -141,6 +146,7 @@ SYM_FUNC_END(__get_user_nocheck_4)
EXPORT_SYMBOL(__get_user_nocheck_4)
SYM_FUNC_START(__get_user_nocheck_8)
+ ANNOTATE_NOENDBR
ASM_STAC
ASM_BARRIER_NOSPEC
#ifdef CONFIG_X86_64
diff --git a/arch/x86/lib/hweight.S b/arch/x86/lib/hweight.S
index 774bdf3e6f0a..edbeb3ecad38 100644
--- a/arch/x86/lib/hweight.S
+++ b/arch/x86/lib/hweight.S
@@ -1,6 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/export.h>
#include <linux/linkage.h>
+#include <linux/objtool.h>
#include <asm/asm.h>
@@ -9,6 +10,7 @@
* %rdi: w
*/
SYM_FUNC_START(__sw_hweight32)
+ ANNOTATE_NOENDBR
#ifdef CONFIG_X86_64
movl %edi, %eax # w
@@ -42,6 +44,7 @@ EXPORT_SYMBOL(__sw_hweight32)
*/
#ifdef CONFIG_X86_64
SYM_FUNC_START(__sw_hweight64)
+ ANNOTATE_NOENDBR
pushq %rdi
pushq %rdx
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S
index 1b60ae81ecd8..aa1f92ee6b2e 100644
--- a/arch/x86/lib/memmove_64.S
+++ b/arch/x86/lib/memmove_64.S
@@ -8,6 +8,7 @@
*/
#include <linux/export.h>
#include <linux/linkage.h>
+#include <linux/cfi_types.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
@@ -26,7 +27,7 @@
* Output:
* rax: dest
*/
-SYM_FUNC_START(__memmove)
+SYM_TYPED_FUNC_START(__memmove)
mov %rdi, %rax
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index 0199d56cb479..d66b710d628f 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -3,6 +3,7 @@
#include <linux/export.h>
#include <linux/linkage.h>
+#include <linux/cfi_types.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
@@ -28,7 +29,7 @@
* only for the return value that is the same as the source input,
* which the compiler could/should do much better anyway.
*/
-SYM_FUNC_START(__memset)
+SYM_TYPED_FUNC_START(__memset)
ALTERNATIVE "jmp memset_orig", "", X86_FEATURE_FSRS
movq %rdi,%r9
diff --git a/arch/x86/lib/msr-reg.S b/arch/x86/lib/msr-reg.S
index ebd259f31496..5ef8494896e8 100644
--- a/arch/x86/lib/msr-reg.S
+++ b/arch/x86/lib/msr-reg.S
@@ -1,6 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/linkage.h>
#include <linux/errno.h>
+#include <linux/cfi_types.h>
#include <asm/asm.h>
#include <asm/msr.h>
@@ -12,7 +13,7 @@
*
*/
.macro op_safe_regs op
-SYM_FUNC_START(\op\()_safe_regs)
+SYM_TYPED_FUNC_START(\op\()_safe_regs)
pushq %rbx
pushq %r12
movq %rdi, %r10 /* Save pointer */
diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S
index 975c9c18263d..46d9e9b98a61 100644
--- a/arch/x86/lib/putuser.S
+++ b/arch/x86/lib/putuser.S
@@ -13,6 +13,7 @@
*/
#include <linux/export.h>
#include <linux/linkage.h>
+#include <linux/objtool.h>
#include <asm/thread_info.h>
#include <asm/errno.h>
#include <asm/asm.h>
@@ -45,6 +46,7 @@
.text
SYM_FUNC_START(__put_user_1)
+ ANNOTATE_NOENDBR
check_range size=1
ASM_STAC
1: movb %al,(%_ASM_CX)
@@ -55,6 +57,7 @@ SYM_FUNC_END(__put_user_1)
EXPORT_SYMBOL(__put_user_1)
SYM_FUNC_START(__put_user_nocheck_1)
+ ANNOTATE_NOENDBR
ASM_STAC
2: movb %al,(%_ASM_CX)
xor %ecx,%ecx
@@ -64,6 +67,7 @@ SYM_FUNC_END(__put_user_nocheck_1)
EXPORT_SYMBOL(__put_user_nocheck_1)
SYM_FUNC_START(__put_user_2)
+ ANNOTATE_NOENDBR
check_range size=2
ASM_STAC
3: movw %ax,(%_ASM_CX)
@@ -74,6 +78,7 @@ SYM_FUNC_END(__put_user_2)
EXPORT_SYMBOL(__put_user_2)
SYM_FUNC_START(__put_user_nocheck_2)
+ ANNOTATE_NOENDBR
ASM_STAC
4: movw %ax,(%_ASM_CX)
xor %ecx,%ecx
@@ -83,6 +88,7 @@ SYM_FUNC_END(__put_user_nocheck_2)
EXPORT_SYMBOL(__put_user_nocheck_2)
SYM_FUNC_START(__put_user_4)
+ ANNOTATE_NOENDBR
check_range size=4
ASM_STAC
5: movl %eax,(%_ASM_CX)
@@ -93,6 +99,7 @@ SYM_FUNC_END(__put_user_4)
EXPORT_SYMBOL(__put_user_4)
SYM_FUNC_START(__put_user_nocheck_4)
+ ANNOTATE_NOENDBR
ASM_STAC
6: movl %eax,(%_ASM_CX)
xor %ecx,%ecx
@@ -102,6 +109,7 @@ SYM_FUNC_END(__put_user_nocheck_4)
EXPORT_SYMBOL(__put_user_nocheck_4)
SYM_FUNC_START(__put_user_8)
+ ANNOTATE_NOENDBR
check_range size=8
ASM_STAC
7: mov %_ASM_AX,(%_ASM_CX)
@@ -115,6 +123,7 @@ SYM_FUNC_END(__put_user_8)
EXPORT_SYMBOL(__put_user_8)
SYM_FUNC_START(__put_user_nocheck_8)
+ ANNOTATE_NOENDBR
ASM_STAC
9: mov %_ASM_AX,(%_ASM_CX)
#ifdef CONFIG_X86_32
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index 391059b2c6fb..a26c43abd47d 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -326,6 +326,7 @@ SYM_FUNC_END(retbleed_untrain_ret)
#if defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO)
SYM_FUNC_START(entry_untrain_ret)
+ ANNOTATE_NOENDBR
ALTERNATIVE JMP_RETBLEED_UNTRAIN_RET, JMP_SRSO_UNTRAIN_RET, X86_FEATURE_SRSO
SYM_FUNC_END(entry_untrain_ret)
__EXPORT_THUNK(entry_untrain_ret)
@@ -342,7 +343,7 @@ SYM_FUNC_START(call_depth_return_thunk)
* case.
*/
CALL_THUNKS_DEBUG_INC_RETS
- shlq $5, PER_CPU_VAR(pcpu_hot + X86_call_depth)
+ shlq $5, PER_CPU_VAR(__x86_call_depth)
jz 1f
ANNOTATE_UNRET_SAFE
ret
diff --git a/arch/x86/math-emu/control_w.h b/arch/x86/math-emu/control_w.h
index 60f4dcc5edc3..93cbc89b34e2 100644
--- a/arch/x86/math-emu/control_w.h
+++ b/arch/x86/math-emu/control_w.h
@@ -11,7 +11,7 @@
#ifndef _CONTROLW_H_
#define _CONTROLW_H_
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#define _Const_(x) $##x
#else
#define _Const_(x) x
diff --git a/arch/x86/math-emu/exception.h b/arch/x86/math-emu/exception.h
index 75230b977577..59961d350bc4 100644
--- a/arch/x86/math-emu/exception.h
+++ b/arch/x86/math-emu/exception.h
@@ -10,7 +10,7 @@
#ifndef _EXCEPTION_H_
#define _EXCEPTION_H_
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#define Const_(x) $##x
#else
#define Const_(x) x
@@ -37,7 +37,7 @@
#define PRECISION_LOST_UP Const_((EX_Precision | SW_C1))
#define PRECISION_LOST_DOWN Const_(EX_Precision)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#ifdef DEBUG
#define EXCEPTION(x) { printk("exception in %s at line %d\n", \
@@ -46,6 +46,6 @@
#define EXCEPTION(x) FPU_exception(x)
#endif
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _EXCEPTION_H_ */
diff --git a/arch/x86/math-emu/fpu_emu.h b/arch/x86/math-emu/fpu_emu.h
index 0c122226ca56..def569c50b76 100644
--- a/arch/x86/math-emu/fpu_emu.h
+++ b/arch/x86/math-emu/fpu_emu.h
@@ -20,7 +20,7 @@
*/
#define PECULIAR_486
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#include "fpu_asm.h"
#define Const(x) $##x
#else
@@ -68,7 +68,7 @@
#define FPU_Exception Const(0x80000000) /* Added to tag returns. */
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include "fpu_system.h"
@@ -213,6 +213,6 @@ asmlinkage int FPU_round(FPU_REG *arg, unsigned int extent, int dummy,
#include "fpu_proto.h"
#endif
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _FPU_EMU_H_ */
diff --git a/arch/x86/math-emu/status_w.h b/arch/x86/math-emu/status_w.h
index b77bafec9526..f642957330ef 100644
--- a/arch/x86/math-emu/status_w.h
+++ b/arch/x86/math-emu/status_w.h
@@ -13,7 +13,7 @@
#include "fpu_emu.h" /* for definition of PECULIAR_486 */
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#define Const__(x) $##x
#else
#define Const__(x) x
@@ -37,7 +37,7 @@
#define SW_Exc_Mask Const__(0x27f) /* Status word exception bit mask */
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#define COMP_A_gt_B 1
#define COMP_A_eq_B 2
@@ -63,6 +63,6 @@ static inline void setcc(int cc)
# define clear_C1()
#endif /* PECULIAR_486 */
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _STATUS_H_ */
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
index 5ab7bd2f1983..bd5d101c5c37 100644
--- a/arch/x86/mm/ident_map.c
+++ b/arch/x86/mm/ident_map.c
@@ -101,9 +101,7 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
pmd_t *pmd;
bool use_gbpage;
- next = (addr & PUD_MASK) + PUD_SIZE;
- if (next > end)
- next = end;
+ next = pud_addr_end(addr, end);
/* if this is already a gbpage, this portion is already mapped */
if (pud_leaf(*pud))
@@ -154,10 +152,7 @@ static int ident_p4d_init(struct x86_mapping_info *info, p4d_t *p4d_page,
p4d_t *p4d = p4d_page + p4d_index(addr);
pud_t *pud;
- next = (addr & P4D_MASK) + P4D_SIZE;
- if (next > end)
- next = end;
-
+ next = p4d_addr_end(addr, end);
if (p4d_present(*p4d)) {
pud = pud_offset(p4d, 0);
result = ident_pud_init(info, pud, addr, next);
@@ -199,10 +194,7 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
pgd_t *pgd = pgd_page + pgd_index(addr);
p4d_t *p4d;
- next = (addr & PGDIR_MASK) + PGDIR_SIZE;
- if (next > end)
- next = end;
-
+ next = pgd_addr_end(addr, end);
if (pgd_present(*pgd)) {
p4d = p4d_offset(pgd, 0);
result = ident_p4d_init(info, p4d, addr, next);
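The p*d_addr_end() helpers used above come from the generic pgtable header; roughly along these lines (paraphrased from memory, check include/linux/pgtable.h for the exact definition):

    #define pud_addr_end(addr, end)                                         \
    ({      unsigned long __boundary = ((addr) + PUD_SIZE) & PUD_MASK;      \
            (__boundary - 1 < (end) - 1) ? __boundary : (end);              \
    })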
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 62aa4d66a032..bfa444a7dbb0 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -645,8 +645,13 @@ static void __init memory_map_top_down(unsigned long map_start,
*/
addr = memblock_phys_alloc_range(PMD_SIZE, PMD_SIZE, map_start,
map_end);
- memblock_phys_free(addr, PMD_SIZE);
- real_end = addr + PMD_SIZE;
+ if (!addr) {
+ pr_warn("Failed to release memory for alloc_low_pages()");
+ real_end = max(map_start, ALIGN_DOWN(map_end, PMD_SIZE));
+ } else {
+ memblock_phys_free(addr, PMD_SIZE);
+ real_end = addr + PMD_SIZE;
+ }
/* step_size need to be small so pgt_buf from BRK could cover it */
step_size = PMD_SIZE;
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index ac41b1e0940d..f288aad8dc74 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -582,7 +582,7 @@ static void __init lowmem_pfn_init(void)
"only %luMB highmem pages available, ignoring highmem size of %luMB!\n"
#define MSG_HIGHMEM_TRIMMED \
- "Warning: only 4GB will be used. Use a HIGHMEM64G enabled kernel!\n"
+ "Warning: only 4GB will be used. Support for for CONFIG_HIGHMEM64G was removed!\n"
/*
* We have more RAM than fits into lowmem - we try to put it into
* highmem, also taking the highmem=x boot parameter into account:
@@ -606,18 +606,13 @@ static void __init highmem_pfn_init(void)
#ifndef CONFIG_HIGHMEM
/* Maximum memory usable is what is directly addressable */
printk(KERN_WARNING "Warning only %ldMB will be used.\n", MAXMEM>>20);
- if (max_pfn > MAX_NONPAE_PFN)
- printk(KERN_WARNING "Use a HIGHMEM64G enabled kernel.\n");
- else
- printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
+ printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
max_pfn = MAXMEM_PFN;
#else /* !CONFIG_HIGHMEM */
-#ifndef CONFIG_HIGHMEM64G
if (max_pfn > MAX_NONPAE_PFN) {
max_pfn = MAX_NONPAE_PFN;
printk(KERN_WARNING MSG_HIGHMEM_TRIMMED);
}
-#endif /* !CONFIG_HIGHMEM64G */
#endif /* !CONFIG_HIGHMEM */
}
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 38ff7791a9c7..42c90b420773 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -503,6 +503,14 @@ void iounmap(volatile void __iomem *addr)
}
EXPORT_SYMBOL(iounmap);
+void *arch_memremap_wb(phys_addr_t phys_addr, size_t size, unsigned long flags)
+{
+ if ((flags & MEMREMAP_DEC) || cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
+ return (void __force *)ioremap_cache(phys_addr, size);
+
+ return (void __force *)ioremap_encrypted(phys_addr, size);
+}
+
/*
* Convert a physical pointer to a virtual kernel pointer for /dev/mem
* access
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index 11a93542d198..3c306de52fd4 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -113,8 +113,14 @@ void __init kernel_randomize_memory(void)
memory_tb = DIV_ROUND_UP(max_pfn << PAGE_SHIFT, 1UL << TB_SHIFT) +
CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING;
- /* Adapt physical memory region size based on available memory */
- if (memory_tb < kaslr_regions[0].size_tb)
+ /*
+ * Adapt physical memory region size based on available memory,
+ * except when CONFIG_PCI_P2PDMA is enabled. P2PDMA exposes the
+ * device BAR space assuming the direct map space is large enough
+ * for creating a ZONE_DEVICE mapping in the direct map corresponding
+ * to the physical BAR address.
+ */
+ if (!IS_ENABLED(CONFIG_PCI_P2PDMA) && (memory_tb < kaslr_regions[0].size_tb))
kaslr_regions[0].size_tb = memory_tb;
/*
diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S
index e25288ee33c2..f8a33b25ae86 100644
--- a/arch/x86/mm/mem_encrypt_boot.S
+++ b/arch/x86/mm/mem_encrypt_boot.S
@@ -72,6 +72,7 @@ SYM_FUNC_START(sme_encrypt_execute)
SYM_FUNC_END(sme_encrypt_execute)
SYM_FUNC_START(__enc_copy)
+ ANNOTATE_NOENDBR
/*
* Routine used to encrypt memory in place.
* This routine must be run outside of the kernel proper since
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index b8a6ffffb451..5ed2109211da 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -84,7 +84,6 @@ static unsigned long mmap_base(unsigned long rnd, unsigned long task_size,
{
unsigned long gap = rlim_stack->rlim_cur;
unsigned long pad = stack_maxrandom_size(task_size) + stack_guard_gap;
- unsigned long gap_min, gap_max;
/* Values close to RLIM_INFINITY can overflow. */
if (gap + pad > gap)
@@ -94,13 +93,7 @@ static unsigned long mmap_base(unsigned long rnd, unsigned long task_size,
* Top of mmap area (just below the process stack).
* Leave an at least ~128 MB hole with possible stack randomization.
*/
- gap_min = SIZE_128M;
- gap_max = (task_size / 6) * 5;
-
- if (gap < gap_min)
- gap = gap_min;
- else if (gap > gap_max)
- gap = gap_max;
+ gap = clamp(gap, SIZE_128M, (task_size / 6) * 5);
return PAGE_ALIGN(task_size - gap - rnd);
}
diff --git a/arch/x86/mm/pat/cpa-test.c b/arch/x86/mm/pat/cpa-test.c
index 3d2f7f0a6ed1..ad3c1feec990 100644
--- a/arch/x86/mm/pat/cpa-test.c
+++ b/arch/x86/mm/pat/cpa-test.c
@@ -183,7 +183,7 @@ static int pageattr_test(void)
break;
case 1:
- err = change_page_attr_set(addrs, len[1], PAGE_CPA_TEST, 1);
+ err = change_page_attr_set(addrs, len[i], PAGE_CPA_TEST, 1);
break;
case 2:
diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c
index feb8cc6a12bf..e40861c9cb90 100644
--- a/arch/x86/mm/pat/memtype.c
+++ b/arch/x86/mm/pat/memtype.c
@@ -43,6 +43,7 @@
#include <linux/fs.h>
#include <linux/rbtree.h>
+#include <asm/cpu_device_id.h>
#include <asm/cacheflush.h>
#include <asm/cacheinfo.h>
#include <asm/processor.h>
@@ -290,9 +291,8 @@ void __init pat_bp_init(void)
return;
}
- if ((c->x86_vendor == X86_VENDOR_INTEL) &&
- (((c->x86 == 0x6) && (c->x86_model <= 0xd)) ||
- ((c->x86 == 0xf) && (c->x86_model <= 0x6)))) {
+ if ((c->x86_vfm >= INTEL_PENTIUM_PRO && c->x86_vfm <= INTEL_PENTIUM_M_DOTHAN) ||
+ (c->x86_vfm >= INTEL_P4_WILLAMETTE && c->x86_vfm <= INTEL_P4_CEDARMILL)) {
/*
* PAT support with the lower four entries. Intel Pentium 2,
* 3, M, and 4 are affected by PAT errata, which makes the
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index ef4514d64c05..72405d315b41 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -73,6 +73,7 @@ static DEFINE_SPINLOCK(cpa_lock);
#define CPA_ARRAY 2
#define CPA_PAGES_ARRAY 4
#define CPA_NO_CHECK_ALIAS 8 /* Do not search for aliases */
+#define CPA_COLLAPSE 16 /* try to collapse large pages */
static inline pgprot_t cachemode2pgprot(enum page_cache_mode pcm)
{
@@ -105,6 +106,18 @@ static void split_page_count(int level)
direct_pages_count[level - 1] += PTRS_PER_PTE;
}
+static void collapse_page_count(int level)
+{
+ direct_pages_count[level]++;
+ if (system_state == SYSTEM_RUNNING) {
+ if (level == PG_LEVEL_2M)
+ count_vm_event(DIRECT_MAP_LEVEL2_COLLAPSE);
+ else if (level == PG_LEVEL_1G)
+ count_vm_event(DIRECT_MAP_LEVEL3_COLLAPSE);
+ }
+ direct_pages_count[level - 1] -= PTRS_PER_PTE;
+}
+
void arch_report_meminfo(struct seq_file *m)
{
seq_printf(m, "DirectMap4k: %8lu kB\n",
@@ -122,6 +135,7 @@ void arch_report_meminfo(struct seq_file *m)
}
#else
static inline void split_page_count(int level) { }
+static inline void collapse_page_count(int level) { }
#endif
#ifdef CONFIG_X86_CPA_STATISTICS
@@ -211,14 +225,14 @@ within(unsigned long addr, unsigned long start, unsigned long end)
return addr >= start && addr < end;
}
+#ifdef CONFIG_X86_64
+
static inline int
within_inclusive(unsigned long addr, unsigned long start, unsigned long end)
{
return addr >= start && addr <= end;
}
-#ifdef CONFIG_X86_64
-
/*
* The kernel image is mapped into two places in the virtual address space
* (addresses without KASLR, of course):
@@ -394,16 +408,49 @@ static void __cpa_flush_tlb(void *data)
flush_tlb_one_kernel(fix_addr(__cpa_addr(cpa, i)));
}
-static void cpa_flush(struct cpa_data *data, int cache)
+static int collapse_large_pages(unsigned long addr, struct list_head *pgtables);
+
+static void cpa_collapse_large_pages(struct cpa_data *cpa)
+{
+ unsigned long start, addr, end;
+ struct ptdesc *ptdesc, *tmp;
+ LIST_HEAD(pgtables);
+ int collapsed = 0;
+ int i;
+
+ if (cpa->flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) {
+ for (i = 0; i < cpa->numpages; i++)
+ collapsed += collapse_large_pages(__cpa_addr(cpa, i),
+ &pgtables);
+ } else {
+ addr = __cpa_addr(cpa, 0);
+ start = addr & PMD_MASK;
+ end = addr + PAGE_SIZE * cpa->numpages;
+
+ for (addr = start; within(addr, start, end); addr += PMD_SIZE)
+ collapsed += collapse_large_pages(addr, &pgtables);
+ }
+
+ if (!collapsed)
+ return;
+
+ flush_tlb_all();
+
+ list_for_each_entry_safe(ptdesc, tmp, &pgtables, pt_list) {
+ list_del(&ptdesc->pt_list);
+ __free_page(ptdesc_page(ptdesc));
+ }
+}
+
+static void cpa_flush(struct cpa_data *cpa, int cache)
{
- struct cpa_data *cpa = data;
unsigned int i;
BUG_ON(irqs_disabled() && !early_boot_irqs_disabled);
if (cache && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
cpa_flush_all(cache);
- return;
+ goto collapse_large_pages;
}
if (cpa->force_flush_all || cpa->numpages > tlb_single_page_flush_ceiling)
@@ -412,7 +459,7 @@ static void cpa_flush(struct cpa_data *data, int cache)
on_each_cpu(__cpa_flush_tlb, cpa, 1);
if (!cache)
- return;
+ goto collapse_large_pages;
mb();
for (i = 0; i < cpa->numpages; i++) {
@@ -428,6 +475,10 @@ static void cpa_flush(struct cpa_data *data, int cache)
clflush_cache_range_opt((void *)fix_addr(addr), PAGE_SIZE);
}
mb();
+
+collapse_large_pages:
+ if (cpa->flags & CPA_COLLAPSE)
+ cpa_collapse_large_pages(cpa);
}
static bool overlaps(unsigned long r1_start, unsigned long r1_end,
@@ -1197,6 +1248,161 @@ static int split_large_page(struct cpa_data *cpa, pte_t *kpte,
return 0;
}
+static int collapse_pmd_page(pmd_t *pmd, unsigned long addr,
+ struct list_head *pgtables)
+{
+ pmd_t _pmd, old_pmd;
+ pte_t *pte, first;
+ unsigned long pfn;
+ pgprot_t pgprot;
+ int i = 0;
+
+ addr &= PMD_MASK;
+ pte = pte_offset_kernel(pmd, addr);
+ first = *pte;
+ pfn = pte_pfn(first);
+
+ /* Make sure alignment is suitable */
+ if (PFN_PHYS(pfn) & ~PMD_MASK)
+ return 0;
+
+ /* The page is 4k intentionally */
+ if (pte_flags(first) & _PAGE_KERNEL_4K)
+ return 0;
+
+ /* Check that the rest of the PTEs are compatible with the first one */
+ for (i = 1, pte++; i < PTRS_PER_PTE; i++, pte++) {
+ pte_t entry = *pte;
+
+ if (!pte_present(entry))
+ return 0;
+ if (pte_flags(entry) != pte_flags(first))
+ return 0;
+ if (pte_pfn(entry) != pte_pfn(first) + i)
+ return 0;
+ }
+
+ old_pmd = *pmd;
+
+ /* Success: set up a large page */
+ pgprot = pgprot_4k_2_large(pte_pgprot(first));
+ pgprot_val(pgprot) |= _PAGE_PSE;
+ _pmd = pfn_pmd(pfn, pgprot);
+ set_pmd(pmd, _pmd);
+
+ /* Queue the page table to be freed after TLB flush */
+ list_add(&page_ptdesc(pmd_page(old_pmd))->pt_list, pgtables);
+
+ if (IS_ENABLED(CONFIG_X86_32) && !SHARED_KERNEL_PMD) {
+ struct page *page;
+
+ /* Update all PGD tables to use the same large page */
+ list_for_each_entry(page, &pgd_list, lru) {
+ pgd_t *pgd = (pgd_t *)page_address(page) + pgd_index(addr);
+ p4d_t *p4d = p4d_offset(pgd, addr);
+ pud_t *pud = pud_offset(p4d, addr);
+ pmd_t *pmd = pmd_offset(pud, addr);
+ /* Something is wrong if the entries don't match */
+ if (WARN_ON(pmd_val(old_pmd) != pmd_val(*pmd)))
+ continue;
+ set_pmd(pmd, _pmd);
+ }
+ }
+
+ if (virt_addr_valid(addr) && pfn_range_is_mapped(pfn, pfn + 1))
+ collapse_page_count(PG_LEVEL_2M);
+
+ return 1;
+}
+
+static int collapse_pud_page(pud_t *pud, unsigned long addr,
+ struct list_head *pgtables)
+{
+ unsigned long pfn;
+ pmd_t *pmd, first;
+ int i;
+
+ if (!direct_gbpages)
+ return 0;
+
+ addr &= PUD_MASK;
+ pmd = pmd_offset(pud, addr);
+ first = *pmd;
+
+ /*
+ * To restore PUD page all PMD entries must be large and
+ * have suitable alignment
+ */
+ pfn = pmd_pfn(first);
+ if (!pmd_leaf(first) || (PFN_PHYS(pfn) & ~PUD_MASK))
+ return 0;
+
+ /*
+ * To restore PUD page, all following PMDs must be compatible with the
+ * first one.
+ */
+ for (i = 1, pmd++; i < PTRS_PER_PMD; i++, pmd++) {
+ pmd_t entry = *pmd;
+
+ if (!pmd_present(entry) || !pmd_leaf(entry))
+ return 0;
+ if (pmd_flags(entry) != pmd_flags(first))
+ return 0;
+ if (pmd_pfn(entry) != pmd_pfn(first) + i * PTRS_PER_PTE)
+ return 0;
+ }
+
+ /* Restore PUD page and queue page table to be freed after TLB flush */
+ list_add(&page_ptdesc(pud_page(*pud))->pt_list, pgtables);
+ set_pud(pud, pfn_pud(pfn, pmd_pgprot(first)));
+
+ if (virt_addr_valid(addr) && pfn_range_is_mapped(pfn, pfn + 1))
+ collapse_page_count(PG_LEVEL_1G);
+
+ return 1;
+}
+
+/*
+ * Collapse PMD and PUD pages in the kernel mapping around the address where
+ * possible.
+ *
+ * Caller must flush TLB and free page tables queued on the list before
+ * touching the new entries. CPU must not see TLB entries of different size
+ * with different attributes.
+ */
+static int collapse_large_pages(unsigned long addr, struct list_head *pgtables)
+{
+ int collapsed = 0;
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+
+ addr &= PMD_MASK;
+
+ spin_lock(&pgd_lock);
+ pgd = pgd_offset_k(addr);
+ if (pgd_none(*pgd))
+ goto out;
+ p4d = p4d_offset(pgd, addr);
+ if (p4d_none(*p4d))
+ goto out;
+ pud = pud_offset(p4d, addr);
+ if (!pud_present(*pud) || pud_leaf(*pud))
+ goto out;
+ pmd = pmd_offset(pud, addr);
+ if (!pmd_present(*pmd) || pmd_leaf(*pmd))
+ goto out;
+
+ collapsed = collapse_pmd_page(pmd, addr, pgtables);
+ if (collapsed)
+ collapsed += collapse_pud_page(pud, addr, pgtables);
+
+out:
+ spin_unlock(&pgd_lock);
+ return collapsed;
+}
+
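
The eligibility test in collapse_pmd_page() boils down to: all 512 PTEs present, identical flags, physically contiguous and 2M-aligned. A self-contained sketch of that check over an array of fake PTE records (pfn plus flags; 512 entries per table assumed):

#include <stdio.h>
#include <stdint.h>

#define PTRS_PER_PTE 512

struct fake_pte {
	uint64_t pfn;
	uint64_t flags;
	int present;
};

/* The 2M range can be remapped with one large entry only if the first PTE is
 * 2M-aligned and every following PTE is present, has identical flags and maps
 * the next pfn. */
static int can_collapse_2m(const struct fake_pte *pte)
{
	uint64_t pfn = pte[0].pfn;
	int i;

	if (pfn & (PTRS_PER_PTE - 1))	/* physical 2M alignment */
		return 0;

	for (i = 1; i < PTRS_PER_PTE; i++) {
		if (!pte[i].present)
			return 0;
		if (pte[i].flags != pte[0].flags)
			return 0;
		if (pte[i].pfn != pfn + i)
			return 0;
	}
	return 1;
}

int main(void)
{
	struct fake_pte table[PTRS_PER_PTE];
	int i;

	for (i = 0; i < PTRS_PER_PTE; i++)
		table[i] = (struct fake_pte){ .pfn = 0x200000 + i, .flags = 0x63, .present = 1 };

	printf("%d\n", can_collapse_2m(table));	/* 1: contiguous, uniform */
	table[100].flags = 0x61;		/* e.g. one PTE with different flags */
	printf("%d\n", can_collapse_2m(table));	/* 0 */
	return 0;
}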
static bool try_to_free_pte_page(pte_t *pte)
{
int i;
@@ -1942,19 +2148,6 @@ static inline int cpa_clear_pages_array(struct page **pages, int numpages,
CPA_PAGES_ARRAY, pages);
}
-/*
- * __set_memory_prot is an internal helper for callers that have been passed
- * a pgprot_t value from upper layers and a reservation has already been taken.
- * If you want to set the pgprot to a specific page protocol, use the
- * set_memory_xx() functions.
- */
-int __set_memory_prot(unsigned long addr, int numpages, pgprot_t prot)
-{
- return change_page_attr_set_clr(&addr, numpages, prot,
- __pgprot(~pgprot_val(prot)), 0, 0,
- NULL);
-}
-
int _set_memory_uc(unsigned long addr, int numpages)
{
/*
@@ -2120,7 +2313,8 @@ int set_memory_rox(unsigned long addr, int numpages)
if (__supported_pte_mask & _PAGE_NX)
clr.pgprot |= _PAGE_NX;
- return change_page_attr_clear(&addr, numpages, clr, 0);
+ return change_page_attr_set_clr(&addr, numpages, __pgprot(0), clr, 0,
+ CPA_COLLAPSE, NULL);
}
int set_memory_rw(unsigned long addr, int numpages)
@@ -2147,7 +2341,8 @@ int set_memory_p(unsigned long addr, int numpages)
int set_memory_4k(unsigned long addr, int numpages)
{
- return change_page_attr_set_clr(&addr, numpages, __pgprot(0),
+ return change_page_attr_set_clr(&addr, numpages,
+ __pgprot(_PAGE_KERNEL_4K),
__pgprot(0), 1, 0, NULL);
}
@@ -2420,7 +2615,7 @@ static int __set_pages_np(struct page *page, int numpages)
.pgd = NULL,
.numpages = numpages,
.mask_set = __pgprot(0),
- .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW),
+ .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY),
.flags = CPA_NO_CHECK_ALIAS };
/*
@@ -2507,7 +2702,7 @@ int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
.pgd = pgd,
.numpages = numpages,
.mask_set = __pgprot(0),
- .mask_clr = __pgprot(~page_flags & (_PAGE_NX|_PAGE_RW)),
+ .mask_clr = __pgprot(~page_flags & (_PAGE_NX|_PAGE_RW|_PAGE_DIRTY)),
.flags = CPA_NO_CHECK_ALIAS,
};
@@ -2550,7 +2745,7 @@ int __init kernel_unmap_pages_in_pgd(pgd_t *pgd, unsigned long address,
.pgd = pgd,
.numpages = numpages,
.mask_set = __pgprot(0),
- .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW),
+ .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY),
.flags = CPA_NO_CHECK_ALIAS,
};
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 1fef5ad32d5a..cec321fb74f2 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -12,59 +12,15 @@ phys_addr_t physical_mask __ro_after_init = (1ULL << __PHYSICAL_MASK_SHIFT) - 1;
EXPORT_SYMBOL(physical_mask);
#endif
-#ifdef CONFIG_HIGHPTE
-#define PGTABLE_HIGHMEM __GFP_HIGHMEM
-#else
-#define PGTABLE_HIGHMEM 0
-#endif
-
-#ifndef CONFIG_PARAVIRT
-#ifndef CONFIG_PT_RECLAIM
-static inline
-void paravirt_tlb_remove_table(struct mmu_gather *tlb, void *table)
-{
- struct ptdesc *ptdesc = (struct ptdesc *)table;
-
- pagetable_dtor(ptdesc);
- tlb_remove_page(tlb, ptdesc_page(ptdesc));
-}
-#else
-static inline
-void paravirt_tlb_remove_table(struct mmu_gather *tlb, void *table)
-{
- tlb_remove_table(tlb, table);
-}
-#endif /* !CONFIG_PT_RECLAIM */
-#endif /* !CONFIG_PARAVIRT */
-
-gfp_t __userpte_alloc_gfp = GFP_PGTABLE_USER | PGTABLE_HIGHMEM;
-
pgtable_t pte_alloc_one(struct mm_struct *mm)
{
- return __pte_alloc_one(mm, __userpte_alloc_gfp);
-}
-
-static int __init setup_userpte(char *arg)
-{
- if (!arg)
- return -EINVAL;
-
- /*
- * "userpte=nohigh" disables allocation of user pagetables in
- * high memory.
- */
- if (strcmp(arg, "nohigh") == 0)
- __userpte_alloc_gfp &= ~__GFP_HIGHMEM;
- else
- return -EINVAL;
- return 0;
+ return __pte_alloc_one(mm, GFP_PGTABLE_USER);
}
-early_param("userpte", setup_userpte);
void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
{
paravirt_release_pte(page_to_pfn(pte));
- paravirt_tlb_remove_table(tlb, page_ptdesc(pte));
+ tlb_remove_table(tlb, page_ptdesc(pte));
}
#if CONFIG_PGTABLE_LEVELS > 2
@@ -78,21 +34,21 @@ void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
#ifdef CONFIG_X86_PAE
tlb->need_flush_all = 1;
#endif
- paravirt_tlb_remove_table(tlb, virt_to_ptdesc(pmd));
+ tlb_remove_table(tlb, virt_to_ptdesc(pmd));
}
#if CONFIG_PGTABLE_LEVELS > 3
void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
{
paravirt_release_pud(__pa(pud) >> PAGE_SHIFT);
- paravirt_tlb_remove_table(tlb, virt_to_ptdesc(pud));
+ tlb_remove_table(tlb, virt_to_ptdesc(pud));
}
#if CONFIG_PGTABLE_LEVELS > 4
void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d)
{
paravirt_release_p4d(__pa(p4d) >> PAGE_SHIFT);
- paravirt_tlb_remove_table(tlb, virt_to_ptdesc(p4d));
+ tlb_remove_table(tlb, virt_to_ptdesc(p4d));
}
#endif /* CONFIG_PGTABLE_LEVELS > 4 */
#endif /* CONFIG_PGTABLE_LEVELS > 3 */
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 6cf881a942bb..0925768d00cb 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -74,13 +74,15 @@
* use different names for each of them:
*
* ASID - [0, TLB_NR_DYN_ASIDS-1]
- * the canonical identifier for an mm
+ * the canonical identifier for an mm, dynamically allocated on each CPU
+ * [TLB_NR_DYN_ASIDS, MAX_ASID_AVAILABLE-1]
+ * the canonical, global identifier for an mm, identical across all CPUs
*
- * kPCID - [1, TLB_NR_DYN_ASIDS]
+ * kPCID - [1, MAX_ASID_AVAILABLE]
* the value we write into the PCID part of CR3; corresponds to the
* ASID+1, because PCID 0 is special.
*
- * uPCID - [2048 + 1, 2048 + TLB_NR_DYN_ASIDS]
+ * uPCID - [2048 + 1, 2048 + MAX_ASID_AVAILABLE]
* for KPTI each mm has two address spaces and thus needs two
* PCID values, but we can still do with a single ASID denomination
* for each mm. Corresponds to kPCID + 2048.
@@ -225,6 +227,20 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
return;
}
+ /*
+ * TLB consistency for global ASIDs is maintained with hardware assisted
+ * remote TLB flushing. Global ASIDs are always up to date.
+ */
+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) {
+ u16 global_asid = mm_global_asid(next);
+
+ if (global_asid) {
+ *new_asid = global_asid;
+ *need_flush = false;
+ return;
+ }
+ }
+
if (this_cpu_read(cpu_tlbstate.invalidate_other))
clear_asid_other();
@@ -252,6 +268,268 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
}
/*
+ * Global ASIDs are allocated for multi-threaded processes that are
+ * active on multiple CPUs simultaneously, giving each of those
+ * processes the same PCID on every CPU, for use with hardware-assisted
+ * TLB shootdown on remote CPUs, like AMD INVLPGB or Intel RAR.
+ *
+ * These global ASIDs are held for the lifetime of the process.
+ */
+static DEFINE_RAW_SPINLOCK(global_asid_lock);
+static u16 last_global_asid = MAX_ASID_AVAILABLE;
+static DECLARE_BITMAP(global_asid_used, MAX_ASID_AVAILABLE);
+static DECLARE_BITMAP(global_asid_freed, MAX_ASID_AVAILABLE);
+static int global_asid_available = MAX_ASID_AVAILABLE - TLB_NR_DYN_ASIDS - 1;
+
+/*
+ * When the search for a free ASID in the global ASID space reaches
+ * MAX_ASID_AVAILABLE, a global TLB flush guarantees that previously
+ * freed global ASIDs are safe to re-use.
+ *
+ * This way the global flush only needs to happen at ASID rollover
+ * time, and not at ASID allocation time.
+ */
+static void reset_global_asid_space(void)
+{
+ lockdep_assert_held(&global_asid_lock);
+
+ invlpgb_flush_all_nonglobals();
+
+ /*
+ * The TLB flush above makes it safe to re-use the previously
+ * freed global ASIDs.
+ */
+ bitmap_andnot(global_asid_used, global_asid_used,
+ global_asid_freed, MAX_ASID_AVAILABLE);
+ bitmap_clear(global_asid_freed, 0, MAX_ASID_AVAILABLE);
+
+ /* Restart the search from the start of global ASID space. */
+ last_global_asid = TLB_NR_DYN_ASIDS;
+}
+
+static u16 allocate_global_asid(void)
+{
+ u16 asid;
+
+ lockdep_assert_held(&global_asid_lock);
+
+ /* The previous allocation hit the edge of available address space */
+ if (last_global_asid >= MAX_ASID_AVAILABLE - 1)
+ reset_global_asid_space();
+
+ asid = find_next_zero_bit(global_asid_used, MAX_ASID_AVAILABLE, last_global_asid);
+
+ if (asid >= MAX_ASID_AVAILABLE && !global_asid_available) {
+ /* This should never happen. */
+ VM_WARN_ONCE(1, "Unable to allocate global ASID despite %d available\n",
+ global_asid_available);
+ return 0;
+ }
+
+ /* Claim this global ASID. */
+ __set_bit(asid, global_asid_used);
+ last_global_asid = asid;
+ global_asid_available--;
+ return asid;
+}
+
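
The allocator above is a find-next-zero-bit scan with a lazy rollover flush: freed ASIDs only become reusable after one global flush at wrap-around. A minimal userspace sketch of that scheme, assuming a 64-entry ASID space and a stubbed-out flush:

#include <stdio.h>
#include <stdint.h>

#define MAX_ASID	64	/* small space for the sketch */
#define NR_DYN_ASIDS	6	/* low ASIDs stay per-CPU, as above */

static uint64_t asid_used, asid_freed;
static unsigned int last_asid = MAX_ASID;

static void flush_all_nonglobals(void) { /* stands in for invlpgb_flush_all_nonglobals() */ }

/* Rollover: one flush makes every previously freed ASID reusable again. */
static void reset_asid_space(void)
{
	flush_all_nonglobals();
	asid_used &= ~asid_freed;
	asid_freed = 0;
	last_asid = NR_DYN_ASIDS;
}

static unsigned int alloc_global_asid(void)
{
	unsigned int asid;

	if (last_asid >= MAX_ASID - 1)
		reset_asid_space();

	for (asid = last_asid; asid < MAX_ASID; asid++)
		if (!(asid_used & (1ULL << asid)))
			break;

	if (asid >= MAX_ASID)
		return 0;			/* space exhausted */

	asid_used |= 1ULL << asid;
	last_asid = asid;
	return asid;
}

static void free_global_asid(unsigned int asid)
{
	asid_freed |= 1ULL << asid;		/* reusable only after rollover flush */
}

int main(void)
{
	unsigned int a = alloc_global_asid();
	unsigned int b = alloc_global_asid();

	printf("%u %u\n", a, b);		/* 6 7 */
	free_global_asid(a);
	printf("%u\n", alloc_global_asid());	/* 8: freed ASIDs wait for rollover */
	return 0;
}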
+/*
+ * Check whether a process is currently active on more than @threshold CPUs.
+ * This is a cheap estimate of whether it may make sense to assign a global
+ * ASID to this process and use broadcast TLB invalidation.
+ */
+static bool mm_active_cpus_exceeds(struct mm_struct *mm, int threshold)
+{
+ int count = 0;
+ int cpu;
+
+ /* This quick check should eliminate most single threaded programs. */
+ if (cpumask_weight(mm_cpumask(mm)) <= threshold)
+ return false;
+
+ /* Slower check to make sure. */
+ for_each_cpu(cpu, mm_cpumask(mm)) {
+ /* Skip the CPUs that aren't really running this process. */
+ if (per_cpu(cpu_tlbstate.loaded_mm, cpu) != mm)
+ continue;
+
+ if (per_cpu(cpu_tlbstate_shared.is_lazy, cpu))
+ continue;
+
+ if (++count > threshold)
+ return true;
+ }
+ return false;
+}
+
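
The check above is two-staged: a cheap cpumask weight test filters out single-threaded tasks, and only then a precise walk (which also skips lazy CPUs) counts with an early exit. A sketch of that pattern over plain arrays, with the loaded_mm comparison folded into the cpumask for brevity:

#include <stdio.h>

#define NR_CPUS 8

static int active_exceeds(const int cpumask[NR_CPUS], const int is_lazy[NR_CPUS],
			  int threshold)
{
	int count = 0, weight = 0, cpu;

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		weight += cpumask[cpu];
	if (weight <= threshold)		/* quick filter */
		return 0;

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		if (!cpumask[cpu] || is_lazy[cpu])
			continue;
		if (++count > threshold)	/* early exit */
			return 1;
	}
	return 0;
}

int main(void)
{
	int mask[NR_CPUS] = { 1, 1, 1, 1, 1, 0, 0, 0 };
	int lazy[NR_CPUS] = { 0, 0, 0, 1, 1, 0, 0, 0 };

	printf("%d\n", active_exceeds(mask, lazy, 3));	/* 0: only 3 non-lazy CPUs */
	lazy[3] = 0;
	printf("%d\n", active_exceeds(mask, lazy, 3));	/* 1: now 4 active */
	return 0;
}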
+/*
+ * Assign a global ASID to the current process, protecting against
+ * races between multiple threads in the process.
+ */
+static void use_global_asid(struct mm_struct *mm)
+{
+ u16 asid;
+
+ guard(raw_spinlock_irqsave)(&global_asid_lock);
+
+ /* This process is already using broadcast TLB invalidation. */
+ if (mm_global_asid(mm))
+ return;
+
+ /*
+ * The last global ASID was consumed while waiting for the lock.
+ *
+ * If this fires, a more aggressive ASID reuse scheme might be
+ * needed.
+ */
+ if (!global_asid_available) {
+ VM_WARN_ONCE(1, "Ran out of global ASIDs\n");
+ return;
+ }
+
+ asid = allocate_global_asid();
+ if (!asid)
+ return;
+
+ mm_assign_global_asid(mm, asid);
+}
+
+void mm_free_global_asid(struct mm_struct *mm)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB))
+ return;
+
+ if (!mm_global_asid(mm))
+ return;
+
+ guard(raw_spinlock_irqsave)(&global_asid_lock);
+
+ /* The global ASID can be re-used only after flush at wrap-around. */
+#ifdef CONFIG_BROADCAST_TLB_FLUSH
+ __set_bit(mm->context.global_asid, global_asid_freed);
+
+ mm->context.global_asid = 0;
+ global_asid_available++;
+#endif
+}
+
+/*
+ * Is the mm transitioning from a CPU-local ASID to a global ASID?
+ */
+static bool mm_needs_global_asid(struct mm_struct *mm, u16 asid)
+{
+ u16 global_asid = mm_global_asid(mm);
+
+ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB))
+ return false;
+
+ /* Process is transitioning to a global ASID */
+ if (global_asid && asid != global_asid)
+ return true;
+
+ return false;
+}
+
+/*
+ * x86 has 4k ASIDs (2k when compiled with KPTI), but the largest x86
+ * systems have over 8k CPUs. Because of this potential ASID shortage,
+ * global ASIDs are handed out to processes that have frequent TLB
+ * flushes and are active on 4 or more CPUs simultaneously.
+ */
+static void consider_global_asid(struct mm_struct *mm)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB))
+ return;
+
+ /* Check every once in a while. */
+ if ((current->pid & 0x1f) != (jiffies & 0x1f))
+ return;
+
+ /*
+ * Assign a global ASID if the process is active on
+ * 4 or more CPUs simultaneously.
+ */
+ if (mm_active_cpus_exceeds(mm, 3))
+ use_global_asid(mm);
+}
+
+static void finish_asid_transition(struct flush_tlb_info *info)
+{
+ struct mm_struct *mm = info->mm;
+ int bc_asid = mm_global_asid(mm);
+ int cpu;
+
+ if (!mm_in_asid_transition(mm))
+ return;
+
+ for_each_cpu(cpu, mm_cpumask(mm)) {
+ /*
+ * The remote CPU is context switching. Wait for that to
+ * finish, to catch the unlikely case of it switching to
+ * the target mm with an out of date ASID.
+ */
+ while (READ_ONCE(per_cpu(cpu_tlbstate.loaded_mm, cpu)) == LOADED_MM_SWITCHING)
+ cpu_relax();
+
+ if (READ_ONCE(per_cpu(cpu_tlbstate.loaded_mm, cpu)) != mm)
+ continue;
+
+ /*
+ * If at least one CPU is not using the global ASID yet,
+ * send a TLB flush IPI. The IPI should cause stragglers
+ * to transition soon.
+ *
+ * This can race with the CPU switching to another task;
+ * that results in a (harmless) extra IPI.
+ */
+ if (READ_ONCE(per_cpu(cpu_tlbstate.loaded_mm_asid, cpu)) != bc_asid) {
+ flush_tlb_multi(mm_cpumask(info->mm), info);
+ return;
+ }
+ }
+
+ /* All the CPUs running this process are using the global ASID. */
+ mm_clear_asid_transition(mm);
+}
+
+static void broadcast_tlb_flush(struct flush_tlb_info *info)
+{
+ bool pmd = info->stride_shift == PMD_SHIFT;
+ unsigned long asid = mm_global_asid(info->mm);
+ unsigned long addr = info->start;
+
+ /*
+ * TLB flushes with INVLPGB are kicked off asynchronously.
+ * The inc_mm_tlb_gen() guarantees page table updates are done
+ * before these TLB flushes happen.
+ */
+ if (info->end == TLB_FLUSH_ALL) {
+ invlpgb_flush_single_pcid_nosync(kern_pcid(asid));
+ /* Do any CPUs supporting INVLPGB need PTI? */
+ if (cpu_feature_enabled(X86_FEATURE_PTI))
+ invlpgb_flush_single_pcid_nosync(user_pcid(asid));
+ } else do {
+ unsigned long nr = 1;
+
+ if (info->stride_shift <= PMD_SHIFT) {
+ nr = (info->end - addr) >> info->stride_shift;
+ nr = clamp_val(nr, 1, invlpgb_count_max);
+ }
+
+ invlpgb_flush_user_nr_nosync(kern_pcid(asid), addr, nr, pmd);
+ if (cpu_feature_enabled(X86_FEATURE_PTI))
+ invlpgb_flush_user_nr_nosync(user_pcid(asid), addr, nr, pmd);
+
+ addr += nr << info->stride_shift;
+ } while (addr < info->end);
+
+ finish_asid_transition(info);
+
+ /* Wait for the INVLPGBs kicked off above to finish. */
+ __tlbsync();
+}
+
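
The do/while above chops the flush range into chunks of at most invlpgb_count_max pages before each asynchronous INVLPGB. A sketch of just the chunking arithmetic for 4k strides, with the hardware call stubbed out and a small chunk limit assumed:

#include <stdio.h>

#define PAGE_SHIFT	12
#define COUNT_MAX	8	/* stand-in for invlpgb_count_max */

static void flush_chunk(unsigned long addr, unsigned long nr)
{
	/* stands in for invlpgb_flush_user_nr_nosync() */
	printf("flush %#lx + %lu pages\n", addr, nr);
}

/* Mirror of the chunking loop in broadcast_tlb_flush() for 4k pages. */
static void flush_range(unsigned long start, unsigned long end)
{
	unsigned long addr = start;

	do {
		unsigned long nr = (end - addr) >> PAGE_SHIFT;

		if (nr < 1)
			nr = 1;
		if (nr > COUNT_MAX)
			nr = COUNT_MAX;

		flush_chunk(addr, nr);
		addr += nr << PAGE_SHIFT;
	} while (addr < end);
}

int main(void)
{
	flush_range(0x400000, 0x400000 + 20 * 4096);	/* 8 + 8 + 4 pages */
	return 0;
}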
+/*
* Given an ASID, flush the corresponding user ASID. We can delay this
* until the next time we switch to it.
*
@@ -556,7 +834,8 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next,
*/
if (prev == next) {
/* Not actually switching mm's */
- VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
+ VM_WARN_ON(is_dyn_asid(prev_asid) &&
+ this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
next->context.ctx_id);
/*
@@ -573,6 +852,20 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next,
!cpumask_test_cpu(cpu, mm_cpumask(next))))
cpumask_set_cpu(cpu, mm_cpumask(next));
+ /* Check if the current mm is transitioning to a global ASID */
+ if (mm_needs_global_asid(next, prev_asid)) {
+ next_tlb_gen = atomic64_read(&next->context.tlb_gen);
+ choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);
+ goto reload_tlb;
+ }
+
+ /*
+ * Broadcast TLB invalidation keeps this ASID up to date
+ * all the time.
+ */
+ if (is_global_asid(prev_asid))
+ return;
+
/*
* If the CPU is not in lazy TLB mode, we are just switching
* from one thread in a process to another thread in the same
@@ -607,6 +900,13 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next,
cond_mitigation(tsk);
/*
+ * Let nmi_uaccess_okay() and finish_asid_transition()
+ * know that CR3 is changing.
+ */
+ this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING);
+ barrier();
+
+ /*
* Leave this CPU in prev's mm_cpumask. Atomic writes to
* mm_cpumask can be expensive under contention. The CPU
* will be removed lazily at TLB flush time.
@@ -620,14 +920,12 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next,
next_tlb_gen = atomic64_read(&next->context.tlb_gen);
choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);
-
- /* Let nmi_uaccess_okay() know that we're changing CR3. */
- this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING);
- barrier();
}
+reload_tlb:
new_lam = mm_lam_cr3_mask(next);
if (need_flush) {
+ VM_WARN_ON_ONCE(is_global_asid(new_asid));
this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
load_new_mm_cr3(next->pgd, new_asid, new_lam, true);
@@ -746,7 +1044,7 @@ static void flush_tlb_func(void *info)
const struct flush_tlb_info *f = info;
struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
- u64 local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen);
+ u64 local_tlb_gen;
bool local = smp_processor_id() == f->initiating_cpu;
unsigned long nr_invalidate = 0;
u64 mm_tlb_gen;
@@ -769,6 +1067,16 @@ static void flush_tlb_func(void *info)
if (unlikely(loaded_mm == &init_mm))
return;
+ /* Reload the ASID if transitioning into or out of a global ASID */
+ if (mm_needs_global_asid(loaded_mm, loaded_mm_asid)) {
+ switch_mm_irqs_off(NULL, loaded_mm, NULL);
+ loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
+ }
+
+ /* Broadcast ASIDs are always kept up to date with INVLPGB. */
+ if (is_global_asid(loaded_mm_asid))
+ return;
+
VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].ctx_id) !=
loaded_mm->context.ctx_id);
@@ -786,6 +1094,8 @@ static void flush_tlb_func(void *info)
return;
}
+ local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen);
+
if (unlikely(f->new_tlb_gen != TLB_GENERATION_INVALID &&
f->new_tlb_gen <= local_tlb_gen)) {
/*
@@ -953,7 +1263,7 @@ STATIC_NOPV void native_flush_tlb_multi(const struct cpumask *cpumask,
* up on the new contents of what used to be page tables, while
* doing a speculative memory access.
*/
- if (info->freed_tables)
+ if (info->freed_tables || mm_in_asid_transition(info->mm))
on_each_cpu_mask(cpumask, flush_tlb_func, (void *)info, true);
else
on_each_cpu_cond_mask(should_flush_tlb, flush_tlb_func,
@@ -1000,6 +1310,15 @@ static struct flush_tlb_info *get_flush_tlb_info(struct mm_struct *mm,
BUG_ON(this_cpu_inc_return(flush_tlb_info_idx) != 1);
#endif
+ /*
+ * If the number of flushes is so large that a full flush
+ * would be faster, do a full flush.
+ */
+ if ((end - start) >> stride_shift > tlb_single_page_flush_ceiling) {
+ start = 0;
+ end = TLB_FLUSH_ALL;
+ }
+
info->start = start;
info->end = end;
info->mm = mm;
@@ -1026,17 +1345,8 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
bool freed_tables)
{
struct flush_tlb_info *info;
+ int cpu = get_cpu();
u64 new_tlb_gen;
- int cpu;
-
- cpu = get_cpu();
-
- /* Should we flush just the requested range? */
- if ((end == TLB_FLUSH_ALL) ||
- ((end - start) >> stride_shift) > tlb_single_page_flush_ceiling) {
- start = 0;
- end = TLB_FLUSH_ALL;
- }
/* This is also a barrier that synchronizes with switch_mm(). */
new_tlb_gen = inc_mm_tlb_gen(mm);
@@ -1049,9 +1359,12 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
* a local TLB flush is needed. Optimize this use-case by calling
* flush_tlb_func_local() directly in this case.
*/
- if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids) {
+ if (mm_global_asid(mm)) {
+ broadcast_tlb_flush(info);
+ } else if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids) {
info->trim_cpumask = should_trim_cpumask(mm);
flush_tlb_multi(mm_cpumask(mm), info);
+ consider_global_asid(mm);
} else if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) {
lockdep_assert_irqs_enabled();
local_irq_disable();
@@ -1064,7 +1377,6 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end);
}
-
static void do_flush_tlb_all(void *info)
{
count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
@@ -1074,7 +1386,32 @@ static void do_flush_tlb_all(void *info)
void flush_tlb_all(void)
{
count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
- on_each_cpu(do_flush_tlb_all, NULL, 1);
+
+ /* First try (faster) hardware-assisted TLB invalidation. */
+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB))
+ invlpgb_flush_all();
+ else
+ /* Fall back to the IPI-based invalidation. */
+ on_each_cpu(do_flush_tlb_all, NULL, 1);
+}
+
+/* Flush an arbitrarily large range of memory with INVLPGB. */
+static void invlpgb_kernel_range_flush(struct flush_tlb_info *info)
+{
+ unsigned long addr, nr;
+
+ for (addr = info->start; addr < info->end; addr += nr << PAGE_SHIFT) {
+ nr = (info->end - addr) >> PAGE_SHIFT;
+
+ /*
+ * INVLPGB has a limit on the size of ranges it can
+ * flush. Break up large flushes.
+ */
+ nr = clamp_val(nr, 1, invlpgb_count_max);
+
+ invlpgb_flush_addr_nosync(addr, nr);
+ }
+ __tlbsync();
}
static void do_kernel_range_flush(void *info)
@@ -1087,24 +1424,37 @@ static void do_kernel_range_flush(void *info)
flush_tlb_one_kernel(addr);
}
-void flush_tlb_kernel_range(unsigned long start, unsigned long end)
+static void kernel_tlb_flush_all(struct flush_tlb_info *info)
{
- /* Balance as user space task's flush, a bit conservative */
- if (end == TLB_FLUSH_ALL ||
- (end - start) > tlb_single_page_flush_ceiling << PAGE_SHIFT) {
+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB))
+ invlpgb_flush_all();
+ else
on_each_cpu(do_flush_tlb_all, NULL, 1);
- } else {
- struct flush_tlb_info *info;
-
- preempt_disable();
- info = get_flush_tlb_info(NULL, start, end, 0, false,
- TLB_GENERATION_INVALID);
+}
+static void kernel_tlb_flush_range(struct flush_tlb_info *info)
+{
+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB))
+ invlpgb_kernel_range_flush(info);
+ else
on_each_cpu(do_kernel_range_flush, info, 1);
+}
- put_flush_tlb_info();
- preempt_enable();
- }
+void flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+ struct flush_tlb_info *info;
+
+ guard(preempt)();
+
+ info = get_flush_tlb_info(NULL, start, end, PAGE_SHIFT, false,
+ TLB_GENERATION_INVALID);
+
+ if (info->end == TLB_FLUSH_ALL)
+ kernel_tlb_flush_all(info);
+ else
+ kernel_tlb_flush_range(info);
+
+ put_flush_tlb_info();
}
/*
@@ -1283,7 +1633,10 @@ void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
* a local TLB flush is needed. Optimize this use-case by calling
* flush_tlb_func_local() directly in this case.
*/
- if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids) {
+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB) && batch->unmapped_pages) {
+ invlpgb_flush_all_nonglobals();
+ batch->unmapped_pages = false;
+ } else if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids) {
flush_tlb_multi(&batch->cpumask, info);
} else if (cpumask_test_cpu(cpu, &batch->cpumask)) {
lockdep_assert_irqs_enabled();
@@ -1325,7 +1678,7 @@ bool nmi_uaccess_okay(void)
if (loaded_mm != current_mm)
return false;
- VM_WARN_ON_ONCE(current_mm->pgd != __va(read_cr3_pa()));
+ VM_WARN_ON_ONCE(__pa(current_mm->pgd) != read_cr3_pa());
return true;
}
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index a43fc5af973d..72776dcb75aa 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -410,16 +410,20 @@ static void emit_nops(u8 **pprog, int len)
* Emit the various CFI preambles, see asm/cfi.h and the comments about FineIBT
* in arch/x86/kernel/alternative.c
*/
+static int emit_call(u8 **prog, void *func, void *ip);
-static void emit_fineibt(u8 **pprog, u32 hash)
+static void emit_fineibt(u8 **pprog, u8 *ip, u32 hash, int arity)
{
u8 *prog = *pprog;
EMIT_ENDBR();
EMIT3_off32(0x41, 0x81, 0xea, hash); /* subl $hash, %r10d */
- EMIT2(0x74, 0x07); /* jz.d8 +7 */
- EMIT2(0x0f, 0x0b); /* ud2 */
- EMIT1(0x90); /* nop */
+ if (cfi_bhi) {
+ emit_call(&prog, __bhi_args[arity], ip + 11);
+ } else {
+ EMIT2(0x75, 0xf9); /* jne.d8 .-7 */
+ EMIT3(0x0f, 0x1f, 0x00); /* nop3 */
+ }
EMIT_ENDBR_POISON();
*pprog = prog;
@@ -448,13 +452,13 @@ static void emit_kcfi(u8 **pprog, u32 hash)
*pprog = prog;
}
-static void emit_cfi(u8 **pprog, u32 hash)
+static void emit_cfi(u8 **pprog, u8 *ip, u32 hash, int arity)
{
u8 *prog = *pprog;
switch (cfi_mode) {
case CFI_FINEIBT:
- emit_fineibt(&prog, hash);
+ emit_fineibt(&prog, ip, hash, arity);
break;
case CFI_KCFI:
@@ -505,13 +509,17 @@ static void emit_prologue_tail_call(u8 **pprog, bool is_subprog)
* bpf_tail_call helper will skip the first X86_TAIL_CALL_OFFSET bytes
* while jumping to another program
*/
-static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
+static void emit_prologue(u8 **pprog, u8 *ip, u32 stack_depth, bool ebpf_from_cbpf,
bool tail_call_reachable, bool is_subprog,
bool is_exception_cb)
{
u8 *prog = *pprog;
- emit_cfi(&prog, is_subprog ? cfi_bpf_subprog_hash : cfi_bpf_hash);
+ if (is_subprog) {
+ emit_cfi(&prog, ip, cfi_bpf_subprog_hash, 5);
+ } else {
+ emit_cfi(&prog, ip, cfi_bpf_hash, 1);
+ }
/* BPF trampoline can be made to work without these nops,
* but let's waste 5 bytes for now and optimize later
*/
@@ -641,7 +649,7 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
* See emit_prologue(), for IBT builds the trampoline hook is preceded
* with an ENDBR instruction.
*/
- if (is_endbr(*(u32 *)ip))
+ if (is_endbr(ip))
ip += ENDBR_INSN_SIZE;
return __bpf_arch_text_poke(ip, t, old_addr, new_addr);
@@ -1480,7 +1488,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
detect_reg_usage(insn, insn_cnt, callee_regs_used);
- emit_prologue(&prog, stack_depth,
+ emit_prologue(&prog, image, stack_depth,
bpf_prog_was_classic(bpf_prog), tail_call_reachable,
bpf_is_subprog(bpf_prog), bpf_prog->aux->exception_cb);
/* Exception callback will clobber callee regs for its own use, and
@@ -3036,7 +3044,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
/* skip patched call instruction and point orig_call to actual
* body of the kernel function.
*/
- if (is_endbr(*(u32 *)orig_call))
+ if (is_endbr(orig_call))
orig_call += ENDBR_INSN_SIZE;
orig_call += X86_PATCH_SIZE;
}
@@ -3047,7 +3055,9 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
/*
* Indirect call for bpf_struct_ops
*/
- emit_cfi(&prog, cfi_get_func_hash(func_addr));
+ emit_cfi(&prog, image,
+ cfi_get_func_hash(func_addr),
+ cfi_get_func_arity(func_addr));
} else {
/*
* Direct-call fentry stub, as such it needs accounting for the
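
The EMIT*() macros used by the JIT append raw opcode bytes to the image through a moving cursor. The sketch below mimics the non-BHI FineIBT preamble from the hunk above (endbr64; subl $hash, %r10d; jne.d8 -7; 3-byte nop); it is illustrative only, not the full emit_fineibt() logic:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

static void emit(uint8_t **prog, const uint8_t *bytes, int len)
{
	memcpy(*prog, bytes, len);
	*prog += len;
}

static void emit_fineibt_preamble(uint8_t **prog, uint32_t hash)
{
	const uint8_t endbr64[]   = { 0xf3, 0x0f, 0x1e, 0xfa };
	const uint8_t subl_r10d[] = { 0x41, 0x81, 0xea };	/* subl $imm32, %r10d */
	const uint8_t check[]     = { 0x75, 0xf9,		/* jne.d8 .-7 */
				      0x0f, 0x1f, 0x00 };	/* nop3 */

	emit(prog, endbr64, sizeof(endbr64));
	emit(prog, subl_r10d, sizeof(subl_r10d));
	emit(prog, (const uint8_t *)&hash, sizeof(hash));	/* little-endian imm32 */
	emit(prog, check, sizeof(check));
}

int main(void)
{
	uint8_t image[32], *prog = image;
	int i;

	emit_fineibt_preamble(&prog, 0x12345678);
	for (i = 0; i < prog - image; i++)
		printf("%02x ", image[i]);
	printf("\n");
	return 0;
}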
diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile
index 48bcada5cabe..4933fb337983 100644
--- a/arch/x86/pci/Makefile
+++ b/arch/x86/pci/Makefile
@@ -12,8 +12,6 @@ obj-$(CONFIG_X86_INTEL_CE) += ce4100.o
obj-$(CONFIG_ACPI) += acpi.o
obj-y += legacy.o irq.o
-obj-$(CONFIG_STA2X11) += sta2x11-fixup.o
-
obj-$(CONFIG_X86_NUMACHIP) += numachip.o
obj-$(CONFIG_X86_INTEL_MID) += intel_mid_pci.o
diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c
deleted file mode 100644
index 8c8ddc4dcc08..000000000000
--- a/arch/x86/pci/sta2x11-fixup.c
+++ /dev/null
@@ -1,233 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * DMA translation between STA2x11 AMBA memory mapping and the x86 memory mapping
- *
- * ST Microelectronics ConneXt (STA2X11/STA2X10)
- *
- * Copyright (c) 2010-2011 Wind River Systems, Inc.
- */
-
-#include <linux/pci.h>
-#include <linux/pci_ids.h>
-#include <linux/export.h>
-#include <linux/list.h>
-#include <linux/dma-map-ops.h>
-#include <linux/swiotlb.h>
-#include <asm/iommu.h>
-#include <asm/sta2x11.h>
-
-#define STA2X11_SWIOTLB_SIZE (4*1024*1024)
-
-/*
- * We build a list of bus numbers that are under the ConneXt. The
- * main bridge hosts 4 busses, which are the 4 endpoints, in order.
- */
-#define STA2X11_NR_EP 4 /* 0..3 included */
-#define STA2X11_NR_FUNCS 8 /* 0..7 included */
-#define STA2X11_AMBA_SIZE (512 << 20)
-
-struct sta2x11_ahb_regs { /* saved during suspend */
- u32 base, pexlbase, pexhbase, crw;
-};
-
-struct sta2x11_mapping {
- int is_suspended;
- struct sta2x11_ahb_regs regs[STA2X11_NR_FUNCS];
-};
-
-struct sta2x11_instance {
- struct list_head list;
- int bus0;
- struct sta2x11_mapping map[STA2X11_NR_EP];
-};
-
-static LIST_HEAD(sta2x11_instance_list);
-
-/* At probe time, record new instances of this bridge (likely one only) */
-static void sta2x11_new_instance(struct pci_dev *pdev)
-{
- struct sta2x11_instance *instance;
-
- instance = kzalloc(sizeof(*instance), GFP_ATOMIC);
- if (!instance)
- return;
- /* This has a subordinate bridge, with 4 more-subordinate ones */
- instance->bus0 = pdev->subordinate->number + 1;
-
- if (list_empty(&sta2x11_instance_list)) {
- int size = STA2X11_SWIOTLB_SIZE;
- /* First instance: register your own swiotlb area */
- dev_info(&pdev->dev, "Using SWIOTLB (size %i)\n", size);
- if (swiotlb_init_late(size, GFP_DMA, NULL))
- dev_emerg(&pdev->dev, "init swiotlb failed\n");
- }
- list_add(&instance->list, &sta2x11_instance_list);
-}
-DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_STMICRO, 0xcc17, sta2x11_new_instance);
-
-/*
- * Utility functions used in this file from below
- */
-static struct sta2x11_instance *sta2x11_pdev_to_instance(struct pci_dev *pdev)
-{
- struct sta2x11_instance *instance;
- int ep;
-
- list_for_each_entry(instance, &sta2x11_instance_list, list) {
- ep = pdev->bus->number - instance->bus0;
- if (ep >= 0 && ep < STA2X11_NR_EP)
- return instance;
- }
- return NULL;
-}
-
-static int sta2x11_pdev_to_ep(struct pci_dev *pdev)
-{
- struct sta2x11_instance *instance;
-
- instance = sta2x11_pdev_to_instance(pdev);
- if (!instance)
- return -1;
-
- return pdev->bus->number - instance->bus0;
-}
-
-/* This is exported, as some devices need to access the MFD registers */
-struct sta2x11_instance *sta2x11_get_instance(struct pci_dev *pdev)
-{
- return sta2x11_pdev_to_instance(pdev);
-}
-EXPORT_SYMBOL(sta2x11_get_instance);
-
-/* At setup time, we use our own ops if the device is a ConneXt one */
-static void sta2x11_setup_pdev(struct pci_dev *pdev)
-{
- struct sta2x11_instance *instance = sta2x11_pdev_to_instance(pdev);
-
- if (!instance) /* either a sta2x11 bridge or another ST device */
- return;
-
- /* We must enable all devices as master, for audio DMA to work */
- pci_set_master(pdev);
-}
-DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_STMICRO, PCI_ANY_ID, sta2x11_setup_pdev);
-
-/*
- * At boot we must set up the mappings for the pcie-to-amba bridge.
- * It involves device access, and the same happens at suspend/resume time
- */
-
-#define AHB_MAPB 0xCA4
-#define AHB_CRW(i) (AHB_MAPB + 0 + (i) * 0x10)
-#define AHB_CRW_SZMASK 0xfffffc00UL
-#define AHB_CRW_ENABLE (1 << 0)
-#define AHB_CRW_WTYPE_MEM (2 << 1)
-#define AHB_CRW_ROE (1UL << 3) /* Relax Order Ena */
-#define AHB_CRW_NSE (1UL << 4) /* No Snoop Enable */
-#define AHB_BASE(i) (AHB_MAPB + 4 + (i) * 0x10)
-#define AHB_PEXLBASE(i) (AHB_MAPB + 8 + (i) * 0x10)
-#define AHB_PEXHBASE(i) (AHB_MAPB + 12 + (i) * 0x10)
-
-/* At probe time, enable mapping for each endpoint, using the pdev */
-static void sta2x11_map_ep(struct pci_dev *pdev)
-{
- struct sta2x11_instance *instance = sta2x11_pdev_to_instance(pdev);
- struct device *dev = &pdev->dev;
- u32 amba_base, max_amba_addr;
- int i, ret;
-
- if (!instance)
- return;
-
- pci_read_config_dword(pdev, AHB_BASE(0), &amba_base);
- max_amba_addr = amba_base + STA2X11_AMBA_SIZE - 1;
-
- ret = dma_direct_set_offset(dev, 0, amba_base, STA2X11_AMBA_SIZE);
- if (ret)
- dev_err(dev, "sta2x11: could not set DMA offset\n");
-
- dev->bus_dma_limit = max_amba_addr;
- dma_set_mask_and_coherent(&pdev->dev, max_amba_addr);
-
- /* Configure AHB mapping */
- pci_write_config_dword(pdev, AHB_PEXLBASE(0), 0);
- pci_write_config_dword(pdev, AHB_PEXHBASE(0), 0);
- pci_write_config_dword(pdev, AHB_CRW(0), STA2X11_AMBA_SIZE |
- AHB_CRW_WTYPE_MEM | AHB_CRW_ENABLE);
-
- /* Disable all the other windows */
- for (i = 1; i < STA2X11_NR_FUNCS; i++)
- pci_write_config_dword(pdev, AHB_CRW(i), 0);
-
- dev_info(&pdev->dev,
- "sta2x11: Map EP %i: AMBA address %#8x-%#8x\n",
- sta2x11_pdev_to_ep(pdev), amba_base, max_amba_addr);
-}
-DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_STMICRO, PCI_ANY_ID, sta2x11_map_ep);
-
-#ifdef CONFIG_PM /* Some register values must be saved and restored */
-
-static struct sta2x11_mapping *sta2x11_pdev_to_mapping(struct pci_dev *pdev)
-{
- struct sta2x11_instance *instance;
- int ep;
-
- instance = sta2x11_pdev_to_instance(pdev);
- if (!instance)
- return NULL;
- ep = sta2x11_pdev_to_ep(pdev);
- return instance->map + ep;
-}
-
-static void suspend_mapping(struct pci_dev *pdev)
-{
- struct sta2x11_mapping *map = sta2x11_pdev_to_mapping(pdev);
- int i;
-
- if (!map)
- return;
-
- if (map->is_suspended)
- return;
- map->is_suspended = 1;
-
- /* Save all window configs */
- for (i = 0; i < STA2X11_NR_FUNCS; i++) {
- struct sta2x11_ahb_regs *regs = map->regs + i;
-
- pci_read_config_dword(pdev, AHB_BASE(i), &regs->base);
- pci_read_config_dword(pdev, AHB_PEXLBASE(i), &regs->pexlbase);
- pci_read_config_dword(pdev, AHB_PEXHBASE(i), &regs->pexhbase);
- pci_read_config_dword(pdev, AHB_CRW(i), &regs->crw);
- }
-}
-DECLARE_PCI_FIXUP_SUSPEND(PCI_VENDOR_ID_STMICRO, PCI_ANY_ID, suspend_mapping);
-
-static void resume_mapping(struct pci_dev *pdev)
-{
- struct sta2x11_mapping *map = sta2x11_pdev_to_mapping(pdev);
- int i;
-
- if (!map)
- return;
-
-
- if (!map->is_suspended)
- goto out;
- map->is_suspended = 0;
-
- /* Restore all window configs */
- for (i = 0; i < STA2X11_NR_FUNCS; i++) {
- struct sta2x11_ahb_regs *regs = map->regs + i;
-
- pci_write_config_dword(pdev, AHB_BASE(i), regs->base);
- pci_write_config_dword(pdev, AHB_PEXLBASE(i), regs->pexlbase);
- pci_write_config_dword(pdev, AHB_PEXHBASE(i), regs->pexhbase);
- pci_write_config_dword(pdev, AHB_CRW(i), regs->crw);
- }
-out:
- pci_set_master(pdev); /* Like at boot, enable master on all devices */
-}
-DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_STMICRO, PCI_ANY_ID, resume_mapping);
-
-#endif /* CONFIG_PM */
diff --git a/arch/x86/platform/pvh/head.S b/arch/x86/platform/pvh/head.S
index 4733a5f467b8..cfa18ec7d55f 100644
--- a/arch/x86/platform/pvh/head.S
+++ b/arch/x86/platform/pvh/head.S
@@ -173,10 +173,14 @@ SYM_CODE_START(pvh_start_xen)
1:
UNWIND_HINT_END_OF_STACK
- /* Set base address in stack canary descriptor. */
- mov $MSR_GS_BASE,%ecx
- leal canary(%rip), %eax
- xor %edx, %edx
+ /*
+ * Set up GSBASE.
+ * Note that on SMP the boot CPU uses the init data section until
+ * the per-CPU areas are set up.
+ */
+ movl $MSR_GS_BASE,%ecx
+ xorl %eax, %eax
+ xorl %edx, %edx
wrmsr
/* Call xen_prepare_pvh() via the kernel virtual mapping */
@@ -238,8 +242,6 @@ SYM_DATA_START_LOCAL(gdt_start)
SYM_DATA_END_LABEL(gdt_start, SYM_L_LOCAL, gdt_end)
.balign 16
-SYM_DATA_LOCAL(canary, .fill 48, 1, 0)
-
SYM_DATA_START_LOCAL(early_stack)
.fill BOOT_STACK_SIZE, 1, 0
SYM_DATA_END_LABEL(early_stack, SYM_L_LOCAL, early_stack_end)
diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S
index 0a0539e1cc81..8c534c36adfa 100644
--- a/arch/x86/power/hibernate_asm_64.S
+++ b/arch/x86/power/hibernate_asm_64.S
@@ -26,6 +26,7 @@
/* code below belongs to the image kernel */
.align PAGE_SIZE
SYM_FUNC_START(restore_registers)
+ ANNOTATE_NOENDBR
/* go back to the original page tables */
movq %r9, %cr3
@@ -119,6 +120,7 @@ SYM_FUNC_END(restore_image)
/* code below has been relocated to a safe page */
SYM_FUNC_START(core_restore_code)
+ ANNOTATE_NOENDBR
/* switch to temporary page tables */
movq %rax, %cr3
/* flush TLB */
diff --git a/arch/x86/realmode/rm/realmode.h b/arch/x86/realmode/rm/realmode.h
index c76041a35397..867e55f1d6af 100644
--- a/arch/x86/realmode/rm/realmode.h
+++ b/arch/x86/realmode/rm/realmode.h
@@ -2,7 +2,7 @@
#ifndef ARCH_X86_REALMODE_RM_REALMODE_H
#define ARCH_X86_REALMODE_RM_REALMODE_H
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
/*
* 16-bit ljmpw to the real_mode_seg
@@ -12,7 +12,7 @@
*/
#define LJMPW_RM(to) .byte 0xea ; .word (to), real_mode_seg
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
/*
* Signature at the end of the realmode region
diff --git a/arch/x86/realmode/rm/wakeup.h b/arch/x86/realmode/rm/wakeup.h
index 0e4fd08ae447..3b6d8fa82d3e 100644
--- a/arch/x86/realmode/rm/wakeup.h
+++ b/arch/x86/realmode/rm/wakeup.h
@@ -7,7 +7,7 @@
#ifndef ARCH_X86_KERNEL_ACPI_RM_WAKEUP_H
#define ARCH_X86_KERNEL_ACPI_RM_WAKEUP_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
/* This must match data at wakeup.S */
diff --git a/arch/x86/tools/cpufeaturemasks.awk b/arch/x86/tools/cpufeaturemasks.awk
new file mode 100755
index 000000000000..173d5bf2d999
--- /dev/null
+++ b/arch/x86/tools/cpufeaturemasks.awk
@@ -0,0 +1,88 @@
+#!/usr/bin/awk
+#
+# Convert cpufeatures.h to a list of compile-time masks
+# Note: this blithely assumes that each word has at least one
+# feature defined in it; if not, something else is wrong!
+#
+
+BEGIN {
+ printf "#ifndef _ASM_X86_CPUFEATUREMASKS_H\n";
+ printf "#define _ASM_X86_CPUFEATUREMASKS_H\n\n";
+
+ file = 0
+}
+
+FNR == 1 {
+ ++file;
+
+ # arch/x86/include/asm/cpufeatures.h
+ if (file == 1)
+ FS = "[ \t()*+]+";
+
+ # .config
+ if (file == 2)
+ FS = "=";
+}
+
+# Create a dictionary of sorts, containing all defined feature bits
+file == 1 && $1 ~ /^#define$/ && $2 ~ /^X86_FEATURE_/ {
+ nfeat = $3 * $4 + $5;
+ feat = $2;
+ sub(/^X86_FEATURE_/, "", feat);
+ feats[nfeat] = feat;
+}
+file == 1 && $1 ~ /^#define$/ && $2 == "NCAPINTS" {
+ ncapints = int($3);
+}
+
+# Create a dictionary featstat[REQUIRED|DISABLED, FEATURE_NAME] = on | off
+file == 2 && $1 ~ /^CONFIG_X86_(REQUIRED|DISABLED)_FEATURE_/ {
+ on = ($2 == "y");
+ if (split($1, fs, "CONFIG_X86_|_FEATURE_") == 3)
+ featstat[fs[2], fs[3]] = on;
+}
+
+END {
+ sets[1] = "REQUIRED";
+ sets[2] = "DISABLED";
+
+ for (ns in sets) {
+ s = sets[ns];
+
+ printf "/*\n";
+ printf " * %s features:\n", s;
+ printf " *\n";
+ fstr = "";
+ for (i = 0; i < ncapints; i++) {
+ mask = 0;
+ for (j = 0; j < 32; j++) {
+ feat = feats[i*32 + j];
+ if (featstat[s, feat]) {
+ nfstr = fstr " " feat;
+ if (length(nfstr) > 72) {
+ printf " * %s\n", fstr;
+ nfstr = " " feat;
+ }
+ fstr = nfstr;
+ mask += (2 ^ j);
+ }
+ }
+ masks[i] = mask;
+ }
+ printf " * %s\n */\n", fstr;
+
+ for (i = 0; i < ncapints; i++)
+ printf "#define %s_MASK%d\t0x%08xU\n", s, i, masks[i];
+
+ printf "\n#define %s_MASK_BIT_SET(x)\t\t\t\\\n", s;
+ printf "\t((\t\t\t\t\t";
+ for (i = 0; i < ncapints; i++) {
+ if (masks[i])
+ printf "\t\\\n\t\t((x) >> 5) == %2d ? %s_MASK%d :", i, s, i;
+ }
+ printf " 0\t\\\n";
+ printf "\t) & (1U << ((x) & 31)))\n\n";
+ }
+
+ printf "#endif /* _ASM_X86_CPUFEATUREMASKS_H */\n";
+}
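
The script emits one REQUIRED_MASKn/DISABLED_MASKn constant per 32-bit capability word plus a MASK_BIT_SET() selector. A hand-written miniature of that generated output (mask values invented for the sketch, two words assumed) shows how the selector reduces to a compile-time constant for a given feature bit:

#include <stdio.h>

/* Word index is x >> 5, bit within the word is x & 31. */
#define DISABLED_MASK0	0x00000004U
#define DISABLED_MASK1	0x00010000U

#define DISABLED_MASK_BIT_SET(x)			\
	((						\
		((x) >> 5) ==  0 ? DISABLED_MASK0 :	\
		((x) >> 5) ==  1 ? DISABLED_MASK1 : 0	\
	) & (1U << ((x) & 31)))

int main(void)
{
	printf("%u\n", !!DISABLED_MASK_BIT_SET(2));	/* word 0, bit 2  -> 1 */
	printf("%u\n", !!DISABLED_MASK_BIT_SET(3));	/* word 0, bit 3  -> 0 */
	printf("%u\n", !!DISABLED_MASK_BIT_SET(48));	/* word 1, bit 16 -> 1 */
	return 0;
}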
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index e937be979ec8..5778bc498415 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -29,9 +29,13 @@ static struct relocs relocs16;
static struct relocs relocs32;
#if ELF_BITS == 64
-static struct relocs relocs32neg;
static struct relocs relocs64;
# define FMT PRIu64
+
+#ifndef R_X86_64_REX_GOTPCRELX
+# define R_X86_64_REX_GOTPCRELX 42
+#endif
+
#else
# define FMT PRIu32
#endif
@@ -86,8 +90,6 @@ static const char * const sym_regex_kernel[S_NSYMTYPES] = {
"__initramfs_start|"
"(jiffies|jiffies_64)|"
#if ELF_BITS == 64
- "__per_cpu_load|"
- "init_per_cpu__.*|"
"__end_rodata_hpage_align|"
#endif
"_end)$"
@@ -227,6 +229,7 @@ static const char *rel_type(unsigned type)
REL_TYPE(R_X86_64_PC16),
REL_TYPE(R_X86_64_8),
REL_TYPE(R_X86_64_PC8),
+ REL_TYPE(R_X86_64_REX_GOTPCRELX),
#else
REL_TYPE(R_386_NONE),
REL_TYPE(R_386_32),
@@ -284,34 +287,6 @@ static const char *sym_name(const char *sym_strtab, Elf_Sym *sym)
return name;
}
-static Elf_Sym *sym_lookup(const char *symname)
-{
- int i;
-
- for (i = 0; i < shnum; i++) {
- struct section *sec = &secs[i];
- long nsyms;
- char *strtab;
- Elf_Sym *symtab;
- Elf_Sym *sym;
-
- if (sec->shdr.sh_type != SHT_SYMTAB)
- continue;
-
- nsyms = sec->shdr.sh_size/sizeof(Elf_Sym);
- symtab = sec->symtab;
- strtab = sec->link->strtab;
-
- for (sym = symtab; --nsyms >= 0; sym++) {
- if (!sym->st_name)
- continue;
- if (strcmp(symname, strtab + sym->st_name) == 0)
- return sym;
- }
- }
- return 0;
-}
-
#if BYTE_ORDER == LITTLE_ENDIAN
# define le16_to_cpu(val) (val)
# define le32_to_cpu(val) (val)
@@ -760,84 +735,8 @@ static void walk_relocs(int (*process)(struct section *sec, Elf_Rel *rel,
}
}
-/*
- * The .data..percpu section is a special case for x86_64 SMP kernels.
- * It is used to initialize the actual per_cpu areas and to provide
- * definitions for the per_cpu variables that correspond to their offsets
- * within the percpu area. Since the values of all of the symbols need
- * to be offsets from the start of the per_cpu area the virtual address
- * (sh_addr) of .data..percpu is 0 in SMP kernels.
- *
- * This means that:
- *
- * Relocations that reference symbols in the per_cpu area do not
- * need further relocation (since the value is an offset relative
- * to the start of the per_cpu area that does not change).
- *
- * Relocations that apply to the per_cpu area need to have their
- * offset adjusted by by the value of __per_cpu_load to make them
- * point to the correct place in the loaded image (because the
- * virtual address of .data..percpu is 0).
- *
- * For non SMP kernels .data..percpu is linked as part of the normal
- * kernel data and does not require special treatment.
- *
- */
-static int per_cpu_shndx = -1;
-static Elf_Addr per_cpu_load_addr;
-
-static void percpu_init(void)
-{
- int i;
-
- for (i = 0; i < shnum; i++) {
- ElfW(Sym) *sym;
-
- if (strcmp(sec_name(i), ".data..percpu"))
- continue;
-
- if (secs[i].shdr.sh_addr != 0) /* non SMP kernel */
- return;
-
- sym = sym_lookup("__per_cpu_load");
- if (!sym)
- die("can't find __per_cpu_load\n");
-
- per_cpu_shndx = i;
- per_cpu_load_addr = sym->st_value;
-
- return;
- }
-}
-
#if ELF_BITS == 64
-/*
- * Check to see if a symbol lies in the .data..percpu section.
- *
- * The linker incorrectly associates some symbols with the
- * .data..percpu section so we also need to check the symbol
- * name to make sure that we classify the symbol correctly.
- *
- * The GNU linker incorrectly associates:
- * __init_begin
- * __per_cpu_load
- *
- * The "gold" linker incorrectly associates:
- * init_per_cpu__fixed_percpu_data
- * init_per_cpu__gdt_page
- */
-static int is_percpu_sym(ElfW(Sym) *sym, const char *symname)
-{
- int shndx = sym_index(sym);
-
- return (shndx == per_cpu_shndx) &&
- strcmp(symname, "__init_begin") &&
- strcmp(symname, "__per_cpu_load") &&
- strncmp(symname, "init_per_cpu_", 13);
-}
-
-
static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym,
const char *symname)
{
@@ -848,12 +747,6 @@ static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym,
if (sym->st_shndx == SHN_UNDEF)
return 0;
- /*
- * Adjust the offset if this reloc applies to the percpu section.
- */
- if (sec->shdr.sh_info == per_cpu_shndx)
- offset += per_cpu_load_addr;
-
switch (r_type) {
case R_X86_64_NONE:
/* NONE can be ignored. */
@@ -861,33 +754,23 @@ static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym,
case R_X86_64_PC32:
case R_X86_64_PLT32:
+ case R_X86_64_REX_GOTPCRELX:
/*
- * PC relative relocations don't need to be adjusted unless
- * referencing a percpu symbol.
+ * PC relative relocations don't need to be adjusted.
*
* NB: R_X86_64_PLT32 can be treated as R_X86_64_PC32.
*/
- if (is_percpu_sym(sym, symname))
- add_reloc(&relocs32neg, offset);
break;
case R_X86_64_PC64:
/*
* Only used by jump labels
*/
- if (is_percpu_sym(sym, symname))
- die("Invalid R_X86_64_PC64 relocation against per-CPU symbol %s\n", symname);
break;
case R_X86_64_32:
case R_X86_64_32S:
case R_X86_64_64:
- /*
- * References to the percpu area don't need to be adjusted.
- */
- if (is_percpu_sym(sym, symname))
- break;
-
if (shn_abs) {
/*
* Whitelisted absolute symbols do not require
@@ -1055,7 +938,8 @@ static int cmp_relocs(const void *va, const void *vb)
static void sort_relocs(struct relocs *r)
{
- qsort(r->offset, r->count, sizeof(r->offset[0]), cmp_relocs);
+ if (r->count)
+ qsort(r->offset, r->count, sizeof(r->offset[0]), cmp_relocs);
}
static int write32(uint32_t v, FILE *f)
@@ -1099,7 +983,6 @@ static void emit_relocs(int as_text, int use_real_mode)
/* Order the relocations for more efficient processing */
sort_relocs(&relocs32);
#if ELF_BITS == 64
- sort_relocs(&relocs32neg);
sort_relocs(&relocs64);
#else
sort_relocs(&relocs16);
@@ -1131,13 +1014,6 @@ static void emit_relocs(int as_text, int use_real_mode)
/* Now print each relocation */
for (i = 0; i < relocs64.count; i++)
write_reloc(relocs64.offset[i], stdout);
-
- /* Print a stop */
- write_reloc(0, stdout);
-
- /* Now print each inverse 32-bit relocation */
- for (i = 0; i < relocs32neg.count; i++)
- write_reloc(relocs32neg.offset[i], stdout);
#endif
/* Print a stop */
@@ -1190,9 +1066,6 @@ void process(FILE *fp, int use_real_mode, int as_text,
read_symtabs(fp);
read_relocs(fp);
- if (ELF_BITS == 64)
- percpu_init();
-
if (show_absolute_syms) {
print_absolute_symbols();
return;
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 77e788e928cd..98d8a50d2aed 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -9,7 +9,7 @@ config XEN
select PARAVIRT_CLOCK
select X86_HV_CALLBACK_VECTOR
depends on X86_64 || (X86_32 && X86_PAE)
- depends on X86_64 || (X86_GENERIC || MPENTIUM4 || MCORE2 || MATOM || MK8)
+ depends on X86_64 || (X86_GENERIC || MPENTIUM4 || MATOM)
depends on X86_LOCAL_APIC && X86_TSC
help
This is the Linux Xen port. Enabling this will allow the
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 5e57835e999d..dcc2041f8e61 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -73,6 +73,7 @@
#include <asm/mwait.h>
#include <asm/pci_x86.h>
#include <asm/cpu.h>
+#include <asm/irq_stack.h>
#ifdef CONFIG_X86_IOPL_IOPERM
#include <asm/io_bitmap.h>
#endif
@@ -94,6 +95,44 @@ void *xen_initial_gdt;
static int xen_cpu_up_prepare_pv(unsigned int cpu);
static int xen_cpu_dead_pv(unsigned int cpu);
+#ifndef CONFIG_PREEMPTION
+/*
+ * Some hypercalls issued by the toolstack can take many 10s of
+ * seconds. Allow tasks running hypercalls via the privcmd driver to
+ * be voluntarily preempted even if full kernel preemption is
+ * disabled.
+ *
+ * Such preemptible hypercalls are bracketed by
+ * xen_preemptible_hcall_begin() and xen_preemptible_hcall_end()
+ * calls.
+ */
+DEFINE_PER_CPU(bool, xen_in_preemptible_hcall);
+EXPORT_SYMBOL_GPL(xen_in_preemptible_hcall);
+
+/*
+ * In case of scheduling the flag must be cleared and restored after
+ * returning from schedule as the task might move to a different CPU.
+ */
+static __always_inline bool get_and_clear_inhcall(void)
+{
+ bool inhcall = __this_cpu_read(xen_in_preemptible_hcall);
+
+ __this_cpu_write(xen_in_preemptible_hcall, false);
+ return inhcall;
+}
+
+static __always_inline void restore_inhcall(bool inhcall)
+{
+ __this_cpu_write(xen_in_preemptible_hcall, inhcall);
+}
+
+#else
+
+static __always_inline bool get_and_clear_inhcall(void) { return false; }
+static __always_inline void restore_inhcall(bool inhcall) { }
+
+#endif
+
struct tls_descs {
struct desc_struct desc[3];
};
@@ -687,6 +726,36 @@ DEFINE_IDTENTRY_RAW(xenpv_exc_machine_check)
}
#endif
+static void __xen_pv_evtchn_do_upcall(struct pt_regs *regs)
+{
+ struct pt_regs *old_regs = set_irq_regs(regs);
+
+ inc_irq_stat(irq_hv_callback_count);
+
+ xen_evtchn_do_upcall();
+
+ set_irq_regs(old_regs);
+}
+
+__visible noinstr void xen_pv_evtchn_do_upcall(struct pt_regs *regs)
+{
+ irqentry_state_t state = irqentry_enter(regs);
+ bool inhcall;
+
+ instrumentation_begin();
+ run_sysvec_on_irqstack_cond(__xen_pv_evtchn_do_upcall, regs);
+
+ inhcall = get_and_clear_inhcall();
+ if (inhcall && !WARN_ON_ONCE(state.exit_rcu)) {
+ irqentry_exit_cond_resched();
+ instrumentation_end();
+ restore_inhcall(inhcall);
+ } else {
+ instrumentation_end();
+ irqentry_exit(regs, state);
+ }
+}
+
struct trap_array_entry {
void (*orig)(void);
void (*xen)(void);
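
The get_and_clear_inhcall()/restore_inhcall() pair exists because the flag must be cleared across the voluntary reschedule (the task may resume on another CPU) and restored afterwards. A rough userspace analogue, with thread-local storage standing in for the per-CPU variable and sched_yield() for irqentry_exit_cond_resched():

#include <stdio.h>
#include <stdbool.h>
#include <sched.h>

static __thread bool in_preemptible_hcall;	/* GCC/Clang TLS, per-CPU stand-in */

static bool get_and_clear_inhcall(void)
{
	bool inhcall = in_preemptible_hcall;

	in_preemptible_hcall = false;
	return inhcall;
}

static void restore_inhcall(bool inhcall)
{
	in_preemptible_hcall = inhcall;
}

static void upcall_exit(void)
{
	bool inhcall = get_and_clear_inhcall();

	if (inhcall) {
		sched_yield();		/* voluntary preemption point */
		restore_inhcall(inhcall);
	}
}

int main(void)
{
	in_preemptible_hcall = true;	/* as if inside a privcmd hypercall */
	upcall_exit();
	printf("%d\n", in_preemptible_hcall);	/* 1: flag restored */
	return 0;
}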
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index d078de2c952b..38971c6dcd4b 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -2189,7 +2189,6 @@ static const typeof(pv_ops) xen_mmu_ops __initconst = {
.flush_tlb_kernel = xen_flush_tlb,
.flush_tlb_one_user = xen_flush_tlb_one_user,
.flush_tlb_multi = xen_flush_tlb_multi,
- .tlb_remove_table = tlb_remove_table,
.pgd_alloc = xen_pgd_alloc,
.pgd_free = xen_pgd_free,
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index 6863d3da7dec..688ff59318ae 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -70,7 +70,7 @@ static void cpu_bringup(void)
xen_enable_syscall();
}
cpu = smp_processor_id();
- smp_store_cpu_info(cpu);
+ identify_secondary_cpu(cpu);
set_cpu_sibling_map(cpu);
speculative_store_bypass_ht_init();
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index b518f36d1ca2..109af12f7647 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -51,6 +51,7 @@ SYM_FUNC_END(xen_hypercall_pv)
* non-zero.
*/
SYM_FUNC_START(xen_irq_disable_direct)
+ ENDBR
movb $1, PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_mask)
RET
SYM_FUNC_END(xen_irq_disable_direct)
@@ -90,6 +91,7 @@ SYM_FUNC_END(check_events)
* then enter the hypervisor to get them handled.
*/
SYM_FUNC_START(xen_irq_enable_direct)
+ ENDBR
FRAME_BEGIN
/* Unmask events */
movb $0, PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_mask)
@@ -120,6 +122,7 @@ SYM_FUNC_END(xen_irq_enable_direct)
* x86 use opposite senses (mask vs enable).
*/
SYM_FUNC_START(xen_save_fl_direct)
+ ENDBR
testb $0xff, PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_mask)
setz %ah
addb %ah, %ah
@@ -127,6 +130,7 @@ SYM_FUNC_START(xen_save_fl_direct)
SYM_FUNC_END(xen_save_fl_direct)
SYM_FUNC_START(xen_read_cr2)
+ ENDBR
FRAME_BEGIN
_ASM_MOV PER_CPU_VAR(xen_vcpu), %_ASM_AX
_ASM_MOV XEN_vcpu_info_arch_cr2(%_ASM_AX), %_ASM_AX
@@ -135,6 +139,7 @@ SYM_FUNC_START(xen_read_cr2)
SYM_FUNC_END(xen_read_cr2);
SYM_FUNC_START(xen_read_cr2_direct)
+ ENDBR
FRAME_BEGIN
_ASM_MOV PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_arch_cr2), %_ASM_AX
FRAME_END
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 894edf8d6d62..5dad6c51cdc3 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -31,16 +31,14 @@ SYM_CODE_START(startup_xen)
leaq __top_init_kernel_stack(%rip), %rsp
- /* Set up %gs.
- *
- * The base of %gs always points to fixed_percpu_data. If the
- * stack protector canary is enabled, it is located at %gs:40.
+ /*
+ * Set up GSBASE.
* Note that, on SMP, the boot cpu uses init data section until
* the per cpu areas are set up.
*/
movl $MSR_GS_BASE,%ecx
- movq $INIT_PER_CPU_VAR(fixed_percpu_data),%rax
- cdq
+ xorl %eax, %eax
+ xorl %edx, %edx
wrmsr
mov %rsi, %rdi
@@ -133,11 +131,13 @@ SYM_FUNC_START(xen_hypercall_hvm)
SYM_FUNC_END(xen_hypercall_hvm)
SYM_FUNC_START(xen_hypercall_amd)
+ ANNOTATE_NOENDBR
vmmcall
RET
SYM_FUNC_END(xen_hypercall_amd)
SYM_FUNC_START(xen_hypercall_intel)
+ ANNOTATE_NOENDBR
vmcall
RET
SYM_FUNC_END(xen_hypercall_intel)
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 698897b29de2..586cc7d1d8aa 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -590,6 +590,8 @@ static void acpi_idle_play_dead(struct cpuidle_device *dev, int index)
raw_safe_halt();
else if (cx->entry_method == ACPI_CSTATE_SYSTEMIO) {
io_idle(cx->address);
+ } else if (cx->entry_method == ACPI_CSTATE_FFH) {
+ acpi_processor_ffh_play_dead(cx);
} else
return;
}
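The new ACPI_CSTATE_FFH branch defers to an architecture-provided acpi_processor_ffh_play_dead(); on x86 the natural backing is MWAIT, in the same spirit as acpi_processor_ffh_cstate_enter(). A hedged sketch of such an implementation, assuming cx->address carries the MWAIT hint probed earlier (the real kernel may look the hint up differently):

#include <acpi/processor.h>
#include <asm/smp.h>

/* Sketch only: an FFH play_dead backed by MWAIT. */
void __noreturn acpi_processor_ffh_play_dead(struct acpi_processor_cx *cx)
{
	mwait_play_dead((unsigned int)cx->address);
}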
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 9c4cc01fd51a..f06b9bc99945 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -2200,28 +2200,20 @@ static int knl_get_turbo_pstate(int cpu)
return ret;
}
-static void hybrid_get_type(void *data)
-{
- u8 *cpu_type = data;
-
- *cpu_type = get_this_hybrid_cpu_type();
-}
-
static int hwp_get_cpu_scaling(int cpu)
{
if (hybrid_scaling_factor) {
- u8 cpu_type = 0;
-
- smp_call_function_single(cpu, hybrid_get_type, &cpu_type, 1);
+ struct cpuinfo_x86 *c = &cpu_data(smp_processor_id());
+ u8 cpu_type = c->topo.intel_type;
/*
* Return the hybrid scaling factor for P-cores and use the
* default core scaling for E-cores.
*/
- if (cpu_type == 0x40)
+ if (cpu_type == INTEL_CPU_TYPE_CORE)
return hybrid_scaling_factor;
- if (cpu_type == 0x20)
+ if (cpu_type == INTEL_CPU_TYPE_ATOM)
return core_get_scaling();
}
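The rewrite replaces a cross-CPU IPI (smp_call_function_single() into get_this_hybrid_cpu_type()) with a read of the hybrid CPU type that the x86 topology code caches during CPU identification. A small sketch of consuming that cached field for an arbitrary CPU (cpu_is_pcore() is an illustrative helper, not an existing API; INTEL_CPU_TYPE_CORE comes from the x86 Intel family definitions):

#include <asm/processor.h>

/* Sketch: distinguish P-cores from E-cores without an IPI. */
static bool cpu_is_pcore(int cpu)
{
	return cpu_data(cpu).topo.intel_type == INTEL_CPU_TYPE_CORE;
}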
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 0fdb1d1316c4..5687089e406a 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -58,6 +58,7 @@
#include <asm/spec-ctrl.h>
#include <asm/tsc.h>
#include <asm/fpu/api.h>
+#include <asm/smp.h>
#define INTEL_IDLE_VERSION "0.5.1"
@@ -229,6 +230,15 @@ static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev,
return 0;
}
+static void intel_idle_enter_dead(struct cpuidle_device *dev, int index)
+{
+ struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
+ struct cpuidle_state *state = &drv->states[index];
+ unsigned long eax = flg2MWAIT(state->flags);
+
+ mwait_play_dead(eax);
+}
+
/*
* States are indexed by the cstate number,
* which is also the index into the MWAIT hint array.
@@ -1804,6 +1814,7 @@ static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv)
mark_tsc_unstable("TSC halts in idle");
state->enter = intel_idle;
+ state->enter_dead = intel_idle_enter_dead;
state->enter_s2idle = intel_idle_s2idle;
}
}
@@ -2153,6 +2164,9 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
!cpuidle_state_table[cstate].enter_s2idle)
break;
+ if (!cpuidle_state_table[cstate].enter_dead)
+ cpuidle_state_table[cstate].enter_dead = intel_idle_enter_dead;
+
/* If marked as unusable, skip this state. */
if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_UNUSABLE) {
pr_debug("state %s is disabled\n",
diff --git a/drivers/misc/mei/Kconfig b/drivers/misc/mei/Kconfig
index 67d9391f1855..7575fee96cc6 100644
--- a/drivers/misc/mei/Kconfig
+++ b/drivers/misc/mei/Kconfig
@@ -3,7 +3,7 @@
config INTEL_MEI
tristate "Intel Management Engine Interface"
depends on X86 && PCI
- default GENERIC_CPU || MCORE2 || MATOM || X86_GENERIC
+ default X86_64 || MATOM
help
The Intel Management Engine (Intel ME) provides Manageability,
Security and Media services for systems containing Intel chipsets.
diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
index 2fbd379923fd..5c3054aaec8c 100644
--- a/drivers/pci/Kconfig
+++ b/drivers/pci/Kconfig
@@ -203,6 +203,12 @@ config PCI_P2PDMA
P2P DMA transactions must be between devices behind the same root
port.
+ Enabling this option will reduce the entropy of x86 KASLR memory
+ regions. For example, on a 46-bit system the entropy goes down
+ from 16 bits to 15 bits. The actual reduction depends on the
+ physical address bits, processor features, the kernel config
+ (5-level page tables) and the physical memory present in the system.
+
If unsure, say N.
config PCI_LABEL
diff --git a/drivers/platform/x86/amd/hsmp/Kconfig b/drivers/platform/x86/amd/hsmp/Kconfig
index 7d10d4462a45..d6f7a62d55b5 100644
--- a/drivers/platform/x86/amd/hsmp/Kconfig
+++ b/drivers/platform/x86/amd/hsmp/Kconfig
@@ -7,7 +7,7 @@ config AMD_HSMP
tristate
menu "AMD HSMP Driver"
- depends on AMD_NB || COMPILE_TEST
+ depends on AMD_NODE || COMPILE_TEST
config AMD_HSMP_ACPI
tristate "AMD HSMP ACPI device driver"
diff --git a/drivers/platform/x86/amd/hsmp/acpi.c b/drivers/platform/x86/amd/hsmp/acpi.c
index 444b43be35a2..c1eccb3c80c5 100644
--- a/drivers/platform/x86/amd/hsmp/acpi.c
+++ b/drivers/platform/x86/amd/hsmp/acpi.c
@@ -10,7 +10,6 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <asm/amd_hsmp.h>
-#include <asm/amd_nb.h>
#include <linux/acpi.h>
#include <linux/device.h>
@@ -24,6 +23,8 @@
#include <uapi/asm-generic/errno-base.h>
+#include <asm/amd_node.h>
+
#include "hsmp.h"
#define DRIVER_NAME "amd_hsmp"
@@ -321,8 +322,8 @@ static int hsmp_acpi_probe(struct platform_device *pdev)
return -ENOMEM;
if (!hsmp_pdev->is_probed) {
- hsmp_pdev->num_sockets = amd_nb_num();
- if (hsmp_pdev->num_sockets == 0 || hsmp_pdev->num_sockets > MAX_AMD_SOCKETS)
+ hsmp_pdev->num_sockets = amd_num_nodes();
+ if (hsmp_pdev->num_sockets == 0 || hsmp_pdev->num_sockets > MAX_AMD_NUM_NODES)
return -ENODEV;
hsmp_pdev->sock = devm_kcalloc(&pdev->dev, hsmp_pdev->num_sockets,
diff --git a/drivers/platform/x86/amd/hsmp/hsmp.c b/drivers/platform/x86/amd/hsmp/hsmp.c
index 03164e30b3a5..a3ac09a90de4 100644
--- a/drivers/platform/x86/amd/hsmp/hsmp.c
+++ b/drivers/platform/x86/amd/hsmp/hsmp.c
@@ -8,7 +8,6 @@
*/
#include <asm/amd_hsmp.h>
-#include <asm/amd_nb.h>
#include <linux/acpi.h>
#include <linux/delay.h>
diff --git a/drivers/platform/x86/amd/hsmp/hsmp.h b/drivers/platform/x86/amd/hsmp/hsmp.h
index e852f0a947e4..af8b21f821d6 100644
--- a/drivers/platform/x86/amd/hsmp/hsmp.h
+++ b/drivers/platform/x86/amd/hsmp/hsmp.h
@@ -21,8 +21,6 @@
#define HSMP_ATTR_GRP_NAME_SIZE 10
-#define MAX_AMD_SOCKETS 8
-
#define HSMP_CDEV_NAME "hsmp_cdev"
#define HSMP_DEVNODE_NAME "hsmp"
@@ -41,7 +39,6 @@ struct hsmp_socket {
void __iomem *virt_base_addr;
struct semaphore hsmp_sem;
char name[HSMP_ATTR_GRP_NAME_SIZE];
- struct pci_dev *root;
struct device *dev;
u16 sock_ind;
int (*amd_hsmp_rdwr)(struct hsmp_socket *sock, u32 off, u32 *val, bool rw);
diff --git a/drivers/platform/x86/amd/hsmp/plat.c b/drivers/platform/x86/amd/hsmp/plat.c
index 02ca85762b68..b9782a078dbd 100644
--- a/drivers/platform/x86/amd/hsmp/plat.c
+++ b/drivers/platform/x86/amd/hsmp/plat.c
@@ -10,14 +10,16 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <asm/amd_hsmp.h>
-#include <asm/amd_nb.h>
+#include <linux/build_bug.h>
#include <linux/device.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/platform_device.h>
#include <linux/sysfs.h>
+#include <asm/amd_node.h>
+
#include "hsmp.h"
#define DRIVER_NAME "amd_hsmp"
@@ -34,28 +36,12 @@
#define SMN_HSMP_MSG_RESP 0x0010980
#define SMN_HSMP_MSG_DATA 0x00109E0
-#define HSMP_INDEX_REG 0xc4
-#define HSMP_DATA_REG 0xc8
-
static struct hsmp_plat_device *hsmp_pdev;
static int amd_hsmp_pci_rdwr(struct hsmp_socket *sock, u32 offset,
u32 *value, bool write)
{
- int ret;
-
- if (!sock->root)
- return -ENODEV;
-
- ret = pci_write_config_dword(sock->root, HSMP_INDEX_REG,
- sock->mbinfo.base_addr + offset);
- if (ret)
- return ret;
-
- ret = (write ? pci_write_config_dword(sock->root, HSMP_DATA_REG, *value)
- : pci_read_config_dword(sock->root, HSMP_DATA_REG, value));
-
- return ret;
+ return amd_smn_hsmp_rdwr(sock->sock_ind, sock->mbinfo.base_addr + offset, value, write);
}
static ssize_t hsmp_metric_tbl_plat_read(struct file *filp, struct kobject *kobj,
@@ -95,7 +81,12 @@ static umode_t hsmp_is_sock_attr_visible(struct kobject *kobj,
* A static array of 8 + 1 (for NULL) elements is created below
* to create sysfs groups for sockets.
* The is_bin_visible function is used to show / hide the necessary groups.
+ *
+ * Validate the maximum number against MAX_AMD_NUM_NODES. If this changes,
+ * then the attributes and groups below must be adjusted.
*/
+static_assert(MAX_AMD_NUM_NODES == 8);
+
#define HSMP_BIN_ATTR(index, _list) \
static const struct bin_attribute attr##index = { \
.attr = { .name = HSMP_METRICS_TABLE_NAME, .mode = 0444}, \
@@ -159,10 +150,7 @@ static int init_platform_device(struct device *dev)
int ret, i;
for (i = 0; i < hsmp_pdev->num_sockets; i++) {
- if (!node_to_amd_nb(i))
- return -ENODEV;
sock = &hsmp_pdev->sock[i];
- sock->root = node_to_amd_nb(i)->root;
sock->sock_ind = i;
sock->dev = dev;
sock->mbinfo.base_addr = SMN_HSMP_BASE;
@@ -305,11 +293,11 @@ static int __init hsmp_plt_init(void)
return -ENOMEM;
/*
- * amd_nb_num() returns number of SMN/DF interfaces present in the system
+ * amd_num_nodes() returns the number of SMN/DF interfaces present in the system;
* if we have N SMN/DF interfaces, that ideally means N sockets
*/
- hsmp_pdev->num_sockets = amd_nb_num();
- if (hsmp_pdev->num_sockets == 0 || hsmp_pdev->num_sockets > MAX_AMD_SOCKETS)
+ hsmp_pdev->num_sockets = amd_num_nodes();
+ if (hsmp_pdev->num_sockets == 0 || hsmp_pdev->num_sockets > MAX_AMD_NUM_NODES)
return ret;
ret = platform_driver_register(&amd_hsmp_driver);
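For reference, the PCI accessor removed above open-coded the usual SMN index/data pattern that amd_smn_hsmp_rdwr() now wraps: write the target SMN address into an index register in the root device's config space, then read or write the data register. A compact sketch of that pattern (smn_rdwr is an illustrative name; the register offsets are the ones deleted above):

#include <linux/pci.h>

#define HSMP_INDEX_REG	0xc4
#define HSMP_DATA_REG	0xc8

/* Sketch of the index/data access the driver used to open-code. */
static int smn_rdwr(struct pci_dev *root, u32 smn_addr, u32 *val, bool write)
{
	int ret = pci_write_config_dword(root, HSMP_INDEX_REG, smn_addr);

	if (ret)
		return ret;

	return write ? pci_write_config_dword(root, HSMP_DATA_REG, *val)
		     : pci_read_config_dword(root, HSMP_DATA_REG, val);
}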
diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c
index 77d75e1f14a9..5ccde3982314 100644
--- a/drivers/powercap/intel_rapl_common.c
+++ b/drivers/powercap/intel_rapl_common.c
@@ -1274,7 +1274,7 @@ static const struct x86_cpu_id rapl_ids[] __initconst = {
X86_MATCH_VFM(INTEL_ATOM_SILVERMONT, &rapl_defaults_byt),
X86_MATCH_VFM(INTEL_ATOM_AIRMONT, &rapl_defaults_cht),
X86_MATCH_VFM(INTEL_ATOM_SILVERMONT_MID, &rapl_defaults_tng),
- X86_MATCH_VFM(INTEL_ATOM_AIRMONT_MID, &rapl_defaults_ann),
+ X86_MATCH_VFM(INTEL_ATOM_SILVERMONT_MID2,&rapl_defaults_ann),
X86_MATCH_VFM(INTEL_ATOM_GOLDMONT, &rapl_defaults_core),
X86_MATCH_VFM(INTEL_ATOM_GOLDMONT_PLUS, &rapl_defaults_core),
X86_MATCH_VFM(INTEL_ATOM_GOLDMONT_D, &rapl_defaults_core),
diff --git a/drivers/staging/media/atomisp/include/linux/atomisp_platform.h b/drivers/staging/media/atomisp/include/linux/atomisp_platform.h
index 049246774ced..6146555fe9cf 100644
--- a/drivers/staging/media/atomisp/include/linux/atomisp_platform.h
+++ b/drivers/staging/media/atomisp/include/linux/atomisp_platform.h
@@ -172,10 +172,10 @@ void atomisp_unregister_subdev(struct v4l2_subdev *subdev);
#define IS_BYT __IS_SOC(INTEL_ATOM_SILVERMONT)
#define IS_CHT __IS_SOC(INTEL_ATOM_AIRMONT)
#define IS_MRFD __IS_SOC(INTEL_ATOM_SILVERMONT_MID)
-#define IS_MOFD __IS_SOC(INTEL_ATOM_AIRMONT_MID)
+#define IS_MOFD __IS_SOC(INTEL_ATOM_SILVERMONT_MID2)
/* Both CHT and MOFD come with ISP2401 */
#define IS_ISP2401 __IS_SOCS(INTEL_ATOM_AIRMONT, \
- INTEL_ATOM_AIRMONT_MID)
+ INTEL_ATOM_SILVERMONT_MID2)
#endif /* ATOMISP_PLATFORM_H_ */
diff --git a/drivers/thermal/intel/intel_tcc.c b/drivers/thermal/intel/intel_tcc.c
index 817421508d5c..b2a615aea7c1 100644
--- a/drivers/thermal/intel/intel_tcc.c
+++ b/drivers/thermal/intel/intel_tcc.c
@@ -106,7 +106,7 @@ static const struct x86_cpu_id intel_tcc_cpu_ids[] __initconst = {
X86_MATCH_VFM(INTEL_ATOM_SILVERMONT_D, &temp_broadwell),
X86_MATCH_VFM(INTEL_ATOM_SILVERMONT_MID, &temp_broadwell),
X86_MATCH_VFM(INTEL_ATOM_AIRMONT, &temp_broadwell),
- X86_MATCH_VFM(INTEL_ATOM_AIRMONT_MID, &temp_broadwell),
+ X86_MATCH_VFM(INTEL_ATOM_SILVERMONT_MID2, &temp_broadwell),
X86_MATCH_VFM(INTEL_ATOM_AIRMONT_NP, &temp_broadwell),
X86_MATCH_VFM(INTEL_ATOM_GOLDMONT, &temp_goldmont),
X86_MATCH_VFM(INTEL_ATOM_GOLDMONT_D, &temp_goldmont),
diff --git a/drivers/virt/coco/sev-guest/sev-guest.c b/drivers/virt/coco/sev-guest/sev-guest.c
index 70fbc9a3e703..cf3fb61f4d5b 100644
--- a/drivers/virt/coco/sev-guest/sev-guest.c
+++ b/drivers/virt/coco/sev-guest/sev-guest.c
@@ -23,6 +23,7 @@
#include <linux/cleanup.h>
#include <linux/uuid.h>
#include <linux/configfs.h>
+#include <linux/mm.h>
#include <uapi/linux/sev-guest.h>
#include <uapi/linux/psp-sev.h>
diff --git a/include/acpi/processor.h b/include/acpi/processor.h
index a17e97e634a6..d0eccbd920e5 100644
--- a/include/acpi/processor.h
+++ b/include/acpi/processor.h
@@ -280,6 +280,7 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu,
struct acpi_processor_cx *cx,
struct acpi_power_register *reg);
void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cstate);
+void __noreturn acpi_processor_ffh_play_dead(struct acpi_processor_cx *cx);
#else
static inline void acpi_processor_power_init_bm_check(struct
acpi_processor_flags
@@ -300,6 +301,10 @@ static inline void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx
{
return;
}
+static inline void __noreturn acpi_processor_ffh_play_dead(struct acpi_processor_cx *cx)
+{
+ BUG();
+}
#endif
static inline int call_on_cpu(int cpu, long (*fn)(void *), void *arg,
diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h
index c768de6f19a9..0755bc39b0d8 100644
--- a/include/asm-generic/sections.h
+++ b/include/asm-generic/sections.h
@@ -39,7 +39,7 @@ extern char __init_begin[], __init_end[];
extern char _sinittext[], _einittext[];
extern char __start_ro_after_init[], __end_ro_after_init[];
extern char _end[];
-extern char __per_cpu_load[], __per_cpu_start[], __per_cpu_end[];
+extern char __per_cpu_start[], __per_cpu_end[];
extern char __kprobes_text_start[], __kprobes_text_end[];
extern char __entry_text_start[], __entry_text_end[];
extern char __start_rodata[], __end_rodata[];
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 0d5b186abee8..4925441bc471 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -385,6 +385,11 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG)
. = ALIGN(PAGE_SIZE); \
__nosave_end = .;
+#define CACHE_HOT_DATA(align) \
+ . = ALIGN(align); \
+ *(SORT_BY_ALIGNMENT(.data..hot.*)) \
+ . = ALIGN(align);
+
#define PAGE_ALIGNED_DATA(page_align) \
. = ALIGN(page_align); \
*(.data..page_aligned) \
@@ -1062,10 +1067,13 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG)
*/
#define PERCPU_INPUT(cacheline) \
__per_cpu_start = .; \
- *(.data..percpu..first) \
. = ALIGN(PAGE_SIZE); \
*(.data..percpu..page_aligned) \
. = ALIGN(cacheline); \
+ __per_cpu_hot_start = .; \
+ *(SORT_BY_ALIGNMENT(.data..percpu..hot.*)) \
+ __per_cpu_hot_end = .; \
+ . = ALIGN(cacheline); \
*(.data..percpu..read_mostly) \
. = ALIGN(cacheline); \
*(.data..percpu) \
@@ -1074,52 +1082,17 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG)
__per_cpu_end = .;
/**
- * PERCPU_VADDR - define output section for percpu area
+ * PERCPU_SECTION - define output section for percpu area
* @cacheline: cacheline size
- * @vaddr: explicit base address (optional)
- * @phdr: destination PHDR (optional)
*
* Macro which expands to output section for percpu area.
*
* @cacheline is used to align subsections to avoid false cacheline
* sharing between subsections for different purposes.
- *
- * If @vaddr is not blank, it specifies explicit base address and all
- * percpu symbols will be offset from the given address. If blank,
- * @vaddr always equals @laddr + LOAD_OFFSET.
- *
- * @phdr defines the output PHDR to use if not blank. Be warned that
- * output PHDR is sticky. If @phdr is specified, the next output
- * section in the linker script will go there too. @phdr should have
- * a leading colon.
- *
- * Note that this macros defines __per_cpu_load as an absolute symbol.
- * If there is no need to put the percpu section at a predetermined
- * address, use PERCPU_SECTION.
- */
-#define PERCPU_VADDR(cacheline, vaddr, phdr) \
- __per_cpu_load = .; \
- .data..percpu vaddr : AT(__per_cpu_load - LOAD_OFFSET) { \
- PERCPU_INPUT(cacheline) \
- } phdr \
- . = __per_cpu_load + SIZEOF(.data..percpu);
-
-/**
- * PERCPU_SECTION - define output section for percpu area, simple version
- * @cacheline: cacheline size
- *
- * Align to PAGE_SIZE and outputs output section for percpu area. This
- * macro doesn't manipulate @vaddr or @phdr and __per_cpu_load and
- * __per_cpu_start will be identical.
- *
- * This macro is equivalent to ALIGN(PAGE_SIZE); PERCPU_VADDR(@cacheline,,)
- * except that __per_cpu_load is defined as a relative symbol against
- * .data..percpu which is required for relocatable x86_32 configuration.
*/
#define PERCPU_SECTION(cacheline) \
. = ALIGN(PAGE_SIZE); \
.data..percpu : AT(ADDR(.data..percpu) - LOAD_OFFSET) { \
- __per_cpu_load = .; \
PERCPU_INPUT(cacheline) \
}
@@ -1148,6 +1121,7 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG)
INIT_TASK_DATA(inittask) \
NOSAVE_DATA \
PAGE_ALIGNED_DATA(pagealigned) \
+ CACHE_HOT_DATA(cacheline) \
CACHELINE_ALIGNED_DATA(cacheline) \
READ_MOSTLY_DATA(cacheline) \
DATA_DATA \
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 4e495b29c640..a70e62d69dc7 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -330,7 +330,6 @@ static inline bool acpi_sci_irq_valid(void)
}
extern int sbf_port;
-extern unsigned long acpi_realmode_flags;
int acpi_register_gsi (struct device *dev, u32 gsi, int triggering, int polarity);
int acpi_gsi_to_irq (u32 gsi, unsigned int *irq);
diff --git a/include/linux/cfi.h b/include/linux/cfi.h
index f0df518e11dd..1db17ecbb86c 100644
--- a/include/linux/cfi.h
+++ b/include/linux/cfi.h
@@ -11,6 +11,8 @@
#include <linux/module.h>
#include <asm/cfi.h>
+extern bool cfi_warn;
+
#ifndef cfi_get_offset
static inline int cfi_get_offset(void)
{
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 6db889c9efcc..9fc30b6b80c9 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -228,6 +228,16 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
#endif /* __KERNEL__ */
+#if defined(CONFIG_CFI_CLANG) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
+/*
+ * Force a reference to the external symbol so the compiler generates
+ * __kcfi_typid.
+ */
+#define KCFI_REFERENCE(sym) __ADDRESSABLE(sym)
+#else
+#define KCFI_REFERENCE(sym)
+#endif
+
/**
* offset_to_ptr - convert a relative memory offset to an absolute pointer
* @off: the address of the 32-bit offset value
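The KCFI_REFERENCE() macro added above exists for functions whose only callers are in assembly: without a C-side address-taken reference the compiler may not emit the __kcfi_typeid_<sym> symbol the assembly side needs. A minimal usage sketch (asm_called_helper is an assumed example symbol, not a real kernel function):

#include <linux/compiler.h>

/* The function is implemented in assembly and reached indirectly; the asm
 * side uses __kcfi_typeid_asm_called_helper to build its kCFI preamble. */
void asm_called_helper(void);
KCFI_REFERENCE(asm_called_helper);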
diff --git a/include/linux/execmem.h b/include/linux/execmem.h
index 64130ae19690..65655a5d1be2 100644
--- a/include/linux/execmem.h
+++ b/include/linux/execmem.h
@@ -65,6 +65,37 @@ enum execmem_range_flags {
* Architectures that use EXECMEM_ROX_CACHE must implement this.
*/
void execmem_fill_trapping_insns(void *ptr, size_t size, bool writable);
+
+/**
+ * execmem_make_temp_rw - temporarily remap region with read-write
+ * permissions
+ * @ptr: address of the region to remap
+ * @size: size of the region to remap
+ *
+ * Remaps a part of the cached large page in the ROX cache in the range
+ * [@ptr, @ptr + @size) as writable and not executable. The caller must
+ * have exclusive ownership of this range and ensure nothing will try to
+ * execute code in this range.
+ *
+ * Return: 0 on success or negative error code on failure.
+ */
+int execmem_make_temp_rw(void *ptr, size_t size);
+
+/**
+ * execmem_restore_rox - restore read-only-execute permissions
+ * @ptr: address of the region to remap
+ * @size: size of the region to remap
+ *
+ * Restores read-only-execute permissions on a range [@ptr, @ptr + @size)
+ * after it was temporarily remapped as writable. Relies on architecture
+ * implementation of set_memory_rox() to restore mapping using large pages.
+ *
+ * Return: 0 on success or negative error code on failure.
+ */
+int execmem_restore_rox(void *ptr, size_t size);
+#else
+static inline int execmem_make_temp_rw(void *ptr, size_t size) { return 0; }
+static inline int execmem_restore_rox(void *ptr, size_t size) { return 0; }
#endif
/**
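Taken together, the two new calls give ROX-cache users a write window over memory they own exclusively: drop to RW, patch, then seal back to ROX. A minimal usage sketch (patch_rox_region is an illustrative helper, not an existing API):

#include <linux/execmem.h>
#include <linux/string.h>

/* Sketch: copy new text into an ROX-cached allocation the caller owns. */
static int patch_rox_region(void *dst, const void *src, size_t size)
{
	int err = execmem_make_temp_rw(dst, size);

	if (err)
		return err;

	memcpy(dst, src, size);

	return execmem_restore_rox(dst, size);
}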
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index d67614f7b7f1..bd7e60c0b72f 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -692,6 +692,7 @@ struct x86_cpu_id {
__u16 feature; /* bit index */
/* Solely for kernel-internal use: DO NOT EXPORT to userspace! */
__u16 flags;
+ __u8 type;
kernel_ulong_t driver_data;
};
@@ -703,6 +704,7 @@ struct x86_cpu_id {
#define X86_STEP_MIN 0
#define X86_STEP_MAX 0xf
#define X86_FEATURE_ANY 0 /* Same as FPU, you can't test for that */
+#define X86_CPU_TYPE_ANY 0
/*
* Generic table type for matching CPU features.
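The new 'type' field lets a match table additionally key on the hybrid CPU type, while X86_CPU_TYPE_ANY preserves the old match-everything behaviour. A purely illustrative table entry, using only the fields visible above:

#include <linux/mod_devicetable.h>

/* Illustrative only: an entry that does not constrain the CPU type. */
static const struct x86_cpu_id example_ids[] = {
	{ .feature = X86_FEATURE_ANY, .type = X86_CPU_TYPE_ANY },
	{ }	/* terminator */
};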
diff --git a/include/linux/module.h b/include/linux/module.h
index 30e5b19bafa9..9937e71a3b5b 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -370,7 +370,6 @@ enum mod_mem_type {
struct module_memory {
void *base;
- void *rw_copy;
bool is_rox;
unsigned int size;
@@ -772,16 +771,6 @@ static inline bool is_livepatch_module(struct module *mod)
void set_module_sig_enforced(void);
-void *__module_writable_address(struct module *mod, void *loc);
-
-static inline void *module_writable_address(struct module *mod, void *loc)
-{
- if (!IS_ENABLED(CONFIG_ARCH_HAS_EXECMEM_ROX) || !mod ||
- mod->state != MODULE_STATE_UNFORMED)
- return loc;
- return __module_writable_address(mod, loc);
-}
-
#else /* !CONFIG_MODULES... */
static inline struct module *__module_address(unsigned long addr)
@@ -889,11 +878,6 @@ static inline bool module_is_coming(struct module *mod)
{
return false;
}
-
-static inline void *module_writable_address(struct module *mod, void *loc)
-{
- return loc;
-}
#endif /* CONFIG_MODULES */
#ifdef CONFIG_SYSFS
diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h
index 1f5507ba5a12..e395461d59e5 100644
--- a/include/linux/moduleloader.h
+++ b/include/linux/moduleloader.h
@@ -108,10 +108,6 @@ int module_finalize(const Elf_Ehdr *hdr,
const Elf_Shdr *sechdrs,
struct module *mod);
-int module_post_finalize(const Elf_Ehdr *hdr,
- const Elf_Shdr *sechdrs,
- struct module *mod);
-
#ifdef CONFIG_MODULES
void flush_module_init_free_work(void);
#else
diff --git a/include/linux/objtool.h b/include/linux/objtool.h
index c722a921165b..3ca965a2ddc8 100644
--- a/include/linux/objtool.h
+++ b/include/linux/objtool.h
@@ -128,7 +128,7 @@
#define UNWIND_HINT(type, sp_reg, sp_offset, signal) "\n\t"
#define STACK_FRAME_NON_STANDARD(func)
#define STACK_FRAME_NON_STANDARD_FP(func)
-#define __ASM_ANNOTATE(label, type)
+#define __ASM_ANNOTATE(label, type) ""
#define ASM_ANNOTATE(type)
#else
.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 signal=0
@@ -147,6 +147,8 @@
* these relocations will never be used for indirect calls.
*/
#define ANNOTATE_NOENDBR ASM_ANNOTATE(ANNOTYPE_NOENDBR)
+#define ANNOTATE_NOENDBR_SYM(sym) asm(__ASM_ANNOTATE(sym, ANNOTYPE_NOENDBR))
+
/*
* This should be used immediately before an indirect jump/call. It tells
* objtool the subsequent indirect jump/call is vouched safe for retpoline
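ANNOTATE_NOENDBR_SYM() is the C-side counterpart of ANNOTATE_NOENDBR: it attaches the annotation to a named symbol so objtool does not warn that the symbol lacks an ENDBR landing pad. A usage sketch (some_asm_stub is an assumed example symbol that is never reached via an indirect branch):

#include <linux/objtool.h>

/* Sketch: the stub is defined in assembly and only ever called directly,
 * so it intentionally has no ENDBR. */
extern void some_asm_stub(void);
ANNOTATE_NOENDBR_SYM(some_asm_stub);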
diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h
index 5b520fe86b60..0fcacb909778 100644
--- a/include/linux/percpu-defs.h
+++ b/include/linux/percpu-defs.h
@@ -26,13 +26,11 @@
#define PER_CPU_SHARED_ALIGNED_SECTION "..shared_aligned"
#define PER_CPU_ALIGNED_SECTION "..shared_aligned"
#endif
-#define PER_CPU_FIRST_SECTION "..first"
#else
#define PER_CPU_SHARED_ALIGNED_SECTION ""
#define PER_CPU_ALIGNED_SECTION "..shared_aligned"
-#define PER_CPU_FIRST_SECTION ""
#endif
@@ -115,14 +113,17 @@
DEFINE_PER_CPU_SECTION(type, name, "")
/*
- * Declaration/definition used for per-CPU variables that must come first in
- * the set of variables.
+ * Declaration/definition used for per-CPU variables that are frequently
+ * accessed and should be in a single cacheline.
+ *
+ * For use only by architecture and core code. Only use scalar or pointer
+ * types to maximize density.
*/
-#define DECLARE_PER_CPU_FIRST(type, name) \
- DECLARE_PER_CPU_SECTION(type, name, PER_CPU_FIRST_SECTION)
+#define DECLARE_PER_CPU_CACHE_HOT(type, name) \
+ DECLARE_PER_CPU_SECTION(type, name, "..hot.." #name)
-#define DEFINE_PER_CPU_FIRST(type, name) \
- DEFINE_PER_CPU_SECTION(type, name, PER_CPU_FIRST_SECTION)
+#define DEFINE_PER_CPU_CACHE_HOT(type, name) \
+ DEFINE_PER_CPU_SECTION(type, name, "..hot.." #name)
/*
* Declaration/definition used for per-CPU variables that must be cacheline
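DEFINE_PER_CPU_CACHE_HOT() replaces the old "first" section with named ..hot.. subsections that the linker packs together (see the CACHE_HOT_DATA/PERCPU_INPUT changes above), so the hottest per-CPU scalars end up sharing cachelines. A usage sketch with an illustrative variable name:

#include <linux/percpu.h>

/* In a header shared with users of the variable: */
DECLARE_PER_CPU_CACHE_HOT(int, example_hot_counter);

/* In exactly one C file: */
DEFINE_PER_CPU_CACHE_HOT(int, example_hot_counter);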
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 3e9808f2b549..b0af8d4ef6e6 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -319,6 +319,7 @@ do { \
#ifdef CONFIG_PREEMPT_NOTIFIERS
struct preempt_notifier;
+struct task_struct;
/**
* preempt_ops - notifiers called when a task is preempted and rescheduled
diff --git a/include/linux/sizes.h b/include/linux/sizes.h
index c3a00b967d18..49039494076f 100644
--- a/include/linux/sizes.h
+++ b/include/linux/sizes.h
@@ -23,17 +23,25 @@
#define SZ_4K 0x00001000
#define SZ_8K 0x00002000
#define SZ_16K 0x00004000
+#define SZ_24K 0x00006000
#define SZ_32K 0x00008000
#define SZ_64K 0x00010000
#define SZ_128K 0x00020000
+#define SZ_192K 0x00030000
#define SZ_256K 0x00040000
+#define SZ_384K 0x00060000
#define SZ_512K 0x00080000
#define SZ_1M 0x00100000
#define SZ_2M 0x00200000
+#define SZ_3M 0x00300000
#define SZ_4M 0x00400000
+#define SZ_6M 0x00600000
#define SZ_8M 0x00800000
+#define SZ_12M 0x00c00000
#define SZ_16M 0x01000000
+#define SZ_18M 0x01200000
+#define SZ_24M 0x01800000
#define SZ_32M 0x02000000
#define SZ_64M 0x04000000
#define SZ_128M 0x08000000
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index f70d0958095c..5a37cb2b6f93 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -151,6 +151,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
#ifdef CONFIG_X86
DIRECT_MAP_LEVEL2_SPLIT,
DIRECT_MAP_LEVEL3_SPLIT,
+ DIRECT_MAP_LEVEL2_COLLAPSE,
+ DIRECT_MAP_LEVEL3_COLLAPSE,
#endif
#ifdef CONFIG_PER_VMA_LOCK_STATS
VMA_LOCK_SUCCESS,
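The two new events are the inverse of the existing split counters: they are meant to be bumped when the x86 direct map merges 4K or 2M mappings back into a larger page. A hedged sketch of how a caller would account such a collapse (note_direct_map_collapse is an illustrative helper):

#include <linux/vmstat.h>

/* Sketch: account a successful direct-map collapse. */
static void note_direct_map_collapse(bool to_1g)
{
	if (to_1g)
		count_vm_event(DIRECT_MAP_LEVEL3_COLLAPSE);	/* 2M -> 1G */
	else
		count_vm_event(DIRECT_MAP_LEVEL2_COLLAPSE);	/* 4K -> 2M */
}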
diff --git a/init/Kconfig b/init/Kconfig
index aeea36d11dda..681f38ee68db 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1880,11 +1880,6 @@ config KALLSYMS_ALL
Say N unless you really need all symbols, or kernel live patching.
-config KALLSYMS_ABSOLUTE_PERCPU
- bool
- depends on KALLSYMS
- default X86_64 && SMP
-
# end of the "standard kernel features (expert users)" menu
config ARCH_HAS_MEMBARRIER_CALLBACKS
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 60611df77957..6e604caa870c 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -21702,12 +21702,12 @@ patch_map_ops_generic:
if (insn->imm == BPF_FUNC_get_smp_processor_id &&
verifier_inlines_helper_call(env, insn->imm)) {
/* BPF_FUNC_get_smp_processor_id inlining is an
- * optimization, so if pcpu_hot.cpu_number is ever
+ * optimization, so if cpu_number is ever
* changed in some incompatible and hard to support
* way, it's fine to back out this inlining logic
*/
#ifdef CONFIG_SMP
- insn_buf[0] = BPF_MOV32_IMM(BPF_REG_0, (u32)(unsigned long)&pcpu_hot.cpu_number);
+ insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, (u32)(unsigned long)&cpu_number);
insn_buf[1] = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0);
insn_buf[2] = BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0);
cnt = 3;
diff --git a/kernel/cfi.c b/kernel/cfi.c
index 08caad776717..19be79639542 100644
--- a/kernel/cfi.c
+++ b/kernel/cfi.c
@@ -7,6 +7,8 @@
#include <linux/cfi.h>
+bool cfi_warn __ro_after_init = IS_ENABLED(CONFIG_CFI_PERMISSIVE);
+
enum bug_trap_type report_cfi_failure(struct pt_regs *regs, unsigned long addr,
unsigned long *target, u32 type)
{
@@ -17,7 +19,7 @@ enum bug_trap_type report_cfi_failure(struct pt_regs *regs, unsigned long addr,
pr_err("CFI failure at %pS (no target information)\n",
(void *)addr);
- if (IS_ENABLED(CONFIG_CFI_PERMISSIVE)) {
+ if (cfi_warn) {
__warn(NULL, 0, (void *)addr, 0, regs, NULL);
return BUG_TRAP_TYPE_WARN;
}
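Turning the permissive policy into the cfi_warn variable means the warn-vs-panic decision no longer has to be baked in at build time; architecture or early-boot code can flip it. A hypothetical sketch of a boot parameter doing so ("cfi_mode=" is an assumed name for illustration, not an existing kernel option):

#include <linux/cfi.h>
#include <linux/init.h>
#include <linux/string.h>

/* Hypothetical: let "cfi_mode=warn" downgrade CFI failures to warnings. */
static int __init cfi_mode_setup(char *str)
{
	if (str && !strcmp(str, "warn"))
		cfi_warn = true;
	return 1;
}
__setup("cfi_mode=", cfi_mode_setup);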
diff --git a/kernel/iomem.c b/kernel/iomem.c
index dc2120776e1c..75e61c1c6bc0 100644
--- a/kernel/iomem.c
+++ b/kernel/iomem.c
@@ -6,7 +6,8 @@
#include <linux/ioremap.h>
#ifndef arch_memremap_wb
-static void *arch_memremap_wb(resource_size_t offset, unsigned long size)
+static void *arch_memremap_wb(resource_size_t offset, unsigned long size,
+ unsigned long flags)
{
#ifdef ioremap_cache
return (__force void *)ioremap_cache(offset, size);
@@ -91,7 +92,7 @@ void *memremap(resource_size_t offset, size_t size, unsigned long flags)
if (is_ram == REGION_INTERSECTS)
addr = try_ram_remap(offset, size, flags);
if (!addr)
- addr = arch_memremap_wb(offset, size);
+ addr = arch_memremap_wb(offset, size, flags);
}
/*
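Passing 'flags' through means an architecture override can now make mapping decisions per request instead of always returning the same cached mapping. A hypothetical arch-side sketch matching the new three-argument signature (the body simply ignores the flags; a real override might inspect e.g. MEMREMAP_DEC):

#include <linux/io.h>

#define arch_memremap_wb arch_memremap_wb
static void *arch_memremap_wb(resource_size_t offset, unsigned long size,
			      unsigned long flags)
{
	/* A real arch could inspect 'flags' here before choosing a mapping. */
	return (__force void *)ioremap_cache(offset, size);
}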
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index a9a0ca605d4a..4198f30aac3c 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -148,16 +148,8 @@ static unsigned int get_symbol_offset(unsigned long pos)
unsigned long kallsyms_sym_address(int idx)
{
- /* values are unsigned offsets if --absolute-percpu is not in effect */
- if (!IS_ENABLED(CONFIG_KALLSYMS_ABSOLUTE_PERCPU))
- return kallsyms_relative_base + (u32)kallsyms_offsets[idx];
-
- /* ...otherwise, positive offsets are absolute values */
- if (kallsyms_offsets[idx] >= 0)
- return kallsyms_offsets[idx];
-
- /* ...and negative offsets are relative to kallsyms_relative_base - 1 */
- return kallsyms_relative_base - 1 - kallsyms_offsets[idx];
+ /* values are unsigned offsets */
+ return kallsyms_relative_base + (u32)kallsyms_offsets[idx];
}
static unsigned int get_symbol_seq(int index)
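With absolute per-CPU symbols gone, the encoding is uniform: scripts/kallsyms emits each address as an unsigned 32-bit offset from the relative base, and the kernel simply adds the base back. A small round-trip sketch (function names are illustrative):

#include <linux/types.h>

/* Build-time side (scripts/kallsyms): must fit in 32 bits or the build fails. */
static u32 encode_offset(unsigned long long addr, unsigned long long base)
{
	return (u32)(addr - base);
}

/* Run-time side, equivalent to kallsyms_sym_address() above. */
static unsigned long decode_addr(unsigned long base, u32 offset)
{
	return base + offset;
}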
diff --git a/kernel/module/main.c b/kernel/module/main.c
index 1fb9ad289a6f..a256cc919ad7 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -1221,18 +1221,6 @@ void __weak module_arch_freeing_init(struct module *mod)
{
}
-void *__module_writable_address(struct module *mod, void *loc)
-{
- for_class_mod_mem_type(type, text) {
- struct module_memory *mem = &mod->mem[type];
-
- if (loc >= mem->base && loc < mem->base + mem->size)
- return loc + (mem->rw_copy - mem->base);
- }
-
- return loc;
-}
-
static int module_memory_alloc(struct module *mod, enum mod_mem_type type)
{
unsigned int size = PAGE_ALIGN(mod->mem[type].size);
@@ -1250,21 +1238,15 @@ static int module_memory_alloc(struct module *mod, enum mod_mem_type type)
if (!ptr)
return -ENOMEM;
- mod->mem[type].base = ptr;
-
if (execmem_is_rox(execmem_type)) {
- ptr = vzalloc(size);
+ int err = execmem_make_temp_rw(ptr, size);
- if (!ptr) {
- execmem_free(mod->mem[type].base);
+ if (err) {
+ execmem_free(ptr);
return -ENOMEM;
}
- mod->mem[type].rw_copy = ptr;
mod->mem[type].is_rox = true;
- } else {
- mod->mem[type].rw_copy = mod->mem[type].base;
- memset(mod->mem[type].base, 0, size);
}
/*
@@ -1278,18 +1260,29 @@ static int module_memory_alloc(struct module *mod, enum mod_mem_type type)
* *do* eventually get freed, but let's just keep things simple
* and avoid *any* false positives.
*/
- kmemleak_not_leak(ptr);
+ if (!mod->mem[type].is_rox)
+ kmemleak_not_leak(ptr);
+
+ memset(ptr, 0, size);
+ mod->mem[type].base = ptr;
return 0;
}
+static void module_memory_restore_rox(struct module *mod)
+{
+ for_class_mod_mem_type(type, text) {
+ struct module_memory *mem = &mod->mem[type];
+
+ if (mem->is_rox)
+ execmem_restore_rox(mem->base, mem->size);
+ }
+}
+
static void module_memory_free(struct module *mod, enum mod_mem_type type)
{
struct module_memory *mem = &mod->mem[type];
- if (mem->is_rox)
- vfree(mem->rw_copy);
-
execmem_free(mem->base);
}
@@ -2642,7 +2635,6 @@ static int move_module(struct module *mod, struct load_info *info)
for_each_mod_mem_type(type) {
if (!mod->mem[type].size) {
mod->mem[type].base = NULL;
- mod->mem[type].rw_copy = NULL;
continue;
}
@@ -2659,7 +2651,6 @@ static int move_module(struct module *mod, struct load_info *info)
void *dest;
Elf_Shdr *shdr = &info->sechdrs[i];
const char *sname;
- unsigned long addr;
if (!(shdr->sh_flags & SHF_ALLOC))
continue;
@@ -2680,14 +2671,12 @@ static int move_module(struct module *mod, struct load_info *info)
ret = PTR_ERR(dest);
goto out_err;
}
- addr = (unsigned long)dest;
codetag_section_found = true;
} else {
enum mod_mem_type type = shdr->sh_entsize >> SH_ENTSIZE_TYPE_SHIFT;
unsigned long offset = shdr->sh_entsize & SH_ENTSIZE_OFFSET_MASK;
- addr = (unsigned long)mod->mem[type].base + offset;
- dest = mod->mem[type].rw_copy + offset;
+ dest = mod->mem[type].base + offset;
}
if (shdr->sh_type != SHT_NOBITS) {
@@ -2710,13 +2699,14 @@ static int move_module(struct module *mod, struct load_info *info)
* users of info can keep taking advantage and using the newly
* minted official memory area.
*/
- shdr->sh_addr = addr;
+ shdr->sh_addr = (unsigned long)dest;
pr_debug("\t0x%lx 0x%.8lx %s\n", (long)shdr->sh_addr,
(long)shdr->sh_size, info->secstrings + shdr->sh_name);
}
return 0;
out_err:
+ module_memory_restore_rox(mod);
for (t--; t >= 0; t--)
module_memory_free(mod, t);
if (codetag_section_found)
@@ -2863,17 +2853,8 @@ int __weak module_finalize(const Elf_Ehdr *hdr,
return 0;
}
-int __weak module_post_finalize(const Elf_Ehdr *hdr,
- const Elf_Shdr *sechdrs,
- struct module *me)
-{
- return 0;
-}
-
static int post_relocation(struct module *mod, const struct load_info *info)
{
- int ret;
-
/* Sort exception table now relocations are done. */
sort_extable(mod->extable, mod->extable + mod->num_exentries);
@@ -2885,24 +2866,7 @@ static int post_relocation(struct module *mod, const struct load_info *info)
add_kallsyms(mod, info);
/* Arch-specific module finalizing. */
- ret = module_finalize(info->hdr, info->sechdrs, mod);
- if (ret)
- return ret;
-
- for_each_mod_mem_type(type) {
- struct module_memory *mem = &mod->mem[type];
-
- if (mem->is_rox) {
- if (!execmem_update_copy(mem->base, mem->rw_copy,
- mem->size))
- return -ENOMEM;
-
- vfree(mem->rw_copy);
- mem->rw_copy = NULL;
- }
- }
-
- return module_post_finalize(info->hdr, info->sechdrs, mod);
+ return module_finalize(info->hdr, info->sechdrs, mod);
}
/* Call module constructors. */
@@ -3499,6 +3463,7 @@ static int load_module(struct load_info *info, const char __user *uargs,
mod->mem[type].size);
}
+ module_memory_restore_rox(mod);
module_deallocate(mod, info);
free_copy:
/*
diff --git a/kernel/module/strict_rwx.c b/kernel/module/strict_rwx.c
index 74834ba15615..03f4142cfbf4 100644
--- a/kernel/module/strict_rwx.c
+++ b/kernel/module/strict_rwx.c
@@ -9,6 +9,7 @@
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/set_memory.h>
+#include <linux/execmem.h>
#include "internal.h"
static int module_set_memory(const struct module *mod, enum mod_mem_type type,
@@ -32,12 +33,12 @@ static int module_set_memory(const struct module *mod, enum mod_mem_type type,
int module_enable_text_rox(const struct module *mod)
{
for_class_mod_mem_type(type, text) {
+ const struct module_memory *mem = &mod->mem[type];
int ret;
- if (mod->mem[type].is_rox)
- continue;
-
- if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX))
+ if (mem->is_rox)
+ ret = execmem_restore_rox(mem->base, mem->size);
+ else if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX))
ret = module_set_memory(mod, type, set_memory_rox);
else
ret = module_set_memory(mod, type, set_memory_x);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 4484cdb504c7..4ebe6136b08d 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1824,16 +1824,6 @@ static const struct ctl_table kern_table[] = {
.mode = 0444,
.proc_handler = proc_dointvec,
},
-#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
- {
- .procname = "unknown_nmi_panic",
- .data = &unknown_nmi_panic,
- .maxlen = sizeof (int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
-#endif
-
#if (defined(CONFIG_X86_32) || defined(CONFIG_PARISC)) && \
defined(CONFIG_DEBUG_STACKOVERFLOW)
{
@@ -1844,43 +1834,6 @@ static const struct ctl_table kern_table[] = {
.proc_handler = proc_dointvec,
},
#endif
-#if defined(CONFIG_X86)
- {
- .procname = "panic_on_unrecovered_nmi",
- .data = &panic_on_unrecovered_nmi,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "panic_on_io_nmi",
- .data = &panic_on_io_nmi,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "bootloader_type",
- .data = &bootloader_type,
- .maxlen = sizeof (int),
- .mode = 0444,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "bootloader_version",
- .data = &bootloader_version,
- .maxlen = sizeof (int),
- .mode = 0444,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "io_delay_type",
- .data = &io_delay_type,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
-#endif
#if defined(CONFIG_MMU)
{
.procname = "randomize_va_space",
@@ -1899,15 +1852,6 @@ static const struct ctl_table kern_table[] = {
.proc_handler = proc_dointvec,
},
#endif
-#if defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
- {
- .procname = "acpi_video_flags",
- .data = &acpi_realmode_flags,
- .maxlen = sizeof (unsigned long),
- .mode = 0644,
- .proc_handler = proc_doulongvec_minmax,
- },
-#endif
#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
{
.procname = "ignore-unaligned-usertrap",
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index adc947587eb8..997fb2a47c92 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1038,27 +1038,14 @@ static const struct bpf_func_proto bpf_get_func_ip_proto_tracing = {
.arg1_type = ARG_PTR_TO_CTX,
};
-#ifdef CONFIG_X86_KERNEL_IBT
-static unsigned long get_entry_ip(unsigned long fentry_ip)
+static inline unsigned long get_entry_ip(unsigned long fentry_ip)
{
- u32 instr;
-
- /* We want to be extra safe in case entry ip is on the page edge,
- * but otherwise we need to avoid get_kernel_nofault()'s overhead.
- */
- if ((fentry_ip & ~PAGE_MASK) < ENDBR_INSN_SIZE) {
- if (get_kernel_nofault(instr, (u32 *)(fentry_ip - ENDBR_INSN_SIZE)))
- return fentry_ip;
- } else {
- instr = *(u32 *)(fentry_ip - ENDBR_INSN_SIZE);
- }
- if (is_endbr(instr))
+#ifdef CONFIG_X86_KERNEL_IBT
+ if (is_endbr((void *)(fentry_ip - ENDBR_INSN_SIZE)))
fentry_ip -= ENDBR_INSN_SIZE;
+#endif
return fentry_ip;
}
-#else
-#define get_entry_ip(fentry_ip) fentry_ip
-#endif
BPF_CALL_1(bpf_get_func_ip_kprobe, struct pt_regs *, regs)
{
diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c
index 759ea1783cc5..d726068358c7 100644
--- a/lib/atomic64_test.c
+++ b/lib/atomic64_test.c
@@ -254,7 +254,7 @@ static __init int test_atomics_init(void)
pr_info("passed for %s platform %s CX8 and %s SSE\n",
#ifdef CONFIG_X86_64
"x86-64",
-#elif defined(CONFIG_X86_CMPXCHG64)
+#elif defined(CONFIG_X86_CX8)
"i586+",
#else
"i386+",
diff --git a/lib/zstd/common/portability_macros.h b/lib/zstd/common/portability_macros.h
index 0e3b2c0a527d..0dde8bf56595 100644
--- a/lib/zstd/common/portability_macros.h
+++ b/lib/zstd/common/portability_macros.h
@@ -55,7 +55,7 @@
#ifndef DYNAMIC_BMI2
#if ((defined(__clang__) && __has_attribute(__target__)) \
|| (defined(__GNUC__) \
- && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \
+ && (__GNUC__ >= 11))) \
&& (defined(__x86_64__) || defined(_M_X64)) \
&& !defined(__BMI2__)
# define DYNAMIC_BMI2 1
diff --git a/mm/execmem.c b/mm/execmem.c
index 317b6a8d35be..e6c4f5076ca8 100644
--- a/mm/execmem.c
+++ b/mm/execmem.c
@@ -257,7 +257,6 @@ out_unlock:
static int execmem_cache_populate(struct execmem_range *range, size_t size)
{
unsigned long vm_flags = VM_ALLOW_HUGE_VMAP;
- unsigned long start, end;
struct vm_struct *vm;
size_t alloc_size;
int err = -ENOMEM;
@@ -275,26 +274,18 @@ static int execmem_cache_populate(struct execmem_range *range, size_t size)
/* fill memory with instructions that will trap */
execmem_fill_trapping_insns(p, alloc_size, /* writable = */ true);
- start = (unsigned long)p;
- end = start + alloc_size;
-
- vunmap_range(start, end);
-
- err = execmem_set_direct_map_valid(vm, false);
- if (err)
- goto err_free_mem;
-
- err = vmap_pages_range_noflush(start, end, range->pgprot, vm->pages,
- PMD_SHIFT);
+ err = set_memory_rox((unsigned long)p, vm->nr_pages);
if (err)
goto err_free_mem;
err = execmem_cache_add(p, alloc_size);
if (err)
- goto err_free_mem;
+ goto err_reset_direct_map;
return 0;
+err_reset_direct_map:
+ execmem_set_direct_map_valid(vm, true);
err_free_mem:
vfree(p);
return err;
@@ -344,6 +335,28 @@ static bool execmem_cache_free(void *ptr)
return true;
}
+
+int execmem_make_temp_rw(void *ptr, size_t size)
+{
+ unsigned int nr = PAGE_ALIGN(size) >> PAGE_SHIFT;
+ unsigned long addr = (unsigned long)ptr;
+ int ret;
+
+ ret = set_memory_nx(addr, nr);
+ if (ret)
+ return ret;
+
+ return set_memory_rw(addr, nr);
+}
+
+int execmem_restore_rox(void *ptr, size_t size)
+{
+ unsigned int nr = PAGE_ALIGN(size) >> PAGE_SHIFT;
+ unsigned long addr = (unsigned long)ptr;
+
+ return set_memory_rox(addr, nr);
+}
+
#else /* CONFIG_ARCH_HAS_EXECMEM_ROX */
static void *execmem_cache_alloc(struct execmem_range *range, size_t size)
{
diff --git a/mm/percpu.c b/mm/percpu.c
index ac61e3fc5f15..7b5835356d1e 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -3071,7 +3071,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
continue;
}
/* copy and return the unused part */
- memcpy(ptr, __per_cpu_load, ai->static_size);
+ memcpy(ptr, __per_cpu_start, ai->static_size);
pcpu_fc_free(ptr + size_sum, ai->unit_size - size_sum);
}
}
@@ -3240,7 +3240,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size, pcpu_fc_cpu_to_node_fn_t
flush_cache_vmap_early(unit_addr, unit_addr + ai->unit_size);
/* copy static data */
- memcpy((void *)unit_addr, __per_cpu_load, ai->static_size);
+ memcpy((void *)unit_addr, __per_cpu_start, ai->static_size);
}
/* we're ready, commit */
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 16bfe1c694dd..88998725f1c5 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1435,6 +1435,8 @@ const char * const vmstat_text[] = {
#ifdef CONFIG_X86
"direct_map_level2_splits",
"direct_map_level3_splits",
+ "direct_map_level2_collapses",
+ "direct_map_level3_collapses",
#endif
#ifdef CONFIG_PER_VMA_LOCK_STATS
"vma_lock_success",
diff --git a/scripts/gcc-x86_32-has-stack-protector.sh b/scripts/gcc-x86_32-has-stack-protector.sh
deleted file mode 100755
index 9459ca4f0f11..000000000000
--- a/scripts/gcc-x86_32-has-stack-protector.sh
+++ /dev/null
@@ -1,8 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-
-# This requires GCC 8.1 or better. Specifically, we require
-# -mstack-protector-guard-reg, added by
-# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81708
-
-echo "int foo(void) { char X[200]; return 3; }" | $* -S -x c -m32 -O0 -fstack-protector -mstack-protector-guard-reg=fs -mstack-protector-guard-symbol=__stack_chk_guard - -o - 2> /dev/null | grep -q "%fs"
diff --git a/scripts/gcc-x86_64-has-stack-protector.sh b/scripts/gcc-x86_64-has-stack-protector.sh
deleted file mode 100755
index f680bb01aeeb..000000000000
--- a/scripts/gcc-x86_64-has-stack-protector.sh
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-
-echo "int foo(void) { char X[200]; return 3; }" | $* -S -x c -m64 -O0 -mcmodel=kernel -fno-PIE -fstack-protector - -o - 2> /dev/null | grep -q "%gs"
diff --git a/scripts/gdb/linux/cpus.py b/scripts/gdb/linux/cpus.py
index 13eb8b3901b8..8f7c4fb78c2c 100644
--- a/scripts/gdb/linux/cpus.py
+++ b/scripts/gdb/linux/cpus.py
@@ -164,7 +164,7 @@ def get_current_task(cpu):
var_ptr = gdb.parse_and_eval("(struct task_struct *)cpu_tasks[0].task")
return var_ptr.dereference()
else:
- var_ptr = gdb.parse_and_eval("&pcpu_hot.current_task")
+ var_ptr = gdb.parse_and_eval("&current_task")
return per_cpu(var_ptr, cpu).dereference()
elif utils.is_target_arch("aarch64"):
current_task_addr = gdb.parse_and_eval("(unsigned long)$SP_EL0")
diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c
index 03852da3d249..4b0234e4b12f 100644
--- a/scripts/kallsyms.c
+++ b/scripts/kallsyms.c
@@ -5,7 +5,7 @@
* This software may be used and distributed according to the terms
* of the GNU General Public License, incorporated herein by reference.
*
- * Usage: kallsyms [--all-symbols] [--absolute-percpu] in.map > out.S
+ * Usage: kallsyms [--all-symbols] in.map > out.S
*
* Table compression uses all the unused char codes on the symbols and
* maps these to the most used substrings (tokens). For instance, it might
@@ -37,7 +37,6 @@ struct sym_entry {
unsigned long long addr;
unsigned int len;
unsigned int seq;
- bool percpu_absolute;
unsigned char sym[];
};
@@ -55,14 +54,9 @@ static struct addr_range text_ranges[] = {
#define text_range_text (&text_ranges[0])
#define text_range_inittext (&text_ranges[1])
-static struct addr_range percpu_range = {
- "__per_cpu_start", "__per_cpu_end", -1ULL, 0
-};
-
static struct sym_entry **table;
static unsigned int table_size, table_cnt;
static int all_symbols;
-static int absolute_percpu;
static int token_profit[0x10000];
@@ -73,7 +67,7 @@ static unsigned char best_table_len[256];
static void usage(void)
{
- fprintf(stderr, "Usage: kallsyms [--all-symbols] [--absolute-percpu] in.map > out.S\n");
+ fprintf(stderr, "Usage: kallsyms [--all-symbols] in.map > out.S\n");
exit(1);
}
@@ -164,7 +158,6 @@ static struct sym_entry *read_symbol(FILE *in, char **buf, size_t *buf_len)
return NULL;
check_symbol_range(name, addr, text_ranges, ARRAY_SIZE(text_ranges));
- check_symbol_range(name, addr, &percpu_range, 1);
/* include the type field in the symbol name, so that it gets
* compressed together */
@@ -175,7 +168,6 @@ static struct sym_entry *read_symbol(FILE *in, char **buf, size_t *buf_len)
sym->len = len;
sym->sym[0] = type;
strcpy(sym_name(sym), name);
- sym->percpu_absolute = false;
return sym;
}
@@ -319,11 +311,6 @@ static int expand_symbol(const unsigned char *data, int len, char *result)
return total;
}
-static bool symbol_absolute(const struct sym_entry *s)
-{
- return s->percpu_absolute;
-}
-
static int compare_names(const void *a, const void *b)
{
int ret;
@@ -455,22 +442,11 @@ static void write_src(void)
*/
long long offset;
- bool overflow;
-
- if (!absolute_percpu) {
- offset = table[i]->addr - relative_base;
- overflow = offset < 0 || offset > UINT_MAX;
- } else if (symbol_absolute(table[i])) {
- offset = table[i]->addr;
- overflow = offset < 0 || offset > INT_MAX;
- } else {
- offset = relative_base - table[i]->addr - 1;
- overflow = offset < INT_MIN || offset >= 0;
- }
- if (overflow) {
+
+ offset = table[i]->addr - relative_base;
+ if (offset < 0 || offset > UINT_MAX) {
fprintf(stderr, "kallsyms failure: "
- "%s symbol value %#llx out of range in relative mode\n",
- symbol_absolute(table[i]) ? "absolute" : "relative",
+ "relative symbol value %#llx out of range\n",
table[i]->addr);
exit(EXIT_FAILURE);
}
@@ -725,36 +701,15 @@ static void sort_symbols(void)
qsort(table, table_cnt, sizeof(table[0]), compare_symbols);
}
-static void make_percpus_absolute(void)
-{
- unsigned int i;
-
- for (i = 0; i < table_cnt; i++)
- if (symbol_in_range(table[i], &percpu_range, 1)) {
- /*
- * Keep the 'A' override for percpu symbols to
- * ensure consistent behavior compared to older
- * versions of this tool.
- */
- table[i]->sym[0] = 'A';
- table[i]->percpu_absolute = true;
- }
-}
-
/* find the minimum non-absolute symbol address */
static void record_relative_base(void)
{
- unsigned int i;
-
- for (i = 0; i < table_cnt; i++)
- if (!symbol_absolute(table[i])) {
- /*
- * The table is sorted by address.
- * Take the first non-absolute symbol value.
- */
- relative_base = table[i]->addr;
- return;
- }
+ /*
+ * The table is sorted by address.
+ * Take the first symbol value.
+ */
+ if (table_cnt)
+ relative_base = table[0]->addr;
}
int main(int argc, char **argv)
@@ -762,7 +717,6 @@ int main(int argc, char **argv)
while (1) {
static const struct option long_options[] = {
{"all-symbols", no_argument, &all_symbols, 1},
- {"absolute-percpu", no_argument, &absolute_percpu, 1},
{},
};
@@ -779,8 +733,6 @@ int main(int argc, char **argv)
read_map(argv[optind]);
shrink_table();
- if (absolute_percpu)
- make_percpus_absolute();
sort_symbols();
record_relative_base();
optimize_token_table();
diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
index 56a077d204cf..67e66333bd2a 100755
--- a/scripts/link-vmlinux.sh
+++ b/scripts/link-vmlinux.sh
@@ -144,10 +144,6 @@ kallsyms()
kallsymopt="${kallsymopt} --all-symbols"
fi
- if is_enabled CONFIG_KALLSYMS_ABSOLUTE_PERCPU; then
- kallsymopt="${kallsymopt} --absolute-percpu"
- fi
-
info KSYMS "${2}.S"
scripts/kallsyms ${kallsymopt} "${1}" > "${2}.S"
diff --git a/scripts/min-tool-version.sh b/scripts/min-tool-version.sh
index 91c91201212c..787868183b84 100755
--- a/scripts/min-tool-version.sh
+++ b/scripts/min-tool-version.sh
@@ -19,12 +19,14 @@ binutils)
gcc)
if [ "$ARCH" = parisc64 ]; then
echo 12.0.0
+ elif [ "$SRCARCH" = x86 ]; then
+ echo 8.1.0
else
echo 5.1.0
fi
;;
llvm)
- if [ "$SRCARCH" = s390 ]; then
+ if [ "$SRCARCH" = s390 -o "$SRCARCH" = x86 ]; then
echo 15.0.0
elif [ "$SRCARCH" = loongarch ]; then
echo 18.0.0
diff --git a/tools/arch/x86/include/asm/asm.h b/tools/arch/x86/include/asm/asm.h
index 3ad3da9a7d97..dbe39b44256b 100644
--- a/tools/arch/x86/include/asm/asm.h
+++ b/tools/arch/x86/include/asm/asm.h
@@ -2,7 +2,7 @@
#ifndef _ASM_X86_ASM_H
#define _ASM_X86_ASM_H
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
# define __ASM_FORM(x, ...) x,## __VA_ARGS__
# define __ASM_FORM_RAW(x, ...) x,## __VA_ARGS__
# define __ASM_FORM_COMMA(x, ...) x,## __VA_ARGS__,
@@ -123,7 +123,7 @@
#ifdef __KERNEL__
/* Exception table entry */
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
# define _ASM_EXTABLE_HANDLE(from, to, handler) \
.pushsection "__ex_table","a" ; \
.balign 4 ; \
@@ -154,7 +154,7 @@
# define _ASM_NOKPROBE(entry)
# endif
-#else /* ! __ASSEMBLY__ */
+#else /* ! __ASSEMBLER__ */
# define _EXPAND_EXTABLE_HANDLE(x) #x
# define _ASM_EXTABLE_HANDLE(from, to, handler) \
" .pushsection \"__ex_table\",\"a\"\n" \
@@ -186,7 +186,7 @@
*/
register unsigned long current_stack_pointer asm(_ASM_SP);
#define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer)
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __KERNEL__ */
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 17b6590748c0..c691481d59ce 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -2,14 +2,6 @@
#ifndef _ASM_X86_CPUFEATURES_H
#define _ASM_X86_CPUFEATURES_H
-#ifndef _ASM_X86_REQUIRED_FEATURES_H
-#include <asm/required-features.h>
-#endif
-
-#ifndef _ASM_X86_DISABLED_FEATURES_H
-#include <asm/disabled-features.h>
-#endif
-
/*
* Defines x86 CPU feature bits
*/
diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h
deleted file mode 100644
index c492bdc97b05..000000000000
--- a/tools/arch/x86/include/asm/disabled-features.h
+++ /dev/null
@@ -1,161 +0,0 @@
-#ifndef _ASM_X86_DISABLED_FEATURES_H
-#define _ASM_X86_DISABLED_FEATURES_H
-
-/* These features, although they might be available in a CPU
- * will not be used because the compile options to support
- * them are not present.
- *
- * This code allows them to be checked and disabled at
- * compile time without an explicit #ifdef. Use
- * cpu_feature_enabled().
- */
-
-#ifdef CONFIG_X86_UMIP
-# define DISABLE_UMIP 0
-#else
-# define DISABLE_UMIP (1<<(X86_FEATURE_UMIP & 31))
-#endif
-
-#ifdef CONFIG_X86_64
-# define DISABLE_VME (1<<(X86_FEATURE_VME & 31))
-# define DISABLE_K6_MTRR (1<<(X86_FEATURE_K6_MTRR & 31))
-# define DISABLE_CYRIX_ARR (1<<(X86_FEATURE_CYRIX_ARR & 31))
-# define DISABLE_CENTAUR_MCR (1<<(X86_FEATURE_CENTAUR_MCR & 31))
-# define DISABLE_PCID 0
-#else
-# define DISABLE_VME 0
-# define DISABLE_K6_MTRR 0
-# define DISABLE_CYRIX_ARR 0
-# define DISABLE_CENTAUR_MCR 0
-# define DISABLE_PCID (1<<(X86_FEATURE_PCID & 31))
-#endif /* CONFIG_X86_64 */
-
-#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
-# define DISABLE_PKU 0
-# define DISABLE_OSPKE 0
-#else
-# define DISABLE_PKU (1<<(X86_FEATURE_PKU & 31))
-# define DISABLE_OSPKE (1<<(X86_FEATURE_OSPKE & 31))
-#endif /* CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS */
-
-#ifdef CONFIG_X86_5LEVEL
-# define DISABLE_LA57 0
-#else
-# define DISABLE_LA57 (1<<(X86_FEATURE_LA57 & 31))
-#endif
-
-#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
-# define DISABLE_PTI 0
-#else
-# define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31))
-#endif
-
-#ifdef CONFIG_MITIGATION_RETPOLINE
-# define DISABLE_RETPOLINE 0
-#else
-# define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \
- (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)))
-#endif
-
-#ifdef CONFIG_MITIGATION_RETHUNK
-# define DISABLE_RETHUNK 0
-#else
-# define DISABLE_RETHUNK (1 << (X86_FEATURE_RETHUNK & 31))
-#endif
-
-#ifdef CONFIG_MITIGATION_UNRET_ENTRY
-# define DISABLE_UNRET 0
-#else
-# define DISABLE_UNRET (1 << (X86_FEATURE_UNRET & 31))
-#endif
-
-#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING
-# define DISABLE_CALL_DEPTH_TRACKING 0
-#else
-# define DISABLE_CALL_DEPTH_TRACKING (1 << (X86_FEATURE_CALL_DEPTH & 31))
-#endif
-
-#ifdef CONFIG_ADDRESS_MASKING
-# define DISABLE_LAM 0
-#else
-# define DISABLE_LAM (1 << (X86_FEATURE_LAM & 31))
-#endif
-
-#ifdef CONFIG_INTEL_IOMMU_SVM
-# define DISABLE_ENQCMD 0
-#else
-# define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31))
-#endif
-
-#ifdef CONFIG_X86_SGX
-# define DISABLE_SGX 0
-#else
-# define DISABLE_SGX (1 << (X86_FEATURE_SGX & 31))
-#endif
-
-#ifdef CONFIG_XEN_PV
-# define DISABLE_XENPV 0
-#else
-# define DISABLE_XENPV (1 << (X86_FEATURE_XENPV & 31))
-#endif
-
-#ifdef CONFIG_INTEL_TDX_GUEST
-# define DISABLE_TDX_GUEST 0
-#else
-# define DISABLE_TDX_GUEST (1 << (X86_FEATURE_TDX_GUEST & 31))
-#endif
-
-#ifdef CONFIG_X86_USER_SHADOW_STACK
-#define DISABLE_USER_SHSTK 0
-#else
-#define DISABLE_USER_SHSTK (1 << (X86_FEATURE_USER_SHSTK & 31))
-#endif
-
-#ifdef CONFIG_X86_KERNEL_IBT
-#define DISABLE_IBT 0
-#else
-#define DISABLE_IBT (1 << (X86_FEATURE_IBT & 31))
-#endif
-
-#ifdef CONFIG_X86_FRED
-# define DISABLE_FRED 0
-#else
-# define DISABLE_FRED (1 << (X86_FEATURE_FRED & 31))
-#endif
-
-#ifdef CONFIG_KVM_AMD_SEV
-#define DISABLE_SEV_SNP 0
-#else
-#define DISABLE_SEV_SNP (1 << (X86_FEATURE_SEV_SNP & 31))
-#endif
-
-/*
- * Make sure to add features to the correct mask
- */
-#define DISABLED_MASK0 (DISABLE_VME)
-#define DISABLED_MASK1 0
-#define DISABLED_MASK2 0
-#define DISABLED_MASK3 (DISABLE_CYRIX_ARR|DISABLE_CENTAUR_MCR|DISABLE_K6_MTRR)
-#define DISABLED_MASK4 (DISABLE_PCID)
-#define DISABLED_MASK5 0
-#define DISABLED_MASK6 0
-#define DISABLED_MASK7 (DISABLE_PTI)
-#define DISABLED_MASK8 (DISABLE_XENPV|DISABLE_TDX_GUEST)
-#define DISABLED_MASK9 (DISABLE_SGX)
-#define DISABLED_MASK10 0
-#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET| \
- DISABLE_CALL_DEPTH_TRACKING|DISABLE_USER_SHSTK)
-#define DISABLED_MASK12 (DISABLE_FRED|DISABLE_LAM)
-#define DISABLED_MASK13 0
-#define DISABLED_MASK14 0
-#define DISABLED_MASK15 0
-#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP| \
- DISABLE_ENQCMD)
-#define DISABLED_MASK17 0
-#define DISABLED_MASK18 (DISABLE_IBT)
-#define DISABLED_MASK19 (DISABLE_SEV_SNP)
-#define DISABLED_MASK20 0
-#define DISABLED_MASK21 0
-#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22)
-
-#endif /* _ASM_X86_DISABLED_FEATURES_H */
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index 3ae84c3b8e6d..dc1c1057f26e 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -25,6 +25,7 @@
#define _EFER_SVME 12 /* Enable virtualization */
#define _EFER_LMSLE 13 /* Long Mode Segment Limit Enable */
#define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */
+#define _EFER_TCE 15 /* Enable Translation Cache Extensions */
#define _EFER_AUTOIBRS 21 /* Enable Automatic IBRS */
#define EFER_SCE (1<<_EFER_SCE)
@@ -34,6 +35,7 @@
#define EFER_SVME (1<<_EFER_SVME)
#define EFER_LMSLE (1<<_EFER_LMSLE)
#define EFER_FFXSR (1<<_EFER_FFXSR)
+#define EFER_TCE (1<<_EFER_TCE)
#define EFER_AUTOIBRS (1<<_EFER_AUTOIBRS)
/*
diff --git a/tools/arch/x86/include/asm/nops.h b/tools/arch/x86/include/asm/nops.h
index 1c1b7550fa55..cd94221d8335 100644
--- a/tools/arch/x86/include/asm/nops.h
+++ b/tools/arch/x86/include/asm/nops.h
@@ -82,7 +82,7 @@
#define ASM_NOP7 _ASM_BYTES(BYTES_NOP7)
#define ASM_NOP8 _ASM_BYTES(BYTES_NOP8)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
extern const unsigned char * const x86_nops[];
#endif
diff --git a/tools/arch/x86/include/asm/orc_types.h b/tools/arch/x86/include/asm/orc_types.h
index 46d7e06763c9..e0125afa53fb 100644
--- a/tools/arch/x86/include/asm/orc_types.h
+++ b/tools/arch/x86/include/asm/orc_types.h
@@ -45,7 +45,7 @@
#define ORC_TYPE_REGS 3
#define ORC_TYPE_REGS_PARTIAL 4
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/byteorder.h>
/*
@@ -73,6 +73,6 @@ struct orc_entry {
#endif
} __packed;
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ORC_TYPES_H */
diff --git a/tools/arch/x86/include/asm/pvclock-abi.h b/tools/arch/x86/include/asm/pvclock-abi.h
index 1436226efe3e..b9fece5fc96d 100644
--- a/tools/arch/x86/include/asm/pvclock-abi.h
+++ b/tools/arch/x86/include/asm/pvclock-abi.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_PVCLOCK_ABI_H
#define _ASM_X86_PVCLOCK_ABI_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/*
* These structs MUST NOT be changed.
@@ -44,5 +44,5 @@ struct pvclock_wall_clock {
#define PVCLOCK_GUEST_STOPPED (1 << 1)
/* PVCLOCK_COUNTS_FROM_ZERO broke ABI and can't be used anymore. */
#define PVCLOCK_COUNTS_FROM_ZERO (1 << 2)
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_PVCLOCK_ABI_H */
diff --git a/tools/arch/x86/include/asm/required-features.h b/tools/arch/x86/include/asm/required-features.h
deleted file mode 100644
index e9187ddd3d1f..000000000000
--- a/tools/arch/x86/include/asm/required-features.h
+++ /dev/null
@@ -1,105 +0,0 @@
-#ifndef _ASM_X86_REQUIRED_FEATURES_H
-#define _ASM_X86_REQUIRED_FEATURES_H
-
-/* Define minimum CPUID feature set for kernel These bits are checked
- really early to actually display a visible error message before the
- kernel dies. Make sure to assign features to the proper mask!
-
- Some requirements that are not in CPUID yet are also in the
- CONFIG_X86_MINIMUM_CPU_FAMILY which is checked too.
-
- The real information is in arch/x86/Kconfig.cpu, this just converts
- the CONFIGs into a bitmask */
-
-#ifndef CONFIG_MATH_EMULATION
-# define NEED_FPU (1<<(X86_FEATURE_FPU & 31))
-#else
-# define NEED_FPU 0
-#endif
-
-#if defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64)
-# define NEED_PAE (1<<(X86_FEATURE_PAE & 31))
-#else
-# define NEED_PAE 0
-#endif
-
-#ifdef CONFIG_X86_CMPXCHG64
-# define NEED_CX8 (1<<(X86_FEATURE_CX8 & 31))
-#else
-# define NEED_CX8 0
-#endif
-
-#if defined(CONFIG_X86_CMOV) || defined(CONFIG_X86_64)
-# define NEED_CMOV (1<<(X86_FEATURE_CMOV & 31))
-#else
-# define NEED_CMOV 0
-#endif
-
-# define NEED_3DNOW 0
-
-#if defined(CONFIG_X86_P6_NOP) || defined(CONFIG_X86_64)
-# define NEED_NOPL (1<<(X86_FEATURE_NOPL & 31))
-#else
-# define NEED_NOPL 0
-#endif
-
-#ifdef CONFIG_MATOM
-# define NEED_MOVBE (1<<(X86_FEATURE_MOVBE & 31))
-#else
-# define NEED_MOVBE 0
-#endif
-
-#ifdef CONFIG_X86_64
-#ifdef CONFIG_PARAVIRT_XXL
-/* Paravirtualized systems may not have PSE or PGE available */
-#define NEED_PSE 0
-#define NEED_PGE 0
-#else
-#define NEED_PSE (1<<(X86_FEATURE_PSE) & 31)
-#define NEED_PGE (1<<(X86_FEATURE_PGE) & 31)
-#endif
-#define NEED_MSR (1<<(X86_FEATURE_MSR & 31))
-#define NEED_FXSR (1<<(X86_FEATURE_FXSR & 31))
-#define NEED_XMM (1<<(X86_FEATURE_XMM & 31))
-#define NEED_XMM2 (1<<(X86_FEATURE_XMM2 & 31))
-#define NEED_LM (1<<(X86_FEATURE_LM & 31))
-#else
-#define NEED_PSE 0
-#define NEED_MSR 0
-#define NEED_PGE 0
-#define NEED_FXSR 0
-#define NEED_XMM 0
-#define NEED_XMM2 0
-#define NEED_LM 0
-#endif
-
-#define REQUIRED_MASK0 (NEED_FPU|NEED_PSE|NEED_MSR|NEED_PAE|\
- NEED_CX8|NEED_PGE|NEED_FXSR|NEED_CMOV|\
- NEED_XMM|NEED_XMM2)
-#define SSE_MASK (NEED_XMM|NEED_XMM2)
-
-#define REQUIRED_MASK1 (NEED_LM|NEED_3DNOW)
-
-#define REQUIRED_MASK2 0
-#define REQUIRED_MASK3 (NEED_NOPL)
-#define REQUIRED_MASK4 (NEED_MOVBE)
-#define REQUIRED_MASK5 0
-#define REQUIRED_MASK6 0
-#define REQUIRED_MASK7 0
-#define REQUIRED_MASK8 0
-#define REQUIRED_MASK9 0
-#define REQUIRED_MASK10 0
-#define REQUIRED_MASK11 0
-#define REQUIRED_MASK12 0
-#define REQUIRED_MASK13 0
-#define REQUIRED_MASK14 0
-#define REQUIRED_MASK15 0
-#define REQUIRED_MASK16 0
-#define REQUIRED_MASK17 0
-#define REQUIRED_MASK18 0
-#define REQUIRED_MASK19 0
-#define REQUIRED_MASK20 0
-#define REQUIRED_MASK21 0
-#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22)
-
-#endif /* _ASM_X86_REQUIRED_FEATURES_H */
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index fb9691a34d92..7567c893f45e 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -850,6 +850,7 @@ bool arch_is_rethunk(struct symbol *sym)
bool arch_is_embedded_insn(struct symbol *sym)
{
return !strcmp(sym->name, "retbleed_return_thunk") ||
+ !strcmp(sym->name, "srso_alias_safe_ret") ||
!strcmp(sym->name, "srso_safe_ret");
}
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 48d7bc5b4736..ca3435acc326 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1284,15 +1284,6 @@ static void annotate_call_site(struct objtool_file *file,
if (!sym)
sym = reloc->sym;
- /*
- * Alternative replacement code is just template code which is
- * sometimes copied to the original instruction. For now, don't
- * annotate it. (In the future we might consider annotating the
- * original instruction if/when it ever makes sense to do so.)
- */
- if (!strcmp(insn->sec->name, ".altinstr_replacement"))
- return;
-
if (sym->static_call_tramp) {
list_add_tail(&insn->call_node, &file->static_call_list);
return;
@@ -1350,7 +1341,8 @@ static void annotate_call_site(struct objtool_file *file,
return;
}
- if (insn->type == INSN_CALL && !insn->sec->init)
+ if (insn->type == INSN_CALL && !insn->sec->init &&
+ !insn->_call_dest->embedded_insn)
list_add_tail(&insn->call_node, &file->call_list);
if (!sibling && dead_end_function(file, sym))
diff --git a/tools/objtool/noreturns.h b/tools/objtool/noreturns.h
index 6bb7edda3094..eacfe3b0a8d1 100644
--- a/tools/objtool/noreturns.h
+++ b/tools/objtool/noreturns.h
@@ -16,6 +16,7 @@ NORETURN(__tdx_hypercall_failed)
NORETURN(__ubsan_handle_builtin_unreachable)
NORETURN(__x64_sys_exit)
NORETURN(__x64_sys_exit_group)
+NORETURN(acpi_processor_ffh_play_dead)
NORETURN(arch_cpu_idle_dead)
NORETURN(bch2_trans_in_restart_error)
NORETURN(bch2_trans_restart_error)
@@ -34,6 +35,7 @@ NORETURN(kunit_try_catch_throw)
NORETURN(machine_real_restart)
NORETURN(make_task_dead)
NORETURN(mpt_halt_firmware)
+NORETURN(mwait_play_dead)
NORETURN(nmi_panic_self_stop)
NORETURN(panic)
NORETURN(panic_smp_self_stop)
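For readers new to this table: each NORETURN() entry names a function objtool should treat as never returning, matching a __noreturn annotation on the kernel side (the entries added here are acpi_processor_ffh_play_dead and mwait_play_dead). A rough, self-contained sketch of the pairing, using a made-up function name:

/* Stand-in for the kernel's __noreturn attribute, for illustration only. */
#define __noreturn __attribute__((__noreturn__))

void __noreturn example_play_dead(void)
{
	for (;;)
		;	/* park the CPU forever; control never returns */
}

/* ...and the matching line objtool would need in noreturns.h: */
/* NORETURN(example_play_dead) */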
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index d3c6e10dce73..a4499e5a6f9c 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -26,8 +26,6 @@ FILES=(
"include/linux/hash.h"
"include/linux/list-sort.h"
"include/uapi/linux/hw_breakpoint.h"
- "arch/x86/include/asm/disabled-features.h"
- "arch/x86/include/asm/required-features.h"
"arch/x86/include/asm/cpufeatures.h"
"arch/x86/include/asm/inat_types.h"
"arch/x86/include/asm/emulate_prefix.h"
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 8d5011a0bf60..26057af6b5a1 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -1056,7 +1056,7 @@ static const struct platform_data turbostat_pdata[] = {
* Missing support for
* INTEL_ICELAKE
* INTEL_ATOM_SILVERMONT_MID
- * INTEL_ATOM_AIRMONT_MID
+ * INTEL_ATOM_SILVERMONT_MID2
* INTEL_ATOM_AIRMONT_NP
*/
{ 0, NULL },
diff --git a/tools/testing/selftests/x86/lam.c b/tools/testing/selftests/x86/lam.c
index 4d4a76532dc9..18d736640ece 100644
--- a/tools/testing/selftests/x86/lam.c
+++ b/tools/testing/selftests/x86/lam.c
@@ -4,6 +4,7 @@
#include <stdlib.h>
#include <string.h>
#include <sys/syscall.h>
+#include <sys/ioctl.h>
#include <time.h>
#include <signal.h>
#include <setjmp.h>
@@ -43,7 +44,15 @@
#define FUNC_INHERITE 0x20
#define FUNC_PASID 0x40
+/* get_user() pointer test cases */
+#define GET_USER_USER 0
+#define GET_USER_KERNEL_TOP 1
+#define GET_USER_KERNEL_BOT 2
+#define GET_USER_KERNEL 3
+
#define TEST_MASK 0x7f
+#define L5_SIGN_EXT_MASK (0xFFUL << 56)
+#define L4_SIGN_EXT_MASK (0x1FFFFUL << 47)
#define LOW_ADDR (0x1UL << 30)
#define HIGH_ADDR (0x3UL << 48)
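A quick aside on where the two new sign-extension masks come from (illustrative only, not part of the patch): with 4-level paging, virtual addresses are 48 bits, so bits 63..47 (17 bits) must all copy bit 47; with 5-level paging they are 57 bits, so bits 63..56 (8 bits) must copy bit 56. Setting every one of those bits turns a user pointer into a kernel-half address, which is what the negative get_user() cases below rely on:

/* 17 sign-extension bits for 4-level paging, 8 for 5-level paging. */
_Static_assert((0x1FFFFULL << 47) == 0xFFFF800000000000ULL,
	       "L4 mask covers bits 47..63");
_Static_assert((0xFFULL << 56) == 0xFF00000000000000ULL,
	       "L5 mask covers bits 56..63");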
@@ -115,23 +124,42 @@ static void segv_handler(int sig)
siglongjmp(segv_env, 1);
}
-static inline int cpu_has_lam(void)
+static inline int lam_is_available(void)
{
unsigned int cpuinfo[4];
+ unsigned long bits = 0;
+ int ret;
__cpuid_count(0x7, 1, cpuinfo[0], cpuinfo[1], cpuinfo[2], cpuinfo[3]);
- return (cpuinfo[0] & (1 << 26));
+ /* Check if cpu supports LAM */
+ if (!(cpuinfo[0] & (1 << 26))) {
+ ksft_print_msg("LAM is not supported!\n");
+ return 0;
+ }
+
+ /* Return 0 if CONFIG_ADDRESS_MASKING is not set */
+ ret = syscall(SYS_arch_prctl, ARCH_GET_MAX_TAG_BITS, &bits);
+ if (ret) {
+ ksft_print_msg("LAM is disabled in the kernel!\n");
+ return 0;
+ }
+
+ return 1;
}
-/* Check 5-level page table feature in CPUID.(EAX=07H, ECX=00H):ECX.[bit 16] */
-static inline int cpu_has_la57(void)
+static inline int la57_enabled(void)
{
- unsigned int cpuinfo[4];
+ int ret;
+ void *p;
+
+ p = mmap((void *)HIGH_ADDR, PAGE_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
- __cpuid_count(0x7, 0, cpuinfo[0], cpuinfo[1], cpuinfo[2], cpuinfo[3]);
+ ret = p == MAP_FAILED ? 0 : 1;
- return (cpuinfo[2] & (1 << 16));
+ munmap(p, PAGE_SIZE);
+ return ret;
}
/*
@@ -322,7 +350,7 @@ static int handle_mmap(struct testcases *test)
flags, -1, 0);
if (ptr == MAP_FAILED) {
if (test->addr == HIGH_ADDR)
- if (!cpu_has_la57())
+ if (!la57_enabled())
return 3; /* unsupport LA57 */
return 1;
}
@@ -370,6 +398,78 @@ static int handle_syscall(struct testcases *test)
return ret;
}
+static int get_user_syscall(struct testcases *test)
+{
+ uint64_t ptr_address, bitmask;
+ int fd, ret = 0;
+ void *ptr;
+
+ if (la57_enabled()) {
+ bitmask = L5_SIGN_EXT_MASK;
+ ptr_address = HIGH_ADDR;
+ } else {
+ bitmask = L4_SIGN_EXT_MASK;
+ ptr_address = LOW_ADDR;
+ }
+
+ ptr = mmap((void *)ptr_address, PAGE_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
+
+ if (ptr == MAP_FAILED) {
+ perror("failed to map byte to pass into get_user");
+ return 1;
+ }
+
+ if (set_lam(test->lam) != 0) {
+ ret = 2;
+ goto error;
+ }
+
+ fd = memfd_create("lam_ioctl", 0);
+ if (fd == -1) {
+ munmap(ptr, PAGE_SIZE);
+ exit(EXIT_FAILURE);
+ }
+
+ switch (test->later) {
+ case GET_USER_USER:
+ /* Control group - properly tagged user pointer */
+ ptr = (void *)set_metadata((uint64_t)ptr, test->lam);
+ break;
+ case GET_USER_KERNEL_TOP:
+ /* Kernel address with top bit cleared */
+ bitmask &= (bitmask >> 1);
+ ptr = (void *)((uint64_t)ptr | bitmask);
+ break;
+ case GET_USER_KERNEL_BOT:
+ /* Kernel address with bottom sign-extension bit cleared */
+ bitmask &= (bitmask << 1);
+ ptr = (void *)((uint64_t)ptr | bitmask);
+ break;
+ case GET_USER_KERNEL:
+ /* Try to pass a kernel address */
+ ptr = (void *)((uint64_t)ptr | bitmask);
+ break;
+ default:
+ printf("Invalid test case value passed!\n");
+ break;
+ }
+
+ /*
+ * Use FIOASYNC ioctl because it utilizes get_user() internally and is
+ * very non-invasive to the system. Pass differently tagged pointers to
+ * get_user() in order to verify that valid user pointers are going
+ * through and invalid kernel/non-canonical pointers are not.
+ */
+ if (ioctl(fd, FIOASYNC, ptr) != 0)
+ ret = 1;
+
+ close(fd);
+error:
+ munmap(ptr, PAGE_SIZE);
+ return ret;
+}
+
int sys_uring_setup(unsigned int entries, struct io_uring_params *p)
{
return (int)syscall(__NR_io_uring_setup, entries, p);
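The FIOASYNC comment above is the key to the new tests: that ioctl fetches its int argument with get_user(), so a rejected pointer surfaces as -EFAULT rather than a crash. A standalone sketch of the same probe outside the harness (the file name and the chosen bad address are arbitrary):

#define _GNU_SOURCE
#include <errno.h>
#include <stdio.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

int main(void)
{
	int fd = memfd_create("fioasync_probe", 0);	/* any file descriptor will do */
	uintptr_t bad = 0xFFFF800000000000UL;		/* kernel-half address */

	if (fd < 0)
		return 1;

	/* FIOASYNC reads an int through get_user(); a kernel pointer must fail. */
	if (ioctl(fd, FIOASYNC, (int *)bad) == -1 && errno == EFAULT)
		puts("get_user() rejected the kernel pointer, as expected");

	return 0;
}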
@@ -596,8 +696,10 @@ int do_uring(unsigned long lam)
fi->file_fd = file_fd;
ring = malloc(sizeof(*ring));
- if (!ring)
+ if (!ring) {
+ free(fi);
return 1;
+ }
memset(ring, 0, sizeof(struct io_ring));
@@ -883,6 +985,33 @@ static struct testcases syscall_cases[] = {
.test_func = handle_syscall,
.msg = "SYSCALL:[Negative] Disable LAM. Dereferencing pointer with metadata.\n",
},
+ {
+ .later = GET_USER_USER,
+ .lam = LAM_U57_BITS,
+ .test_func = get_user_syscall,
+ .msg = "GET_USER: get_user() and pass a properly tagged user pointer.\n",
+ },
+ {
+ .later = GET_USER_KERNEL_TOP,
+ .expected = 1,
+ .lam = LAM_U57_BITS,
+ .test_func = get_user_syscall,
+ .msg = "GET_USER:[Negative] get_user() with a kernel pointer and the top bit cleared.\n",
+ },
+ {
+ .later = GET_USER_KERNEL_BOT,
+ .expected = 1,
+ .lam = LAM_U57_BITS,
+ .test_func = get_user_syscall,
+ .msg = "GET_USER:[Negative] get_user() with a kernel pointer and the bottom sign-extension bit cleared.\n",
+ },
+ {
+ .later = GET_USER_KERNEL,
+ .expected = 1,
+ .lam = LAM_U57_BITS,
+ .test_func = get_user_syscall,
+ .msg = "GET_USER:[Negative] get_user() and pass a kernel pointer.\n",
+ },
};
static struct testcases mmap_cases[] = {
@@ -1181,10 +1310,8 @@ int main(int argc, char **argv)
tests_cnt = 0;
- if (!cpu_has_lam()) {
- ksft_print_msg("Unsupported LAM feature!\n");
+ if (!lam_is_available())
return KSFT_SKIP;
- }
while ((c = getopt(argc, argv, "ht:")) != -1) {
switch (c) {