diff options
346 files changed, 5002 insertions, 4671 deletions
diff --git a/Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst b/Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst index 0585d02b9a6c..ad15417d39f9 100644 --- a/Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst +++ b/Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst @@ -29,14 +29,6 @@ Below is the list of affected Intel processors [#f1]_: RAPTORLAKE_S 06_BFH =================== ============ -As an exception to this table, Intel Xeon E family parts ALDERLAKE(06_97H) and -RAPTORLAKE(06_B7H) codenamed Catlow are not affected. They are reported as -vulnerable in Linux because they share the same family/model with an affected -part. Unlike their affected counterparts, they do not enumerate RFDS_CLEAR or -CPUID.HYBRID. This information could be used to distinguish between the -affected and unaffected parts, but it is deemed not worth adding complexity as -the reporting is fixed automatically when these parts enumerate RFDS_NO. - Mitigation ========== Intel released a microcode update that enables software to clear sensitive diff --git a/Documentation/admin-guide/kdump/kdump.rst b/Documentation/admin-guide/kdump/kdump.rst index 5376890adbeb..1f7f14c6e184 100644 --- a/Documentation/admin-guide/kdump/kdump.rst +++ b/Documentation/admin-guide/kdump/kdump.rst @@ -180,10 +180,6 @@ Dump-capture kernel config options (Arch Dependent, i386 and x86_64) 1) On i386, enable high memory support under "Processor type and features":: - CONFIG_HIGHMEM64G=y - - or:: - CONFIG_HIGHMEM4G 2) With CONFIG_SMP=y, usually nr_cpus=1 need specified on the kernel diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 60f98de36f82..866427da6add 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -416,10 +416,6 @@ Format: { quiet (default) | verbose | debug } Change the amount of debugging information output when initialising the APIC and IO-APIC components. - For X86-32, this can also be used to specify an APIC - driver name. - Format: apic=driver_name - Examples: apic=bigsmp apic_extnmi= [APIC,X86,EARLY] External NMI delivery setting Format: { bsp (default) | all | none } @@ -7679,13 +7675,6 @@ 16 - SIGBUS faults Example: user_debug=31 - userpte= - [X86,EARLY] Flags controlling user PTE allocations. - - nohigh = do not allocate PTE pages in - HIGHMEM regardless of setting - of CONFIG_HIGHPTE. - vdso= [X86,SH,SPARC] On X86_32, this is an alias for vdso32=. Otherwise: diff --git a/Documentation/arch/x86/usb-legacy-support.rst b/Documentation/arch/x86/usb-legacy-support.rst index e01c08b7c981..b17bf122270a 100644 --- a/Documentation/arch/x86/usb-legacy-support.rst +++ b/Documentation/arch/x86/usb-legacy-support.rst @@ -20,11 +20,7 @@ It has several drawbacks, though: features (wheel, extra buttons, touchpad mode) of the real PS/2 mouse may not be available. -2) If CONFIG_HIGHMEM64G is enabled, the PS/2 mouse emulation can cause - system crashes, because the SMM BIOS is not expecting to be in PAE mode. - The Intel E7505 is a typical machine where this happens. - -3) If AMD64 64-bit mode is enabled, again system crashes often happen, +2) If AMD64 64-bit mode is enabled, again system crashes often happen, because the SMM BIOS isn't expecting the CPU to be in 64-bit mode. The BIOS manufacturers only test with Windows, and Windows doesn't do 64-bit yet. @@ -38,11 +34,6 @@ Problem 1) compiled-in, too. Problem 2) - can currently only be solved by either disabling HIGHMEM64G - in the kernel config or USB Legacy support in the BIOS. A BIOS update - could help, but so far no such update exists. - -Problem 3) is usually fixed by a BIOS update. Check the board manufacturers web site. If an update is not available, disable USB Legacy support in the BIOS. If this alone doesn't help, try also adding @@ -1014,6 +1014,9 @@ CC_FLAGS_CFI := -fsanitize=kcfi ifdef CONFIG_CFI_ICALL_NORMALIZE_INTEGERS CC_FLAGS_CFI += -fsanitize-cfi-icall-experimental-normalize-integers endif +ifdef CONFIG_FINEIBT_BHI + CC_FLAGS_CFI += -fsanitize-kcfi-arity +endif ifdef CONFIG_RUST # Always pass -Zsanitizer-cfi-normalize-integers as CONFIG_RUST selects # CONFIG_CFI_ICALL_NORMALIZE_INTEGERS. diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h index 1815748f5d2a..bae5edf348ef 100644 --- a/arch/arm/include/asm/io.h +++ b/arch/arm/include/asm/io.h @@ -381,7 +381,7 @@ void __iomem *ioremap_wc(resource_size_t res_cookie, size_t size); void iounmap(volatile void __iomem *io_addr); #define iounmap iounmap -void *arch_memremap_wb(phys_addr_t phys_addr, size_t size); +void *arch_memremap_wb(phys_addr_t phys_addr, size_t size, unsigned long flags); #define arch_memremap_wb arch_memremap_wb /* diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index 89f1c97f3079..748698e91a4b 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -436,7 +436,7 @@ void __arm_iomem_set_ro(void __iomem *ptr, size_t size) set_memory_ro((unsigned long)ptr, PAGE_ALIGN(size) / PAGE_SIZE); } -void *arch_memremap_wb(phys_addr_t phys_addr, size_t size) +void *arch_memremap_wb(phys_addr_t phys_addr, size_t size, unsigned long flags) { return (__force void *)arch_ioremap_caller(phys_addr, size, MT_MEMORY_RW, diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c index 1a8f6914ee59..d638cc87807e 100644 --- a/arch/arm/mm/nommu.c +++ b/arch/arm/mm/nommu.c @@ -248,7 +248,7 @@ void __iomem *pci_remap_cfgspace(resource_size_t res_cookie, size_t size) EXPORT_SYMBOL_GPL(pci_remap_cfgspace); #endif -void *arch_memremap_wb(phys_addr_t phys_addr, size_t size) +void *arch_memremap_wb(phys_addr_t phys_addr, size_t size, unsigned long flags) { return (void *)phys_addr; } diff --git a/arch/riscv/include/asm/io.h b/arch/riscv/include/asm/io.h index 1c5c641075d2..0257f4aa7ff4 100644 --- a/arch/riscv/include/asm/io.h +++ b/arch/riscv/include/asm/io.h @@ -136,7 +136,7 @@ __io_writes_outs(outs, u64, q, __io_pbr(), __io_paw()) #include <asm-generic/io.h> #ifdef CONFIG_MMU -#define arch_memremap_wb(addr, size) \ +#define arch_memremap_wb(addr, size, flags) \ ((__force void *)ioremap_prot((addr), (size), _PAGE_KERNEL)) #endif diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index 79ea97d4797e..8be91974e786 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -440,25 +440,24 @@ void __init arch_cpu_finalize_init(void) os_check_bugs(); } -void apply_seal_endbr(s32 *start, s32 *end, struct module *mod) +void apply_seal_endbr(s32 *start, s32 *end) { } -void apply_retpolines(s32 *start, s32 *end, struct module *mod) +void apply_retpolines(s32 *start, s32 *end) { } -void apply_returns(s32 *start, s32 *end, struct module *mod) +void apply_returns(s32 *start, s32 *end) { } void apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, - s32 *start_cfi, s32 *end_cfi, struct module *mod) + s32 *start_cfi, s32 *end_cfi) { } -void apply_alternatives(struct alt_instr *start, struct alt_instr *end, - struct module *mod) +void apply_alternatives(struct alt_instr *start, struct alt_instr *end) { } diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 0e27ebd7e36a..98bd4935280c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -85,6 +85,7 @@ config X86 select ARCH_HAS_DMA_OPS if GART_IOMMU || XEN select ARCH_HAS_EARLY_DEBUG if KGDB select ARCH_HAS_ELF_RANDOMIZE + select ARCH_HAS_EXECMEM_ROX if X86_64 select ARCH_HAS_FAST_MULTIPLIER select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL @@ -132,7 +133,7 @@ config X86 select ARCH_SUPPORTS_AUTOFDO_CLANG select ARCH_SUPPORTS_PROPELLER_CLANG if X86_64 select ARCH_USE_BUILTIN_BSWAP - select ARCH_USE_CMPXCHG_LOCKREF if X86_CMPXCHG64 + select ARCH_USE_CMPXCHG_LOCKREF if X86_CX8 select ARCH_USE_MEMTEST select ARCH_USE_QUEUED_RWLOCKS select ARCH_USE_QUEUED_SPINLOCKS @@ -232,7 +233,7 @@ config X86 select HAVE_SAMPLE_FTRACE_DIRECT_MULTI if X86_64 select HAVE_EBPF_JIT select HAVE_EFFICIENT_UNALIGNED_ACCESS - select HAVE_EISA + select HAVE_EISA if X86_32 select HAVE_EXIT_THREAD select HAVE_GUP_FAST select HAVE_FENTRY if X86_64 || DYNAMIC_FTRACE @@ -277,7 +278,7 @@ config X86 select HAVE_PCI select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP - select MMU_GATHER_RCU_TABLE_FREE if PARAVIRT + select MMU_GATHER_RCU_TABLE_FREE select MMU_GATHER_MERGE_VMAS select HAVE_POSIX_CPU_TIMERS_TASK_WORK select HAVE_REGS_AND_STACK_ACCESS_API @@ -285,7 +286,7 @@ config X86 select HAVE_FUNCTION_ARG_ACCESS_API select HAVE_SETUP_PER_CPU_AREA select HAVE_SOFTIRQ_ON_OWN_STACK - select HAVE_STACKPROTECTOR if CC_HAS_SANE_STACKPROTECTOR + select HAVE_STACKPROTECTOR select HAVE_STACK_VALIDATION if HAVE_OBJTOOL select HAVE_STATIC_CALL select HAVE_STATIC_CALL_INLINE if HAVE_OBJTOOL @@ -426,15 +427,6 @@ config PGTABLE_LEVELS default 3 if X86_PAE default 2 -config CC_HAS_SANE_STACKPROTECTOR - bool - default $(success,$(srctree)/scripts/gcc-x86_64-has-stack-protector.sh $(CC) $(CLANG_FLAGS)) if 64BIT - default $(success,$(srctree)/scripts/gcc-x86_32-has-stack-protector.sh $(CC) $(CLANG_FLAGS)) - help - We have to make sure stack protector is unconditionally disabled if - the compiler produces broken code or if it does not let us control - the segment on 32-bit kernels. - menu "Processor type and features" config SMP @@ -530,12 +522,6 @@ config X86_FRED ring transitions and exception/interrupt handling if the system supports it. -config X86_BIGSMP - bool "Support for big SMP systems with more than 8 CPUs" - depends on SMP && X86_32 - help - This option is needed for the systems that have more than 8 CPUs. - config X86_EXTENDED_PLATFORM bool "Support for extended (non-PC) x86 platforms" default y @@ -553,13 +539,12 @@ config X86_EXTENDED_PLATFORM AMD Elan RDC R-321x SoC SGI 320/540 (Visual Workstation) - STA2X11-based (e.g. Northville) - Moorestown MID devices 64-bit platforms (CONFIG_64BIT=y): Numascale NumaChip ScaleMP vSMP SGI Ultraviolet + Merrifield/Moorefield MID devices If you have one of these systems, or if you want to build a generic distribution kernel, say Y here - otherwise say N. @@ -604,8 +589,31 @@ config X86_UV This option is needed in order to support SGI Ultraviolet systems. If you don't have one of these, you should say N here. -# Following is an alphabetically sorted list of 32 bit extended platforms -# Please maintain the alphabetic order if and when there are additions +config X86_INTEL_MID + bool "Intel Z34xx/Z35xx MID platform support" + depends on X86_EXTENDED_PLATFORM + depends on X86_PLATFORM_DEVICES + depends on PCI + depends on X86_64 || (EXPERT && PCI_GOANY) + depends on X86_IO_APIC + select I2C + select DW_APB_TIMER + select INTEL_SCU_PCI + help + Select to build a kernel capable of supporting 64-bit Intel MID + (Mobile Internet Device) platform systems which do not have + the PCI legacy interfaces. + + The only supported devices are the 22nm Merrified (Z34xx) + and Moorefield (Z35xx) SoC used in the Intel Edison board and + a small number of Android devices such as the Asus Zenfone 2, + Asus FonePad 8 and Dell Venue 7. + + If you are building for a PC class system or non-MID tablet + SoCs like Bay Trail (Z36xx/Z37xx), say N here. + + Intel MID platforms are based on an Intel processor and chipset which + consume less power than most of the x86 derivatives. config X86_GOLDFISH bool "Goldfish (Virtual Platform)" @@ -615,6 +623,9 @@ config X86_GOLDFISH for Android development. Unless you are building for the Android Goldfish emulator say N here. +# Following is an alphabetically sorted list of 32 bit extended platforms +# Please maintain the alphabetic order if and when there are additions + config X86_INTEL_CE bool "CE4100 TV platform" depends on PCI @@ -630,24 +641,6 @@ config X86_INTEL_CE This option compiles in support for the CE4100 SOC for settop boxes and media devices. -config X86_INTEL_MID - bool "Intel MID platform support" - depends on X86_EXTENDED_PLATFORM - depends on X86_PLATFORM_DEVICES - depends on PCI - depends on X86_64 || (PCI_GOANY && X86_32) - depends on X86_IO_APIC - select I2C - select DW_APB_TIMER - select INTEL_SCU_PCI - help - Select to build a kernel capable of supporting Intel MID (Mobile - Internet Device) platform systems which do not have the PCI legacy - interfaces. If you are building for a PC class system say N here. - - Intel MID platforms are based on an Intel processor and chipset which - consume less power than most of the x86 derivatives. - config X86_INTEL_QUARK bool "Intel Quark platform support" depends on X86_32 @@ -729,18 +722,6 @@ config X86_RDC321X as R-8610-(G). If you don't have one of these chips, you should say N here. -config X86_32_NON_STANDARD - bool "Support non-standard 32-bit SMP architectures" - depends on X86_32 && SMP - depends on X86_EXTENDED_PLATFORM - help - This option compiles in the bigsmp and STA2X11 default - subarchitectures. It is intended for a generic binary - kernel. If you select them all, kernel will probe it one by - one and will fallback to default. - -# Alphabetically sorted list of Non standard 32 bit platforms - config X86_SUPPORTS_MEMORY_FAILURE def_bool y # MCE code calls memory_failure(): @@ -750,19 +731,6 @@ config X86_SUPPORTS_MEMORY_FAILURE depends on X86_64 || !SPARSEMEM select ARCH_SUPPORTS_MEMORY_FAILURE -config STA2X11 - bool "STA2X11 Companion Chip Support" - depends on X86_32_NON_STANDARD && PCI - select SWIOTLB - select MFD_STA2X11 - select GPIOLIB - help - This adds support for boards based on the STA2X11 IO-Hub, - a.k.a. "ConneXt". The chip is used in place of the standard - PC chipset, so all "standard" peripherals are missing. If this - option is selected the kernel will still be able to boot on - standard PC machines. - config X86_32_IRIS tristate "Eurobraille/Iris poweroff module" depends on X86_32 @@ -1012,8 +980,7 @@ config NR_CPUS_RANGE_BEGIN config NR_CPUS_RANGE_END int depends on X86_32 - default 64 if SMP && X86_BIGSMP - default 8 if SMP && !X86_BIGSMP + default 8 if SMP default 1 if !SMP config NR_CPUS_RANGE_END @@ -1026,7 +993,6 @@ config NR_CPUS_RANGE_END config NR_CPUS_DEFAULT int depends on X86_32 - default 32 if X86_BIGSMP default 8 if SMP default 1 if !SMP @@ -1102,7 +1068,7 @@ config UP_LATE_INIT config X86_UP_APIC bool "Local APIC support on uniprocessors" if !PCI_MSI default PCI_MSI - depends on X86_32 && !SMP && !X86_32_NON_STANDARD + depends on X86_32 && !SMP help A local APIC (Advanced Programmable Interrupt Controller) is an integrated interrupt controller in the CPU. If you have a single-CPU @@ -1127,7 +1093,7 @@ config X86_UP_IOAPIC config X86_LOCAL_APIC def_bool y - depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC || PCI_MSI + depends on X86_64 || SMP || X86_UP_APIC || PCI_MSI select IRQ_DOMAIN_HIERARCHY config ACPI_MADT_WAKEUP @@ -1396,15 +1362,11 @@ config X86_CPUID with major 203 and minors 0 to 31 for /dev/cpu/0/cpuid to /dev/cpu/31/cpuid. -choice - prompt "High Memory Support" - default HIGHMEM4G +config HIGHMEM4G + bool "High Memory Support" depends on X86_32 - -config NOHIGHMEM - bool "off" help - Linux can use up to 64 Gigabytes of physical memory on x86 systems. + Linux can use up to 4 Gigabytes of physical memory on x86 systems. However, the address space of 32-bit x86 processors is only 4 Gigabytes large. That means that, if you have a large amount of physical memory, not all of it can be "permanently mapped" by the @@ -1420,38 +1382,9 @@ config NOHIGHMEM possible. If the machine has between 1 and 4 Gigabytes physical RAM, then - answer "4GB" here. - - If more than 4 Gigabytes is used then answer "64GB" here. This - selection turns Intel PAE (Physical Address Extension) mode on. - PAE implements 3-level paging on IA32 processors. PAE is fully - supported by Linux, PAE mode is implemented on all recent Intel - processors (Pentium Pro and better). NOTE: If you say "64GB" here, - then the kernel will not boot on CPUs that don't support PAE! - - The actual amount of total physical memory will either be - auto detected or can be forced by using a kernel command line option - such as "mem=256M". (Try "man bootparam" or see the documentation of - your boot loader (lilo or loadlin) about how to pass options to the - kernel at boot time.) - - If unsure, say "off". - -config HIGHMEM4G - bool "4GB" - help - Select this if you have a 32-bit processor and between 1 and 4 - gigabytes of physical RAM. - -config HIGHMEM64G - bool "64GB" - depends on X86_HAVE_PAE - select X86_PAE - help - Select this if you have a 32-bit processor and more than 4 - gigabytes of physical RAM. + answer "Y" here. -endchoice + If unsure, say N. choice prompt "Memory split" if EXPERT @@ -1497,14 +1430,12 @@ config PAGE_OFFSET depends on X86_32 config HIGHMEM - def_bool y - depends on X86_32 && (HIGHMEM64G || HIGHMEM4G) + def_bool HIGHMEM4G config X86_PAE bool "PAE (Physical Address Extension) Support" depends on X86_32 && X86_HAVE_PAE select PHYS_ADDR_T_64BIT - select SWIOTLB help PAE is required for NX support, and furthermore enables larger swapspace support for non-overcommit purposes. It @@ -1574,8 +1505,7 @@ config AMD_MEM_ENCRYPT config NUMA bool "NUMA Memory Allocation and Scheduler Support" depends on SMP - depends on X86_64 || (X86_32 && HIGHMEM64G && X86_BIGSMP) - default y if X86_BIGSMP + depends on X86_64 select USE_PERCPU_NUMA_NODE_ID select OF_NUMA if OF help @@ -1588,9 +1518,6 @@ config NUMA For 64-bit this is recommended if the system is Intel Core i7 (or later), AMD Opteron, or EM64T NUMA. - For 32-bit this is only needed if you boot a 32-bit - kernel on a 64-bit NUMA platform. - Otherwise, you should say N. config AMD_NUMA @@ -1629,7 +1556,7 @@ config ARCH_FLATMEM_ENABLE config ARCH_SPARSEMEM_ENABLE def_bool y - depends on X86_64 || NUMA || X86_32 || X86_32_NON_STANDARD + depends on X86_64 || NUMA || X86_32 select SPARSEMEM_STATIC if X86_32 select SPARSEMEM_VMEMMAP_ENABLE if X86_64 @@ -1675,15 +1602,6 @@ config X86_PMEM_LEGACY Say Y if unsure. -config HIGHPTE - bool "Allocate 3rd-level pagetables from highmem" - depends on HIGHMEM - help - The VM uses one page table entry for each page of physical memory. - For systems with a lot of RAM, this can be wasteful of precious - low memory. Setting this option will put user-space page table - entries in high memory. - config X86_CHECK_BIOS_CORRUPTION bool "Check for low memory corruption" help @@ -2451,18 +2369,20 @@ config CC_HAS_NAMED_AS def_bool $(success,echo 'int __seg_fs fs; int __seg_gs gs;' | $(CC) -x c - -S -o /dev/null) depends on CC_IS_GCC +# +# -fsanitize=kernel-address (KASAN) and -fsanitize=thread (KCSAN) +# are incompatible with named address spaces with GCC < 13.3 +# (see GCC PR sanitizer/111736 and also PR sanitizer/115172). +# + config CC_HAS_NAMED_AS_FIXED_SANITIZERS - def_bool CC_IS_GCC && GCC_VERSION >= 130300 + def_bool y + depends on !(KASAN || KCSAN) || GCC_VERSION >= 130300 + depends on !(UBSAN_BOOL && KASAN) || GCC_VERSION >= 140200 config USE_X86_SEG_SUPPORT - def_bool y - depends on CC_HAS_NAMED_AS - # - # -fsanitize=kernel-address (KASAN) and -fsanitize=thread - # (KCSAN) are incompatible with named address spaces with - # GCC < 13.3 - see GCC PR sanitizer/111736. - # - depends on !(KASAN || KCSAN) || CC_HAS_NAMED_AS_FIXED_SANITIZERS + def_bool CC_HAS_NAMED_AS + depends on CC_HAS_NAMED_AS_FIXED_SANITIZERS config CC_HAS_SLS def_bool $(cc-option,-mharden-sls=all) @@ -2473,6 +2393,10 @@ config CC_HAS_RETURN_THUNK config CC_HAS_ENTRY_PADDING def_bool $(cc-option,-fpatchable-function-entry=16,16) +config CC_HAS_KCFI_ARITY + def_bool $(cc-option,-fsanitize=kcfi -fsanitize-kcfi-arity) + depends on CC_IS_CLANG && !RUST + config FUNCTION_PADDING_CFI int default 59 if FUNCTION_ALIGNMENT_64B @@ -2498,6 +2422,10 @@ config FINEIBT depends on X86_KERNEL_IBT && CFI_CLANG && MITIGATION_RETPOLINE select CALL_PADDING +config FINEIBT_BHI + def_bool y + depends on FINEIBT && CC_HAS_KCFI_ARITY + config HAVE_CALL_THUNKS def_bool y depends on CC_HAS_ENTRY_PADDING && MITIGATION_RETHUNK && OBJTOOL @@ -3202,4 +3130,6 @@ config HAVE_ATOMIC_IOMAP source "arch/x86/kvm/Kconfig" +source "arch/x86/Kconfig.cpufeatures" + source "arch/x86/Kconfig.assembler" diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 2a7279d80460..753b8763abae 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -1,9 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 # Put here option for CPU selection and depending optimization choice - prompt "Processor family" - default M686 if X86_32 - default GENERIC_CPU if X86_64 + prompt "x86-32 Processor family" + depends on X86_32 + default M686 help This is the processor type of your CPU. This information is used for optimizing purposes. In order to compile a kernel @@ -31,7 +31,6 @@ choice - "Pentium-4" for the Intel Pentium 4 or P4-based Celeron. - "K6" for the AMD K6, K6-II and K6-III (aka K6-3D). - "Athlon" for the AMD K7 family (Athlon/Duron/Thunderbird). - - "Opteron/Athlon64/Hammer/K8" for all K8 and newer AMD CPUs. - "Crusoe" for the Transmeta Crusoe series. - "Efficeon" for the Transmeta Efficeon series. - "Winchip-C6" for original IDT Winchip. @@ -42,13 +41,10 @@ choice - "CyrixIII/VIA C3" for VIA Cyrix III or VIA C3. - "VIA C3-2" for VIA C3-2 "Nehemiah" (model 9 and above). - "VIA C7" for VIA C7. - - "Intel P4" for the Pentium 4/Netburst microarchitecture. - - "Core 2/newer Xeon" for all core2 and newer Intel CPUs. - "Intel Atom" for the Atom-microarchitecture CPUs. - - "Generic-x86-64" for a kernel which runs on any x86-64 CPU. See each option's help text for additional details. If you don't know - what to do, choose "486". + what to do, choose "Pentium-Pro". config M486SX bool "486SX" @@ -114,11 +110,11 @@ config MPENTIUMIII extensions. config MPENTIUMM - bool "Pentium M" + bool "Pentium M/Pentium Dual Core/Core Solo/Core Duo" depends on X86_32 help Select this for Intel Pentium M (not Pentium-4 M) - notebook chips. + "Merom" Core Solo/Duo notebook chips config MPENTIUM4 bool "Pentium-4/Celeron(P4-based)/Pentium-4 M/older Xeon" @@ -139,22 +135,10 @@ config MPENTIUM4 -Mobile Pentium 4 -Mobile Pentium 4 M -Extreme Edition (Gallatin) - -Prescott - -Prescott 2M - -Cedar Mill - -Presler - -Smithfiled Xeons (Intel Xeon, Xeon MP, Xeon LV, Xeon MV) corename: -Foster -Prestonia -Gallatin - -Nocona - -Irwindale - -Cranford - -Potomac - -Paxville - -Dempsey - config MK6 bool "K6/K6-II/K6-III" @@ -172,13 +156,6 @@ config MK7 some extended instructions, and passes appropriate optimization flags to GCC. -config MK8 - bool "Opteron/Athlon64/Hammer/K8" - help - Select this for an AMD Opteron or Athlon64 Hammer-family processor. - Enables use of some extended instructions, and passes appropriate - optimization flags to GCC. - config MCRUSOE bool "Crusoe" depends on X86_32 @@ -258,42 +235,14 @@ config MVIAC7 Select this for a VIA C7. Selecting this uses the correct cache shift and tells gcc to treat the CPU as a 686. -config MPSC - bool "Intel P4 / older Netburst based Xeon" - depends on X86_64 - help - Optimize for Intel Pentium 4, Pentium D and older Nocona/Dempsey - Xeon CPUs with Intel 64bit which is compatible with x86-64. - Note that the latest Xeons (Xeon 51xx and 53xx) are not based on the - Netburst core and shouldn't use this option. You can distinguish them - using the cpu family field - in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one. - -config MCORE2 - bool "Core 2/newer Xeon" - help - - Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and - 53xx) CPUs. You can distinguish newer from older Xeons by the CPU - family in /proc/cpuinfo. Newer ones have 6 and older ones 15 - (not a typo) - config MATOM bool "Intel Atom" help - Select this for the Intel Atom platform. Intel Atom CPUs have an in-order pipelining architecture and thus can benefit from accordingly optimized code. Use a recent GCC with specific Atom support in order to fully benefit from selecting this option. -config GENERIC_CPU - bool "Generic-x86-64" - depends on X86_64 - help - Generic x86-64 CPU. - Run equally well on all x86-64 CPUs. - endchoice config X86_GENERIC @@ -317,8 +266,8 @@ config X86_INTERNODE_CACHE_SHIFT config X86_L1_CACHE_SHIFT int - default "7" if MPENTIUM4 || MPSC - default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU + default "7" if MPENTIUM4 + default "6" if MK7 || MPENTIUMM || MATOM || MVIAC7 || X86_GENERIC || X86_64 default "4" if MELAN || M486SX || M486 || MGEODEGX1 default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX @@ -336,51 +285,35 @@ config X86_ALIGNMENT_16 config X86_INTEL_USERCOPY def_bool y - depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2 + depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK7 || MEFFICEON config X86_USE_PPRO_CHECKSUM def_bool y - depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM - -# -# P6_NOPs are a relatively minor optimization that require a family >= -# 6 processor, except that it is broken on certain VIA chips. -# Furthermore, AMD chips prefer a totally different sequence of NOPs -# (which work on all CPUs). In addition, it looks like Virtual PC -# does not understand them. -# -# As a result, disallow these if we're not compiling for X86_64 (these -# NOPs do work on all x86-64 capable chips); the list of processors in -# the right-hand clause are the cores that benefit from this optimization. -# -config X86_P6_NOP - def_bool y - depends on X86_64 - depends on (MCORE2 || MPENTIUM4 || MPSC) + depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MATOM config X86_TSC def_bool y - depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) || X86_64 + depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MATOM) || X86_64 config X86_HAVE_PAE def_bool y - depends on MCRUSOE || MEFFICEON || MCYRIXIII || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC7 || MCORE2 || MATOM || X86_64 + depends on MCRUSOE || MEFFICEON || MCYRIXIII || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC7 || MATOM || X86_64 -config X86_CMPXCHG64 +config X86_CX8 def_bool y - depends on X86_HAVE_PAE || M586TSC || M586MMX || MK6 || MK7 + depends on X86_HAVE_PAE || M586TSC || M586MMX || MK6 || MK7 || MGEODEGX1 || MGEODE_LX # this should be set for all -march=.. options where the compiler # generates cmov. config X86_CMOV def_bool y - depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX) + depends on (MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || MATOM || MGEODE_LX || X86_64) config X86_MINIMUM_CPU_FAMILY int default "64" if X86_64 - default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCORE2 || MK7 || MK8) - default "5" if X86_32 && X86_CMPXCHG64 + default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MK7) + default "5" if X86_32 && X86_CX8 default "4" config X86_DEBUGCTLMSR @@ -401,6 +334,10 @@ menuconfig PROCESSOR_SELECT This lets you choose what x86 vendor support code your kernel will include. +config BROADCAST_TLB_FLUSH + def_bool y + depends on CPU_SUP_AMD && 64BIT + config CPU_SUP_INTEL default y bool "Support Intel processors" if PROCESSOR_SELECT diff --git a/arch/x86/Kconfig.cpufeatures b/arch/x86/Kconfig.cpufeatures new file mode 100644 index 000000000000..e12d5b7e39a2 --- /dev/null +++ b/arch/x86/Kconfig.cpufeatures @@ -0,0 +1,201 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# x86 feature bits (see arch/x86/include/asm/cpufeatures.h) that are +# either REQUIRED to be enabled, or DISABLED (always ignored) for this +# particular compile-time configuration. The tests for these features +# are turned into compile-time constants via the generated +# <asm/cpufeaturemasks.h>. +# +# The naming of these variables *must* match asm/cpufeatures.h, e.g., +# X86_FEATURE_ALWAYS <==> X86_REQUIRED_FEATURE_ALWAYS +# X86_FEATURE_FRED <==> X86_DISABLED_FEATURE_FRED +# +# And these REQUIRED and DISABLED config options are manipulated in an +# AWK script as the following example: +# +# +----------------------+ +# | X86_FRED = y ? | +# +----------------------+ +# / \ +# Y / \ N +# +-------------------------------------+ +-------------------------------+ +# | X86_DISABLED_FEATURE_FRED undefined | | X86_DISABLED_FEATURE_FRED = y | +# +-------------------------------------+ +-------------------------------+ +# | +# | +# +-------------------------------------------+ | +# | X86_FEATURE_FRED: feature word 12, bit 17 | ---->| +# +-------------------------------------------+ | +# | +# | +# +-------------------------------+ +# | set bit 17 of DISABLED_MASK12 | +# +-------------------------------+ +# + +config X86_REQUIRED_FEATURE_ALWAYS + def_bool y + +config X86_REQUIRED_FEATURE_NOPL + def_bool y + depends on X86_64 || X86_P6_NOP + +config X86_REQUIRED_FEATURE_CX8 + def_bool y + depends on X86_CX8 + +# this should be set for all -march=.. options where the compiler +# generates cmov. +config X86_REQUIRED_FEATURE_CMOV + def_bool y + depends on X86_CMOV + +# this should be set for all -march= options where the compiler +# generates movbe. +config X86_REQUIRED_FEATURE_MOVBE + def_bool y + depends on MATOM + +config X86_REQUIRED_FEATURE_CPUID + def_bool y + depends on X86_64 + +config X86_REQUIRED_FEATURE_UP + def_bool y + depends on !SMP + +config X86_REQUIRED_FEATURE_FPU + def_bool y + depends on !MATH_EMULATION + +config X86_REQUIRED_FEATURE_PAE + def_bool y + depends on X86_64 || X86_PAE + +config X86_REQUIRED_FEATURE_PSE + def_bool y + depends on X86_64 && !PARAVIRT_XXL + +config X86_REQUIRED_FEATURE_PGE + def_bool y + depends on X86_64 && !PARAVIRT_XXL + +config X86_REQUIRED_FEATURE_MSR + def_bool y + depends on X86_64 + +config X86_REQUIRED_FEATURE_FXSR + def_bool y + depends on X86_64 + +config X86_REQUIRED_FEATURE_XMM + def_bool y + depends on X86_64 + +config X86_REQUIRED_FEATURE_XMM2 + def_bool y + depends on X86_64 + +config X86_REQUIRED_FEATURE_LM + def_bool y + depends on X86_64 + +config X86_DISABLED_FEATURE_UMIP + def_bool y + depends on !X86_UMIP + +config X86_DISABLED_FEATURE_VME + def_bool y + depends on X86_64 + +config X86_DISABLED_FEATURE_K6_MTRR + def_bool y + depends on X86_64 + +config X86_DISABLED_FEATURE_CYRIX_ARR + def_bool y + depends on X86_64 + +config X86_DISABLED_FEATURE_CENTAUR_MCR + def_bool y + depends on X86_64 + +config X86_DISABLED_FEATURE_PCID + def_bool y + depends on !X86_64 + +config X86_DISABLED_FEATURE_PKU + def_bool y + depends on !X86_INTEL_MEMORY_PROTECTION_KEYS + +config X86_DISABLED_FEATURE_OSPKE + def_bool y + depends on !X86_INTEL_MEMORY_PROTECTION_KEYS + +config X86_DISABLED_FEATURE_LA57 + def_bool y + depends on !X86_5LEVEL + +config X86_DISABLED_FEATURE_PTI + def_bool y + depends on !MITIGATION_PAGE_TABLE_ISOLATION + +config X86_DISABLED_FEATURE_RETPOLINE + def_bool y + depends on !MITIGATION_RETPOLINE + +config X86_DISABLED_FEATURE_RETPOLINE_LFENCE + def_bool y + depends on !MITIGATION_RETPOLINE + +config X86_DISABLED_FEATURE_RETHUNK + def_bool y + depends on !MITIGATION_RETHUNK + +config X86_DISABLED_FEATURE_UNRET + def_bool y + depends on !MITIGATION_UNRET_ENTRY + +config X86_DISABLED_FEATURE_CALL_DEPTH + def_bool y + depends on !MITIGATION_CALL_DEPTH_TRACKING + +config X86_DISABLED_FEATURE_LAM + def_bool y + depends on !ADDRESS_MASKING + +config X86_DISABLED_FEATURE_ENQCMD + def_bool y + depends on !INTEL_IOMMU_SVM + +config X86_DISABLED_FEATURE_SGX + def_bool y + depends on !X86_SGX + +config X86_DISABLED_FEATURE_XENPV + def_bool y + depends on !XEN_PV + +config X86_DISABLED_FEATURE_TDX_GUEST + def_bool y + depends on !INTEL_TDX_GUEST + +config X86_DISABLED_FEATURE_USER_SHSTK + def_bool y + depends on !X86_USER_SHADOW_STACK + +config X86_DISABLED_FEATURE_IBT + def_bool y + depends on !X86_KERNEL_IBT + +config X86_DISABLED_FEATURE_FRED + def_bool y + depends on !X86_FRED + +config X86_DISABLED_FEATURE_SEV_SNP + def_bool y + depends on !KVM_AMD_SEV + +config X86_DISABLED_FEATURE_INVLPGB + def_bool y + depends on !BROADCAST_TLB_FLUSH diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 3ba7e185924e..0fc7e8fd1a2e 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -142,14 +142,7 @@ ifeq ($(CONFIG_X86_32),y) KBUILD_CFLAGS += -ffreestanding endif - ifeq ($(CONFIG_STACKPROTECTOR),y) - ifeq ($(CONFIG_SMP),y) - KBUILD_CFLAGS += -mstack-protector-guard-reg=fs \ - -mstack-protector-guard-symbol=__ref_stack_chk_guard - else - KBUILD_CFLAGS += -mstack-protector-guard=global - endif - endif + percpu_seg := fs else BITS := 64 UTS_MACHINE := x86_64 @@ -180,25 +173,24 @@ else # Use -mskip-rax-setup if supported. KBUILD_CFLAGS += $(call cc-option,-mskip-rax-setup) - # FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu) - cflags-$(CONFIG_MK8) += -march=k8 - cflags-$(CONFIG_MPSC) += -march=nocona - cflags-$(CONFIG_MCORE2) += -march=core2 - cflags-$(CONFIG_MATOM) += -march=atom - cflags-$(CONFIG_GENERIC_CPU) += -mtune=generic - KBUILD_CFLAGS += $(cflags-y) - - rustflags-$(CONFIG_MK8) += -Ctarget-cpu=k8 - rustflags-$(CONFIG_MPSC) += -Ctarget-cpu=nocona - rustflags-$(CONFIG_MCORE2) += -Ctarget-cpu=core2 - rustflags-$(CONFIG_MATOM) += -Ctarget-cpu=atom - rustflags-$(CONFIG_GENERIC_CPU) += -Ztune-cpu=generic - KBUILD_RUSTFLAGS += $(rustflags-y) + KBUILD_CFLAGS += -march=x86-64 -mtune=generic + KBUILD_RUSTFLAGS += -Ctarget-cpu=x86-64 -Ztune-cpu=generic KBUILD_CFLAGS += -mno-red-zone KBUILD_CFLAGS += -mcmodel=kernel KBUILD_RUSTFLAGS += -Cno-redzone=y KBUILD_RUSTFLAGS += -Ccode-model=kernel + + percpu_seg := gs +endif + +ifeq ($(CONFIG_STACKPROTECTOR),y) + ifeq ($(CONFIG_SMP),y) + KBUILD_CFLAGS += -mstack-protector-guard-reg=$(percpu_seg) + KBUILD_CFLAGS += -mstack-protector-guard-symbol=__ref_stack_chk_guard + else + KBUILD_CFLAGS += -mstack-protector-guard=global + endif endif # @@ -279,6 +271,21 @@ archheaders: $(Q)$(MAKE) $(build)=arch/x86/entry/syscalls all ### +# <asm/cpufeaturemasks.h> header generation + +cpufeaturemasks.hdr := arch/x86/include/generated/asm/cpufeaturemasks.h +cpufeaturemasks.awk := $(srctree)/arch/x86/tools/cpufeaturemasks.awk +cpufeatures_hdr := $(srctree)/arch/x86/include/asm/cpufeatures.h +targets += $(cpufeaturemasks.hdr) +quiet_cmd_gen_featuremasks = GEN $@ + cmd_gen_featuremasks = $(AWK) -f $(cpufeaturemasks.awk) $(cpufeatures_hdr) $(KCONFIG_CONFIG) > $@ + +$(cpufeaturemasks.hdr): $(cpufeaturemasks.awk) $(cpufeatures_hdr) $(KCONFIG_CONFIG) FORCE + $(shell mkdir -p $(dir $@)) + $(call if_changed,gen_featuremasks) +archprepare: $(cpufeaturemasks.hdr) + +### # Kernel objects libs-y += arch/x86/lib/ diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu index 94834c4b5e5e..af7de9a42752 100644 --- a/arch/x86/Makefile_32.cpu +++ b/arch/x86/Makefile_32.cpu @@ -24,7 +24,6 @@ cflags-$(CONFIG_MK6) += -march=k6 # Please note, that patches that add -march=athlon-xp and friends are pointless. # They make zero difference whatsosever to performance at this time. cflags-$(CONFIG_MK7) += -march=athlon -cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8,-march=athlon) cflags-$(CONFIG_MCRUSOE) += -march=i686 $(align) cflags-$(CONFIG_MEFFICEON) += -march=i686 $(call tune,pentium3) $(align) cflags-$(CONFIG_MWINCHIPC6) += $(call cc-option,-march=winchip-c6,-march=i586) @@ -32,9 +31,7 @@ cflags-$(CONFIG_MWINCHIP3D) += $(call cc-option,-march=winchip2,-march=i586) cflags-$(CONFIG_MCYRIXIII) += $(call cc-option,-march=c3,-march=i486) $(align) cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686) cflags-$(CONFIG_MVIAC7) += -march=i686 -cflags-$(CONFIG_MCORE2) += -march=i686 $(call tune,core2) -cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom,$(call cc-option,-march=core2,-march=i686)) \ - $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic)) +cflags-$(CONFIG_MATOM) += -march=atom # AMD Elan support cflags-$(CONFIG_MELAN) += -march=i486 diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h index 0f24f7ebec9b..38f17a1e1e36 100644 --- a/arch/x86/boot/boot.h +++ b/arch/x86/boot/boot.h @@ -16,7 +16,7 @@ #define STACK_SIZE 1024 /* Minimum number of bytes for stack */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/stdarg.h> #include <linux/types.h> @@ -327,6 +327,6 @@ void probe_cards(int unsafe); /* video-vesa.c */ void vesa_store_edid(void); -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* BOOT_BOOT_H */ diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 0d37420cad02..1cdcd4aaf395 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -235,7 +235,7 @@ static void handle_relocations(void *output, unsigned long output_len, /* * Process relocations: 32 bit relocations first then 64 bit after. - * Three sets of binary relocations are added to the end of the kernel + * Two sets of binary relocations are added to the end of the kernel * before compression. Each relocation table entry is the kernel * address of the location which needs to be updated stored as a * 32-bit value which is sign extended to 64 bits. @@ -245,8 +245,6 @@ static void handle_relocations(void *output, unsigned long output_len, * kernel bits... * 0 - zero terminator for 64 bit relocations * 64 bit relocation repeated - * 0 - zero terminator for inverse 32 bit relocations - * 32 bit inverse relocation repeated * 0 - zero terminator for 32 bit relocations * 32 bit relocation repeated * @@ -263,16 +261,6 @@ static void handle_relocations(void *output, unsigned long output_len, *(uint32_t *)ptr += delta; } #ifdef CONFIG_X86_64 - while (*--reloc) { - long extended = *reloc; - extended += map; - - ptr = (unsigned long)extended; - if (ptr < min_addr || ptr > max_addr) - error("inverse 32-bit relocation outside of kernel!\n"); - - *(int32_t *)ptr -= delta; - } for (reloc--; *reloc; reloc--) { long extended = *reloc; extended += map; diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c index 0aae4d4ed615..f82de8de5dc6 100644 --- a/arch/x86/boot/cpucheck.c +++ b/arch/x86/boot/cpucheck.c @@ -22,10 +22,11 @@ # include "boot.h" #endif #include <linux/types.h> +#include <asm/cpufeaturemasks.h> #include <asm/intel-family.h> #include <asm/processor-flags.h> -#include <asm/required-features.h> #include <asm/msr-index.h> + #include "string.h" #include "msr.h" diff --git a/arch/x86/boot/cpuflags.c b/arch/x86/boot/cpuflags.c index d75237ba7ce9..0cabdacb2a2f 100644 --- a/arch/x86/boot/cpuflags.c +++ b/arch/x86/boot/cpuflags.c @@ -3,7 +3,6 @@ #include "bitops.h" #include <asm/processor-flags.h> -#include <asm/required-features.h> #include <asm/msr-index.h> #include "cpuflags.h" diff --git a/arch/x86/boot/mkcpustr.c b/arch/x86/boot/mkcpustr.c index da0ccc5de538..22d730b227e3 100644 --- a/arch/x86/boot/mkcpustr.c +++ b/arch/x86/boot/mkcpustr.c @@ -12,8 +12,6 @@ #include <stdio.h> -#include "../include/asm/required-features.h" -#include "../include/asm/disabled-features.h" #include "../include/asm/cpufeatures.h" #include "../include/asm/vmxfeatures.h" #include "../kernel/cpu/capflags.c" @@ -23,6 +21,7 @@ int main(void) int i, j; const char *str; + printf("#include <asm/cpufeaturemasks.h>\n\n"); printf("static const char x86_cap_strs[] =\n"); for (i = 0; i < NCAPINTS; i++) { diff --git a/arch/x86/configs/xen.config b/arch/x86/configs/xen.config index 581296255b39..d5d091e03bd3 100644 --- a/arch/x86/configs/xen.config +++ b/arch/x86/configs/xen.config @@ -1,6 +1,4 @@ # global x86 required specific stuff -# On 32-bit HIGHMEM4G is not allowed -CONFIG_HIGHMEM64G=y CONFIG_64BIT=y # These enable us to allow some of the diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index eb153eff9331..b37881bb9f15 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -17,6 +17,7 @@ */ #include <linux/linkage.h> +#include <linux/objtool.h> #include <asm/frame.h> #define STATE1 %xmm0 @@ -1071,6 +1072,7 @@ SYM_FUNC_END(_aesni_inc) * size_t len, u8 *iv) */ SYM_FUNC_START(aesni_ctr_enc) + ANNOTATE_NOENDBR FRAME_BEGIN cmp $16, LEN jb .Lctr_enc_just_ret diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S index 646477a13e11..1dfef28c1266 100644 --- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S @@ -16,6 +16,7 @@ */ #include <linux/linkage.h> +#include <linux/cfi_types.h> #include <asm/frame.h> #define CAMELLIA_TABLE_BYTE_LEN 272 @@ -882,7 +883,7 @@ SYM_FUNC_START_LOCAL(__camellia_dec_blk16) jmp .Ldec_max24; SYM_FUNC_END(__camellia_dec_blk16) -SYM_FUNC_START(camellia_ecb_enc_16way) +SYM_TYPED_FUNC_START(camellia_ecb_enc_16way) /* input: * %rdi: ctx, CTX * %rsi: dst (16 blocks) @@ -907,7 +908,7 @@ SYM_FUNC_START(camellia_ecb_enc_16way) RET; SYM_FUNC_END(camellia_ecb_enc_16way) -SYM_FUNC_START(camellia_ecb_dec_16way) +SYM_TYPED_FUNC_START(camellia_ecb_dec_16way) /* input: * %rdi: ctx, CTX * %rsi: dst (16 blocks) @@ -937,7 +938,7 @@ SYM_FUNC_START(camellia_ecb_dec_16way) RET; SYM_FUNC_END(camellia_ecb_dec_16way) -SYM_FUNC_START(camellia_cbc_dec_16way) +SYM_TYPED_FUNC_START(camellia_cbc_dec_16way) /* input: * %rdi: ctx, CTX * %rsi: dst (16 blocks) diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S index a0eb94e53b1b..b1c9b9450555 100644 --- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S @@ -6,6 +6,7 @@ */ #include <linux/linkage.h> +#include <linux/cfi_types.h> #include <asm/frame.h> #define CAMELLIA_TABLE_BYTE_LEN 272 diff --git a/arch/x86/crypto/camellia-x86_64-asm_64.S b/arch/x86/crypto/camellia-x86_64-asm_64.S index 816b6bb8bded..824cb94de6c2 100644 --- a/arch/x86/crypto/camellia-x86_64-asm_64.S +++ b/arch/x86/crypto/camellia-x86_64-asm_64.S @@ -6,6 +6,7 @@ */ #include <linux/linkage.h> +#include <linux/cfi_types.h> .file "camellia-x86_64-asm_64.S" .text @@ -177,7 +178,7 @@ bswapq RAB0; \ movq RAB0, 4*2(RIO); -SYM_FUNC_START(__camellia_enc_blk) +SYM_TYPED_FUNC_START(__camellia_enc_blk) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -224,7 +225,7 @@ SYM_FUNC_START(__camellia_enc_blk) RET; SYM_FUNC_END(__camellia_enc_blk) -SYM_FUNC_START(camellia_dec_blk) +SYM_TYPED_FUNC_START(camellia_dec_blk) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -411,7 +412,7 @@ SYM_FUNC_END(camellia_dec_blk) bswapq RAB1; \ movq RAB1, 12*2(RIO); -SYM_FUNC_START(__camellia_enc_blk_2way) +SYM_TYPED_FUNC_START(__camellia_enc_blk_2way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -460,7 +461,7 @@ SYM_FUNC_START(__camellia_enc_blk_2way) RET; SYM_FUNC_END(__camellia_enc_blk_2way) -SYM_FUNC_START(camellia_dec_blk_2way) +SYM_TYPED_FUNC_START(camellia_dec_blk_2way) /* input: * %rdi: ctx, CTX * %rsi: dst diff --git a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S index 97e283621851..84e47f7f6188 100644 --- a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S @@ -9,6 +9,7 @@ */ #include <linux/linkage.h> +#include <linux/cfi_types.h> #include <asm/frame.h> #include "glue_helper-asm-avx.S" @@ -656,7 +657,7 @@ SYM_FUNC_START_LOCAL(__serpent_dec_blk8_avx) RET; SYM_FUNC_END(__serpent_dec_blk8_avx) -SYM_FUNC_START(serpent_ecb_enc_8way_avx) +SYM_TYPED_FUNC_START(serpent_ecb_enc_8way_avx) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -674,7 +675,7 @@ SYM_FUNC_START(serpent_ecb_enc_8way_avx) RET; SYM_FUNC_END(serpent_ecb_enc_8way_avx) -SYM_FUNC_START(serpent_ecb_dec_8way_avx) +SYM_TYPED_FUNC_START(serpent_ecb_dec_8way_avx) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -692,7 +693,7 @@ SYM_FUNC_START(serpent_ecb_dec_8way_avx) RET; SYM_FUNC_END(serpent_ecb_dec_8way_avx) -SYM_FUNC_START(serpent_cbc_dec_8way_avx) +SYM_TYPED_FUNC_START(serpent_cbc_dec_8way_avx) /* input: * %rdi: ctx, CTX * %rsi: dst diff --git a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S index d2288bf38a8a..071e90e7f0d8 100644 --- a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S +++ b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S @@ -6,6 +6,7 @@ */ #include <linux/linkage.h> +#include <linux/cfi_types.h> .file "twofish-x86_64-asm-3way.S" .text @@ -220,7 +221,7 @@ rorq $32, RAB2; \ outunpack3(mov, RIO, 2, RAB, 2); -SYM_FUNC_START(__twofish_enc_blk_3way) +SYM_TYPED_FUNC_START(__twofish_enc_blk_3way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -269,7 +270,7 @@ SYM_FUNC_START(__twofish_enc_blk_3way) RET; SYM_FUNC_END(__twofish_enc_blk_3way) -SYM_FUNC_START(twofish_dec_blk_3way) +SYM_TYPED_FUNC_START(twofish_dec_blk_3way) /* input: * %rdi: ctx, CTX * %rsi: dst diff --git a/arch/x86/crypto/twofish-x86_64-asm_64.S b/arch/x86/crypto/twofish-x86_64-asm_64.S index 775af290cd19..e08b4ba07b93 100644 --- a/arch/x86/crypto/twofish-x86_64-asm_64.S +++ b/arch/x86/crypto/twofish-x86_64-asm_64.S @@ -8,6 +8,7 @@ .text #include <linux/linkage.h> +#include <linux/cfi_types.h> #include <asm/asm-offsets.h> #define a_offset 0 @@ -202,7 +203,7 @@ xor %r8d, d ## D;\ ror $1, d ## D; -SYM_FUNC_START(twofish_enc_blk) +SYM_TYPED_FUNC_START(twofish_enc_blk) pushq R1 /* %rdi contains the ctx address */ @@ -255,7 +256,7 @@ SYM_FUNC_START(twofish_enc_blk) RET SYM_FUNC_END(twofish_enc_blk) -SYM_FUNC_START(twofish_dec_blk) +SYM_TYPED_FUNC_START(twofish_dec_blk) pushq R1 /* %rdi contains the ctx address */ diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile index ce1cc1622385..72cae8e0ce85 100644 --- a/arch/x86/entry/Makefile +++ b/arch/x86/entry/Makefile @@ -7,12 +7,13 @@ KASAN_SANITIZE := n UBSAN_SANITIZE := n KCOV_INSTRUMENT := n -CFLAGS_REMOVE_common.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_syscall_32.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_syscall_64.o = $(CC_FLAGS_FTRACE) -CFLAGS_common.o += -fno-stack-protector +CFLAGS_syscall_32.o += -fno-stack-protector +CFLAGS_syscall_64.o += -fno-stack-protector obj-y := entry.o entry_$(BITS).o syscall_$(BITS).o -obj-y += common.o obj-y += vdso/ obj-y += vsyscall/ @@ -23,4 +24,3 @@ CFLAGS_REMOVE_entry_fred.o += -pg $(CC_FLAGS_FTRACE) obj-$(CONFIG_X86_FRED) += entry_64_fred.o entry_fred.o obj-$(CONFIG_IA32_EMULATION) += entry_64_compat.o syscall_32.o -obj-$(CONFIG_X86_X32_ABI) += syscall_x32.o diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index ea81770629ee..cb0911c5dc5d 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -431,6 +431,7 @@ For 32-bit we have the following conventions - kernel is built with /* rdi: arg1 ... normal C conventions. rax is saved/restored. */ .macro THUNK name, func SYM_FUNC_START(\name) + ANNOTATE_NOENDBR pushq %rbp movq %rsp, %rbp diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c deleted file mode 100644 index 14db5b85114c..000000000000 --- a/arch/x86/entry/common.c +++ /dev/null @@ -1,524 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * common.c - C code for kernel entry and exit - * Copyright (c) 2015 Andrew Lutomirski - * - * Based on asm and ptrace code by many authors. The code here originated - * in ptrace.c and signal.c. - */ - -#include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/sched/task_stack.h> -#include <linux/entry-common.h> -#include <linux/mm.h> -#include <linux/smp.h> -#include <linux/errno.h> -#include <linux/ptrace.h> -#include <linux/export.h> -#include <linux/nospec.h> -#include <linux/syscalls.h> -#include <linux/uaccess.h> -#include <linux/init.h> - -#ifdef CONFIG_XEN_PV -#include <xen/xen-ops.h> -#include <xen/events.h> -#endif - -#include <asm/apic.h> -#include <asm/desc.h> -#include <asm/traps.h> -#include <asm/vdso.h> -#include <asm/cpufeature.h> -#include <asm/fpu/api.h> -#include <asm/nospec-branch.h> -#include <asm/io_bitmap.h> -#include <asm/syscall.h> -#include <asm/irq_stack.h> - -#ifdef CONFIG_X86_64 - -static __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr) -{ - /* - * Convert negative numbers to very high and thus out of range - * numbers for comparisons. - */ - unsigned int unr = nr; - - if (likely(unr < NR_syscalls)) { - unr = array_index_nospec(unr, NR_syscalls); - regs->ax = x64_sys_call(regs, unr); - return true; - } - return false; -} - -static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr) -{ - /* - * Adjust the starting offset of the table, and convert numbers - * < __X32_SYSCALL_BIT to very high and thus out of range - * numbers for comparisons. - */ - unsigned int xnr = nr - __X32_SYSCALL_BIT; - - if (IS_ENABLED(CONFIG_X86_X32_ABI) && likely(xnr < X32_NR_syscalls)) { - xnr = array_index_nospec(xnr, X32_NR_syscalls); - regs->ax = x32_sys_call(regs, xnr); - return true; - } - return false; -} - -/* Returns true to return using SYSRET, or false to use IRET */ -__visible noinstr bool do_syscall_64(struct pt_regs *regs, int nr) -{ - add_random_kstack_offset(); - nr = syscall_enter_from_user_mode(regs, nr); - - instrumentation_begin(); - - if (!do_syscall_x64(regs, nr) && !do_syscall_x32(regs, nr) && nr != -1) { - /* Invalid system call, but still a system call. */ - regs->ax = __x64_sys_ni_syscall(regs); - } - - instrumentation_end(); - syscall_exit_to_user_mode(regs); - - /* - * Check that the register state is valid for using SYSRET to exit - * to userspace. Otherwise use the slower but fully capable IRET - * exit path. - */ - - /* XEN PV guests always use the IRET path */ - if (cpu_feature_enabled(X86_FEATURE_XENPV)) - return false; - - /* SYSRET requires RCX == RIP and R11 == EFLAGS */ - if (unlikely(regs->cx != regs->ip || regs->r11 != regs->flags)) - return false; - - /* CS and SS must match the values set in MSR_STAR */ - if (unlikely(regs->cs != __USER_CS || regs->ss != __USER_DS)) - return false; - - /* - * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP - * in kernel space. This essentially lets the user take over - * the kernel, since userspace controls RSP. - * - * TASK_SIZE_MAX covers all user-accessible addresses other than - * the deprecated vsyscall page. - */ - if (unlikely(regs->ip >= TASK_SIZE_MAX)) - return false; - - /* - * SYSRET cannot restore RF. It can restore TF, but unlike IRET, - * restoring TF results in a trap from userspace immediately after - * SYSRET. - */ - if (unlikely(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF))) - return false; - - /* Use SYSRET to exit to userspace */ - return true; -} -#endif - -#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) -static __always_inline int syscall_32_enter(struct pt_regs *regs) -{ - if (IS_ENABLED(CONFIG_IA32_EMULATION)) - current_thread_info()->status |= TS_COMPAT; - - return (int)regs->orig_ax; -} - -#ifdef CONFIG_IA32_EMULATION -bool __ia32_enabled __ro_after_init = !IS_ENABLED(CONFIG_IA32_EMULATION_DEFAULT_DISABLED); - -static int ia32_emulation_override_cmdline(char *arg) -{ - return kstrtobool(arg, &__ia32_enabled); -} -early_param("ia32_emulation", ia32_emulation_override_cmdline); -#endif - -/* - * Invoke a 32-bit syscall. Called with IRQs on in CT_STATE_KERNEL. - */ -static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, int nr) -{ - /* - * Convert negative numbers to very high and thus out of range - * numbers for comparisons. - */ - unsigned int unr = nr; - - if (likely(unr < IA32_NR_syscalls)) { - unr = array_index_nospec(unr, IA32_NR_syscalls); - regs->ax = ia32_sys_call(regs, unr); - } else if (nr != -1) { - regs->ax = __ia32_sys_ni_syscall(regs); - } -} - -#ifdef CONFIG_IA32_EMULATION -static __always_inline bool int80_is_external(void) -{ - const unsigned int offs = (0x80 / 32) * 0x10; - const u32 bit = BIT(0x80 % 32); - - /* The local APIC on XENPV guests is fake */ - if (cpu_feature_enabled(X86_FEATURE_XENPV)) - return false; - - /* - * If vector 0x80 is set in the APIC ISR then this is an external - * interrupt. Either from broken hardware or injected by a VMM. - * - * Note: In guest mode this is only valid for secure guests where - * the secure module fully controls the vAPIC exposed to the guest. - */ - return apic_read(APIC_ISR + offs) & bit; -} - -/** - * do_int80_emulation - 32-bit legacy syscall C entry from asm - * @regs: syscall arguments in struct pt_args on the stack. - * - * This entry point can be used by 32-bit and 64-bit programs to perform - * 32-bit system calls. Instances of INT $0x80 can be found inline in - * various programs and libraries. It is also used by the vDSO's - * __kernel_vsyscall fallback for hardware that doesn't support a faster - * entry method. Restarted 32-bit system calls also fall back to INT - * $0x80 regardless of what instruction was originally used to do the - * system call. - * - * This is considered a slow path. It is not used by most libc - * implementations on modern hardware except during process startup. - * - * The arguments for the INT $0x80 based syscall are on stack in the - * pt_regs structure: - * eax: system call number - * ebx, ecx, edx, esi, edi, ebp: arg1 - arg 6 - */ -__visible noinstr void do_int80_emulation(struct pt_regs *regs) -{ - int nr; - - /* Kernel does not use INT $0x80! */ - if (unlikely(!user_mode(regs))) { - irqentry_enter(regs); - instrumentation_begin(); - panic("Unexpected external interrupt 0x80\n"); - } - - /* - * Establish kernel context for instrumentation, including for - * int80_is_external() below which calls into the APIC driver. - * Identical for soft and external interrupts. - */ - enter_from_user_mode(regs); - - instrumentation_begin(); - add_random_kstack_offset(); - - /* Validate that this is a soft interrupt to the extent possible */ - if (unlikely(int80_is_external())) - panic("Unexpected external interrupt 0x80\n"); - - /* - * The low level idtentry code pushed -1 into regs::orig_ax - * and regs::ax contains the syscall number. - * - * User tracing code (ptrace or signal handlers) might assume - * that the regs::orig_ax contains a 32-bit number on invoking - * a 32-bit syscall. - * - * Establish the syscall convention by saving the 32bit truncated - * syscall number in regs::orig_ax and by invalidating regs::ax. - */ - regs->orig_ax = regs->ax & GENMASK(31, 0); - regs->ax = -ENOSYS; - - nr = syscall_32_enter(regs); - - local_irq_enable(); - nr = syscall_enter_from_user_mode_work(regs, nr); - do_syscall_32_irqs_on(regs, nr); - - instrumentation_end(); - syscall_exit_to_user_mode(regs); -} - -#ifdef CONFIG_X86_FRED -/* - * A FRED-specific INT80 handler is warranted for the follwing reasons: - * - * 1) As INT instructions and hardware interrupts are separate event - * types, FRED does not preclude the use of vector 0x80 for external - * interrupts. As a result, the FRED setup code does not reserve - * vector 0x80 and calling int80_is_external() is not merely - * suboptimal but actively incorrect: it could cause a system call - * to be incorrectly ignored. - * - * 2) It is called only for handling vector 0x80 of event type - * EVENT_TYPE_SWINT and will never be called to handle any external - * interrupt (event type EVENT_TYPE_EXTINT). - * - * 3) FRED has separate entry flows depending on if the event came from - * user space or kernel space, and because the kernel does not use - * INT insns, the FRED kernel entry handler fred_entry_from_kernel() - * falls through to fred_bad_type() if the event type is - * EVENT_TYPE_SWINT, i.e., INT insns. So if the kernel is handling - * an INT insn, it can only be from a user level. - * - * 4) int80_emulation() does a CLEAR_BRANCH_HISTORY. While FRED will - * likely take a different approach if it is ever needed: it - * probably belongs in either fred_intx()/ fred_other() or - * asm_fred_entrypoint_user(), depending on if this ought to be done - * for all entries from userspace or only system - * calls. - * - * 5) INT $0x80 is the fast path for 32-bit system calls under FRED. - */ -DEFINE_FREDENTRY_RAW(int80_emulation) -{ - int nr; - - enter_from_user_mode(regs); - - instrumentation_begin(); - add_random_kstack_offset(); - - /* - * FRED pushed 0 into regs::orig_ax and regs::ax contains the - * syscall number. - * - * User tracing code (ptrace or signal handlers) might assume - * that the regs::orig_ax contains a 32-bit number on invoking - * a 32-bit syscall. - * - * Establish the syscall convention by saving the 32bit truncated - * syscall number in regs::orig_ax and by invalidating regs::ax. - */ - regs->orig_ax = regs->ax & GENMASK(31, 0); - regs->ax = -ENOSYS; - - nr = syscall_32_enter(regs); - - local_irq_enable(); - nr = syscall_enter_from_user_mode_work(regs, nr); - do_syscall_32_irqs_on(regs, nr); - - instrumentation_end(); - syscall_exit_to_user_mode(regs); -} -#endif -#else /* CONFIG_IA32_EMULATION */ - -/* Handles int $0x80 on a 32bit kernel */ -__visible noinstr void do_int80_syscall_32(struct pt_regs *regs) -{ - int nr = syscall_32_enter(regs); - - add_random_kstack_offset(); - /* - * Subtlety here: if ptrace pokes something larger than 2^31-1 into - * orig_ax, the int return value truncates it. This matches - * the semantics of syscall_get_nr(). - */ - nr = syscall_enter_from_user_mode(regs, nr); - instrumentation_begin(); - - do_syscall_32_irqs_on(regs, nr); - - instrumentation_end(); - syscall_exit_to_user_mode(regs); -} -#endif /* !CONFIG_IA32_EMULATION */ - -static noinstr bool __do_fast_syscall_32(struct pt_regs *regs) -{ - int nr = syscall_32_enter(regs); - int res; - - add_random_kstack_offset(); - /* - * This cannot use syscall_enter_from_user_mode() as it has to - * fetch EBP before invoking any of the syscall entry work - * functions. - */ - syscall_enter_from_user_mode_prepare(regs); - - instrumentation_begin(); - /* Fetch EBP from where the vDSO stashed it. */ - if (IS_ENABLED(CONFIG_X86_64)) { - /* - * Micro-optimization: the pointer we're following is - * explicitly 32 bits, so it can't be out of range. - */ - res = __get_user(*(u32 *)®s->bp, - (u32 __user __force *)(unsigned long)(u32)regs->sp); - } else { - res = get_user(*(u32 *)®s->bp, - (u32 __user __force *)(unsigned long)(u32)regs->sp); - } - - if (res) { - /* User code screwed up. */ - regs->ax = -EFAULT; - - local_irq_disable(); - instrumentation_end(); - irqentry_exit_to_user_mode(regs); - return false; - } - - nr = syscall_enter_from_user_mode_work(regs, nr); - - /* Now this is just like a normal syscall. */ - do_syscall_32_irqs_on(regs, nr); - - instrumentation_end(); - syscall_exit_to_user_mode(regs); - return true; -} - -/* Returns true to return using SYSEXIT/SYSRETL, or false to use IRET */ -__visible noinstr bool do_fast_syscall_32(struct pt_regs *regs) -{ - /* - * Called using the internal vDSO SYSENTER/SYSCALL32 calling - * convention. Adjust regs so it looks like we entered using int80. - */ - unsigned long landing_pad = (unsigned long)current->mm->context.vdso + - vdso_image_32.sym_int80_landing_pad; - - /* - * SYSENTER loses EIP, and even SYSCALL32 needs us to skip forward - * so that 'regs->ip -= 2' lands back on an int $0x80 instruction. - * Fix it up. - */ - regs->ip = landing_pad; - - /* Invoke the syscall. If it failed, keep it simple: use IRET. */ - if (!__do_fast_syscall_32(regs)) - return false; - - /* - * Check that the register state is valid for using SYSRETL/SYSEXIT - * to exit to userspace. Otherwise use the slower but fully capable - * IRET exit path. - */ - - /* XEN PV guests always use the IRET path */ - if (cpu_feature_enabled(X86_FEATURE_XENPV)) - return false; - - /* EIP must point to the VDSO landing pad */ - if (unlikely(regs->ip != landing_pad)) - return false; - - /* CS and SS must match the values set in MSR_STAR */ - if (unlikely(regs->cs != __USER32_CS || regs->ss != __USER_DS)) - return false; - - /* If the TF, RF, or VM flags are set, use IRET */ - if (unlikely(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF | X86_EFLAGS_VM))) - return false; - - /* Use SYSRETL/SYSEXIT to exit to userspace */ - return true; -} - -/* Returns true to return using SYSEXIT/SYSRETL, or false to use IRET */ -__visible noinstr bool do_SYSENTER_32(struct pt_regs *regs) -{ - /* SYSENTER loses RSP, but the vDSO saved it in RBP. */ - regs->sp = regs->bp; - - /* SYSENTER clobbers EFLAGS.IF. Assume it was set in usermode. */ - regs->flags |= X86_EFLAGS_IF; - - return do_fast_syscall_32(regs); -} -#endif - -SYSCALL_DEFINE0(ni_syscall) -{ - return -ENOSYS; -} - -#ifdef CONFIG_XEN_PV -#ifndef CONFIG_PREEMPTION -/* - * Some hypercalls issued by the toolstack can take many 10s of - * seconds. Allow tasks running hypercalls via the privcmd driver to - * be voluntarily preempted even if full kernel preemption is - * disabled. - * - * Such preemptible hypercalls are bracketed by - * xen_preemptible_hcall_begin() and xen_preemptible_hcall_end() - * calls. - */ -DEFINE_PER_CPU(bool, xen_in_preemptible_hcall); -EXPORT_SYMBOL_GPL(xen_in_preemptible_hcall); - -/* - * In case of scheduling the flag must be cleared and restored after - * returning from schedule as the task might move to a different CPU. - */ -static __always_inline bool get_and_clear_inhcall(void) -{ - bool inhcall = __this_cpu_read(xen_in_preemptible_hcall); - - __this_cpu_write(xen_in_preemptible_hcall, false); - return inhcall; -} - -static __always_inline void restore_inhcall(bool inhcall) -{ - __this_cpu_write(xen_in_preemptible_hcall, inhcall); -} -#else -static __always_inline bool get_and_clear_inhcall(void) { return false; } -static __always_inline void restore_inhcall(bool inhcall) { } -#endif - -static void __xen_pv_evtchn_do_upcall(struct pt_regs *regs) -{ - struct pt_regs *old_regs = set_irq_regs(regs); - - inc_irq_stat(irq_hv_callback_count); - - xen_evtchn_do_upcall(); - - set_irq_regs(old_regs); -} - -__visible noinstr void xen_pv_evtchn_do_upcall(struct pt_regs *regs) -{ - irqentry_state_t state = irqentry_enter(regs); - bool inhcall; - - instrumentation_begin(); - run_sysvec_on_irqstack_cond(__xen_pv_evtchn_do_upcall, regs); - - inhcall = get_and_clear_inhcall(); - if (inhcall && !WARN_ON_ONCE(state.exit_rcu)) { - irqentry_exit_cond_resched(); - instrumentation_end(); - restore_inhcall(inhcall); - } else { - instrumentation_end(); - irqentry_exit(regs, state); - } -} -#endif /* CONFIG_XEN_PV */ diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S index b7ea3e8e9ecc..d3caa31240ed 100644 --- a/arch/x86/entry/entry.S +++ b/arch/x86/entry/entry.S @@ -5,6 +5,7 @@ #include <linux/export.h> #include <linux/linkage.h> +#include <linux/objtool.h> #include <asm/msr-index.h> #include <asm/unwind_hints.h> #include <asm/segment.h> @@ -17,6 +18,7 @@ .pushsection .noinstr.text, "ax" SYM_FUNC_START(entry_ibpb) + ANNOTATE_NOENDBR movl $MSR_IA32_PRED_CMD, %ecx movl $PRED_CMD_IBPB, %eax xorl %edx, %edx @@ -52,7 +54,6 @@ EXPORT_SYMBOL_GPL(mds_verw_sel); THUNK warn_thunk_thunk, __warn_thunk -#ifndef CONFIG_X86_64 /* * Clang's implementation of TLS stack cookies requires the variable in * question to be a TLS variable. If the variable happens to be defined as an @@ -63,7 +64,6 @@ THUNK warn_thunk_thunk, __warn_thunk * entirely in the C code, and use an alias emitted by the linker script * instead. */ -#ifdef CONFIG_STACKPROTECTOR +#if defined(CONFIG_STACKPROTECTOR) && defined(CONFIG_SMP) EXPORT_SYMBOL(__ref_stack_chk_guard); #endif -#endif diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 20be5758c2d2..92c0b4a94e0a 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -1153,7 +1153,7 @@ SYM_CODE_START(asm_exc_nmi) * is using the thread stack right now, so it's safe for us to use it. */ movl %esp, %ebx - movl PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %esp + movl PER_CPU_VAR(cpu_current_top_of_stack), %esp call exc_nmi movl %ebx, %esp @@ -1217,7 +1217,7 @@ SYM_CODE_START(rewind_stack_and_make_dead) /* Prevent any naive code from trying to unwind to our caller. */ xorl %ebp, %ebp - movl PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %esi + movl PER_CPU_VAR(cpu_current_top_of_stack), %esi leal -TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%esi), %esp call make_task_dead diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index f52dbe0ad93c..f40bdf97d390 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -92,7 +92,7 @@ SYM_CODE_START(entry_SYSCALL_64) /* tss.sp2 is scratch space. */ movq %rsp, PER_CPU_VAR(cpu_tss_rw + TSS_sp2) SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp - movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp SYM_INNER_LABEL(entry_SYSCALL_64_safe_stack, SYM_L_GLOBAL) ANNOTATE_NOENDBR @@ -175,6 +175,7 @@ SYM_CODE_END(entry_SYSCALL_64) */ .pushsection .text, "ax" SYM_FUNC_START(__switch_to_asm) + ANNOTATE_NOENDBR /* * Save callee-saved registers * This must match the order in inactive_task_frame @@ -192,7 +193,7 @@ SYM_FUNC_START(__switch_to_asm) #ifdef CONFIG_STACKPROTECTOR movq TASK_stack_canary(%rsi), %rbx - movq %rbx, PER_CPU_VAR(fixed_percpu_data + FIXED_stack_canary) + movq %rbx, PER_CPU_VAR(__stack_chk_guard) #endif /* @@ -742,6 +743,7 @@ _ASM_NOKPROBE(common_interrupt_return) * Is in entry.text as it shouldn't be instrumented. */ SYM_FUNC_START(asm_load_gs_index) + ANNOTATE_NOENDBR FRAME_BEGIN swapgs .Lgs_change: @@ -1166,7 +1168,7 @@ SYM_CODE_START(asm_exc_nmi) FENCE_SWAPGS_USER_ENTRY SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx movq %rsp, %rdx - movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp UNWIND_HINT_IRET_REGS base=%rdx offset=8 pushq 5*8(%rdx) /* pt_regs->ss */ pushq 4*8(%rdx) /* pt_regs->rsp */ @@ -1484,7 +1486,7 @@ SYM_CODE_START_NOALIGN(rewind_stack_and_make_dead) /* Prevent any naive code from trying to unwind to our caller. */ xorl %ebp, %ebp - movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rax + movq PER_CPU_VAR(cpu_current_top_of_stack), %rax leaq -PTREGS_SIZE(%rax), %rsp UNWIND_HINT_REGS @@ -1526,6 +1528,7 @@ SYM_CODE_END(rewind_stack_and_make_dead) * refactored in the future if needed. */ SYM_FUNC_START(clear_bhb_loop) + ANNOTATE_NOENDBR push %rbp mov %rsp, %rbp movl $5, %ecx diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index ed0a5f2dc129..a45e1125fc6c 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -57,7 +57,7 @@ SYM_CODE_START(entry_SYSENTER_compat) SWITCH_TO_KERNEL_CR3 scratch_reg=%rax popq %rax - movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp /* Construct struct pt_regs on stack */ pushq $__USER_DS /* pt_regs->ss */ @@ -193,7 +193,7 @@ SYM_CODE_START(entry_SYSCALL_compat) SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp /* Switch to the kernel stack */ - movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL) ANNOTATE_NOENDBR diff --git a/arch/x86/entry/entry_64_fred.S b/arch/x86/entry/entry_64_fred.S index a02bc6f3d2e6..29c5c32c16c3 100644 --- a/arch/x86/entry/entry_64_fred.S +++ b/arch/x86/entry/entry_64_fred.S @@ -58,6 +58,7 @@ SYM_CODE_END(asm_fred_entrypoint_kernel) #if IS_ENABLED(CONFIG_KVM_INTEL) SYM_FUNC_START(asm_fred_entry_from_kvm) + ANNOTATE_NOENDBR push %rbp mov %rsp, %rbp diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c index 8cc9950d7104..2b15ea17bb7c 100644 --- a/arch/x86/entry/syscall_32.c +++ b/arch/x86/entry/syscall_32.c @@ -1,10 +1,16 @@ -// SPDX-License-Identifier: GPL-2.0 -/* System call table for i386. */ +// SPDX-License-Identifier: GPL-2.0-only +/* 32-bit system call dispatch */ #include <linux/linkage.h> #include <linux/sys.h> #include <linux/cache.h> #include <linux/syscalls.h> +#include <linux/entry-common.h> +#include <linux/nospec.h> +#include <linux/uaccess.h> +#include <asm/apic.h> +#include <asm/traps.h> +#include <asm/cpufeature.h> #include <asm/syscall.h> #ifdef CONFIG_IA32_EMULATION @@ -41,4 +47,324 @@ long ia32_sys_call(const struct pt_regs *regs, unsigned int nr) #include <asm/syscalls_32.h> default: return __ia32_sys_ni_syscall(regs); } -}; +} + +static __always_inline int syscall_32_enter(struct pt_regs *regs) +{ + if (IS_ENABLED(CONFIG_IA32_EMULATION)) + current_thread_info()->status |= TS_COMPAT; + + return (int)regs->orig_ax; +} + +#ifdef CONFIG_IA32_EMULATION +bool __ia32_enabled __ro_after_init = !IS_ENABLED(CONFIG_IA32_EMULATION_DEFAULT_DISABLED); + +static int __init ia32_emulation_override_cmdline(char *arg) +{ + return kstrtobool(arg, &__ia32_enabled); +} +early_param("ia32_emulation", ia32_emulation_override_cmdline); +#endif + +/* + * Invoke a 32-bit syscall. Called with IRQs on in CT_STATE_KERNEL. + */ +static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, int nr) +{ + /* + * Convert negative numbers to very high and thus out of range + * numbers for comparisons. + */ + unsigned int unr = nr; + + if (likely(unr < IA32_NR_syscalls)) { + unr = array_index_nospec(unr, IA32_NR_syscalls); + regs->ax = ia32_sys_call(regs, unr); + } else if (nr != -1) { + regs->ax = __ia32_sys_ni_syscall(regs); + } +} + +#ifdef CONFIG_IA32_EMULATION +static __always_inline bool int80_is_external(void) +{ + const unsigned int offs = (0x80 / 32) * 0x10; + const u32 bit = BIT(0x80 % 32); + + /* The local APIC on XENPV guests is fake */ + if (cpu_feature_enabled(X86_FEATURE_XENPV)) + return false; + + /* + * If vector 0x80 is set in the APIC ISR then this is an external + * interrupt. Either from broken hardware or injected by a VMM. + * + * Note: In guest mode this is only valid for secure guests where + * the secure module fully controls the vAPIC exposed to the guest. + */ + return apic_read(APIC_ISR + offs) & bit; +} + +/** + * do_int80_emulation - 32-bit legacy syscall C entry from asm + * @regs: syscall arguments in struct pt_args on the stack. + * + * This entry point can be used by 32-bit and 64-bit programs to perform + * 32-bit system calls. Instances of INT $0x80 can be found inline in + * various programs and libraries. It is also used by the vDSO's + * __kernel_vsyscall fallback for hardware that doesn't support a faster + * entry method. Restarted 32-bit system calls also fall back to INT + * $0x80 regardless of what instruction was originally used to do the + * system call. + * + * This is considered a slow path. It is not used by most libc + * implementations on modern hardware except during process startup. + * + * The arguments for the INT $0x80 based syscall are on stack in the + * pt_regs structure: + * eax: system call number + * ebx, ecx, edx, esi, edi, ebp: arg1 - arg 6 + */ +__visible noinstr void do_int80_emulation(struct pt_regs *regs) +{ + int nr; + + /* Kernel does not use INT $0x80! */ + if (unlikely(!user_mode(regs))) { + irqentry_enter(regs); + instrumentation_begin(); + panic("Unexpected external interrupt 0x80\n"); + } + + /* + * Establish kernel context for instrumentation, including for + * int80_is_external() below which calls into the APIC driver. + * Identical for soft and external interrupts. + */ + enter_from_user_mode(regs); + + instrumentation_begin(); + add_random_kstack_offset(); + + /* Validate that this is a soft interrupt to the extent possible */ + if (unlikely(int80_is_external())) + panic("Unexpected external interrupt 0x80\n"); + + /* + * The low level idtentry code pushed -1 into regs::orig_ax + * and regs::ax contains the syscall number. + * + * User tracing code (ptrace or signal handlers) might assume + * that the regs::orig_ax contains a 32-bit number on invoking + * a 32-bit syscall. + * + * Establish the syscall convention by saving the 32bit truncated + * syscall number in regs::orig_ax and by invalidating regs::ax. + */ + regs->orig_ax = regs->ax & GENMASK(31, 0); + regs->ax = -ENOSYS; + + nr = syscall_32_enter(regs); + + local_irq_enable(); + nr = syscall_enter_from_user_mode_work(regs, nr); + do_syscall_32_irqs_on(regs, nr); + + instrumentation_end(); + syscall_exit_to_user_mode(regs); +} + +#ifdef CONFIG_X86_FRED +/* + * A FRED-specific INT80 handler is warranted for the follwing reasons: + * + * 1) As INT instructions and hardware interrupts are separate event + * types, FRED does not preclude the use of vector 0x80 for external + * interrupts. As a result, the FRED setup code does not reserve + * vector 0x80 and calling int80_is_external() is not merely + * suboptimal but actively incorrect: it could cause a system call + * to be incorrectly ignored. + * + * 2) It is called only for handling vector 0x80 of event type + * EVENT_TYPE_SWINT and will never be called to handle any external + * interrupt (event type EVENT_TYPE_EXTINT). + * + * 3) FRED has separate entry flows depending on if the event came from + * user space or kernel space, and because the kernel does not use + * INT insns, the FRED kernel entry handler fred_entry_from_kernel() + * falls through to fred_bad_type() if the event type is + * EVENT_TYPE_SWINT, i.e., INT insns. So if the kernel is handling + * an INT insn, it can only be from a user level. + * + * 4) int80_emulation() does a CLEAR_BRANCH_HISTORY. While FRED will + * likely take a different approach if it is ever needed: it + * probably belongs in either fred_intx()/ fred_other() or + * asm_fred_entrypoint_user(), depending on if this ought to be done + * for all entries from userspace or only system + * calls. + * + * 5) INT $0x80 is the fast path for 32-bit system calls under FRED. + */ +DEFINE_FREDENTRY_RAW(int80_emulation) +{ + int nr; + + enter_from_user_mode(regs); + + instrumentation_begin(); + add_random_kstack_offset(); + + /* + * FRED pushed 0 into regs::orig_ax and regs::ax contains the + * syscall number. + * + * User tracing code (ptrace or signal handlers) might assume + * that the regs::orig_ax contains a 32-bit number on invoking + * a 32-bit syscall. + * + * Establish the syscall convention by saving the 32bit truncated + * syscall number in regs::orig_ax and by invalidating regs::ax. + */ + regs->orig_ax = regs->ax & GENMASK(31, 0); + regs->ax = -ENOSYS; + + nr = syscall_32_enter(regs); + + local_irq_enable(); + nr = syscall_enter_from_user_mode_work(regs, nr); + do_syscall_32_irqs_on(regs, nr); + + instrumentation_end(); + syscall_exit_to_user_mode(regs); +} +#endif /* CONFIG_X86_FRED */ + +#else /* CONFIG_IA32_EMULATION */ + +/* Handles int $0x80 on a 32bit kernel */ +__visible noinstr void do_int80_syscall_32(struct pt_regs *regs) +{ + int nr = syscall_32_enter(regs); + + add_random_kstack_offset(); + /* + * Subtlety here: if ptrace pokes something larger than 2^31-1 into + * orig_ax, the int return value truncates it. This matches + * the semantics of syscall_get_nr(). + */ + nr = syscall_enter_from_user_mode(regs, nr); + instrumentation_begin(); + + do_syscall_32_irqs_on(regs, nr); + + instrumentation_end(); + syscall_exit_to_user_mode(regs); +} +#endif /* !CONFIG_IA32_EMULATION */ + +static noinstr bool __do_fast_syscall_32(struct pt_regs *regs) +{ + int nr = syscall_32_enter(regs); + int res; + + add_random_kstack_offset(); + /* + * This cannot use syscall_enter_from_user_mode() as it has to + * fetch EBP before invoking any of the syscall entry work + * functions. + */ + syscall_enter_from_user_mode_prepare(regs); + + instrumentation_begin(); + /* Fetch EBP from where the vDSO stashed it. */ + if (IS_ENABLED(CONFIG_X86_64)) { + /* + * Micro-optimization: the pointer we're following is + * explicitly 32 bits, so it can't be out of range. + */ + res = __get_user(*(u32 *)®s->bp, + (u32 __user __force *)(unsigned long)(u32)regs->sp); + } else { + res = get_user(*(u32 *)®s->bp, + (u32 __user __force *)(unsigned long)(u32)regs->sp); + } + + if (res) { + /* User code screwed up. */ + regs->ax = -EFAULT; + + local_irq_disable(); + instrumentation_end(); + irqentry_exit_to_user_mode(regs); + return false; + } + + nr = syscall_enter_from_user_mode_work(regs, nr); + + /* Now this is just like a normal syscall. */ + do_syscall_32_irqs_on(regs, nr); + + instrumentation_end(); + syscall_exit_to_user_mode(regs); + return true; +} + +/* Returns true to return using SYSEXIT/SYSRETL, or false to use IRET */ +__visible noinstr bool do_fast_syscall_32(struct pt_regs *regs) +{ + /* + * Called using the internal vDSO SYSENTER/SYSCALL32 calling + * convention. Adjust regs so it looks like we entered using int80. + */ + unsigned long landing_pad = (unsigned long)current->mm->context.vdso + + vdso_image_32.sym_int80_landing_pad; + + /* + * SYSENTER loses EIP, and even SYSCALL32 needs us to skip forward + * so that 'regs->ip -= 2' lands back on an int $0x80 instruction. + * Fix it up. + */ + regs->ip = landing_pad; + + /* Invoke the syscall. If it failed, keep it simple: use IRET. */ + if (!__do_fast_syscall_32(regs)) + return false; + + /* + * Check that the register state is valid for using SYSRETL/SYSEXIT + * to exit to userspace. Otherwise use the slower but fully capable + * IRET exit path. + */ + + /* XEN PV guests always use the IRET path */ + if (cpu_feature_enabled(X86_FEATURE_XENPV)) + return false; + + /* EIP must point to the VDSO landing pad */ + if (unlikely(regs->ip != landing_pad)) + return false; + + /* CS and SS must match the values set in MSR_STAR */ + if (unlikely(regs->cs != __USER32_CS || regs->ss != __USER_DS)) + return false; + + /* If the TF, RF, or VM flags are set, use IRET */ + if (unlikely(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF | X86_EFLAGS_VM))) + return false; + + /* Use SYSRETL/SYSEXIT to exit to userspace */ + return true; +} + +/* Returns true to return using SYSEXIT/SYSRETL, or false to use IRET */ +__visible noinstr bool do_SYSENTER_32(struct pt_regs *regs) +{ + /* SYSENTER loses RSP, but the vDSO saved it in RBP. */ + regs->sp = regs->bp; + + /* SYSENTER clobbers EFLAGS.IF. Assume it was set in usermode. */ + regs->flags |= X86_EFLAGS_IF; + + return do_fast_syscall_32(regs); +} diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c index ba8354424860..b6e68ea98b83 100644 --- a/arch/x86/entry/syscall_64.c +++ b/arch/x86/entry/syscall_64.c @@ -1,15 +1,20 @@ -// SPDX-License-Identifier: GPL-2.0 -/* System call table for x86-64. */ +// SPDX-License-Identifier: GPL-2.0-only +/* 64-bit system call dispatch */ #include <linux/linkage.h> #include <linux/sys.h> #include <linux/cache.h> #include <linux/syscalls.h> +#include <linux/entry-common.h> +#include <linux/nospec.h> #include <asm/syscall.h> #define __SYSCALL(nr, sym) extern long __x64_##sym(const struct pt_regs *); #define __SYSCALL_NORETURN(nr, sym) extern long __noreturn __x64_##sym(const struct pt_regs *); #include <asm/syscalls_64.h> +#ifdef CONFIG_X86_X32_ABI +#include <asm/syscalls_x32.h> +#endif #undef __SYSCALL #undef __SYSCALL_NORETURN @@ -33,4 +38,104 @@ long x64_sys_call(const struct pt_regs *regs, unsigned int nr) #include <asm/syscalls_64.h> default: return __x64_sys_ni_syscall(regs); } -}; +} + +#ifdef CONFIG_X86_X32_ABI +long x32_sys_call(const struct pt_regs *regs, unsigned int nr) +{ + switch (nr) { + #include <asm/syscalls_x32.h> + default: return __x64_sys_ni_syscall(regs); + } +} +#endif + +static __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr) +{ + /* + * Convert negative numbers to very high and thus out of range + * numbers for comparisons. + */ + unsigned int unr = nr; + + if (likely(unr < NR_syscalls)) { + unr = array_index_nospec(unr, NR_syscalls); + regs->ax = x64_sys_call(regs, unr); + return true; + } + return false; +} + +static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr) +{ + /* + * Adjust the starting offset of the table, and convert numbers + * < __X32_SYSCALL_BIT to very high and thus out of range + * numbers for comparisons. + */ + unsigned int xnr = nr - __X32_SYSCALL_BIT; + + if (IS_ENABLED(CONFIG_X86_X32_ABI) && likely(xnr < X32_NR_syscalls)) { + xnr = array_index_nospec(xnr, X32_NR_syscalls); + regs->ax = x32_sys_call(regs, xnr); + return true; + } + return false; +} + +/* Returns true to return using SYSRET, or false to use IRET */ +__visible noinstr bool do_syscall_64(struct pt_regs *regs, int nr) +{ + add_random_kstack_offset(); + nr = syscall_enter_from_user_mode(regs, nr); + + instrumentation_begin(); + + if (!do_syscall_x64(regs, nr) && !do_syscall_x32(regs, nr) && nr != -1) { + /* Invalid system call, but still a system call. */ + regs->ax = __x64_sys_ni_syscall(regs); + } + + instrumentation_end(); + syscall_exit_to_user_mode(regs); + + /* + * Check that the register state is valid for using SYSRET to exit + * to userspace. Otherwise use the slower but fully capable IRET + * exit path. + */ + + /* XEN PV guests always use the IRET path */ + if (cpu_feature_enabled(X86_FEATURE_XENPV)) + return false; + + /* SYSRET requires RCX == RIP and R11 == EFLAGS */ + if (unlikely(regs->cx != regs->ip || regs->r11 != regs->flags)) + return false; + + /* CS and SS must match the values set in MSR_STAR */ + if (unlikely(regs->cs != __USER_CS || regs->ss != __USER_DS)) + return false; + + /* + * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP + * in kernel space. This essentially lets the user take over + * the kernel, since userspace controls RSP. + * + * TASK_SIZE_MAX covers all user-accessible addresses other than + * the deprecated vsyscall page. + */ + if (unlikely(regs->ip >= TASK_SIZE_MAX)) + return false; + + /* + * SYSRET cannot restore RF. It can restore TF, but unlike IRET, + * restoring TF results in a trap from userspace immediately after + * SYSRET. + */ + if (unlikely(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF))) + return false; + + /* Use SYSRET to exit to userspace */ + return true; +} diff --git a/arch/x86/entry/syscall_x32.c b/arch/x86/entry/syscall_x32.c deleted file mode 100644 index fb77908f44f3..000000000000 --- a/arch/x86/entry/syscall_x32.c +++ /dev/null @@ -1,25 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* System call table for x32 ABI. */ - -#include <linux/linkage.h> -#include <linux/sys.h> -#include <linux/cache.h> -#include <linux/syscalls.h> -#include <asm/syscall.h> - -#define __SYSCALL(nr, sym) extern long __x64_##sym(const struct pt_regs *); -#define __SYSCALL_NORETURN(nr, sym) extern long __noreturn __x64_##sym(const struct pt_regs *); -#include <asm/syscalls_x32.h> -#undef __SYSCALL - -#undef __SYSCALL_NORETURN -#define __SYSCALL_NORETURN __SYSCALL - -#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs); -long x32_sys_call(const struct pt_regs *regs, unsigned int nr) -{ - switch (nr) { - #include <asm/syscalls_x32.h> - default: return __x64_sys_ni_syscall(regs); - } -}; diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 3f0ec87d5db4..ac007ea00979 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -396,7 +396,7 @@ 381 i386 pkey_alloc sys_pkey_alloc 382 i386 pkey_free sys_pkey_free 383 i386 statx sys_statx -384 i386 arch_prctl sys_arch_prctl compat_sys_arch_prctl +384 i386 arch_prctl sys_arch_prctl 385 i386 io_pgetevents sys_io_pgetevents_time32 compat_sys_io_pgetevents 386 i386 rseq sys_rseq 393 i386 semget sys_semget diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index c9216ac4fb1e..bf9f4f63e1b4 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -133,6 +133,7 @@ KBUILD_CFLAGS_32 += -fno-stack-protector KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls) KBUILD_CFLAGS_32 += -fno-omit-frame-pointer KBUILD_CFLAGS_32 += -DDISABLE_BRANCH_PROFILING +KBUILD_CFLAGS_32 += -DBUILD_VDSO ifdef CONFIG_MITIGATION_RETPOLINE ifneq ($(RETPOLINE_VDSO_CFLAGS),) diff --git a/arch/x86/entry/vdso/extable.h b/arch/x86/entry/vdso/extable.h index b56f6b012941..baba612b832c 100644 --- a/arch/x86/entry/vdso/extable.h +++ b/arch/x86/entry/vdso/extable.h @@ -7,7 +7,7 @@ * vDSO uses a dedicated handler the addresses are relative to the overall * exception table, not each individual entry. */ -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define _ASM_VDSO_EXTABLE_HANDLE(from, to) \ ASM_VDSO_EXTABLE_HANDLE from to diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 39e6efc1a9ca..bfc7cabf4017 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -48,8 +48,7 @@ int __init init_vdso_image(const struct vdso_image *image) apply_alternatives((struct alt_instr *)(image->data + image->alt), (struct alt_instr *)(image->data + image->alt + - image->alt_len), - NULL); + image->alt_len)); return 0; } diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 833478ffbbf5..6866cc5acb0b 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2849,7 +2849,7 @@ static bool is_uprobe_at_func_entry(struct pt_regs *regs) return true; /* endbr64 (64-bit only) */ - if (user_64bit_mode(regs) && is_endbr(*(u32 *)auprobe->insn)) + if (user_64bit_mode(regs) && is_endbr((u32 *)auprobe->insn)) return true; return false; diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index dc38dec244c1..1ac39611fea8 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4735,9 +4735,9 @@ static int adl_hw_config(struct perf_event *event) return -EOPNOTSUPP; } -static enum hybrid_cpu_type adl_get_hybrid_cpu_type(void) +static enum intel_cpu_type adl_get_hybrid_cpu_type(void) { - return HYBRID_INTEL_CORE; + return INTEL_CPU_TYPE_CORE; } static inline bool erratum_hsw11(struct perf_event *event) @@ -5082,7 +5082,8 @@ static void intel_pmu_check_hybrid_pmus(struct x86_hybrid_pmu *pmu) static struct x86_hybrid_pmu *find_hybrid_pmu_for_cpu(void) { - u8 cpu_type = get_this_hybrid_cpu_type(); + struct cpuinfo_x86 *c = &cpu_data(smp_processor_id()); + enum intel_cpu_type cpu_type = c->topo.intel_type; int i; /* @@ -5091,7 +5092,7 @@ static struct x86_hybrid_pmu *find_hybrid_pmu_for_cpu(void) * on it. There should be a fixup function provided for these * troublesome CPUs (->get_hybrid_cpu_type). */ - if (cpu_type == HYBRID_INTEL_NONE) { + if (cpu_type == INTEL_CPU_TYPE_UNKNOWN) { if (x86_pmu.get_hybrid_cpu_type) cpu_type = x86_pmu.get_hybrid_cpu_type(); else @@ -5108,16 +5109,16 @@ static struct x86_hybrid_pmu *find_hybrid_pmu_for_cpu(void) enum hybrid_pmu_type pmu_type = x86_pmu.hybrid_pmu[i].pmu_type; u32 native_id; - if (cpu_type == HYBRID_INTEL_CORE && pmu_type == hybrid_big) + if (cpu_type == INTEL_CPU_TYPE_CORE && pmu_type == hybrid_big) return &x86_pmu.hybrid_pmu[i]; - if (cpu_type == HYBRID_INTEL_ATOM) { + if (cpu_type == INTEL_CPU_TYPE_ATOM) { if (x86_pmu.num_hybrid_pmus == 2 && pmu_type == hybrid_small) return &x86_pmu.hybrid_pmu[i]; - native_id = get_this_hybrid_cpu_native_id(); - if (native_id == skt_native_id && pmu_type == hybrid_small) + native_id = c->topo.intel_native_model_id; + if (native_id == INTEL_ATOM_SKT_NATIVE_ID && pmu_type == hybrid_small) return &x86_pmu.hybrid_pmu[i]; - if (native_id == cmt_native_id && pmu_type == hybrid_tiny) + if (native_id == INTEL_ATOM_CMT_NATIVE_ID && pmu_type == hybrid_tiny) return &x86_pmu.hybrid_pmu[i]; } } @@ -6583,15 +6584,21 @@ __init int intel_pmu_init(void) char *name; struct x86_hybrid_pmu *pmu; + /* Architectural Perfmon was introduced starting with Core "Yonah" */ if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { switch (boot_cpu_data.x86) { - case 0x6: - return p6_pmu_init(); - case 0xb: + case 6: + if (boot_cpu_data.x86_vfm < INTEL_CORE_YONAH) + return p6_pmu_init(); + break; + case 11: return knc_pmu_init(); - case 0xf: + case 15: return p4_pmu_init(); } + + pr_cont("unsupported CPU family %d model %d ", + boot_cpu_data.x86, boot_cpu_data.x86_model); return -ENODEV; } @@ -6739,7 +6746,7 @@ __init int intel_pmu_init(void) case INTEL_ATOM_SILVERMONT_D: case INTEL_ATOM_SILVERMONT_MID: case INTEL_ATOM_AIRMONT: - case INTEL_ATOM_AIRMONT_MID: + case INTEL_ATOM_SILVERMONT_MID2: memcpy(hw_cache_event_ids, slm_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs, diff --git a/arch/x86/events/intel/p4.c b/arch/x86/events/intel/p4.c index 844bc4fc4724..fb726c6fc6e7 100644 --- a/arch/x86/events/intel/p4.c +++ b/arch/x86/events/intel/p4.c @@ -10,6 +10,7 @@ #include <linux/perf_event.h> #include <asm/perf_event_p4.h> +#include <asm/cpu_device_id.h> #include <asm/hardirq.h> #include <asm/apic.h> @@ -732,9 +733,9 @@ static bool p4_event_match_cpu_model(unsigned int event_idx) { /* INSTR_COMPLETED event only exist for model 3, 4, 6 (Prescott) */ if (event_idx == P4_EVENT_INSTR_COMPLETED) { - if (boot_cpu_data.x86_model != 3 && - boot_cpu_data.x86_model != 4 && - boot_cpu_data.x86_model != 6) + if (boot_cpu_data.x86_vfm != INTEL_P4_PRESCOTT && + boot_cpu_data.x86_vfm != INTEL_P4_PRESCOTT_2M && + boot_cpu_data.x86_vfm != INTEL_P4_CEDARMILL) return false; } diff --git a/arch/x86/events/intel/p6.c b/arch/x86/events/intel/p6.c index a6cffb4f4ef5..65b45e9d7016 100644 --- a/arch/x86/events/intel/p6.c +++ b/arch/x86/events/intel/p6.c @@ -2,6 +2,8 @@ #include <linux/perf_event.h> #include <linux/types.h> +#include <asm/cpu_device_id.h> + #include "../perf_event.h" /* @@ -248,30 +250,8 @@ __init int p6_pmu_init(void) { x86_pmu = p6_pmu; - switch (boot_cpu_data.x86_model) { - case 1: /* Pentium Pro */ + if (boot_cpu_data.x86_vfm == INTEL_PENTIUM_PRO) x86_add_quirk(p6_pmu_rdpmc_quirk); - break; - - case 3: /* Pentium II - Klamath */ - case 5: /* Pentium II - Deschutes */ - case 6: /* Pentium II - Mendocino */ - break; - - case 7: /* Pentium III - Katmai */ - case 8: /* Pentium III - Coppermine */ - case 10: /* Pentium III Xeon */ - case 11: /* Pentium III - Tualatin */ - break; - - case 9: /* Pentium M - Banias */ - case 13: /* Pentium M - Dothan */ - break; - - default: - pr_cont("unsupported p6 CPU model %d ", boot_cpu_data.x86_model); - return -ENODEV; - } memcpy(hw_cache_event_ids, p6_hw_cache_event_ids, sizeof(hw_cache_event_ids)); diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 8e5a4c3c5b95..2c0ce0e9545e 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -674,18 +674,6 @@ enum { #define PERF_PEBS_DATA_SOURCE_GRT_MAX 0x10 #define PERF_PEBS_DATA_SOURCE_GRT_MASK (PERF_PEBS_DATA_SOURCE_GRT_MAX - 1) -/* - * CPUID.1AH.EAX[31:0] uniquely identifies the microarchitecture - * of the core. Bits 31-24 indicates its core type (Core or Atom) - * and Bits [23:0] indicates the native model ID of the core. - * Core type and native model ID are defined in below enumerations. - */ -enum hybrid_cpu_type { - HYBRID_INTEL_NONE, - HYBRID_INTEL_ATOM = 0x20, - HYBRID_INTEL_CORE = 0x40, -}; - #define X86_HYBRID_PMU_ATOM_IDX 0 #define X86_HYBRID_PMU_CORE_IDX 1 #define X86_HYBRID_PMU_TINY_IDX 2 @@ -702,11 +690,6 @@ enum hybrid_pmu_type { hybrid_big_small_tiny = hybrid_big | hybrid_small_tiny, }; -enum atom_native_id { - cmt_native_id = 0x2, /* Crestmont */ - skt_native_id = 0x3, /* Skymont */ -}; - struct x86_hybrid_pmu { struct pmu pmu; const char *name; @@ -993,7 +976,7 @@ struct x86_pmu { */ int num_hybrid_pmus; struct x86_hybrid_pmu *hybrid_pmu; - enum hybrid_cpu_type (*get_hybrid_cpu_type) (void); + enum intel_cpu_type (*get_hybrid_cpu_type) (void); }; struct x86_perf_task_context_opt { diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c index cc8c3bd0e7c2..1f7c3082a36d 100644 --- a/arch/x86/hyperv/mmu.c +++ b/arch/x86/hyperv/mmu.c @@ -239,5 +239,4 @@ void hyperv_setup_mmu_ops(void) pr_info("Using hypercall for remote TLB flush\n"); pv_ops.mmu.flush_tlb_multi = hyperv_flush_tlb_multi; - pv_ops.mmu.tlb_remove_table = tlb_remove_table; } diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 58f4ddecc5fa..4566000e15c4 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -8,6 +8,7 @@ generated-y += syscalls_x32.h generated-y += unistd_32_ia32.h generated-y += unistd_64_x32.h generated-y += xen-hypercalls.h +generated-y += cpufeaturemasks.h generic-y += early_ioremap.h generic-y += fprobe.h diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 3b3d3aa19acd..4a37a8bd87fd 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -15,7 +15,7 @@ #define ALT_DIRECT_CALL(feature) ((ALT_FLAG_DIRECT_CALL << ALT_FLAGS_SHIFT) | (feature)) #define ALT_CALL_ALWAYS ALT_DIRECT_CALL(X86_FEATURE_ALWAYS) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/stddef.h> @@ -87,20 +87,19 @@ extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; * instructions were patched in already: */ extern int alternatives_patched; -struct module; extern void alternative_instructions(void); -extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end, - struct module *mod); -extern void apply_retpolines(s32 *start, s32 *end, struct module *mod); -extern void apply_returns(s32 *start, s32 *end, struct module *mod); -extern void apply_seal_endbr(s32 *start, s32 *end, struct module *mod); +extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); +extern void apply_retpolines(s32 *start, s32 *end); +extern void apply_returns(s32 *start, s32 *end); +extern void apply_seal_endbr(s32 *start, s32 *end); extern void apply_fineibt(s32 *start_retpoline, s32 *end_retpoine, - s32 *start_cfi, s32 *end_cfi, struct module *mod); + s32 *start_cfi, s32 *end_cfi); + +struct module; struct callthunk_sites { s32 *call_start, *call_end; - struct alt_instr *alt_start, *alt_end; }; #ifdef CONFIG_CALL_THUNKS @@ -237,10 +236,12 @@ static inline int alternatives_text_reserved(void *start, void *end) * references: i.e., if used for a function, it would add the PLT * suffix. */ -#define alternative_call(oldfunc, newfunc, ft_flags, output, input...) \ +#define alternative_call(oldfunc, newfunc, ft_flags, output, input, clobbers...) \ asm_inline volatile(ALTERNATIVE("call %c[old]", "call %c[new]", ft_flags) \ : ALT_OUTPUT_SP(output) \ - : [old] "i" (oldfunc), [new] "i" (newfunc), ## input) + : [old] "i" (oldfunc), [new] "i" (newfunc) \ + COMMA(input) \ + : clobbers) /* * Like alternative_call, but there are two features and respective functions. @@ -249,24 +250,14 @@ static inline int alternatives_text_reserved(void *start, void *end) * Otherwise, old function is used. */ #define alternative_call_2(oldfunc, newfunc1, ft_flags1, newfunc2, ft_flags2, \ - output, input...) \ + output, input, clobbers...) \ asm_inline volatile(ALTERNATIVE_2("call %c[old]", "call %c[new1]", ft_flags1, \ "call %c[new2]", ft_flags2) \ : ALT_OUTPUT_SP(output) \ : [old] "i" (oldfunc), [new1] "i" (newfunc1), \ - [new2] "i" (newfunc2), ## input) - -/* - * use this macro(s) if you need more than one output parameter - * in alternative_io - */ -#define ASM_OUTPUT2(a...) a - -/* - * use this macro if you need clobbers but no inputs in - * alternative_{input,io,call}() - */ -#define ASM_NO_INPUT_CLOBBER(clbr...) "i" (0) : clbr + [new2] "i" (newfunc2) \ + COMMA(input) \ + : clobbers) #define ALT_OUTPUT_SP(...) ASM_CALL_CONSTRAINT, ## __VA_ARGS__ @@ -286,7 +277,7 @@ static inline int alternatives_text_reserved(void *start, void *end) void BUG_func(void); void nop_func(void); -#else /* __ASSEMBLY__ */ +#else /* __ASSEMBLER__ */ #ifdef CONFIG_SMP .macro LOCK_PREFIX @@ -369,6 +360,6 @@ void nop_func(void); ALTERNATIVE_2 oldinstr, newinstr_no, X86_FEATURE_ALWAYS, \ newinstr_yes, ft_flags -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_ALTERNATIVE_H */ diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index 4c4efb93045e..adfa0854cf2d 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -27,7 +27,6 @@ struct amd_l3_cache { }; struct amd_northbridge { - struct pci_dev *root; struct pci_dev *misc; struct pci_dev *link; struct amd_l3_cache l3_cache; diff --git a/arch/x86/include/asm/amd_node.h b/arch/x86/include/asm/amd_node.h index 113ad3e8ee40..23fe617898a8 100644 --- a/arch/x86/include/asm/amd_node.h +++ b/arch/x86/include/asm/amd_node.h @@ -30,7 +30,31 @@ static inline u16 amd_num_nodes(void) return topology_amd_nodes_per_pkg() * topology_max_packages(); } +#ifdef CONFIG_AMD_NODE int __must_check amd_smn_read(u16 node, u32 address, u32 *value); int __must_check amd_smn_write(u16 node, u32 address, u32 value); +/* Should only be used by the HSMP driver. */ +int __must_check amd_smn_hsmp_rdwr(u16 node, u32 address, u32 *value, bool write); +#else +static inline int __must_check amd_smn_read(u16 node, u32 address, u32 *value) { return -ENODEV; } +static inline int __must_check amd_smn_write(u16 node, u32 address, u32 value) { return -ENODEV; } + +static inline int __must_check amd_smn_hsmp_rdwr(u16 node, u32 address, u32 *value, bool write) +{ + return -ENODEV; +} +#endif /* CONFIG_AMD_NODE */ + +/* helper for use with read_poll_timeout */ +static inline int smn_read_register(u32 reg) +{ + int data, rc; + + rc = amd_smn_read(0, reg, &data); + if (rc) + return rc; + + return data; +} #endif /*_ASM_X86_AMD_NODE_H_*/ diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index f21ff1932699..c903d358405d 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -99,8 +99,8 @@ static inline void native_apic_mem_write(u32 reg, u32 v) volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg); alternative_io("movl %0, %1", "xchgl %0, %1", X86_BUG_11AP, - ASM_OUTPUT2("=r" (v), "=m" (*addr)), - ASM_OUTPUT2("0" (v), "m" (*addr))); + ASM_OUTPUT("=r" (v), "=m" (*addr)), + ASM_INPUT("0" (v), "m" (*addr))); } static inline u32 native_apic_mem_read(u32 reg) diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h index ba88edd0d58b..b5982b94bdba 100644 --- a/arch/x86/include/asm/arch_hweight.h +++ b/arch/x86/include/asm/arch_hweight.h @@ -16,9 +16,10 @@ static __always_inline unsigned int __arch_hweight32(unsigned int w) { unsigned int res; - asm (ALTERNATIVE("call __sw_hweight32", "popcntl %1, %0", X86_FEATURE_POPCNT) - : "="REG_OUT (res) - : REG_IN (w)); + asm_inline (ALTERNATIVE("call __sw_hweight32", + "popcntl %[val], %[cnt]", X86_FEATURE_POPCNT) + : [cnt] "=" REG_OUT (res), ASM_CALL_CONSTRAINT + : [val] REG_IN (w)); return res; } @@ -44,9 +45,10 @@ static __always_inline unsigned long __arch_hweight64(__u64 w) { unsigned long res; - asm (ALTERNATIVE("call __sw_hweight64", "popcntq %1, %0", X86_FEATURE_POPCNT) - : "="REG_OUT (res) - : REG_IN (w)); + asm_inline (ALTERNATIVE("call __sw_hweight64", + "popcntq %[val], %[cnt]", X86_FEATURE_POPCNT) + : [cnt] "=" REG_OUT (res), ASM_CALL_CONSTRAINT + : [val] REG_IN (w)); return res; } diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h index 3674006e3974..11c6fecc3ad7 100644 --- a/arch/x86/include/asm/asm-prototypes.h +++ b/arch/x86/include/asm/asm-prototypes.h @@ -16,10 +16,10 @@ #include <asm/gsseg.h> #include <asm/nospec-branch.h> -#ifndef CONFIG_X86_CMPXCHG64 +#ifndef CONFIG_X86_CX8 extern void cmpxchg8b_emu(void); #endif -#if defined(__GENKSYMS__) && defined(CONFIG_STACKPROTECTOR) +#ifdef CONFIG_STACKPROTECTOR extern unsigned long __ref_stack_chk_guard; #endif diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 2bec0c89a95c..cc2881576c2c 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -2,7 +2,7 @@ #ifndef _ASM_X86_ASM_H #define _ASM_X86_ASM_H -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ # define __ASM_FORM(x, ...) x,## __VA_ARGS__ # define __ASM_FORM_RAW(x, ...) x,## __VA_ARGS__ # define __ASM_FORM_COMMA(x, ...) x,## __VA_ARGS__, @@ -113,7 +113,7 @@ #endif -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifndef __pic__ static __always_inline __pure void *rip_rel_ptr(void *p) { @@ -144,7 +144,7 @@ static __always_inline __pure void *rip_rel_ptr(void *p) # include <asm/extable_fixup_types.h> /* Exception table entry */ -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ # define _ASM_EXTABLE_TYPE(from, to, type) \ .pushsection "__ex_table","a" ; \ @@ -164,7 +164,7 @@ static __always_inline __pure void *rip_rel_ptr(void *p) # define _ASM_NOKPROBE(entry) # endif -#else /* ! __ASSEMBLY__ */ +#else /* ! __ASSEMBLER__ */ # define DEFINE_EXTABLE_TYPE_REG \ ".macro extable_type_reg type:req reg:req\n" \ @@ -213,6 +213,17 @@ static __always_inline __pure void *rip_rel_ptr(void *p) /* For C file, we already have NOKPROBE_SYMBOL macro */ +/* Insert a comma if args are non-empty */ +#define COMMA(x...) __COMMA(x) +#define __COMMA(...) , ##__VA_ARGS__ + +/* + * Combine multiple asm inline constraint args into a single arg for passing to + * another macro. + */ +#define ASM_OUTPUT(x...) x +#define ASM_INPUT(x...) x + /* * This output constraint should be used for any inline asm which has a "call" * instruction. Otherwise the asm may be inserted before the frame pointer @@ -221,7 +232,7 @@ static __always_inline __pure void *rip_rel_ptr(void *p) */ register unsigned long current_stack_pointer asm(_ASM_SP); #define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #define _ASM_EXTABLE(from, to) \ _ASM_EXTABLE_TYPE(from, to, EX_TYPE_DEFAULT) diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 55b4d24356ea..75743f1dfd4e 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -30,14 +30,14 @@ static __always_inline void arch_atomic_set(atomic_t *v, int i) static __always_inline void arch_atomic_add(int i, atomic_t *v) { - asm volatile(LOCK_PREFIX "addl %1,%0" + asm_inline volatile(LOCK_PREFIX "addl %1, %0" : "+m" (v->counter) : "ir" (i) : "memory"); } static __always_inline void arch_atomic_sub(int i, atomic_t *v) { - asm volatile(LOCK_PREFIX "subl %1,%0" + asm_inline volatile(LOCK_PREFIX "subl %1, %0" : "+m" (v->counter) : "ir" (i) : "memory"); } @@ -50,14 +50,14 @@ static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v) static __always_inline void arch_atomic_inc(atomic_t *v) { - asm volatile(LOCK_PREFIX "incl %0" + asm_inline volatile(LOCK_PREFIX "incl %0" : "+m" (v->counter) :: "memory"); } #define arch_atomic_inc arch_atomic_inc static __always_inline void arch_atomic_dec(atomic_t *v) { - asm volatile(LOCK_PREFIX "decl %0" + asm_inline volatile(LOCK_PREFIX "decl %0" : "+m" (v->counter) :: "memory"); } #define arch_atomic_dec arch_atomic_dec @@ -116,7 +116,7 @@ static __always_inline int arch_atomic_xchg(atomic_t *v, int new) static __always_inline void arch_atomic_and(int i, atomic_t *v) { - asm volatile(LOCK_PREFIX "andl %1,%0" + asm_inline volatile(LOCK_PREFIX "andl %1, %0" : "+m" (v->counter) : "ir" (i) : "memory"); @@ -134,7 +134,7 @@ static __always_inline int arch_atomic_fetch_and(int i, atomic_t *v) static __always_inline void arch_atomic_or(int i, atomic_t *v) { - asm volatile(LOCK_PREFIX "orl %1,%0" + asm_inline volatile(LOCK_PREFIX "orl %1, %0" : "+m" (v->counter) : "ir" (i) : "memory"); @@ -152,7 +152,7 @@ static __always_inline int arch_atomic_fetch_or(int i, atomic_t *v) static __always_inline void arch_atomic_xor(int i, atomic_t *v) { - asm volatile(LOCK_PREFIX "xorl %1,%0" + asm_inline volatile(LOCK_PREFIX "xorl %1, %0" : "+m" (v->counter) : "ir" (i) : "memory"); diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h index 6c6e9b9f98a4..ab838205c1c6 100644 --- a/arch/x86/include/asm/atomic64_32.h +++ b/arch/x86/include/asm/atomic64_32.h @@ -48,17 +48,20 @@ static __always_inline s64 arch_atomic64_read_nonatomic(const atomic64_t *v) ATOMIC64_EXPORT(atomic64_##sym) #endif -#ifdef CONFIG_X86_CMPXCHG64 -#define __alternative_atomic64(f, g, out, in...) \ - asm volatile("call %c[func]" \ +#ifdef CONFIG_X86_CX8 +#define __alternative_atomic64(f, g, out, in, clobbers...) \ + asm volatile("call %c[func]" \ : ALT_OUTPUT_SP(out) \ - : [func] "i" (atomic64_##g##_cx8), ## in) + : [func] "i" (atomic64_##g##_cx8) \ + COMMA(in) \ + : clobbers) #define ATOMIC64_DECL(sym) ATOMIC64_DECL_ONE(sym##_cx8) #else -#define __alternative_atomic64(f, g, out, in...) \ - alternative_call(atomic64_##f##_386, atomic64_##g##_cx8, \ - X86_FEATURE_CX8, ASM_OUTPUT2(out), ## in) +#define __alternative_atomic64(f, g, out, in, clobbers...) \ + alternative_call(atomic64_##f##_386, atomic64_##g##_cx8, \ + X86_FEATURE_CX8, ASM_OUTPUT(out), \ + ASM_INPUT(in), clobbers) #define ATOMIC64_DECL(sym) ATOMIC64_DECL_ONE(sym##_cx8); \ ATOMIC64_DECL_ONE(sym##_386) @@ -69,8 +72,8 @@ ATOMIC64_DECL_ONE(inc_386); ATOMIC64_DECL_ONE(dec_386); #endif -#define alternative_atomic64(f, out, in...) \ - __alternative_atomic64(f, f, ASM_OUTPUT2(out), ## in) +#define alternative_atomic64(f, out, in, clobbers...) \ + __alternative_atomic64(f, f, ASM_OUTPUT(out), ASM_INPUT(in), clobbers) ATOMIC64_DECL(read); ATOMIC64_DECL(set); @@ -105,9 +108,10 @@ static __always_inline s64 arch_atomic64_xchg(atomic64_t *v, s64 n) s64 o; unsigned high = (unsigned)(n >> 32); unsigned low = (unsigned)n; - alternative_atomic64(xchg, "=&A" (o), - "S" (v), "b" (low), "c" (high) - : "memory"); + alternative_atomic64(xchg, + "=&A" (o), + ASM_INPUT("S" (v), "b" (low), "c" (high)), + "memory"); return o; } #define arch_atomic64_xchg arch_atomic64_xchg @@ -116,23 +120,25 @@ static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i) { unsigned high = (unsigned)(i >> 32); unsigned low = (unsigned)i; - alternative_atomic64(set, /* no output */, - "S" (v), "b" (low), "c" (high) - : "eax", "edx", "memory"); + alternative_atomic64(set, + /* no output */, + ASM_INPUT("S" (v), "b" (low), "c" (high)), + "eax", "edx", "memory"); } static __always_inline s64 arch_atomic64_read(const atomic64_t *v) { s64 r; - alternative_atomic64(read, "=&A" (r), "c" (v) : "memory"); + alternative_atomic64(read, "=&A" (r), "c" (v), "memory"); return r; } static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v) { alternative_atomic64(add_return, - ASM_OUTPUT2("+A" (i), "+c" (v)), - ASM_NO_INPUT_CLOBBER("memory")); + ASM_OUTPUT("+A" (i), "+c" (v)), + /* no input */, + "memory"); return i; } #define arch_atomic64_add_return arch_atomic64_add_return @@ -140,8 +146,9 @@ static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v) static __always_inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v) { alternative_atomic64(sub_return, - ASM_OUTPUT2("+A" (i), "+c" (v)), - ASM_NO_INPUT_CLOBBER("memory")); + ASM_OUTPUT("+A" (i), "+c" (v)), + /* no input */, + "memory"); return i; } #define arch_atomic64_sub_return arch_atomic64_sub_return @@ -149,8 +156,10 @@ static __always_inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v) static __always_inline s64 arch_atomic64_inc_return(atomic64_t *v) { s64 a; - alternative_atomic64(inc_return, "=&A" (a), - "S" (v) : "memory", "ecx"); + alternative_atomic64(inc_return, + "=&A" (a), + "S" (v), + "memory", "ecx"); return a; } #define arch_atomic64_inc_return arch_atomic64_inc_return @@ -158,8 +167,10 @@ static __always_inline s64 arch_atomic64_inc_return(atomic64_t *v) static __always_inline s64 arch_atomic64_dec_return(atomic64_t *v) { s64 a; - alternative_atomic64(dec_return, "=&A" (a), - "S" (v) : "memory", "ecx"); + alternative_atomic64(dec_return, + "=&A" (a), + "S" (v), + "memory", "ecx"); return a; } #define arch_atomic64_dec_return arch_atomic64_dec_return @@ -167,28 +178,34 @@ static __always_inline s64 arch_atomic64_dec_return(atomic64_t *v) static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v) { __alternative_atomic64(add, add_return, - ASM_OUTPUT2("+A" (i), "+c" (v)), - ASM_NO_INPUT_CLOBBER("memory")); + ASM_OUTPUT("+A" (i), "+c" (v)), + /* no input */, + "memory"); } static __always_inline void arch_atomic64_sub(s64 i, atomic64_t *v) { __alternative_atomic64(sub, sub_return, - ASM_OUTPUT2("+A" (i), "+c" (v)), - ASM_NO_INPUT_CLOBBER("memory")); + ASM_OUTPUT("+A" (i), "+c" (v)), + /* no input */, + "memory"); } static __always_inline void arch_atomic64_inc(atomic64_t *v) { - __alternative_atomic64(inc, inc_return, /* no output */, - "S" (v) : "memory", "eax", "ecx", "edx"); + __alternative_atomic64(inc, inc_return, + /* no output */, + "S" (v), + "memory", "eax", "ecx", "edx"); } #define arch_atomic64_inc arch_atomic64_inc static __always_inline void arch_atomic64_dec(atomic64_t *v) { - __alternative_atomic64(dec, dec_return, /* no output */, - "S" (v) : "memory", "eax", "ecx", "edx"); + __alternative_atomic64(dec, dec_return, + /* no output */, + "S" (v), + "memory", "eax", "ecx", "edx"); } #define arch_atomic64_dec arch_atomic64_dec @@ -197,8 +214,9 @@ static __always_inline int arch_atomic64_add_unless(atomic64_t *v, s64 a, s64 u) unsigned low = (unsigned)u; unsigned high = (unsigned)(u >> 32); alternative_atomic64(add_unless, - ASM_OUTPUT2("+A" (a), "+c" (low), "+D" (high)), - "S" (v) : "memory"); + ASM_OUTPUT("+A" (a), "+c" (low), "+D" (high)), + "S" (v), + "memory"); return (int)a; } #define arch_atomic64_add_unless arch_atomic64_add_unless @@ -206,8 +224,10 @@ static __always_inline int arch_atomic64_add_unless(atomic64_t *v, s64 a, s64 u) static __always_inline int arch_atomic64_inc_not_zero(atomic64_t *v) { int r; - alternative_atomic64(inc_not_zero, "=&a" (r), - "S" (v) : "ecx", "edx", "memory"); + alternative_atomic64(inc_not_zero, + "=&a" (r), + "S" (v), + "ecx", "edx", "memory"); return r; } #define arch_atomic64_inc_not_zero arch_atomic64_inc_not_zero @@ -215,8 +235,10 @@ static __always_inline int arch_atomic64_inc_not_zero(atomic64_t *v) static __always_inline s64 arch_atomic64_dec_if_positive(atomic64_t *v) { s64 r; - alternative_atomic64(dec_if_positive, "=&A" (r), - "S" (v) : "ecx", "memory"); + alternative_atomic64(dec_if_positive, + "=&A" (r), + "S" (v), + "ecx", "memory"); return r; } #define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index ae12acae5b06..87b496325b5b 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h @@ -22,14 +22,14 @@ static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i) static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v) { - asm volatile(LOCK_PREFIX "addq %1,%0" + asm_inline volatile(LOCK_PREFIX "addq %1, %0" : "=m" (v->counter) : "er" (i), "m" (v->counter) : "memory"); } static __always_inline void arch_atomic64_sub(s64 i, atomic64_t *v) { - asm volatile(LOCK_PREFIX "subq %1,%0" + asm_inline volatile(LOCK_PREFIX "subq %1, %0" : "=m" (v->counter) : "er" (i), "m" (v->counter) : "memory"); } @@ -42,7 +42,7 @@ static __always_inline bool arch_atomic64_sub_and_test(s64 i, atomic64_t *v) static __always_inline void arch_atomic64_inc(atomic64_t *v) { - asm volatile(LOCK_PREFIX "incq %0" + asm_inline volatile(LOCK_PREFIX "incq %0" : "=m" (v->counter) : "m" (v->counter) : "memory"); } @@ -50,7 +50,7 @@ static __always_inline void arch_atomic64_inc(atomic64_t *v) static __always_inline void arch_atomic64_dec(atomic64_t *v) { - asm volatile(LOCK_PREFIX "decq %0" + asm_inline volatile(LOCK_PREFIX "decq %0" : "=m" (v->counter) : "m" (v->counter) : "memory"); } @@ -110,7 +110,7 @@ static __always_inline s64 arch_atomic64_xchg(atomic64_t *v, s64 new) static __always_inline void arch_atomic64_and(s64 i, atomic64_t *v) { - asm volatile(LOCK_PREFIX "andq %1,%0" + asm_inline volatile(LOCK_PREFIX "andq %1, %0" : "+m" (v->counter) : "er" (i) : "memory"); @@ -128,7 +128,7 @@ static __always_inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v) static __always_inline void arch_atomic64_or(s64 i, atomic64_t *v) { - asm volatile(LOCK_PREFIX "orq %1,%0" + asm_inline volatile(LOCK_PREFIX "orq %1, %0" : "+m" (v->counter) : "er" (i) : "memory"); @@ -146,7 +146,7 @@ static __always_inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v) static __always_inline void arch_atomic64_xor(s64 i, atomic64_t *v) { - asm volatile(LOCK_PREFIX "xorq %1,%0" + asm_inline volatile(LOCK_PREFIX "xorq %1, %0" : "+m" (v->counter) : "er" (i) : "memory"); diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index b96d45944c59..100413aff640 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -52,12 +52,12 @@ static __always_inline void arch_set_bit(long nr, volatile unsigned long *addr) { if (__builtin_constant_p(nr)) { - asm volatile(LOCK_PREFIX "orb %b1,%0" + asm_inline volatile(LOCK_PREFIX "orb %b1,%0" : CONST_MASK_ADDR(nr, addr) : "iq" (CONST_MASK(nr)) : "memory"); } else { - asm volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0" + asm_inline volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0" : : RLONG_ADDR(addr), "Ir" (nr) : "memory"); } } @@ -72,11 +72,11 @@ static __always_inline void arch_clear_bit(long nr, volatile unsigned long *addr) { if (__builtin_constant_p(nr)) { - asm volatile(LOCK_PREFIX "andb %b1,%0" + asm_inline volatile(LOCK_PREFIX "andb %b1,%0" : CONST_MASK_ADDR(nr, addr) : "iq" (~CONST_MASK(nr))); } else { - asm volatile(LOCK_PREFIX __ASM_SIZE(btr) " %1,%0" + asm_inline volatile(LOCK_PREFIX __ASM_SIZE(btr) " %1,%0" : : RLONG_ADDR(addr), "Ir" (nr) : "memory"); } } @@ -98,7 +98,7 @@ static __always_inline bool arch_xor_unlock_is_negative_byte(unsigned long mask, volatile unsigned long *addr) { bool negative; - asm volatile(LOCK_PREFIX "xorb %2,%1" + asm_inline volatile(LOCK_PREFIX "xorb %2,%1" CC_SET(s) : CC_OUT(s) (negative), WBYTE_ADDR(addr) : "iq" ((char)mask) : "memory"); @@ -122,11 +122,11 @@ static __always_inline void arch_change_bit(long nr, volatile unsigned long *addr) { if (__builtin_constant_p(nr)) { - asm volatile(LOCK_PREFIX "xorb %b1,%0" + asm_inline volatile(LOCK_PREFIX "xorb %b1,%0" : CONST_MASK_ADDR(nr, addr) : "iq" (CONST_MASK(nr))); } else { - asm volatile(LOCK_PREFIX __ASM_SIZE(btc) " %1,%0" + asm_inline volatile(LOCK_PREFIX __ASM_SIZE(btc) " %1,%0" : : RLONG_ADDR(addr), "Ir" (nr) : "memory"); } } diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h index 3e5b111e619d..3f02ff6d333d 100644 --- a/arch/x86/include/asm/boot.h +++ b/arch/x86/include/asm/boot.h @@ -74,7 +74,7 @@ # define BOOT_STACK_SIZE 0x1000 #endif -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ extern unsigned int output_len; extern const unsigned long kernel_text_size; extern const unsigned long kernel_total_size; diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index e85ac0c7c039..f0e9acf72547 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h @@ -17,13 +17,17 @@ * In clang we have UD1s reporting UBSAN failures on X86, 64 and 32bit. */ #define INSN_ASOP 0x67 +#define INSN_LOCK 0xf0 #define OPCODE_ESCAPE 0x0f #define SECOND_BYTE_OPCODE_UD1 0xb9 #define SECOND_BYTE_OPCODE_UD2 0x0b #define BUG_NONE 0xffff -#define BUG_UD1 0xfffe -#define BUG_UD2 0xfffd +#define BUG_UD2 0xfffe +#define BUG_UD1 0xfffd +#define BUG_UD1_UBSAN 0xfffc +#define BUG_EA 0xffea +#define BUG_LOCK 0xfff0 #ifdef CONFIG_GENERIC_BUG diff --git a/arch/x86/include/asm/cfi.h b/arch/x86/include/asm/cfi.h index 31d19c815f99..3e51ba459154 100644 --- a/arch/x86/include/asm/cfi.h +++ b/arch/x86/include/asm/cfi.h @@ -101,6 +101,16 @@ enum cfi_mode { extern enum cfi_mode cfi_mode; +#ifdef CONFIG_FINEIBT_BHI +extern bool cfi_bhi; +#else +#define cfi_bhi (0) +#endif + +typedef u8 bhi_thunk[32]; +extern bhi_thunk __bhi_args[]; +extern bhi_thunk __bhi_args_end[]; + struct pt_regs; #ifdef CONFIG_CFI_CLANG @@ -125,6 +135,18 @@ static inline int cfi_get_offset(void) #define cfi_get_offset cfi_get_offset extern u32 cfi_get_func_hash(void *func); +extern int cfi_get_func_arity(void *func); + +#ifdef CONFIG_FINEIBT +extern bool decode_fineibt_insn(struct pt_regs *regs, unsigned long *target, u32 *type); +#else +static inline bool +decode_fineibt_insn(struct pt_regs *regs, unsigned long *target, u32 *type) +{ + return false; +} + +#endif #else static inline enum bug_trap_type handle_cfi_failure(struct pt_regs *regs) @@ -137,6 +159,10 @@ static inline u32 cfi_get_func_hash(void *func) { return 0; } +static inline int cfi_get_func_arity(void *func) +{ + return 0; +} #endif /* CONFIG_CFI_CLANG */ #if HAS_KERNEL_IBT == 1 diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h index fd8afc1f5f6b..b61f32c3459f 100644 --- a/arch/x86/include/asm/cmpxchg.h +++ b/arch/x86/include/asm/cmpxchg.h @@ -44,22 +44,22 @@ extern void __add_wrong_size(void) __typeof__ (*(ptr)) __ret = (arg); \ switch (sizeof(*(ptr))) { \ case __X86_CASE_B: \ - asm volatile (lock #op "b %b0, %1\n" \ + asm_inline volatile (lock #op "b %b0, %1" \ : "+q" (__ret), "+m" (*(ptr)) \ : : "memory", "cc"); \ break; \ case __X86_CASE_W: \ - asm volatile (lock #op "w %w0, %1\n" \ + asm_inline volatile (lock #op "w %w0, %1" \ : "+r" (__ret), "+m" (*(ptr)) \ : : "memory", "cc"); \ break; \ case __X86_CASE_L: \ - asm volatile (lock #op "l %0, %1\n" \ + asm_inline volatile (lock #op "l %0, %1" \ : "+r" (__ret), "+m" (*(ptr)) \ : : "memory", "cc"); \ break; \ case __X86_CASE_Q: \ - asm volatile (lock #op "q %q0, %1\n" \ + asm_inline volatile (lock #op "q %q0, %1" \ : "+r" (__ret), "+m" (*(ptr)) \ : : "memory", "cc"); \ break; \ @@ -91,7 +91,7 @@ extern void __add_wrong_size(void) case __X86_CASE_B: \ { \ volatile u8 *__ptr = (volatile u8 *)(ptr); \ - asm volatile(lock "cmpxchgb %2,%1" \ + asm_inline volatile(lock "cmpxchgb %2, %1" \ : "=a" (__ret), "+m" (*__ptr) \ : "q" (__new), "0" (__old) \ : "memory"); \ @@ -100,7 +100,7 @@ extern void __add_wrong_size(void) case __X86_CASE_W: \ { \ volatile u16 *__ptr = (volatile u16 *)(ptr); \ - asm volatile(lock "cmpxchgw %2,%1" \ + asm_inline volatile(lock "cmpxchgw %2, %1" \ : "=a" (__ret), "+m" (*__ptr) \ : "r" (__new), "0" (__old) \ : "memory"); \ @@ -109,7 +109,7 @@ extern void __add_wrong_size(void) case __X86_CASE_L: \ { \ volatile u32 *__ptr = (volatile u32 *)(ptr); \ - asm volatile(lock "cmpxchgl %2,%1" \ + asm_inline volatile(lock "cmpxchgl %2, %1" \ : "=a" (__ret), "+m" (*__ptr) \ : "r" (__new), "0" (__old) \ : "memory"); \ @@ -118,7 +118,7 @@ extern void __add_wrong_size(void) case __X86_CASE_Q: \ { \ volatile u64 *__ptr = (volatile u64 *)(ptr); \ - asm volatile(lock "cmpxchgq %2,%1" \ + asm_inline volatile(lock "cmpxchgq %2, %1" \ : "=a" (__ret), "+m" (*__ptr) \ : "r" (__new), "0" (__old) \ : "memory"); \ @@ -165,7 +165,7 @@ extern void __add_wrong_size(void) case __X86_CASE_B: \ { \ volatile u8 *__ptr = (volatile u8 *)(_ptr); \ - asm volatile(lock "cmpxchgb %[new], %[ptr]" \ + asm_inline volatile(lock "cmpxchgb %[new], %[ptr]" \ CC_SET(z) \ : CC_OUT(z) (success), \ [ptr] "+m" (*__ptr), \ @@ -177,7 +177,7 @@ extern void __add_wrong_size(void) case __X86_CASE_W: \ { \ volatile u16 *__ptr = (volatile u16 *)(_ptr); \ - asm volatile(lock "cmpxchgw %[new], %[ptr]" \ + asm_inline volatile(lock "cmpxchgw %[new], %[ptr]" \ CC_SET(z) \ : CC_OUT(z) (success), \ [ptr] "+m" (*__ptr), \ @@ -189,7 +189,7 @@ extern void __add_wrong_size(void) case __X86_CASE_L: \ { \ volatile u32 *__ptr = (volatile u32 *)(_ptr); \ - asm volatile(lock "cmpxchgl %[new], %[ptr]" \ + asm_inline volatile(lock "cmpxchgl %[new], %[ptr]" \ CC_SET(z) \ : CC_OUT(z) (success), \ [ptr] "+m" (*__ptr), \ @@ -201,7 +201,7 @@ extern void __add_wrong_size(void) case __X86_CASE_Q: \ { \ volatile u64 *__ptr = (volatile u64 *)(_ptr); \ - asm volatile(lock "cmpxchgq %[new], %[ptr]" \ + asm_inline volatile(lock "cmpxchgq %[new], %[ptr]" \ CC_SET(z) \ : CC_OUT(z) (success), \ [ptr] "+m" (*__ptr), \ diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index 8806c646d452..371f7906019e 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h @@ -19,7 +19,7 @@ union __u64_halves { union __u64_halves o = { .full = (_old), }, \ n = { .full = (_new), }; \ \ - asm volatile(_lock "cmpxchg8b %[ptr]" \ + asm_inline volatile(_lock "cmpxchg8b %[ptr]" \ : [ptr] "+m" (*(_ptr)), \ "+a" (o.low), "+d" (o.high) \ : "b" (n.low), "c" (n.high) \ @@ -45,7 +45,7 @@ static __always_inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new n = { .full = (_new), }; \ bool ret; \ \ - asm volatile(_lock "cmpxchg8b %[ptr]" \ + asm_inline volatile(_lock "cmpxchg8b %[ptr]" \ CC_SET(e) \ : CC_OUT(e) (ret), \ [ptr] "+m" (*(_ptr)), \ @@ -69,7 +69,7 @@ static __always_inline bool __try_cmpxchg64_local(volatile u64 *ptr, u64 *oldp, return __arch_try_cmpxchg64(ptr, oldp, new,); } -#ifdef CONFIG_X86_CMPXCHG64 +#ifdef CONFIG_X86_CX8 #define arch_cmpxchg64 __cmpxchg64 diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h index 5e241306db26..71d1e72ed879 100644 --- a/arch/x86/include/asm/cmpxchg_64.h +++ b/arch/x86/include/asm/cmpxchg_64.h @@ -38,7 +38,7 @@ union __u128_halves { union __u128_halves o = { .full = (_old), }, \ n = { .full = (_new), }; \ \ - asm volatile(_lock "cmpxchg16b %[ptr]" \ + asm_inline volatile(_lock "cmpxchg16b %[ptr]" \ : [ptr] "+m" (*(_ptr)), \ "+a" (o.low), "+d" (o.high) \ : "b" (n.low), "c" (n.high) \ @@ -65,7 +65,7 @@ static __always_inline u128 arch_cmpxchg128_local(volatile u128 *ptr, u128 old, n = { .full = (_new), }; \ bool ret; \ \ - asm volatile(_lock "cmpxchg16b %[ptr]" \ + asm_inline volatile(_lock "cmpxchg16b %[ptr]" \ CC_SET(e) \ : CC_OUT(e) (ret), \ [ptr] "+m" (*(_ptr)), \ diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index 98eced5084ca..ad235dda1ded 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -12,7 +12,6 @@ #ifndef CONFIG_SMP #define cpu_physical_id(cpu) boot_cpu_physical_apicid #define cpu_acpi_id(cpu) 0 -#define safe_smp_processor_id() 0 #endif /* CONFIG_SMP */ #ifdef CONFIG_HOTPLUG_CPU @@ -50,20 +49,6 @@ static inline void split_lock_init(void) {} static inline void bus_lock_init(void) {} #endif -#ifdef CONFIG_CPU_SUP_INTEL -u8 get_this_hybrid_cpu_type(void); -u32 get_this_hybrid_cpu_native_id(void); -#else -static inline u8 get_this_hybrid_cpu_type(void) -{ - return 0; -} - -static inline u32 get_this_hybrid_cpu_native_id(void) -{ - return 0; -} -#endif #ifdef CONFIG_IA32_FEAT_CTL void init_ia32_feat_ctl(struct cpuinfo_x86 *c); #else diff --git a/arch/x86/include/asm/cpu_device_id.h b/arch/x86/include/asm/cpu_device_id.h index ba32e0f44cba..6be777a06944 100644 --- a/arch/x86/include/asm/cpu_device_id.h +++ b/arch/x86/include/asm/cpu_device_id.h @@ -57,7 +57,7 @@ #define X86_CPU_ID_FLAG_ENTRY_VALID BIT(0) /** - * X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE - Base macro for CPU matching + * X86_MATCH_CPU - Base macro for CPU matching * @_vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY * The name is expanded to X86_VENDOR_@_vendor * @_family: The family number or X86_FAMILY_ANY @@ -74,47 +74,18 @@ * into another macro at the usage site for good reasons, then please * start this local macro with X86_MATCH to allow easy grepping. */ -#define X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(_vendor, _family, _model, \ - _steppings, _feature, _data) { \ - .vendor = X86_VENDOR_##_vendor, \ - .family = _family, \ - .model = _model, \ - .steppings = _steppings, \ - .feature = _feature, \ - .flags = X86_CPU_ID_FLAG_ENTRY_VALID, \ - .driver_data = (unsigned long) _data \ -} - -#define X86_MATCH_VENDORID_FAM_MODEL_STEPPINGS_FEATURE(_vendor, _family, _model, \ - _steppings, _feature, _data) { \ +#define X86_MATCH_CPU(_vendor, _family, _model, _steppings, _feature, _type, _data) { \ .vendor = _vendor, \ .family = _family, \ .model = _model, \ .steppings = _steppings, \ .feature = _feature, \ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, \ + .type = _type, \ .driver_data = (unsigned long) _data \ } /** - * X86_MATCH_VENDOR_FAM_MODEL_FEATURE - Macro for CPU matching - * @_vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY - * The name is expanded to X86_VENDOR_@_vendor - * @_family: The family number or X86_FAMILY_ANY - * @_model: The model number, model constant or X86_MODEL_ANY - * @_feature: A X86_FEATURE bit or X86_FEATURE_ANY - * @_data: Driver specific data or NULL. The internal storage - * format is unsigned long. The supplied value, pointer - * etc. is casted to unsigned long internally. - * - * The steppings arguments of X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE() is - * set to wildcards. - */ -#define X86_MATCH_VENDOR_FAM_MODEL_FEATURE(vendor, family, model, feature, data) \ - X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(vendor, family, model, \ - X86_STEPPING_ANY, feature, data) - -/** * X86_MATCH_VENDOR_FAM_FEATURE - Macro for matching vendor, family and CPU feature * @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY * The name is expanded to X86_VENDOR_@vendor @@ -123,13 +94,10 @@ * @data: Driver specific data or NULL. The internal storage * format is unsigned long. The supplied value, pointer * etc. is casted to unsigned long internally. - * - * All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are - * set to wildcards. */ -#define X86_MATCH_VENDOR_FAM_FEATURE(vendor, family, feature, data) \ - X86_MATCH_VENDOR_FAM_MODEL_FEATURE(vendor, family, \ - X86_MODEL_ANY, feature, data) +#define X86_MATCH_VENDOR_FAM_FEATURE(vendor, family, feature, data) \ + X86_MATCH_CPU(X86_VENDOR_##vendor, family, X86_MODEL_ANY, \ + X86_STEPPING_ANY, feature, X86_CPU_TYPE_ANY, data) /** * X86_MATCH_VENDOR_FEATURE - Macro for matching vendor and CPU feature @@ -139,12 +107,10 @@ * @data: Driver specific data or NULL. The internal storage * format is unsigned long. The supplied value, pointer * etc. is casted to unsigned long internally. - * - * All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are - * set to wildcards. */ -#define X86_MATCH_VENDOR_FEATURE(vendor, feature, data) \ - X86_MATCH_VENDOR_FAM_FEATURE(vendor, X86_FAMILY_ANY, feature, data) +#define X86_MATCH_VENDOR_FEATURE(vendor, feature, data) \ + X86_MATCH_CPU(X86_VENDOR_##vendor, X86_FAMILY_ANY, X86_MODEL_ANY, \ + X86_STEPPING_ANY, feature, X86_CPU_TYPE_ANY, data) /** * X86_MATCH_FEATURE - Macro for matching a CPU feature @@ -152,12 +118,10 @@ * @data: Driver specific data or NULL. The internal storage * format is unsigned long. The supplied value, pointer * etc. is casted to unsigned long internally. - * - * All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are - * set to wildcards. */ -#define X86_MATCH_FEATURE(feature, data) \ - X86_MATCH_VENDOR_FEATURE(ANY, feature, data) +#define X86_MATCH_FEATURE(feature, data) \ + X86_MATCH_CPU(X86_VENDOR_ANY, X86_FAMILY_ANY, X86_MODEL_ANY, \ + X86_STEPPING_ANY, feature, X86_CPU_TYPE_ANY, data) /** * X86_MATCH_VENDOR_FAM_MODEL - Match vendor, family and model @@ -168,13 +132,10 @@ * @data: Driver specific data or NULL. The internal storage * format is unsigned long. The supplied value, pointer * etc. is casted to unsigned long internally. - * - * All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are - * set to wildcards. */ -#define X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, data) \ - X86_MATCH_VENDOR_FAM_MODEL_FEATURE(vendor, family, model, \ - X86_FEATURE_ANY, data) +#define X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, data) \ + X86_MATCH_CPU(X86_VENDOR_##vendor, family, model, X86_STEPPING_ANY, \ + X86_FEATURE_ANY, X86_CPU_TYPE_ANY, data) /** * X86_MATCH_VENDOR_FAM - Match vendor and family @@ -184,12 +145,10 @@ * @data: Driver specific data or NULL. The internal storage * format is unsigned long. The supplied value, pointer * etc. is casted to unsigned long internally. - * - * All other missing arguments to X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are - * set of wildcards. */ -#define X86_MATCH_VENDOR_FAM(vendor, family, data) \ - X86_MATCH_VENDOR_FAM_MODEL(vendor, family, X86_MODEL_ANY, data) +#define X86_MATCH_VENDOR_FAM(vendor, family, data) \ + X86_MATCH_CPU(X86_VENDOR_##vendor, family, X86_MODEL_ANY, \ + X86_STEPPING_ANY, X86_FEATURE_ANY, X86_CPU_TYPE_ANY, data) /** * X86_MATCH_VFM - Match encoded vendor/family/model @@ -197,34 +156,26 @@ * @data: Driver specific data or NULL. The internal storage * format is unsigned long. The supplied value, pointer * etc. is cast to unsigned long internally. - * - * Stepping and feature are set to wildcards */ -#define X86_MATCH_VFM(vfm, data) \ - X86_MATCH_VENDORID_FAM_MODEL_STEPPINGS_FEATURE( \ - VFM_VENDOR(vfm), \ - VFM_FAMILY(vfm), \ - VFM_MODEL(vfm), \ - X86_STEPPING_ANY, X86_FEATURE_ANY, data) +#define X86_MATCH_VFM(vfm, data) \ + X86_MATCH_CPU(VFM_VENDOR(vfm), VFM_FAMILY(vfm), VFM_MODEL(vfm), \ + X86_STEPPING_ANY, X86_FEATURE_ANY, X86_CPU_TYPE_ANY, data) #define __X86_STEPPINGS(mins, maxs) GENMASK(maxs, mins) /** - * X86_MATCH_VFM_STEPPINGS - Match encoded vendor/family/model/stepping + * X86_MATCH_VFM_STEPS - Match encoded vendor/family/model and steppings + * range. * @vfm: Encoded 8-bits each for vendor, family, model - * @steppings: Bitmask of steppings to match + * @min_step: Lowest stepping number to match + * @max_step: Highest stepping number to match * @data: Driver specific data or NULL. The internal storage * format is unsigned long. The supplied value, pointer * etc. is cast to unsigned long internally. - * - * feature is set to wildcard */ -#define X86_MATCH_VFM_STEPS(vfm, min_step, max_step, data) \ - X86_MATCH_VENDORID_FAM_MODEL_STEPPINGS_FEATURE( \ - VFM_VENDOR(vfm), \ - VFM_FAMILY(vfm), \ - VFM_MODEL(vfm), \ - __X86_STEPPINGS(min_step, max_step), \ - X86_FEATURE_ANY, data) +#define X86_MATCH_VFM_STEPS(vfm, min_step, max_step, data) \ + X86_MATCH_CPU(VFM_VENDOR(vfm), VFM_FAMILY(vfm), VFM_MODEL(vfm), \ + __X86_STEPPINGS(min_step, max_step), X86_FEATURE_ANY, \ + X86_CPU_TYPE_ANY, data) /** * X86_MATCH_VFM_FEATURE - Match encoded vendor/family/model/feature @@ -233,15 +184,22 @@ * @data: Driver specific data or NULL. The internal storage * format is unsigned long. The supplied value, pointer * etc. is cast to unsigned long internally. - * - * Steppings is set to wildcard */ -#define X86_MATCH_VFM_FEATURE(vfm, feature, data) \ - X86_MATCH_VENDORID_FAM_MODEL_STEPPINGS_FEATURE( \ - VFM_VENDOR(vfm), \ - VFM_FAMILY(vfm), \ - VFM_MODEL(vfm), \ - X86_STEPPING_ANY, feature, data) +#define X86_MATCH_VFM_FEATURE(vfm, feature, data) \ + X86_MATCH_CPU(VFM_VENDOR(vfm), VFM_FAMILY(vfm), VFM_MODEL(vfm), \ + X86_STEPPING_ANY, feature, X86_CPU_TYPE_ANY, data) + +/** + * X86_MATCH_VFM_CPU_TYPE - Match encoded vendor/family/model/type + * @vfm: Encoded 8-bits each for vendor, family, model + * @type: CPU type e.g. P-core, E-core + * @data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is cast to unsigned long internally. + */ +#define X86_MATCH_VFM_CPU_TYPE(vfm, type, data) \ + X86_MATCH_CPU(VFM_VENDOR(vfm), VFM_FAMILY(vfm), VFM_MODEL(vfm), \ + X86_STEPPING_ANY, X86_FEATURE_ANY, type, data) extern const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match); extern bool x86_match_min_microcode_rev(const struct x86_cpu_id *table); diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index de1ad09fe8d7..893cbca37fe9 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -4,11 +4,12 @@ #include <asm/processor.h> -#if defined(__KERNEL__) && !defined(__ASSEMBLY__) +#if defined(__KERNEL__) && !defined(__ASSEMBLER__) #include <asm/asm.h> #include <linux/bitops.h> #include <asm/alternative.h> +#include <asm/cpufeaturemasks.h> enum cpuid_leafs { @@ -37,92 +38,19 @@ enum cpuid_leafs NR_CPUID_WORDS, }; -#define X86_CAP_FMT_NUM "%d:%d" -#define x86_cap_flag_num(flag) ((flag) >> 5), ((flag) & 31) - extern const char * const x86_cap_flags[NCAPINTS*32]; extern const char * const x86_power_flags[32]; -#define X86_CAP_FMT "%s" -#define x86_cap_flag(flag) x86_cap_flags[flag] /* * In order to save room, we index into this array by doing * X86_BUG_<name> - NCAPINTS*32. */ extern const char * const x86_bug_flags[NBUGINTS*32]; +#define x86_bug_flag(flag) x86_bug_flags[flag] #define test_cpu_cap(c, bit) \ arch_test_bit(bit, (unsigned long *)((c)->x86_capability)) -/* - * There are 32 bits/features in each mask word. The high bits - * (selected with (bit>>5) give us the word number and the low 5 - * bits give us the bit/feature number inside the word. - * (1UL<<((bit)&31) gives us a mask for the feature_bit so we can - * see if it is set in the mask word. - */ -#define CHECK_BIT_IN_MASK_WORD(maskname, word, bit) \ - (((bit)>>5)==(word) && (1UL<<((bit)&31) & maskname##word )) - -/* - * {REQUIRED,DISABLED}_MASK_CHECK below may seem duplicated with the - * following BUILD_BUG_ON_ZERO() check but when NCAPINTS gets changed, all - * header macros which use NCAPINTS need to be changed. The duplicated macro - * use causes the compiler to issue errors for all headers so that all usage - * sites can be corrected. - */ -#define REQUIRED_MASK_BIT_SET(feature_bit) \ - ( CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 0, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 1, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 2, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 3, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 4, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 5, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 6, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 7, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 8, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 9, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 10, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 11, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 12, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 13, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 14, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 15, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 16, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 19, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 20, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 21, feature_bit) || \ - REQUIRED_MASK_CHECK || \ - BUILD_BUG_ON_ZERO(NCAPINTS != 22)) - -#define DISABLED_MASK_BIT_SET(feature_bit) \ - ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 1, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 2, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 3, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 4, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 5, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 6, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 7, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 8, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 9, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 10, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 11, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 12, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 13, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 14, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 15, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 16, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 19, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 20, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 21, feature_bit) || \ - DISABLED_MASK_CHECK || \ - BUILD_BUG_ON_ZERO(NCAPINTS != 22)) - #define cpu_has(c, bit) \ (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ test_cpu_cap(c, bit)) @@ -149,6 +77,7 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; extern void setup_clear_cpu_cap(unsigned int bit); extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit); +void check_cpufeature_deps(struct cpuinfo_x86 *c); #define setup_force_cpu_cap(bit) do { \ \ @@ -208,5 +137,5 @@ t_no: #define CPU_FEATURE_TYPEVAL boot_cpu_data.x86_vendor, boot_cpu_data.x86, \ boot_cpu_data.x86_model -#endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */ +#endif /* defined(__KERNEL__) && !defined(__ASSEMBLER__) */ #endif /* _ASM_X86_CPUFEATURE_H */ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 508c0dad116b..c0462be0c5f6 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -2,14 +2,6 @@ #ifndef _ASM_X86_CPUFEATURES_H #define _ASM_X86_CPUFEATURES_H -#ifndef _ASM_X86_REQUIRED_FEATURES_H -#include <asm/required-features.h> -#endif - -#ifndef _ASM_X86_DISABLED_FEATURES_H -#include <asm/disabled-features.h> -#endif - /* * Defines x86 CPU feature bits */ @@ -338,6 +330,7 @@ #define X86_FEATURE_CLZERO (13*32+ 0) /* "clzero" CLZERO instruction */ #define X86_FEATURE_IRPERF (13*32+ 1) /* "irperf" Instructions Retired Count */ #define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* "xsaveerptr" Always save/restore FP error pointers */ +#define X86_FEATURE_INVLPGB (13*32+ 3) /* INVLPGB and TLBSYNC instructions supported */ #define X86_FEATURE_RDPRU (13*32+ 4) /* "rdpru" Read processor register at user level */ #define X86_FEATURE_WBNOINVD (13*32+ 9) /* "wbnoinvd" WBNOINVD instruction */ #define X86_FEATURE_AMD_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ diff --git a/arch/x86/include/asm/cpuid.h b/arch/x86/include/asm/cpuid.h index b2b9b4ef3dae..d5749b25fa10 100644 --- a/arch/x86/include/asm/cpuid.h +++ b/arch/x86/include/asm/cpuid.h @@ -1,222 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* - * CPUID-related helpers/definitions - */ #ifndef _ASM_X86_CPUID_H #define _ASM_X86_CPUID_H -#include <linux/types.h> - -#include <asm/string.h> - -struct cpuid_regs { - u32 eax, ebx, ecx, edx; -}; - -enum cpuid_regs_idx { - CPUID_EAX = 0, - CPUID_EBX, - CPUID_ECX, - CPUID_EDX, -}; - -#define CPUID_LEAF_MWAIT 0x5 -#define CPUID_LEAF_DCA 0x9 -#define CPUID_LEAF_XSTATE 0x0d -#define CPUID_LEAF_TSC 0x15 -#define CPUID_LEAF_FREQ 0x16 -#define CPUID_LEAF_TILE 0x1d - -#ifdef CONFIG_X86_32 -bool have_cpuid_p(void); -#else -static inline bool have_cpuid_p(void) -{ - return true; -} -#endif -static inline void native_cpuid(unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx) -{ - /* ecx is often an input as well as an output. */ - asm volatile("cpuid" - : "=a" (*eax), - "=b" (*ebx), - "=c" (*ecx), - "=d" (*edx) - : "0" (*eax), "2" (*ecx) - : "memory"); -} - -#define native_cpuid_reg(reg) \ -static inline unsigned int native_cpuid_##reg(unsigned int op) \ -{ \ - unsigned int eax = op, ebx, ecx = 0, edx; \ - \ - native_cpuid(&eax, &ebx, &ecx, &edx); \ - \ - return reg; \ -} - -/* - * Native CPUID functions returning a single datum. - */ -native_cpuid_reg(eax) -native_cpuid_reg(ebx) -native_cpuid_reg(ecx) -native_cpuid_reg(edx) - -#ifdef CONFIG_PARAVIRT_XXL -#include <asm/paravirt.h> -#else -#define __cpuid native_cpuid -#endif - -/* - * Generic CPUID function - * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx - * resulting in stale register contents being returned. - */ -static inline void cpuid(unsigned int op, - unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx) -{ - *eax = op; - *ecx = 0; - __cpuid(eax, ebx, ecx, edx); -} - -/* Some CPUID calls want 'count' to be placed in ecx */ -static inline void cpuid_count(unsigned int op, int count, - unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx) -{ - *eax = op; - *ecx = count; - __cpuid(eax, ebx, ecx, edx); -} - -/* - * CPUID functions returning a single datum - */ -static inline unsigned int cpuid_eax(unsigned int op) -{ - unsigned int eax, ebx, ecx, edx; - - cpuid(op, &eax, &ebx, &ecx, &edx); - - return eax; -} - -static inline unsigned int cpuid_ebx(unsigned int op) -{ - unsigned int eax, ebx, ecx, edx; - - cpuid(op, &eax, &ebx, &ecx, &edx); - - return ebx; -} - -static inline unsigned int cpuid_ecx(unsigned int op) -{ - unsigned int eax, ebx, ecx, edx; - - cpuid(op, &eax, &ebx, &ecx, &edx); - - return ecx; -} - -static inline unsigned int cpuid_edx(unsigned int op) -{ - unsigned int eax, ebx, ecx, edx; - - cpuid(op, &eax, &ebx, &ecx, &edx); - - return edx; -} - -static inline void __cpuid_read(unsigned int leaf, unsigned int subleaf, u32 *regs) -{ - regs[CPUID_EAX] = leaf; - regs[CPUID_ECX] = subleaf; - __cpuid(regs + CPUID_EAX, regs + CPUID_EBX, regs + CPUID_ECX, regs + CPUID_EDX); -} - -#define cpuid_subleaf(leaf, subleaf, regs) { \ - static_assert(sizeof(*(regs)) == 16); \ - __cpuid_read(leaf, subleaf, (u32 *)(regs)); \ -} - -#define cpuid_leaf(leaf, regs) { \ - static_assert(sizeof(*(regs)) == 16); \ - __cpuid_read(leaf, 0, (u32 *)(regs)); \ -} - -static inline void __cpuid_read_reg(unsigned int leaf, unsigned int subleaf, - enum cpuid_regs_idx regidx, u32 *reg) -{ - u32 regs[4]; - - __cpuid_read(leaf, subleaf, regs); - *reg = regs[regidx]; -} - -#define cpuid_subleaf_reg(leaf, subleaf, regidx, reg) { \ - static_assert(sizeof(*(reg)) == 4); \ - __cpuid_read_reg(leaf, subleaf, regidx, (u32 *)(reg)); \ -} - -#define cpuid_leaf_reg(leaf, regidx, reg) { \ - static_assert(sizeof(*(reg)) == 4); \ - __cpuid_read_reg(leaf, 0, regidx, (u32 *)(reg)); \ -} - -static __always_inline bool cpuid_function_is_indexed(u32 function) -{ - switch (function) { - case 4: - case 7: - case 0xb: - case 0xd: - case 0xf: - case 0x10: - case 0x12: - case 0x14: - case 0x17: - case 0x18: - case 0x1d: - case 0x1e: - case 0x1f: - case 0x24: - case 0x8000001d: - return true; - } - - return false; -} - -#define for_each_possible_hypervisor_cpuid_base(function) \ - for (function = 0x40000000; function < 0x40010000; function += 0x100) - -static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves) -{ - uint32_t base, eax, signature[3]; - - for_each_possible_hypervisor_cpuid_base(base) { - cpuid(base, &eax, &signature[0], &signature[1], &signature[2]); - - /* - * This must not compile to "call memcmp" because it's called - * from PVH early boot code before instrumentation is set up - * and memcmp() itself may be instrumented. - */ - if (!__builtin_memcmp(sig, signature, 12) && - (leaves == 0 || ((eax - base) >= leaves))) - return base; - } - - return 0; -} +#include <asm/cpuid/api.h> #endif /* _ASM_X86_CPUID_H */ diff --git a/arch/x86/include/asm/cpuid/api.h b/arch/x86/include/asm/cpuid/api.h new file mode 100644 index 000000000000..9c180c9cc58e --- /dev/null +++ b/arch/x86/include/asm/cpuid/api.h @@ -0,0 +1,210 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_CPUID_API_H +#define _ASM_X86_CPUID_API_H + +#include <asm/cpuid/types.h> + +#include <linux/build_bug.h> +#include <linux/types.h> + +#include <asm/string.h> + +/* + * Raw CPUID accessors: + */ + +#ifdef CONFIG_X86_32 +bool have_cpuid_p(void); +#else +static inline bool have_cpuid_p(void) +{ + return true; +} +#endif + +static inline void native_cpuid(u32 *eax, u32 *ebx, + u32 *ecx, u32 *edx) +{ + /* ecx is often an input as well as an output. */ + asm volatile("cpuid" + : "=a" (*eax), + "=b" (*ebx), + "=c" (*ecx), + "=d" (*edx) + : "0" (*eax), "2" (*ecx) + : "memory"); +} + +#define NATIVE_CPUID_REG(reg) \ +static inline u32 native_cpuid_##reg(u32 op) \ +{ \ + u32 eax = op, ebx, ecx = 0, edx; \ + \ + native_cpuid(&eax, &ebx, &ecx, &edx); \ + \ + return reg; \ +} + +/* + * Native CPUID functions returning a single datum: + */ +NATIVE_CPUID_REG(eax) +NATIVE_CPUID_REG(ebx) +NATIVE_CPUID_REG(ecx) +NATIVE_CPUID_REG(edx) + +#ifdef CONFIG_PARAVIRT_XXL +# include <asm/paravirt.h> +#else +# define __cpuid native_cpuid +#endif + +/* + * Generic CPUID function + * + * Clear ECX since some CPUs (Cyrix MII) do not set or clear ECX + * resulting in stale register contents being returned. + */ +static inline void cpuid(u32 op, + u32 *eax, u32 *ebx, + u32 *ecx, u32 *edx) +{ + *eax = op; + *ecx = 0; + __cpuid(eax, ebx, ecx, edx); +} + +/* Some CPUID calls want 'count' to be placed in ECX */ +static inline void cpuid_count(u32 op, int count, + u32 *eax, u32 *ebx, + u32 *ecx, u32 *edx) +{ + *eax = op; + *ecx = count; + __cpuid(eax, ebx, ecx, edx); +} + +/* + * CPUID functions returning a single datum: + */ + +static inline u32 cpuid_eax(u32 op) +{ + u32 eax, ebx, ecx, edx; + + cpuid(op, &eax, &ebx, &ecx, &edx); + + return eax; +} + +static inline u32 cpuid_ebx(u32 op) +{ + u32 eax, ebx, ecx, edx; + + cpuid(op, &eax, &ebx, &ecx, &edx); + + return ebx; +} + +static inline u32 cpuid_ecx(u32 op) +{ + u32 eax, ebx, ecx, edx; + + cpuid(op, &eax, &ebx, &ecx, &edx); + + return ecx; +} + +static inline u32 cpuid_edx(u32 op) +{ + u32 eax, ebx, ecx, edx; + + cpuid(op, &eax, &ebx, &ecx, &edx); + + return edx; +} + +static inline void __cpuid_read(u32 leaf, u32 subleaf, u32 *regs) +{ + regs[CPUID_EAX] = leaf; + regs[CPUID_ECX] = subleaf; + __cpuid(regs + CPUID_EAX, regs + CPUID_EBX, regs + CPUID_ECX, regs + CPUID_EDX); +} + +#define cpuid_subleaf(leaf, subleaf, regs) { \ + static_assert(sizeof(*(regs)) == 16); \ + __cpuid_read(leaf, subleaf, (u32 *)(regs)); \ +} + +#define cpuid_leaf(leaf, regs) { \ + static_assert(sizeof(*(regs)) == 16); \ + __cpuid_read(leaf, 0, (u32 *)(regs)); \ +} + +static inline void __cpuid_read_reg(u32 leaf, u32 subleaf, + enum cpuid_regs_idx regidx, u32 *reg) +{ + u32 regs[4]; + + __cpuid_read(leaf, subleaf, regs); + *reg = regs[regidx]; +} + +#define cpuid_subleaf_reg(leaf, subleaf, regidx, reg) { \ + static_assert(sizeof(*(reg)) == 4); \ + __cpuid_read_reg(leaf, subleaf, regidx, (u32 *)(reg)); \ +} + +#define cpuid_leaf_reg(leaf, regidx, reg) { \ + static_assert(sizeof(*(reg)) == 4); \ + __cpuid_read_reg(leaf, 0, regidx, (u32 *)(reg)); \ +} + +static __always_inline bool cpuid_function_is_indexed(u32 function) +{ + switch (function) { + case 4: + case 7: + case 0xb: + case 0xd: + case 0xf: + case 0x10: + case 0x12: + case 0x14: + case 0x17: + case 0x18: + case 0x1d: + case 0x1e: + case 0x1f: + case 0x24: + case 0x8000001d: + return true; + } + + return false; +} + +#define for_each_possible_hypervisor_cpuid_base(function) \ + for (function = 0x40000000; function < 0x40010000; function += 0x100) + +static inline u32 hypervisor_cpuid_base(const char *sig, u32 leaves) +{ + u32 base, eax, signature[3]; + + for_each_possible_hypervisor_cpuid_base(base) { + cpuid(base, &eax, &signature[0], &signature[1], &signature[2]); + + /* + * This must not compile to "call memcmp" because it's called + * from PVH early boot code before instrumentation is set up + * and memcmp() itself may be instrumented. + */ + if (!__builtin_memcmp(sig, signature, 12) && + (leaves == 0 || ((eax - base) >= leaves))) + return base; + } + + return 0; +} + +#endif /* _ASM_X86_CPUID_API_H */ diff --git a/arch/x86/include/asm/cpuid/types.h b/arch/x86/include/asm/cpuid/types.h new file mode 100644 index 000000000000..8582e27e836d --- /dev/null +++ b/arch/x86/include/asm/cpuid/types.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_CPUID_TYPES_H +#define _ASM_X86_CPUID_TYPES_H + +#include <linux/types.h> + +/* + * Types for raw CPUID access: + */ + +struct cpuid_regs { + u32 eax; + u32 ebx; + u32 ecx; + u32 edx; +}; + +enum cpuid_regs_idx { + CPUID_EAX = 0, + CPUID_EBX, + CPUID_ECX, + CPUID_EDX, +}; + +#define CPUID_LEAF_MWAIT 0x05 +#define CPUID_LEAF_DCA 0x09 +#define CPUID_LEAF_XSTATE 0x0d +#define CPUID_LEAF_TSC 0x15 +#define CPUID_LEAF_FREQ 0x16 +#define CPUID_LEAF_TILE 0x1d + +#endif /* _ASM_X86_CPUID_TYPES_H */ diff --git a/arch/x86/include/asm/cpumask.h b/arch/x86/include/asm/cpumask.h index 4acfd57de8f1..70f6b60ad67b 100644 --- a/arch/x86/include/asm/cpumask.h +++ b/arch/x86/include/asm/cpumask.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_CPUMASK_H #define _ASM_X86_CPUMASK_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/cpumask.h> extern void setup_cpu_local_masks(void); @@ -34,5 +34,5 @@ static __always_inline void arch_cpumask_clear_cpu(int cpu, struct cpumask *dstp #define arch_cpu_is_offline(cpu) unlikely(!arch_cpu_online(cpu)) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_CPUMASK_H */ diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h index bf5953883ec3..cc4a3f725b37 100644 --- a/arch/x86/include/asm/current.h +++ b/arch/x86/include/asm/current.h @@ -5,52 +5,28 @@ #include <linux/build_bug.h> #include <linux/compiler.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/cache.h> #include <asm/percpu.h> struct task_struct; -struct pcpu_hot { - union { - struct { - struct task_struct *current_task; - int preempt_count; - int cpu_number; -#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING - u64 call_depth; -#endif - unsigned long top_of_stack; - void *hardirq_stack_ptr; - u16 softirq_pending; -#ifdef CONFIG_X86_64 - bool hardirq_stack_inuse; -#else - void *softirq_stack_ptr; -#endif - }; - u8 pad[64]; - }; -}; -static_assert(sizeof(struct pcpu_hot) == 64); - -DECLARE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot); - -/* const-qualified alias to pcpu_hot, aliased by linker. */ -DECLARE_PER_CPU_ALIGNED(const struct pcpu_hot __percpu_seg_override, - const_pcpu_hot); +DECLARE_PER_CPU_CACHE_HOT(struct task_struct *, current_task); +/* const-qualified alias provided by the linker. */ +DECLARE_PER_CPU_CACHE_HOT(struct task_struct * const __percpu_seg_override, + const_current_task); static __always_inline struct task_struct *get_current(void) { if (IS_ENABLED(CONFIG_USE_X86_SEG_SUPPORT)) - return this_cpu_read_const(const_pcpu_hot.current_task); + return this_cpu_read_const(const_current_task); - return this_cpu_read_stable(pcpu_hot.current_task); + return this_cpu_read_stable(current_task); } #define current get_current() -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_CURRENT_H */ diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 62dc9f59ea76..ec95fe44fa3a 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -46,7 +46,6 @@ struct gdt_page { } __attribute__((aligned(PAGE_SIZE))); DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page); -DECLARE_INIT_PER_CPU(gdt_page); /* Provide the original GDT */ static inline struct desc_struct *get_cpu_gdt_rw(unsigned int cpu) diff --git a/arch/x86/include/asm/desc_defs.h b/arch/x86/include/asm/desc_defs.h index d440a65af8f3..7e6b9314758a 100644 --- a/arch/x86/include/asm/desc_defs.h +++ b/arch/x86/include/asm/desc_defs.h @@ -58,7 +58,7 @@ #define DESC_USER (_DESC_DPL(3)) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> @@ -166,7 +166,7 @@ struct desc_ptr { unsigned long address; } __attribute__((packed)) ; -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* Boot IDT definitions */ #define BOOT_IDT_ENTRIES 32 diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h deleted file mode 100644 index c492bdc97b05..000000000000 --- a/arch/x86/include/asm/disabled-features.h +++ /dev/null @@ -1,161 +0,0 @@ -#ifndef _ASM_X86_DISABLED_FEATURES_H -#define _ASM_X86_DISABLED_FEATURES_H - -/* These features, although they might be available in a CPU - * will not be used because the compile options to support - * them are not present. - * - * This code allows them to be checked and disabled at - * compile time without an explicit #ifdef. Use - * cpu_feature_enabled(). - */ - -#ifdef CONFIG_X86_UMIP -# define DISABLE_UMIP 0 -#else -# define DISABLE_UMIP (1<<(X86_FEATURE_UMIP & 31)) -#endif - -#ifdef CONFIG_X86_64 -# define DISABLE_VME (1<<(X86_FEATURE_VME & 31)) -# define DISABLE_K6_MTRR (1<<(X86_FEATURE_K6_MTRR & 31)) -# define DISABLE_CYRIX_ARR (1<<(X86_FEATURE_CYRIX_ARR & 31)) -# define DISABLE_CENTAUR_MCR (1<<(X86_FEATURE_CENTAUR_MCR & 31)) -# define DISABLE_PCID 0 -#else -# define DISABLE_VME 0 -# define DISABLE_K6_MTRR 0 -# define DISABLE_CYRIX_ARR 0 -# define DISABLE_CENTAUR_MCR 0 -# define DISABLE_PCID (1<<(X86_FEATURE_PCID & 31)) -#endif /* CONFIG_X86_64 */ - -#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS -# define DISABLE_PKU 0 -# define DISABLE_OSPKE 0 -#else -# define DISABLE_PKU (1<<(X86_FEATURE_PKU & 31)) -# define DISABLE_OSPKE (1<<(X86_FEATURE_OSPKE & 31)) -#endif /* CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS */ - -#ifdef CONFIG_X86_5LEVEL -# define DISABLE_LA57 0 -#else -# define DISABLE_LA57 (1<<(X86_FEATURE_LA57 & 31)) -#endif - -#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION -# define DISABLE_PTI 0 -#else -# define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31)) -#endif - -#ifdef CONFIG_MITIGATION_RETPOLINE -# define DISABLE_RETPOLINE 0 -#else -# define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \ - (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31))) -#endif - -#ifdef CONFIG_MITIGATION_RETHUNK -# define DISABLE_RETHUNK 0 -#else -# define DISABLE_RETHUNK (1 << (X86_FEATURE_RETHUNK & 31)) -#endif - -#ifdef CONFIG_MITIGATION_UNRET_ENTRY -# define DISABLE_UNRET 0 -#else -# define DISABLE_UNRET (1 << (X86_FEATURE_UNRET & 31)) -#endif - -#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING -# define DISABLE_CALL_DEPTH_TRACKING 0 -#else -# define DISABLE_CALL_DEPTH_TRACKING (1 << (X86_FEATURE_CALL_DEPTH & 31)) -#endif - -#ifdef CONFIG_ADDRESS_MASKING -# define DISABLE_LAM 0 -#else -# define DISABLE_LAM (1 << (X86_FEATURE_LAM & 31)) -#endif - -#ifdef CONFIG_INTEL_IOMMU_SVM -# define DISABLE_ENQCMD 0 -#else -# define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31)) -#endif - -#ifdef CONFIG_X86_SGX -# define DISABLE_SGX 0 -#else -# define DISABLE_SGX (1 << (X86_FEATURE_SGX & 31)) -#endif - -#ifdef CONFIG_XEN_PV -# define DISABLE_XENPV 0 -#else -# define DISABLE_XENPV (1 << (X86_FEATURE_XENPV & 31)) -#endif - -#ifdef CONFIG_INTEL_TDX_GUEST -# define DISABLE_TDX_GUEST 0 -#else -# define DISABLE_TDX_GUEST (1 << (X86_FEATURE_TDX_GUEST & 31)) -#endif - -#ifdef CONFIG_X86_USER_SHADOW_STACK -#define DISABLE_USER_SHSTK 0 -#else -#define DISABLE_USER_SHSTK (1 << (X86_FEATURE_USER_SHSTK & 31)) -#endif - -#ifdef CONFIG_X86_KERNEL_IBT -#define DISABLE_IBT 0 -#else -#define DISABLE_IBT (1 << (X86_FEATURE_IBT & 31)) -#endif - -#ifdef CONFIG_X86_FRED -# define DISABLE_FRED 0 -#else -# define DISABLE_FRED (1 << (X86_FEATURE_FRED & 31)) -#endif - -#ifdef CONFIG_KVM_AMD_SEV -#define DISABLE_SEV_SNP 0 -#else -#define DISABLE_SEV_SNP (1 << (X86_FEATURE_SEV_SNP & 31)) -#endif - -/* - * Make sure to add features to the correct mask - */ -#define DISABLED_MASK0 (DISABLE_VME) -#define DISABLED_MASK1 0 -#define DISABLED_MASK2 0 -#define DISABLED_MASK3 (DISABLE_CYRIX_ARR|DISABLE_CENTAUR_MCR|DISABLE_K6_MTRR) -#define DISABLED_MASK4 (DISABLE_PCID) -#define DISABLED_MASK5 0 -#define DISABLED_MASK6 0 -#define DISABLED_MASK7 (DISABLE_PTI) -#define DISABLED_MASK8 (DISABLE_XENPV|DISABLE_TDX_GUEST) -#define DISABLED_MASK9 (DISABLE_SGX) -#define DISABLED_MASK10 0 -#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET| \ - DISABLE_CALL_DEPTH_TRACKING|DISABLE_USER_SHSTK) -#define DISABLED_MASK12 (DISABLE_FRED|DISABLE_LAM) -#define DISABLED_MASK13 0 -#define DISABLED_MASK14 0 -#define DISABLED_MASK15 0 -#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP| \ - DISABLE_ENQCMD) -#define DISABLED_MASK17 0 -#define DISABLED_MASK18 (DISABLE_IBT) -#define DISABLED_MASK19 (DISABLE_SEV_SNP) -#define DISABLED_MASK20 0 -#define DISABLED_MASK21 0 -#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22) - -#endif /* _ASM_X86_DISABLED_FEATURES_H */ diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h index 430fca13bb56..302e11b15da8 100644 --- a/arch/x86/include/asm/dwarf2.h +++ b/arch/x86/include/asm/dwarf2.h @@ -2,7 +2,7 @@ #ifndef _ASM_X86_DWARF2_H #define _ASM_X86_DWARF2_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #warning "asm/dwarf2.h should be only included in pure assembly files" #endif diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 1fb83d47711f..128602612eca 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -54,8 +54,9 @@ typedef struct user_i387_struct elf_fpregset_t; #define R_X86_64_GLOB_DAT 6 /* Create GOT entry */ #define R_X86_64_JUMP_SLOT 7 /* Create PLT entry */ #define R_X86_64_RELATIVE 8 /* Adjust by program base */ -#define R_X86_64_GOTPCREL 9 /* 32 bit signed pc relative - offset to GOT */ +#define R_X86_64_GOTPCREL 9 /* 32 bit signed pc relative offset to GOT */ +#define R_X86_64_GOTPCRELX 41 +#define R_X86_64_REX_GOTPCRELX 42 #define R_X86_64_32 10 /* Direct 32 bit zero extended */ #define R_X86_64_32S 11 /* Direct 32 bit sign extended */ #define R_X86_64_16 12 /* Direct 16 bit zero extended */ diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index d0dcefb5cc59..4519c9f35ba0 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -31,7 +31,7 @@ /* fixmap starts downwards from the 507th entry in level2_fixmap_pgt */ #define FIXMAP_PMD_TOP 507 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/kernel.h> #include <asm/apicdef.h> #include <asm/page.h> @@ -196,5 +196,5 @@ void __init *early_memremap_decrypted_wp(resource_size_t phys_addr, void __early_set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags); -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_X86_FIXMAP_H */ diff --git a/arch/x86/include/asm/frame.h b/arch/x86/include/asm/frame.h index fb42659f6e98..0ab65073c1cc 100644 --- a/arch/x86/include/asm/frame.h +++ b/arch/x86/include/asm/frame.h @@ -11,7 +11,7 @@ #ifdef CONFIG_FRAME_POINTER -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ .macro FRAME_BEGIN push %_ASM_BP @@ -51,7 +51,7 @@ .endm #endif /* CONFIG_X86_64 */ -#else /* !__ASSEMBLY__ */ +#else /* !__ASSEMBLER__ */ #define FRAME_BEGIN \ "push %" _ASM_BP "\n" \ @@ -82,18 +82,18 @@ static inline unsigned long encode_frame_pointer(struct pt_regs *regs) #endif /* CONFIG_X86_64 */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #define FRAME_OFFSET __ASM_SEL(4, 8) #else /* !CONFIG_FRAME_POINTER */ -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ .macro ENCODE_FRAME_POINTER ptregs_offset=0 .endm -#else /* !__ASSEMBLY */ +#else /* !__ASSEMBLER__ */ #define ENCODE_FRAME_POINTER diff --git a/arch/x86/include/asm/fred.h b/arch/x86/include/asm/fred.h index 25ca00bd70e8..2a29e5216881 100644 --- a/arch/x86/include/asm/fred.h +++ b/arch/x86/include/asm/fred.h @@ -32,7 +32,7 @@ #define FRED_CONFIG_INT_STKLVL(l) (_AT(unsigned long, l) << 9) #define FRED_CONFIG_ENTRYPOINT(p) _AT(unsigned long, (p)) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifdef CONFIG_X86_FRED #include <linux/kernel.h> @@ -113,6 +113,6 @@ static inline void fred_entry_from_kvm(unsigned int type, unsigned int vector) { static inline void fred_sync_rsp0(unsigned long rsp0) { } static inline void fred_update_rsp0(void) { } #endif /* CONFIG_X86_FRED */ -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* ASM_X86_FRED_H */ diff --git a/arch/x86/include/asm/fsgsbase.h b/arch/x86/include/asm/fsgsbase.h index 9e7e8ca8e299..02f239569b93 100644 --- a/arch/x86/include/asm/fsgsbase.h +++ b/arch/x86/include/asm/fsgsbase.h @@ -2,7 +2,7 @@ #ifndef _ASM_FSGSBASE_H #define _ASM_FSGSBASE_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifdef CONFIG_X86_64 @@ -80,6 +80,6 @@ extern unsigned long x86_fsgsbase_read_task(struct task_struct *task, #endif /* CONFIG_X86_64 */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_FSGSBASE_H */ diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index f9cb4d07df58..93156ac4ffe0 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -22,7 +22,7 @@ #define ARCH_SUPPORTS_FTRACE_OPS 1 #endif -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ extern void __fentry__(void); static inline unsigned long ftrace_call_adjust(unsigned long addr) @@ -36,21 +36,9 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) static inline unsigned long arch_ftrace_get_symaddr(unsigned long fentry_ip) { -#ifdef CONFIG_X86_KERNEL_IBT - u32 instr; - - /* We want to be extra safe in case entry ip is on the page edge, - * but otherwise we need to avoid get_kernel_nofault()'s overhead. - */ - if ((fentry_ip & ~PAGE_MASK) < ENDBR_INSN_SIZE) { - if (get_kernel_nofault(instr, (u32 *)(fentry_ip - ENDBR_INSN_SIZE))) - return fentry_ip; - } else { - instr = *(u32 *)(fentry_ip - ENDBR_INSN_SIZE); - } - if (is_endbr(instr)) + if (is_endbr((void*)(fentry_ip - ENDBR_INSN_SIZE))) fentry_ip -= ENDBR_INSN_SIZE; -#endif + return fentry_ip; } #define ftrace_get_symaddr(fentry_ip) arch_ftrace_get_symaddr(fentry_ip) @@ -118,11 +106,11 @@ struct dyn_arch_ftrace { }; #endif /* CONFIG_DYNAMIC_FTRACE */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* CONFIG_FUNCTION_TRACER */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ void prepare_ftrace_return(unsigned long ip, unsigned long *parent, unsigned long frame_pointer); @@ -166,6 +154,6 @@ static inline bool arch_trace_is_compat_syscall(struct pt_regs *regs) } #endif /* CONFIG_FTRACE_SYSCALLS && CONFIG_IA32_EMULATION */ #endif /* !COMPILE_OFFSETS */ -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_X86_FTRACE_H */ diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 6ffa8b75f4cd..f00c09ffe6a9 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -3,7 +3,6 @@ #define _ASM_X86_HARDIRQ_H #include <linux/threads.h> -#include <asm/current.h> typedef struct { #if IS_ENABLED(CONFIG_KVM_INTEL) @@ -66,7 +65,8 @@ extern u64 arch_irq_stat_cpu(unsigned int cpu); extern u64 arch_irq_stat(void); #define arch_irq_stat arch_irq_stat -#define local_softirq_pending_ref pcpu_hot.softirq_pending +DECLARE_PER_CPU_CACHE_HOT(u16, __softirq_pending); +#define local_softirq_pending_ref __softirq_pending #if IS_ENABLED(CONFIG_KVM_INTEL) /* diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index edebf1020e04..162ebd73a698 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -16,7 +16,7 @@ #include <asm/irq_vectors.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/percpu.h> #include <linux/profile.h> @@ -128,6 +128,6 @@ extern char spurious_entries_start[]; typedef struct irq_desc* vector_irq_t[NR_VECTORS]; DECLARE_PER_CPU(vector_irq_t, vector_irq); -#endif /* !ASSEMBLY_ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_X86_HW_IRQ_H */ diff --git a/arch/x86/include/asm/ibt.h b/arch/x86/include/asm/ibt.h index 1e59581d500c..28d845257303 100644 --- a/arch/x86/include/asm/ibt.h +++ b/arch/x86/include/asm/ibt.h @@ -21,7 +21,7 @@ #define HAS_KERNEL_IBT 1 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifdef CONFIG_X86_64 #define ASM_ENDBR "endbr64\n\t" @@ -41,7 +41,7 @@ _ASM_PTR fname "\n\t" \ ".popsection\n\t" -static inline __attribute_const__ u32 gen_endbr(void) +static __always_inline __attribute_const__ u32 gen_endbr(void) { u32 endbr; @@ -56,7 +56,7 @@ static inline __attribute_const__ u32 gen_endbr(void) return endbr; } -static inline __attribute_const__ u32 gen_endbr_poison(void) +static __always_inline __attribute_const__ u32 gen_endbr_poison(void) { /* * 4 byte NOP that isn't NOP4 (in fact it is OSP NOP3), such that it @@ -65,19 +65,24 @@ static inline __attribute_const__ u32 gen_endbr_poison(void) return 0x001f0f66; /* osp nopl (%rax) */ } -static inline bool is_endbr(u32 val) +static inline bool __is_endbr(u32 val) { if (val == gen_endbr_poison()) return true; + /* See cfi_fineibt_bhi_preamble() */ + if (IS_ENABLED(CONFIG_FINEIBT_BHI) && val == 0x001f0ff5) + return true; + val &= ~0x01000000U; /* ENDBR32 -> ENDBR64 */ return val == gen_endbr(); } +extern __noendbr bool is_endbr(u32 *val); extern __noendbr u64 ibt_save(bool disable); extern __noendbr void ibt_restore(u64 save); -#else /* __ASSEMBLY__ */ +#else /* __ASSEMBLER__ */ #ifdef CONFIG_X86_64 #define ENDBR endbr64 @@ -85,29 +90,29 @@ extern __noendbr void ibt_restore(u64 save); #define ENDBR endbr32 #endif -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #else /* !IBT */ #define HAS_KERNEL_IBT 0 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define ASM_ENDBR #define IBT_NOSEAL(name) #define __noendbr -static inline bool is_endbr(u32 val) { return false; } +static inline bool is_endbr(u32 *val) { return false; } static inline u64 ibt_save(bool disable) { return 0; } static inline void ibt_restore(u64 save) { } -#else /* __ASSEMBLY__ */ +#else /* __ASSEMBLER__ */ #define ENDBR -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* CONFIG_X86_KERNEL_IBT */ diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index ad5c68f0509d..a4ec27c67988 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -7,7 +7,7 @@ #define IDT_ALIGN (8 * (1 + HAS_KERNEL_IBT)) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/entry-common.h> #include <linux/hardirq.h> @@ -474,7 +474,7 @@ static inline void fred_install_sysvec(unsigned int vector, const idtentry_t fun idt_install_sysvec(vector, asm_##function); \ } -#else /* !__ASSEMBLY__ */ +#else /* !__ASSEMBLER__ */ /* * The ASM variants for DECLARE_IDTENTRY*() which emit the ASM entry stubs. @@ -579,7 +579,7 @@ SYM_CODE_START(spurious_entries_start) SYM_CODE_END(spurious_entries_start) #endif -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ /* * The actual entry points. Note that DECLARE_IDTENTRY*() serves two diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h index 0e82ebc5d1e1..8b1b1abcef15 100644 --- a/arch/x86/include/asm/init.h +++ b/arch/x86/include/asm/init.h @@ -2,7 +2,11 @@ #ifndef _ASM_X86_INIT_H #define _ASM_X86_INIT_H +#if defined(CONFIG_CC_IS_CLANG) && CONFIG_CLANG_VERSION < 170000 +#define __head __section(".head.text") __no_sanitize_undefined __no_stack_protector +#else #define __head __section(".head.text") __no_sanitize_undefined +#endif struct x86_mapping_info { void *(*alloc_pgt_page)(void *); /* allocate buf for page table */ diff --git a/arch/x86/include/asm/inst.h b/arch/x86/include/asm/inst.h index 438ccd4f3cc4..e48a00b3311d 100644 --- a/arch/x86/include/asm/inst.h +++ b/arch/x86/include/asm/inst.h @@ -6,7 +6,7 @@ #ifndef X86_ASM_INST_H #define X86_ASM_INST_H -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define REG_NUM_INVALID 100 diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 6d7b04ffc5fd..3a97a7eefb51 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -45,7 +45,18 @@ /* Wildcard match so X86_MATCH_VFM(ANY) works */ #define INTEL_ANY IFM(X86_FAMILY_ANY, X86_MODEL_ANY) +/* Family 5 */ +#define INTEL_FAM5_START IFM(5, 0x00) /* Notational marker, also P5 A-step */ +#define INTEL_PENTIUM_75 IFM(5, 0x02) /* P54C */ +#define INTEL_PENTIUM_MMX IFM(5, 0x04) /* P55C */ +#define INTEL_QUARK_X1000 IFM(5, 0x09) /* Quark X1000 SoC */ + +/* Family 6 */ #define INTEL_PENTIUM_PRO IFM(6, 0x01) +#define INTEL_PENTIUM_II_KLAMATH IFM(6, 0x03) +#define INTEL_PENTIUM_III_DESCHUTES IFM(6, 0x05) +#define INTEL_PENTIUM_III_TUALATIN IFM(6, 0x0B) +#define INTEL_PENTIUM_M_DOTHAN IFM(6, 0x0D) #define INTEL_CORE_YONAH IFM(6, 0x0E) @@ -110,9 +121,9 @@ #define INTEL_SAPPHIRERAPIDS_X IFM(6, 0x8F) /* Golden Cove */ -#define INTEL_EMERALDRAPIDS_X IFM(6, 0xCF) +#define INTEL_EMERALDRAPIDS_X IFM(6, 0xCF) /* Raptor Cove */ -#define INTEL_GRANITERAPIDS_X IFM(6, 0xAD) +#define INTEL_GRANITERAPIDS_X IFM(6, 0xAD) /* Redwood Cove */ #define INTEL_GRANITERAPIDS_D IFM(6, 0xAE) /* "Hybrid" Processors (P-Core/E-Core) */ @@ -126,16 +137,16 @@ #define INTEL_RAPTORLAKE_P IFM(6, 0xBA) #define INTEL_RAPTORLAKE_S IFM(6, 0xBF) -#define INTEL_METEORLAKE IFM(6, 0xAC) +#define INTEL_METEORLAKE IFM(6, 0xAC) /* Redwood Cove / Crestmont */ #define INTEL_METEORLAKE_L IFM(6, 0xAA) -#define INTEL_ARROWLAKE_H IFM(6, 0xC5) +#define INTEL_ARROWLAKE_H IFM(6, 0xC5) /* Lion Cove / Skymont */ #define INTEL_ARROWLAKE IFM(6, 0xC6) #define INTEL_ARROWLAKE_U IFM(6, 0xB5) -#define INTEL_LUNARLAKE_M IFM(6, 0xBD) +#define INTEL_LUNARLAKE_M IFM(6, 0xBD) /* Lion Cove / Skymont */ -#define INTEL_PANTHERLAKE_L IFM(6, 0xCC) +#define INTEL_PANTHERLAKE_L IFM(6, 0xCC) /* Cougar Cove / Crestmont */ /* "Small Core" Processors (Atom/E-Core) */ @@ -149,9 +160,9 @@ #define INTEL_ATOM_SILVERMONT IFM(6, 0x37) /* Bay Trail, Valleyview */ #define INTEL_ATOM_SILVERMONT_D IFM(6, 0x4D) /* Avaton, Rangely */ #define INTEL_ATOM_SILVERMONT_MID IFM(6, 0x4A) /* Merriefield */ +#define INTEL_ATOM_SILVERMONT_MID2 IFM(6, 0x5A) /* Anniedale */ #define INTEL_ATOM_AIRMONT IFM(6, 0x4C) /* Cherry Trail, Braswell */ -#define INTEL_ATOM_AIRMONT_MID IFM(6, 0x5A) /* Moorefield */ #define INTEL_ATOM_AIRMONT_NP IFM(6, 0x75) /* Lightning Mountain */ #define INTEL_ATOM_GOLDMONT IFM(6, 0x5C) /* Apollo Lake */ @@ -176,16 +187,35 @@ #define INTEL_XEON_PHI_KNL IFM(6, 0x57) /* Knights Landing */ #define INTEL_XEON_PHI_KNM IFM(6, 0x85) /* Knights Mill */ -/* Family 5 */ -#define INTEL_QUARK_X1000 IFM(5, 0x09) /* Quark X1000 SoC */ +/* Notational marker denoting the last Family 6 model */ +#define INTEL_FAM6_LAST IFM(6, 0xFF) + +/* Family 15 - NetBurst */ +#define INTEL_P4_WILLAMETTE IFM(15, 0x01) /* Also Xeon Foster */ +#define INTEL_P4_PRESCOTT IFM(15, 0x03) +#define INTEL_P4_PRESCOTT_2M IFM(15, 0x04) +#define INTEL_P4_CEDARMILL IFM(15, 0x06) /* Also Xeon Dempsey */ /* Family 19 */ #define INTEL_PANTHERCOVE_X IFM(19, 0x01) /* Diamond Rapids */ -/* CPU core types */ +/* + * Intel CPU core types + * + * CPUID.1AH.EAX[31:0] uniquely identifies the microarchitecture + * of the core. Bits 31-24 indicates its core type (Core or Atom) + * and Bits [23:0] indicates the native model ID of the core. + * Core type and native model ID are defined in below enumerations. + */ enum intel_cpu_type { + INTEL_CPU_TYPE_UNKNOWN, INTEL_CPU_TYPE_ATOM = 0x20, INTEL_CPU_TYPE_CORE = 0x40, }; +enum intel_native_id { + INTEL_ATOM_CMT_NATIVE_ID = 0x2, /* Crestmont */ + INTEL_ATOM_SKT_NATIVE_ID = 0x3, /* Skymont */ +}; + #endif /* _ASM_X86_INTEL_FAMILY_H */ diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index ed580c7f9d0a..1a0dc2b2bf5b 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -175,6 +175,9 @@ extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, un extern void __iomem *ioremap_encrypted(resource_size_t phys_addr, unsigned long size); #define ioremap_encrypted ioremap_encrypted +void *arch_memremap_wb(phys_addr_t phys_addr, size_t size, unsigned long flags); +#define arch_memremap_wb arch_memremap_wb + /** * ioremap - map bus memory into CPU space * @offset: bus address of the memory diff --git a/arch/x86/include/asm/irq_stack.h b/arch/x86/include/asm/irq_stack.h index 562a547c29a5..735c3a491f60 100644 --- a/arch/x86/include/asm/irq_stack.h +++ b/arch/x86/include/asm/irq_stack.h @@ -116,7 +116,7 @@ ASM_CALL_ARG2 #define call_on_irqstack(func, asm_call, argconstr...) \ - call_on_stack(__this_cpu_read(pcpu_hot.hardirq_stack_ptr), \ + call_on_stack(__this_cpu_read(hardirq_stack_ptr), \ func, asm_call, argconstr) /* Macros to assert type correctness for run_*_on_irqstack macros */ @@ -135,7 +135,7 @@ * User mode entry and interrupt on the irq stack do not \ * switch stacks. If from user mode the task stack is empty. \ */ \ - if (user_mode(regs) || __this_cpu_read(pcpu_hot.hardirq_stack_inuse)) { \ + if (user_mode(regs) || __this_cpu_read(hardirq_stack_inuse)) { \ irq_enter_rcu(); \ func(c_args); \ irq_exit_rcu(); \ @@ -146,9 +146,9 @@ * places. Invoke the stack switch macro with the call \ * sequence which matches the above direct invocation. \ */ \ - __this_cpu_write(pcpu_hot.hardirq_stack_inuse, true); \ + __this_cpu_write(hardirq_stack_inuse, true); \ call_on_irqstack(func, asm_call, constr); \ - __this_cpu_write(pcpu_hot.hardirq_stack_inuse, false); \ + __this_cpu_write(hardirq_stack_inuse, false); \ } \ } @@ -212,9 +212,9 @@ */ #define do_softirq_own_stack() \ { \ - __this_cpu_write(pcpu_hot.hardirq_stack_inuse, true); \ + __this_cpu_write(hardirq_stack_inuse, true); \ call_on_irqstack(__do_softirq, ASM_CALL_ARG0); \ - __this_cpu_write(pcpu_hot.hardirq_stack_inuse, false); \ + __this_cpu_write(hardirq_stack_inuse, false); \ } #endif diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index cf7fc2b8e3ce..abb8374c9ff7 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -4,7 +4,7 @@ #include <asm/processor-flags.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/nospec-branch.h> @@ -79,7 +79,7 @@ static __always_inline void native_local_irq_restore(unsigned long flags) #ifdef CONFIG_PARAVIRT_XXL #include <asm/paravirt.h> #else -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> static __always_inline unsigned long arch_local_save_flags(void) @@ -133,10 +133,10 @@ static __always_inline unsigned long arch_local_irq_save(void) #endif -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* CONFIG_PARAVIRT_XXL */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ static __always_inline int arch_irqs_disabled_flags(unsigned long flags) { return !(flags & X86_EFLAGS_IF); @@ -154,6 +154,6 @@ static __always_inline void arch_local_irq_restore(unsigned long flags) if (!arch_irqs_disabled_flags(flags)) arch_local_irq_enable(); } -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index 3f1c1d6c0da1..61dd1dee7812 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -7,7 +7,7 @@ #include <asm/asm.h> #include <asm/nops.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/stringify.h> #include <linux/types.h> @@ -55,6 +55,6 @@ l_yes: extern int arch_jump_entry_size(struct jump_entry *entry); -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/x86/include/asm/kasan.h b/arch/x86/include/asm/kasan.h index de75306b932e..d7e33c7f096b 100644 --- a/arch/x86/include/asm/kasan.h +++ b/arch/x86/include/asm/kasan.h @@ -23,7 +23,7 @@ (1ULL << (__VIRTUAL_MASK_SHIFT - \ KASAN_SHADOW_SCALE_SHIFT))) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifdef CONFIG_KASAN void __init kasan_early_init(void); diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index 8ad187462b68..5432457d2338 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h @@ -13,11 +13,12 @@ # define KEXEC_CONTROL_PAGE_SIZE 4096 # define KEXEC_CONTROL_CODE_MAX_SIZE 2048 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/string.h> #include <linux/kernel.h> +#include <asm/asm.h> #include <asm/page.h> #include <asm/ptrace.h> @@ -71,41 +72,32 @@ static inline void crash_setup_regs(struct pt_regs *newregs, if (oldregs) { memcpy(newregs, oldregs, sizeof(*newregs)); } else { + asm volatile("mov %%" _ASM_BX ",%0" : "=m"(newregs->bx)); + asm volatile("mov %%" _ASM_CX ",%0" : "=m"(newregs->cx)); + asm volatile("mov %%" _ASM_DX ",%0" : "=m"(newregs->dx)); + asm volatile("mov %%" _ASM_SI ",%0" : "=m"(newregs->si)); + asm volatile("mov %%" _ASM_DI ",%0" : "=m"(newregs->di)); + asm volatile("mov %%" _ASM_BP ",%0" : "=m"(newregs->bp)); + asm volatile("mov %%" _ASM_AX ",%0" : "=m"(newregs->ax)); + asm volatile("mov %%" _ASM_SP ",%0" : "=m"(newregs->sp)); +#ifdef CONFIG_X86_64 + asm volatile("mov %%r8,%0" : "=m"(newregs->r8)); + asm volatile("mov %%r9,%0" : "=m"(newregs->r9)); + asm volatile("mov %%r10,%0" : "=m"(newregs->r10)); + asm volatile("mov %%r11,%0" : "=m"(newregs->r11)); + asm volatile("mov %%r12,%0" : "=m"(newregs->r12)); + asm volatile("mov %%r13,%0" : "=m"(newregs->r13)); + asm volatile("mov %%r14,%0" : "=m"(newregs->r14)); + asm volatile("mov %%r15,%0" : "=m"(newregs->r15)); +#endif + asm volatile("mov %%ss,%k0" : "=a"(newregs->ss)); + asm volatile("mov %%cs,%k0" : "=a"(newregs->cs)); #ifdef CONFIG_X86_32 - asm volatile("movl %%ebx,%0" : "=m"(newregs->bx)); - asm volatile("movl %%ecx,%0" : "=m"(newregs->cx)); - asm volatile("movl %%edx,%0" : "=m"(newregs->dx)); - asm volatile("movl %%esi,%0" : "=m"(newregs->si)); - asm volatile("movl %%edi,%0" : "=m"(newregs->di)); - asm volatile("movl %%ebp,%0" : "=m"(newregs->bp)); - asm volatile("movl %%eax,%0" : "=m"(newregs->ax)); - asm volatile("movl %%esp,%0" : "=m"(newregs->sp)); - asm volatile("movl %%ss, %%eax;" :"=a"(newregs->ss)); - asm volatile("movl %%cs, %%eax;" :"=a"(newregs->cs)); - asm volatile("movl %%ds, %%eax;" :"=a"(newregs->ds)); - asm volatile("movl %%es, %%eax;" :"=a"(newregs->es)); - asm volatile("pushfl; popl %0" :"=m"(newregs->flags)); -#else - asm volatile("movq %%rbx,%0" : "=m"(newregs->bx)); - asm volatile("movq %%rcx,%0" : "=m"(newregs->cx)); - asm volatile("movq %%rdx,%0" : "=m"(newregs->dx)); - asm volatile("movq %%rsi,%0" : "=m"(newregs->si)); - asm volatile("movq %%rdi,%0" : "=m"(newregs->di)); - asm volatile("movq %%rbp,%0" : "=m"(newregs->bp)); - asm volatile("movq %%rax,%0" : "=m"(newregs->ax)); - asm volatile("movq %%rsp,%0" : "=m"(newregs->sp)); - asm volatile("movq %%r8,%0" : "=m"(newregs->r8)); - asm volatile("movq %%r9,%0" : "=m"(newregs->r9)); - asm volatile("movq %%r10,%0" : "=m"(newregs->r10)); - asm volatile("movq %%r11,%0" : "=m"(newregs->r11)); - asm volatile("movq %%r12,%0" : "=m"(newregs->r12)); - asm volatile("movq %%r13,%0" : "=m"(newregs->r13)); - asm volatile("movq %%r14,%0" : "=m"(newregs->r14)); - asm volatile("movq %%r15,%0" : "=m"(newregs->r15)); - asm volatile("movl %%ss, %%eax;" :"=a"(newregs->ss)); - asm volatile("movl %%cs, %%eax;" :"=a"(newregs->cs)); - asm volatile("pushfq; popq %0" :"=m"(newregs->flags)); + asm volatile("mov %%ds,%k0" : "=a"(newregs->ds)); + asm volatile("mov %%es,%k0" : "=a"(newregs->es)); #endif + asm volatile("pushf\n\t" + "pop %0" : "=m"(newregs->flags)); newregs->ip = _THIS_IP_; } } @@ -225,6 +217,6 @@ unsigned int arch_crash_get_elfcorehdr_size(void); #define crash_get_elfcorehdr_size arch_crash_get_elfcorehdr_size #endif -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_KEXEC_H */ diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index dc31b13b87a0..b51d8a4673f5 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -38,7 +38,7 @@ #define ASM_FUNC_ALIGN __stringify(__FUNC_ALIGN) #define SYM_F_ALIGN __FUNC_ALIGN -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #if defined(CONFIG_MITIGATION_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) #define RET jmp __x86_return_thunk @@ -50,7 +50,7 @@ #endif #endif /* CONFIG_MITIGATION_RETPOLINE */ -#else /* __ASSEMBLY__ */ +#else /* __ASSEMBLER__ */ #if defined(CONFIG_MITIGATION_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) #define ASM_RET "jmp __x86_return_thunk\n\t" @@ -62,7 +62,7 @@ #endif #endif /* CONFIG_MITIGATION_RETPOLINE */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ /* * Depending on -fpatchable-function-entry=N,N usage (CONFIG_CALL_PADDING) the @@ -119,33 +119,27 @@ /* SYM_FUNC_START -- use for global functions */ #define SYM_FUNC_START(name) \ - SYM_START(name, SYM_L_GLOBAL, SYM_F_ALIGN) \ - ENDBR + SYM_START(name, SYM_L_GLOBAL, SYM_F_ALIGN) /* SYM_FUNC_START_NOALIGN -- use for global functions, w/o alignment */ #define SYM_FUNC_START_NOALIGN(name) \ - SYM_START(name, SYM_L_GLOBAL, SYM_A_NONE) \ - ENDBR + SYM_START(name, SYM_L_GLOBAL, SYM_A_NONE) /* SYM_FUNC_START_LOCAL -- use for local functions */ #define SYM_FUNC_START_LOCAL(name) \ - SYM_START(name, SYM_L_LOCAL, SYM_F_ALIGN) \ - ENDBR + SYM_START(name, SYM_L_LOCAL, SYM_F_ALIGN) /* SYM_FUNC_START_LOCAL_NOALIGN -- use for local functions, w/o alignment */ #define SYM_FUNC_START_LOCAL_NOALIGN(name) \ - SYM_START(name, SYM_L_LOCAL, SYM_A_NONE) \ - ENDBR + SYM_START(name, SYM_L_LOCAL, SYM_A_NONE) /* SYM_FUNC_START_WEAK -- use for weak functions */ #define SYM_FUNC_START_WEAK(name) \ - SYM_START(name, SYM_L_WEAK, SYM_F_ALIGN) \ - ENDBR + SYM_START(name, SYM_L_WEAK, SYM_F_ALIGN) /* SYM_FUNC_START_WEAK_NOALIGN -- use for weak functions, w/o alignment */ #define SYM_FUNC_START_WEAK_NOALIGN(name) \ - SYM_START(name, SYM_L_WEAK, SYM_A_NONE) \ - ENDBR + SYM_START(name, SYM_L_WEAK, SYM_A_NONE) #endif /* _ASM_X86_LINKAGE_H */ diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h index f922b682b9b4..1530ee301dfe 100644 --- a/arch/x86/include/asm/mem_encrypt.h +++ b/arch/x86/include/asm/mem_encrypt.h @@ -10,7 +10,7 @@ #ifndef __X86_MEM_ENCRYPT_H__ #define __X86_MEM_ENCRYPT_H__ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/init.h> #include <linux/cc_platform.h> @@ -114,6 +114,6 @@ void add_encrypt_protection_map(void); extern char __start_bss_decrypted[], __end_bss_decrypted[], __start_bss_decrypted_unused[]; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __X86_MEM_ENCRYPT_H__ */ diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index 3b496cdcb74b..8b8055a8eb9e 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h @@ -69,6 +69,18 @@ typedef struct { u16 pkey_allocation_map; s16 execute_only_pkey; #endif + +#ifdef CONFIG_BROADCAST_TLB_FLUSH + /* + * The global ASID will be a non-zero value when the process has + * the same ASID across all CPUs, allowing it to make use of + * hardware-assisted remote TLB invalidation like AMD INVLPGB. + */ + u16 global_asid; + + /* The process is transitioning to a new global ASID number. */ + bool asid_transition; +#endif } mm_context_t; #define INIT_MM_CONTEXT(mm) \ diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 795fdd53bd0a..2398058b6e83 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -2,7 +2,6 @@ #ifndef _ASM_X86_MMU_CONTEXT_H #define _ASM_X86_MMU_CONTEXT_H -#include <asm/desc.h> #include <linux/atomic.h> #include <linux/mm_types.h> #include <linux/pkeys.h> @@ -13,6 +12,7 @@ #include <asm/paravirt.h> #include <asm/debugreg.h> #include <asm/gsseg.h> +#include <asm/desc.h> extern atomic64_t last_mm_ctx_id; @@ -139,6 +139,11 @@ static inline void mm_reset_untag_mask(struct mm_struct *mm) #define enter_lazy_tlb enter_lazy_tlb extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk); +#define mm_init_global_asid mm_init_global_asid +extern void mm_init_global_asid(struct mm_struct *mm); + +extern void mm_free_global_asid(struct mm_struct *mm); + /* * Init a new mm. Used on mm copies, like at fork() * and on mm's that are brand-new, like at execve(). @@ -161,6 +166,8 @@ static inline int init_new_context(struct task_struct *tsk, mm->context.execute_only_pkey = -1; } #endif + + mm_init_global_asid(mm); mm_reset_untag_mask(mm); init_new_context_ldt(mm); return 0; @@ -170,6 +177,7 @@ static inline int init_new_context(struct task_struct *tsk, static inline void destroy_context(struct mm_struct *mm) { destroy_context_ldt(mm); + mm_free_global_asid(mm); } extern void switch_mm(struct mm_struct *prev, struct mm_struct *next, diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index f91ab1e75f9f..5e6193dbc97e 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -77,11 +77,11 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output) return hv_tdx_hypercall(control, input_address, output_address); if (hv_isolation_type_snp() && !hyperv_paravisor_present) { - __asm__ __volatile__("mov %4, %%r8\n" + __asm__ __volatile__("mov %[output_address], %%r8\n" "vmmcall" : "=a" (hv_status), ASM_CALL_CONSTRAINT, "+c" (control), "+d" (input_address) - : "r" (output_address) + : [output_address] "r" (output_address) : "cc", "memory", "r8", "r9", "r10", "r11"); return hv_status; } @@ -89,12 +89,12 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output) if (!hv_hypercall_pg) return U64_MAX; - __asm__ __volatile__("mov %4, %%r8\n" + __asm__ __volatile__("mov %[output_address], %%r8\n" CALL_NOSPEC : "=a" (hv_status), ASM_CALL_CONSTRAINT, "+c" (control), "+d" (input_address) - : "r" (output_address), - THUNK_TARGET(hv_hypercall_pg) + : [output_address] "r" (output_address), + THUNK_TARGET(hv_hypercall_pg) : "cc", "memory", "r8", "r9", "r10", "r11"); #else u32 input_address_hi = upper_32_bits(input_address); @@ -187,18 +187,18 @@ static inline u64 _hv_do_fast_hypercall16(u64 control, u64 input1, u64 input2) return hv_tdx_hypercall(control, input1, input2); if (hv_isolation_type_snp() && !hyperv_paravisor_present) { - __asm__ __volatile__("mov %4, %%r8\n" + __asm__ __volatile__("mov %[input2], %%r8\n" "vmmcall" : "=a" (hv_status), ASM_CALL_CONSTRAINT, "+c" (control), "+d" (input1) - : "r" (input2) + : [input2] "r" (input2) : "cc", "r8", "r9", "r10", "r11"); } else { - __asm__ __volatile__("mov %4, %%r8\n" + __asm__ __volatile__("mov %[input2], %%r8\n" CALL_NOSPEC : "=a" (hv_status), ASM_CALL_CONSTRAINT, "+c" (control), "+d" (input1) - : "r" (input2), + : [input2] "r" (input2), THUNK_TARGET(hv_hypercall_pg) : "cc", "r8", "r9", "r10", "r11"); } diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 72765b2fe0d8..1aacd6b68fab 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -25,6 +25,7 @@ #define _EFER_SVME 12 /* Enable virtualization */ #define _EFER_LMSLE 13 /* Long Mode Segment Limit Enable */ #define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */ +#define _EFER_TCE 15 /* Enable Translation Cache Extensions */ #define _EFER_AUTOIBRS 21 /* Enable Automatic IBRS */ #define EFER_SCE (1<<_EFER_SCE) @@ -34,6 +35,7 @@ #define EFER_SVME (1<<_EFER_SVME) #define EFER_LMSLE (1<<_EFER_LMSLE) #define EFER_FFXSR (1<<_EFER_FFXSR) +#define EFER_TCE (1<<_EFER_TCE) #define EFER_AUTOIBRS (1<<_EFER_AUTOIBRS) /* diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 001853541f1e..9397a319d165 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -4,7 +4,7 @@ #include "msr-index.h" -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/asm.h> #include <asm/errno.h> @@ -397,5 +397,5 @@ static inline int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]) return wrmsr_safe_regs(regs); } #endif /* CONFIG_SMP */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_MSR_H */ diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index 41a0ebb699ec..f677382093f3 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -56,6 +56,8 @@ int __register_nmi_handler(unsigned int, struct nmiaction *); void unregister_nmi_handler(unsigned int, const char *); +void set_emergency_nmi_handler(unsigned int type, nmi_handler_t handler); + void stop_nmi(void); void restart_nmi(void); void local_touch_nmi(void); diff --git a/arch/x86/include/asm/nops.h b/arch/x86/include/asm/nops.h index 1c1b7550fa55..cd94221d8335 100644 --- a/arch/x86/include/asm/nops.h +++ b/arch/x86/include/asm/nops.h @@ -82,7 +82,7 @@ #define ASM_NOP7 _ASM_BYTES(BYTES_NOP7) #define ASM_NOP8 _ASM_BYTES(BYTES_NOP8) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ extern const unsigned char * const x86_nops[]; #endif diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index aee26bb8230f..804b66a7686a 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -12,7 +12,6 @@ #include <asm/msr-index.h> #include <asm/unwind_hints.h> #include <asm/percpu.h> -#include <asm/current.h> /* * Call depth tracking for Intel SKL CPUs to address the RSB underflow @@ -78,21 +77,21 @@ #include <asm/asm-offsets.h> #define CREDIT_CALL_DEPTH \ - movq $-1, PER_CPU_VAR(pcpu_hot + X86_call_depth); + movq $-1, PER_CPU_VAR(__x86_call_depth); #define RESET_CALL_DEPTH \ xor %eax, %eax; \ bts $63, %rax; \ - movq %rax, PER_CPU_VAR(pcpu_hot + X86_call_depth); + movq %rax, PER_CPU_VAR(__x86_call_depth); #define RESET_CALL_DEPTH_FROM_CALL \ movb $0xfc, %al; \ shl $56, %rax; \ - movq %rax, PER_CPU_VAR(pcpu_hot + X86_call_depth); \ + movq %rax, PER_CPU_VAR(__x86_call_depth); \ CALL_THUNKS_DEBUG_INC_CALLS #define INCREMENT_CALL_DEPTH \ - sarq $5, PER_CPU_VAR(pcpu_hot + X86_call_depth); \ + sarq $5, PER_CPU_VAR(__x86_call_depth); \ CALL_THUNKS_DEBUG_INC_CALLS #else @@ -177,7 +176,7 @@ add $(BITS_PER_LONG/8), %_ASM_SP; \ lfence; -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ /* * (ab)use RETPOLINE_SAFE on RET to annotate away 'bare' RET instructions @@ -335,7 +334,7 @@ #define CLEAR_BRANCH_HISTORY_VMEXIT #endif -#else /* __ASSEMBLY__ */ +#else /* __ASSEMBLER__ */ typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE]; extern retpoline_thunk_t __x86_indirect_thunk_array[]; @@ -387,6 +386,8 @@ extern void call_depth_return_thunk(void); __stringify(INCREMENT_CALL_DEPTH), \ X86_FEATURE_CALL_DEPTH) +DECLARE_PER_CPU_CACHE_HOT(u64, __x86_call_depth); + #ifdef CONFIG_CALL_THUNKS_DEBUG DECLARE_PER_CPU(u64, __x86_call_count); DECLARE_PER_CPU(u64, __x86_ret_count); @@ -602,6 +603,6 @@ static __always_inline void mds_idle_clear_cpu_buffers(void) mds_clear_cpu_buffers(); } -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_NOSPEC_BRANCH_H_ */ diff --git a/arch/x86/include/asm/orc_types.h b/arch/x86/include/asm/orc_types.h index 46d7e06763c9..e0125afa53fb 100644 --- a/arch/x86/include/asm/orc_types.h +++ b/arch/x86/include/asm/orc_types.h @@ -45,7 +45,7 @@ #define ORC_TYPE_REGS 3 #define ORC_TYPE_REGS_PARTIAL 4 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/byteorder.h> /* @@ -73,6 +73,6 @@ struct orc_entry { #endif } __packed; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ORC_TYPES_H */ diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h index c9fe207916f4..9265f2fca99a 100644 --- a/arch/x86/include/asm/page.h +++ b/arch/x86/include/asm/page.h @@ -14,7 +14,7 @@ #include <asm/page_32.h> #endif /* CONFIG_X86_64 */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct page; @@ -84,7 +84,7 @@ static __always_inline u64 __is_canonical_address(u64 vaddr, u8 vaddr_bits) return __canonical_address(vaddr, vaddr_bits) == vaddr; } -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #include <asm-generic/memory_model.h> #include <asm-generic/getorder.h> diff --git a/arch/x86/include/asm/page_32.h b/arch/x86/include/asm/page_32.h index 580d71aca65a..0c623706cb7e 100644 --- a/arch/x86/include/asm/page_32.h +++ b/arch/x86/include/asm/page_32.h @@ -4,7 +4,7 @@ #include <asm/page_32_types.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define __phys_addr_nodebug(x) ((x) - PAGE_OFFSET) #ifdef CONFIG_DEBUG_VIRTUAL @@ -26,6 +26,6 @@ static inline void copy_page(void *to, void *from) { memcpy(to, from, PAGE_SIZE); } -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_X86_PAGE_32_H */ diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h index faf9cc1c14bb..a9b62e0e6f79 100644 --- a/arch/x86/include/asm/page_32_types.h +++ b/arch/x86/include/asm/page_32_types.h @@ -11,8 +11,8 @@ * a virtual address space of one gigabyte, which limits the * amount of physical memory you can use to about 950MB. * - * If you want more physical memory than this then see the CONFIG_HIGHMEM4G - * and CONFIG_HIGHMEM64G options in the kernel configuration. + * If you want more physical memory than this then see the CONFIG_VMSPLIT_2G + * and CONFIG_HIGHMEM4G options in the kernel configuration. */ #define __PAGE_OFFSET_BASE _AC(CONFIG_PAGE_OFFSET, UL) #define __PAGE_OFFSET __PAGE_OFFSET_BASE @@ -63,7 +63,7 @@ */ #define KERNEL_IMAGE_SIZE (512 * 1024 * 1024) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * This much address space is reserved for vmalloc() and iomap() @@ -75,6 +75,6 @@ extern int sysctl_legacy_va_layout; extern void find_low_pfn_range(void); extern void setup_bootmem_allocator(void); -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_X86_PAGE_32_DEFS_H */ diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h index d63576608ce7..d3aab6f4e59a 100644 --- a/arch/x86/include/asm/page_64.h +++ b/arch/x86/include/asm/page_64.h @@ -4,7 +4,7 @@ #include <asm/page_64_types.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/cpufeatures.h> #include <asm/alternative.h> @@ -55,11 +55,12 @@ static inline void clear_page(void *page) clear_page_rep, X86_FEATURE_REP_GOOD, clear_page_erms, X86_FEATURE_ERMS, "=D" (page), - "D" (page) - : "cc", "memory", "rax", "rcx"); + "D" (page), + "cc", "memory", "rax", "rcx"); } void copy_page(void *to, void *from); +KCFI_REFERENCE(copy_page); #ifdef CONFIG_X86_5LEVEL /* @@ -94,7 +95,7 @@ static __always_inline unsigned long task_size_max(void) } #endif /* CONFIG_X86_5LEVEL */ -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #ifdef CONFIG_X86_VSYSCALL_EMULATION # define __HAVE_ARCH_GATE_AREA 1 diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 06ef25411d62..1faa8f88850a 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -2,7 +2,7 @@ #ifndef _ASM_X86_PAGE_64_DEFS_H #define _ASM_X86_PAGE_64_DEFS_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/kaslr.h> #endif diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index 974688973cf6..9f77bf03d747 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -43,7 +43,7 @@ #define IOREMAP_MAX_ORDER (PMD_SHIFT) #endif /* CONFIG_X86_64 */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifdef CONFIG_DYNAMIC_PHYSICAL_MASK extern phys_addr_t physical_mask; @@ -66,6 +66,6 @@ bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn); extern void initmem_init(void); -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_X86_PAGE_DEFS_H */ diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 041aff51eb50..bed346bfac89 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -6,7 +6,7 @@ #include <asm/paravirt_types.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct mm_struct; #endif @@ -15,7 +15,7 @@ struct mm_struct; #include <asm/asm.h> #include <asm/nospec-branch.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/bug.h> #include <linux/types.h> #include <linux/cpumask.h> @@ -91,11 +91,6 @@ static inline void __flush_tlb_multi(const struct cpumask *cpumask, PVOP_VCALL2(mmu.flush_tlb_multi, cpumask, info); } -static inline void paravirt_tlb_remove_table(struct mmu_gather *tlb, void *table) -{ - PVOP_VCALL2(mmu.tlb_remove_table, tlb, table); -} - static inline void paravirt_arch_exit_mmap(struct mm_struct *mm) { PVOP_VCALL1(mmu.exit_mmap, mm); @@ -720,7 +715,7 @@ static __always_inline unsigned long arch_local_irq_save(void) extern void default_banner(void); void native_pv_lock_init(void) __init; -#else /* __ASSEMBLY__ */ +#else /* __ASSEMBLER__ */ #ifdef CONFIG_X86_64 #ifdef CONFIG_PARAVIRT_XXL @@ -740,18 +735,18 @@ void native_pv_lock_init(void) __init; #endif /* CONFIG_PARAVIRT_XXL */ #endif /* CONFIG_X86_64 */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #else /* CONFIG_PARAVIRT */ # define default_banner x86_init_noop -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ static inline void native_pv_lock_init(void) { } #endif #endif /* !CONFIG_PARAVIRT */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifndef CONFIG_PARAVIRT_XXL static inline void paravirt_enter_mmap(struct mm_struct *mm) { @@ -769,5 +764,5 @@ static inline void paravirt_set_cap(void) { } #endif -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_PARAVIRT_H */ diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index fea56b04f436..62912023b46f 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -4,7 +4,7 @@ #ifdef CONFIG_PARAVIRT -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> #include <asm/desc_defs.h> @@ -134,8 +134,6 @@ struct pv_mmu_ops { void (*flush_tlb_multi)(const struct cpumask *cpus, const struct flush_tlb_info *info); - void (*tlb_remove_table)(struct mmu_gather *tlb, void *table); - /* Hook for intercepting the destruction of an mm_struct. */ void (*exit_mmap)(struct mm_struct *mm); void (*notify_page_enc_status_changed)(unsigned long pfn, int npages, bool enc); @@ -242,9 +240,17 @@ extern struct paravirt_patch_template pv_ops; #define paravirt_ptr(op) [paravirt_opptr] "m" (pv_ops.op) -int paravirt_disable_iospace(void); - -/* This generates an indirect call based on the operation type number. */ +/* + * This generates an indirect call based on the operation type number. + * + * Since alternatives run after enabling CET/IBT -- the latter setting/clearing + * capabilities and the former requiring all capabilities being finalized -- + * these indirect calls are subject to IBT and the paravirt stubs should have + * ENDBR on. + * + * OTOH since this is effectively a __nocfi indirect call, the paravirt stubs + * don't need to bother with CFI prefixes. + */ #define PARAVIRT_CALL \ ANNOTATE_RETPOLINE_SAFE \ "call *%[paravirt_opptr];" @@ -519,7 +525,7 @@ unsigned long pv_native_read_cr2(void); #define paravirt_nop ((void *)nop_func) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #define ALT_NOT_XEN ALT_NOT(X86_FEATURE_XENPV) diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 08f5f61690b7..105db2d33c7b 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -10,7 +10,7 @@ # define __percpu_rel #endif -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #ifdef CONFIG_SMP # define __percpu %__percpu_seg: @@ -20,14 +20,9 @@ #define PER_CPU_VAR(var) __percpu(var)__percpu_rel -#ifdef CONFIG_X86_64_SMP -# define INIT_PER_CPU_VAR(var) init_per_cpu__##var -#else -# define INIT_PER_CPU_VAR(var) var -#endif - #else /* !__ASSEMBLY__: */ +#include <linux/args.h> #include <linux/build_bug.h> #include <linux/stringify.h> #include <asm/asm.h> @@ -41,12 +36,7 @@ # define __seg_fs __attribute__((address_space(__seg_fs))) #endif -#ifdef CONFIG_X86_64 -# define __percpu_seg_override __seg_gs -#else -# define __percpu_seg_override __seg_fs -#endif - +#define __percpu_seg_override CONCATENATE(__seg_, __percpu_seg) #define __percpu_prefix "" #else /* !CONFIG_CC_HAS_NAMED_AS: */ @@ -98,22 +88,6 @@ #define __force_percpu_arg(x) __force_percpu_prefix "%" #x /* - * Initialized pointers to per-CPU variables needed for the boot - * processor need to use these macros to get the proper address - * offset from __per_cpu_load on SMP. - * - * There also must be an entry in vmlinux_64.lds.S - */ -#define DECLARE_INIT_PER_CPU(var) \ - extern typeof(var) init_per_cpu_var(var) - -#ifdef CONFIG_X86_64_SMP -# define init_per_cpu_var(var) init_per_cpu__##var -#else -# define init_per_cpu_var(var) var -#endif - -/* * For arch-specific code, we can use direct single-insn ops (they * don't give an lvalue though). */ @@ -128,15 +102,10 @@ #define __pcpu_cast_4(val) ((u32)(((unsigned long) val) & 0xffffffff)) #define __pcpu_cast_8(val) ((u64)(val)) -#define __pcpu_op1_1(op, dst) op "b " dst -#define __pcpu_op1_2(op, dst) op "w " dst -#define __pcpu_op1_4(op, dst) op "l " dst -#define __pcpu_op1_8(op, dst) op "q " dst - -#define __pcpu_op2_1(op, src, dst) op "b " src ", " dst -#define __pcpu_op2_2(op, src, dst) op "w " src ", " dst -#define __pcpu_op2_4(op, src, dst) op "l " src ", " dst -#define __pcpu_op2_8(op, src, dst) op "q " src ", " dst +#define __pcpu_op_1(op) op "b " +#define __pcpu_op_2(op) op "w " +#define __pcpu_op_4(op) op "l " +#define __pcpu_op_8(op) op "q " #define __pcpu_reg_1(mod, x) mod "q" (x) #define __pcpu_reg_2(mod, x) mod "r" (x) @@ -168,7 +137,8 @@ do { \ ({ \ __pcpu_type_##size pfo_val__; \ \ - asm qual (__pcpu_op2_##size("mov", __percpu_arg([var]), "%[val]") \ + asm qual (__pcpu_op_##size("mov") \ + __percpu_arg([var]) ", %[val]" \ : [val] __pcpu_reg_##size("=", pfo_val__) \ : [var] "m" (__my_cpu_var(_var))); \ \ @@ -184,7 +154,8 @@ do { \ pto_tmp__ = (_val); \ (void)pto_tmp__; \ } \ - asm qual(__pcpu_op2_##size("mov", "%[val]", __percpu_arg([var])) \ + asm qual (__pcpu_op_##size("mov") "%[val], " \ + __percpu_arg([var]) \ : [var] "=m" (__my_cpu_var(_var)) \ : [val] __pcpu_reg_imm_##size(pto_val__)); \ } while (0) @@ -201,7 +172,8 @@ do { \ ({ \ __pcpu_type_##size pfo_val__; \ \ - asm(__pcpu_op2_##size("mov", __force_percpu_arg(a[var]), "%[val]") \ + asm(__pcpu_op_##size("mov") \ + __force_percpu_arg(a[var]) ", %[val]" \ : [val] __pcpu_reg_##size("=", pfo_val__) \ : [var] "i" (&(_var))); \ \ @@ -210,7 +182,7 @@ do { \ #define percpu_unary_op(size, qual, op, _var) \ ({ \ - asm qual (__pcpu_op1_##size(op, __percpu_arg([var])) \ + asm qual (__pcpu_op_##size(op) __percpu_arg([var]) \ : [var] "+m" (__my_cpu_var(_var))); \ }) @@ -223,7 +195,7 @@ do { \ pto_tmp__ = (_val); \ (void)pto_tmp__; \ } \ - asm qual(__pcpu_op2_##size(op, "%[val]", __percpu_arg([var])) \ + asm qual (__pcpu_op_##size(op) "%[val], " __percpu_arg([var]) \ : [var] "+m" (__my_cpu_var(_var)) \ : [val] __pcpu_reg_imm_##size(pto_val__)); \ } while (0) @@ -259,8 +231,8 @@ do { \ ({ \ __pcpu_type_##size paro_tmp__ = __pcpu_cast_##size(_val); \ \ - asm qual (__pcpu_op2_##size("xadd", "%[tmp]", \ - __percpu_arg([var])) \ + asm qual (__pcpu_op_##size("xadd") "%[tmp], " \ + __percpu_arg([var]) \ : [tmp] __pcpu_reg_##size("+", paro_tmp__), \ [var] "+m" (__my_cpu_var(_var)) \ : : "memory"); \ @@ -303,8 +275,8 @@ do { \ __pcpu_type_##size pco_old__ = __pcpu_cast_##size(_oval); \ __pcpu_type_##size pco_new__ = __pcpu_cast_##size(_nval); \ \ - asm qual (__pcpu_op2_##size("cmpxchg", "%[nval]", \ - __percpu_arg([var])) \ + asm qual (__pcpu_op_##size("cmpxchg") "%[nval], " \ + __percpu_arg([var]) \ : [oval] "+a" (pco_old__), \ [var] "+m" (__my_cpu_var(_var)) \ : [nval] __pcpu_reg_##size(, pco_new__) \ @@ -320,8 +292,8 @@ do { \ __pcpu_type_##size pco_old__ = *pco_oval__; \ __pcpu_type_##size pco_new__ = __pcpu_cast_##size(_nval); \ \ - asm qual (__pcpu_op2_##size("cmpxchg", "%[nval]", \ - __percpu_arg([var])) \ + asm qual (__pcpu_op_##size("cmpxchg") "%[nval], " \ + __percpu_arg([var]) \ CC_SET(z) \ : CC_OUT(z) (success), \ [oval] "+a" (pco_old__), \ @@ -579,7 +551,7 @@ do { \ * it is accessed while this_cpu_read_stable() allows the value to be cached. * this_cpu_read_stable() is more efficient and can be used if its value * is guaranteed to be valid across CPUs. The current users include - * pcpu_hot.current_task and pcpu_hot.top_of_stack, both of which are + * current_task and cpu_current_top_of_stack, both of which are * actually per-thread variables implemented as per-CPU variables and * thus stable for the duration of the respective task. */ @@ -614,9 +586,9 @@ do { \ #include <asm-generic/percpu.h> /* We can use this directly for local CPU (faster). */ -DECLARE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off); +DECLARE_PER_CPU_CACHE_HOT(unsigned long, this_cpu_off); -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #ifdef CONFIG_SMP diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index dd4841231bb9..a33147520044 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h @@ -29,11 +29,6 @@ static inline void paravirt_release_pud(unsigned long pfn) {} static inline void paravirt_release_p4d(unsigned long pfn) {} #endif -/* - * Flags to use when allocating a user page table page. - */ -extern gfp_t __userpte_alloc_gfp; - #ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION /* * Instead of one PGD, we acquire two PGDs. Being order-1, it is diff --git a/arch/x86/include/asm/pgtable-2level_types.h b/arch/x86/include/asm/pgtable-2level_types.h index 4a12c276b181..66425424ce91 100644 --- a/arch/x86/include/asm/pgtable-2level_types.h +++ b/arch/x86/include/asm/pgtable-2level_types.h @@ -2,7 +2,7 @@ #ifndef _ASM_X86_PGTABLE_2LEVEL_DEFS_H #define _ASM_X86_PGTABLE_2LEVEL_DEFS_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> typedef unsigned long pteval_t; @@ -16,7 +16,7 @@ typedef union { pteval_t pte; pteval_t pte_low; } pte_t; -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #define SHARED_KERNEL_PMD 0 diff --git a/arch/x86/include/asm/pgtable-3level_types.h b/arch/x86/include/asm/pgtable-3level_types.h index 80911349519e..9d5b257d44e3 100644 --- a/arch/x86/include/asm/pgtable-3level_types.h +++ b/arch/x86/include/asm/pgtable-3level_types.h @@ -2,7 +2,7 @@ #ifndef _ASM_X86_PGTABLE_3LEVEL_DEFS_H #define _ASM_X86_PGTABLE_3LEVEL_DEFS_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> typedef u64 pteval_t; @@ -25,7 +25,7 @@ typedef union { }; pmdval_t pmd; } pmd_t; -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #define SHARED_KERNEL_PMD (!static_cpu_has(X86_FEATURE_PTI)) diff --git a/arch/x86/include/asm/pgtable-invert.h b/arch/x86/include/asm/pgtable-invert.h index a0c1525f1b6f..e12e52ae8083 100644 --- a/arch/x86/include/asm/pgtable-invert.h +++ b/arch/x86/include/asm/pgtable-invert.h @@ -2,7 +2,7 @@ #ifndef _ASM_PGTABLE_INVERT_H #define _ASM_PGTABLE_INVERT_H 1 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * A clear pte value is special, and doesn't get inverted. @@ -36,6 +36,6 @@ static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask) return val; } -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 593f10aabd45..7bd6bd6df4a1 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -15,7 +15,7 @@ cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS))) \ : (prot)) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/spinlock.h> #include <asm/x86_init.h> #include <asm/pkru.h> @@ -973,7 +973,7 @@ static inline pgd_t pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd) } #endif /* CONFIG_MITIGATION_PAGE_TABLE_ISOLATION */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #ifdef CONFIG_X86_32 @@ -982,7 +982,7 @@ static inline pgd_t pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd) # include <asm/pgtable_64.h> #endif -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/mm_types.h> #include <linux/mmdebug.h> #include <linux/log2.h> @@ -1233,12 +1233,12 @@ static inline int pgd_none(pgd_t pgd) } #endif /* CONFIG_PGTABLE_LEVELS > 4 */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #define KERNEL_PGD_BOUNDARY pgd_index(PAGE_OFFSET) #define KERNEL_PGD_PTRS (PTRS_PER_PGD - KERNEL_PGD_BOUNDARY) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ extern int direct_gbpages; void init_mem_mapping(void); @@ -1812,6 +1812,6 @@ bool arch_is_platform_page(u64 paddr); WARN_ON_ONCE(pgd_present(*pgdp) && !pgd_same(*pgdp, pgd)); \ set_pgd(pgdp, pgd); \ }) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_PGTABLE_H */ diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index 7d4ad8907297..b612cc57a4d3 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -13,7 +13,7 @@ * This file contains the functions and defines necessary to modify and use * the i386 page table tree. */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/processor.h> #include <linux/threads.h> #include <asm/paravirt.h> @@ -45,7 +45,7 @@ do { \ flush_tlb_one_kernel((vaddr)); \ } while (0) -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* * This is used to calculate the .brk reservation for initial pagetables. diff --git a/arch/x86/include/asm/pgtable_32_areas.h b/arch/x86/include/asm/pgtable_32_areas.h index b6355416a15a..921148b42967 100644 --- a/arch/x86/include/asm/pgtable_32_areas.h +++ b/arch/x86/include/asm/pgtable_32_areas.h @@ -13,7 +13,7 @@ */ #define VMALLOC_OFFSET (8 * 1024 * 1024) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ extern bool __vmalloc_start_set; /* set once high_memory is set */ #endif diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index d1426b64c1b9..b89f8f1194a9 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -5,7 +5,7 @@ #include <linux/const.h> #include <asm/pgtable_64_types.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * This file contains the functions and defines necessary to modify and use @@ -270,7 +270,7 @@ static inline bool gup_fast_permitted(unsigned long start, unsigned long end) #include <asm/pgtable-invert.h> -#else /* __ASSEMBLY__ */ +#else /* __ASSEMBLER__ */ #define l4_index(x) (((x) >> 39) & 511) #define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) @@ -291,5 +291,5 @@ L3_START_KERNEL = pud_index(__START_KERNEL_map) i = i + 1 ; \ .endr -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_PGTABLE_64_H */ diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index ec68f8369bdc..5bb782d856f2 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -4,7 +4,7 @@ #include <asm/sparsemem.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> #include <asm/kaslr.h> @@ -44,7 +44,7 @@ static inline bool pgtable_l5_enabled(void) extern unsigned int pgdir_shift; extern unsigned int ptrs_per_p4d; -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #define SHARED_KERNEL_PMD 0 diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 4b804531b03c..b2ed8198d5cd 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -33,6 +33,7 @@ #define _PAGE_BIT_CPA_TEST _PAGE_BIT_SOFTW1 #define _PAGE_BIT_UFFD_WP _PAGE_BIT_SOFTW2 /* userfaultfd wrprotected */ #define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_SOFTW3 /* software dirty tracking */ +#define _PAGE_BIT_KERNEL_4K _PAGE_BIT_SOFTW3 /* page must not be converted to large */ #define _PAGE_BIT_DEVMAP _PAGE_BIT_SOFTW4 #ifdef CONFIG_X86_64 @@ -64,6 +65,7 @@ #define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) #define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL) #define _PAGE_CPA_TEST (_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST) +#define _PAGE_KERNEL_4K (_AT(pteval_t, 1) << _PAGE_BIT_KERNEL_4K) #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS #define _PAGE_PKEY_BIT0 (_AT(pteval_t, 1) << _PAGE_BIT_PKEY_BIT0) #define _PAGE_PKEY_BIT1 (_AT(pteval_t, 1) << _PAGE_BIT_PKEY_BIT1) @@ -164,7 +166,7 @@ * to have the WB mode at index 0 (all bits clear). This is the default * right now and likely would break too much if changed. */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ enum page_cache_mode { _PAGE_CACHE_MODE_WB = 0, _PAGE_CACHE_MODE_WC = 1, @@ -239,7 +241,7 @@ enum page_cache_mode { #define __PAGE_KERNEL_IO_NOCACHE __PAGE_KERNEL_NOCACHE -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define __PAGE_KERNEL_ENC (__PAGE_KERNEL | _ENC) #define __PAGE_KERNEL_ENC_WP (__PAGE_KERNEL_WP | _ENC) @@ -262,7 +264,7 @@ enum page_cache_mode { #define PAGE_KERNEL_IO __pgprot_mask(__PAGE_KERNEL_IO) #define PAGE_KERNEL_IO_NOCACHE __pgprot_mask(__PAGE_KERNEL_IO_NOCACHE) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ /* * early identity mapping pte attrib macros. @@ -281,7 +283,7 @@ enum page_cache_mode { # include <asm/pgtable_64_types.h> #endif -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> @@ -580,6 +582,6 @@ extern int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long page_flags); extern int __init kernel_unmap_pages_in_pgd(pgd_t *pgd, unsigned long address, unsigned long numpages); -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_X86_PGTABLE_DEFS_H */ diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index 919909d8cb77..578441db09f0 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -4,10 +4,11 @@ #include <asm/rmwcc.h> #include <asm/percpu.h> -#include <asm/current.h> #include <linux/static_call_types.h> +DECLARE_PER_CPU_CACHE_HOT(int, __preempt_count); + /* We use the MSB mostly because its available */ #define PREEMPT_NEED_RESCHED 0x80000000 @@ -23,18 +24,18 @@ */ static __always_inline int preempt_count(void) { - return raw_cpu_read_4(pcpu_hot.preempt_count) & ~PREEMPT_NEED_RESCHED; + return raw_cpu_read_4(__preempt_count) & ~PREEMPT_NEED_RESCHED; } static __always_inline void preempt_count_set(int pc) { int old, new; - old = raw_cpu_read_4(pcpu_hot.preempt_count); + old = raw_cpu_read_4(__preempt_count); do { new = (old & PREEMPT_NEED_RESCHED) | (pc & ~PREEMPT_NEED_RESCHED); - } while (!raw_cpu_try_cmpxchg_4(pcpu_hot.preempt_count, &old, new)); + } while (!raw_cpu_try_cmpxchg_4(__preempt_count, &old, new)); } /* @@ -43,7 +44,7 @@ static __always_inline void preempt_count_set(int pc) #define init_task_preempt_count(p) do { } while (0) #define init_idle_preempt_count(p, cpu) do { \ - per_cpu(pcpu_hot.preempt_count, (cpu)) = PREEMPT_DISABLED; \ + per_cpu(__preempt_count, (cpu)) = PREEMPT_DISABLED; \ } while (0) /* @@ -57,17 +58,17 @@ static __always_inline void preempt_count_set(int pc) static __always_inline void set_preempt_need_resched(void) { - raw_cpu_and_4(pcpu_hot.preempt_count, ~PREEMPT_NEED_RESCHED); + raw_cpu_and_4(__preempt_count, ~PREEMPT_NEED_RESCHED); } static __always_inline void clear_preempt_need_resched(void) { - raw_cpu_or_4(pcpu_hot.preempt_count, PREEMPT_NEED_RESCHED); + raw_cpu_or_4(__preempt_count, PREEMPT_NEED_RESCHED); } static __always_inline bool test_preempt_need_resched(void) { - return !(raw_cpu_read_4(pcpu_hot.preempt_count) & PREEMPT_NEED_RESCHED); + return !(raw_cpu_read_4(__preempt_count) & PREEMPT_NEED_RESCHED); } /* @@ -76,12 +77,12 @@ static __always_inline bool test_preempt_need_resched(void) static __always_inline void __preempt_count_add(int val) { - raw_cpu_add_4(pcpu_hot.preempt_count, val); + raw_cpu_add_4(__preempt_count, val); } static __always_inline void __preempt_count_sub(int val) { - raw_cpu_add_4(pcpu_hot.preempt_count, -val); + raw_cpu_add_4(__preempt_count, -val); } /* @@ -91,7 +92,7 @@ static __always_inline void __preempt_count_sub(int val) */ static __always_inline bool __preempt_count_dec_and_test(void) { - return GEN_UNARY_RMWcc("decl", __my_cpu_var(pcpu_hot.preempt_count), e, + return GEN_UNARY_RMWcc("decl", __my_cpu_var(__preempt_count), e, __percpu_arg([var])); } @@ -100,7 +101,7 @@ static __always_inline bool __preempt_count_dec_and_test(void) */ static __always_inline bool should_resched(int preempt_offset) { - return unlikely(raw_cpu_read_4(pcpu_hot.preempt_count) == preempt_offset); + return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset); } #ifdef CONFIG_PREEMPTION diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index c0cd10182e90..7a3918308a36 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -60,18 +60,13 @@ struct vm86; # define ARCH_MIN_MMSTRUCT_ALIGN 0 #endif -enum tlb_infos { - ENTRIES, - NR_INFO -}; - -extern u16 __read_mostly tlb_lli_4k[NR_INFO]; -extern u16 __read_mostly tlb_lli_2m[NR_INFO]; -extern u16 __read_mostly tlb_lli_4m[NR_INFO]; -extern u16 __read_mostly tlb_lld_4k[NR_INFO]; -extern u16 __read_mostly tlb_lld_2m[NR_INFO]; -extern u16 __read_mostly tlb_lld_4m[NR_INFO]; -extern u16 __read_mostly tlb_lld_1g[NR_INFO]; +extern u16 __read_mostly tlb_lli_4k; +extern u16 __read_mostly tlb_lli_2m; +extern u16 __read_mostly tlb_lli_4m; +extern u16 __read_mostly tlb_lld_4k; +extern u16 __read_mostly tlb_lld_2m; +extern u16 __read_mostly tlb_lld_4m; +extern u16 __read_mostly tlb_lld_1g; /* * CPU type and hardware bug flags. Kept separately for each CPU. @@ -234,7 +229,7 @@ static inline unsigned long long l1tf_pfn_limit(void) void init_cpu_devs(void); void get_cpu_vendor(struct cpuinfo_x86 *c); extern void early_cpu_init(void); -extern void identify_secondary_cpu(struct cpuinfo_x86 *); +extern void identify_secondary_cpu(unsigned int cpu); extern void print_cpu_info(struct cpuinfo_x86 *); void print_cpu_msr(struct cpuinfo_x86 *); @@ -420,37 +415,33 @@ struct irq_stack { char stack[IRQ_STACK_SIZE]; } __aligned(IRQ_STACK_SIZE); +DECLARE_PER_CPU_CACHE_HOT(struct irq_stack *, hardirq_stack_ptr); #ifdef CONFIG_X86_64 -struct fixed_percpu_data { - /* - * GCC hardcodes the stack canary as %gs:40. Since the - * irq_stack is the object at %gs:0, we reserve the bottom - * 48 bytes of the irq stack for the canary. - * - * Once we are willing to require -mstack-protector-guard-symbol= - * support for x86_64 stackprotector, we can get rid of this. - */ - char gs_base[40]; - unsigned long stack_canary; -}; +DECLARE_PER_CPU_CACHE_HOT(bool, hardirq_stack_inuse); +#else +DECLARE_PER_CPU_CACHE_HOT(struct irq_stack *, softirq_stack_ptr); +#endif -DECLARE_PER_CPU_FIRST(struct fixed_percpu_data, fixed_percpu_data) __visible; -DECLARE_INIT_PER_CPU(fixed_percpu_data); +DECLARE_PER_CPU_CACHE_HOT(unsigned long, cpu_current_top_of_stack); +/* const-qualified alias provided by the linker. */ +DECLARE_PER_CPU_CACHE_HOT(const unsigned long __percpu_seg_override, + const_cpu_current_top_of_stack); +#ifdef CONFIG_X86_64 static inline unsigned long cpu_kernelmode_gs_base(int cpu) { - return (unsigned long)per_cpu(fixed_percpu_data.gs_base, cpu); +#ifdef CONFIG_SMP + return per_cpu_offset(cpu); +#else + return 0; +#endif } extern asmlinkage void entry_SYSCALL32_ignore(void); /* Save actual FS/GS selectors and bases to current->thread */ void current_save_fsgs(void); -#else /* X86_64 */ -#ifdef CONFIG_STACKPROTECTOR -DECLARE_PER_CPU(unsigned long, __stack_chk_guard); -#endif -#endif /* !X86_64 */ +#endif /* X86_64 */ struct perf_event; @@ -561,9 +552,9 @@ static __always_inline unsigned long current_top_of_stack(void) * entry trampoline. */ if (IS_ENABLED(CONFIG_USE_X86_SEG_SUPPORT)) - return this_cpu_read_const(const_pcpu_hot.top_of_stack); + return this_cpu_read_const(const_cpu_current_top_of_stack); - return this_cpu_read_stable(pcpu_hot.top_of_stack); + return this_cpu_read_stable(cpu_current_top_of_stack); } static __always_inline bool on_thread_stack(void) @@ -668,8 +659,6 @@ static __always_inline void prefetchw(const void *x) .sysenter_cs = __KERNEL_CS, \ } -#define KSTK_ESP(task) (task_pt_regs(task)->sp) - #else extern unsigned long __top_init_kernel_stack[]; @@ -677,8 +666,6 @@ extern unsigned long __top_init_kernel_stack[]; .sp = (unsigned long)&__top_init_kernel_stack, \ } -extern unsigned long KSTK_ESP(struct task_struct *task); - #endif /* CONFIG_X86_64 */ extern void start_thread(struct pt_regs *regs, unsigned long new_ip, @@ -692,6 +679,7 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip, #define TASK_UNMAPPED_BASE __TASK_UNMAPPED_BASE(TASK_SIZE_LOW) #define KSTK_EIP(task) (task_pt_regs(task)->ip) +#define KSTK_ESP(task) (task_pt_regs(task)->sp) /* Get/set a process' ability to use the timestamp counter instruction */ #define GET_TSC_CTL(adr) get_tsc_mode((adr)) diff --git a/arch/x86/include/asm/prom.h b/arch/x86/include/asm/prom.h index 365798cb4408..5d0dbab85264 100644 --- a/arch/x86/include/asm/prom.h +++ b/arch/x86/include/asm/prom.h @@ -8,7 +8,7 @@ #ifndef _ASM_X86_PROM_H #define _ASM_X86_PROM_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/of.h> #include <linux/types.h> @@ -33,5 +33,5 @@ static inline void x86_flattree_get_config(void) { } extern char cmd_line[COMMAND_LINE_SIZE]; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index 484f4f0131a5..05224a695872 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h @@ -15,7 +15,6 @@ void entry_SYSCALL_64(void); void entry_SYSCALL_64_safe_stack(void); void entry_SYSRETQ_unsafe_stack(void); void entry_SYSRETQ_end(void); -long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2); #endif #ifdef CONFIG_X86_32 @@ -41,6 +40,6 @@ void x86_configure_nx(void); extern int reboot_force; -long do_arch_prctl_common(int option, unsigned long arg2); +long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2); #endif /* _ASM_X86_PROTO_H */ diff --git a/arch/x86/include/asm/pti.h b/arch/x86/include/asm/pti.h index ab167c96b9ab..88d0a1ab1f77 100644 --- a/arch/x86/include/asm/pti.h +++ b/arch/x86/include/asm/pti.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_PTI_H #define _ASM_X86_PTI_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION extern void pti_init(void); @@ -11,5 +11,5 @@ extern void pti_finalize(void); static inline void pti_check_boottime_disable(void) { } #endif -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_PTI_H */ diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 5a83fbd9bc0b..50f75467f73d 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -6,7 +6,7 @@ #include <asm/page_types.h> #include <uapi/asm/ptrace.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifdef __i386__ struct pt_regs { @@ -469,5 +469,5 @@ extern int do_set_thread_area(struct task_struct *p, int idx, # define do_set_thread_area_64(p, s, t) (0) #endif -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_X86_PTRACE_H */ diff --git a/arch/x86/include/asm/purgatory.h b/arch/x86/include/asm/purgatory.h index 5528e9325049..2fee5e9f1ccc 100644 --- a/arch/x86/include/asm/purgatory.h +++ b/arch/x86/include/asm/purgatory.h @@ -2,10 +2,10 @@ #ifndef _ASM_X86_PURGATORY_H #define _ASM_X86_PURGATORY_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/purgatory.h> extern void purgatory(void); -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_PURGATORY_H */ diff --git a/arch/x86/include/asm/pvclock-abi.h b/arch/x86/include/asm/pvclock-abi.h index 1436226efe3e..b9fece5fc96d 100644 --- a/arch/x86/include/asm/pvclock-abi.h +++ b/arch/x86/include/asm/pvclock-abi.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_PVCLOCK_ABI_H #define _ASM_X86_PVCLOCK_ABI_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * These structs MUST NOT be changed. @@ -44,5 +44,5 @@ struct pvclock_wall_clock { #define PVCLOCK_GUEST_STOPPED (1 << 1) /* PVCLOCK_COUNTS_FROM_ZERO broke ABI and can't be used anymore. */ #define PVCLOCK_COUNTS_FROM_ZERO (1 << 2) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_PVCLOCK_ABI_H */ diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h index 87e5482acd0d..f607081a022a 100644 --- a/arch/x86/include/asm/realmode.h +++ b/arch/x86/include/asm/realmode.h @@ -9,7 +9,7 @@ #define TH_FLAGS_SME_ACTIVE_BIT 0 #define TH_FLAGS_SME_ACTIVE BIT(TH_FLAGS_SME_ACTIVE_BIT) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> #include <asm/io.h> @@ -95,6 +95,6 @@ void reserve_real_mode(void); void load_trampoline_pgtable(void); void init_real_mode(void); -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ARCH_X86_REALMODE_H */ diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h deleted file mode 100644 index e9187ddd3d1f..000000000000 --- a/arch/x86/include/asm/required-features.h +++ /dev/null @@ -1,105 +0,0 @@ -#ifndef _ASM_X86_REQUIRED_FEATURES_H -#define _ASM_X86_REQUIRED_FEATURES_H - -/* Define minimum CPUID feature set for kernel These bits are checked - really early to actually display a visible error message before the - kernel dies. Make sure to assign features to the proper mask! - - Some requirements that are not in CPUID yet are also in the - CONFIG_X86_MINIMUM_CPU_FAMILY which is checked too. - - The real information is in arch/x86/Kconfig.cpu, this just converts - the CONFIGs into a bitmask */ - -#ifndef CONFIG_MATH_EMULATION -# define NEED_FPU (1<<(X86_FEATURE_FPU & 31)) -#else -# define NEED_FPU 0 -#endif - -#if defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64) -# define NEED_PAE (1<<(X86_FEATURE_PAE & 31)) -#else -# define NEED_PAE 0 -#endif - -#ifdef CONFIG_X86_CMPXCHG64 -# define NEED_CX8 (1<<(X86_FEATURE_CX8 & 31)) -#else -# define NEED_CX8 0 -#endif - -#if defined(CONFIG_X86_CMOV) || defined(CONFIG_X86_64) -# define NEED_CMOV (1<<(X86_FEATURE_CMOV & 31)) -#else -# define NEED_CMOV 0 -#endif - -# define NEED_3DNOW 0 - -#if defined(CONFIG_X86_P6_NOP) || defined(CONFIG_X86_64) -# define NEED_NOPL (1<<(X86_FEATURE_NOPL & 31)) -#else -# define NEED_NOPL 0 -#endif - -#ifdef CONFIG_MATOM -# define NEED_MOVBE (1<<(X86_FEATURE_MOVBE & 31)) -#else -# define NEED_MOVBE 0 -#endif - -#ifdef CONFIG_X86_64 -#ifdef CONFIG_PARAVIRT_XXL -/* Paravirtualized systems may not have PSE or PGE available */ -#define NEED_PSE 0 -#define NEED_PGE 0 -#else -#define NEED_PSE (1<<(X86_FEATURE_PSE) & 31) -#define NEED_PGE (1<<(X86_FEATURE_PGE) & 31) -#endif -#define NEED_MSR (1<<(X86_FEATURE_MSR & 31)) -#define NEED_FXSR (1<<(X86_FEATURE_FXSR & 31)) -#define NEED_XMM (1<<(X86_FEATURE_XMM & 31)) -#define NEED_XMM2 (1<<(X86_FEATURE_XMM2 & 31)) -#define NEED_LM (1<<(X86_FEATURE_LM & 31)) -#else -#define NEED_PSE 0 -#define NEED_MSR 0 -#define NEED_PGE 0 -#define NEED_FXSR 0 -#define NEED_XMM 0 -#define NEED_XMM2 0 -#define NEED_LM 0 -#endif - -#define REQUIRED_MASK0 (NEED_FPU|NEED_PSE|NEED_MSR|NEED_PAE|\ - NEED_CX8|NEED_PGE|NEED_FXSR|NEED_CMOV|\ - NEED_XMM|NEED_XMM2) -#define SSE_MASK (NEED_XMM|NEED_XMM2) - -#define REQUIRED_MASK1 (NEED_LM|NEED_3DNOW) - -#define REQUIRED_MASK2 0 -#define REQUIRED_MASK3 (NEED_NOPL) -#define REQUIRED_MASK4 (NEED_MOVBE) -#define REQUIRED_MASK5 0 -#define REQUIRED_MASK6 0 -#define REQUIRED_MASK7 0 -#define REQUIRED_MASK8 0 -#define REQUIRED_MASK9 0 -#define REQUIRED_MASK10 0 -#define REQUIRED_MASK11 0 -#define REQUIRED_MASK12 0 -#define REQUIRED_MASK13 0 -#define REQUIRED_MASK14 0 -#define REQUIRED_MASK15 0 -#define REQUIRED_MASK16 0 -#define REQUIRED_MASK17 0 -#define REQUIRED_MASK18 0 -#define REQUIRED_MASK19 0 -#define REQUIRED_MASK20 0 -#define REQUIRED_MASK21 0 -#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22) - -#endif /* _ASM_X86_REQUIRED_FEATURES_H */ diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h index 363266cbcada..3821ee3fae35 100644 --- a/arch/x86/include/asm/rmwcc.h +++ b/arch/x86/include/asm/rmwcc.h @@ -29,7 +29,7 @@ cc_label: c = true; \ #define __GEN_RMWcc(fullop, _var, cc, clobbers, ...) \ ({ \ bool c; \ - asm volatile (fullop CC_SET(cc) \ + asm_inline volatile (fullop CC_SET(cc) \ : [var] "+m" (_var), CC_OUT(cc) (c) \ : __VA_ARGS__ : clobbers); \ c; \ diff --git a/arch/x86/include/asm/runtime-const.h b/arch/x86/include/asm/runtime-const.h index 6652ebddfd02..8d983cfd06ea 100644 --- a/arch/x86/include/asm/runtime-const.h +++ b/arch/x86/include/asm/runtime-const.h @@ -2,6 +2,18 @@ #ifndef _ASM_RUNTIME_CONST_H #define _ASM_RUNTIME_CONST_H +#ifdef __ASSEMBLY__ + +.macro RUNTIME_CONST_PTR sym reg + movq $0x0123456789abcdef, %\reg + 1: + .pushsection runtime_ptr_\sym, "a" + .long 1b - 8 - . + .popsection +.endm + +#else /* __ASSEMBLY__ */ + #define runtime_const_ptr(sym) ({ \ typeof(sym) __ret; \ asm_inline("mov %1,%0\n1:\n" \ @@ -58,4 +70,5 @@ static inline void runtime_const_fixup(void (*fn)(void *, unsigned long), } } +#endif /* __ASSEMBLY__ */ #endif diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index 9d6411c65920..77d8f49b92bd 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -233,7 +233,7 @@ #define VDSO_CPUNODE_BITS 12 #define VDSO_CPUNODE_MASK 0xfff -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* Helper functions to store/load CPU and node numbers */ @@ -265,7 +265,7 @@ static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node) *node = (p >> VDSO_CPUNODE_BITS); } -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #ifdef __KERNEL__ @@ -286,7 +286,7 @@ static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node) */ #define XEN_EARLY_IDT_HANDLER_SIZE (8 + ENDBR_INSN_SIZE) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ extern const char early_idt_handler_array[NUM_EXCEPTION_VECTORS][EARLY_IDT_HANDLER_SIZE]; extern void early_ignore_irq(void); @@ -350,7 +350,7 @@ static inline void __loadsegment_fs(unsigned short value) #define savesegment(seg, value) \ asm("mov %%" #seg ",%0":"=r" (value) : : "memory") -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* __KERNEL__ */ #endif /* _ASM_X86_SEGMENT_H */ diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h index cc62ef70ccc0..8d9f1c9aaa4c 100644 --- a/arch/x86/include/asm/set_memory.h +++ b/arch/x86/include/asm/set_memory.h @@ -2,7 +2,6 @@ #ifndef _ASM_X86_SET_MEMORY_H #define _ASM_X86_SET_MEMORY_H -#include <linux/mm.h> #include <asm/page.h> #include <asm-generic/set_memory.h> @@ -38,7 +37,6 @@ int set_memory_rox(unsigned long addr, int numpages); * The caller is required to take care of these. */ -int __set_memory_prot(unsigned long addr, int numpages, pgprot_t prot); int _set_memory_uc(unsigned long addr, int numpages); int _set_memory_wc(unsigned long addr, int numpages); int _set_memory_wt(unsigned long addr, int numpages); diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index 85f4fde3515c..ad9212df0ec0 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -27,7 +27,7 @@ #define OLD_CL_ADDRESS 0x020 /* Relative to real mode data */ #define NEW_CL_POINTER 0x228 /* Relative to real mode data */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/cache.h> #include <asm/bootparam.h> @@ -46,6 +46,7 @@ void setup_bios_corruption_check(void); void early_platform_quirks(void); extern unsigned long saved_video_mode; +extern unsigned long acpi_realmode_flags; extern void reserve_standard_io_resources(void); extern void i386_reserve_resources(void); @@ -141,7 +142,7 @@ extern bool builtin_cmdline_added __ro_after_init; #define builtin_cmdline_added 0 #endif -#else /* __ASSEMBLY */ +#else /* __ASSEMBLER__ */ .macro __RESERVE_BRK name, size .pushsection .bss..brk, "aw" @@ -153,6 +154,6 @@ SYM_DATA_END(__brk_\name) #define RESERVE_BRK(name, size) __RESERVE_BRK name, size -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_SETUP_H */ diff --git a/arch/x86/include/asm/setup_data.h b/arch/x86/include/asm/setup_data.h index 77c51111a893..7bb16f843c93 100644 --- a/arch/x86/include/asm/setup_data.h +++ b/arch/x86/include/asm/setup_data.h @@ -4,7 +4,7 @@ #include <uapi/asm/setup_data.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct pci_setup_rom { struct setup_data data; @@ -27,6 +27,6 @@ struct efi_setup_data { u64 reserved[8]; }; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_SETUP_DATA_H */ diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h index fcbbef484a78..a28ff6b14145 100644 --- a/arch/x86/include/asm/shared/tdx.h +++ b/arch/x86/include/asm/shared/tdx.h @@ -106,7 +106,7 @@ #define TDX_PS_1G 2 #define TDX_PS_NR (TDX_PS_1G + 1) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/compiler_attributes.h> @@ -177,5 +177,5 @@ static __always_inline u64 hcall_func(u64 exit_reason) return exit_reason; } -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_X86_SHARED_TDX_H */ diff --git a/arch/x86/include/asm/shstk.h b/arch/x86/include/asm/shstk.h index 4cb77e004615..ba6f2fe43848 100644 --- a/arch/x86/include/asm/shstk.h +++ b/arch/x86/include/asm/shstk.h @@ -2,7 +2,7 @@ #ifndef _ASM_X86_SHSTK_H #define _ASM_X86_SHSTK_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> struct task_struct; @@ -37,6 +37,6 @@ static inline int shstk_update_last_frame(unsigned long val) { return 0; } static inline bool shstk_is_enabled(void) { return false; } #endif /* CONFIG_X86_USER_SHADOW_STACK */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_SHSTK_H */ diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h index 4a4043ca6493..c72d46175374 100644 --- a/arch/x86/include/asm/signal.h +++ b/arch/x86/include/asm/signal.h @@ -2,7 +2,7 @@ #ifndef _ASM_X86_SIGNAL_H #define _ASM_X86_SIGNAL_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/linkage.h> /* Most things should be clean enough to redefine this at will, if care @@ -28,9 +28,9 @@ typedef struct { #define SA_IA32_ABI 0x02000000u #define SA_X32_ABI 0x01000000u -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #include <uapi/asm/signal.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define __ARCH_HAS_SA_RESTORER @@ -101,5 +101,5 @@ struct pt_regs; #endif /* !__i386__ */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_SIGNAL_H */ diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h index 2de1e5a75c57..daea94c2993c 100644 --- a/arch/x86/include/asm/smap.h +++ b/arch/x86/include/asm/smap.h @@ -13,7 +13,7 @@ #include <asm/cpufeatures.h> #include <asm/alternative.h> -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define ASM_CLAC \ ALTERNATIVE "", "clac", X86_FEATURE_SMAP @@ -21,7 +21,7 @@ #define ASM_STAC \ ALTERNATIVE "", "stac", X86_FEATURE_SMAP -#else /* __ASSEMBLY__ */ +#else /* __ASSEMBLER__ */ static __always_inline void clac(void) { @@ -61,6 +61,6 @@ static __always_inline void smap_restore(unsigned long flags) #define ASM_STAC \ ALTERNATIVE("", "stac", X86_FEATURE_SMAP) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_SMAP_H */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index ca073f40698f..0c1c68039d6f 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -1,12 +1,13 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_SMP_H #define _ASM_X86_SMP_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/cpumask.h> +#include <linux/thread_info.h> #include <asm/cpumask.h> -#include <asm/current.h> -#include <asm/thread_info.h> + +DECLARE_PER_CPU_CACHE_HOT(int, cpu_number); DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map); DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map); @@ -114,13 +115,12 @@ void wbinvd_on_cpu(int cpu); int wbinvd_on_all_cpus(void); void smp_kick_mwait_play_dead(void); +void __noreturn mwait_play_dead(unsigned int eax_hint); void native_smp_send_reschedule(int cpu); void native_send_call_func_ipi(const struct cpumask *mask); void native_send_call_func_single_ipi(int cpu); -void smp_store_cpu_info(int id); - asmlinkage __visible void smp_reboot_interrupt(void); __visible void smp_reschedule_interrupt(struct pt_regs *regs); __visible void smp_call_function_interrupt(struct pt_regs *regs); @@ -133,14 +133,8 @@ __visible void smp_call_function_single_interrupt(struct pt_regs *r); * This function is needed by all SMP systems. It must _always_ be valid * from the initial startup. */ -#define raw_smp_processor_id() this_cpu_read(pcpu_hot.cpu_number) -#define __smp_processor_id() __this_cpu_read(pcpu_hot.cpu_number) - -#ifdef CONFIG_X86_32 -extern int safe_smp_processor_id(void); -#else -# define safe_smp_processor_id() smp_processor_id() -#endif +#define raw_smp_processor_id() this_cpu_read(cpu_number) +#define __smp_processor_id() __this_cpu_read(cpu_number) static inline struct cpumask *cpu_llc_shared_mask(int cpu) { @@ -164,6 +158,8 @@ static inline struct cpumask *cpu_llc_shared_mask(int cpu) { return (struct cpumask *)cpumask_of(0); } + +static inline void __noreturn mwait_play_dead(unsigned int eax_hint) { BUG(); } #endif /* CONFIG_SMP */ #ifdef CONFIG_DEBUG_NMI_SELFTEST @@ -175,7 +171,7 @@ extern void nmi_selftest(void); extern unsigned int smpboot_control; extern unsigned long apic_mmio_base; -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* Control bits for startup_64 */ #define STARTUP_READ_APICID 0x80000000 diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 03e7c2d49559..6266d6b9e0b8 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -42,14 +42,14 @@ static __always_inline void native_write_cr2(unsigned long val) asm volatile("mov %0,%%cr2": : "r" (val) : "memory"); } -static inline unsigned long __native_read_cr3(void) +static __always_inline unsigned long __native_read_cr3(void) { unsigned long val; asm volatile("mov %%cr3,%0\n\t" : "=r" (val) : __FORCE_ORDER); return val; } -static inline void native_write_cr3(unsigned long val) +static __always_inline void native_write_cr3(unsigned long val) { asm volatile("mov %0,%%cr3": : "r" (val) : "memory"); } @@ -176,9 +176,8 @@ static __always_inline void clflush(volatile void *__p) static inline void clflushopt(volatile void *__p) { - alternative_io(".byte 0x3e; clflush %0", - ".byte 0x66; clflush %0", - X86_FEATURE_CLFLUSHOPT, + alternative_io("ds clflush %0", + "clflushopt %0", X86_FEATURE_CLFLUSHOPT, "+m" (*(volatile char __force *)__p)); } @@ -186,14 +185,11 @@ static inline void clwb(volatile void *__p) { volatile struct { char x[64]; } *p = __p; - asm volatile(ALTERNATIVE_2( - ".byte 0x3e; clflush (%[pax])", - ".byte 0x66; clflush (%[pax])", /* clflushopt (%%rax) */ - X86_FEATURE_CLFLUSHOPT, - ".byte 0x66, 0x0f, 0xae, 0x30", /* clwb (%%rax) */ - X86_FEATURE_CLWB) - : [p] "+m" (*p) - : [pax] "a" (p)); + asm_inline volatile(ALTERNATIVE_2( + "ds clflush %0", + "clflushopt %0", X86_FEATURE_CLFLUSHOPT, + "clwb %0", X86_FEATURE_CLWB) + : "+m" (*p)); } #ifdef CONFIG_X86_USER_SHADOW_STACK diff --git a/arch/x86/include/asm/sta2x11.h b/arch/x86/include/asm/sta2x11.h deleted file mode 100644 index e0975e9c4f47..000000000000 --- a/arch/x86/include/asm/sta2x11.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Header file for STMicroelectronics ConneXt (STA2X11) IOHub - */ -#ifndef __ASM_STA2X11_H -#define __ASM_STA2X11_H - -#include <linux/pci.h> - -/* This needs to be called from the MFD to configure its sub-devices */ -struct sta2x11_instance *sta2x11_get_instance(struct pci_dev *pdev); - -#endif /* __ASM_STA2X11_H */ diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index 00473a650f51..cd761b14eb02 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h @@ -2,26 +2,10 @@ /* * GCC stack protector support. * - * Stack protector works by putting predefined pattern at the start of + * Stack protector works by putting a predefined pattern at the start of * the stack frame and verifying that it hasn't been overwritten when - * returning from the function. The pattern is called stack canary - * and unfortunately gcc historically required it to be at a fixed offset - * from the percpu segment base. On x86_64, the offset is 40 bytes. - * - * The same segment is shared by percpu area and stack canary. On - * x86_64, percpu symbols are zero based and %gs (64-bit) points to the - * base of percpu area. The first occupant of the percpu area is always - * fixed_percpu_data which contains stack_canary at the appropriate - * offset. On x86_32, the stack canary is just a regular percpu - * variable. - * - * Putting percpu data in %fs on 32-bit is a minor optimization compared to - * using %gs. Since 32-bit userspace normally has %fs == 0, we are likely - * to load 0 into %fs on exit to usermode, whereas with percpu data in - * %gs, we are likely to load a non-null %gs on return to user mode. - * - * Once we are willing to require GCC 8.1 or better for 64-bit stackprotector - * support, we can remove some of this complexity. + * returning from the function. The pattern is called the stack canary + * and is a unique value for each task. */ #ifndef _ASM_STACKPROTECTOR_H @@ -36,6 +20,8 @@ #include <linux/sched.h> +DECLARE_PER_CPU_CACHE_HOT(unsigned long, __stack_chk_guard); + /* * Initialize the stackprotector canary value. * @@ -51,25 +37,13 @@ static __always_inline void boot_init_stack_canary(void) { unsigned long canary = get_random_canary(); -#ifdef CONFIG_X86_64 - BUILD_BUG_ON(offsetof(struct fixed_percpu_data, stack_canary) != 40); -#endif - current->stack_canary = canary; -#ifdef CONFIG_X86_64 - this_cpu_write(fixed_percpu_data.stack_canary, canary); -#else this_cpu_write(__stack_chk_guard, canary); -#endif } static inline void cpu_init_stack_canary(int cpu, struct task_struct *idle) { -#ifdef CONFIG_X86_64 - per_cpu(fixed_percpu_data.stack_canary, cpu) = idle->stack_canary; -#else per_cpu(__stack_chk_guard, cpu) = idle->stack_canary; -#endif } #else /* STACKPROTECTOR */ diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h index 9d0b324eab21..79e9695dc13e 100644 --- a/arch/x86/include/asm/string_64.h +++ b/arch/x86/include/asm/string_64.h @@ -21,6 +21,7 @@ extern void *__memcpy(void *to, const void *from, size_t len); #define __HAVE_ARCH_MEMSET void *memset(void *s, int c, size_t n); void *__memset(void *s, int c, size_t n); +KCFI_REFERENCE(__memset); /* * KMSAN needs to instrument as much code as possible. Use C versions of @@ -70,6 +71,7 @@ static inline void *memset64(uint64_t *s, uint64_t v, size_t n) #define __HAVE_ARCH_MEMMOVE void *memmove(void *dest, const void *src, size_t count); void *__memmove(void *dest, const void *src, size_t count); +KCFI_REFERENCE(__memmove); int memcmp(const void *cs, const void *ct, size_t count); size_t strlen(const char *s); diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h index b4b16dafd55e..65394aa9b49f 100644 --- a/arch/x86/include/asm/tdx.h +++ b/arch/x86/include/asm/tdx.h @@ -30,7 +30,7 @@ #define TDX_SUCCESS 0ULL #define TDX_RND_NO_ENTROPY 0x8000020300000000ULL -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <uapi/asm/mce.h> @@ -126,5 +126,5 @@ static inline int tdx_enable(void) { return -ENODEV; } static inline const char *tdx_dump_mce_info(struct mce *m) { return NULL; } #endif /* CONFIG_INTEL_TDX_HOST */ -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_X86_TDX_H */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index a55c214f3ba6..9282465eea21 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -54,7 +54,7 @@ * - this struct should fit entirely inside of one cache line * - this struct shares the supervisor stack pages */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct task_struct; #include <asm/cpufeature.h> #include <linux/atomic.h> @@ -73,7 +73,7 @@ struct thread_info { .flags = 0, \ } -#else /* !__ASSEMBLY__ */ +#else /* !__ASSEMBLER__ */ #include <asm/asm-offsets.h> @@ -161,7 +161,7 @@ struct thread_info { * * preempt_count needs to be 1 initially, until the scheduler is functional. */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * Walks up the stack frames to make sure that the specified object is @@ -213,7 +213,7 @@ static inline int arch_within_stack_frames(const void * const stack, #endif } -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* * Thread-synchronous status. @@ -224,7 +224,7 @@ static inline int arch_within_stack_frames(const void * const stack, */ #define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifdef CONFIG_COMPAT #define TS_I386_REGS_POKED 0x0004 /* regs poked by 32-bit ptracer */ @@ -242,6 +242,6 @@ static inline int arch_within_stack_frames(const void * const stack, extern void arch_setup_new_exec(void); #define arch_setup_new_exec arch_setup_new_exec -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_X86_THREAD_INFO_H */ diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h index 77f52bc1578a..866ea78ba156 100644 --- a/arch/x86/include/asm/tlb.h +++ b/arch/x86/include/asm/tlb.h @@ -6,6 +6,9 @@ static inline void tlb_flush(struct mmu_gather *tlb); #include <asm-generic/tlb.h> +#include <linux/kernel.h> +#include <vdso/bits.h> +#include <vdso/page.h> static inline void tlb_flush(struct mmu_gather *tlb) { @@ -25,4 +28,139 @@ static inline void invlpg(unsigned long addr) asm volatile("invlpg (%0)" ::"r" (addr) : "memory"); } +enum addr_stride { + PTE_STRIDE = 0, + PMD_STRIDE = 1 +}; + +/* + * INVLPGB can be targeted by virtual address, PCID, ASID, or any combination + * of the three. For example: + * - FLAG_VA | FLAG_INCLUDE_GLOBAL: invalidate all TLB entries at the address + * - FLAG_PCID: invalidate all TLB entries matching the PCID + * + * The first is used to invalidate (kernel) mappings at a particular + * address across all processes. + * + * The latter invalidates all TLB entries matching a PCID. + */ +#define INVLPGB_FLAG_VA BIT(0) +#define INVLPGB_FLAG_PCID BIT(1) +#define INVLPGB_FLAG_ASID BIT(2) +#define INVLPGB_FLAG_INCLUDE_GLOBAL BIT(3) +#define INVLPGB_FLAG_FINAL_ONLY BIT(4) +#define INVLPGB_FLAG_INCLUDE_NESTED BIT(5) + +/* The implied mode when all bits are clear: */ +#define INVLPGB_MODE_ALL_NONGLOBALS 0UL + +#ifdef CONFIG_BROADCAST_TLB_FLUSH +/* + * INVLPGB does broadcast TLB invalidation across all the CPUs in the system. + * + * The INVLPGB instruction is weakly ordered, and a batch of invalidations can + * be done in a parallel fashion. + * + * The instruction takes the number of extra pages to invalidate, beyond the + * first page, while __invlpgb gets the more human readable number of pages to + * invalidate. + * + * The bits in rax[0:2] determine respectively which components of the address + * (VA, PCID, ASID) get compared when flushing. If neither bits are set, *any* + * address in the specified range matches. + * + * Since it is desired to only flush TLB entries for the ASID that is executing + * the instruction (a host/hypervisor or a guest), the ASID valid bit should + * always be set. On a host/hypervisor, the hardware will use the ASID value + * specified in EDX[15:0] (which should be 0). On a guest, the hardware will + * use the actual ASID value of the guest. + * + * TLBSYNC is used to ensure that pending INVLPGB invalidations initiated from + * this CPU have completed. + */ +static inline void __invlpgb(unsigned long asid, unsigned long pcid, + unsigned long addr, u16 nr_pages, + enum addr_stride stride, u8 flags) +{ + u64 rax = addr | flags | INVLPGB_FLAG_ASID; + u32 ecx = (stride << 31) | (nr_pages - 1); + u32 edx = (pcid << 16) | asid; + + /* The low bits in rax are for flags. Verify addr is clean. */ + VM_WARN_ON_ONCE(addr & ~PAGE_MASK); + + /* INVLPGB; supported in binutils >= 2.36. */ + asm volatile(".byte 0x0f, 0x01, 0xfe" :: "a" (rax), "c" (ecx), "d" (edx)); +} + +static inline void __invlpgb_all(unsigned long asid, unsigned long pcid, u8 flags) +{ + __invlpgb(asid, pcid, 0, 1, 0, flags); +} + +static inline void __tlbsync(void) +{ + /* + * TLBSYNC waits for INVLPGB instructions originating on the same CPU + * to have completed. Print a warning if the task has been migrated, + * and might not be waiting on all the INVLPGBs issued during this TLB + * invalidation sequence. + */ + cant_migrate(); + + /* TLBSYNC: supported in binutils >= 0.36. */ + asm volatile(".byte 0x0f, 0x01, 0xff" ::: "memory"); +} +#else +/* Some compilers (I'm looking at you clang!) simply can't do DCE */ +static inline void __invlpgb(unsigned long asid, unsigned long pcid, + unsigned long addr, u16 nr_pages, + enum addr_stride s, u8 flags) { } +static inline void __invlpgb_all(unsigned long asid, unsigned long pcid, u8 flags) { } +static inline void __tlbsync(void) { } +#endif + +static inline void invlpgb_flush_user_nr_nosync(unsigned long pcid, + unsigned long addr, + u16 nr, bool stride) +{ + enum addr_stride str = stride ? PMD_STRIDE : PTE_STRIDE; + u8 flags = INVLPGB_FLAG_PCID | INVLPGB_FLAG_VA; + + __invlpgb(0, pcid, addr, nr, str, flags); +} + +/* Flush all mappings for a given PCID, not including globals. */ +static inline void invlpgb_flush_single_pcid_nosync(unsigned long pcid) +{ + __invlpgb_all(0, pcid, INVLPGB_FLAG_PCID); +} + +/* Flush all mappings, including globals, for all PCIDs. */ +static inline void invlpgb_flush_all(void) +{ + /* + * TLBSYNC at the end needs to make sure all flushes done on the + * current CPU have been executed system-wide. Therefore, make + * sure nothing gets migrated in-between but disable preemption + * as it is cheaper. + */ + guard(preempt)(); + __invlpgb_all(0, 0, INVLPGB_FLAG_INCLUDE_GLOBAL); + __tlbsync(); +} + +/* Flush addr, including globals, for all PCIDs. */ +static inline void invlpgb_flush_addr_nosync(unsigned long addr, u16 nr) +{ + __invlpgb(0, 0, addr, nr, PTE_STRIDE, INVLPGB_FLAG_INCLUDE_GLOBAL); +} + +/* Flush all mappings for all PCIDs except globals. */ +static inline void invlpgb_flush_all_nonglobals(void) +{ + guard(preempt)(); + __invlpgb_all(0, 0, INVLPGB_MODE_ALL_NONGLOBALS); + __tlbsync(); +} #endif /* _ASM_X86_TLB_H */ diff --git a/arch/x86/include/asm/tlbbatch.h b/arch/x86/include/asm/tlbbatch.h index 1ad56eb3e8a8..80aaf64ff25f 100644 --- a/arch/x86/include/asm/tlbbatch.h +++ b/arch/x86/include/asm/tlbbatch.h @@ -10,6 +10,11 @@ struct arch_tlbflush_unmap_batch { * the PFNs being flushed.. */ struct cpumask cpumask; + /* + * Set if pages were unmapped from any MM, even one that does not + * have active CPUs in its cpumask. + */ + bool unmapped_pages; }; #endif /* _ARCH_X86_TLBBATCH_H */ diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 02fc2aa06e9e..a9af8759de34 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -6,6 +6,7 @@ #include <linux/mmu_notifier.h> #include <linux/sched.h> +#include <asm/barrier.h> #include <asm/processor.h> #include <asm/cpufeature.h> #include <asm/special_insns.h> @@ -183,6 +184,9 @@ static inline void cr4_init_shadow(void) extern unsigned long mmu_cr4_features; extern u32 *trampoline_cr4_features; +/* How many pages can be invalidated with one INVLPGB. */ +extern u16 invlpgb_count_max; + extern void initialize_tlbstate_and_flush(void); /* @@ -231,6 +235,71 @@ void flush_tlb_one_kernel(unsigned long addr); void flush_tlb_multi(const struct cpumask *cpumask, const struct flush_tlb_info *info); +static inline bool is_dyn_asid(u16 asid) +{ + return asid < TLB_NR_DYN_ASIDS; +} + +static inline bool is_global_asid(u16 asid) +{ + return !is_dyn_asid(asid); +} + +#ifdef CONFIG_BROADCAST_TLB_FLUSH +static inline u16 mm_global_asid(struct mm_struct *mm) +{ + u16 asid; + + if (!cpu_feature_enabled(X86_FEATURE_INVLPGB)) + return 0; + + asid = smp_load_acquire(&mm->context.global_asid); + + /* mm->context.global_asid is either 0, or a global ASID */ + VM_WARN_ON_ONCE(asid && is_dyn_asid(asid)); + + return asid; +} + +static inline void mm_init_global_asid(struct mm_struct *mm) +{ + if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) { + mm->context.global_asid = 0; + mm->context.asid_transition = false; + } +} + +static inline void mm_assign_global_asid(struct mm_struct *mm, u16 asid) +{ + /* + * Notably flush_tlb_mm_range() -> broadcast_tlb_flush() -> + * finish_asid_transition() needs to observe asid_transition = true + * once it observes global_asid. + */ + mm->context.asid_transition = true; + smp_store_release(&mm->context.global_asid, asid); +} + +static inline void mm_clear_asid_transition(struct mm_struct *mm) +{ + WRITE_ONCE(mm->context.asid_transition, false); +} + +static inline bool mm_in_asid_transition(struct mm_struct *mm) +{ + if (!cpu_feature_enabled(X86_FEATURE_INVLPGB)) + return false; + + return mm && READ_ONCE(mm->context.asid_transition); +} +#else +static inline u16 mm_global_asid(struct mm_struct *mm) { return 0; } +static inline void mm_init_global_asid(struct mm_struct *mm) { } +static inline void mm_assign_global_asid(struct mm_struct *mm, u16 asid) { } +static inline void mm_clear_asid_transition(struct mm_struct *mm) { } +static inline bool mm_in_asid_transition(struct mm_struct *mm) { return false; } +#endif /* CONFIG_BROADCAST_TLB_FLUSH */ + #ifdef CONFIG_PARAVIRT #include <asm/paravirt.h> #endif @@ -242,7 +311,7 @@ void flush_tlb_multi(const struct cpumask *cpumask, flush_tlb_mm_range((vma)->vm_mm, start, end, \ ((vma)->vm_flags & VM_HUGETLB) \ ? huge_page_shift(hstate_vma(vma)) \ - : PAGE_SHIFT, false) + : PAGE_SHIFT, true) extern void flush_tlb_all(void); extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, @@ -284,6 +353,7 @@ static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *b { inc_mm_tlb_gen(mm); cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm)); + batch->unmapped_pages = true; mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL); } diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 1f1deaecd364..869b88061801 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -35,8 +35,6 @@ static inline int get_si_code(unsigned long condition) return TRAP_BRKPT; } -extern int panic_on_unrecovered_nmi; - void math_emulate(struct math_emu_info *); bool fault_in_kernel_space(unsigned long address); diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h index 85cc57cb6539..8f4579c5a6f8 100644 --- a/arch/x86/include/asm/unwind_hints.h +++ b/arch/x86/include/asm/unwind_hints.h @@ -5,7 +5,7 @@ #include "orc_types.h" -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ .macro UNWIND_HINT_END_OF_STACK UNWIND_HINT type=UNWIND_HINT_TYPE_END_OF_STACK @@ -88,6 +88,6 @@ #define UNWIND_HINT_RESTORE \ UNWIND_HINT(UNWIND_HINT_TYPE_RESTORE, 0, 0, 0) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_UNWIND_HINTS_H */ diff --git a/arch/x86/include/asm/vdso/getrandom.h b/arch/x86/include/asm/vdso/getrandom.h index 2bf9c0e970c3..785f8edcb9c9 100644 --- a/arch/x86/include/asm/vdso/getrandom.h +++ b/arch/x86/include/asm/vdso/getrandom.h @@ -5,7 +5,7 @@ #ifndef __ASM_VDSO_GETRANDOM_H #define __ASM_VDSO_GETRANDOM_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/unistd.h> @@ -37,6 +37,6 @@ static __always_inline const struct vdso_rng_data *__arch_get_vdso_rng_data(void return &vdso_rng_data; } -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* __ASM_VDSO_GETRANDOM_H */ diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h index 375a34b0f365..428f3f4c2235 100644 --- a/arch/x86/include/asm/vdso/gettimeofday.h +++ b/arch/x86/include/asm/vdso/gettimeofday.h @@ -10,7 +10,7 @@ #ifndef __ASM_VDSO_GETTIMEOFDAY_H #define __ASM_VDSO_GETTIMEOFDAY_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <uapi/linux/time.h> #include <asm/vgtod.h> @@ -350,6 +350,6 @@ static __always_inline u64 vdso_calc_ns(const struct vdso_data *vd, u64 cycles, } #define vdso_calc_ns vdso_calc_ns -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* __ASM_VDSO_GETTIMEOFDAY_H */ diff --git a/arch/x86/include/asm/vdso/processor.h b/arch/x86/include/asm/vdso/processor.h index 2cbce97d29ea..c9b2ba7a9ec4 100644 --- a/arch/x86/include/asm/vdso/processor.h +++ b/arch/x86/include/asm/vdso/processor.h @@ -5,7 +5,7 @@ #ifndef __ASM_VDSO_PROCESSOR_H #define __ASM_VDSO_PROCESSOR_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */ static __always_inline void rep_nop(void) @@ -22,6 +22,6 @@ struct getcpu_cache; notrace long __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused); -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_VDSO_PROCESSOR_H */ diff --git a/arch/x86/include/asm/vdso/vsyscall.h b/arch/x86/include/asm/vdso/vsyscall.h index 37b4a70559a8..72aedebb7648 100644 --- a/arch/x86/include/asm/vdso/vsyscall.h +++ b/arch/x86/include/asm/vdso/vsyscall.h @@ -9,7 +9,7 @@ #define VDSO_PAGE_PVCLOCK_OFFSET 0 #define VDSO_PAGE_HVCLOCK_OFFSET 1 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <vdso/datapage.h> #include <asm/vgtod.h> @@ -36,6 +36,6 @@ struct vdso_rng_data *__x86_get_k_vdso_rng_data(void) /* The asm-generic header needs to be included after the definitions above */ #include <asm-generic/vdso/vsyscall.h> -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* __ASM_VDSO_VSYSCALL_H */ diff --git a/arch/x86/include/asm/vermagic.h b/arch/x86/include/asm/vermagic.h index 75884d2cdec3..5d471253c755 100644 --- a/arch/x86/include/asm/vermagic.h +++ b/arch/x86/include/asm/vermagic.h @@ -15,8 +15,6 @@ #define MODULE_PROC_FAMILY "586TSC " #elif defined CONFIG_M586MMX #define MODULE_PROC_FAMILY "586MMX " -#elif defined CONFIG_MCORE2 -#define MODULE_PROC_FAMILY "CORE2 " #elif defined CONFIG_MATOM #define MODULE_PROC_FAMILY "ATOM " #elif defined CONFIG_M686 @@ -33,8 +31,6 @@ #define MODULE_PROC_FAMILY "K6 " #elif defined CONFIG_MK7 #define MODULE_PROC_FAMILY "K7 " -#elif defined CONFIG_MK8 -#define MODULE_PROC_FAMILY "K8 " #elif defined CONFIG_MELAN #define MODULE_PROC_FAMILY "ELAN " #elif defined CONFIG_MCRUSOE diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h index baca0b00ef76..a078a2b0f032 100644 --- a/arch/x86/include/asm/xen/interface.h +++ b/arch/x86/include/asm/xen/interface.h @@ -72,7 +72,7 @@ #endif #endif -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* Explicitly size integers that represent pfns in the public interface * with Xen so that on ARM we can have one ABI that works for 32 and 64 * bit guests. */ @@ -137,7 +137,7 @@ DEFINE_GUEST_HANDLE(xen_ulong_t); #define TI_SET_DPL(_ti, _dpl) ((_ti)->flags |= (_dpl)) #define TI_SET_IF(_ti, _if) ((_ti)->flags |= ((!!(_if))<<2)) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct trap_info { uint8_t vector; /* exception vector */ uint8_t flags; /* 0-3: privilege level; 4: clear event enable? */ @@ -186,7 +186,7 @@ struct arch_shared_info { uint32_t wc_sec_hi; #endif }; -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #ifdef CONFIG_X86_32 #include <asm/xen/interface_32.h> @@ -196,7 +196,7 @@ struct arch_shared_info { #include <asm/pvclock-abi.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * The following is all CPU context. Note that the fpu_ctxt block is filled * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used. @@ -376,7 +376,7 @@ struct xen_pmu_arch { } c; }; -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* * Prefix forces emulation of some non-trapping instructions. diff --git a/arch/x86/include/asm/xen/interface_32.h b/arch/x86/include/asm/xen/interface_32.h index dc40578abded..74d9768a9cf7 100644 --- a/arch/x86/include/asm/xen/interface_32.h +++ b/arch/x86/include/asm/xen/interface_32.h @@ -44,7 +44,7 @@ */ #define __HYPERVISOR_VIRT_START 0xF5800000 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct cpu_user_regs { uint32_t ebx; @@ -85,7 +85,7 @@ typedef struct xen_callback xen_callback_t; #define XEN_CALLBACK(__cs, __eip) \ ((struct xen_callback){ .cs = (__cs), .eip = (unsigned long)(__eip) }) -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* diff --git a/arch/x86/include/asm/xen/interface_64.h b/arch/x86/include/asm/xen/interface_64.h index c10f279aae93..38a19edb81a3 100644 --- a/arch/x86/include/asm/xen/interface_64.h +++ b/arch/x86/include/asm/xen/interface_64.h @@ -77,7 +77,7 @@ #define VGCF_in_syscall (1<<_VGCF_in_syscall) #define VGCF_IN_SYSCALL VGCF_in_syscall -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct iret_context { /* Top of stack (%rsp at point of hypercall). */ @@ -143,7 +143,7 @@ typedef unsigned long xen_callback_t; #define XEN_CALLBACK(__cs, __rip) \ ((unsigned long)(__rip)) -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_X86_XEN_INTERFACE_64_H */ diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h index 9b82eebd7add..dafbf581c515 100644 --- a/arch/x86/include/uapi/asm/bootparam.h +++ b/arch/x86/include/uapi/asm/bootparam.h @@ -26,7 +26,7 @@ #define XLF_5LEVEL_ENABLED (1<<6) #define XLF_MEM_ENCRYPTION (1<<7) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> #include <linux/screen_info.h> @@ -210,6 +210,6 @@ enum x86_hardware_subarch { X86_NR_SUBARCHS, }; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_BOOTPARAM_H */ diff --git a/arch/x86/include/uapi/asm/e820.h b/arch/x86/include/uapi/asm/e820.h index 2f491efe3a12..55bc66867156 100644 --- a/arch/x86/include/uapi/asm/e820.h +++ b/arch/x86/include/uapi/asm/e820.h @@ -54,7 +54,7 @@ */ #define E820_RESERVED_KERN 128 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> struct e820entry { __u64 addr; /* start of memory segment */ @@ -76,7 +76,7 @@ struct e820map { #define BIOS_ROM_BASE 0xffe00000 #define BIOS_ROM_END 0xffffffff -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _UAPI_ASM_X86_E820_H */ diff --git a/arch/x86/include/uapi/asm/ldt.h b/arch/x86/include/uapi/asm/ldt.h index d62ac5db093b..a82c039d8e6a 100644 --- a/arch/x86/include/uapi/asm/ldt.h +++ b/arch/x86/include/uapi/asm/ldt.h @@ -12,7 +12,7 @@ /* The size of each LDT entry. */ #define LDT_ENTRY_SIZE 8 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * Note on 64bit base and limit is ignored and you cannot set DS/ES/CS * not to the default values if you still want to do syscalls. This @@ -44,5 +44,5 @@ struct user_desc { #define MODIFY_LDT_CONTENTS_STACK 1 #define MODIFY_LDT_CONTENTS_CODE 2 -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_X86_LDT_H */ diff --git a/arch/x86/include/uapi/asm/msr.h b/arch/x86/include/uapi/asm/msr.h index e7516b402a00..4b8917ca28fe 100644 --- a/arch/x86/include/uapi/asm/msr.h +++ b/arch/x86/include/uapi/asm/msr.h @@ -2,7 +2,7 @@ #ifndef _UAPI_ASM_X86_MSR_H #define _UAPI_ASM_X86_MSR_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> #include <linux/ioctl.h> @@ -10,5 +10,5 @@ #define X86_IOC_RDMSR_REGS _IOWR('c', 0xA0, __u32[8]) #define X86_IOC_WRMSR_REGS _IOWR('c', 0xA1, __u32[8]) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _UAPI_ASM_X86_MSR_H */ diff --git a/arch/x86/include/uapi/asm/ptrace-abi.h b/arch/x86/include/uapi/asm/ptrace-abi.h index 16074b9c93bb..5823584dea13 100644 --- a/arch/x86/include/uapi/asm/ptrace-abi.h +++ b/arch/x86/include/uapi/asm/ptrace-abi.h @@ -25,7 +25,7 @@ #else /* __i386__ */ -#if defined(__ASSEMBLY__) || defined(__FRAME_OFFSETS) +#if defined(__ASSEMBLER__) || defined(__FRAME_OFFSETS) /* * C ABI says these regs are callee-preserved. They aren't saved on kernel entry * unless syscall needs a complete, fully filled "struct pt_regs". @@ -57,7 +57,7 @@ #define EFLAGS 144 #define RSP 152 #define SS 160 -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ /* top of stack page */ #define FRAME_SIZE 168 @@ -87,7 +87,7 @@ #define PTRACE_SINGLEBLOCK 33 /* resume execution until next branch */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> #endif diff --git a/arch/x86/include/uapi/asm/ptrace.h b/arch/x86/include/uapi/asm/ptrace.h index 85165c0edafc..e0b5b4f6226b 100644 --- a/arch/x86/include/uapi/asm/ptrace.h +++ b/arch/x86/include/uapi/asm/ptrace.h @@ -7,7 +7,7 @@ #include <asm/processor-flags.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifdef __i386__ /* this struct defines the way the registers are stored on the @@ -81,6 +81,6 @@ struct pt_regs { -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _UAPI_ASM_X86_PTRACE_H */ diff --git a/arch/x86/include/uapi/asm/setup_data.h b/arch/x86/include/uapi/asm/setup_data.h index b111b0c18544..50c45ead4e7c 100644 --- a/arch/x86/include/uapi/asm/setup_data.h +++ b/arch/x86/include/uapi/asm/setup_data.h @@ -18,7 +18,7 @@ #define SETUP_INDIRECT (1<<31) #define SETUP_TYPE_MAX (SETUP_ENUM_MAX | SETUP_INDIRECT) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> @@ -78,6 +78,6 @@ struct ima_setup_data { __u64 size; } __attribute__((packed)); -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _UAPI_ASM_X86_SETUP_DATA_H */ diff --git a/arch/x86/include/uapi/asm/signal.h b/arch/x86/include/uapi/asm/signal.h index f777346450ec..1067efabf18b 100644 --- a/arch/x86/include/uapi/asm/signal.h +++ b/arch/x86/include/uapi/asm/signal.h @@ -2,7 +2,7 @@ #ifndef _UAPI_ASM_X86_SIGNAL_H #define _UAPI_ASM_X86_SIGNAL_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> #include <linux/compiler.h> @@ -16,7 +16,7 @@ struct siginfo; typedef unsigned long sigset_t; #endif /* __KERNEL__ */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #define SIGHUP 1 @@ -68,7 +68,7 @@ typedef unsigned long sigset_t; #include <asm-generic/signal-defs.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ # ifndef __KERNEL__ @@ -106,6 +106,6 @@ typedef struct sigaltstack { __kernel_size_t ss_size; } stack_t; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _UAPI_ASM_X86_SIGNAL_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index b43eb7e384eb..84cfa179802c 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -44,6 +44,8 @@ KCOV_INSTRUMENT_unwind_orc.o := n KCOV_INSTRUMENT_unwind_frame.o := n KCOV_INSTRUMENT_unwind_guess.o := n +CFLAGS_head32.o := -fno-stack-protector +CFLAGS_head64.o := -fno-stack-protector CFLAGS_irq.o := -I $(src)/../include/asm/trace obj-y += head_$(BITS).o diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index 5854f0b8f0f1..d5ac34186555 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -13,9 +13,11 @@ #include <linux/sched.h> #include <acpi/processor.h> +#include <asm/cpu_device_id.h> #include <asm/cpuid.h> #include <asm/mwait.h> #include <asm/special_insns.h> +#include <asm/smp.h> /* * Initialize bm_flags based on the CPU cache properties @@ -47,12 +49,11 @@ void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags, /* * On all recent Intel platforms, ARB_DISABLE is a nop. * So, set bm_control to zero to indicate that ARB_DISABLE - * is not required while entering C3 type state on - * P4, Core and beyond CPUs + * is not required while entering C3 type state. */ if (c->x86_vendor == X86_VENDOR_INTEL && - (c->x86 > 0xf || (c->x86 == 6 && c->x86_model >= 0x0f))) - flags->bm_control = 0; + (c->x86 > 15 || (c->x86_vfm >= INTEL_CORE2_MEROM && c->x86_vfm <= INTEL_FAM6_LAST))) + flags->bm_control = 0; if (c->x86_vendor == X86_VENDOR_CENTAUR) { if (c->x86 > 6 || (c->x86 == 6 && c->x86_model == 0x0f && @@ -205,6 +206,16 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu, } EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe); +void __noreturn acpi_processor_ffh_play_dead(struct acpi_processor_cx *cx) +{ + unsigned int cpu = smp_processor_id(); + struct cstate_entry *percpu_entry; + + percpu_entry = per_cpu_ptr(cpu_cstate_entry, cpu); + mwait_play_dead(percpu_entry->states[cx->index].eax); +} +EXPORT_SYMBOL_GPL(acpi_processor_ffh_play_dead); + void __cpuidle acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cx) { unsigned int cpu = smp_processor_id(); diff --git a/arch/x86/kernel/acpi/madt_playdead.S b/arch/x86/kernel/acpi/madt_playdead.S index 4e498d28cdc8..aefb9cb583ad 100644 --- a/arch/x86/kernel/acpi/madt_playdead.S +++ b/arch/x86/kernel/acpi/madt_playdead.S @@ -14,6 +14,7 @@ * rsi: PGD of the identity mapping */ SYM_FUNC_START(asm_acpi_mp_play_dead) + ANNOTATE_NOENDBR /* Turn off global entries. Following CR3 write will flush them. */ movq %cr4, %rdx andq $~(X86_CR4_PGE), %rdx diff --git a/arch/x86/kernel/acpi/madt_wakeup.c b/arch/x86/kernel/acpi/madt_wakeup.c index d5ef6215583b..f36f28405dcc 100644 --- a/arch/x86/kernel/acpi/madt_wakeup.c +++ b/arch/x86/kernel/acpi/madt_wakeup.c @@ -70,58 +70,6 @@ static void __init free_pgt_page(void *pgt, void *dummy) return memblock_free(pgt, PAGE_SIZE); } -/* - * Make sure asm_acpi_mp_play_dead() is present in the identity mapping at - * the same place as in the kernel page tables. asm_acpi_mp_play_dead() switches - * to the identity mapping and the function has be present at the same spot in - * the virtual address space before and after switching page tables. - */ -static int __init init_transition_pgtable(pgd_t *pgd) -{ - pgprot_t prot = PAGE_KERNEL_EXEC_NOENC; - unsigned long vaddr, paddr; - p4d_t *p4d; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - - vaddr = (unsigned long)asm_acpi_mp_play_dead; - pgd += pgd_index(vaddr); - if (!pgd_present(*pgd)) { - p4d = (p4d_t *)alloc_pgt_page(NULL); - if (!p4d) - return -ENOMEM; - set_pgd(pgd, __pgd(__pa(p4d) | _KERNPG_TABLE)); - } - p4d = p4d_offset(pgd, vaddr); - if (!p4d_present(*p4d)) { - pud = (pud_t *)alloc_pgt_page(NULL); - if (!pud) - return -ENOMEM; - set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE)); - } - pud = pud_offset(p4d, vaddr); - if (!pud_present(*pud)) { - pmd = (pmd_t *)alloc_pgt_page(NULL); - if (!pmd) - return -ENOMEM; - set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); - } - pmd = pmd_offset(pud, vaddr); - if (!pmd_present(*pmd)) { - pte = (pte_t *)alloc_pgt_page(NULL); - if (!pte) - return -ENOMEM; - set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE)); - } - pte = pte_offset_kernel(pmd, vaddr); - - paddr = __pa(vaddr); - set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, prot)); - - return 0; -} - static int __init acpi_mp_setup_reset(u64 reset_vector) { struct x86_mapping_info info = { @@ -130,6 +78,7 @@ static int __init acpi_mp_setup_reset(u64 reset_vector) .page_flag = __PAGE_KERNEL_LARGE_EXEC, .kernpg_flag = _KERNPG_TABLE_NOENC, }; + unsigned long mstart, mend; pgd_t *pgd; pgd = alloc_pgt_page(NULL); @@ -137,8 +86,6 @@ static int __init acpi_mp_setup_reset(u64 reset_vector) return -ENOMEM; for (int i = 0; i < nr_pfn_mapped; i++) { - unsigned long mstart, mend; - mstart = pfn_mapped[i].start << PAGE_SHIFT; mend = pfn_mapped[i].end << PAGE_SHIFT; if (kernel_ident_mapping_init(&info, pgd, mstart, mend)) { @@ -147,14 +94,24 @@ static int __init acpi_mp_setup_reset(u64 reset_vector) } } - if (kernel_ident_mapping_init(&info, pgd, - PAGE_ALIGN_DOWN(reset_vector), - PAGE_ALIGN(reset_vector + 1))) { + mstart = PAGE_ALIGN_DOWN(reset_vector); + mend = mstart + PAGE_SIZE; + if (kernel_ident_mapping_init(&info, pgd, mstart, mend)) { kernel_ident_mapping_free(&info, pgd); return -ENOMEM; } - if (init_transition_pgtable(pgd)) { + /* + * Make sure asm_acpi_mp_play_dead() is present in the identity mapping + * at the same place as in the kernel page tables. + * asm_acpi_mp_play_dead() switches to the identity mapping and the + * function must be present at the same spot in the virtual address space + * before and after switching page tables. + */ + info.offset = __START_KERNEL_map - phys_base; + mstart = PAGE_ALIGN_DOWN(__pa(asm_acpi_mp_play_dead)); + mend = mstart + PAGE_SIZE; + if (kernel_ident_mapping_init(&info, pgd, mstart, mend)) { kernel_ident_mapping_free(&info, pgd); return -ENOMEM; } diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S index b200a193beeb..04f561f75e99 100644 --- a/arch/x86/kernel/acpi/wakeup_64.S +++ b/arch/x86/kernel/acpi/wakeup_64.S @@ -17,6 +17,7 @@ * Hooray, we are in Long 64-bit mode (but still running in low memory) */ SYM_FUNC_START(wakeup_long64) + ANNOTATE_NOENDBR movq saved_magic(%rip), %rax movq $0x123456789abcdef0, %rdx cmpq %rdx, %rax diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index c71b575bf229..bf82c6f7d690 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -392,10 +392,8 @@ EXPORT_SYMBOL(BUG_func); * Rewrite the "call BUG_func" replacement to point to the target of the * indirect pv_ops call "call *disp(%ip)". */ -static int alt_replace_call(u8 *instr, u8 *insn_buff, struct alt_instr *a, - struct module *mod) +static int alt_replace_call(u8 *instr, u8 *insn_buff, struct alt_instr *a) { - u8 *wr_instr = module_writable_address(mod, instr); void *target, *bug = &BUG_func; s32 disp; @@ -405,14 +403,14 @@ static int alt_replace_call(u8 *instr, u8 *insn_buff, struct alt_instr *a, } if (a->instrlen != 6 || - wr_instr[0] != CALL_RIP_REL_OPCODE || - wr_instr[1] != CALL_RIP_REL_MODRM) { + instr[0] != CALL_RIP_REL_OPCODE || + instr[1] != CALL_RIP_REL_MODRM) { pr_err("ALT_FLAG_DIRECT_CALL set for unrecognized indirect call\n"); BUG(); } /* Skip CALL_RIP_REL_OPCODE and CALL_RIP_REL_MODRM */ - disp = *(s32 *)(wr_instr + 2); + disp = *(s32 *)(instr + 2); #ifdef CONFIG_X86_64 /* ff 15 00 00 00 00 call *0x0(%rip) */ /* target address is stored at "next instruction + disp". */ @@ -450,8 +448,7 @@ static inline u8 * instr_va(struct alt_instr *i) * to refetch changed I$ lines. */ void __init_or_module noinline apply_alternatives(struct alt_instr *start, - struct alt_instr *end, - struct module *mod) + struct alt_instr *end) { u8 insn_buff[MAX_PATCH_LEN]; u8 *instr, *replacement; @@ -480,7 +477,6 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, */ for (a = start; a < end; a++) { int insn_buff_sz = 0; - u8 *wr_instr, *wr_replacement; /* * In case of nested ALTERNATIVE()s the outer alternative might @@ -494,11 +490,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, } instr = instr_va(a); - wr_instr = module_writable_address(mod, instr); - replacement = (u8 *)&a->repl_offset + a->repl_offset; - wr_replacement = module_writable_address(mod, replacement); - BUG_ON(a->instrlen > sizeof(insn_buff)); BUG_ON(a->cpuid >= (NCAPINTS + NBUGINTS) * 32); @@ -509,9 +501,9 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, * patch if feature is *NOT* present. */ if (!boot_cpu_has(a->cpuid) == !(a->flags & ALT_FLAG_NOT)) { - memcpy(insn_buff, wr_instr, a->instrlen); + memcpy(insn_buff, instr, a->instrlen); optimize_nops(instr, insn_buff, a->instrlen); - text_poke_early(wr_instr, insn_buff, a->instrlen); + text_poke_early(instr, insn_buff, a->instrlen); continue; } @@ -521,12 +513,11 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, instr, instr, a->instrlen, replacement, a->replacementlen, a->flags); - memcpy(insn_buff, wr_replacement, a->replacementlen); + memcpy(insn_buff, replacement, a->replacementlen); insn_buff_sz = a->replacementlen; if (a->flags & ALT_FLAG_DIRECT_CALL) { - insn_buff_sz = alt_replace_call(instr, insn_buff, a, - mod); + insn_buff_sz = alt_replace_call(instr, insn_buff, a); if (insn_buff_sz < 0) continue; } @@ -536,11 +527,11 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, apply_relocation(insn_buff, instr, a->instrlen, replacement, a->replacementlen); - DUMP_BYTES(ALT, wr_instr, a->instrlen, "%px: old_insn: ", instr); + DUMP_BYTES(ALT, instr, a->instrlen, "%px: old_insn: ", instr); DUMP_BYTES(ALT, replacement, a->replacementlen, "%px: rpl_insn: ", replacement); DUMP_BYTES(ALT, insn_buff, insn_buff_sz, "%px: final_insn: ", instr); - text_poke_early(wr_instr, insn_buff, insn_buff_sz); + text_poke_early(instr, insn_buff, insn_buff_sz); } kasan_enable_current(); @@ -731,20 +722,18 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes) /* * Generated by 'objtool --retpoline'. */ -void __init_or_module noinline apply_retpolines(s32 *start, s32 *end, - struct module *mod) +void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { s32 *s; for (s = start; s < end; s++) { void *addr = (void *)s + *s; - void *wr_addr = module_writable_address(mod, addr); struct insn insn; int len, ret; u8 bytes[16]; u8 op1, op2; - ret = insn_decode_kernel(&insn, wr_addr); + ret = insn_decode_kernel(&insn, addr); if (WARN_ON_ONCE(ret < 0)) continue; @@ -752,6 +741,11 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end, op2 = insn.opcode.bytes[1]; switch (op1) { + case 0x70 ... 0x7f: /* Jcc.d8 */ + /* See cfi_paranoid. */ + WARN_ON_ONCE(cfi_mode != CFI_FINEIBT); + continue; + case CALL_INSN_OPCODE: case JMP32_INSN_OPCODE: break; @@ -772,9 +766,9 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end, len = patch_retpoline(addr, &insn, bytes); if (len == insn.length) { optimize_nops(addr, bytes, len); - DUMP_BYTES(RETPOLINE, ((u8*)wr_addr), len, "%px: orig: ", addr); + DUMP_BYTES(RETPOLINE, ((u8*)addr), len, "%px: orig: ", addr); DUMP_BYTES(RETPOLINE, ((u8*)bytes), len, "%px: repl: ", addr); - text_poke_early(wr_addr, bytes, len); + text_poke_early(addr, bytes, len); } } } @@ -810,8 +804,7 @@ static int patch_return(void *addr, struct insn *insn, u8 *bytes) return i; } -void __init_or_module noinline apply_returns(s32 *start, s32 *end, - struct module *mod) +void __init_or_module noinline apply_returns(s32 *start, s32 *end) { s32 *s; @@ -820,13 +813,12 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end, for (s = start; s < end; s++) { void *dest = NULL, *addr = (void *)s + *s; - void *wr_addr = module_writable_address(mod, addr); struct insn insn; int len, ret; u8 bytes[16]; u8 op; - ret = insn_decode_kernel(&insn, wr_addr); + ret = insn_decode_kernel(&insn, addr); if (WARN_ON_ONCE(ret < 0)) continue; @@ -846,41 +838,59 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end, len = patch_return(addr, &insn, bytes); if (len == insn.length) { - DUMP_BYTES(RET, ((u8*)wr_addr), len, "%px: orig: ", addr); + DUMP_BYTES(RET, ((u8*)addr), len, "%px: orig: ", addr); DUMP_BYTES(RET, ((u8*)bytes), len, "%px: repl: ", addr); - text_poke_early(wr_addr, bytes, len); + text_poke_early(addr, bytes, len); } } } -#else -void __init_or_module noinline apply_returns(s32 *start, s32 *end, - struct module *mod) { } -#endif /* CONFIG_MITIGATION_RETHUNK */ +#else /* !CONFIG_MITIGATION_RETHUNK: */ +void __init_or_module noinline apply_returns(s32 *start, s32 *end) { } +#endif /* !CONFIG_MITIGATION_RETHUNK */ #else /* !CONFIG_MITIGATION_RETPOLINE || !CONFIG_OBJTOOL */ -void __init_or_module noinline apply_retpolines(s32 *start, s32 *end, - struct module *mod) { } -void __init_or_module noinline apply_returns(s32 *start, s32 *end, - struct module *mod) { } +void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { } +void __init_or_module noinline apply_returns(s32 *start, s32 *end) { } -#endif /* CONFIG_MITIGATION_RETPOLINE && CONFIG_OBJTOOL */ +#endif /* !CONFIG_MITIGATION_RETPOLINE || !CONFIG_OBJTOOL */ #ifdef CONFIG_X86_KERNEL_IBT -static void poison_cfi(void *addr, void *wr_addr); +__noendbr bool is_endbr(u32 *val) +{ + u32 endbr; + + __get_kernel_nofault(&endbr, val, u32, Efault); + return __is_endbr(endbr); + +Efault: + return false; +} -static void __init_or_module poison_endbr(void *addr, void *wr_addr, bool warn) +#ifdef CONFIG_FINEIBT + +static __noendbr bool exact_endbr(u32 *val) { - u32 endbr, poison = gen_endbr_poison(); + u32 endbr; - if (WARN_ON_ONCE(get_kernel_nofault(endbr, wr_addr))) - return; + __get_kernel_nofault(&endbr, val, u32, Efault); + return endbr == gen_endbr(); + +Efault: + return false; +} - if (!is_endbr(endbr)) { - WARN_ON_ONCE(warn); +#endif + +static void poison_cfi(void *addr); + +static void __init_or_module poison_endbr(void *addr) +{ + u32 poison = gen_endbr_poison(); + + if (WARN_ON_ONCE(!is_endbr(addr))) return; - } DPRINTK(ENDBR, "ENDBR at: %pS (%px)", addr, addr); @@ -889,7 +899,7 @@ static void __init_or_module poison_endbr(void *addr, void *wr_addr, bool warn) */ DUMP_BYTES(ENDBR, ((u8*)addr), 4, "%px: orig: ", addr); DUMP_BYTES(ENDBR, ((u8*)&poison), 4, "%px: repl: ", addr); - text_poke_early(wr_addr, &poison, 4); + text_poke_early(addr, &poison, 4); } /* @@ -898,36 +908,39 @@ static void __init_or_module poison_endbr(void *addr, void *wr_addr, bool warn) * Seal the functions for indirect calls by clobbering the ENDBR instructions * and the kCFI hash value. */ -void __init_or_module noinline apply_seal_endbr(s32 *start, s32 *end, struct module *mod) +void __init_or_module noinline apply_seal_endbr(s32 *start, s32 *end) { s32 *s; for (s = start; s < end; s++) { void *addr = (void *)s + *s; - void *wr_addr = module_writable_address(mod, addr); - poison_endbr(addr, wr_addr, true); + poison_endbr(addr); if (IS_ENABLED(CONFIG_FINEIBT)) - poison_cfi(addr - 16, wr_addr - 16); + poison_cfi(addr - 16); } } -#else +#else /* !CONFIG_X86_KERNEL_IBT: */ -void __init_or_module apply_seal_endbr(s32 *start, s32 *end, struct module *mod) { } +void __init_or_module apply_seal_endbr(s32 *start, s32 *end) { } -#endif /* CONFIG_X86_KERNEL_IBT */ +#endif /* !CONFIG_X86_KERNEL_IBT */ #ifdef CONFIG_CFI_AUTO_DEFAULT -#define __CFI_DEFAULT CFI_AUTO +# define __CFI_DEFAULT CFI_AUTO #elif defined(CONFIG_CFI_CLANG) -#define __CFI_DEFAULT CFI_KCFI +# define __CFI_DEFAULT CFI_KCFI #else -#define __CFI_DEFAULT CFI_OFF +# define __CFI_DEFAULT CFI_OFF #endif enum cfi_mode cfi_mode __ro_after_init = __CFI_DEFAULT; +#ifdef CONFIG_FINEIBT_BHI +bool cfi_bhi __ro_after_init = false; +#endif + #ifdef CONFIG_CFI_CLANG struct bpf_insn; @@ -935,11 +948,7 @@ struct bpf_insn; extern unsigned int __bpf_prog_runX(const void *ctx, const struct bpf_insn *insn); -/* - * Force a reference to the external symbol so the compiler generates - * __kcfi_typid. - */ -__ADDRESSABLE(__bpf_prog_runX); +KCFI_REFERENCE(__bpf_prog_runX); /* u32 __ro_after_init cfi_bpf_hash = __kcfi_typeid___bpf_prog_runX; */ asm ( @@ -956,7 +965,7 @@ asm ( /* Must match bpf_callback_t */ extern u64 __bpf_callback_fn(u64, u64, u64, u64, u64); -__ADDRESSABLE(__bpf_callback_fn); +KCFI_REFERENCE(__bpf_callback_fn); /* u32 __ro_after_init cfi_bpf_subprog_hash = __kcfi_typeid___bpf_callback_fn; */ asm ( @@ -991,6 +1000,21 @@ u32 cfi_get_func_hash(void *func) return hash; } + +int cfi_get_func_arity(void *func) +{ + bhi_thunk *target; + s32 disp; + + if (cfi_mode != CFI_FINEIBT && !cfi_bhi) + return 0; + + if (get_kernel_nofault(disp, func - 4)) + return 0; + + target = func + disp; + return target - __bhi_args; +} #endif #ifdef CONFIG_FINEIBT @@ -998,6 +1022,8 @@ u32 cfi_get_func_hash(void *func) static bool cfi_rand __ro_after_init = true; static u32 cfi_seed __ro_after_init; +static bool cfi_paranoid __ro_after_init = false; + /* * Re-hash the CFI hash with a boot-time seed while making sure the result is * not a valid ENDBR instruction. @@ -1005,7 +1031,7 @@ static u32 cfi_seed __ro_after_init; static u32 cfi_rehash(u32 hash) { hash ^= cfi_seed; - while (unlikely(is_endbr(hash) || is_endbr(-hash))) { + while (unlikely(__is_endbr(hash) || __is_endbr(-hash))) { bool lsb = hash & 1; hash >>= 1; if (lsb) @@ -1037,6 +1063,25 @@ static __init int cfi_parse_cmdline(char *str) cfi_mode = CFI_FINEIBT; } else if (!strcmp(str, "norand")) { cfi_rand = false; + } else if (!strcmp(str, "warn")) { + pr_alert("CFI mismatch non-fatal!\n"); + cfi_warn = true; + } else if (!strcmp(str, "paranoid")) { + if (cfi_mode == CFI_FINEIBT) { + cfi_paranoid = true; + } else { + pr_err("Ignoring paranoid; depends on fineibt.\n"); + } + } else if (!strcmp(str, "bhi")) { +#ifdef CONFIG_FINEIBT_BHI + if (cfi_mode == CFI_FINEIBT) { + cfi_bhi = true; + } else { + pr_err("Ignoring bhi; depends on fineibt.\n"); + } +#else + pr_err("Ignoring bhi; depends on FINEIBT_BHI=y.\n"); +#endif } else { pr_err("Ignoring unknown cfi option (%s).", str); } @@ -1054,9 +1099,9 @@ early_param("cfi", cfi_parse_cmdline); * __cfi_\func: __cfi_\func: * movl $0x12345678,%eax // 5 endbr64 // 4 * nop subl $0x12345678,%r10d // 7 - * nop jz 1f // 2 - * nop ud2 // 2 - * nop 1: nop // 1 + * nop jne __cfi_\func+6 // 2 + * nop nop3 // 3 + * nop * nop * nop * nop @@ -1068,34 +1113,53 @@ early_param("cfi", cfi_parse_cmdline); * * caller: caller: * movl $(-0x12345678),%r10d // 6 movl $0x12345678,%r10d // 6 - * addl $-15(%r11),%r10d // 4 sub $16,%r11 // 4 + * addl $-15(%r11),%r10d // 4 lea -0x10(%r11),%r11 // 4 * je 1f // 2 nop4 // 4 * ud2 // 2 - * 1: call __x86_indirect_thunk_r11 // 5 call *%r11; nop2; // 5 + * 1: cs call __x86_indirect_thunk_r11 // 6 call *%r11; nop3; // 6 * */ -asm( ".pushsection .rodata \n" - "fineibt_preamble_start: \n" - " endbr64 \n" - " subl $0x12345678, %r10d \n" - " je fineibt_preamble_end \n" - " ud2 \n" - " nop \n" - "fineibt_preamble_end: \n" +/* + * <fineibt_preamble_start>: + * 0: f3 0f 1e fa endbr64 + * 4: 41 81 <ea> 78 56 34 12 sub $0x12345678, %r10d + * b: 75 f9 jne 6 <fineibt_preamble_start+0x6> + * d: 0f 1f 00 nopl (%rax) + * + * Note that the JNE target is the 0xEA byte inside the SUB, this decodes as + * (bad) on x86_64 and raises #UD. + */ +asm( ".pushsection .rodata \n" + "fineibt_preamble_start: \n" + " endbr64 \n" + " subl $0x12345678, %r10d \n" + "fineibt_preamble_bhi: \n" + " jne fineibt_preamble_start+6 \n" + ASM_NOP3 + "fineibt_preamble_end: \n" ".popsection\n" ); extern u8 fineibt_preamble_start[]; +extern u8 fineibt_preamble_bhi[]; extern u8 fineibt_preamble_end[]; #define fineibt_preamble_size (fineibt_preamble_end - fineibt_preamble_start) +#define fineibt_preamble_bhi (fineibt_preamble_bhi - fineibt_preamble_start) +#define fineibt_preamble_ud 6 #define fineibt_preamble_hash 7 +/* + * <fineibt_caller_start>: + * 0: 41 ba 78 56 34 12 mov $0x12345678, %r10d + * 6: 4d 8d 5b f0 lea -0x10(%r11), %r11 + * a: 0f 1f 40 00 nopl 0x0(%rax) + */ asm( ".pushsection .rodata \n" "fineibt_caller_start: \n" " movl $0x12345678, %r10d \n" - " sub $16, %r11 \n" + " lea -0x10(%r11), %r11 \n" ASM_NOP4 "fineibt_caller_end: \n" ".popsection \n" @@ -1109,13 +1173,62 @@ extern u8 fineibt_caller_end[]; #define fineibt_caller_jmp (fineibt_caller_size - 2) -static u32 decode_preamble_hash(void *addr) +/* + * Since FineIBT does hash validation on the callee side it is prone to + * circumvention attacks where a 'naked' ENDBR instruction exists that + * is not part of the fineibt_preamble sequence. + * + * Notably the x86 entry points must be ENDBR and equally cannot be + * fineibt_preamble. + * + * The fineibt_paranoid caller sequence adds additional caller side + * hash validation. This stops such circumvention attacks dead, but at the cost + * of adding a load. + * + * <fineibt_paranoid_start>: + * 0: 41 ba 78 56 34 12 mov $0x12345678, %r10d + * 6: 45 3b 53 f7 cmp -0x9(%r11), %r10d + * a: 4d 8d 5b <f0> lea -0x10(%r11), %r11 + * e: 75 fd jne d <fineibt_paranoid_start+0xd> + * 10: 41 ff d3 call *%r11 + * 13: 90 nop + * + * Notably LEA does not modify flags and can be reordered with the CMP, + * avoiding a dependency. Again, using a non-taken (backwards) branch + * for the failure case, abusing LEA's immediate 0xf0 as LOCK prefix for the + * Jcc.d8, causing #UD. + */ +asm( ".pushsection .rodata \n" + "fineibt_paranoid_start: \n" + " movl $0x12345678, %r10d \n" + " cmpl -9(%r11), %r10d \n" + " lea -0x10(%r11), %r11 \n" + " jne fineibt_paranoid_start+0xd \n" + "fineibt_paranoid_ind: \n" + " call *%r11 \n" + " nop \n" + "fineibt_paranoid_end: \n" + ".popsection \n" +); + +extern u8 fineibt_paranoid_start[]; +extern u8 fineibt_paranoid_ind[]; +extern u8 fineibt_paranoid_end[]; + +#define fineibt_paranoid_size (fineibt_paranoid_end - fineibt_paranoid_start) +#define fineibt_paranoid_ind (fineibt_paranoid_ind - fineibt_paranoid_start) +#define fineibt_paranoid_ud 0xd + +static u32 decode_preamble_hash(void *addr, int *reg) { u8 *p = addr; - /* b8 78 56 34 12 mov $0x12345678,%eax */ - if (p[0] == 0xb8) + /* b8+reg 78 56 34 12 movl $0x12345678,\reg */ + if (p[0] >= 0xb8 && p[0] < 0xc0) { + if (reg) + *reg = p[0] - 0xb8; return *(u32 *)(addr + 1); + } return 0; /* invalid hash value */ } @@ -1124,11 +1237,11 @@ static u32 decode_caller_hash(void *addr) { u8 *p = addr; - /* 41 ba 78 56 34 12 mov $0x12345678,%r10d */ + /* 41 ba 88 a9 cb ed mov $(-0x12345678),%r10d */ if (p[0] == 0x41 && p[1] == 0xba) return -*(u32 *)(addr + 2); - /* e8 0c 78 56 34 12 jmp.d8 +12 */ + /* e8 0c 88 a9 cb ed jmp.d8 +12 */ if (p[0] == JMP8_INSN_OPCODE && p[1] == fineibt_caller_jmp) return -*(u32 *)(addr + 2); @@ -1136,7 +1249,7 @@ static u32 decode_caller_hash(void *addr) } /* .retpoline_sites */ -static int cfi_disable_callers(s32 *start, s32 *end, struct module *mod) +static int cfi_disable_callers(s32 *start, s32 *end) { /* * Disable kCFI by patching in a JMP.d8, this leaves the hash immediate @@ -1148,23 +1261,20 @@ static int cfi_disable_callers(s32 *start, s32 *end, struct module *mod) for (s = start; s < end; s++) { void *addr = (void *)s + *s; - void *wr_addr; u32 hash; addr -= fineibt_caller_size; - wr_addr = module_writable_address(mod, addr); - hash = decode_caller_hash(wr_addr); - + hash = decode_caller_hash(addr); if (!hash) /* nocfi callers */ continue; - text_poke_early(wr_addr, jmp, 2); + text_poke_early(addr, jmp, 2); } return 0; } -static int cfi_enable_callers(s32 *start, s32 *end, struct module *mod) +static int cfi_enable_callers(s32 *start, s32 *end) { /* * Re-enable kCFI, undo what cfi_disable_callers() did. @@ -1174,126 +1284,212 @@ static int cfi_enable_callers(s32 *start, s32 *end, struct module *mod) for (s = start; s < end; s++) { void *addr = (void *)s + *s; - void *wr_addr; u32 hash; addr -= fineibt_caller_size; - wr_addr = module_writable_address(mod, addr); - hash = decode_caller_hash(wr_addr); + hash = decode_caller_hash(addr); if (!hash) /* nocfi callers */ continue; - text_poke_early(wr_addr, mov, 2); + text_poke_early(addr, mov, 2); } return 0; } /* .cfi_sites */ -static int cfi_rand_preamble(s32 *start, s32 *end, struct module *mod) +static int cfi_rand_preamble(s32 *start, s32 *end) { s32 *s; for (s = start; s < end; s++) { void *addr = (void *)s + *s; - void *wr_addr = module_writable_address(mod, addr); u32 hash; - hash = decode_preamble_hash(wr_addr); + hash = decode_preamble_hash(addr, NULL); if (WARN(!hash, "no CFI hash found at: %pS %px %*ph\n", addr, addr, 5, addr)) return -EINVAL; hash = cfi_rehash(hash); - text_poke_early(wr_addr + 1, &hash, 4); + text_poke_early(addr + 1, &hash, 4); } return 0; } -static int cfi_rewrite_preamble(s32 *start, s32 *end, struct module *mod) +static void cfi_fineibt_bhi_preamble(void *addr, int arity) +{ + if (!arity) + return; + + if (!cfi_warn && arity == 1) { + /* + * Crazy scheme to allow arity-1 inline: + * + * __cfi_foo: + * 0: f3 0f 1e fa endbr64 + * 4: 41 81 <ea> 78 56 34 12 sub 0x12345678, %r10d + * b: 49 0f 45 fa cmovne %r10, %rdi + * f: 75 f5 jne __cfi_foo+6 + * 11: 0f 1f 00 nopl (%rax) + * + * Code that direct calls to foo()+0, decodes the tail end as: + * + * foo: + * 0: f5 cmc + * 1: 0f 1f 00 nopl (%rax) + * + * which clobbers CF, but does not affect anything ABI + * wise. + * + * Notably, this scheme is incompatible with permissive CFI + * because the CMOVcc is unconditional and RDI will have been + * clobbered. + */ + const u8 magic[9] = { + 0x49, 0x0f, 0x45, 0xfa, + 0x75, 0xf5, + BYTES_NOP3, + }; + + text_poke_early(addr + fineibt_preamble_bhi, magic, 9); + + return; + } + + text_poke_early(addr + fineibt_preamble_bhi, + text_gen_insn(CALL_INSN_OPCODE, + addr + fineibt_preamble_bhi, + __bhi_args[arity]), + CALL_INSN_SIZE); +} + +static int cfi_rewrite_preamble(s32 *start, s32 *end) { s32 *s; for (s = start; s < end; s++) { void *addr = (void *)s + *s; - void *wr_addr = module_writable_address(mod, addr); + int arity; u32 hash; - hash = decode_preamble_hash(wr_addr); + /* + * When the function doesn't start with ENDBR the compiler will + * have determined there are no indirect calls to it and we + * don't need no CFI either. + */ + if (!is_endbr(addr + 16)) + continue; + + hash = decode_preamble_hash(addr, &arity); if (WARN(!hash, "no CFI hash found at: %pS %px %*ph\n", addr, addr, 5, addr)) return -EINVAL; - text_poke_early(wr_addr, fineibt_preamble_start, fineibt_preamble_size); - WARN_ON(*(u32 *)(wr_addr + fineibt_preamble_hash) != 0x12345678); - text_poke_early(wr_addr + fineibt_preamble_hash, &hash, 4); + text_poke_early(addr, fineibt_preamble_start, fineibt_preamble_size); + WARN_ON(*(u32 *)(addr + fineibt_preamble_hash) != 0x12345678); + text_poke_early(addr + fineibt_preamble_hash, &hash, 4); + + WARN_ONCE(!IS_ENABLED(CONFIG_FINEIBT_BHI) && arity, + "kCFI preamble has wrong register at: %pS %*ph\n", + addr, 5, addr); + + if (cfi_bhi) + cfi_fineibt_bhi_preamble(addr, arity); } return 0; } -static void cfi_rewrite_endbr(s32 *start, s32 *end, struct module *mod) +static void cfi_rewrite_endbr(s32 *start, s32 *end) { s32 *s; for (s = start; s < end; s++) { void *addr = (void *)s + *s; - void *wr_addr = module_writable_address(mod, addr); - poison_endbr(addr + 16, wr_addr + 16, false); + if (!exact_endbr(addr + 16)) + continue; + + poison_endbr(addr + 16); } } /* .retpoline_sites */ -static int cfi_rand_callers(s32 *start, s32 *end, struct module *mod) +static int cfi_rand_callers(s32 *start, s32 *end) { s32 *s; for (s = start; s < end; s++) { void *addr = (void *)s + *s; - void *wr_addr; u32 hash; addr -= fineibt_caller_size; - wr_addr = module_writable_address(mod, addr); - hash = decode_caller_hash(wr_addr); + hash = decode_caller_hash(addr); if (hash) { hash = -cfi_rehash(hash); - text_poke_early(wr_addr + 2, &hash, 4); + text_poke_early(addr + 2, &hash, 4); } } return 0; } -static int cfi_rewrite_callers(s32 *start, s32 *end, struct module *mod) +static int cfi_rewrite_callers(s32 *start, s32 *end) { s32 *s; + BUG_ON(fineibt_paranoid_size != 20); + for (s = start; s < end; s++) { void *addr = (void *)s + *s; - void *wr_addr; + struct insn insn; + u8 bytes[20]; u32 hash; + int ret; + u8 op; addr -= fineibt_caller_size; - wr_addr = module_writable_address(mod, addr); - hash = decode_caller_hash(wr_addr); - if (hash) { - text_poke_early(wr_addr, fineibt_caller_start, fineibt_caller_size); - WARN_ON(*(u32 *)(wr_addr + fineibt_caller_hash) != 0x12345678); - text_poke_early(wr_addr + fineibt_caller_hash, &hash, 4); + hash = decode_caller_hash(addr); + if (!hash) + continue; + + if (!cfi_paranoid) { + text_poke_early(addr, fineibt_caller_start, fineibt_caller_size); + WARN_ON(*(u32 *)(addr + fineibt_caller_hash) != 0x12345678); + text_poke_early(addr + fineibt_caller_hash, &hash, 4); + /* rely on apply_retpolines() */ + continue; + } + + /* cfi_paranoid */ + ret = insn_decode_kernel(&insn, addr + fineibt_caller_size); + if (WARN_ON_ONCE(ret < 0)) + continue; + + op = insn.opcode.bytes[0]; + if (op != CALL_INSN_OPCODE && op != JMP32_INSN_OPCODE) { + WARN_ON_ONCE(1); + continue; } - /* rely on apply_retpolines() */ + + memcpy(bytes, fineibt_paranoid_start, fineibt_paranoid_size); + memcpy(bytes + fineibt_caller_hash, &hash, 4); + + ret = emit_indirect(op, 11, bytes + fineibt_paranoid_ind); + if (WARN_ON_ONCE(ret != 3)) + continue; + + text_poke_early(addr, bytes, fineibt_paranoid_size); } return 0; } static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, - s32 *start_cfi, s32 *end_cfi, struct module *mod) + s32 *start_cfi, s32 *end_cfi, bool builtin) { - bool builtin = mod ? false : true; int ret; if (WARN_ONCE(fineibt_preamble_size != 16, @@ -1302,8 +1498,15 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, if (cfi_mode == CFI_AUTO) { cfi_mode = CFI_KCFI; - if (HAS_KERNEL_IBT && cpu_feature_enabled(X86_FEATURE_IBT)) + if (HAS_KERNEL_IBT && cpu_feature_enabled(X86_FEATURE_IBT)) { + /* + * FRED has much saner context on exception entry and + * is less easy to take advantage of. + */ + if (!cpu_feature_enabled(X86_FEATURE_FRED)) + cfi_paranoid = true; cfi_mode = CFI_FINEIBT; + } } /* @@ -1311,7 +1514,7 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, * rewrite them. This disables all CFI. If this succeeds but any of the * later stages fails, we're without CFI. */ - ret = cfi_disable_callers(start_retpoline, end_retpoline, mod); + ret = cfi_disable_callers(start_retpoline, end_retpoline); if (ret) goto err; @@ -1322,11 +1525,11 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, cfi_bpf_subprog_hash = cfi_rehash(cfi_bpf_subprog_hash); } - ret = cfi_rand_preamble(start_cfi, end_cfi, mod); + ret = cfi_rand_preamble(start_cfi, end_cfi); if (ret) goto err; - ret = cfi_rand_callers(start_retpoline, end_retpoline, mod); + ret = cfi_rand_callers(start_retpoline, end_retpoline); if (ret) goto err; } @@ -1338,7 +1541,7 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, return; case CFI_KCFI: - ret = cfi_enable_callers(start_retpoline, end_retpoline, mod); + ret = cfi_enable_callers(start_retpoline, end_retpoline); if (ret) goto err; @@ -1348,20 +1551,23 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, case CFI_FINEIBT: /* place the FineIBT preamble at func()-16 */ - ret = cfi_rewrite_preamble(start_cfi, end_cfi, mod); + ret = cfi_rewrite_preamble(start_cfi, end_cfi); if (ret) goto err; /* rewrite the callers to target func()-16 */ - ret = cfi_rewrite_callers(start_retpoline, end_retpoline, mod); + ret = cfi_rewrite_callers(start_retpoline, end_retpoline); if (ret) goto err; /* now that nobody targets func()+0, remove ENDBR there */ - cfi_rewrite_endbr(start_cfi, end_cfi, mod); + cfi_rewrite_endbr(start_cfi, end_cfi); - if (builtin) - pr_info("Using FineIBT CFI\n"); + if (builtin) { + pr_info("Using %sFineIBT%s CFI\n", + cfi_paranoid ? "paranoid " : "", + cfi_bhi ? "+BHI" : ""); + } return; default: @@ -1377,11 +1583,25 @@ static inline void poison_hash(void *addr) *(u32 *)addr = 0; } -static void poison_cfi(void *addr, void *wr_addr) +static void poison_cfi(void *addr) { + /* + * Compilers manage to be inconsistent with ENDBR vs __cfi prefixes, + * some (static) functions for which they can determine the address + * is never taken do not get a __cfi prefix, but *DO* get an ENDBR. + * + * As such, these functions will get sealed, but we need to be careful + * to not unconditionally scribble the previous function. + */ switch (cfi_mode) { case CFI_FINEIBT: /* + * FineIBT prefix should start with an ENDBR. + */ + if (!is_endbr(addr)) + break; + + /* * __cfi_\func: * osp nopl (%rax) * subl $0, %r10d @@ -1389,17 +1609,23 @@ static void poison_cfi(void *addr, void *wr_addr) * ud2 * 1: nop */ - poison_endbr(addr, wr_addr, false); - poison_hash(wr_addr + fineibt_preamble_hash); + poison_endbr(addr); + poison_hash(addr + fineibt_preamble_hash); break; case CFI_KCFI: /* + * kCFI prefix should start with a valid hash. + */ + if (!decode_preamble_hash(addr, NULL)) + break; + + /* * __cfi_\func: * movl $0, %eax * .skip 11, 0x90 */ - poison_hash(wr_addr + 1); + poison_hash(addr + 1); break; default: @@ -1407,24 +1633,135 @@ static void poison_cfi(void *addr, void *wr_addr) } } -#else +/* + * When regs->ip points to a 0xEA byte in the FineIBT preamble, + * return true and fill out target and type. + * + * We check the preamble by checking for the ENDBR instruction relative to the + * 0xEA instruction. + */ +static bool decode_fineibt_preamble(struct pt_regs *regs, unsigned long *target, u32 *type) +{ + unsigned long addr = regs->ip - fineibt_preamble_ud; + u32 hash; + + if (!exact_endbr((void *)addr)) + return false; + + *target = addr + fineibt_preamble_size; + + __get_kernel_nofault(&hash, addr + fineibt_preamble_hash, u32, Efault); + *type = (u32)regs->r10 + hash; + + /* + * Since regs->ip points to the middle of an instruction; it cannot + * continue with the normal fixup. + */ + regs->ip = *target; + + return true; + +Efault: + return false; +} + +/* + * regs->ip points to one of the UD2 in __bhi_args[]. + */ +static bool decode_fineibt_bhi(struct pt_regs *regs, unsigned long *target, u32 *type) +{ + unsigned long addr; + u32 hash; + + if (!cfi_bhi) + return false; + + if (regs->ip < (unsigned long)__bhi_args || + regs->ip >= (unsigned long)__bhi_args_end) + return false; + + /* + * Fetch the return address from the stack, this points to the + * FineIBT preamble. Since the CALL instruction is in the 5 last + * bytes of the preamble, the return address is in fact the target + * address. + */ + __get_kernel_nofault(&addr, regs->sp, unsigned long, Efault); + *target = addr; + + addr -= fineibt_preamble_size; + if (!exact_endbr((void *)addr)) + return false; + + __get_kernel_nofault(&hash, addr + fineibt_preamble_hash, u32, Efault); + *type = (u32)regs->r10 + hash; + + /* + * The UD2 sites are constructed with a RET immediately following, + * as such the non-fatal case can use the regular fixup. + */ + return true; + +Efault: + return false; +} + +/* + * regs->ip points to a LOCK Jcc.d8 instruction from the fineibt_paranoid_start[] + * sequence. + */ +static bool decode_fineibt_paranoid(struct pt_regs *regs, unsigned long *target, u32 *type) +{ + unsigned long addr = regs->ip - fineibt_paranoid_ud; + u32 hash; + + if (!cfi_paranoid || !is_cfi_trap(addr + fineibt_caller_size - LEN_UD2)) + return false; + + __get_kernel_nofault(&hash, addr + fineibt_caller_hash, u32, Efault); + *target = regs->r11 + fineibt_preamble_size; + *type = regs->r10; + + /* + * Since the trapping instruction is the exact, but LOCK prefixed, + * Jcc.d8 that got us here, the normal fixup will work. + */ + return true; + +Efault: + return false; +} + +bool decode_fineibt_insn(struct pt_regs *regs, unsigned long *target, u32 *type) +{ + if (decode_fineibt_paranoid(regs, target, type)) + return true; + + if (decode_fineibt_bhi(regs, target, type)) + return true; + + return decode_fineibt_preamble(regs, target, type); +} + +#else /* !CONFIG_FINEIBT: */ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, - s32 *start_cfi, s32 *end_cfi, struct module *mod) + s32 *start_cfi, s32 *end_cfi, bool builtin) { } #ifdef CONFIG_X86_KERNEL_IBT -static void poison_cfi(void *addr, void *wr_addr) { } +static void poison_cfi(void *addr) { } #endif -#endif +#endif /* !CONFIG_FINEIBT */ void apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, - s32 *start_cfi, s32 *end_cfi, struct module *mod) + s32 *start_cfi, s32 *end_cfi) { return __apply_fineibt(start_retpoline, end_retpoline, - start_cfi, end_cfi, mod); + start_cfi, end_cfi, + /* .builtin = */ false); } #ifdef CONFIG_SMP @@ -1721,27 +2058,27 @@ void __init alternative_instructions(void) paravirt_set_cap(); __apply_fineibt(__retpoline_sites, __retpoline_sites_end, - __cfi_sites, __cfi_sites_end, NULL); + __cfi_sites, __cfi_sites_end, true); /* * Rewrite the retpolines, must be done before alternatives since * those can rewrite the retpoline thunks. */ - apply_retpolines(__retpoline_sites, __retpoline_sites_end, NULL); - apply_returns(__return_sites, __return_sites_end, NULL); - - apply_alternatives(__alt_instructions, __alt_instructions_end, NULL); + apply_retpolines(__retpoline_sites, __retpoline_sites_end); + apply_returns(__return_sites, __return_sites_end); /* - * Now all calls are established. Apply the call thunks if - * required. + * Adjust all CALL instructions to point to func()-10, including + * those in .altinstr_replacement. */ callthunks_patch_builtin_calls(); + apply_alternatives(__alt_instructions, __alt_instructions_end); + /* * Seal all functions that do not have their address taken. */ - apply_seal_endbr(__ibt_endbr_seal, __ibt_endbr_seal_end, NULL); + apply_seal_endbr(__ibt_endbr_seal, __ibt_endbr_seal_end); #ifdef CONFIG_SMP /* Patch to UP if other cpus not imminent. */ diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 67e773744edb..6d12a9b69432 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -73,7 +73,6 @@ static int amd_cache_northbridges(void) amd_northbridges.nb = nb; for (i = 0; i < amd_northbridges.num; i++) { - node_to_amd_nb(i)->root = amd_node_get_root(i); node_to_amd_nb(i)->misc = amd_node_get_func(i, 3); /* diff --git a/arch/x86/kernel/amd_node.c b/arch/x86/kernel/amd_node.c index d2ec7fd555c5..b670fa85c61b 100644 --- a/arch/x86/kernel/amd_node.c +++ b/arch/x86/kernel/amd_node.c @@ -8,6 +8,7 @@ * Author: Yazen Ghannam <Yazen.Ghannam@amd.com> */ +#include <linux/debugfs.h> #include <asm/amd_node.h> /* @@ -93,10 +94,14 @@ static struct pci_dev **amd_roots; /* Protect the PCI config register pairs used for SMN. */ static DEFINE_MUTEX(smn_mutex); +static bool smn_exclusive; #define SMN_INDEX_OFFSET 0x60 #define SMN_DATA_OFFSET 0x64 +#define HSMP_INDEX_OFFSET 0xc4 +#define HSMP_DATA_OFFSET 0xc8 + /* * SMN accesses may fail in ways that are difficult to detect here in the called * functions amd_smn_read() and amd_smn_write(). Therefore, callers must do @@ -146,6 +151,9 @@ static int __amd_smn_rw(u8 i_off, u8 d_off, u16 node, u32 address, u32 *value, b if (!root) return err; + if (!smn_exclusive) + return err; + guard(mutex)(&smn_mutex); err = pci_write_config_dword(root, i_off, address); @@ -179,6 +187,93 @@ int __must_check amd_smn_write(u16 node, u32 address, u32 value) } EXPORT_SYMBOL_GPL(amd_smn_write); +int __must_check amd_smn_hsmp_rdwr(u16 node, u32 address, u32 *value, bool write) +{ + return __amd_smn_rw(HSMP_INDEX_OFFSET, HSMP_DATA_OFFSET, node, address, value, write); +} +EXPORT_SYMBOL_GPL(amd_smn_hsmp_rdwr); + +static struct dentry *debugfs_dir; +static u16 debug_node; +static u32 debug_address; + +static ssize_t smn_node_write(struct file *file, const char __user *userbuf, + size_t count, loff_t *ppos) +{ + u16 node; + int ret; + + ret = kstrtou16_from_user(userbuf, count, 0, &node); + if (ret) + return ret; + + if (node >= amd_num_nodes()) + return -ENODEV; + + debug_node = node; + return count; +} + +static int smn_node_show(struct seq_file *m, void *v) +{ + seq_printf(m, "0x%08x\n", debug_node); + return 0; +} + +static ssize_t smn_address_write(struct file *file, const char __user *userbuf, + size_t count, loff_t *ppos) +{ + int ret; + + ret = kstrtouint_from_user(userbuf, count, 0, &debug_address); + if (ret) + return ret; + + return count; +} + +static int smn_address_show(struct seq_file *m, void *v) +{ + seq_printf(m, "0x%08x\n", debug_address); + return 0; +} + +static int smn_value_show(struct seq_file *m, void *v) +{ + u32 val; + int ret; + + ret = amd_smn_read(debug_node, debug_address, &val); + if (ret) + return ret; + + seq_printf(m, "0x%08x\n", val); + return 0; +} + +static ssize_t smn_value_write(struct file *file, const char __user *userbuf, + size_t count, loff_t *ppos) +{ + u32 val; + int ret; + + ret = kstrtouint_from_user(userbuf, count, 0, &val); + if (ret) + return ret; + + add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK); + + ret = amd_smn_write(debug_node, debug_address, val); + if (ret) + return ret; + + return count; +} + +DEFINE_SHOW_STORE_ATTRIBUTE(smn_node); +DEFINE_SHOW_STORE_ATTRIBUTE(smn_address); +DEFINE_SHOW_STORE_ATTRIBUTE(smn_value); + static int amd_cache_roots(void) { u16 node, num_nodes = amd_num_nodes(); @@ -193,6 +288,48 @@ static int amd_cache_roots(void) return 0; } +static int reserve_root_config_spaces(void) +{ + struct pci_dev *root = NULL; + struct pci_bus *bus = NULL; + + while ((bus = pci_find_next_bus(bus))) { + /* Root device is Device 0 Function 0 on each Primary Bus. */ + root = pci_get_slot(bus, 0); + if (!root) + continue; + + if (root->vendor != PCI_VENDOR_ID_AMD && + root->vendor != PCI_VENDOR_ID_HYGON) + continue; + + pci_dbg(root, "Reserving PCI config space\n"); + + /* + * There are a few SMN index/data pairs and other registers + * that shouldn't be accessed by user space. + * So reserve the entire PCI config space for simplicity rather + * than covering specific registers piecemeal. + */ + if (!pci_request_config_region_exclusive(root, 0, PCI_CFG_SPACE_SIZE, NULL)) { + pci_err(root, "Failed to reserve config space\n"); + return -EEXIST; + } + } + + smn_exclusive = true; + return 0; +} + +static bool enable_dfs; + +static int __init amd_smn_enable_dfs(char *str) +{ + enable_dfs = true; + return 1; +} +__setup("amd_smn_debugfs_enable", amd_smn_enable_dfs); + static int __init amd_smn_init(void) { int err; @@ -209,6 +346,18 @@ static int __init amd_smn_init(void) if (err) return err; + err = reserve_root_config_spaces(); + if (err) + return err; + + if (enable_dfs) { + debugfs_dir = debugfs_create_dir("amd_smn", arch_debugfs_dir); + + debugfs_create_file("node", 0600, debugfs_dir, NULL, &smn_node_fops); + debugfs_create_file("address", 0600, debugfs_dir, NULL, &smn_address_fops); + debugfs_create_file("value", 0600, debugfs_dir, NULL, &smn_value_fops); + } + return 0; } diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile index 3bf0487cf3b7..52d1808ee360 100644 --- a/arch/x86/kernel/apic/Makefile +++ b/arch/x86/kernel/apic/Makefile @@ -23,8 +23,5 @@ obj-$(CONFIG_X86_X2APIC) += x2apic_cluster.o obj-y += apic_flat_64.o endif -# APIC probe will depend on the listing order here -obj-$(CONFIG_X86_BIGSMP) += bigsmp_32.o - # For 32bit, probe_32 need to be listed last obj-$(CONFIG_X86_LOCAL_APIC) += probe_$(BITS).o diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index e893dc6f11c1..62584a347931 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1371,8 +1371,6 @@ void __init apic_intr_mode_init(void) x86_64_probe_apic(); - x86_32_install_bigsmp(); - if (x86_platform.apic_post_init) x86_platform.apic_post_init(); @@ -1674,7 +1672,6 @@ static __init void apic_read_boot_cpu_id(bool x2apic) boot_cpu_apic_version = GET_APIC_VERSION(apic_read(APIC_LVR)); } topology_register_boot_apic(boot_cpu_physical_apicid); - x86_32_probe_bigsmp_early(); } #ifdef CONFIG_X86_X2APIC @@ -2014,8 +2011,8 @@ static bool __init detect_init_APIC(void) case X86_VENDOR_HYGON: break; case X86_VENDOR_INTEL: - if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 || - (boot_cpu_data.x86 == 5 && boot_cpu_has(X86_FEATURE_APIC))) + if ((boot_cpu_data.x86 == 5 && boot_cpu_has(X86_FEATURE_APIC)) || + boot_cpu_data.x86_vfm >= INTEL_PENTIUM_PRO) break; goto no_apic; default: diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c deleted file mode 100644 index 9285d500d5b4..000000000000 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ /dev/null @@ -1,105 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * APIC driver for "bigsmp" xAPIC machines with more than 8 virtual CPUs. - * - * Drives the local APIC in "clustered mode". - */ -#include <linux/cpumask.h> -#include <linux/dmi.h> -#include <linux/smp.h> - -#include <asm/apic.h> -#include <asm/io_apic.h> - -#include "local.h" - -static u32 bigsmp_get_apic_id(u32 x) -{ - return (x >> 24) & 0xFF; -} - -static void bigsmp_send_IPI_allbutself(int vector) -{ - default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector); -} - -static void bigsmp_send_IPI_all(int vector) -{ - default_send_IPI_mask_sequence_phys(cpu_online_mask, vector); -} - -static int dmi_bigsmp; /* can be set by dmi scanners */ - -static int hp_ht_bigsmp(const struct dmi_system_id *d) -{ - printk(KERN_NOTICE "%s detected: force use of apic=bigsmp\n", d->ident); - dmi_bigsmp = 1; - - return 0; -} - - -static const struct dmi_system_id bigsmp_dmi_table[] = { - { hp_ht_bigsmp, "HP ProLiant DL760 G2", - { DMI_MATCH(DMI_BIOS_VENDOR, "HP"), - DMI_MATCH(DMI_BIOS_VERSION, "P44-"), - } - }, - - { hp_ht_bigsmp, "HP ProLiant DL740", - { DMI_MATCH(DMI_BIOS_VENDOR, "HP"), - DMI_MATCH(DMI_BIOS_VERSION, "P47-"), - } - }, - { } /* NULL entry stops DMI scanning */ -}; - -static int probe_bigsmp(void) -{ - return dmi_check_system(bigsmp_dmi_table); -} - -static struct apic apic_bigsmp __ro_after_init = { - - .name = "bigsmp", - .probe = probe_bigsmp, - - .dest_mode_logical = false, - - .disable_esr = 1, - - .cpu_present_to_apicid = default_cpu_present_to_apicid, - - .max_apic_id = 0xFE, - .get_apic_id = bigsmp_get_apic_id, - - .calc_dest_apicid = apic_default_calc_apicid, - - .send_IPI = default_send_IPI_single_phys, - .send_IPI_mask = default_send_IPI_mask_sequence_phys, - .send_IPI_mask_allbutself = NULL, - .send_IPI_allbutself = bigsmp_send_IPI_allbutself, - .send_IPI_all = bigsmp_send_IPI_all, - .send_IPI_self = default_send_IPI_self, - - .read = native_apic_mem_read, - .write = native_apic_mem_write, - .eoi = native_apic_mem_eoi, - .icr_read = native_apic_icr_read, - .icr_write = native_apic_icr_write, - .wait_icr_idle = apic_mem_wait_icr_idle, - .safe_wait_icr_idle = apic_mem_wait_icr_idle_timeout, -}; - -bool __init apic_bigsmp_possible(bool cmdline_override) -{ - return apic == &apic_bigsmp || !cmdline_override; -} - -void __init apic_bigsmp_force(void) -{ - if (apic != &apic_bigsmp) - apic_install_driver(&apic_bigsmp); -} - -apic_driver(apic_bigsmp); diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c index 5da693d633b7..23025a3a1db4 100644 --- a/arch/x86/kernel/apic/ipi.c +++ b/arch/x86/kernel/apic/ipi.c @@ -287,34 +287,4 @@ void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector) __default_send_IPI_dest_field(mask, vector, APIC_DEST_LOGICAL); local_irq_restore(flags); } - -#ifdef CONFIG_SMP -static int convert_apicid_to_cpu(u32 apic_id) -{ - int i; - - for_each_possible_cpu(i) { - if (per_cpu(x86_cpu_to_apicid, i) == apic_id) - return i; - } - return -1; -} - -int safe_smp_processor_id(void) -{ - u32 apicid; - int cpuid; - - if (!boot_cpu_has(X86_FEATURE_APIC)) - return 0; - - apicid = read_apic_id(); - if (apicid == BAD_APICID) - return 0; - - cpuid = convert_apicid_to_cpu(apicid); - - return cpuid >= 0 ? cpuid : 0; -} -#endif #endif diff --git a/arch/x86/kernel/apic/local.h b/arch/x86/kernel/apic/local.h index 842fe28496be..bdcf609eb283 100644 --- a/arch/x86/kernel/apic/local.h +++ b/arch/x86/kernel/apic/local.h @@ -65,17 +65,4 @@ void default_send_IPI_self(int vector); void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, int vector); void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask, int vector); void default_send_IPI_mask_logical(const struct cpumask *mask, int vector); -void x86_32_probe_bigsmp_early(void); -void x86_32_install_bigsmp(void); -#else -static inline void x86_32_probe_bigsmp_early(void) { } -static inline void x86_32_install_bigsmp(void) { } -#endif - -#ifdef CONFIG_X86_BIGSMP -bool apic_bigsmp_possible(bool cmdline_selected); -void apic_bigsmp_force(void); -#else -static inline bool apic_bigsmp_possible(bool cmdline_selected) { return false; }; -static inline void apic_bigsmp_force(void) { } #endif diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index f75ee345c02d..87bc9e7ca5d6 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -93,35 +93,6 @@ static int __init parse_apic(char *arg) } early_param("apic", parse_apic); -void __init x86_32_probe_bigsmp_early(void) -{ - if (nr_cpu_ids <= 8 || xen_pv_domain()) - return; - - if (IS_ENABLED(CONFIG_X86_BIGSMP)) { - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_INTEL: - if (!APIC_XAPIC(boot_cpu_apic_version)) - break; - /* P4 and above */ - fallthrough; - case X86_VENDOR_HYGON: - case X86_VENDOR_AMD: - if (apic_bigsmp_possible(cmdline_apic)) - return; - break; - } - } - pr_info("Limiting to 8 possible CPUs\n"); - set_nr_cpu_ids(8); -} - -void __init x86_32_install_bigsmp(void) -{ - if (nr_cpu_ids > 8 && !xen_pv_domain()) - apic_bigsmp_force(); -} - void __init x86_32_probe_apic(void) { if (!cmdline_apic) { diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index a98020bf31bb..ad4ea6fb3b6c 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -107,11 +107,6 @@ static void __used common(void) OFFSET(TSS_sp0, tss_struct, x86_tss.sp0); OFFSET(TSS_sp1, tss_struct, x86_tss.sp1); OFFSET(TSS_sp2, tss_struct, x86_tss.sp2); - OFFSET(X86_top_of_stack, pcpu_hot, top_of_stack); - OFFSET(X86_current_task, pcpu_hot, current_task); -#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING - OFFSET(X86_call_depth, pcpu_hot, call_depth); -#endif #if IS_ENABLED(CONFIG_CRYPTO_ARIA_AESNI_AVX_X86_64) /* Offset for fields in aria_ctx */ BLANK(); diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index bb65371ea9df..590b6cd0eac0 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -54,11 +54,5 @@ int main(void) BLANK(); #undef ENTRY - BLANK(); - -#ifdef CONFIG_STACKPROTECTOR - OFFSET(FIXED_stack_canary, fixed_percpu_data, stack_canary); - BLANK(); -#endif return 0; } diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c index 8418a892d195..25ae54250112 100644 --- a/arch/x86/kernel/callthunks.c +++ b/arch/x86/kernel/callthunks.c @@ -240,21 +240,10 @@ patch_call_sites(s32 *start, s32 *end, const struct core_text *ct) } static __init_or_module void -patch_alt_call_sites(struct alt_instr *start, struct alt_instr *end, - const struct core_text *ct) -{ - struct alt_instr *a; - - for (a = start; a < end; a++) - patch_call((void *)&a->instr_offset + a->instr_offset, ct); -} - -static __init_or_module void callthunks_setup(struct callthunk_sites *cs, const struct core_text *ct) { prdbg("Patching call sites %s\n", ct->name); patch_call_sites(cs->call_start, cs->call_end, ct); - patch_alt_call_sites(cs->alt_start, cs->alt_end, ct); prdbg("Patching call sites done%s\n", ct->name); } @@ -263,8 +252,6 @@ void __init callthunks_patch_builtin_calls(void) struct callthunk_sites cs = { .call_start = __call_sites, .call_end = __call_sites_end, - .alt_start = __alt_instructions, - .alt_end = __alt_instructions_end }; if (!cpu_feature_enabled(X86_FEATURE_CALL_DEPTH)) diff --git a/arch/x86/kernel/cfi.c b/arch/x86/kernel/cfi.c index e6bf78fac146..77086cf565ec 100644 --- a/arch/x86/kernel/cfi.c +++ b/arch/x86/kernel/cfi.c @@ -67,16 +67,30 @@ static bool decode_cfi_insn(struct pt_regs *regs, unsigned long *target, */ enum bug_trap_type handle_cfi_failure(struct pt_regs *regs) { - unsigned long target; + unsigned long target, addr = regs->ip; u32 type; - if (!is_cfi_trap(regs->ip)) - return BUG_TRAP_TYPE_NONE; + switch (cfi_mode) { + case CFI_KCFI: + if (!is_cfi_trap(addr)) + return BUG_TRAP_TYPE_NONE; + + if (!decode_cfi_insn(regs, &target, &type)) + return report_cfi_failure_noaddr(regs, addr); + + break; - if (!decode_cfi_insn(regs, &target, &type)) - return report_cfi_failure_noaddr(regs, regs->ip); + case CFI_FINEIBT: + if (!decode_fineibt_insn(regs, &target, &type)) + return BUG_TRAP_TYPE_NONE; + + break; + + default: + return BUG_TRAP_TYPE_NONE; + } - return report_cfi_failure(regs, regs->ip, &target, type); + return report_cfi_failure(regs, addr, &target, type); } /* diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 54194f5995de..79569f72b8ee 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -29,6 +29,8 @@ #include "cpu.h" +u16 invlpgb_count_max __ro_after_init; + static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) { u32 gprs[8] = { 0 }; @@ -632,7 +634,7 @@ static void init_amd_k8(struct cpuinfo_x86 *c) * (model = 0x14) and later actually support it. * (AMD Erratum #110, docId: 25759). */ - if (c->x86_model < 0x14 && cpu_has(c, X86_FEATURE_LAHF_LM)) { + if (c->x86_model < 0x14 && cpu_has(c, X86_FEATURE_LAHF_LM) && !cpu_has(c, X86_FEATURE_HYPERVISOR)) { clear_cpu_cap(c, X86_FEATURE_LAHF_LM); if (!rdmsrl_amd_safe(0xc001100d, &value)) { value &= ~BIT_64(32); @@ -1073,6 +1075,10 @@ static void init_amd(struct cpuinfo_x86 *c) /* AMD CPUs don't need fencing after x2APIC/TSC_DEADLINE MSR writes. */ clear_cpu_cap(c, X86_FEATURE_APIC_MSRS_FENCE); + + /* Enable Translation Cache Extension */ + if (cpu_has(c, X86_FEATURE_TCE)) + msr_set_bit(MSR_EFER, _EFER_TCE); } #ifdef CONFIG_X86_32 @@ -1105,8 +1111,8 @@ static void cpu_detect_tlb_amd(struct cpuinfo_x86 *c) cpuid(0x80000006, &eax, &ebx, &ecx, &edx); - tlb_lld_4k[ENTRIES] = (ebx >> 16) & mask; - tlb_lli_4k[ENTRIES] = ebx & mask; + tlb_lld_4k = (ebx >> 16) & mask; + tlb_lli_4k = ebx & mask; /* * K8 doesn't have 2M/4M entries in the L2 TLB so read out the L1 TLB @@ -1119,26 +1125,30 @@ static void cpu_detect_tlb_amd(struct cpuinfo_x86 *c) /* Handle DTLB 2M and 4M sizes, fall back to L1 if L2 is disabled */ if (!((eax >> 16) & mask)) - tlb_lld_2m[ENTRIES] = (cpuid_eax(0x80000005) >> 16) & 0xff; + tlb_lld_2m = (cpuid_eax(0x80000005) >> 16) & 0xff; else - tlb_lld_2m[ENTRIES] = (eax >> 16) & mask; + tlb_lld_2m = (eax >> 16) & mask; /* a 4M entry uses two 2M entries */ - tlb_lld_4m[ENTRIES] = tlb_lld_2m[ENTRIES] >> 1; + tlb_lld_4m = tlb_lld_2m >> 1; /* Handle ITLB 2M and 4M sizes, fall back to L1 if L2 is disabled */ if (!(eax & mask)) { /* Erratum 658 */ if (c->x86 == 0x15 && c->x86_model <= 0x1f) { - tlb_lli_2m[ENTRIES] = 1024; + tlb_lli_2m = 1024; } else { cpuid(0x80000005, &eax, &ebx, &ecx, &edx); - tlb_lli_2m[ENTRIES] = eax & 0xff; + tlb_lli_2m = eax & 0xff; } } else - tlb_lli_2m[ENTRIES] = eax & mask; + tlb_lli_2m = eax & mask; + + tlb_lli_4m = tlb_lli_2m >> 1; - tlb_lli_4m[ENTRIES] = tlb_lli_2m[ENTRIES] >> 1; + /* Max number of pages INVLPGB can invalidate in one shot */ + if (cpu_has(c, X86_FEATURE_INVLPGB)) + invlpgb_count_max = (cpuid_edx(0x80000008) & 0xffff) + 1; } static const struct cpu_dev amd_cpu_dev = { diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index a6c6bccfa8b8..b3a520959b51 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -8,21 +8,19 @@ * Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD. */ -#include <linux/slab.h> #include <linux/cacheinfo.h> +#include <linux/capability.h> #include <linux/cpu.h> #include <linux/cpuhotplug.h> -#include <linux/sched.h> -#include <linux/capability.h> -#include <linux/sysfs.h> #include <linux/pci.h> #include <linux/stop_machine.h> +#include <linux/sysfs.h> -#include <asm/cpufeature.h> -#include <asm/cacheinfo.h> #include <asm/amd_nb.h> -#include <asm/smp.h> +#include <asm/cacheinfo.h> +#include <asm/cpufeature.h> #include <asm/mtrr.h> +#include <asm/smp.h> #include <asm/tlbflush.h> #include "cpu.h" @@ -31,7 +29,6 @@ #define LVL_1_DATA 2 #define LVL_2 3 #define LVL_3 4 -#define LVL_TRACE 5 /* Shared last level cache maps */ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map); @@ -96,10 +93,6 @@ static const struct _cache_table cache_table[] = { 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */ { 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */ { 0x68, LVL_1_DATA, 32 }, /* 4-way set assoc, sectored cache, 64 byte line size */ - { 0x70, LVL_TRACE, 12 }, /* 8-way set assoc */ - { 0x71, LVL_TRACE, 16 }, /* 8-way set assoc */ - { 0x72, LVL_TRACE, 32 }, /* 8-way set assoc */ - { 0x73, LVL_TRACE, 64 }, /* 8-way set assoc */ { 0x78, LVL_2, MB(1) }, /* 4-way set assoc, 64 byte line size */ { 0x79, LVL_2, 128 }, /* 8-way set assoc, sectored cache, 64 byte line size */ { 0x7a, LVL_2, 256 }, /* 8-way set assoc, sectored cache, 64 byte line size */ @@ -787,19 +780,13 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c) } } } - /* - * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for - * trace cache - */ - if ((!ci->num_leaves || c->x86 == 15) && c->cpuid_level > 1) { + + /* Don't use CPUID(2) if CPUID(4) is supported. */ + if (!ci->num_leaves && c->cpuid_level > 1) { /* supports eax=2 call */ int j, n; unsigned int regs[4]; unsigned char *dp = (unsigned char *)regs; - int only_trace = 0; - - if (ci->num_leaves && c->x86 == 15) - only_trace = 1; /* Number of times to iterate */ n = cpuid_eax(2) & 0xFF; @@ -820,8 +807,6 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c) /* look up this descriptor in the table */ while (cache_table[k].descriptor != 0) { if (cache_table[k].descriptor == des) { - if (only_trace && cache_table[k].cache_type != LVL_TRACE) - break; switch (cache_table[k].cache_type) { case LVL_1_INST: l1i += cache_table[k].size; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 7cce91b19fb2..73565168fc19 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -667,8 +667,8 @@ static void filter_cpuid_features(struct cpuinfo_x86 *c, bool warn) if (!warn) continue; - pr_warn("CPU: CPU feature " X86_CAP_FMT " disabled, no CPUID level 0x%x\n", - x86_cap_flag(df->feature), df->level); + pr_warn("CPU: CPU feature %s disabled, no CPUID level 0x%x\n", + x86_cap_flags[df->feature], df->level); } } @@ -846,13 +846,13 @@ void cpu_detect_cache_sizes(struct cpuinfo_x86 *c) c->x86_cache_size = l2size; } -u16 __read_mostly tlb_lli_4k[NR_INFO]; -u16 __read_mostly tlb_lli_2m[NR_INFO]; -u16 __read_mostly tlb_lli_4m[NR_INFO]; -u16 __read_mostly tlb_lld_4k[NR_INFO]; -u16 __read_mostly tlb_lld_2m[NR_INFO]; -u16 __read_mostly tlb_lld_4m[NR_INFO]; -u16 __read_mostly tlb_lld_1g[NR_INFO]; +u16 __read_mostly tlb_lli_4k; +u16 __read_mostly tlb_lli_2m; +u16 __read_mostly tlb_lli_4m; +u16 __read_mostly tlb_lld_4k; +u16 __read_mostly tlb_lld_2m; +u16 __read_mostly tlb_lld_4m; +u16 __read_mostly tlb_lld_1g; static void cpu_detect_tlb(struct cpuinfo_x86 *c) { @@ -860,12 +860,10 @@ static void cpu_detect_tlb(struct cpuinfo_x86 *c) this_cpu->c_detect_tlb(c); pr_info("Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n", - tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES], - tlb_lli_4m[ENTRIES]); + tlb_lli_4k, tlb_lli_2m, tlb_lli_4m); pr_info("Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d, 1GB %d\n", - tlb_lld_4k[ENTRIES], tlb_lld_2m[ENTRIES], - tlb_lld_4m[ENTRIES], tlb_lld_1g[ENTRIES]); + tlb_lld_4k, tlb_lld_2m, tlb_lld_4m, tlb_lld_1g); } void get_cpu_vendor(struct cpuinfo_x86 *c) @@ -1164,7 +1162,7 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { VULNWL_INTEL(INTEL_CORE_YONAH, NO_SSB), - VULNWL_INTEL(INTEL_ATOM_AIRMONT_MID, NO_SSB | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | MSBDS_ONLY), + VULNWL_INTEL(INTEL_ATOM_SILVERMONT_MID2,NO_SSB | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | MSBDS_ONLY), VULNWL_INTEL(INTEL_ATOM_AIRMONT_NP, NO_SSB | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), VULNWL_INTEL(INTEL_ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), @@ -1205,6 +1203,9 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { #define VULNBL_INTEL_STEPS(vfm, max_stepping, issues) \ X86_MATCH_VFM_STEPS(vfm, X86_STEP_MIN, max_stepping, issues) +#define VULNBL_INTEL_TYPE(vfm, cpu_type, issues) \ + X86_MATCH_VFM_CPU_TYPE(vfm, INTEL_CPU_TYPE_##cpu_type, issues) + #define VULNBL_AMD(family, blacklist) \ VULNBL(AMD, family, X86_MODEL_ANY, blacklist) @@ -1253,9 +1254,9 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPS(INTEL_TIGERLAKE, X86_STEP_MAX, GDS), VULNBL_INTEL_STEPS(INTEL_LAKEFIELD, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED), VULNBL_INTEL_STEPS(INTEL_ROCKETLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS), - VULNBL_INTEL_STEPS(INTEL_ALDERLAKE, X86_STEP_MAX, RFDS), + VULNBL_INTEL_TYPE(INTEL_ALDERLAKE, ATOM, RFDS), VULNBL_INTEL_STEPS(INTEL_ALDERLAKE_L, X86_STEP_MAX, RFDS), - VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE, X86_STEP_MAX, RFDS), + VULNBL_INTEL_TYPE(INTEL_RAPTORLAKE, ATOM, RFDS), VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE_P, X86_STEP_MAX, RFDS), VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE_S, X86_STEP_MAX, RFDS), VULNBL_INTEL_STEPS(INTEL_ATOM_GRACEMONT, X86_STEP_MAX, RFDS), @@ -1479,15 +1480,96 @@ static void detect_nopl(void) #endif } +static inline bool parse_set_clear_cpuid(char *arg, bool set) +{ + char *opt; + int taint = 0; + + while (arg) { + bool found __maybe_unused = false; + unsigned int bit; + + opt = strsep(&arg, ","); + + /* + * Handle naked numbers first for feature flags which don't + * have names. It doesn't make sense for a bug not to have a + * name so don't handle bug flags here. + */ + if (!kstrtouint(opt, 10, &bit)) { + if (bit < NCAPINTS * 32) { + + if (set) { + pr_warn("setcpuid: force-enabling CPU feature flag:"); + setup_force_cpu_cap(bit); + } else { + pr_warn("clearcpuid: force-disabling CPU feature flag:"); + setup_clear_cpu_cap(bit); + } + /* empty-string, i.e., ""-defined feature flags */ + if (!x86_cap_flags[bit]) + pr_cont(" %d:%d\n", bit >> 5, bit & 31); + else + pr_cont(" %s\n", x86_cap_flags[bit]); + + taint++; + } + /* + * The assumption is that there are no feature names with only + * numbers in the name thus go to the next argument. + */ + continue; + } + + for (bit = 0; bit < 32 * (NCAPINTS + NBUGINTS); bit++) { + const char *flag; + const char *kind; + + if (bit < 32 * NCAPINTS) { + flag = x86_cap_flags[bit]; + kind = "feature"; + } else { + kind = "bug"; + flag = x86_bug_flags[bit - (32 * NCAPINTS)]; + } + + if (!flag) + continue; + + if (strcmp(flag, opt)) + continue; + + if (set) { + pr_warn("setcpuid: force-enabling CPU %s flag: %s\n", + kind, flag); + setup_force_cpu_cap(bit); + } else { + pr_warn("clearcpuid: force-disabling CPU %s flag: %s\n", + kind, flag); + setup_clear_cpu_cap(bit); + } + taint++; + found = true; + break; + } + + if (!found) + pr_warn("%s: unknown CPU flag: %s", set ? "setcpuid" : "clearcpuid", opt); + } + + return taint; +} + + /* * We parse cpu parameters early because fpu__init_system() is executed * before parse_early_param(). */ static void __init cpu_parse_early_param(void) { + bool cpuid_taint = false; char arg[128]; - char *argptr = arg, *opt; - int arglen, taint = 0; + int arglen; #ifdef CONFIG_X86_32 if (cmdline_find_option_bool(boot_command_line, "no387")) @@ -1519,61 +1601,17 @@ static void __init cpu_parse_early_param(void) setup_clear_cpu_cap(X86_FEATURE_FRED); arglen = cmdline_find_option(boot_command_line, "clearcpuid", arg, sizeof(arg)); - if (arglen <= 0) - return; - - pr_info("Clearing CPUID bits:"); - - while (argptr) { - bool found __maybe_unused = false; - unsigned int bit; - - opt = strsep(&argptr, ","); - - /* - * Handle naked numbers first for feature flags which don't - * have names. - */ - if (!kstrtouint(opt, 10, &bit)) { - if (bit < NCAPINTS * 32) { - - /* empty-string, i.e., ""-defined feature flags */ - if (!x86_cap_flags[bit]) - pr_cont(" " X86_CAP_FMT_NUM, x86_cap_flag_num(bit)); - else - pr_cont(" " X86_CAP_FMT, x86_cap_flag(bit)); - - setup_clear_cpu_cap(bit); - taint++; - } - /* - * The assumption is that there are no feature names with only - * numbers in the name thus go to the next argument. - */ - continue; - } - - for (bit = 0; bit < 32 * NCAPINTS; bit++) { - if (!x86_cap_flag(bit)) - continue; - - if (strcmp(x86_cap_flag(bit), opt)) - continue; + if (arglen > 0) + cpuid_taint |= parse_set_clear_cpuid(arg, false); - pr_cont(" %s", opt); - setup_clear_cpu_cap(bit); - taint++; - found = true; - break; - } + arglen = cmdline_find_option(boot_command_line, "setcpuid", arg, sizeof(arg)); + if (arglen > 0) + cpuid_taint |= parse_set_clear_cpuid(arg, true); - if (!found) - pr_cont(" (unknown: %s)", opt); - } - pr_cont("\n"); - - if (taint) + if (cpuid_taint) { + pr_warn("!!! setcpuid=/clearcpuid= in use, this is for TESTING ONLY, may break things horribly. Tainting kernel.\n"); add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK); + } } /* @@ -1610,6 +1648,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) c->cpu_index = 0; filter_cpuid_features(c, false); + check_cpufeature_deps(c); if (this_cpu->c_bsp_init) this_cpu->c_bsp_init(c); @@ -1870,6 +1909,9 @@ static void identify_cpu(struct cpuinfo_x86 *c) /* Filter out anything that depends on CPUID levels we don't have */ filter_cpuid_features(c, true); + /* Check for unmet dependencies based on the CPUID dependency table */ + check_cpufeature_deps(c); + /* If the model name is still unset, do table lookup. */ if (!c->x86_model_id[0]) { const char *p; @@ -1962,9 +2004,15 @@ static __init void identify_boot_cpu(void) lkgs_init(); } -void identify_secondary_cpu(struct cpuinfo_x86 *c) +void identify_secondary_cpu(unsigned int cpu) { - BUG_ON(c == &boot_cpu_data); + struct cpuinfo_x86 *c = &cpu_data(cpu); + + /* Copy boot_cpu_data only on the first bringup */ + if (!c->initialized) + *c = boot_cpu_data; + c->cpu_index = cpu; + identify_cpu(c); #ifdef CONFIG_X86_32 enable_sep_cpu(); @@ -1975,6 +2023,7 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c) update_gds_msr(); tsx_ap_init(); + c->initialized = true; } void print_cpu_info(struct cpuinfo_x86 *c) @@ -2005,27 +2054,40 @@ void print_cpu_info(struct cpuinfo_x86 *c) } /* - * clearcpuid= was already parsed in cpu_parse_early_param(). This dummy - * function prevents it from becoming an environment variable for init. + * clearcpuid= and setcpuid= were already parsed in cpu_parse_early_param(). + * These dummy functions prevent them from becoming an environment variable for + * init. */ + static __init int setup_clearcpuid(char *arg) { return 1; } __setup("clearcpuid=", setup_clearcpuid); -DEFINE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot) = { - .current_task = &init_task, - .preempt_count = INIT_PREEMPT_COUNT, - .top_of_stack = TOP_OF_INIT_STACK, -}; -EXPORT_PER_CPU_SYMBOL(pcpu_hot); -EXPORT_PER_CPU_SYMBOL(const_pcpu_hot); +static __init int setup_setcpuid(char *arg) +{ + return 1; +} +__setup("setcpuid=", setup_setcpuid); + +DEFINE_PER_CPU_CACHE_HOT(struct task_struct *, current_task) = &init_task; +EXPORT_PER_CPU_SYMBOL(current_task); +EXPORT_PER_CPU_SYMBOL(const_current_task); + +DEFINE_PER_CPU_CACHE_HOT(int, __preempt_count) = INIT_PREEMPT_COUNT; +EXPORT_PER_CPU_SYMBOL(__preempt_count); + +DEFINE_PER_CPU_CACHE_HOT(unsigned long, cpu_current_top_of_stack) = TOP_OF_INIT_STACK; #ifdef CONFIG_X86_64 -DEFINE_PER_CPU_FIRST(struct fixed_percpu_data, - fixed_percpu_data) __aligned(PAGE_SIZE) __visible; -EXPORT_PER_CPU_SYMBOL_GPL(fixed_percpu_data); +/* + * Note: Do not make this dependant on CONFIG_MITIGATION_CALL_DEPTH_TRACKING + * so that this space is reserved in the hot cache section even when the + * mitigation is disabled. + */ +DEFINE_PER_CPU_CACHE_HOT(u64, __x86_call_depth); +EXPORT_PER_CPU_SYMBOL(__x86_call_depth); static void wrmsrl_cstar(unsigned long val) { @@ -2089,18 +2151,15 @@ void syscall_init(void) if (!cpu_feature_enabled(X86_FEATURE_FRED)) idt_syscall_init(); } - -#else /* CONFIG_X86_64 */ +#endif /* CONFIG_X86_64 */ #ifdef CONFIG_STACKPROTECTOR -DEFINE_PER_CPU(unsigned long, __stack_chk_guard); +DEFINE_PER_CPU_CACHE_HOT(unsigned long, __stack_chk_guard); #ifndef CONFIG_SMP EXPORT_PER_CPU_SYMBOL(__stack_chk_guard); #endif #endif -#endif /* CONFIG_X86_64 */ - /* * Clear all 6 debug registers: */ diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 1beccefbaff9..51deb60a9d26 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -33,14 +33,6 @@ struct cpu_dev { #endif }; -struct _tlb_table { - unsigned char descriptor; - char tlb_type; - unsigned int entries; - /* unsigned int ways; */ - char info[128]; -}; - #define cpu_dev_register(cpu_devX) \ static const struct cpu_dev *const __cpu_dev_##cpu_devX __used \ __section(".x86_cpu_dev.init") = \ diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c index df838e3bdbe0..a2fbea0be535 100644 --- a/arch/x86/kernel/cpu/cpuid-deps.c +++ b/arch/x86/kernel/cpu/cpuid-deps.c @@ -147,3 +147,38 @@ void setup_clear_cpu_cap(unsigned int feature) { do_clear_cpu_cap(NULL, feature); } + +/* + * Return the feature "name" if available, otherwise return + * the X86_FEATURE_* numerals to make it easier to identify + * the feature. + */ +static const char *x86_feature_name(unsigned int feature, char *buf) +{ + if (x86_cap_flags[feature]) + return x86_cap_flags[feature]; + + snprintf(buf, 16, "%d*32+%2d", feature / 32, feature % 32); + + return buf; +} + +void check_cpufeature_deps(struct cpuinfo_x86 *c) +{ + char feature_buf[16], depends_buf[16]; + const struct cpuid_dep *d; + + for (d = cpuid_deps; d->feature; d++) { + if (cpu_has(c, d->feature) && !cpu_has(c, d->depends)) { + /* + * Only warn about the first unmet dependency on the + * first CPU where it is encountered to avoid spamming + * the kernel log. + */ + pr_warn_once("x86 CPU feature dependency check failure: CPU%d has '%s' enabled but '%s' disabled. Kernel might be fine, but no guarantees.\n", + smp_processor_id(), + x86_feature_name(d->feature, feature_buf), + x86_feature_name(d->depends, depends_buf)); + } + } +} diff --git a/arch/x86/kernel/cpu/debugfs.c b/arch/x86/kernel/cpu/debugfs.c index cacfd3f6abef..1976fef2dfe5 100644 --- a/arch/x86/kernel/cpu/debugfs.c +++ b/arch/x86/kernel/cpu/debugfs.c @@ -16,8 +16,8 @@ static int cpu_debug_show(struct seq_file *m, void *p) if (!c->initialized) return 0; - seq_printf(m, "initial_apicid: %x\n", c->topo.initial_apicid); - seq_printf(m, "apicid: %x\n", c->topo.apicid); + seq_printf(m, "initial_apicid: 0x%x\n", c->topo.initial_apicid); + seq_printf(m, "apicid: 0x%x\n", c->topo.apicid); seq_printf(m, "pkg_id: %u\n", c->topo.pkg_id); seq_printf(m, "die_id: %u\n", c->topo.die_id); seq_printf(m, "cu_id: %u\n", c->topo.cu_id); diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c index c5191b06f9f2..6af4a4a90a52 100644 --- a/arch/x86/kernel/cpu/hygon.c +++ b/arch/x86/kernel/cpu/hygon.c @@ -240,26 +240,26 @@ static void cpu_detect_tlb_hygon(struct cpuinfo_x86 *c) cpuid(0x80000006, &eax, &ebx, &ecx, &edx); - tlb_lld_4k[ENTRIES] = (ebx >> 16) & mask; - tlb_lli_4k[ENTRIES] = ebx & mask; + tlb_lld_4k = (ebx >> 16) & mask; + tlb_lli_4k = ebx & mask; /* Handle DTLB 2M and 4M sizes, fall back to L1 if L2 is disabled */ if (!((eax >> 16) & mask)) - tlb_lld_2m[ENTRIES] = (cpuid_eax(0x80000005) >> 16) & 0xff; + tlb_lld_2m = (cpuid_eax(0x80000005) >> 16) & 0xff; else - tlb_lld_2m[ENTRIES] = (eax >> 16) & mask; + tlb_lld_2m = (eax >> 16) & mask; /* a 4M entry uses two 2M entries */ - tlb_lld_4m[ENTRIES] = tlb_lld_2m[ENTRIES] >> 1; + tlb_lld_4m = tlb_lld_2m >> 1; /* Handle ITLB 2M and 4M sizes, fall back to L1 if L2 is disabled */ if (!(eax & mask)) { cpuid(0x80000005, &eax, &ebx, &ecx, &edx); - tlb_lli_2m[ENTRIES] = eax & 0xff; + tlb_lli_2m = eax & 0xff; } else - tlb_lli_2m[ENTRIES] = eax & mask; + tlb_lli_2m = eax & mask; - tlb_lli_4m[ENTRIES] = tlb_lli_2m[ENTRIES] >> 1; + tlb_lli_4m = tlb_lli_2m >> 1; } static const struct cpu_dev hygon_cpu_dev = { diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 134368a3f4b1..4cbb2e69bea1 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -1,40 +1,31 @@ // SPDX-License-Identifier: GPL-2.0 -#include <linux/kernel.h> -#include <linux/pgtable.h> -#include <linux/string.h> #include <linux/bitops.h> -#include <linux/smp.h> -#include <linux/sched.h> -#include <linux/sched/clock.h> -#include <linux/thread_info.h> #include <linux/init.h> -#include <linux/uaccess.h> +#include <linux/kernel.h> +#include <linux/minmax.h> +#include <linux/smp.h> +#include <linux/string.h> + +#ifdef CONFIG_X86_64 +#include <linux/topology.h> +#endif -#include <asm/cpufeature.h> -#include <asm/msr.h> #include <asm/bugs.h> +#include <asm/cpu_device_id.h> +#include <asm/cpufeature.h> #include <asm/cpu.h> +#include <asm/hwcap2.h> #include <asm/intel-family.h> #include <asm/microcode.h> -#include <asm/hwcap2.h> -#include <asm/elf.h> -#include <asm/cpu_device_id.h> -#include <asm/resctrl.h> +#include <asm/msr.h> #include <asm/numa.h> +#include <asm/resctrl.h> #include <asm/thermal.h> - -#ifdef CONFIG_X86_64 -#include <linux/topology.h> -#endif +#include <asm/uaccess.h> #include "cpu.h" -#ifdef CONFIG_X86_LOCAL_APIC -#include <asm/mpspec.h> -#include <asm/apic.h> -#endif - /* * Processors which have self-snooping capability can handle conflicting * memory type across CPUs by snooping its own cache. However, there exists @@ -195,7 +186,7 @@ void intel_unlock_cpuid_leafs(struct cpuinfo_x86 *c) if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) return; - if (c->x86 < 6 || (c->x86 == 6 && c->x86_model < 0xd)) + if (c->x86_vfm < INTEL_PENTIUM_M_DOTHAN) return; /* @@ -210,10 +201,6 @@ static void early_init_intel(struct cpuinfo_x86 *c) { u64 misc_enable; - if ((c->x86 == 0xf && c->x86_model >= 0x03) || - (c->x86 == 0x6 && c->x86_model >= 0x0e)) - set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); - if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64)) c->microcode = intel_get_microcode_revision(); @@ -256,8 +243,8 @@ static void early_init_intel(struct cpuinfo_x86 *c) #endif /* CPUID workaround for 0F33/0F34 CPU */ - if (c->x86 == 0xF && c->x86_model == 0x3 - && (c->x86_stepping == 0x3 || c->x86_stepping == 0x4)) + if (c->x86_vfm == INTEL_P4_PRESCOTT && + (c->x86_stepping == 0x3 || c->x86_stepping == 0x4)) c->x86_phys_bits = 36; /* @@ -266,10 +253,16 @@ static void early_init_intel(struct cpuinfo_x86 *c) * * It is also reliable across cores and sockets. (but not across * cabinets - we turn it off in that case explicitly.) + * + * Use a model-specific check for some older CPUs that have invariant + * TSC but may not report it architecturally via 8000_0007. */ if (c->x86_power & (1 << 8)) { set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); + } else if ((c->x86_vfm >= INTEL_P4_PRESCOTT && c->x86_vfm <= INTEL_P4_WILLAMETTE) || + (c->x86_vfm >= INTEL_CORE_YONAH && c->x86_vfm <= INTEL_IVYBRIDGE)) { + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); } /* Penwell and Cloverview have the TSC which doesn't sleep on S3 */ @@ -298,12 +291,19 @@ static void early_init_intel(struct cpuinfo_x86 *c) clear_cpu_cap(c, X86_FEATURE_PAT); /* - * If fast string is not enabled in IA32_MISC_ENABLE for any reason, - * clear the fast string and enhanced fast string CPU capabilities. + * Modern CPUs are generally expected to have a sane fast string + * implementation. However, BIOSes typically have a knob to tweak + * the architectural MISC_ENABLE.FAST_STRING enable bit. + * + * Adhere to the preference and program the Linux-defined fast + * string flag and enhanced fast string capabilities accordingly. */ - if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) { + if (c->x86_vfm >= INTEL_PENTIUM_M_DOTHAN) { rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); - if (!(misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING)) { + if (misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING) { + /* X86_FEATURE_ERMS is set based on CPUID */ + set_cpu_cap(c, X86_FEATURE_REP_GOOD); + } else { pr_info("Disabled fast string operations\n"); setup_clear_cpu_cap(X86_FEATURE_REP_GOOD); setup_clear_cpu_cap(X86_FEATURE_ERMS); @@ -350,9 +350,7 @@ static void bsp_init_intel(struct cpuinfo_x86 *c) int ppro_with_ram_bug(void) { /* Uses data from early_cpu_detect now */ - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && - boot_cpu_data.x86 == 6 && - boot_cpu_data.x86_model == 1 && + if (boot_cpu_data.x86_vfm == INTEL_PENTIUM_PRO && boot_cpu_data.x86_stepping < 8) { pr_info("Pentium Pro with Errata#50 detected. Taking evasive action.\n"); return 1; @@ -369,9 +367,8 @@ static void intel_smp_check(struct cpuinfo_x86 *c) /* * Mask B, Pentium, but not Pentium MMX */ - if (c->x86 == 5 && - c->x86_stepping >= 1 && c->x86_stepping <= 4 && - c->x86_model <= 3) { + if (c->x86_vfm >= INTEL_FAM5_START && c->x86_vfm < INTEL_PENTIUM_MMX && + c->x86_stepping >= 1 && c->x86_stepping <= 4) { /* * Remember we have B step Pentia with bugs */ @@ -398,7 +395,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c) * The Quark is also family 5, but does not have the same bug. */ clear_cpu_bug(c, X86_BUG_F00F); - if (c->x86 == 5 && c->x86_model < 9) { + if (c->x86_vfm >= INTEL_FAM5_START && c->x86_vfm < INTEL_QUARK_X1000) { static int f00f_workaround_enabled; set_cpu_bug(c, X86_BUG_F00F); @@ -413,7 +410,8 @@ static void intel_workarounds(struct cpuinfo_x86 *c) * SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until * model 3 mask 3 */ - if ((c->x86<<8 | c->x86_model<<4 | c->x86_stepping) < 0x633) + if ((c->x86_vfm == INTEL_PENTIUM_II_KLAMATH && c->x86_stepping < 3) || + c->x86_vfm < INTEL_PENTIUM_II_KLAMATH) clear_cpu_cap(c, X86_FEATURE_SEP); /* @@ -431,7 +429,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c) * P4 Xeon erratum 037 workaround. * Hardware prefetcher may cause stale data to be loaded into the cache. */ - if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_stepping == 1)) { + if (c->x86_vfm == INTEL_P4_WILLAMETTE && c->x86_stepping == 1) { if (msr_set_bit(MSR_IA32_MISC_ENABLE, MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT) > 0) { pr_info("CPU: C0 stepping P4 Xeon detected.\n"); @@ -445,27 +443,20 @@ static void intel_workarounds(struct cpuinfo_x86 *c) * integrated APIC (see 11AP erratum in "Pentium Processor * Specification Update"). */ - if (boot_cpu_has(X86_FEATURE_APIC) && (c->x86<<8 | c->x86_model<<4) == 0x520 && + if (boot_cpu_has(X86_FEATURE_APIC) && c->x86_vfm == INTEL_PENTIUM_75 && (c->x86_stepping < 0x6 || c->x86_stepping == 0xb)) set_cpu_bug(c, X86_BUG_11AP); - #ifdef CONFIG_X86_INTEL_USERCOPY /* - * Set up the preferred alignment for movsl bulk memory moves + * MOVSL bulk memory moves can be slow when source and dest are not + * both 8-byte aligned. PII/PIII only like MOVSL with 8-byte alignment. + * + * Set the preferred alignment for Pentium Pro and newer processors, as + * it has only been tested on these. */ - switch (c->x86) { - case 4: /* 486: untested */ - break; - case 5: /* Old Pentia: untested */ - break; - case 6: /* PII/PIII only like movsl with 8-byte alignment */ + if (c->x86_vfm >= INTEL_PENTIUM_PRO) movsl_mask.mask = 7; - break; - case 15: /* P4 is OK down to 8-byte alignment */ - movsl_mask.mask = 7; - break; - } #endif intel_smp_check(c); @@ -563,8 +554,6 @@ static void init_intel(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_64 if (c->x86 == 15) c->x86_cache_alignment = c->x86_clflush_size * 2; - if (c->x86 == 6) - set_cpu_cap(c, X86_FEATURE_REP_GOOD); #else /* * Names for the Pentium II/Celeron processors @@ -622,14 +611,14 @@ static unsigned int intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) * to determine which, so we use a boottime override * for the 512kb model, and assume 256 otherwise. */ - if ((c->x86 == 6) && (c->x86_model == 11) && (size == 0)) + if (c->x86_vfm == INTEL_PENTIUM_III_TUALATIN && size == 0) size = 256; /* * Intel Quark SoC X1000 contains a 4-way set associative * 16K cache with a 16 byte cache line and 256 lines per tag */ - if ((c->x86 == 5) && (c->x86_model == 9)) + if (c->x86_vfm == INTEL_QUARK_X1000) size = 16; return size; } @@ -667,50 +656,58 @@ static unsigned int intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) */ #define TLB_0x63_2M_4M_ENTRIES 32 +struct _tlb_table { + unsigned char descriptor; + char tlb_type; + unsigned int entries; +}; + static const struct _tlb_table intel_tlb_table[] = { - { 0x01, TLB_INST_4K, 32, " TLB_INST 4 KByte pages, 4-way set associative" }, - { 0x02, TLB_INST_4M, 2, " TLB_INST 4 MByte pages, full associative" }, - { 0x03, TLB_DATA_4K, 64, " TLB_DATA 4 KByte pages, 4-way set associative" }, - { 0x04, TLB_DATA_4M, 8, " TLB_DATA 4 MByte pages, 4-way set associative" }, - { 0x05, TLB_DATA_4M, 32, " TLB_DATA 4 MByte pages, 4-way set associative" }, - { 0x0b, TLB_INST_4M, 4, " TLB_INST 4 MByte pages, 4-way set associative" }, - { 0x4f, TLB_INST_4K, 32, " TLB_INST 4 KByte pages" }, - { 0x50, TLB_INST_ALL, 64, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" }, - { 0x51, TLB_INST_ALL, 128, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" }, - { 0x52, TLB_INST_ALL, 256, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" }, - { 0x55, TLB_INST_2M_4M, 7, " TLB_INST 2-MByte or 4-MByte pages, fully associative" }, - { 0x56, TLB_DATA0_4M, 16, " TLB_DATA0 4 MByte pages, 4-way set associative" }, - { 0x57, TLB_DATA0_4K, 16, " TLB_DATA0 4 KByte pages, 4-way associative" }, - { 0x59, TLB_DATA0_4K, 16, " TLB_DATA0 4 KByte pages, fully associative" }, - { 0x5a, TLB_DATA0_2M_4M, 32, " TLB_DATA0 2-MByte or 4 MByte pages, 4-way set associative" }, - { 0x5b, TLB_DATA_4K_4M, 64, " TLB_DATA 4 KByte and 4 MByte pages" }, - { 0x5c, TLB_DATA_4K_4M, 128, " TLB_DATA 4 KByte and 4 MByte pages" }, - { 0x5d, TLB_DATA_4K_4M, 256, " TLB_DATA 4 KByte and 4 MByte pages" }, - { 0x61, TLB_INST_4K, 48, " TLB_INST 4 KByte pages, full associative" }, - { 0x63, TLB_DATA_1G_2M_4M, 4, " TLB_DATA 1 GByte pages, 4-way set associative" - " (plus 32 entries TLB_DATA 2 MByte or 4 MByte pages, not encoded here)" }, - { 0x6b, TLB_DATA_4K, 256, " TLB_DATA 4 KByte pages, 8-way associative" }, - { 0x6c, TLB_DATA_2M_4M, 128, " TLB_DATA 2 MByte or 4 MByte pages, 8-way associative" }, - { 0x6d, TLB_DATA_1G, 16, " TLB_DATA 1 GByte pages, fully associative" }, - { 0x76, TLB_INST_2M_4M, 8, " TLB_INST 2-MByte or 4-MByte pages, fully associative" }, - { 0xb0, TLB_INST_4K, 128, " TLB_INST 4 KByte pages, 4-way set associative" }, - { 0xb1, TLB_INST_2M_4M, 4, " TLB_INST 2M pages, 4-way, 8 entries or 4M pages, 4-way entries" }, - { 0xb2, TLB_INST_4K, 64, " TLB_INST 4KByte pages, 4-way set associative" }, - { 0xb3, TLB_DATA_4K, 128, " TLB_DATA 4 KByte pages, 4-way set associative" }, - { 0xb4, TLB_DATA_4K, 256, " TLB_DATA 4 KByte pages, 4-way associative" }, - { 0xb5, TLB_INST_4K, 64, " TLB_INST 4 KByte pages, 8-way set associative" }, - { 0xb6, TLB_INST_4K, 128, " TLB_INST 4 KByte pages, 8-way set associative" }, - { 0xba, TLB_DATA_4K, 64, " TLB_DATA 4 KByte pages, 4-way associative" }, - { 0xc0, TLB_DATA_4K_4M, 8, " TLB_DATA 4 KByte and 4 MByte pages, 4-way associative" }, - { 0xc1, STLB_4K_2M, 1024, " STLB 4 KByte and 2 MByte pages, 8-way associative" }, - { 0xc2, TLB_DATA_2M_4M, 16, " TLB_DATA 2 MByte/4MByte pages, 4-way associative" }, - { 0xca, STLB_4K, 512, " STLB 4 KByte pages, 4-way associative" }, + { 0x01, TLB_INST_4K, 32}, /* TLB_INST 4 KByte pages, 4-way set associative */ + { 0x02, TLB_INST_4M, 2}, /* TLB_INST 4 MByte pages, full associative */ + { 0x03, TLB_DATA_4K, 64}, /* TLB_DATA 4 KByte pages, 4-way set associative */ + { 0x04, TLB_DATA_4M, 8}, /* TLB_DATA 4 MByte pages, 4-way set associative */ + { 0x05, TLB_DATA_4M, 32}, /* TLB_DATA 4 MByte pages, 4-way set associative */ + { 0x0b, TLB_INST_4M, 4}, /* TLB_INST 4 MByte pages, 4-way set associative */ + { 0x4f, TLB_INST_4K, 32}, /* TLB_INST 4 KByte pages */ + { 0x50, TLB_INST_ALL, 64}, /* TLB_INST 4 KByte and 2-MByte or 4-MByte pages */ + { 0x51, TLB_INST_ALL, 128}, /* TLB_INST 4 KByte and 2-MByte or 4-MByte pages */ + { 0x52, TLB_INST_ALL, 256}, /* TLB_INST 4 KByte and 2-MByte or 4-MByte pages */ + { 0x55, TLB_INST_2M_4M, 7}, /* TLB_INST 2-MByte or 4-MByte pages, fully associative */ + { 0x56, TLB_DATA0_4M, 16}, /* TLB_DATA0 4 MByte pages, 4-way set associative */ + { 0x57, TLB_DATA0_4K, 16}, /* TLB_DATA0 4 KByte pages, 4-way associative */ + { 0x59, TLB_DATA0_4K, 16}, /* TLB_DATA0 4 KByte pages, fully associative */ + { 0x5a, TLB_DATA0_2M_4M, 32}, /* TLB_DATA0 2-MByte or 4 MByte pages, 4-way set associative */ + { 0x5b, TLB_DATA_4K_4M, 64}, /* TLB_DATA 4 KByte and 4 MByte pages */ + { 0x5c, TLB_DATA_4K_4M, 128}, /* TLB_DATA 4 KByte and 4 MByte pages */ + { 0x5d, TLB_DATA_4K_4M, 256}, /* TLB_DATA 4 KByte and 4 MByte pages */ + { 0x61, TLB_INST_4K, 48}, /* TLB_INST 4 KByte pages, full associative */ + { 0x63, TLB_DATA_1G_2M_4M, 4}, /* TLB_DATA 1 GByte pages, 4-way set associative + * (plus 32 entries TLB_DATA 2 MByte or 4 MByte pages, not encoded here) */ + { 0x6b, TLB_DATA_4K, 256}, /* TLB_DATA 4 KByte pages, 8-way associative */ + { 0x6c, TLB_DATA_2M_4M, 128}, /* TLB_DATA 2 MByte or 4 MByte pages, 8-way associative */ + { 0x6d, TLB_DATA_1G, 16}, /* TLB_DATA 1 GByte pages, fully associative */ + { 0x76, TLB_INST_2M_4M, 8}, /* TLB_INST 2-MByte or 4-MByte pages, fully associative */ + { 0xb0, TLB_INST_4K, 128}, /* TLB_INST 4 KByte pages, 4-way set associative */ + { 0xb1, TLB_INST_2M_4M, 4}, /* TLB_INST 2M pages, 4-way, 8 entries or 4M pages, 4-way entries */ + { 0xb2, TLB_INST_4K, 64}, /* TLB_INST 4KByte pages, 4-way set associative */ + { 0xb3, TLB_DATA_4K, 128}, /* TLB_DATA 4 KByte pages, 4-way set associative */ + { 0xb4, TLB_DATA_4K, 256}, /* TLB_DATA 4 KByte pages, 4-way associative */ + { 0xb5, TLB_INST_4K, 64}, /* TLB_INST 4 KByte pages, 8-way set associative */ + { 0xb6, TLB_INST_4K, 128}, /* TLB_INST 4 KByte pages, 8-way set associative */ + { 0xba, TLB_DATA_4K, 64}, /* TLB_DATA 4 KByte pages, 4-way associative */ + { 0xc0, TLB_DATA_4K_4M, 8}, /* TLB_DATA 4 KByte and 4 MByte pages, 4-way associative */ + { 0xc1, STLB_4K_2M, 1024}, /* STLB 4 KByte and 2 MByte pages, 8-way associative */ + { 0xc2, TLB_DATA_2M_4M, 16}, /* TLB_DATA 2 MByte/4MByte pages, 4-way associative */ + { 0xca, STLB_4K, 512}, /* STLB 4 KByte pages, 4-way associative */ { 0x00, 0, 0 } }; static void intel_tlb_lookup(const unsigned char desc) { + unsigned int entries; unsigned char k; + if (desc == 0) return; @@ -722,81 +719,58 @@ static void intel_tlb_lookup(const unsigned char desc) if (intel_tlb_table[k].tlb_type == 0) return; + entries = intel_tlb_table[k].entries; switch (intel_tlb_table[k].tlb_type) { case STLB_4K: - if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries) - tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries; - if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries) - tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries; + tlb_lli_4k = max(tlb_lli_4k, entries); + tlb_lld_4k = max(tlb_lld_4k, entries); break; case STLB_4K_2M: - if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries) - tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries; - if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries) - tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries; - if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries) - tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries; - if (tlb_lld_2m[ENTRIES] < intel_tlb_table[k].entries) - tlb_lld_2m[ENTRIES] = intel_tlb_table[k].entries; - if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries) - tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries; - if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries) - tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries; + tlb_lli_4k = max(tlb_lli_4k, entries); + tlb_lld_4k = max(tlb_lld_4k, entries); + tlb_lli_2m = max(tlb_lli_2m, entries); + tlb_lld_2m = max(tlb_lld_2m, entries); + tlb_lli_4m = max(tlb_lli_4m, entries); + tlb_lld_4m = max(tlb_lld_4m, entries); break; case TLB_INST_ALL: - if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries) - tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries; - if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries) - tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries; - if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries) - tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries; + tlb_lli_4k = max(tlb_lli_4k, entries); + tlb_lli_2m = max(tlb_lli_2m, entries); + tlb_lli_4m = max(tlb_lli_4m, entries); break; case TLB_INST_4K: - if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries) - tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries; + tlb_lli_4k = max(tlb_lli_4k, entries); break; case TLB_INST_4M: - if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries) - tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries; + tlb_lli_4m = max(tlb_lli_4m, entries); break; case TLB_INST_2M_4M: - if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries) - tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries; - if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries) - tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries; + tlb_lli_2m = max(tlb_lli_2m, entries); + tlb_lli_4m = max(tlb_lli_4m, entries); break; case TLB_DATA_4K: case TLB_DATA0_4K: - if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries) - tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries; + tlb_lld_4k = max(tlb_lld_4k, entries); break; case TLB_DATA_4M: case TLB_DATA0_4M: - if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries) - tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries; + tlb_lld_4m = max(tlb_lld_4m, entries); break; case TLB_DATA_2M_4M: case TLB_DATA0_2M_4M: - if (tlb_lld_2m[ENTRIES] < intel_tlb_table[k].entries) - tlb_lld_2m[ENTRIES] = intel_tlb_table[k].entries; - if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries) - tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries; + tlb_lld_2m = max(tlb_lld_2m, entries); + tlb_lld_4m = max(tlb_lld_4m, entries); break; case TLB_DATA_4K_4M: - if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries) - tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries; - if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries) - tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries; + tlb_lld_4k = max(tlb_lld_4k, entries); + tlb_lld_4m = max(tlb_lld_4m, entries); break; case TLB_DATA_1G_2M_4M: - if (tlb_lld_2m[ENTRIES] < TLB_0x63_2M_4M_ENTRIES) - tlb_lld_2m[ENTRIES] = TLB_0x63_2M_4M_ENTRIES; - if (tlb_lld_4m[ENTRIES] < TLB_0x63_2M_4M_ENTRIES) - tlb_lld_4m[ENTRIES] = TLB_0x63_2M_4M_ENTRIES; + tlb_lld_2m = max(tlb_lld_2m, TLB_0x63_2M_4M_ENTRIES); + tlb_lld_4m = max(tlb_lld_4m, TLB_0x63_2M_4M_ENTRIES); fallthrough; case TLB_DATA_1G: - if (tlb_lld_1g[ENTRIES] < intel_tlb_table[k].entries) - tlb_lld_1g[ENTRIES] = intel_tlb_table[k].entries; + tlb_lld_1g = max(tlb_lld_1g, entries); break; } } @@ -891,34 +865,3 @@ static const struct cpu_dev intel_cpu_dev = { }; cpu_dev_register(intel_cpu_dev); - -#define X86_HYBRID_CPU_TYPE_ID_SHIFT 24 - -/** - * get_this_hybrid_cpu_type() - Get the type of this hybrid CPU - * - * Returns the CPU type [31:24] (i.e., Atom or Core) of a CPU in - * a hybrid processor. If the processor is not hybrid, returns 0. - */ -u8 get_this_hybrid_cpu_type(void) -{ - if (!cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) - return 0; - - return cpuid_eax(0x0000001a) >> X86_HYBRID_CPU_TYPE_ID_SHIFT; -} - -/** - * get_this_hybrid_cpu_native_id() - Get the native id of this hybrid CPU - * - * Returns the uarch native ID [23:0] of a CPU in a hybrid processor. - * If the processor is not hybrid, returns 0. - */ -u32 get_this_hybrid_cpu_native_id(void) -{ - if (!cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) - return 0; - - return cpuid_eax(0x0000001a) & - (BIT_ULL(X86_HYBRID_CPU_TYPE_ID_SHIFT) - 1); -} diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c index 4f3c65429f82..6af1e8baeb0f 100644 --- a/arch/x86/kernel/cpu/match.c +++ b/arch/x86/kernel/cpu/match.c @@ -6,6 +6,34 @@ #include <linux/slab.h> /** + * x86_match_vendor_cpu_type - helper function to match the hardware defined + * cpu-type for a single entry in the x86_cpu_id + * table. Note, this function does not match the + * generic cpu-types TOPO_CPU_TYPE_EFFICIENCY and + * TOPO_CPU_TYPE_PERFORMANCE. + * @c: Pointer to the cpuinfo_x86 structure of the CPU to match. + * @m: Pointer to the x86_cpu_id entry to match against. + * + * Return: true if the cpu-type matches, false otherwise. + */ +static bool x86_match_vendor_cpu_type(struct cpuinfo_x86 *c, const struct x86_cpu_id *m) +{ + if (m->type == X86_CPU_TYPE_ANY) + return true; + + /* Hybrid CPUs are special, they are assumed to match all cpu-types */ + if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) + return true; + + if (c->x86_vendor == X86_VENDOR_INTEL) + return m->type == c->topo.intel_type; + if (c->x86_vendor == X86_VENDOR_AMD) + return m->type == c->topo.amd_type; + + return false; +} + +/** * x86_match_cpu - match current CPU against an array of x86_cpu_ids * @match: Pointer to array of x86_cpu_ids. Last entry terminated with * {}. @@ -50,6 +78,8 @@ const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match) continue; if (m->feature != X86_FEATURE_ANY && !cpu_has(c, m->feature)) continue; + if (!x86_match_vendor_cpu_type(c, m)) + continue; return m; } return NULL; diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index f3d534807d91..819199bc0119 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -74,7 +74,7 @@ void intel_collect_cpu_info(struct cpu_signature *sig) sig->pf = 0; sig->rev = intel_get_microcode_revision(); - if (x86_model(sig->sig) >= 5 || x86_family(sig->sig) > 6) { + if (IFM(x86_family(sig->sig), x86_model(sig->sig)) >= INTEL_PENTIUM_III_DESCHUTES) { unsigned int val[2]; /* get processor flags from MSR 0x17 */ diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 2fdfda2b60e4..e2c6b471d230 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -9,9 +9,11 @@ #include <linux/io.h> #include <linux/mm.h> #include <linux/cc_platform.h> +#include <linux/string_choices.h> #include <asm/processor-flags.h> #include <asm/cacheinfo.h> #include <asm/cpufeature.h> +#include <asm/cpu_device_id.h> #include <asm/hypervisor.h> #include <asm/mshyperv.h> #include <asm/tlbflush.h> @@ -646,10 +648,10 @@ static void __init print_mtrr_state(void) pr_info("MTRR default type: %s\n", mtrr_attrib_to_str(mtrr_state.def_type)); if (mtrr_state.have_fixed) { - pr_info("MTRR fixed ranges %sabled:\n", - ((mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED) && - (mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED)) ? - "en" : "dis"); + pr_info("MTRR fixed ranges %s:\n", + str_enabled_disabled( + (mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED) && + (mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED))); print_fixed(0x00000, 0x10000, mtrr_state.fixed_ranges + 0); for (i = 0; i < 2; ++i) print_fixed(0x80000 + i * 0x20000, 0x04000, @@ -661,8 +663,8 @@ static void __init print_mtrr_state(void) /* tail */ print_fixed_last(); } - pr_info("MTRR variable ranges %sabled:\n", - mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED ? "en" : "dis"); + pr_info("MTRR variable ranges %s:\n", + str_enabled_disabled(mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED)); high_width = (boot_cpu_data.x86_phys_bits - (32 - PAGE_SHIFT) + 3) / 4; for (i = 0; i < num_var_ranges; ++i) { @@ -1025,8 +1027,7 @@ int generic_validate_add_page(unsigned long base, unsigned long size, * For Intel PPro stepping <= 7 * must be 4 MiB aligned and not touch 0x70000000 -> 0x7003FFFF */ - if (mtrr_if == &generic_mtrr_ops && boot_cpu_data.x86 == 6 && - boot_cpu_data.x86_model == 1 && + if (mtrr_if == &generic_mtrr_ops && boot_cpu_data.x86_vfm == INTEL_PENTIUM_PRO && boot_cpu_data.x86_stepping <= 7) { if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) { pr_warn("mtrr: base(0x%lx000) is not 4 MiB aligned\n", base); diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c index a5c506f6da7f..4049235b1bfe 100644 --- a/arch/x86/kernel/cpu/mtrr/if.c +++ b/arch/x86/kernel/cpu/mtrr/if.c @@ -99,7 +99,6 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) char *ptr; char line[LINE_SIZE]; int length; - size_t linelen; memset(line, 0, LINE_SIZE); @@ -108,9 +107,8 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) if (length < 0) return length; - linelen = strlen(line); - ptr = line + linelen - 1; - if (linelen && *ptr == '\n') + ptr = line + length - 1; + if (length && *ptr == '\n') *ptr = '\0'; if (!strncmp(line, "disable=", 8)) { diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 340af8155658..0be61c45400c 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -140,7 +140,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs) x86_platform.guest.enc_kexec_begin(); x86_platform.guest.enc_kexec_finish(); - crash_save_cpu(regs, safe_smp_processor_id()); + crash_save_cpu(regs, smp_processor_id()); } #if defined(CONFIG_KEXEC_FILE) || defined(CONFIG_CRASH_HOTPLUG) diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index b4905d5173fd..722fd712e1cf 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -37,7 +37,7 @@ const char *stack_type_name(enum stack_type type) static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info) { - unsigned long *begin = (unsigned long *)this_cpu_read(pcpu_hot.hardirq_stack_ptr); + unsigned long *begin = (unsigned long *)this_cpu_read(hardirq_stack_ptr); unsigned long *end = begin + (THREAD_SIZE / sizeof(long)); /* @@ -62,7 +62,7 @@ static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info) static bool in_softirq_stack(unsigned long *stack, struct stack_info *info) { - unsigned long *begin = (unsigned long *)this_cpu_read(pcpu_hot.softirq_stack_ptr); + unsigned long *begin = (unsigned long *)this_cpu_read(softirq_stack_ptr); unsigned long *end = begin + (THREAD_SIZE / sizeof(long)); /* diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index f05339fee778..6c5defd6569a 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -134,7 +134,7 @@ static __always_inline bool in_exception_stack(unsigned long *stack, struct stac static __always_inline bool in_irq_stack(unsigned long *stack, struct stack_info *info) { - unsigned long *end = (unsigned long *)this_cpu_read(pcpu_hot.hardirq_stack_ptr); + unsigned long *end = (unsigned long *)this_cpu_read(hardirq_stack_ptr); unsigned long *begin; /* diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 44f937015e1e..fc1714bad045 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -19,6 +19,7 @@ #include <linux/usb/ehci_def.h> #include <linux/usb/xhci-dbgp.h> #include <asm/pci_x86.h> +#include <linux/static_call.h> /* Simple VGA output */ #define VGABASE (__ISA_IO_base + 0xb8000) @@ -94,26 +95,28 @@ static unsigned long early_serial_base = 0x3f8; /* ttyS0 */ #define DLL 0 /* Divisor Latch Low */ #define DLH 1 /* Divisor latch High */ -static unsigned int io_serial_in(unsigned long addr, int offset) +static __noendbr unsigned int io_serial_in(unsigned long addr, int offset) { return inb(addr + offset); } +ANNOTATE_NOENDBR_SYM(io_serial_in); -static void io_serial_out(unsigned long addr, int offset, int value) +static __noendbr void io_serial_out(unsigned long addr, int offset, int value) { outb(value, addr + offset); } +ANNOTATE_NOENDBR_SYM(io_serial_out); -static unsigned int (*serial_in)(unsigned long addr, int offset) = io_serial_in; -static void (*serial_out)(unsigned long addr, int offset, int value) = io_serial_out; +DEFINE_STATIC_CALL(serial_in, io_serial_in); +DEFINE_STATIC_CALL(serial_out, io_serial_out); static int early_serial_putc(unsigned char ch) { unsigned timeout = 0xffff; - while ((serial_in(early_serial_base, LSR) & XMTRDY) == 0 && --timeout) + while ((static_call(serial_in)(early_serial_base, LSR) & XMTRDY) == 0 && --timeout) cpu_relax(); - serial_out(early_serial_base, TXR, ch); + static_call(serial_out)(early_serial_base, TXR, ch); return timeout ? 0 : -1; } @@ -131,16 +134,16 @@ static __init void early_serial_hw_init(unsigned divisor) { unsigned char c; - serial_out(early_serial_base, LCR, 0x3); /* 8n1 */ - serial_out(early_serial_base, IER, 0); /* no interrupt */ - serial_out(early_serial_base, FCR, 0); /* no fifo */ - serial_out(early_serial_base, MCR, 0x3); /* DTR + RTS */ + static_call(serial_out)(early_serial_base, LCR, 0x3); /* 8n1 */ + static_call(serial_out)(early_serial_base, IER, 0); /* no interrupt */ + static_call(serial_out)(early_serial_base, FCR, 0); /* no fifo */ + static_call(serial_out)(early_serial_base, MCR, 0x3); /* DTR + RTS */ - c = serial_in(early_serial_base, LCR); - serial_out(early_serial_base, LCR, c | DLAB); - serial_out(early_serial_base, DLL, divisor & 0xff); - serial_out(early_serial_base, DLH, (divisor >> 8) & 0xff); - serial_out(early_serial_base, LCR, c & ~DLAB); + c = static_call(serial_in)(early_serial_base, LCR); + static_call(serial_out)(early_serial_base, LCR, c | DLAB); + static_call(serial_out)(early_serial_base, DLL, divisor & 0xff); + static_call(serial_out)(early_serial_base, DLH, (divisor >> 8) & 0xff); + static_call(serial_out)(early_serial_base, LCR, c & ~DLAB); } #define DEFAULT_BAUD 9600 @@ -183,28 +186,26 @@ static __init void early_serial_init(char *s) /* Convert from baud to divisor value */ divisor = 115200 / baud; - /* These will always be IO based ports */ - serial_in = io_serial_in; - serial_out = io_serial_out; - /* Set up the HW */ early_serial_hw_init(divisor); } #ifdef CONFIG_PCI -static void mem32_serial_out(unsigned long addr, int offset, int value) +static __noendbr void mem32_serial_out(unsigned long addr, int offset, int value) { u32 __iomem *vaddr = (u32 __iomem *)addr; /* shift implied by pointer type */ writel(value, vaddr + offset); } +ANNOTATE_NOENDBR_SYM(mem32_serial_out); -static unsigned int mem32_serial_in(unsigned long addr, int offset) +static __noendbr unsigned int mem32_serial_in(unsigned long addr, int offset) { u32 __iomem *vaddr = (u32 __iomem *)addr; /* shift implied by pointer type */ return readl(vaddr + offset); } +ANNOTATE_NOENDBR_SYM(mem32_serial_in); /* * early_pci_serial_init() @@ -278,15 +279,13 @@ static __init void early_pci_serial_init(char *s) */ if ((bar0 & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_IO) { /* it is IO mapped */ - serial_in = io_serial_in; - serial_out = io_serial_out; early_serial_base = bar0 & PCI_BASE_ADDRESS_IO_MASK; write_pci_config(bus, slot, func, PCI_COMMAND, cmdreg|PCI_COMMAND_IO); } else { /* It is memory mapped - assume 32-bit alignment */ - serial_in = mem32_serial_in; - serial_out = mem32_serial_out; + static_call_update(serial_in, mem32_serial_in); + static_call_update(serial_out, mem32_serial_out); /* WARNING! assuming the address is always in the first 4G */ early_serial_base = (unsigned long)early_ioremap(bar0 & PCI_BASE_ADDRESS_MEM_MASK, 0x10); diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 166bc0ea3bdf..cace6e8d7cc7 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -118,13 +118,10 @@ ftrace_modify_code_direct(unsigned long ip, const char *old_code, return ret; /* replace the text with the new text */ - if (ftrace_poke_late) { + if (ftrace_poke_late) text_poke_queue((void *)ip, new_code, MCOUNT_INSN_SIZE, NULL); - } else { - mutex_lock(&text_mutex); - text_poke((void *)ip, new_code, MCOUNT_INSN_SIZE); - mutex_unlock(&text_mutex); - } + else + text_poke_early((void *)ip, new_code, MCOUNT_INSN_SIZE); return 0; } @@ -321,7 +318,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) unsigned const char op_ref[] = { 0x48, 0x8b, 0x15 }; unsigned const char retq[] = { RET_INSN_OPCODE, INT3_INSN_OPCODE }; union ftrace_op_code_union op_ptr; - void *ret; + int ret; if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) { start_offset = (unsigned long)ftrace_regs_caller; @@ -352,15 +349,15 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) npages = DIV_ROUND_UP(*tramp_size, PAGE_SIZE); /* Copy ftrace_caller onto the trampoline memory */ - ret = text_poke_copy(trampoline, (void *)start_offset, size); - if (WARN_ON(!ret)) + ret = copy_from_kernel_nofault(trampoline, (void *)start_offset, size); + if (WARN_ON(ret < 0)) goto fail; ip = trampoline + size; if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) __text_gen_insn(ip, JMP32_INSN_OPCODE, ip, x86_return_thunk, JMP32_INSN_SIZE); else - text_poke_copy(ip, retq, sizeof(retq)); + memcpy(ip, retq, sizeof(retq)); /* No need to test direct calls on created trampolines */ if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) { @@ -368,7 +365,8 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) ip = trampoline + (jmp_offset - start_offset); if (WARN_ON(*(char *)ip != 0x75)) goto fail; - if (!text_poke_copy(ip, x86_nops[2], 2)) + ret = copy_from_kernel_nofault(ip, x86_nops[2], 2); + if (ret < 0) goto fail; } @@ -381,7 +379,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) */ ptr = (unsigned long *)(trampoline + size + RET_SIZE); - text_poke_copy(ptr, &ops, sizeof(unsigned long)); + *ptr = (unsigned long)ops; op_offset -= start_offset; memcpy(&op_ptr, trampoline + op_offset, OP_REF_SIZE); @@ -397,7 +395,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) op_ptr.offset = offset; /* put in the new offset to the ftrace_ops */ - text_poke_copy(trampoline + op_offset, &op_ptr, OP_REF_SIZE); + memcpy(trampoline + op_offset, &op_ptr, OP_REF_SIZE); /* put in the call to the function */ mutex_lock(&text_mutex); @@ -407,9 +405,9 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) * the depth accounting before the call already. */ dest = ftrace_ops_get_func(ops); - text_poke_copy_locked(trampoline + call_offset, - text_gen_insn(CALL_INSN_OPCODE, trampoline + call_offset, dest), - CALL_INSN_SIZE, false); + memcpy(trampoline + call_offset, + text_gen_insn(CALL_INSN_OPCODE, trampoline + call_offset, dest), + CALL_INSN_SIZE); mutex_unlock(&text_mutex); /* ALLOC_TRAMP flags lets us know we created it */ diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S index d51647228596..367da3638167 100644 --- a/arch/x86/kernel/ftrace_64.S +++ b/arch/x86/kernel/ftrace_64.S @@ -146,12 +146,14 @@ SYM_FUNC_END(ftrace_stub_graph) #ifdef CONFIG_DYNAMIC_FTRACE SYM_FUNC_START(__fentry__) + ANNOTATE_NOENDBR CALL_DEPTH_ACCOUNT RET SYM_FUNC_END(__fentry__) EXPORT_SYMBOL(__fentry__) SYM_FUNC_START(ftrace_caller) + ANNOTATE_NOENDBR /* save_mcount_regs fills in first two parameters */ save_mcount_regs @@ -197,6 +199,7 @@ SYM_FUNC_END(ftrace_caller); STACK_FRAME_NON_STANDARD_FP(ftrace_caller) SYM_FUNC_START(ftrace_regs_caller) + ANNOTATE_NOENDBR /* Save the current flags before any operations that can change them */ pushfq @@ -310,6 +313,7 @@ SYM_FUNC_END(ftrace_regs_caller) STACK_FRAME_NON_STANDARD_FP(ftrace_regs_caller) SYM_FUNC_START(ftrace_stub_direct_tramp) + ANNOTATE_NOENDBR CALL_DEPTH_ACCOUNT RET SYM_FUNC_END(ftrace_stub_direct_tramp) @@ -317,6 +321,7 @@ SYM_FUNC_END(ftrace_stub_direct_tramp) #else /* ! CONFIG_DYNAMIC_FTRACE */ SYM_FUNC_START(__fentry__) + ANNOTATE_NOENDBR CALL_DEPTH_ACCOUNT cmpq $ftrace_stub, ftrace_trace_function diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 368157a7f6d2..fa9b6339975f 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -565,7 +565,7 @@ void early_setup_idt(void) */ void __head startup_64_setup_gdt_idt(void) { - struct desc_struct *gdt = (void *)(__force unsigned long)init_per_cpu_var(gdt_page.gdt); + struct desc_struct *gdt = (void *)(__force unsigned long)gdt_page.gdt; void *handler = NULL; struct desc_ptr startup_gdt_descr = { diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 31345e0ba006..fefe2a25cf02 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -61,11 +61,14 @@ SYM_CODE_START_NOALIGN(startup_64) /* Set up the stack for verify_cpu() */ leaq __top_init_kernel_stack(%rip), %rsp - /* Setup GSBASE to allow stack canary access for C code */ + /* + * Set up GSBASE. + * Note that on SMP the boot CPU uses the init data section until + * the per-CPU areas are set up. + */ movl $MSR_GS_BASE, %ecx - leaq INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx - movl %edx, %eax - shrq $32, %rdx + xorl %eax, %eax + xorl %edx, %edx wrmsr call startup_64_setup_gdt_idt @@ -319,7 +322,7 @@ SYM_INNER_LABEL(common_startup_64, SYM_L_LOCAL) * * RDX contains the per-cpu offset */ - movq pcpu_hot + X86_current_task(%rdx), %rax + movq current_task(%rdx), %rax movq TASK_threadsp(%rax), %rsp /* @@ -359,17 +362,12 @@ SYM_INNER_LABEL(common_startup_64, SYM_L_LOCAL) movl %eax,%fs movl %eax,%gs - /* Set up %gs. - * - * The base of %gs always points to fixed_percpu_data. If the - * stack protector canary is enabled, it is located at %gs:40. + /* + * Set up GSBASE. * Note that, on SMP, the boot cpu uses init data section until * the per cpu areas are set up. */ movl $MSR_GS_BASE,%ecx -#ifndef CONFIG_SMP - leaq INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx -#endif movl %edx, %eax shrq $32, %rdx wrmsr @@ -435,7 +433,7 @@ SYM_CODE_START(soft_restart_cpu) UNWIND_HINT_END_OF_STACK /* Find the idle task stack */ - movq PER_CPU_VAR(pcpu_hot + X86_current_task), %rcx + movq PER_CPU_VAR(current_task), %rcx movq TASK_threadsp(%rcx), %rsp jmp .Ljump_to_C_code diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index e2fab3ceb09f..6290dd120f5e 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -144,7 +144,7 @@ long ksys_ioperm(unsigned long from, unsigned long num, int turn_on) * Update the sequence number to force a TSS update on return to * user mode. */ - iobm->sequence = atomic64_add_return(1, &io_bitmap_sequence); + iobm->sequence = atomic64_inc_return(&io_bitmap_sequence); return 0; } diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index feca4f20b06a..81f9b78e0f7b 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -33,6 +33,11 @@ DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); EXPORT_PER_CPU_SYMBOL(irq_stat); +DEFINE_PER_CPU_CACHE_HOT(u16, __softirq_pending); +EXPORT_PER_CPU_SYMBOL(__softirq_pending); + +DEFINE_PER_CPU_CACHE_HOT(struct irq_stack *, hardirq_stack_ptr); + atomic_t irq_err_count; /* diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index dc1049c01f9b..c7a5d2960d57 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -29,12 +29,9 @@ int sysctl_panic_on_stackoverflow __read_mostly; /* Debugging check for stack overflow: is there less than 1KB free? */ -static int check_stack_overflow(void) +static bool check_stack_overflow(void) { - long sp; - - __asm__ __volatile__("andl %%esp,%0" : - "=r" (sp) : "0" (THREAD_SIZE - 1)); + unsigned long sp = current_stack_pointer & (THREAD_SIZE - 1); return sp < (sizeof(struct thread_info) + STACK_WARN); } @@ -48,18 +45,19 @@ static void print_stack_overflow(void) } #else -static inline int check_stack_overflow(void) { return 0; } +static inline bool check_stack_overflow(void) { return false; } static inline void print_stack_overflow(void) { } #endif +DEFINE_PER_CPU_CACHE_HOT(struct irq_stack *, softirq_stack_ptr); + static void call_on_stack(void *func, void *stack) { - asm volatile("xchgl %%ebx,%%esp \n" + asm volatile("xchgl %[sp], %%esp\n" CALL_NOSPEC - "movl %%ebx,%%esp \n" - : "=b" (stack) - : "0" (stack), - [thunk_target] "D"(func) + "movl %[sp], %%esp" + : [sp] "+b" (stack) + : [thunk_target] "D" (func) : "memory", "cc", "edx", "ecx", "eax"); } @@ -68,13 +66,13 @@ static inline void *current_stack(void) return (void *)(current_stack_pointer & ~(THREAD_SIZE - 1)); } -static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc) +static inline bool execute_on_irq_stack(bool overflow, struct irq_desc *desc) { struct irq_stack *curstk, *irqstk; - u32 *isp, *prev_esp, arg1; + u32 *isp, *prev_esp; curstk = (struct irq_stack *) current_stack(); - irqstk = __this_cpu_read(pcpu_hot.hardirq_stack_ptr); + irqstk = __this_cpu_read(hardirq_stack_ptr); /* * this is where we switch to the IRQ stack. However, if we are @@ -83,7 +81,7 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc) * current stack (which is the irq stack already after all) */ if (unlikely(curstk == irqstk)) - return 0; + return false; isp = (u32 *) ((char *)irqstk + sizeof(*irqstk)); @@ -94,14 +92,13 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc) if (unlikely(overflow)) call_on_stack(print_stack_overflow, isp); - asm volatile("xchgl %%ebx,%%esp \n" + asm volatile("xchgl %[sp], %%esp\n" CALL_NOSPEC - "movl %%ebx,%%esp \n" - : "=a" (arg1), "=b" (isp) - : "0" (desc), "1" (isp), - [thunk_target] "D" (desc->handle_irq) - : "memory", "cc", "ecx"); - return 1; + "movl %[sp], %%esp" + : "+a" (desc), [sp] "+b" (isp) + : [thunk_target] "D" (desc->handle_irq) + : "memory", "cc", "edx", "ecx"); + return true; } /* @@ -112,7 +109,7 @@ int irq_init_percpu_irqstack(unsigned int cpu) int node = cpu_to_node(cpu); struct page *ph, *ps; - if (per_cpu(pcpu_hot.hardirq_stack_ptr, cpu)) + if (per_cpu(hardirq_stack_ptr, cpu)) return 0; ph = alloc_pages_node(node, THREADINFO_GFP, THREAD_SIZE_ORDER); @@ -124,8 +121,8 @@ int irq_init_percpu_irqstack(unsigned int cpu) return -ENOMEM; } - per_cpu(pcpu_hot.hardirq_stack_ptr, cpu) = page_address(ph); - per_cpu(pcpu_hot.softirq_stack_ptr, cpu) = page_address(ps); + per_cpu(hardirq_stack_ptr, cpu) = page_address(ph); + per_cpu(softirq_stack_ptr, cpu) = page_address(ps); return 0; } @@ -135,7 +132,7 @@ void do_softirq_own_stack(void) struct irq_stack *irqstk; u32 *isp, *prev_esp; - irqstk = __this_cpu_read(pcpu_hot.softirq_stack_ptr); + irqstk = __this_cpu_read(softirq_stack_ptr); /* build the stack frame on the softirq stack */ isp = (u32 *) ((char *)irqstk + sizeof(*irqstk)); @@ -150,7 +147,7 @@ void do_softirq_own_stack(void) void __handle_irq(struct irq_desc *desc, struct pt_regs *regs) { - int overflow = check_stack_overflow(); + bool overflow = check_stack_overflow(); if (user_mode(regs) || !execute_on_irq_stack(overflow, desc)) { if (unlikely(overflow)) diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index ade0043ce56e..ca78dce39361 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -26,8 +26,8 @@ #include <asm/io_apic.h> #include <asm/apic.h> +DEFINE_PER_CPU_CACHE_HOT(bool, hardirq_stack_inuse); DEFINE_PER_CPU_PAGE_ALIGNED(struct irq_stack, irq_stack_backing_store) __visible; -DECLARE_INIT_PER_CPU(irq_stack_backing_store); #ifdef CONFIG_VMAP_STACK /* @@ -51,7 +51,7 @@ static int map_irq_stack(unsigned int cpu) return -ENOMEM; /* Store actual TOS to avoid adjustment in the hotpath */ - per_cpu(pcpu_hot.hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE - 8; + per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE - 8; return 0; } #else @@ -64,14 +64,14 @@ static int map_irq_stack(unsigned int cpu) void *va = per_cpu_ptr(&irq_stack_backing_store, cpu); /* Store actual TOS to avoid adjustment in the hotpath */ - per_cpu(pcpu_hot.hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE - 8; + per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE - 8; return 0; } #endif int irq_init_percpu_irqstack(unsigned int cpu) { - if (per_cpu(pcpu_hot.hardirq_stack_ptr, cpu)) + if (per_cpu(hardirq_stack_ptr, cpu)) return 0; return map_irq_stack(cpu); } diff --git a/arch/x86/kernel/irqflags.S b/arch/x86/kernel/irqflags.S index 7f542a7799cb..fdabd5dda154 100644 --- a/arch/x86/kernel/irqflags.S +++ b/arch/x86/kernel/irqflags.S @@ -9,6 +9,7 @@ */ .pushsection .noinstr.text, "ax" SYM_FUNC_START(native_save_fl) + ENDBR pushf pop %_ASM_AX RET diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 72e6a45e7ec2..09608fd93687 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -373,16 +373,7 @@ out: kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long addr, unsigned long offset, bool *on_func_entry) { - u32 insn; - - /* - * Since 'addr' is not guaranteed to be safe to access, use - * copy_from_kernel_nofault() to read the instruction: - */ - if (copy_from_kernel_nofault(&insn, (void *)addr, sizeof(u32))) - return NULL; - - if (is_endbr(insn)) { + if (is_endbr((u32 *)addr)) { *on_func_entry = !offset || offset == 4; if (*on_func_entry) offset = 4; diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 7a422a6c5983..3be9b3342c67 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -838,7 +838,6 @@ static void __init kvm_guest_init(void) #ifdef CONFIG_SMP if (pv_tlb_flush_supported()) { pv_ops.mmu.flush_tlb_multi = kvm_flush_tlb_multi; - pv_ops.mmu.tlb_remove_table = tlb_remove_table; pr_info("KVM setup pv remote TLB flush\n"); } diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 8984abd91c00..a7998f351701 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -19,6 +19,7 @@ #include <linux/jump_label.h> #include <linux/random.h> #include <linux/memory.h> +#include <linux/stackprotector.h> #include <asm/text-patching.h> #include <asm/page.h> @@ -130,6 +131,20 @@ static int __write_relocate_add(Elf64_Shdr *sechdrs, goto overflow; size = 4; break; +#if defined(CONFIG_STACKPROTECTOR) && \ + defined(CONFIG_CC_IS_CLANG) && CONFIG_CLANG_VERSION < 170000 + case R_X86_64_REX_GOTPCRELX: { + static unsigned long __percpu *const addr = &__stack_chk_guard; + + if (sym->st_value != (u64)addr) { + pr_err("%s: Unsupported GOTPCREL relocation\n", me->name); + return -ENOEXEC; + } + + val = (u64)&addr + rel[i].r_addend; + fallthrough; + } +#endif case R_X86_64_PC32: case R_X86_64_PLT32: val -= (u64)loc; @@ -146,21 +161,18 @@ static int __write_relocate_add(Elf64_Shdr *sechdrs, } if (apply) { - void *wr_loc = module_writable_address(me, loc); - - if (memcmp(wr_loc, &zero, size)) { + if (memcmp(loc, &zero, size)) { pr_err("x86/modules: Invalid relocation target, existing value is nonzero for type %d, loc %p, val %Lx\n", (int)ELF64_R_TYPE(rel[i].r_info), loc, val); return -ENOEXEC; } - write(wr_loc, &val, size); + write(loc, &val, size); } else { if (memcmp(loc, &val, size)) { pr_warn("x86/modules: Invalid relocation target, existing value does not match expected value for type %d, loc %p, val %Lx\n", (int)ELF64_R_TYPE(rel[i].r_info), loc, val); return -ENOEXEC; } - /* FIXME: needs care for ROX module allocations */ write(loc, &zero, size); } } @@ -227,7 +239,7 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) { - const Elf_Shdr *s, *alt = NULL, + const Elf_Shdr *s, *alt = NULL, *locks = NULL, *orc = NULL, *orc_ip = NULL, *retpolines = NULL, *returns = NULL, *ibt_endbr = NULL, *calls = NULL, *cfi = NULL; @@ -236,6 +248,8 @@ int module_finalize(const Elf_Ehdr *hdr, for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { if (!strcmp(".altinstructions", secstrings + s->sh_name)) alt = s; + if (!strcmp(".smp_locks", secstrings + s->sh_name)) + locks = s; if (!strcmp(".orc_unwind", secstrings + s->sh_name)) orc = s; if (!strcmp(".orc_unwind_ip", secstrings + s->sh_name)) @@ -266,60 +280,33 @@ int module_finalize(const Elf_Ehdr *hdr, csize = cfi->sh_size; } - apply_fineibt(rseg, rseg + rsize, cseg, cseg + csize, me); + apply_fineibt(rseg, rseg + rsize, cseg, cseg + csize); } if (retpolines) { void *rseg = (void *)retpolines->sh_addr; - apply_retpolines(rseg, rseg + retpolines->sh_size, me); + apply_retpolines(rseg, rseg + retpolines->sh_size); } if (returns) { void *rseg = (void *)returns->sh_addr; - apply_returns(rseg, rseg + returns->sh_size, me); - } - if (alt) { - /* patch .altinstructions */ - void *aseg = (void *)alt->sh_addr; - apply_alternatives(aseg, aseg + alt->sh_size, me); + apply_returns(rseg, rseg + returns->sh_size); } - if (calls || alt) { + if (calls) { struct callthunk_sites cs = {}; - if (calls) { - cs.call_start = (void *)calls->sh_addr; - cs.call_end = (void *)calls->sh_addr + calls->sh_size; - } - - if (alt) { - cs.alt_start = (void *)alt->sh_addr; - cs.alt_end = (void *)alt->sh_addr + alt->sh_size; - } + cs.call_start = (void *)calls->sh_addr; + cs.call_end = (void *)calls->sh_addr + calls->sh_size; callthunks_patch_module_calls(&cs, me); } + if (alt) { + /* patch .altinstructions */ + void *aseg = (void *)alt->sh_addr; + apply_alternatives(aseg, aseg + alt->sh_size); + } if (ibt_endbr) { void *iseg = (void *)ibt_endbr->sh_addr; - apply_seal_endbr(iseg, iseg + ibt_endbr->sh_size, me); + apply_seal_endbr(iseg, iseg + ibt_endbr->sh_size); } - - if (orc && orc_ip) - unwind_module_init(me, (void *)orc_ip->sh_addr, orc_ip->sh_size, - (void *)orc->sh_addr, orc->sh_size); - - return 0; -} - -int module_post_finalize(const Elf_Ehdr *hdr, - const Elf_Shdr *sechdrs, - struct module *me) -{ - const Elf_Shdr *s, *locks = NULL; - char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; - - for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { - if (!strcmp(".smp_locks", secstrings + s->sh_name)) - locks = s; - } - if (locks) { void *lseg = (void *)locks->sh_addr; void *text = me->mem[MOD_TEXT].base; @@ -329,6 +316,10 @@ int module_post_finalize(const Elf_Ehdr *hdr, text, text_end); } + if (orc && orc_ip) + unwind_module_init(me, (void *)orc_ip->sh_addr, orc_ip->sh_size, + (void *)orc->sh_addr, orc->sh_size); + return 0; } diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index ed163c8c8604..9a95d00f1423 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -40,8 +40,12 @@ #define CREATE_TRACE_POINTS #include <trace/events/nmi.h> +/* + * An emergency handler can be set in any context including NMI + */ struct nmi_desc { raw_spinlock_t lock; + nmi_handler_t emerg_handler; struct list_head head; }; @@ -132,9 +136,22 @@ static void nmi_check_duration(struct nmiaction *action, u64 duration) static int nmi_handle(unsigned int type, struct pt_regs *regs) { struct nmi_desc *desc = nmi_to_desc(type); + nmi_handler_t ehandler; struct nmiaction *a; int handled=0; + /* + * Call the emergency handler, if set + * + * In the case of crash_nmi_callback() emergency handler, it will + * return in the case of the crashing CPU to enable it to complete + * other necessary crashing actions ASAP. Other handlers in the + * linked list won't need to be run. + */ + ehandler = desc->emerg_handler; + if (ehandler) + return ehandler(type, regs); + rcu_read_lock(); /* @@ -224,6 +241,31 @@ void unregister_nmi_handler(unsigned int type, const char *name) } EXPORT_SYMBOL_GPL(unregister_nmi_handler); +/** + * set_emergency_nmi_handler - Set emergency handler + * @type: NMI type + * @handler: the emergency handler to be stored + * + * Set an emergency NMI handler which, if set, will preempt all the other + * handlers in the linked list. If a NULL handler is passed in, it will clear + * it. It is expected that concurrent calls to this function will not happen + * or the system is screwed beyond repair. + */ +void set_emergency_nmi_handler(unsigned int type, nmi_handler_t handler) +{ + struct nmi_desc *desc = nmi_to_desc(type); + + if (WARN_ON_ONCE(desc->emerg_handler == handler)) + return; + desc->emerg_handler = handler; + + /* + * Ensure the emergency handler is visible to other CPUs before + * function return + */ + smp_wmb(); +} + static void pci_serr_error(unsigned char reason, struct pt_regs *regs) { diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 1ccaa3397a67..97925632c28e 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -59,21 +59,6 @@ void __init native_pv_lock_init(void) static_branch_enable(&virt_spin_lock_key); } -#ifndef CONFIG_PT_RECLAIM -static void native_tlb_remove_table(struct mmu_gather *tlb, void *table) -{ - struct ptdesc *ptdesc = (struct ptdesc *)table; - - pagetable_dtor(ptdesc); - tlb_remove_page(tlb, ptdesc_page(ptdesc)); -} -#else -static void native_tlb_remove_table(struct mmu_gather *tlb, void *table) -{ - tlb_remove_table(tlb, table); -} -#endif - struct static_key paravirt_steal_enabled; struct static_key paravirt_steal_rq_enabled; @@ -90,30 +75,20 @@ void paravirt_set_sched_clock(u64 (*func)(void)) static_call_update(pv_sched_clock, func); } -/* These are in entry.S */ -static struct resource reserve_ioports = { - .start = 0, - .end = IO_SPACE_LIMIT, - .name = "paravirt-ioport", - .flags = IORESOURCE_IO | IORESOURCE_BUSY, -}; +#ifdef CONFIG_PARAVIRT_XXL +static noinstr void pv_native_write_cr2(unsigned long val) +{ + native_write_cr2(val); +} -/* - * Reserve the whole legacy IO space to prevent any legacy drivers - * from wasting time probing for their hardware. This is a fairly - * brute-force approach to disabling all non-virtual drivers. - * - * Note that this must be called very early to have any effect. - */ -int paravirt_disable_iospace(void) +static noinstr unsigned long pv_native_read_cr3(void) { - return request_resource(&ioport_resource, &reserve_ioports); + return __native_read_cr3(); } -#ifdef CONFIG_PARAVIRT_XXL -static noinstr void pv_native_write_cr2(unsigned long val) +static noinstr void pv_native_write_cr3(unsigned long cr3) { - native_write_cr2(val); + native_write_cr3(cr3); } static noinstr unsigned long pv_native_get_debugreg(int regno) @@ -195,7 +170,6 @@ struct paravirt_patch_template pv_ops = { .mmu.flush_tlb_kernel = native_flush_tlb_global, .mmu.flush_tlb_one_user = native_flush_tlb_one_user, .mmu.flush_tlb_multi = native_flush_tlb_multi, - .mmu.tlb_remove_table = native_tlb_remove_table, .mmu.exit_mmap = paravirt_nop, .mmu.notify_page_enc_status_changed = paravirt_nop, @@ -203,8 +177,8 @@ struct paravirt_patch_template pv_ops = { #ifdef CONFIG_PARAVIRT_XXL .mmu.read_cr2 = __PV_IS_CALLEE_SAVE(pv_native_read_cr2), .mmu.write_cr2 = pv_native_write_cr2, - .mmu.read_cr3 = __native_read_cr3, - .mmu.write_cr3 = native_write_cr3, + .mmu.read_cr3 = pv_native_read_cr3, + .mmu.write_cr3 = pv_native_write_cr3, .mmu.pgd_alloc = __paravirt_pgd_alloc, .mmu.pgd_free = paravirt_nop, diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 6da6769d7254..5452237fabd4 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -1043,7 +1043,7 @@ unsigned long __get_wchan(struct task_struct *p) return addr; } -long do_arch_prctl_common(int option, unsigned long arg2) +SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2) { switch (option) { case ARCH_GET_CPUID: @@ -1058,5 +1058,13 @@ long do_arch_prctl_common(int option, unsigned long arg2) return fpu_xstate_prctl(option, arg2); } + if (!in_ia32_syscall()) + return do_arch_prctl_64(current, option, arg2); + return -EINVAL; } + +SYSCALL_DEFINE0(ni_syscall) +{ + return -ENOSYS; +} diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 0917c7f25720..4636ef359973 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -190,13 +190,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) arch_end_context_switch(next_p); /* - * Reload esp0 and pcpu_hot.top_of_stack. This changes + * Reload esp0 and cpu_current_top_of_stack. This changes * current_thread_info(). Refresh the SYSENTER configuration in * case prev or next is vm86. */ update_task_stack(next_p); refresh_sysenter_cs(next); - this_cpu_write(pcpu_hot.top_of_stack, + this_cpu_write(cpu_current_top_of_stack, (unsigned long)task_stack_page(next_p) + THREAD_SIZE); @@ -206,7 +206,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) if (prev->gs | next->gs) loadsegment(gs, next->gs); - raw_cpu_write(pcpu_hot.current_task, next_p); + raw_cpu_write(current_task, next_p); switch_fpu_finish(next_p); @@ -215,8 +215,3 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) return prev_p; } - -SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2) -{ - return do_arch_prctl_common(option, arg2); -} diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 226472332a70..7196ca7048be 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -614,7 +614,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) int cpu = smp_processor_id(); WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) && - this_cpu_read(pcpu_hot.hardirq_stack_inuse)); + this_cpu_read(hardirq_stack_inuse)); if (!test_tsk_thread_flag(prev_p, TIF_NEED_FPU_LOAD)) switch_fpu_prepare(prev_p, cpu); @@ -668,8 +668,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* * Switch the PDA and FPU contexts. */ - raw_cpu_write(pcpu_hot.current_task, next_p); - raw_cpu_write(pcpu_hot.top_of_stack, task_top_of_stack(next_p)); + raw_cpu_write(current_task, next_p); + raw_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p)); switch_fpu_finish(next_p); @@ -942,7 +942,7 @@ long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2) case ARCH_MAP_VDSO_X32: return prctl_map_vdso(&vdso_image_x32, arg2); # endif -# if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION +# ifdef CONFIG_IA32_EMULATION case ARCH_MAP_VDSO_32: return prctl_map_vdso(&vdso_image_32, arg2); # endif @@ -979,26 +979,3 @@ long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2) return ret; } - -SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2) -{ - long ret; - - ret = do_arch_prctl_64(current, option, arg2); - if (ret == -EINVAL) - ret = do_arch_prctl_common(option, arg2); - - return ret; -} - -#ifdef CONFIG_IA32_EMULATION -COMPAT_SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2) -{ - return do_arch_prctl_common(option, arg2); -} -#endif - -unsigned long KSTK_ESP(struct task_struct *task) -{ - return task_pt_regs(task)->sp; -} diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index dc1dd3f3e67f..964f6b0a3d68 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -921,20 +921,16 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback) return; /* Make a note of crashing cpu. Will be used in NMI callback. */ - crashing_cpu = safe_smp_processor_id(); + crashing_cpu = smp_processor_id(); shootdown_callback = callback; atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); - /* Would it be better to replace the trap vector here? */ - if (register_nmi_handler(NMI_LOCAL, crash_nmi_callback, - NMI_FLAG_FIRST, "crash")) - return; /* Return what? */ + /* - * Ensure the new callback function is set before sending - * out the NMI + * Set emergency handler to preempt other handlers. */ - wmb(); + set_emergency_nmi_handler(NMI_LOCAL, crash_nmi_callback); apic_send_IPI_allbutself(NMI_VECTOR); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index cebee310e200..9f8ff3aad4f4 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -56,6 +56,9 @@ #include <asm/unwind.h> #include <asm/vsyscall.h> #include <linux/vmalloc.h> +#if defined(CONFIG_X86_LOCAL_APIC) +#include <asm/nmi.h> +#endif /* * max_low_pfn_mapped: highest directly mapped pfn < 4 GB @@ -146,6 +149,69 @@ static size_t ima_kexec_buffer_size; /* Boot loader ID and version as integers, for the benefit of proc_dointvec */ int bootloader_type, bootloader_version; +static const struct ctl_table x86_sysctl_table[] = { + { + .procname = "panic_on_unrecovered_nmi", + .data = &panic_on_unrecovered_nmi, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "panic_on_io_nmi", + .data = &panic_on_io_nmi, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "bootloader_type", + .data = &bootloader_type, + .maxlen = sizeof(int), + .mode = 0444, + .proc_handler = proc_dointvec, + }, + { + .procname = "bootloader_version", + .data = &bootloader_version, + .maxlen = sizeof(int), + .mode = 0444, + .proc_handler = proc_dointvec, + }, + { + .procname = "io_delay_type", + .data = &io_delay_type, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#if defined(CONFIG_X86_LOCAL_APIC) + { + .procname = "unknown_nmi_panic", + .data = &unknown_nmi_panic, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#endif +#if defined(CONFIG_ACPI_SLEEP) + { + .procname = "acpi_video_flags", + .data = &acpi_realmode_flags, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = proc_doulongvec_minmax, + }, +#endif +}; + +static int __init init_x86_sysctl(void) +{ + register_sysctl_init("kernel", x86_sysctl_table); + return 0; +} +arch_initcall(init_x86_sysctl); + /* * Setup options */ diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index b30d6e180df7..bfa48e7a32a2 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -23,18 +23,13 @@ #include <asm/cpumask.h> #include <asm/cpu.h> -#ifdef CONFIG_X86_64 -#define BOOT_PERCPU_OFFSET ((unsigned long)__per_cpu_load) -#else -#define BOOT_PERCPU_OFFSET 0 -#endif +DEFINE_PER_CPU_CACHE_HOT(int, cpu_number); +EXPORT_PER_CPU_SYMBOL(cpu_number); -DEFINE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET; +DEFINE_PER_CPU_CACHE_HOT(unsigned long, this_cpu_off); EXPORT_PER_CPU_SYMBOL(this_cpu_off); -unsigned long __per_cpu_offset[NR_CPUS] __ro_after_init = { - [0 ... NR_CPUS-1] = BOOT_PERCPU_OFFSET, -}; +unsigned long __per_cpu_offset[NR_CPUS] __ro_after_init; EXPORT_SYMBOL(__per_cpu_offset); /* @@ -169,7 +164,7 @@ void __init setup_per_cpu_areas(void) for_each_possible_cpu(cpu) { per_cpu_offset(cpu) = delta + pcpu_unit_offsets[cpu]; per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); - per_cpu(pcpu_hot.cpu_number, cpu) = cpu; + per_cpu(cpu_number, cpu) = cpu; setup_percpu_segment(cpu); /* * Copy data used in early init routines from the diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index ef654530bf5a..98123ff10506 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -33,25 +33,55 @@ #include <asm/smap.h> #include <asm/gsseg.h> +/* + * The first GDT descriptor is reserved as 'NULL descriptor'. As bits 0 + * and 1 of a segment selector, i.e., the RPL bits, are NOT used to index + * GDT, selector values 0~3 all point to the NULL descriptor, thus values + * 0, 1, 2 and 3 are all valid NULL selector values. + * + * However IRET zeros ES, FS, GS, and DS segment registers if any of them + * is found to have any nonzero NULL selector value, which can be used by + * userspace in pre-FRED systems to spot any interrupt/exception by loading + * a nonzero NULL selector and waiting for it to become zero. Before FRED + * there was nothing software could do to prevent such an information leak. + * + * ERETU, the only legit instruction to return to userspace from kernel + * under FRED, by design does NOT zero any segment register to avoid this + * problem behavior. + * + * As such, leave NULL selector values 0~3 unchanged. + */ +static inline u16 fixup_rpl(u16 sel) +{ + return sel <= 3 ? sel : sel | 3; +} + #ifdef CONFIG_IA32_EMULATION #include <asm/unistd_32_ia32.h> static inline void reload_segments(struct sigcontext_32 *sc) { - unsigned int cur; + u16 cur; + /* + * Reload fs and gs if they have changed in the signal + * handler. This does not handle long fs/gs base changes in + * the handler, but does not clobber them at least in the + * normal case. + */ savesegment(gs, cur); - if ((sc->gs | 0x03) != cur) - load_gs_index(sc->gs | 0x03); + if (fixup_rpl(sc->gs) != cur) + load_gs_index(fixup_rpl(sc->gs)); savesegment(fs, cur); - if ((sc->fs | 0x03) != cur) - loadsegment(fs, sc->fs | 0x03); + if (fixup_rpl(sc->fs) != cur) + loadsegment(fs, fixup_rpl(sc->fs)); + savesegment(ds, cur); - if ((sc->ds | 0x03) != cur) - loadsegment(ds, sc->ds | 0x03); + if (fixup_rpl(sc->ds) != cur) + loadsegment(ds, fixup_rpl(sc->ds)); savesegment(es, cur); - if ((sc->es | 0x03) != cur) - loadsegment(es, sc->es | 0x03); + if (fixup_rpl(sc->es) != cur) + loadsegment(es, fixup_rpl(sc->es)); } #define sigset32_t compat_sigset_t @@ -105,18 +135,12 @@ static bool ia32_restore_sigcontext(struct pt_regs *regs, regs->orig_ax = -1; #ifdef CONFIG_IA32_EMULATION - /* - * Reload fs and gs if they have changed in the signal - * handler. This does not handle long fs/gs base changes in - * the handler, but does not clobber them at least in the - * normal case. - */ reload_segments(&sc); #else - loadsegment(gs, sc.gs); - regs->fs = sc.fs; - regs->es = sc.es; - regs->ds = sc.ds; + loadsegment(gs, fixup_rpl(sc.gs)); + regs->fs = fixup_rpl(sc.fs); + regs->es = fixup_rpl(sc.es); + regs->ds = fixup_rpl(sc.ds); #endif return fpu__restore_sig(compat_ptr(sc.fpstate), 1); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index c10850ae6f09..d6cf1e23c2a3 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -190,7 +190,7 @@ static void ap_starting(void) apic_ap_setup(); /* Save the processor parameters. */ - smp_store_cpu_info(cpuid); + identify_secondary_cpu(cpuid); /* * The topology information must be up to date before @@ -215,7 +215,7 @@ static void ap_calibrate_delay(void) { /* * Calibrate the delay loop and update loops_per_jiffy in cpu_data. - * smp_store_cpu_info() stored a value that is close but not as + * identify_secondary_cpu() stored a value that is close but not as * accurate as the value just calculated. * * As this is invoked after the TSC synchronization check, @@ -229,7 +229,7 @@ static void ap_calibrate_delay(void) /* * Activate a secondary processor. */ -static void notrace start_secondary(void *unused) +static void notrace __noendbr start_secondary(void *unused) { /* * Don't put *anything* except direct CPU state initialization @@ -314,26 +314,7 @@ static void notrace start_secondary(void *unused) wmb(); cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } - -/* - * The bootstrap kernel entry code has set these up. Save them for - * a given CPU - */ -void smp_store_cpu_info(int id) -{ - struct cpuinfo_x86 *c = &cpu_data(id); - - /* Copy boot_cpu_data only on the first bringup */ - if (!c->initialized) - *c = boot_cpu_data; - c->cpu_index = id; - /* - * During boot time, CPU0 has this setup already. Save the info when - * bringing up an AP. - */ - identify_secondary_cpu(c); - c->initialized = true; -} +ANNOTATE_NOENDBR_SYM(start_secondary); static bool topology_same_node(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) @@ -654,10 +635,9 @@ static void impress_friends(void) * But that slows boot and resume on modern processors, which include * many cores and don't require that delay. * - * Cmdline "init_cpu_udelay=" is available to over-ride this delay. - * Modern processor families are quirked to remove the delay entirely. + * Cmdline "cpu_init_udelay=" is available to override this delay. */ -#define UDELAY_10MS_DEFAULT 10000 +#define UDELAY_10MS_LEGACY 10000 static unsigned int init_udelay = UINT_MAX; @@ -669,21 +649,21 @@ static int __init cpu_init_udelay(char *str) } early_param("cpu_init_udelay", cpu_init_udelay); -static void __init smp_quirk_init_udelay(void) +static void __init smp_set_init_udelay(void) { /* if cmdline changed it from default, leave it alone */ if (init_udelay != UINT_MAX) return; /* if modern processor, use no delay */ - if (((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 == 6)) || - ((boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) && (boot_cpu_data.x86 >= 0x18)) || - ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && (boot_cpu_data.x86 >= 0xF))) { + if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && boot_cpu_data.x86_vfm >= INTEL_PENTIUM_PRO) || + (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON && boot_cpu_data.x86 >= 0x18) || + (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && boot_cpu_data.x86 >= 0xF)) { init_udelay = 0; return; } /* else, use legacy delay */ - init_udelay = UDELAY_10MS_DEFAULT; + init_udelay = UDELAY_10MS_LEGACY; } /* @@ -841,7 +821,7 @@ int common_cpu_up(unsigned int cpu, struct task_struct *idle) /* Just in case we booted with a single CPU. */ alternatives_enable_smp(); - per_cpu(pcpu_hot.current_task, cpu) = idle; + per_cpu(current_task, cpu) = idle; cpu_init_stack_canary(cpu, idle); /* Initialize the interrupt stack(s) */ @@ -851,7 +831,7 @@ int common_cpu_up(unsigned int cpu, struct task_struct *idle) #ifdef CONFIG_X86_32 /* Stack for startup_32 can be just as for start_secondary onwards */ - per_cpu(pcpu_hot.top_of_stack, cpu) = task_top_of_stack(idle); + per_cpu(cpu_current_top_of_stack, cpu) = task_top_of_stack(idle); #endif return 0; } @@ -1094,7 +1074,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) uv_system_init(); - smp_quirk_init_udelay(); + smp_set_init_udelay(); speculative_store_bypass_ht_init(); @@ -1262,43 +1242,9 @@ void play_dead_common(void) * We need to flush the caches before going to sleep, lest we have * dirty data in our caches when we come back up. */ -static inline void mwait_play_dead(void) +void __noreturn mwait_play_dead(unsigned int eax_hint) { struct mwait_cpu_dead *md = this_cpu_ptr(&mwait_cpu_dead); - unsigned int eax, ebx, ecx, edx; - unsigned int highest_cstate = 0; - unsigned int highest_subcstate = 0; - int i; - - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || - boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) - return; - if (!this_cpu_has(X86_FEATURE_MWAIT)) - return; - if (!this_cpu_has(X86_FEATURE_CLFLUSH)) - return; - - eax = CPUID_LEAF_MWAIT; - ecx = 0; - native_cpuid(&eax, &ebx, &ecx, &edx); - - /* - * eax will be 0 if EDX enumeration is not valid. - * Initialized below to cstate, sub_cstate value when EDX is valid. - */ - if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED)) { - eax = 0; - } else { - edx >>= MWAIT_SUBSTATE_SIZE; - for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) { - if (edx & MWAIT_SUBSTATE_MASK) { - highest_cstate = i; - highest_subcstate = edx & MWAIT_SUBSTATE_MASK; - } - } - eax = (highest_cstate << MWAIT_SUBSTATE_SIZE) | - (highest_subcstate - 1); - } /* Set up state for the kexec() hack below */ md->status = CPUDEAD_MWAIT_WAIT; @@ -1319,7 +1265,7 @@ static inline void mwait_play_dead(void) mb(); __monitor(md, 0, 0); mb(); - __mwait(eax, 0); + __mwait(eax_hint, 0); if (READ_ONCE(md->control) == CPUDEAD_MWAIT_KEXEC_HLT) { /* @@ -1391,9 +1337,9 @@ void native_play_dead(void) play_dead_common(); tboot_shutdown(TB_SHUTDOWN_WFS); - mwait_play_dead(); - if (cpuidle_play_dead()) - hlt_play_dead(); + /* Below returns only on error. */ + cpuidle_play_dead(); + hlt_play_dead(); } #else /* ... !CONFIG_HOTPLUG_CPU */ diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 5e3e036e6e53..9f88b8a78e50 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -94,10 +94,20 @@ __always_inline int is_valid_bugaddr(unsigned long addr) /* * Check for UD1 or UD2, accounting for Address Size Override Prefixes. - * If it's a UD1, get the ModRM byte to pass along to UBSan. + * If it's a UD1, further decode to determine its use: + * + * FineIBT: ea (bad) + * FineIBT: f0 75 f9 lock jne . - 6 + * UBSan{0}: 67 0f b9 00 ud1 (%eax),%eax + * UBSan{10}: 67 0f b9 40 10 ud1 0x10(%eax),%eax + * static_call: 0f b9 cc ud1 %esp,%ecx + * + * Notably UBSAN uses EAX, static_call uses ECX. */ -__always_inline int decode_bug(unsigned long addr, u32 *imm) +__always_inline int decode_bug(unsigned long addr, s32 *imm, int *len) { + unsigned long start = addr; + bool lock = false; u8 v; if (addr < TASK_SIZE_MAX) @@ -106,28 +116,67 @@ __always_inline int decode_bug(unsigned long addr, u32 *imm) v = *(u8 *)(addr++); if (v == INSN_ASOP) v = *(u8 *)(addr++); - if (v != OPCODE_ESCAPE) + + if (v == INSN_LOCK) { + lock = true; + v = *(u8 *)(addr++); + } + + switch (v) { + case 0x70 ... 0x7f: /* Jcc.d8 */ + addr += 1; /* d8 */ + *len = addr - start; + WARN_ON_ONCE(!lock); + return BUG_LOCK; + + case 0xea: + *len = addr - start; + return BUG_EA; + + case OPCODE_ESCAPE: + break; + + default: return BUG_NONE; + } v = *(u8 *)(addr++); - if (v == SECOND_BYTE_OPCODE_UD2) + if (v == SECOND_BYTE_OPCODE_UD2) { + *len = addr - start; return BUG_UD2; + } - if (!IS_ENABLED(CONFIG_UBSAN_TRAP) || v != SECOND_BYTE_OPCODE_UD1) + if (v != SECOND_BYTE_OPCODE_UD1) return BUG_NONE; - /* Retrieve the immediate (type value) for the UBSAN UD1 */ - v = *(u8 *)(addr++); - if (X86_MODRM_RM(v) == 4) - addr++; - *imm = 0; - if (X86_MODRM_MOD(v) == 1) - *imm = *(u8 *)addr; - else if (X86_MODRM_MOD(v) == 2) - *imm = *(u32 *)addr; - else - WARN_ONCE(1, "Unexpected MODRM_MOD: %u\n", X86_MODRM_MOD(v)); + v = *(u8 *)(addr++); /* ModRM */ + + if (X86_MODRM_MOD(v) != 3 && X86_MODRM_RM(v) == 4) + addr++; /* SIB */ + + /* Decode immediate, if present */ + switch (X86_MODRM_MOD(v)) { + case 0: if (X86_MODRM_RM(v) == 5) + addr += 4; /* RIP + disp32 */ + break; + + case 1: *imm = *(s8 *)addr; + addr += 1; + break; + + case 2: *imm = *(s32 *)addr; + addr += 4; + break; + + case 3: break; + } + + /* record instruction length */ + *len = addr - start; + + if (X86_MODRM_REG(v) == 0) /* EAX */ + return BUG_UD1_UBSAN; return BUG_UD1; } @@ -257,11 +306,12 @@ static inline void handle_invalid_op(struct pt_regs *regs) static noinstr bool handle_bug(struct pt_regs *regs) { + unsigned long addr = regs->ip; bool handled = false; - int ud_type; - u32 imm; + int ud_type, ud_len; + s32 ud_imm; - ud_type = decode_bug(regs->ip, &imm); + ud_type = decode_bug(addr, &ud_imm, &ud_len); if (ud_type == BUG_NONE) return handled; @@ -281,15 +331,47 @@ static noinstr bool handle_bug(struct pt_regs *regs) */ if (regs->flags & X86_EFLAGS_IF) raw_local_irq_enable(); - if (ud_type == BUG_UD2) { - if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN || - handle_cfi_failure(regs) == BUG_TRAP_TYPE_WARN) { - regs->ip += LEN_UD2; + + switch (ud_type) { + case BUG_UD2: + if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN) { handled = true; + break; } - } else if (IS_ENABLED(CONFIG_UBSAN_TRAP)) { - pr_crit("%s at %pS\n", report_ubsan_failure(regs, imm), (void *)regs->ip); + fallthrough; + + case BUG_EA: + case BUG_LOCK: + if (handle_cfi_failure(regs) == BUG_TRAP_TYPE_WARN) { + handled = true; + break; + } + break; + + case BUG_UD1_UBSAN: + if (IS_ENABLED(CONFIG_UBSAN_TRAP)) { + pr_crit("%s at %pS\n", + report_ubsan_failure(regs, ud_imm), + (void *)regs->ip); + } + break; + + default: + break; + } + + /* + * When continuing, and regs->ip hasn't changed, move it to the next + * instruction. When not continuing execution, restore the instruction + * pointer. + */ + if (handled) { + if (regs->ip == addr) + regs->ip += ud_len; + } else { + regs->ip = addr; } + if (regs->flags & X86_EFLAGS_IF) raw_local_irq_disable(); instrumentation_end(); diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c index deeb02825670..48e6cc1cb017 100644 --- a/arch/x86/kernel/tsc_msr.c +++ b/arch/x86/kernel/tsc_msr.c @@ -152,7 +152,7 @@ static const struct x86_cpu_id tsc_msr_cpu_ids[] = { X86_MATCH_VFM(INTEL_ATOM_SILVERMONT, &freq_desc_byt), X86_MATCH_VFM(INTEL_ATOM_SILVERMONT_MID, &freq_desc_tng), X86_MATCH_VFM(INTEL_ATOM_AIRMONT, &freq_desc_cht), - X86_MATCH_VFM(INTEL_ATOM_AIRMONT_MID, &freq_desc_ann), + X86_MATCH_VFM(INTEL_ATOM_SILVERMONT_MID2, &freq_desc_ann), X86_MATCH_VFM(INTEL_ATOM_AIRMONT_NP, &freq_desc_lgm), {} }; diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S index 1258a5872d12..37ad43792452 100644 --- a/arch/x86/kernel/verify_cpu.S +++ b/arch/x86/kernel/verify_cpu.S @@ -29,8 +29,12 @@ */ #include <asm/cpufeatures.h> +#include <asm/cpufeaturemasks.h> #include <asm/msr-index.h> +#define SSE_MASK \ + (REQUIRED_MASK0 & ((1<<(X86_FEATURE_XMM & 31)) | (1<<(X86_FEATURE_XMM2 & 31)))) + SYM_FUNC_START_LOCAL(verify_cpu) pushf # Save caller passed flags push $0 # Kill any dangerous flags diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 0deb4887d6e9..ccdc45e5b759 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -43,7 +43,8 @@ ENTRY(phys_startup_64) #endif jiffies = jiffies_64; -const_pcpu_hot = pcpu_hot; +const_current_task = current_task; +const_cpu_current_top_of_stack = cpu_current_top_of_stack; #if defined(CONFIG_X86_64) /* @@ -112,12 +113,6 @@ ASSERT(__relocate_kernel_end - __relocate_kernel_start <= KEXEC_CONTROL_CODE_MAX PHDRS { text PT_LOAD FLAGS(5); /* R_E */ data PT_LOAD FLAGS(6); /* RW_ */ -#ifdef CONFIG_X86_64 -#ifdef CONFIG_SMP - percpu PT_LOAD FLAGS(6); /* RW_ */ -#endif - init PT_LOAD FLAGS(7); /* RWE */ -#endif note PT_NOTE FLAGS(0); /* ___ */ } @@ -193,6 +188,8 @@ SECTIONS PAGE_ALIGNED_DATA(PAGE_SIZE) + CACHE_HOT_DATA(L1_CACHE_BYTES) + CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES) DATA_DATA @@ -216,21 +213,7 @@ SECTIONS __init_begin = .; /* paired with __init_end */ } -#if defined(CONFIG_X86_64) && defined(CONFIG_SMP) - /* - * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the - * output PHDR, so the next output section - .init.text - should - * start another segment - init. - */ - PERCPU_VADDR(INTERNODE_CACHE_BYTES, 0, :percpu) - ASSERT(SIZEOF(.data..percpu) < CONFIG_PHYSICAL_START, - "per-CPU data too large - increase CONFIG_PHYSICAL_START") -#endif - INIT_TEXT_SECTION(PAGE_SIZE) -#ifdef CONFIG_X86_64 - :init -#endif /* * Section for code used exclusively before alternatives are run. All @@ -347,9 +330,8 @@ SECTIONS EXIT_DATA } -#if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP) - PERCPU_SECTION(INTERNODE_CACHE_BYTES) -#endif + PERCPU_SECTION(L1_CACHE_BYTES) + ASSERT(__per_cpu_hot_end - __per_cpu_hot_start <= 64, "percpu cache hot data too large") RUNTIME_CONST_VARIABLES RUNTIME_CONST(ptr, USER_PTR_MAX) @@ -493,19 +475,6 @@ SECTIONS PROVIDE(__ref_stack_chk_guard = __stack_chk_guard); #ifdef CONFIG_X86_64 -/* - * Per-cpu symbols which need to be offset from __per_cpu_load - * for the boot processor. - */ -#define INIT_PER_CPU(x) init_per_cpu__##x = ABSOLUTE(x) + __per_cpu_load -INIT_PER_CPU(gdt_page); -INIT_PER_CPU(fixed_percpu_data); -INIT_PER_CPU(irq_stack_backing_store); - -#ifdef CONFIG_SMP -. = ASSERT((fixed_percpu_data == 0), - "fixed_percpu_data is not at start of per-cpu area"); -#endif #ifdef CONFIG_MITIGATION_UNRET_ENTRY . = ASSERT((retbleed_return_thunk & 0x3f) == 0, "retbleed_return_thunk not cacheline-aligned"); diff --git a/arch/x86/kvm/vmx/vmx_ops.h b/arch/x86/kvm/vmx/vmx_ops.h index 633c87e2fd92..96677576c836 100644 --- a/arch/x86/kvm/vmx/vmx_ops.h +++ b/arch/x86/kvm/vmx/vmx_ops.h @@ -118,7 +118,7 @@ do_exception: #else /* !CONFIG_CC_HAS_ASM_GOTO_OUTPUT */ - asm volatile("1: vmread %2, %1\n\t" + asm volatile("1: vmread %[field], %[output]\n\t" ".byte 0x3e\n\t" /* branch taken hint */ "ja 3f\n\t" @@ -127,24 +127,26 @@ do_exception: * @field, and bounce through the trampoline to preserve * volatile registers. */ - "xorl %k1, %k1\n\t" + "xorl %k[output], %k[output]\n\t" "2:\n\t" - "push %1\n\t" - "push %2\n\t" + "push %[output]\n\t" + "push %[field]\n\t" "call vmread_error_trampoline\n\t" /* * Unwind the stack. Note, the trampoline zeros out the * memory for @fault so that the result is '0' on error. */ - "pop %2\n\t" - "pop %1\n\t" + "pop %[field]\n\t" + "pop %[output]\n\t" "3:\n\t" /* VMREAD faulted. As above, except push '1' for @fault. */ - _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_ONE_REG, %1) + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_ONE_REG, %[output]) - : ASM_CALL_CONSTRAINT, "=&r"(value) : "r"(field) : "cc"); + : ASM_CALL_CONSTRAINT, [output] "=&r" (value) + : [field] "r" (field) + : "cc"); return value; #endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */ diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 8a59c61624c2..64ccecedc9f8 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -56,7 +56,7 @@ ifeq ($(CONFIG_X86_32),y) lib-y += string_32.o lib-y += memmove_32.o lib-y += cmpxchg8b_emu.o -ifneq ($(CONFIG_X86_CMPXCHG64),y) +ifneq ($(CONFIG_X86_CX8),y) lib-y += atomic64_386_32.o endif else @@ -66,5 +66,6 @@ endif lib-y += clear_page_64.o copy_page_64.o lib-y += memmove_64.o memset_64.o lib-y += copy_user_64.o copy_user_uncached_64.o - lib-y += cmpxchg16b_emu.o + lib-y += cmpxchg16b_emu.o + lib-y += bhi.o endif diff --git a/arch/x86/lib/bhi.S b/arch/x86/lib/bhi.S new file mode 100644 index 000000000000..58891681261b --- /dev/null +++ b/arch/x86/lib/bhi.S @@ -0,0 +1,147 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include <linux/linkage.h> +#include <asm/unwind_hints.h> +#include <asm/nospec-branch.h> + +/* + * Notably, the FineIBT preamble calling these will have ZF set and r10 zero. + * + * The very last element is in fact larger than 32 bytes, but since its the + * last element, this does not matter, + * + * There are 2 #UD sites, located between 0,1-2,3 and 4,5-6,7 such that they + * can be reached using Jcc.d8, these elements (1 and 5) have sufficiently + * big alignment holes for this to not stagger the array. + */ + +.pushsection .noinstr.text, "ax" + + .align 32 +SYM_CODE_START(__bhi_args) + +#ifdef CONFIG_FINEIBT_BHI + + .align 32 +SYM_INNER_LABEL(__bhi_args_0, SYM_L_LOCAL) + ANNOTATE_NOENDBR + UNWIND_HINT_FUNC + jne .Lud_1 + ANNOTATE_UNRET_SAFE + ret + int3 + + .align 32 +SYM_INNER_LABEL(__bhi_args_1, SYM_L_LOCAL) + ANNOTATE_NOENDBR + UNWIND_HINT_FUNC + jne .Lud_1 + cmovne %r10, %rdi + ANNOTATE_UNRET_SAFE + ret + int3 + + .align 8 + ANNOTATE_REACHABLE +.Lud_1: ud2 + ANNOTATE_UNRET_SAFE + ret + int3 + + .align 32 +SYM_INNER_LABEL(__bhi_args_2, SYM_L_LOCAL) + ANNOTATE_NOENDBR + UNWIND_HINT_FUNC + jne .Lud_1 + cmovne %r10, %rdi + cmovne %r10, %rsi + ANNOTATE_UNRET_SAFE + ret + int3 + + .align 32 +SYM_INNER_LABEL(__bhi_args_3, SYM_L_LOCAL) + ANNOTATE_NOENDBR + UNWIND_HINT_FUNC + jne .Lud_1 + cmovne %r10, %rdi + cmovne %r10, %rsi + cmovne %r10, %rdx + ANNOTATE_UNRET_SAFE + ret + int3 + + .align 32 +SYM_INNER_LABEL(__bhi_args_4, SYM_L_LOCAL) + ANNOTATE_NOENDBR + UNWIND_HINT_FUNC + jne .Lud_2 + cmovne %r10, %rdi + cmovne %r10, %rsi + cmovne %r10, %rdx + cmovne %r10, %rcx + ANNOTATE_UNRET_SAFE + ret + int3 + + .align 32 +SYM_INNER_LABEL(__bhi_args_5, SYM_L_LOCAL) + ANNOTATE_NOENDBR + UNWIND_HINT_FUNC + jne .Lud_2 + cmovne %r10, %rdi + cmovne %r10, %rsi + cmovne %r10, %rdx + cmovne %r10, %rcx + cmovne %r10, %r8 + ANNOTATE_UNRET_SAFE + ret + int3 + + .align 8 + ANNOTATE_REACHABLE +.Lud_2: ud2 + ANNOTATE_UNRET_SAFE + ret + int3 + + .align 32 +SYM_INNER_LABEL(__bhi_args_6, SYM_L_LOCAL) + ANNOTATE_NOENDBR + UNWIND_HINT_FUNC + jne .Lud_2 + cmovne %r10, %rdi + cmovne %r10, %rsi + cmovne %r10, %rdx + cmovne %r10, %rcx + cmovne %r10, %r8 + cmovne %r10, %r9 + ANNOTATE_UNRET_SAFE + ret + int3 + + .align 32 +SYM_INNER_LABEL(__bhi_args_7, SYM_L_LOCAL) + ANNOTATE_NOENDBR + UNWIND_HINT_FUNC + jne .Lud_2 + cmovne %r10, %rdi + cmovne %r10, %rsi + cmovne %r10, %rdx + cmovne %r10, %rcx + cmovne %r10, %r8 + cmovne %r10, %r9 + cmovne %r10, %rsp + ANNOTATE_UNRET_SAFE + ret + int3 + +#endif /* CONFIG_FINEIBT_BHI */ + + .align 32 +SYM_INNER_LABEL(__bhi_args_end, SYM_L_GLOBAL) + ANNOTATE_NOENDBR + nop /* Work around toolchain+objtool quirk */ +SYM_CODE_END(__bhi_args) + +.popsection diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S index 2760a15fbc00..a508e4a8c66a 100644 --- a/arch/x86/lib/clear_page_64.S +++ b/arch/x86/lib/clear_page_64.S @@ -1,6 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0-only */ #include <linux/export.h> #include <linux/linkage.h> +#include <linux/cfi_types.h> +#include <linux/objtool.h> #include <asm/asm.h> /* @@ -14,7 +16,7 @@ * Zero a page. * %rdi - page */ -SYM_FUNC_START(clear_page_rep) +SYM_TYPED_FUNC_START(clear_page_rep) movl $4096/8,%ecx xorl %eax,%eax rep stosq @@ -22,7 +24,7 @@ SYM_FUNC_START(clear_page_rep) SYM_FUNC_END(clear_page_rep) EXPORT_SYMBOL_GPL(clear_page_rep) -SYM_FUNC_START(clear_page_orig) +SYM_TYPED_FUNC_START(clear_page_orig) xorl %eax,%eax movl $4096/64,%ecx .p2align 4 @@ -44,7 +46,7 @@ SYM_FUNC_START(clear_page_orig) SYM_FUNC_END(clear_page_orig) EXPORT_SYMBOL_GPL(clear_page_orig) -SYM_FUNC_START(clear_page_erms) +SYM_TYPED_FUNC_START(clear_page_erms) movl $4096,%ecx xorl %eax,%eax rep stosb @@ -63,6 +65,7 @@ EXPORT_SYMBOL_GPL(clear_page_erms) * rcx: uncleared bytes or 0 if successful. */ SYM_FUNC_START(rep_stos_alternative) + ANNOTATE_NOENDBR cmpq $64,%rcx jae .Lunrolled diff --git a/arch/x86/lib/cmpxchg8b_emu.S b/arch/x86/lib/cmpxchg8b_emu.S index 1c96be769adc..d4bb24347ff8 100644 --- a/arch/x86/lib/cmpxchg8b_emu.S +++ b/arch/x86/lib/cmpxchg8b_emu.S @@ -7,7 +7,7 @@ .text -#ifndef CONFIG_X86_CMPXCHG64 +#ifndef CONFIG_X86_CX8 /* * Emulate 'cmpxchg8b (%esi)' on UP diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S index d6ae793d08fa..d8e87fedc20d 100644 --- a/arch/x86/lib/copy_page_64.S +++ b/arch/x86/lib/copy_page_64.S @@ -3,6 +3,7 @@ #include <linux/export.h> #include <linux/linkage.h> +#include <linux/cfi_types.h> #include <asm/cpufeatures.h> #include <asm/alternative.h> @@ -13,7 +14,7 @@ * prefetch distance based on SMP/UP. */ ALIGN -SYM_FUNC_START(copy_page) +SYM_TYPED_FUNC_START(copy_page) ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD movl $4096/8, %ecx rep movsq diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index fc9fb5d06174..aa8c341b2441 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -8,6 +8,8 @@ #include <linux/export.h> #include <linux/linkage.h> +#include <linux/cfi_types.h> +#include <linux/objtool.h> #include <asm/cpufeatures.h> #include <asm/alternative.h> #include <asm/asm.h> @@ -30,6 +32,7 @@ * it simpler for us, we can clobber rsi/rdi and rax freely. */ SYM_FUNC_START(rep_movs_alternative) + ANNOTATE_NOENDBR cmpq $64,%rcx jae .Llarge diff --git a/arch/x86/lib/copy_user_uncached_64.S b/arch/x86/lib/copy_user_uncached_64.S index 2918e36eece2..18350b343c2a 100644 --- a/arch/x86/lib/copy_user_uncached_64.S +++ b/arch/x86/lib/copy_user_uncached_64.S @@ -5,6 +5,7 @@ #include <linux/export.h> #include <linux/linkage.h> +#include <linux/objtool.h> #include <asm/asm.h> /* @@ -27,6 +28,7 @@ * rax uncopied bytes or 0 if successful. */ SYM_FUNC_START(__copy_user_nocache) + ANNOTATE_NOENDBR /* If destination is not 7-byte aligned, we'll have to align it */ testb $7,%dil jne .Lalign diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S index 89ecd57c9d42..9d5654b8a72a 100644 --- a/arch/x86/lib/getuser.S +++ b/arch/x86/lib/getuser.S @@ -28,22 +28,20 @@ #include <linux/export.h> #include <linux/linkage.h> +#include <linux/objtool.h> #include <asm/page_types.h> #include <asm/errno.h> #include <asm/asm-offsets.h> #include <asm/thread_info.h> #include <asm/asm.h> #include <asm/smap.h> +#include <asm/runtime-const.h> #define ASM_BARRIER_NOSPEC ALTERNATIVE "", "lfence", X86_FEATURE_LFENCE_RDTSC .macro check_range size:req .if IS_ENABLED(CONFIG_X86_64) - movq $0x0123456789abcdef,%rdx - 1: - .pushsection runtime_ptr_USER_PTR_MAX,"a" - .long 1b - 8 - . - .popsection + RUNTIME_CONST_PTR USER_PTR_MAX, rdx cmp %rdx, %rax cmova %rdx, %rax .else @@ -62,6 +60,7 @@ .text SYM_FUNC_START(__get_user_1) + ANNOTATE_NOENDBR check_range size=1 ASM_STAC UACCESS movzbl (%_ASM_AX),%edx @@ -72,6 +71,7 @@ SYM_FUNC_END(__get_user_1) EXPORT_SYMBOL(__get_user_1) SYM_FUNC_START(__get_user_2) + ANNOTATE_NOENDBR check_range size=2 ASM_STAC UACCESS movzwl (%_ASM_AX),%edx @@ -82,6 +82,7 @@ SYM_FUNC_END(__get_user_2) EXPORT_SYMBOL(__get_user_2) SYM_FUNC_START(__get_user_4) + ANNOTATE_NOENDBR check_range size=4 ASM_STAC UACCESS movl (%_ASM_AX),%edx @@ -92,6 +93,7 @@ SYM_FUNC_END(__get_user_4) EXPORT_SYMBOL(__get_user_4) SYM_FUNC_START(__get_user_8) + ANNOTATE_NOENDBR #ifndef CONFIG_X86_64 xor %ecx,%ecx #endif @@ -111,6 +113,7 @@ EXPORT_SYMBOL(__get_user_8) /* .. and the same for __get_user, just without the range checks */ SYM_FUNC_START(__get_user_nocheck_1) + ANNOTATE_NOENDBR ASM_STAC ASM_BARRIER_NOSPEC UACCESS movzbl (%_ASM_AX),%edx @@ -121,6 +124,7 @@ SYM_FUNC_END(__get_user_nocheck_1) EXPORT_SYMBOL(__get_user_nocheck_1) SYM_FUNC_START(__get_user_nocheck_2) + ANNOTATE_NOENDBR ASM_STAC ASM_BARRIER_NOSPEC UACCESS movzwl (%_ASM_AX),%edx @@ -131,6 +135,7 @@ SYM_FUNC_END(__get_user_nocheck_2) EXPORT_SYMBOL(__get_user_nocheck_2) SYM_FUNC_START(__get_user_nocheck_4) + ANNOTATE_NOENDBR ASM_STAC ASM_BARRIER_NOSPEC UACCESS movl (%_ASM_AX),%edx @@ -141,6 +146,7 @@ SYM_FUNC_END(__get_user_nocheck_4) EXPORT_SYMBOL(__get_user_nocheck_4) SYM_FUNC_START(__get_user_nocheck_8) + ANNOTATE_NOENDBR ASM_STAC ASM_BARRIER_NOSPEC #ifdef CONFIG_X86_64 diff --git a/arch/x86/lib/hweight.S b/arch/x86/lib/hweight.S index 774bdf3e6f0a..edbeb3ecad38 100644 --- a/arch/x86/lib/hweight.S +++ b/arch/x86/lib/hweight.S @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include <linux/export.h> #include <linux/linkage.h> +#include <linux/objtool.h> #include <asm/asm.h> @@ -9,6 +10,7 @@ * %rdi: w */ SYM_FUNC_START(__sw_hweight32) + ANNOTATE_NOENDBR #ifdef CONFIG_X86_64 movl %edi, %eax # w @@ -42,6 +44,7 @@ EXPORT_SYMBOL(__sw_hweight32) */ #ifdef CONFIG_X86_64 SYM_FUNC_START(__sw_hweight64) + ANNOTATE_NOENDBR pushq %rdi pushq %rdx diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S index 1b60ae81ecd8..aa1f92ee6b2e 100644 --- a/arch/x86/lib/memmove_64.S +++ b/arch/x86/lib/memmove_64.S @@ -8,6 +8,7 @@ */ #include <linux/export.h> #include <linux/linkage.h> +#include <linux/cfi_types.h> #include <asm/cpufeatures.h> #include <asm/alternative.h> @@ -26,7 +27,7 @@ * Output: * rax: dest */ -SYM_FUNC_START(__memmove) +SYM_TYPED_FUNC_START(__memmove) mov %rdi, %rax diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S index 0199d56cb479..d66b710d628f 100644 --- a/arch/x86/lib/memset_64.S +++ b/arch/x86/lib/memset_64.S @@ -3,6 +3,7 @@ #include <linux/export.h> #include <linux/linkage.h> +#include <linux/cfi_types.h> #include <asm/cpufeatures.h> #include <asm/alternative.h> @@ -28,7 +29,7 @@ * only for the return value that is the same as the source input, * which the compiler could/should do much better anyway. */ -SYM_FUNC_START(__memset) +SYM_TYPED_FUNC_START(__memset) ALTERNATIVE "jmp memset_orig", "", X86_FEATURE_FSRS movq %rdi,%r9 diff --git a/arch/x86/lib/msr-reg.S b/arch/x86/lib/msr-reg.S index ebd259f31496..5ef8494896e8 100644 --- a/arch/x86/lib/msr-reg.S +++ b/arch/x86/lib/msr-reg.S @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include <linux/linkage.h> #include <linux/errno.h> +#include <linux/cfi_types.h> #include <asm/asm.h> #include <asm/msr.h> @@ -12,7 +13,7 @@ * */ .macro op_safe_regs op -SYM_FUNC_START(\op\()_safe_regs) +SYM_TYPED_FUNC_START(\op\()_safe_regs) pushq %rbx pushq %r12 movq %rdi, %r10 /* Save pointer */ diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S index 975c9c18263d..46d9e9b98a61 100644 --- a/arch/x86/lib/putuser.S +++ b/arch/x86/lib/putuser.S @@ -13,6 +13,7 @@ */ #include <linux/export.h> #include <linux/linkage.h> +#include <linux/objtool.h> #include <asm/thread_info.h> #include <asm/errno.h> #include <asm/asm.h> @@ -45,6 +46,7 @@ .text SYM_FUNC_START(__put_user_1) + ANNOTATE_NOENDBR check_range size=1 ASM_STAC 1: movb %al,(%_ASM_CX) @@ -55,6 +57,7 @@ SYM_FUNC_END(__put_user_1) EXPORT_SYMBOL(__put_user_1) SYM_FUNC_START(__put_user_nocheck_1) + ANNOTATE_NOENDBR ASM_STAC 2: movb %al,(%_ASM_CX) xor %ecx,%ecx @@ -64,6 +67,7 @@ SYM_FUNC_END(__put_user_nocheck_1) EXPORT_SYMBOL(__put_user_nocheck_1) SYM_FUNC_START(__put_user_2) + ANNOTATE_NOENDBR check_range size=2 ASM_STAC 3: movw %ax,(%_ASM_CX) @@ -74,6 +78,7 @@ SYM_FUNC_END(__put_user_2) EXPORT_SYMBOL(__put_user_2) SYM_FUNC_START(__put_user_nocheck_2) + ANNOTATE_NOENDBR ASM_STAC 4: movw %ax,(%_ASM_CX) xor %ecx,%ecx @@ -83,6 +88,7 @@ SYM_FUNC_END(__put_user_nocheck_2) EXPORT_SYMBOL(__put_user_nocheck_2) SYM_FUNC_START(__put_user_4) + ANNOTATE_NOENDBR check_range size=4 ASM_STAC 5: movl %eax,(%_ASM_CX) @@ -93,6 +99,7 @@ SYM_FUNC_END(__put_user_4) EXPORT_SYMBOL(__put_user_4) SYM_FUNC_START(__put_user_nocheck_4) + ANNOTATE_NOENDBR ASM_STAC 6: movl %eax,(%_ASM_CX) xor %ecx,%ecx @@ -102,6 +109,7 @@ SYM_FUNC_END(__put_user_nocheck_4) EXPORT_SYMBOL(__put_user_nocheck_4) SYM_FUNC_START(__put_user_8) + ANNOTATE_NOENDBR check_range size=8 ASM_STAC 7: mov %_ASM_AX,(%_ASM_CX) @@ -115,6 +123,7 @@ SYM_FUNC_END(__put_user_8) EXPORT_SYMBOL(__put_user_8) SYM_FUNC_START(__put_user_nocheck_8) + ANNOTATE_NOENDBR ASM_STAC 9: mov %_ASM_AX,(%_ASM_CX) #ifdef CONFIG_X86_32 diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 391059b2c6fb..a26c43abd47d 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -326,6 +326,7 @@ SYM_FUNC_END(retbleed_untrain_ret) #if defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO) SYM_FUNC_START(entry_untrain_ret) + ANNOTATE_NOENDBR ALTERNATIVE JMP_RETBLEED_UNTRAIN_RET, JMP_SRSO_UNTRAIN_RET, X86_FEATURE_SRSO SYM_FUNC_END(entry_untrain_ret) __EXPORT_THUNK(entry_untrain_ret) @@ -342,7 +343,7 @@ SYM_FUNC_START(call_depth_return_thunk) * case. */ CALL_THUNKS_DEBUG_INC_RETS - shlq $5, PER_CPU_VAR(pcpu_hot + X86_call_depth) + shlq $5, PER_CPU_VAR(__x86_call_depth) jz 1f ANNOTATE_UNRET_SAFE ret diff --git a/arch/x86/math-emu/control_w.h b/arch/x86/math-emu/control_w.h index 60f4dcc5edc3..93cbc89b34e2 100644 --- a/arch/x86/math-emu/control_w.h +++ b/arch/x86/math-emu/control_w.h @@ -11,7 +11,7 @@ #ifndef _CONTROLW_H_ #define _CONTROLW_H_ -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define _Const_(x) $##x #else #define _Const_(x) x diff --git a/arch/x86/math-emu/exception.h b/arch/x86/math-emu/exception.h index 75230b977577..59961d350bc4 100644 --- a/arch/x86/math-emu/exception.h +++ b/arch/x86/math-emu/exception.h @@ -10,7 +10,7 @@ #ifndef _EXCEPTION_H_ #define _EXCEPTION_H_ -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define Const_(x) $##x #else #define Const_(x) x @@ -37,7 +37,7 @@ #define PRECISION_LOST_UP Const_((EX_Precision | SW_C1)) #define PRECISION_LOST_DOWN Const_(EX_Precision) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifdef DEBUG #define EXCEPTION(x) { printk("exception in %s at line %d\n", \ @@ -46,6 +46,6 @@ #define EXCEPTION(x) FPU_exception(x) #endif -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _EXCEPTION_H_ */ diff --git a/arch/x86/math-emu/fpu_emu.h b/arch/x86/math-emu/fpu_emu.h index 0c122226ca56..def569c50b76 100644 --- a/arch/x86/math-emu/fpu_emu.h +++ b/arch/x86/math-emu/fpu_emu.h @@ -20,7 +20,7 @@ */ #define PECULIAR_486 -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #include "fpu_asm.h" #define Const(x) $##x #else @@ -68,7 +68,7 @@ #define FPU_Exception Const(0x80000000) /* Added to tag returns. */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include "fpu_system.h" @@ -213,6 +213,6 @@ asmlinkage int FPU_round(FPU_REG *arg, unsigned int extent, int dummy, #include "fpu_proto.h" #endif -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _FPU_EMU_H_ */ diff --git a/arch/x86/math-emu/status_w.h b/arch/x86/math-emu/status_w.h index b77bafec9526..f642957330ef 100644 --- a/arch/x86/math-emu/status_w.h +++ b/arch/x86/math-emu/status_w.h @@ -13,7 +13,7 @@ #include "fpu_emu.h" /* for definition of PECULIAR_486 */ -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define Const__(x) $##x #else #define Const__(x) x @@ -37,7 +37,7 @@ #define SW_Exc_Mask Const__(0x27f) /* Status word exception bit mask */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define COMP_A_gt_B 1 #define COMP_A_eq_B 2 @@ -63,6 +63,6 @@ static inline void setcc(int cc) # define clear_C1() #endif /* PECULIAR_486 */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _STATUS_H_ */ diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c index 5ab7bd2f1983..bd5d101c5c37 100644 --- a/arch/x86/mm/ident_map.c +++ b/arch/x86/mm/ident_map.c @@ -101,9 +101,7 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page, pmd_t *pmd; bool use_gbpage; - next = (addr & PUD_MASK) + PUD_SIZE; - if (next > end) - next = end; + next = pud_addr_end(addr, end); /* if this is already a gbpage, this portion is already mapped */ if (pud_leaf(*pud)) @@ -154,10 +152,7 @@ static int ident_p4d_init(struct x86_mapping_info *info, p4d_t *p4d_page, p4d_t *p4d = p4d_page + p4d_index(addr); pud_t *pud; - next = (addr & P4D_MASK) + P4D_SIZE; - if (next > end) - next = end; - + next = p4d_addr_end(addr, end); if (p4d_present(*p4d)) { pud = pud_offset(p4d, 0); result = ident_pud_init(info, pud, addr, next); @@ -199,10 +194,7 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page, pgd_t *pgd = pgd_page + pgd_index(addr); p4d_t *p4d; - next = (addr & PGDIR_MASK) + PGDIR_SIZE; - if (next > end) - next = end; - + next = pgd_addr_end(addr, end); if (pgd_present(*pgd)) { p4d = p4d_offset(pgd, 0); result = ident_p4d_init(info, p4d, addr, next); diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 62aa4d66a032..bfa444a7dbb0 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -645,8 +645,13 @@ static void __init memory_map_top_down(unsigned long map_start, */ addr = memblock_phys_alloc_range(PMD_SIZE, PMD_SIZE, map_start, map_end); - memblock_phys_free(addr, PMD_SIZE); - real_end = addr + PMD_SIZE; + if (!addr) { + pr_warn("Failed to release memory for alloc_low_pages()"); + real_end = max(map_start, ALIGN_DOWN(map_end, PMD_SIZE)); + } else { + memblock_phys_free(addr, PMD_SIZE); + real_end = addr + PMD_SIZE; + } /* step_size need to be small so pgt_buf from BRK could cover it */ step_size = PMD_SIZE; diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index ac41b1e0940d..f288aad8dc74 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -582,7 +582,7 @@ static void __init lowmem_pfn_init(void) "only %luMB highmem pages available, ignoring highmem size of %luMB!\n" #define MSG_HIGHMEM_TRIMMED \ - "Warning: only 4GB will be used. Use a HIGHMEM64G enabled kernel!\n" + "Warning: only 4GB will be used. Support for for CONFIG_HIGHMEM64G was removed!\n" /* * We have more RAM than fits into lowmem - we try to put it into * highmem, also taking the highmem=x boot parameter into account: @@ -606,18 +606,13 @@ static void __init highmem_pfn_init(void) #ifndef CONFIG_HIGHMEM /* Maximum memory usable is what is directly addressable */ printk(KERN_WARNING "Warning only %ldMB will be used.\n", MAXMEM>>20); - if (max_pfn > MAX_NONPAE_PFN) - printk(KERN_WARNING "Use a HIGHMEM64G enabled kernel.\n"); - else - printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n"); + printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n"); max_pfn = MAXMEM_PFN; #else /* !CONFIG_HIGHMEM */ -#ifndef CONFIG_HIGHMEM64G if (max_pfn > MAX_NONPAE_PFN) { max_pfn = MAX_NONPAE_PFN; printk(KERN_WARNING MSG_HIGHMEM_TRIMMED); } -#endif /* !CONFIG_HIGHMEM64G */ #endif /* !CONFIG_HIGHMEM */ } diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 38ff7791a9c7..42c90b420773 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -503,6 +503,14 @@ void iounmap(volatile void __iomem *addr) } EXPORT_SYMBOL(iounmap); +void *arch_memremap_wb(phys_addr_t phys_addr, size_t size, unsigned long flags) +{ + if ((flags & MEMREMAP_DEC) || cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) + return (void __force *)ioremap_cache(phys_addr, size); + + return (void __force *)ioremap_encrypted(phys_addr, size); +} + /* * Convert a physical pointer to a virtual kernel pointer for /dev/mem * access diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index 11a93542d198..3c306de52fd4 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -113,8 +113,14 @@ void __init kernel_randomize_memory(void) memory_tb = DIV_ROUND_UP(max_pfn << PAGE_SHIFT, 1UL << TB_SHIFT) + CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING; - /* Adapt physical memory region size based on available memory */ - if (memory_tb < kaslr_regions[0].size_tb) + /* + * Adapt physical memory region size based on available memory, + * except when CONFIG_PCI_P2PDMA is enabled. P2PDMA exposes the + * device BAR space assuming the direct map space is large enough + * for creating a ZONE_DEVICE mapping in the direct map corresponding + * to the physical BAR address. + */ + if (!IS_ENABLED(CONFIG_PCI_P2PDMA) && (memory_tb < kaslr_regions[0].size_tb)) kaslr_regions[0].size_tb = memory_tb; /* diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S index e25288ee33c2..f8a33b25ae86 100644 --- a/arch/x86/mm/mem_encrypt_boot.S +++ b/arch/x86/mm/mem_encrypt_boot.S @@ -72,6 +72,7 @@ SYM_FUNC_START(sme_encrypt_execute) SYM_FUNC_END(sme_encrypt_execute) SYM_FUNC_START(__enc_copy) + ANNOTATE_NOENDBR /* * Routine used to encrypt memory in place. * This routine must be run outside of the kernel proper since diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index b8a6ffffb451..5ed2109211da 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -84,7 +84,6 @@ static unsigned long mmap_base(unsigned long rnd, unsigned long task_size, { unsigned long gap = rlim_stack->rlim_cur; unsigned long pad = stack_maxrandom_size(task_size) + stack_guard_gap; - unsigned long gap_min, gap_max; /* Values close to RLIM_INFINITY can overflow. */ if (gap + pad > gap) @@ -94,13 +93,7 @@ static unsigned long mmap_base(unsigned long rnd, unsigned long task_size, * Top of mmap area (just below the process stack). * Leave an at least ~128 MB hole with possible stack randomization. */ - gap_min = SIZE_128M; - gap_max = (task_size / 6) * 5; - - if (gap < gap_min) - gap = gap_min; - else if (gap > gap_max) - gap = gap_max; + gap = clamp(gap, SIZE_128M, (task_size / 6) * 5); return PAGE_ALIGN(task_size - gap - rnd); } diff --git a/arch/x86/mm/pat/cpa-test.c b/arch/x86/mm/pat/cpa-test.c index 3d2f7f0a6ed1..ad3c1feec990 100644 --- a/arch/x86/mm/pat/cpa-test.c +++ b/arch/x86/mm/pat/cpa-test.c @@ -183,7 +183,7 @@ static int pageattr_test(void) break; case 1: - err = change_page_attr_set(addrs, len[1], PAGE_CPA_TEST, 1); + err = change_page_attr_set(addrs, len[i], PAGE_CPA_TEST, 1); break; case 2: diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c index feb8cc6a12bf..e40861c9cb90 100644 --- a/arch/x86/mm/pat/memtype.c +++ b/arch/x86/mm/pat/memtype.c @@ -43,6 +43,7 @@ #include <linux/fs.h> #include <linux/rbtree.h> +#include <asm/cpu_device_id.h> #include <asm/cacheflush.h> #include <asm/cacheinfo.h> #include <asm/processor.h> @@ -290,9 +291,8 @@ void __init pat_bp_init(void) return; } - if ((c->x86_vendor == X86_VENDOR_INTEL) && - (((c->x86 == 0x6) && (c->x86_model <= 0xd)) || - ((c->x86 == 0xf) && (c->x86_model <= 0x6)))) { + if ((c->x86_vfm >= INTEL_PENTIUM_PRO && c->x86_vfm <= INTEL_PENTIUM_M_DOTHAN) || + (c->x86_vfm >= INTEL_P4_WILLAMETTE && c->x86_vfm <= INTEL_P4_CEDARMILL)) { /* * PAT support with the lower four entries. Intel Pentium 2, * 3, M, and 4 are affected by PAT errata, which makes the diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index ef4514d64c05..72405d315b41 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -73,6 +73,7 @@ static DEFINE_SPINLOCK(cpa_lock); #define CPA_ARRAY 2 #define CPA_PAGES_ARRAY 4 #define CPA_NO_CHECK_ALIAS 8 /* Do not search for aliases */ +#define CPA_COLLAPSE 16 /* try to collapse large pages */ static inline pgprot_t cachemode2pgprot(enum page_cache_mode pcm) { @@ -105,6 +106,18 @@ static void split_page_count(int level) direct_pages_count[level - 1] += PTRS_PER_PTE; } +static void collapse_page_count(int level) +{ + direct_pages_count[level]++; + if (system_state == SYSTEM_RUNNING) { + if (level == PG_LEVEL_2M) + count_vm_event(DIRECT_MAP_LEVEL2_COLLAPSE); + else if (level == PG_LEVEL_1G) + count_vm_event(DIRECT_MAP_LEVEL3_COLLAPSE); + } + direct_pages_count[level - 1] -= PTRS_PER_PTE; +} + void arch_report_meminfo(struct seq_file *m) { seq_printf(m, "DirectMap4k: %8lu kB\n", @@ -122,6 +135,7 @@ void arch_report_meminfo(struct seq_file *m) } #else static inline void split_page_count(int level) { } +static inline void collapse_page_count(int level) { } #endif #ifdef CONFIG_X86_CPA_STATISTICS @@ -211,14 +225,14 @@ within(unsigned long addr, unsigned long start, unsigned long end) return addr >= start && addr < end; } +#ifdef CONFIG_X86_64 + static inline int within_inclusive(unsigned long addr, unsigned long start, unsigned long end) { return addr >= start && addr <= end; } -#ifdef CONFIG_X86_64 - /* * The kernel image is mapped into two places in the virtual address space * (addresses without KASLR, of course): @@ -394,16 +408,49 @@ static void __cpa_flush_tlb(void *data) flush_tlb_one_kernel(fix_addr(__cpa_addr(cpa, i))); } -static void cpa_flush(struct cpa_data *data, int cache) +static int collapse_large_pages(unsigned long addr, struct list_head *pgtables); + +static void cpa_collapse_large_pages(struct cpa_data *cpa) +{ + unsigned long start, addr, end; + struct ptdesc *ptdesc, *tmp; + LIST_HEAD(pgtables); + int collapsed = 0; + int i; + + if (cpa->flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) { + for (i = 0; i < cpa->numpages; i++) + collapsed += collapse_large_pages(__cpa_addr(cpa, i), + &pgtables); + } else { + addr = __cpa_addr(cpa, 0); + start = addr & PMD_MASK; + end = addr + PAGE_SIZE * cpa->numpages; + + for (addr = start; within(addr, start, end); addr += PMD_SIZE) + collapsed += collapse_large_pages(addr, &pgtables); + } + + if (!collapsed) + return; + + flush_tlb_all(); + + list_for_each_entry_safe(ptdesc, tmp, &pgtables, pt_list) { + list_del(&ptdesc->pt_list); + __free_page(ptdesc_page(ptdesc)); + } +} + +static void cpa_flush(struct cpa_data *cpa, int cache) { - struct cpa_data *cpa = data; unsigned int i; BUG_ON(irqs_disabled() && !early_boot_irqs_disabled); if (cache && !static_cpu_has(X86_FEATURE_CLFLUSH)) { cpa_flush_all(cache); - return; + goto collapse_large_pages; } if (cpa->force_flush_all || cpa->numpages > tlb_single_page_flush_ceiling) @@ -412,7 +459,7 @@ static void cpa_flush(struct cpa_data *data, int cache) on_each_cpu(__cpa_flush_tlb, cpa, 1); if (!cache) - return; + goto collapse_large_pages; mb(); for (i = 0; i < cpa->numpages; i++) { @@ -428,6 +475,10 @@ static void cpa_flush(struct cpa_data *data, int cache) clflush_cache_range_opt((void *)fix_addr(addr), PAGE_SIZE); } mb(); + +collapse_large_pages: + if (cpa->flags & CPA_COLLAPSE) + cpa_collapse_large_pages(cpa); } static bool overlaps(unsigned long r1_start, unsigned long r1_end, @@ -1197,6 +1248,161 @@ static int split_large_page(struct cpa_data *cpa, pte_t *kpte, return 0; } +static int collapse_pmd_page(pmd_t *pmd, unsigned long addr, + struct list_head *pgtables) +{ + pmd_t _pmd, old_pmd; + pte_t *pte, first; + unsigned long pfn; + pgprot_t pgprot; + int i = 0; + + addr &= PMD_MASK; + pte = pte_offset_kernel(pmd, addr); + first = *pte; + pfn = pte_pfn(first); + + /* Make sure alignment is suitable */ + if (PFN_PHYS(pfn) & ~PMD_MASK) + return 0; + + /* The page is 4k intentionally */ + if (pte_flags(first) & _PAGE_KERNEL_4K) + return 0; + + /* Check that the rest of PTEs are compatible with the first one */ + for (i = 1, pte++; i < PTRS_PER_PTE; i++, pte++) { + pte_t entry = *pte; + + if (!pte_present(entry)) + return 0; + if (pte_flags(entry) != pte_flags(first)) + return 0; + if (pte_pfn(entry) != pte_pfn(first) + i) + return 0; + } + + old_pmd = *pmd; + + /* Success: set up a large page */ + pgprot = pgprot_4k_2_large(pte_pgprot(first)); + pgprot_val(pgprot) |= _PAGE_PSE; + _pmd = pfn_pmd(pfn, pgprot); + set_pmd(pmd, _pmd); + + /* Queue the page table to be freed after TLB flush */ + list_add(&page_ptdesc(pmd_page(old_pmd))->pt_list, pgtables); + + if (IS_ENABLED(CONFIG_X86_32) && !SHARED_KERNEL_PMD) { + struct page *page; + + /* Update all PGD tables to use the same large page */ + list_for_each_entry(page, &pgd_list, lru) { + pgd_t *pgd = (pgd_t *)page_address(page) + pgd_index(addr); + p4d_t *p4d = p4d_offset(pgd, addr); + pud_t *pud = pud_offset(p4d, addr); + pmd_t *pmd = pmd_offset(pud, addr); + /* Something is wrong if entries doesn't match */ + if (WARN_ON(pmd_val(old_pmd) != pmd_val(*pmd))) + continue; + set_pmd(pmd, _pmd); + } + } + + if (virt_addr_valid(addr) && pfn_range_is_mapped(pfn, pfn + 1)) + collapse_page_count(PG_LEVEL_2M); + + return 1; +} + +static int collapse_pud_page(pud_t *pud, unsigned long addr, + struct list_head *pgtables) +{ + unsigned long pfn; + pmd_t *pmd, first; + int i; + + if (!direct_gbpages) + return 0; + + addr &= PUD_MASK; + pmd = pmd_offset(pud, addr); + first = *pmd; + + /* + * To restore PUD page all PMD entries must be large and + * have suitable alignment + */ + pfn = pmd_pfn(first); + if (!pmd_leaf(first) || (PFN_PHYS(pfn) & ~PUD_MASK)) + return 0; + + /* + * To restore PUD page, all following PMDs must be compatible with the + * first one. + */ + for (i = 1, pmd++; i < PTRS_PER_PMD; i++, pmd++) { + pmd_t entry = *pmd; + + if (!pmd_present(entry) || !pmd_leaf(entry)) + return 0; + if (pmd_flags(entry) != pmd_flags(first)) + return 0; + if (pmd_pfn(entry) != pmd_pfn(first) + i * PTRS_PER_PTE) + return 0; + } + + /* Restore PUD page and queue page table to be freed after TLB flush */ + list_add(&page_ptdesc(pud_page(*pud))->pt_list, pgtables); + set_pud(pud, pfn_pud(pfn, pmd_pgprot(first))); + + if (virt_addr_valid(addr) && pfn_range_is_mapped(pfn, pfn + 1)) + collapse_page_count(PG_LEVEL_1G); + + return 1; +} + +/* + * Collapse PMD and PUD pages in the kernel mapping around the address where + * possible. + * + * Caller must flush TLB and free page tables queued on the list before + * touching the new entries. CPU must not see TLB entries of different size + * with different attributes. + */ +static int collapse_large_pages(unsigned long addr, struct list_head *pgtables) +{ + int collapsed = 0; + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + + addr &= PMD_MASK; + + spin_lock(&pgd_lock); + pgd = pgd_offset_k(addr); + if (pgd_none(*pgd)) + goto out; + p4d = p4d_offset(pgd, addr); + if (p4d_none(*p4d)) + goto out; + pud = pud_offset(p4d, addr); + if (!pud_present(*pud) || pud_leaf(*pud)) + goto out; + pmd = pmd_offset(pud, addr); + if (!pmd_present(*pmd) || pmd_leaf(*pmd)) + goto out; + + collapsed = collapse_pmd_page(pmd, addr, pgtables); + if (collapsed) + collapsed += collapse_pud_page(pud, addr, pgtables); + +out: + spin_unlock(&pgd_lock); + return collapsed; +} + static bool try_to_free_pte_page(pte_t *pte) { int i; @@ -1942,19 +2148,6 @@ static inline int cpa_clear_pages_array(struct page **pages, int numpages, CPA_PAGES_ARRAY, pages); } -/* - * __set_memory_prot is an internal helper for callers that have been passed - * a pgprot_t value from upper layers and a reservation has already been taken. - * If you want to set the pgprot to a specific page protocol, use the - * set_memory_xx() functions. - */ -int __set_memory_prot(unsigned long addr, int numpages, pgprot_t prot) -{ - return change_page_attr_set_clr(&addr, numpages, prot, - __pgprot(~pgprot_val(prot)), 0, 0, - NULL); -} - int _set_memory_uc(unsigned long addr, int numpages) { /* @@ -2120,7 +2313,8 @@ int set_memory_rox(unsigned long addr, int numpages) if (__supported_pte_mask & _PAGE_NX) clr.pgprot |= _PAGE_NX; - return change_page_attr_clear(&addr, numpages, clr, 0); + return change_page_attr_set_clr(&addr, numpages, __pgprot(0), clr, 0, + CPA_COLLAPSE, NULL); } int set_memory_rw(unsigned long addr, int numpages) @@ -2147,7 +2341,8 @@ int set_memory_p(unsigned long addr, int numpages) int set_memory_4k(unsigned long addr, int numpages) { - return change_page_attr_set_clr(&addr, numpages, __pgprot(0), + return change_page_attr_set_clr(&addr, numpages, + __pgprot(_PAGE_KERNEL_4K), __pgprot(0), 1, 0, NULL); } @@ -2420,7 +2615,7 @@ static int __set_pages_np(struct page *page, int numpages) .pgd = NULL, .numpages = numpages, .mask_set = __pgprot(0), - .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW), + .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY), .flags = CPA_NO_CHECK_ALIAS }; /* @@ -2507,7 +2702,7 @@ int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, .pgd = pgd, .numpages = numpages, .mask_set = __pgprot(0), - .mask_clr = __pgprot(~page_flags & (_PAGE_NX|_PAGE_RW)), + .mask_clr = __pgprot(~page_flags & (_PAGE_NX|_PAGE_RW|_PAGE_DIRTY)), .flags = CPA_NO_CHECK_ALIAS, }; @@ -2550,7 +2745,7 @@ int __init kernel_unmap_pages_in_pgd(pgd_t *pgd, unsigned long address, .pgd = pgd, .numpages = numpages, .mask_set = __pgprot(0), - .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW), + .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY), .flags = CPA_NO_CHECK_ALIAS, }; diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 1fef5ad32d5a..cec321fb74f2 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -12,59 +12,15 @@ phys_addr_t physical_mask __ro_after_init = (1ULL << __PHYSICAL_MASK_SHIFT) - 1; EXPORT_SYMBOL(physical_mask); #endif -#ifdef CONFIG_HIGHPTE -#define PGTABLE_HIGHMEM __GFP_HIGHMEM -#else -#define PGTABLE_HIGHMEM 0 -#endif - -#ifndef CONFIG_PARAVIRT -#ifndef CONFIG_PT_RECLAIM -static inline -void paravirt_tlb_remove_table(struct mmu_gather *tlb, void *table) -{ - struct ptdesc *ptdesc = (struct ptdesc *)table; - - pagetable_dtor(ptdesc); - tlb_remove_page(tlb, ptdesc_page(ptdesc)); -} -#else -static inline -void paravirt_tlb_remove_table(struct mmu_gather *tlb, void *table) -{ - tlb_remove_table(tlb, table); -} -#endif /* !CONFIG_PT_RECLAIM */ -#endif /* !CONFIG_PARAVIRT */ - -gfp_t __userpte_alloc_gfp = GFP_PGTABLE_USER | PGTABLE_HIGHMEM; - pgtable_t pte_alloc_one(struct mm_struct *mm) { - return __pte_alloc_one(mm, __userpte_alloc_gfp); -} - -static int __init setup_userpte(char *arg) -{ - if (!arg) - return -EINVAL; - - /* - * "userpte=nohigh" disables allocation of user pagetables in - * high memory. - */ - if (strcmp(arg, "nohigh") == 0) - __userpte_alloc_gfp &= ~__GFP_HIGHMEM; - else - return -EINVAL; - return 0; + return __pte_alloc_one(mm, GFP_PGTABLE_USER); } -early_param("userpte", setup_userpte); void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte) { paravirt_release_pte(page_to_pfn(pte)); - paravirt_tlb_remove_table(tlb, page_ptdesc(pte)); + tlb_remove_table(tlb, page_ptdesc(pte)); } #if CONFIG_PGTABLE_LEVELS > 2 @@ -78,21 +34,21 @@ void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) #ifdef CONFIG_X86_PAE tlb->need_flush_all = 1; #endif - paravirt_tlb_remove_table(tlb, virt_to_ptdesc(pmd)); + tlb_remove_table(tlb, virt_to_ptdesc(pmd)); } #if CONFIG_PGTABLE_LEVELS > 3 void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud) { paravirt_release_pud(__pa(pud) >> PAGE_SHIFT); - paravirt_tlb_remove_table(tlb, virt_to_ptdesc(pud)); + tlb_remove_table(tlb, virt_to_ptdesc(pud)); } #if CONFIG_PGTABLE_LEVELS > 4 void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d) { paravirt_release_p4d(__pa(p4d) >> PAGE_SHIFT); - paravirt_tlb_remove_table(tlb, virt_to_ptdesc(p4d)); + tlb_remove_table(tlb, virt_to_ptdesc(p4d)); } #endif /* CONFIG_PGTABLE_LEVELS > 4 */ #endif /* CONFIG_PGTABLE_LEVELS > 3 */ diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 6cf881a942bb..0925768d00cb 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -74,13 +74,15 @@ * use different names for each of them: * * ASID - [0, TLB_NR_DYN_ASIDS-1] - * the canonical identifier for an mm + * the canonical identifier for an mm, dynamically allocated on each CPU + * [TLB_NR_DYN_ASIDS, MAX_ASID_AVAILABLE-1] + * the canonical, global identifier for an mm, identical across all CPUs * - * kPCID - [1, TLB_NR_DYN_ASIDS] + * kPCID - [1, MAX_ASID_AVAILABLE] * the value we write into the PCID part of CR3; corresponds to the * ASID+1, because PCID 0 is special. * - * uPCID - [2048 + 1, 2048 + TLB_NR_DYN_ASIDS] + * uPCID - [2048 + 1, 2048 + MAX_ASID_AVAILABLE] * for KPTI each mm has two address spaces and thus needs two * PCID values, but we can still do with a single ASID denomination * for each mm. Corresponds to kPCID + 2048. @@ -225,6 +227,20 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen, return; } + /* + * TLB consistency for global ASIDs is maintained with hardware assisted + * remote TLB flushing. Global ASIDs are always up to date. + */ + if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) { + u16 global_asid = mm_global_asid(next); + + if (global_asid) { + *new_asid = global_asid; + *need_flush = false; + return; + } + } + if (this_cpu_read(cpu_tlbstate.invalidate_other)) clear_asid_other(); @@ -252,6 +268,268 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen, } /* + * Global ASIDs are allocated for multi-threaded processes that are + * active on multiple CPUs simultaneously, giving each of those + * processes the same PCID on every CPU, for use with hardware-assisted + * TLB shootdown on remote CPUs, like AMD INVLPGB or Intel RAR. + * + * These global ASIDs are held for the lifetime of the process. + */ +static DEFINE_RAW_SPINLOCK(global_asid_lock); +static u16 last_global_asid = MAX_ASID_AVAILABLE; +static DECLARE_BITMAP(global_asid_used, MAX_ASID_AVAILABLE); +static DECLARE_BITMAP(global_asid_freed, MAX_ASID_AVAILABLE); +static int global_asid_available = MAX_ASID_AVAILABLE - TLB_NR_DYN_ASIDS - 1; + +/* + * When the search for a free ASID in the global ASID space reaches + * MAX_ASID_AVAILABLE, a global TLB flush guarantees that previously + * freed global ASIDs are safe to re-use. + * + * This way the global flush only needs to happen at ASID rollover + * time, and not at ASID allocation time. + */ +static void reset_global_asid_space(void) +{ + lockdep_assert_held(&global_asid_lock); + + invlpgb_flush_all_nonglobals(); + + /* + * The TLB flush above makes it safe to re-use the previously + * freed global ASIDs. + */ + bitmap_andnot(global_asid_used, global_asid_used, + global_asid_freed, MAX_ASID_AVAILABLE); + bitmap_clear(global_asid_freed, 0, MAX_ASID_AVAILABLE); + + /* Restart the search from the start of global ASID space. */ + last_global_asid = TLB_NR_DYN_ASIDS; +} + +static u16 allocate_global_asid(void) +{ + u16 asid; + + lockdep_assert_held(&global_asid_lock); + + /* The previous allocation hit the edge of available address space */ + if (last_global_asid >= MAX_ASID_AVAILABLE - 1) + reset_global_asid_space(); + + asid = find_next_zero_bit(global_asid_used, MAX_ASID_AVAILABLE, last_global_asid); + + if (asid >= MAX_ASID_AVAILABLE && !global_asid_available) { + /* This should never happen. */ + VM_WARN_ONCE(1, "Unable to allocate global ASID despite %d available\n", + global_asid_available); + return 0; + } + + /* Claim this global ASID. */ + __set_bit(asid, global_asid_used); + last_global_asid = asid; + global_asid_available--; + return asid; +} + +/* + * Check whether a process is currently active on more than @threshold CPUs. + * This is a cheap estimation on whether or not it may make sense to assign + * a global ASID to this process, and use broadcast TLB invalidation. + */ +static bool mm_active_cpus_exceeds(struct mm_struct *mm, int threshold) +{ + int count = 0; + int cpu; + + /* This quick check should eliminate most single threaded programs. */ + if (cpumask_weight(mm_cpumask(mm)) <= threshold) + return false; + + /* Slower check to make sure. */ + for_each_cpu(cpu, mm_cpumask(mm)) { + /* Skip the CPUs that aren't really running this process. */ + if (per_cpu(cpu_tlbstate.loaded_mm, cpu) != mm) + continue; + + if (per_cpu(cpu_tlbstate_shared.is_lazy, cpu)) + continue; + + if (++count > threshold) + return true; + } + return false; +} + +/* + * Assign a global ASID to the current process, protecting against + * races between multiple threads in the process. + */ +static void use_global_asid(struct mm_struct *mm) +{ + u16 asid; + + guard(raw_spinlock_irqsave)(&global_asid_lock); + + /* This process is already using broadcast TLB invalidation. */ + if (mm_global_asid(mm)) + return; + + /* + * The last global ASID was consumed while waiting for the lock. + * + * If this fires, a more aggressive ASID reuse scheme might be + * needed. + */ + if (!global_asid_available) { + VM_WARN_ONCE(1, "Ran out of global ASIDs\n"); + return; + } + + asid = allocate_global_asid(); + if (!asid) + return; + + mm_assign_global_asid(mm, asid); +} + +void mm_free_global_asid(struct mm_struct *mm) +{ + if (!cpu_feature_enabled(X86_FEATURE_INVLPGB)) + return; + + if (!mm_global_asid(mm)) + return; + + guard(raw_spinlock_irqsave)(&global_asid_lock); + + /* The global ASID can be re-used only after flush at wrap-around. */ +#ifdef CONFIG_BROADCAST_TLB_FLUSH + __set_bit(mm->context.global_asid, global_asid_freed); + + mm->context.global_asid = 0; + global_asid_available++; +#endif +} + +/* + * Is the mm transitioning from a CPU-local ASID to a global ASID? + */ +static bool mm_needs_global_asid(struct mm_struct *mm, u16 asid) +{ + u16 global_asid = mm_global_asid(mm); + + if (!cpu_feature_enabled(X86_FEATURE_INVLPGB)) + return false; + + /* Process is transitioning to a global ASID */ + if (global_asid && asid != global_asid) + return true; + + return false; +} + +/* + * x86 has 4k ASIDs (2k when compiled with KPTI), but the largest x86 + * systems have over 8k CPUs. Because of this potential ASID shortage, + * global ASIDs are handed out to processes that have frequent TLB + * flushes and are active on 4 or more CPUs simultaneously. + */ +static void consider_global_asid(struct mm_struct *mm) +{ + if (!cpu_feature_enabled(X86_FEATURE_INVLPGB)) + return; + + /* Check every once in a while. */ + if ((current->pid & 0x1f) != (jiffies & 0x1f)) + return; + + /* + * Assign a global ASID if the process is active on + * 4 or more CPUs simultaneously. + */ + if (mm_active_cpus_exceeds(mm, 3)) + use_global_asid(mm); +} + +static void finish_asid_transition(struct flush_tlb_info *info) +{ + struct mm_struct *mm = info->mm; + int bc_asid = mm_global_asid(mm); + int cpu; + + if (!mm_in_asid_transition(mm)) + return; + + for_each_cpu(cpu, mm_cpumask(mm)) { + /* + * The remote CPU is context switching. Wait for that to + * finish, to catch the unlikely case of it switching to + * the target mm with an out of date ASID. + */ + while (READ_ONCE(per_cpu(cpu_tlbstate.loaded_mm, cpu)) == LOADED_MM_SWITCHING) + cpu_relax(); + + if (READ_ONCE(per_cpu(cpu_tlbstate.loaded_mm, cpu)) != mm) + continue; + + /* + * If at least one CPU is not using the global ASID yet, + * send a TLB flush IPI. The IPI should cause stragglers + * to transition soon. + * + * This can race with the CPU switching to another task; + * that results in a (harmless) extra IPI. + */ + if (READ_ONCE(per_cpu(cpu_tlbstate.loaded_mm_asid, cpu)) != bc_asid) { + flush_tlb_multi(mm_cpumask(info->mm), info); + return; + } + } + + /* All the CPUs running this process are using the global ASID. */ + mm_clear_asid_transition(mm); +} + +static void broadcast_tlb_flush(struct flush_tlb_info *info) +{ + bool pmd = info->stride_shift == PMD_SHIFT; + unsigned long asid = mm_global_asid(info->mm); + unsigned long addr = info->start; + + /* + * TLB flushes with INVLPGB are kicked off asynchronously. + * The inc_mm_tlb_gen() guarantees page table updates are done + * before these TLB flushes happen. + */ + if (info->end == TLB_FLUSH_ALL) { + invlpgb_flush_single_pcid_nosync(kern_pcid(asid)); + /* Do any CPUs supporting INVLPGB need PTI? */ + if (cpu_feature_enabled(X86_FEATURE_PTI)) + invlpgb_flush_single_pcid_nosync(user_pcid(asid)); + } else do { + unsigned long nr = 1; + + if (info->stride_shift <= PMD_SHIFT) { + nr = (info->end - addr) >> info->stride_shift; + nr = clamp_val(nr, 1, invlpgb_count_max); + } + + invlpgb_flush_user_nr_nosync(kern_pcid(asid), addr, nr, pmd); + if (cpu_feature_enabled(X86_FEATURE_PTI)) + invlpgb_flush_user_nr_nosync(user_pcid(asid), addr, nr, pmd); + + addr += nr << info->stride_shift; + } while (addr < info->end); + + finish_asid_transition(info); + + /* Wait for the INVLPGBs kicked off above to finish. */ + __tlbsync(); +} + +/* * Given an ASID, flush the corresponding user ASID. We can delay this * until the next time we switch to it. * @@ -556,7 +834,8 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, */ if (prev == next) { /* Not actually switching mm's */ - VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) != + VM_WARN_ON(is_dyn_asid(prev_asid) && + this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) != next->context.ctx_id); /* @@ -573,6 +852,20 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, !cpumask_test_cpu(cpu, mm_cpumask(next)))) cpumask_set_cpu(cpu, mm_cpumask(next)); + /* Check if the current mm is transitioning to a global ASID */ + if (mm_needs_global_asid(next, prev_asid)) { + next_tlb_gen = atomic64_read(&next->context.tlb_gen); + choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush); + goto reload_tlb; + } + + /* + * Broadcast TLB invalidation keeps this ASID up to date + * all the time. + */ + if (is_global_asid(prev_asid)) + return; + /* * If the CPU is not in lazy TLB mode, we are just switching * from one thread in a process to another thread in the same @@ -607,6 +900,13 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, cond_mitigation(tsk); /* + * Let nmi_uaccess_okay() and finish_asid_transition() + * know that CR3 is changing. + */ + this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING); + barrier(); + + /* * Leave this CPU in prev's mm_cpumask. Atomic writes to * mm_cpumask can be expensive under contention. The CPU * will be removed lazily at TLB flush time. @@ -620,14 +920,12 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, next_tlb_gen = atomic64_read(&next->context.tlb_gen); choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush); - - /* Let nmi_uaccess_okay() know that we're changing CR3. */ - this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING); - barrier(); } +reload_tlb: new_lam = mm_lam_cr3_mask(next); if (need_flush) { + VM_WARN_ON_ONCE(is_global_asid(new_asid)); this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id); this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen); load_new_mm_cr3(next->pgd, new_asid, new_lam, true); @@ -746,7 +1044,7 @@ static void flush_tlb_func(void *info) const struct flush_tlb_info *f = info; struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm); u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid); - u64 local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen); + u64 local_tlb_gen; bool local = smp_processor_id() == f->initiating_cpu; unsigned long nr_invalidate = 0; u64 mm_tlb_gen; @@ -769,6 +1067,16 @@ static void flush_tlb_func(void *info) if (unlikely(loaded_mm == &init_mm)) return; + /* Reload the ASID if transitioning into or out of a global ASID */ + if (mm_needs_global_asid(loaded_mm, loaded_mm_asid)) { + switch_mm_irqs_off(NULL, loaded_mm, NULL); + loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid); + } + + /* Broadcast ASIDs are always kept up to date with INVLPGB. */ + if (is_global_asid(loaded_mm_asid)) + return; + VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].ctx_id) != loaded_mm->context.ctx_id); @@ -786,6 +1094,8 @@ static void flush_tlb_func(void *info) return; } + local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen); + if (unlikely(f->new_tlb_gen != TLB_GENERATION_INVALID && f->new_tlb_gen <= local_tlb_gen)) { /* @@ -953,7 +1263,7 @@ STATIC_NOPV void native_flush_tlb_multi(const struct cpumask *cpumask, * up on the new contents of what used to be page tables, while * doing a speculative memory access. */ - if (info->freed_tables) + if (info->freed_tables || mm_in_asid_transition(info->mm)) on_each_cpu_mask(cpumask, flush_tlb_func, (void *)info, true); else on_each_cpu_cond_mask(should_flush_tlb, flush_tlb_func, @@ -1000,6 +1310,15 @@ static struct flush_tlb_info *get_flush_tlb_info(struct mm_struct *mm, BUG_ON(this_cpu_inc_return(flush_tlb_info_idx) != 1); #endif + /* + * If the number of flushes is so large that a full flush + * would be faster, do a full flush. + */ + if ((end - start) >> stride_shift > tlb_single_page_flush_ceiling) { + start = 0; + end = TLB_FLUSH_ALL; + } + info->start = start; info->end = end; info->mm = mm; @@ -1026,17 +1345,8 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, bool freed_tables) { struct flush_tlb_info *info; + int cpu = get_cpu(); u64 new_tlb_gen; - int cpu; - - cpu = get_cpu(); - - /* Should we flush just the requested range? */ - if ((end == TLB_FLUSH_ALL) || - ((end - start) >> stride_shift) > tlb_single_page_flush_ceiling) { - start = 0; - end = TLB_FLUSH_ALL; - } /* This is also a barrier that synchronizes with switch_mm(). */ new_tlb_gen = inc_mm_tlb_gen(mm); @@ -1049,9 +1359,12 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, * a local TLB flush is needed. Optimize this use-case by calling * flush_tlb_func_local() directly in this case. */ - if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids) { + if (mm_global_asid(mm)) { + broadcast_tlb_flush(info); + } else if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids) { info->trim_cpumask = should_trim_cpumask(mm); flush_tlb_multi(mm_cpumask(mm), info); + consider_global_asid(mm); } else if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) { lockdep_assert_irqs_enabled(); local_irq_disable(); @@ -1064,7 +1377,6 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end); } - static void do_flush_tlb_all(void *info) { count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED); @@ -1074,7 +1386,32 @@ static void do_flush_tlb_all(void *info) void flush_tlb_all(void) { count_vm_tlb_event(NR_TLB_REMOTE_FLUSH); - on_each_cpu(do_flush_tlb_all, NULL, 1); + + /* First try (faster) hardware-assisted TLB invalidation. */ + if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) + invlpgb_flush_all(); + else + /* Fall back to the IPI-based invalidation. */ + on_each_cpu(do_flush_tlb_all, NULL, 1); +} + +/* Flush an arbitrarily large range of memory with INVLPGB. */ +static void invlpgb_kernel_range_flush(struct flush_tlb_info *info) +{ + unsigned long addr, nr; + + for (addr = info->start; addr < info->end; addr += nr << PAGE_SHIFT) { + nr = (info->end - addr) >> PAGE_SHIFT; + + /* + * INVLPGB has a limit on the size of ranges it can + * flush. Break up large flushes. + */ + nr = clamp_val(nr, 1, invlpgb_count_max); + + invlpgb_flush_addr_nosync(addr, nr); + } + __tlbsync(); } static void do_kernel_range_flush(void *info) @@ -1087,24 +1424,37 @@ static void do_kernel_range_flush(void *info) flush_tlb_one_kernel(addr); } -void flush_tlb_kernel_range(unsigned long start, unsigned long end) +static void kernel_tlb_flush_all(struct flush_tlb_info *info) { - /* Balance as user space task's flush, a bit conservative */ - if (end == TLB_FLUSH_ALL || - (end - start) > tlb_single_page_flush_ceiling << PAGE_SHIFT) { + if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) + invlpgb_flush_all(); + else on_each_cpu(do_flush_tlb_all, NULL, 1); - } else { - struct flush_tlb_info *info; - - preempt_disable(); - info = get_flush_tlb_info(NULL, start, end, 0, false, - TLB_GENERATION_INVALID); +} +static void kernel_tlb_flush_range(struct flush_tlb_info *info) +{ + if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) + invlpgb_kernel_range_flush(info); + else on_each_cpu(do_kernel_range_flush, info, 1); +} - put_flush_tlb_info(); - preempt_enable(); - } +void flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + struct flush_tlb_info *info; + + guard(preempt)(); + + info = get_flush_tlb_info(NULL, start, end, PAGE_SHIFT, false, + TLB_GENERATION_INVALID); + + if (info->end == TLB_FLUSH_ALL) + kernel_tlb_flush_all(info); + else + kernel_tlb_flush_range(info); + + put_flush_tlb_info(); } /* @@ -1283,7 +1633,10 @@ void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) * a local TLB flush is needed. Optimize this use-case by calling * flush_tlb_func_local() directly in this case. */ - if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids) { + if (cpu_feature_enabled(X86_FEATURE_INVLPGB) && batch->unmapped_pages) { + invlpgb_flush_all_nonglobals(); + batch->unmapped_pages = false; + } else if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids) { flush_tlb_multi(&batch->cpumask, info); } else if (cpumask_test_cpu(cpu, &batch->cpumask)) { lockdep_assert_irqs_enabled(); @@ -1325,7 +1678,7 @@ bool nmi_uaccess_okay(void) if (loaded_mm != current_mm) return false; - VM_WARN_ON_ONCE(current_mm->pgd != __va(read_cr3_pa())); + VM_WARN_ON_ONCE(__pa(current_mm->pgd) != read_cr3_pa()); return true; } diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index a43fc5af973d..72776dcb75aa 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -410,16 +410,20 @@ static void emit_nops(u8 **pprog, int len) * Emit the various CFI preambles, see asm/cfi.h and the comments about FineIBT * in arch/x86/kernel/alternative.c */ +static int emit_call(u8 **prog, void *func, void *ip); -static void emit_fineibt(u8 **pprog, u32 hash) +static void emit_fineibt(u8 **pprog, u8 *ip, u32 hash, int arity) { u8 *prog = *pprog; EMIT_ENDBR(); EMIT3_off32(0x41, 0x81, 0xea, hash); /* subl $hash, %r10d */ - EMIT2(0x74, 0x07); /* jz.d8 +7 */ - EMIT2(0x0f, 0x0b); /* ud2 */ - EMIT1(0x90); /* nop */ + if (cfi_bhi) { + emit_call(&prog, __bhi_args[arity], ip + 11); + } else { + EMIT2(0x75, 0xf9); /* jne.d8 .-7 */ + EMIT3(0x0f, 0x1f, 0x00); /* nop3 */ + } EMIT_ENDBR_POISON(); *pprog = prog; @@ -448,13 +452,13 @@ static void emit_kcfi(u8 **pprog, u32 hash) *pprog = prog; } -static void emit_cfi(u8 **pprog, u32 hash) +static void emit_cfi(u8 **pprog, u8 *ip, u32 hash, int arity) { u8 *prog = *pprog; switch (cfi_mode) { case CFI_FINEIBT: - emit_fineibt(&prog, hash); + emit_fineibt(&prog, ip, hash, arity); break; case CFI_KCFI: @@ -505,13 +509,17 @@ static void emit_prologue_tail_call(u8 **pprog, bool is_subprog) * bpf_tail_call helper will skip the first X86_TAIL_CALL_OFFSET bytes * while jumping to another program */ -static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf, +static void emit_prologue(u8 **pprog, u8 *ip, u32 stack_depth, bool ebpf_from_cbpf, bool tail_call_reachable, bool is_subprog, bool is_exception_cb) { u8 *prog = *pprog; - emit_cfi(&prog, is_subprog ? cfi_bpf_subprog_hash : cfi_bpf_hash); + if (is_subprog) { + emit_cfi(&prog, ip, cfi_bpf_subprog_hash, 5); + } else { + emit_cfi(&prog, ip, cfi_bpf_hash, 1); + } /* BPF trampoline can be made to work without these nops, * but let's waste 5 bytes for now and optimize later */ @@ -641,7 +649,7 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, * See emit_prologue(), for IBT builds the trampoline hook is preceded * with an ENDBR instruction. */ - if (is_endbr(*(u32 *)ip)) + if (is_endbr(ip)) ip += ENDBR_INSN_SIZE; return __bpf_arch_text_poke(ip, t, old_addr, new_addr); @@ -1480,7 +1488,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image detect_reg_usage(insn, insn_cnt, callee_regs_used); - emit_prologue(&prog, stack_depth, + emit_prologue(&prog, image, stack_depth, bpf_prog_was_classic(bpf_prog), tail_call_reachable, bpf_is_subprog(bpf_prog), bpf_prog->aux->exception_cb); /* Exception callback will clobber callee regs for its own use, and @@ -3036,7 +3044,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im /* skip patched call instruction and point orig_call to actual * body of the kernel function. */ - if (is_endbr(*(u32 *)orig_call)) + if (is_endbr(orig_call)) orig_call += ENDBR_INSN_SIZE; orig_call += X86_PATCH_SIZE; } @@ -3047,7 +3055,9 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im /* * Indirect call for bpf_struct_ops */ - emit_cfi(&prog, cfi_get_func_hash(func_addr)); + emit_cfi(&prog, image, + cfi_get_func_hash(func_addr), + cfi_get_func_arity(func_addr)); } else { /* * Direct-call fentry stub, as such it needs accounting for the diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile index 48bcada5cabe..4933fb337983 100644 --- a/arch/x86/pci/Makefile +++ b/arch/x86/pci/Makefile @@ -12,8 +12,6 @@ obj-$(CONFIG_X86_INTEL_CE) += ce4100.o obj-$(CONFIG_ACPI) += acpi.o obj-y += legacy.o irq.o -obj-$(CONFIG_STA2X11) += sta2x11-fixup.o - obj-$(CONFIG_X86_NUMACHIP) += numachip.o obj-$(CONFIG_X86_INTEL_MID) += intel_mid_pci.o diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c deleted file mode 100644 index 8c8ddc4dcc08..000000000000 --- a/arch/x86/pci/sta2x11-fixup.c +++ /dev/null @@ -1,233 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * DMA translation between STA2x11 AMBA memory mapping and the x86 memory mapping - * - * ST Microelectronics ConneXt (STA2X11/STA2X10) - * - * Copyright (c) 2010-2011 Wind River Systems, Inc. - */ - -#include <linux/pci.h> -#include <linux/pci_ids.h> -#include <linux/export.h> -#include <linux/list.h> -#include <linux/dma-map-ops.h> -#include <linux/swiotlb.h> -#include <asm/iommu.h> -#include <asm/sta2x11.h> - -#define STA2X11_SWIOTLB_SIZE (4*1024*1024) - -/* - * We build a list of bus numbers that are under the ConneXt. The - * main bridge hosts 4 busses, which are the 4 endpoints, in order. - */ -#define STA2X11_NR_EP 4 /* 0..3 included */ -#define STA2X11_NR_FUNCS 8 /* 0..7 included */ -#define STA2X11_AMBA_SIZE (512 << 20) - -struct sta2x11_ahb_regs { /* saved during suspend */ - u32 base, pexlbase, pexhbase, crw; -}; - -struct sta2x11_mapping { - int is_suspended; - struct sta2x11_ahb_regs regs[STA2X11_NR_FUNCS]; -}; - -struct sta2x11_instance { - struct list_head list; - int bus0; - struct sta2x11_mapping map[STA2X11_NR_EP]; -}; - -static LIST_HEAD(sta2x11_instance_list); - -/* At probe time, record new instances of this bridge (likely one only) */ -static void sta2x11_new_instance(struct pci_dev *pdev) -{ - struct sta2x11_instance *instance; - - instance = kzalloc(sizeof(*instance), GFP_ATOMIC); - if (!instance) - return; - /* This has a subordinate bridge, with 4 more-subordinate ones */ - instance->bus0 = pdev->subordinate->number + 1; - - if (list_empty(&sta2x11_instance_list)) { - int size = STA2X11_SWIOTLB_SIZE; - /* First instance: register your own swiotlb area */ - dev_info(&pdev->dev, "Using SWIOTLB (size %i)\n", size); - if (swiotlb_init_late(size, GFP_DMA, NULL)) - dev_emerg(&pdev->dev, "init swiotlb failed\n"); - } - list_add(&instance->list, &sta2x11_instance_list); -} -DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_STMICRO, 0xcc17, sta2x11_new_instance); - -/* - * Utility functions used in this file from below - */ -static struct sta2x11_instance *sta2x11_pdev_to_instance(struct pci_dev *pdev) -{ - struct sta2x11_instance *instance; - int ep; - - list_for_each_entry(instance, &sta2x11_instance_list, list) { - ep = pdev->bus->number - instance->bus0; - if (ep >= 0 && ep < STA2X11_NR_EP) - return instance; - } - return NULL; -} - -static int sta2x11_pdev_to_ep(struct pci_dev *pdev) -{ - struct sta2x11_instance *instance; - - instance = sta2x11_pdev_to_instance(pdev); - if (!instance) - return -1; - - return pdev->bus->number - instance->bus0; -} - -/* This is exported, as some devices need to access the MFD registers */ -struct sta2x11_instance *sta2x11_get_instance(struct pci_dev *pdev) -{ - return sta2x11_pdev_to_instance(pdev); -} -EXPORT_SYMBOL(sta2x11_get_instance); - -/* At setup time, we use our own ops if the device is a ConneXt one */ -static void sta2x11_setup_pdev(struct pci_dev *pdev) -{ - struct sta2x11_instance *instance = sta2x11_pdev_to_instance(pdev); - - if (!instance) /* either a sta2x11 bridge or another ST device */ - return; - - /* We must enable all devices as master, for audio DMA to work */ - pci_set_master(pdev); -} -DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_STMICRO, PCI_ANY_ID, sta2x11_setup_pdev); - -/* - * At boot we must set up the mappings for the pcie-to-amba bridge. - * It involves device access, and the same happens at suspend/resume time - */ - -#define AHB_MAPB 0xCA4 -#define AHB_CRW(i) (AHB_MAPB + 0 + (i) * 0x10) -#define AHB_CRW_SZMASK 0xfffffc00UL -#define AHB_CRW_ENABLE (1 << 0) -#define AHB_CRW_WTYPE_MEM (2 << 1) -#define AHB_CRW_ROE (1UL << 3) /* Relax Order Ena */ -#define AHB_CRW_NSE (1UL << 4) /* No Snoop Enable */ -#define AHB_BASE(i) (AHB_MAPB + 4 + (i) * 0x10) -#define AHB_PEXLBASE(i) (AHB_MAPB + 8 + (i) * 0x10) -#define AHB_PEXHBASE(i) (AHB_MAPB + 12 + (i) * 0x10) - -/* At probe time, enable mapping for each endpoint, using the pdev */ -static void sta2x11_map_ep(struct pci_dev *pdev) -{ - struct sta2x11_instance *instance = sta2x11_pdev_to_instance(pdev); - struct device *dev = &pdev->dev; - u32 amba_base, max_amba_addr; - int i, ret; - - if (!instance) - return; - - pci_read_config_dword(pdev, AHB_BASE(0), &amba_base); - max_amba_addr = amba_base + STA2X11_AMBA_SIZE - 1; - - ret = dma_direct_set_offset(dev, 0, amba_base, STA2X11_AMBA_SIZE); - if (ret) - dev_err(dev, "sta2x11: could not set DMA offset\n"); - - dev->bus_dma_limit = max_amba_addr; - dma_set_mask_and_coherent(&pdev->dev, max_amba_addr); - - /* Configure AHB mapping */ - pci_write_config_dword(pdev, AHB_PEXLBASE(0), 0); - pci_write_config_dword(pdev, AHB_PEXHBASE(0), 0); - pci_write_config_dword(pdev, AHB_CRW(0), STA2X11_AMBA_SIZE | - AHB_CRW_WTYPE_MEM | AHB_CRW_ENABLE); - - /* Disable all the other windows */ - for (i = 1; i < STA2X11_NR_FUNCS; i++) - pci_write_config_dword(pdev, AHB_CRW(i), 0); - - dev_info(&pdev->dev, - "sta2x11: Map EP %i: AMBA address %#8x-%#8x\n", - sta2x11_pdev_to_ep(pdev), amba_base, max_amba_addr); -} -DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_STMICRO, PCI_ANY_ID, sta2x11_map_ep); - -#ifdef CONFIG_PM /* Some register values must be saved and restored */ - -static struct sta2x11_mapping *sta2x11_pdev_to_mapping(struct pci_dev *pdev) -{ - struct sta2x11_instance *instance; - int ep; - - instance = sta2x11_pdev_to_instance(pdev); - if (!instance) - return NULL; - ep = sta2x11_pdev_to_ep(pdev); - return instance->map + ep; -} - -static void suspend_mapping(struct pci_dev *pdev) -{ - struct sta2x11_mapping *map = sta2x11_pdev_to_mapping(pdev); - int i; - - if (!map) - return; - - if (map->is_suspended) - return; - map->is_suspended = 1; - - /* Save all window configs */ - for (i = 0; i < STA2X11_NR_FUNCS; i++) { - struct sta2x11_ahb_regs *regs = map->regs + i; - - pci_read_config_dword(pdev, AHB_BASE(i), ®s->base); - pci_read_config_dword(pdev, AHB_PEXLBASE(i), ®s->pexlbase); - pci_read_config_dword(pdev, AHB_PEXHBASE(i), ®s->pexhbase); - pci_read_config_dword(pdev, AHB_CRW(i), ®s->crw); - } -} -DECLARE_PCI_FIXUP_SUSPEND(PCI_VENDOR_ID_STMICRO, PCI_ANY_ID, suspend_mapping); - -static void resume_mapping(struct pci_dev *pdev) -{ - struct sta2x11_mapping *map = sta2x11_pdev_to_mapping(pdev); - int i; - - if (!map) - return; - - - if (!map->is_suspended) - goto out; - map->is_suspended = 0; - - /* Restore all window configs */ - for (i = 0; i < STA2X11_NR_FUNCS; i++) { - struct sta2x11_ahb_regs *regs = map->regs + i; - - pci_write_config_dword(pdev, AHB_BASE(i), regs->base); - pci_write_config_dword(pdev, AHB_PEXLBASE(i), regs->pexlbase); - pci_write_config_dword(pdev, AHB_PEXHBASE(i), regs->pexhbase); - pci_write_config_dword(pdev, AHB_CRW(i), regs->crw); - } -out: - pci_set_master(pdev); /* Like at boot, enable master on all devices */ -} -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_STMICRO, PCI_ANY_ID, resume_mapping); - -#endif /* CONFIG_PM */ diff --git a/arch/x86/platform/pvh/head.S b/arch/x86/platform/pvh/head.S index 4733a5f467b8..cfa18ec7d55f 100644 --- a/arch/x86/platform/pvh/head.S +++ b/arch/x86/platform/pvh/head.S @@ -173,10 +173,14 @@ SYM_CODE_START(pvh_start_xen) 1: UNWIND_HINT_END_OF_STACK - /* Set base address in stack canary descriptor. */ - mov $MSR_GS_BASE,%ecx - leal canary(%rip), %eax - xor %edx, %edx + /* + * Set up GSBASE. + * Note that on SMP the boot CPU uses the init data section until + * the per-CPU areas are set up. + */ + movl $MSR_GS_BASE,%ecx + xorl %eax, %eax + xorl %edx, %edx wrmsr /* Call xen_prepare_pvh() via the kernel virtual mapping */ @@ -238,8 +242,6 @@ SYM_DATA_START_LOCAL(gdt_start) SYM_DATA_END_LABEL(gdt_start, SYM_L_LOCAL, gdt_end) .balign 16 -SYM_DATA_LOCAL(canary, .fill 48, 1, 0) - SYM_DATA_START_LOCAL(early_stack) .fill BOOT_STACK_SIZE, 1, 0 SYM_DATA_END_LABEL(early_stack, SYM_L_LOCAL, early_stack_end) diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S index 0a0539e1cc81..8c534c36adfa 100644 --- a/arch/x86/power/hibernate_asm_64.S +++ b/arch/x86/power/hibernate_asm_64.S @@ -26,6 +26,7 @@ /* code below belongs to the image kernel */ .align PAGE_SIZE SYM_FUNC_START(restore_registers) + ANNOTATE_NOENDBR /* go back to the original page tables */ movq %r9, %cr3 @@ -119,6 +120,7 @@ SYM_FUNC_END(restore_image) /* code below has been relocated to a safe page */ SYM_FUNC_START(core_restore_code) + ANNOTATE_NOENDBR /* switch to temporary page tables */ movq %rax, %cr3 /* flush TLB */ diff --git a/arch/x86/realmode/rm/realmode.h b/arch/x86/realmode/rm/realmode.h index c76041a35397..867e55f1d6af 100644 --- a/arch/x86/realmode/rm/realmode.h +++ b/arch/x86/realmode/rm/realmode.h @@ -2,7 +2,7 @@ #ifndef ARCH_X86_REALMODE_RM_REALMODE_H #define ARCH_X86_REALMODE_RM_REALMODE_H -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ /* * 16-bit ljmpw to the real_mode_seg @@ -12,7 +12,7 @@ */ #define LJMPW_RM(to) .byte 0xea ; .word (to), real_mode_seg -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ /* * Signature at the end of the realmode region diff --git a/arch/x86/realmode/rm/wakeup.h b/arch/x86/realmode/rm/wakeup.h index 0e4fd08ae447..3b6d8fa82d3e 100644 --- a/arch/x86/realmode/rm/wakeup.h +++ b/arch/x86/realmode/rm/wakeup.h @@ -7,7 +7,7 @@ #ifndef ARCH_X86_KERNEL_ACPI_RM_WAKEUP_H #define ARCH_X86_KERNEL_ACPI_RM_WAKEUP_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> /* This must match data at wakeup.S */ diff --git a/arch/x86/tools/cpufeaturemasks.awk b/arch/x86/tools/cpufeaturemasks.awk new file mode 100755 index 000000000000..173d5bf2d999 --- /dev/null +++ b/arch/x86/tools/cpufeaturemasks.awk @@ -0,0 +1,88 @@ +#!/usr/bin/awk +# +# Convert cpufeatures.h to a list of compile-time masks +# Note: this blithely assumes that each word has at least one +# feature defined in it; if not, something else is wrong! +# + +BEGIN { + printf "#ifndef _ASM_X86_CPUFEATUREMASKS_H\n"; + printf "#define _ASM_X86_CPUFEATUREMASKS_H\n\n"; + + file = 0 +} + +FNR == 1 { + ++file; + + # arch/x86/include/asm/cpufeatures.h + if (file == 1) + FS = "[ \t()*+]+"; + + # .config + if (file == 2) + FS = "="; +} + +# Create a dictionary of sorts, containing all defined feature bits +file == 1 && $1 ~ /^#define$/ && $2 ~ /^X86_FEATURE_/ { + nfeat = $3 * $4 + $5; + feat = $2; + sub(/^X86_FEATURE_/, "", feat); + feats[nfeat] = feat; +} +file == 1 && $1 ~ /^#define$/ && $2 == "NCAPINTS" { + ncapints = int($3); +} + +# Create a dictionary featstat[REQUIRED|DISABLED, FEATURE_NAME] = on | off +file == 2 && $1 ~ /^CONFIG_X86_(REQUIRED|DISABLED)_FEATURE_/ { + on = ($2 == "y"); + if (split($1, fs, "CONFIG_X86_|_FEATURE_") == 3) + featstat[fs[2], fs[3]] = on; +} + +END { + sets[1] = "REQUIRED"; + sets[2] = "DISABLED"; + + for (ns in sets) { + s = sets[ns]; + + printf "/*\n"; + printf " * %s features:\n", s; + printf " *\n"; + fstr = ""; + for (i = 0; i < ncapints; i++) { + mask = 0; + for (j = 0; j < 32; j++) { + feat = feats[i*32 + j]; + if (featstat[s, feat]) { + nfstr = fstr " " feat; + if (length(nfstr) > 72) { + printf " * %s\n", fstr; + nfstr = " " feat; + } + fstr = nfstr; + mask += (2 ^ j); + } + } + masks[i] = mask; + } + printf " * %s\n */\n", fstr; + + for (i = 0; i < ncapints; i++) + printf "#define %s_MASK%d\t0x%08xU\n", s, i, masks[i]; + + printf "\n#define %s_MASK_BIT_SET(x)\t\t\t\\\n", s; + printf "\t((\t\t\t\t\t"; + for (i = 0; i < ncapints; i++) { + if (masks[i]) + printf "\t\\\n\t\t((x) >> 5) == %2d ? %s_MASK%d :", i, s, i; + } + printf " 0\t\\\n"; + printf "\t) & (1U << ((x) & 31)))\n\n"; + } + + printf "#endif /* _ASM_X86_CPUFEATUREMASKS_H */\n"; +} diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index e937be979ec8..5778bc498415 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c @@ -29,9 +29,13 @@ static struct relocs relocs16; static struct relocs relocs32; #if ELF_BITS == 64 -static struct relocs relocs32neg; static struct relocs relocs64; # define FMT PRIu64 + +#ifndef R_X86_64_REX_GOTPCRELX +# define R_X86_64_REX_GOTPCRELX 42 +#endif + #else # define FMT PRIu32 #endif @@ -86,8 +90,6 @@ static const char * const sym_regex_kernel[S_NSYMTYPES] = { "__initramfs_start|" "(jiffies|jiffies_64)|" #if ELF_BITS == 64 - "__per_cpu_load|" - "init_per_cpu__.*|" "__end_rodata_hpage_align|" #endif "_end)$" @@ -227,6 +229,7 @@ static const char *rel_type(unsigned type) REL_TYPE(R_X86_64_PC16), REL_TYPE(R_X86_64_8), REL_TYPE(R_X86_64_PC8), + REL_TYPE(R_X86_64_REX_GOTPCRELX), #else REL_TYPE(R_386_NONE), REL_TYPE(R_386_32), @@ -284,34 +287,6 @@ static const char *sym_name(const char *sym_strtab, Elf_Sym *sym) return name; } -static Elf_Sym *sym_lookup(const char *symname) -{ - int i; - - for (i = 0; i < shnum; i++) { - struct section *sec = &secs[i]; - long nsyms; - char *strtab; - Elf_Sym *symtab; - Elf_Sym *sym; - - if (sec->shdr.sh_type != SHT_SYMTAB) - continue; - - nsyms = sec->shdr.sh_size/sizeof(Elf_Sym); - symtab = sec->symtab; - strtab = sec->link->strtab; - - for (sym = symtab; --nsyms >= 0; sym++) { - if (!sym->st_name) - continue; - if (strcmp(symname, strtab + sym->st_name) == 0) - return sym; - } - } - return 0; -} - #if BYTE_ORDER == LITTLE_ENDIAN # define le16_to_cpu(val) (val) # define le32_to_cpu(val) (val) @@ -760,84 +735,8 @@ static void walk_relocs(int (*process)(struct section *sec, Elf_Rel *rel, } } -/* - * The .data..percpu section is a special case for x86_64 SMP kernels. - * It is used to initialize the actual per_cpu areas and to provide - * definitions for the per_cpu variables that correspond to their offsets - * within the percpu area. Since the values of all of the symbols need - * to be offsets from the start of the per_cpu area the virtual address - * (sh_addr) of .data..percpu is 0 in SMP kernels. - * - * This means that: - * - * Relocations that reference symbols in the per_cpu area do not - * need further relocation (since the value is an offset relative - * to the start of the per_cpu area that does not change). - * - * Relocations that apply to the per_cpu area need to have their - * offset adjusted by by the value of __per_cpu_load to make them - * point to the correct place in the loaded image (because the - * virtual address of .data..percpu is 0). - * - * For non SMP kernels .data..percpu is linked as part of the normal - * kernel data and does not require special treatment. - * - */ -static int per_cpu_shndx = -1; -static Elf_Addr per_cpu_load_addr; - -static void percpu_init(void) -{ - int i; - - for (i = 0; i < shnum; i++) { - ElfW(Sym) *sym; - - if (strcmp(sec_name(i), ".data..percpu")) - continue; - - if (secs[i].shdr.sh_addr != 0) /* non SMP kernel */ - return; - - sym = sym_lookup("__per_cpu_load"); - if (!sym) - die("can't find __per_cpu_load\n"); - - per_cpu_shndx = i; - per_cpu_load_addr = sym->st_value; - - return; - } -} - #if ELF_BITS == 64 -/* - * Check to see if a symbol lies in the .data..percpu section. - * - * The linker incorrectly associates some symbols with the - * .data..percpu section so we also need to check the symbol - * name to make sure that we classify the symbol correctly. - * - * The GNU linker incorrectly associates: - * __init_begin - * __per_cpu_load - * - * The "gold" linker incorrectly associates: - * init_per_cpu__fixed_percpu_data - * init_per_cpu__gdt_page - */ -static int is_percpu_sym(ElfW(Sym) *sym, const char *symname) -{ - int shndx = sym_index(sym); - - return (shndx == per_cpu_shndx) && - strcmp(symname, "__init_begin") && - strcmp(symname, "__per_cpu_load") && - strncmp(symname, "init_per_cpu_", 13); -} - - static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym, const char *symname) { @@ -848,12 +747,6 @@ static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym, if (sym->st_shndx == SHN_UNDEF) return 0; - /* - * Adjust the offset if this reloc applies to the percpu section. - */ - if (sec->shdr.sh_info == per_cpu_shndx) - offset += per_cpu_load_addr; - switch (r_type) { case R_X86_64_NONE: /* NONE can be ignored. */ @@ -861,33 +754,23 @@ static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym, case R_X86_64_PC32: case R_X86_64_PLT32: + case R_X86_64_REX_GOTPCRELX: /* - * PC relative relocations don't need to be adjusted unless - * referencing a percpu symbol. + * PC relative relocations don't need to be adjusted. * * NB: R_X86_64_PLT32 can be treated as R_X86_64_PC32. */ - if (is_percpu_sym(sym, symname)) - add_reloc(&relocs32neg, offset); break; case R_X86_64_PC64: /* * Only used by jump labels */ - if (is_percpu_sym(sym, symname)) - die("Invalid R_X86_64_PC64 relocation against per-CPU symbol %s\n", symname); break; case R_X86_64_32: case R_X86_64_32S: case R_X86_64_64: - /* - * References to the percpu area don't need to be adjusted. - */ - if (is_percpu_sym(sym, symname)) - break; - if (shn_abs) { /* * Whitelisted absolute symbols do not require @@ -1055,7 +938,8 @@ static int cmp_relocs(const void *va, const void *vb) static void sort_relocs(struct relocs *r) { - qsort(r->offset, r->count, sizeof(r->offset[0]), cmp_relocs); + if (r->count) + qsort(r->offset, r->count, sizeof(r->offset[0]), cmp_relocs); } static int write32(uint32_t v, FILE *f) @@ -1099,7 +983,6 @@ static void emit_relocs(int as_text, int use_real_mode) /* Order the relocations for more efficient processing */ sort_relocs(&relocs32); #if ELF_BITS == 64 - sort_relocs(&relocs32neg); sort_relocs(&relocs64); #else sort_relocs(&relocs16); @@ -1131,13 +1014,6 @@ static void emit_relocs(int as_text, int use_real_mode) /* Now print each relocation */ for (i = 0; i < relocs64.count; i++) write_reloc(relocs64.offset[i], stdout); - - /* Print a stop */ - write_reloc(0, stdout); - - /* Now print each inverse 32-bit relocation */ - for (i = 0; i < relocs32neg.count; i++) - write_reloc(relocs32neg.offset[i], stdout); #endif /* Print a stop */ @@ -1190,9 +1066,6 @@ void process(FILE *fp, int use_real_mode, int as_text, read_symtabs(fp); read_relocs(fp); - if (ELF_BITS == 64) - percpu_init(); - if (show_absolute_syms) { print_absolute_symbols(); return; diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 77e788e928cd..98d8a50d2aed 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -9,7 +9,7 @@ config XEN select PARAVIRT_CLOCK select X86_HV_CALLBACK_VECTOR depends on X86_64 || (X86_32 && X86_PAE) - depends on X86_64 || (X86_GENERIC || MPENTIUM4 || MCORE2 || MATOM || MK8) + depends on X86_64 || (X86_GENERIC || MPENTIUM4 || MATOM) depends on X86_LOCAL_APIC && X86_TSC help This is the Linux Xen port. Enabling this will allow the diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 5e57835e999d..dcc2041f8e61 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -73,6 +73,7 @@ #include <asm/mwait.h> #include <asm/pci_x86.h> #include <asm/cpu.h> +#include <asm/irq_stack.h> #ifdef CONFIG_X86_IOPL_IOPERM #include <asm/io_bitmap.h> #endif @@ -94,6 +95,44 @@ void *xen_initial_gdt; static int xen_cpu_up_prepare_pv(unsigned int cpu); static int xen_cpu_dead_pv(unsigned int cpu); +#ifndef CONFIG_PREEMPTION +/* + * Some hypercalls issued by the toolstack can take many 10s of + * seconds. Allow tasks running hypercalls via the privcmd driver to + * be voluntarily preempted even if full kernel preemption is + * disabled. + * + * Such preemptible hypercalls are bracketed by + * xen_preemptible_hcall_begin() and xen_preemptible_hcall_end() + * calls. + */ +DEFINE_PER_CPU(bool, xen_in_preemptible_hcall); +EXPORT_SYMBOL_GPL(xen_in_preemptible_hcall); + +/* + * In case of scheduling the flag must be cleared and restored after + * returning from schedule as the task might move to a different CPU. + */ +static __always_inline bool get_and_clear_inhcall(void) +{ + bool inhcall = __this_cpu_read(xen_in_preemptible_hcall); + + __this_cpu_write(xen_in_preemptible_hcall, false); + return inhcall; +} + +static __always_inline void restore_inhcall(bool inhcall) +{ + __this_cpu_write(xen_in_preemptible_hcall, inhcall); +} + +#else + +static __always_inline bool get_and_clear_inhcall(void) { return false; } +static __always_inline void restore_inhcall(bool inhcall) { } + +#endif + struct tls_descs { struct desc_struct desc[3]; }; @@ -687,6 +726,36 @@ DEFINE_IDTENTRY_RAW(xenpv_exc_machine_check) } #endif +static void __xen_pv_evtchn_do_upcall(struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + + inc_irq_stat(irq_hv_callback_count); + + xen_evtchn_do_upcall(); + + set_irq_regs(old_regs); +} + +__visible noinstr void xen_pv_evtchn_do_upcall(struct pt_regs *regs) +{ + irqentry_state_t state = irqentry_enter(regs); + bool inhcall; + + instrumentation_begin(); + run_sysvec_on_irqstack_cond(__xen_pv_evtchn_do_upcall, regs); + + inhcall = get_and_clear_inhcall(); + if (inhcall && !WARN_ON_ONCE(state.exit_rcu)) { + irqentry_exit_cond_resched(); + instrumentation_end(); + restore_inhcall(inhcall); + } else { + instrumentation_end(); + irqentry_exit(regs, state); + } +} + struct trap_array_entry { void (*orig)(void); void (*xen)(void); diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index d078de2c952b..38971c6dcd4b 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -2189,7 +2189,6 @@ static const typeof(pv_ops) xen_mmu_ops __initconst = { .flush_tlb_kernel = xen_flush_tlb, .flush_tlb_one_user = xen_flush_tlb_one_user, .flush_tlb_multi = xen_flush_tlb_multi, - .tlb_remove_table = tlb_remove_table, .pgd_alloc = xen_pgd_alloc, .pgd_free = xen_pgd_free, diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 6863d3da7dec..688ff59318ae 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -70,7 +70,7 @@ static void cpu_bringup(void) xen_enable_syscall(); } cpu = smp_processor_id(); - smp_store_cpu_info(cpu); + identify_secondary_cpu(cpu); set_cpu_sibling_map(cpu); speculative_store_bypass_ht_init(); diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index b518f36d1ca2..109af12f7647 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -51,6 +51,7 @@ SYM_FUNC_END(xen_hypercall_pv) * non-zero. */ SYM_FUNC_START(xen_irq_disable_direct) + ENDBR movb $1, PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_mask) RET SYM_FUNC_END(xen_irq_disable_direct) @@ -90,6 +91,7 @@ SYM_FUNC_END(check_events) * then enter the hypervisor to get them handled. */ SYM_FUNC_START(xen_irq_enable_direct) + ENDBR FRAME_BEGIN /* Unmask events */ movb $0, PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_mask) @@ -120,6 +122,7 @@ SYM_FUNC_END(xen_irq_enable_direct) * x86 use opposite senses (mask vs enable). */ SYM_FUNC_START(xen_save_fl_direct) + ENDBR testb $0xff, PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_mask) setz %ah addb %ah, %ah @@ -127,6 +130,7 @@ SYM_FUNC_START(xen_save_fl_direct) SYM_FUNC_END(xen_save_fl_direct) SYM_FUNC_START(xen_read_cr2) + ENDBR FRAME_BEGIN _ASM_MOV PER_CPU_VAR(xen_vcpu), %_ASM_AX _ASM_MOV XEN_vcpu_info_arch_cr2(%_ASM_AX), %_ASM_AX @@ -135,6 +139,7 @@ SYM_FUNC_START(xen_read_cr2) SYM_FUNC_END(xen_read_cr2); SYM_FUNC_START(xen_read_cr2_direct) + ENDBR FRAME_BEGIN _ASM_MOV PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_arch_cr2), %_ASM_AX FRAME_END diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 894edf8d6d62..5dad6c51cdc3 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -31,16 +31,14 @@ SYM_CODE_START(startup_xen) leaq __top_init_kernel_stack(%rip), %rsp - /* Set up %gs. - * - * The base of %gs always points to fixed_percpu_data. If the - * stack protector canary is enabled, it is located at %gs:40. + /* + * Set up GSBASE. * Note that, on SMP, the boot cpu uses init data section until * the per cpu areas are set up. */ movl $MSR_GS_BASE,%ecx - movq $INIT_PER_CPU_VAR(fixed_percpu_data),%rax - cdq + xorl %eax, %eax + xorl %edx, %edx wrmsr mov %rsi, %rdi @@ -133,11 +131,13 @@ SYM_FUNC_START(xen_hypercall_hvm) SYM_FUNC_END(xen_hypercall_hvm) SYM_FUNC_START(xen_hypercall_amd) + ANNOTATE_NOENDBR vmmcall RET SYM_FUNC_END(xen_hypercall_amd) SYM_FUNC_START(xen_hypercall_intel) + ANNOTATE_NOENDBR vmcall RET SYM_FUNC_END(xen_hypercall_intel) diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 698897b29de2..586cc7d1d8aa 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -590,6 +590,8 @@ static void acpi_idle_play_dead(struct cpuidle_device *dev, int index) raw_safe_halt(); else if (cx->entry_method == ACPI_CSTATE_SYSTEMIO) { io_idle(cx->address); + } else if (cx->entry_method == ACPI_CSTATE_FFH) { + acpi_processor_ffh_play_dead(cx); } else return; } diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 9c4cc01fd51a..f06b9bc99945 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2200,28 +2200,20 @@ static int knl_get_turbo_pstate(int cpu) return ret; } -static void hybrid_get_type(void *data) -{ - u8 *cpu_type = data; - - *cpu_type = get_this_hybrid_cpu_type(); -} - static int hwp_get_cpu_scaling(int cpu) { if (hybrid_scaling_factor) { - u8 cpu_type = 0; - - smp_call_function_single(cpu, hybrid_get_type, &cpu_type, 1); + struct cpuinfo_x86 *c = &cpu_data(smp_processor_id()); + u8 cpu_type = c->topo.intel_type; /* * Return the hybrid scaling factor for P-cores and use the * default core scaling for E-cores. */ - if (cpu_type == 0x40) + if (cpu_type == INTEL_CPU_TYPE_CORE) return hybrid_scaling_factor; - if (cpu_type == 0x20) + if (cpu_type == INTEL_CPU_TYPE_ATOM) return core_get_scaling(); } diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 0fdb1d1316c4..5687089e406a 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -58,6 +58,7 @@ #include <asm/spec-ctrl.h> #include <asm/tsc.h> #include <asm/fpu/api.h> +#include <asm/smp.h> #define INTEL_IDLE_VERSION "0.5.1" @@ -229,6 +230,15 @@ static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev, return 0; } +static void intel_idle_enter_dead(struct cpuidle_device *dev, int index) +{ + struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); + struct cpuidle_state *state = &drv->states[index]; + unsigned long eax = flg2MWAIT(state->flags); + + mwait_play_dead(eax); +} + /* * States are indexed by the cstate number, * which is also the index into the MWAIT hint array. @@ -1804,6 +1814,7 @@ static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) mark_tsc_unstable("TSC halts in idle"); state->enter = intel_idle; + state->enter_dead = intel_idle_enter_dead; state->enter_s2idle = intel_idle_s2idle; } } @@ -2153,6 +2164,9 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) !cpuidle_state_table[cstate].enter_s2idle) break; + if (!cpuidle_state_table[cstate].enter_dead) + cpuidle_state_table[cstate].enter_dead = intel_idle_enter_dead; + /* If marked as unusable, skip this state. */ if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_UNUSABLE) { pr_debug("state %s is disabled\n", diff --git a/drivers/misc/mei/Kconfig b/drivers/misc/mei/Kconfig index 67d9391f1855..7575fee96cc6 100644 --- a/drivers/misc/mei/Kconfig +++ b/drivers/misc/mei/Kconfig @@ -3,7 +3,7 @@ config INTEL_MEI tristate "Intel Management Engine Interface" depends on X86 && PCI - default GENERIC_CPU || MCORE2 || MATOM || X86_GENERIC + default X86_64 || MATOM help The Intel Management Engine (Intel ME) provides Manageability, Security and Media services for system containing Intel chipsets. diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index 2fbd379923fd..5c3054aaec8c 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -203,6 +203,12 @@ config PCI_P2PDMA P2P DMA transactions must be between devices behind the same root port. + Enabling this option will reduce the entropy of x86 KASLR memory + regions. For example - on a 46 bit system, the entropy goes down + from 16 bits to 15 bits. The actual reduction in entropy depends + on the physical address bits, on processor features, kernel config + (5 level page table) and physical memory present on the system. + If unsure, say N. config PCI_LABEL diff --git a/drivers/platform/x86/amd/hsmp/Kconfig b/drivers/platform/x86/amd/hsmp/Kconfig index 7d10d4462a45..d6f7a62d55b5 100644 --- a/drivers/platform/x86/amd/hsmp/Kconfig +++ b/drivers/platform/x86/amd/hsmp/Kconfig @@ -7,7 +7,7 @@ config AMD_HSMP tristate menu "AMD HSMP Driver" - depends on AMD_NB || COMPILE_TEST + depends on AMD_NODE || COMPILE_TEST config AMD_HSMP_ACPI tristate "AMD HSMP ACPI device driver" diff --git a/drivers/platform/x86/amd/hsmp/acpi.c b/drivers/platform/x86/amd/hsmp/acpi.c index 444b43be35a2..c1eccb3c80c5 100644 --- a/drivers/platform/x86/amd/hsmp/acpi.c +++ b/drivers/platform/x86/amd/hsmp/acpi.c @@ -10,7 +10,6 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <asm/amd_hsmp.h> -#include <asm/amd_nb.h> #include <linux/acpi.h> #include <linux/device.h> @@ -24,6 +23,8 @@ #include <uapi/asm-generic/errno-base.h> +#include <asm/amd_node.h> + #include "hsmp.h" #define DRIVER_NAME "amd_hsmp" @@ -321,8 +322,8 @@ static int hsmp_acpi_probe(struct platform_device *pdev) return -ENOMEM; if (!hsmp_pdev->is_probed) { - hsmp_pdev->num_sockets = amd_nb_num(); - if (hsmp_pdev->num_sockets == 0 || hsmp_pdev->num_sockets > MAX_AMD_SOCKETS) + hsmp_pdev->num_sockets = amd_num_nodes(); + if (hsmp_pdev->num_sockets == 0 || hsmp_pdev->num_sockets > MAX_AMD_NUM_NODES) return -ENODEV; hsmp_pdev->sock = devm_kcalloc(&pdev->dev, hsmp_pdev->num_sockets, diff --git a/drivers/platform/x86/amd/hsmp/hsmp.c b/drivers/platform/x86/amd/hsmp/hsmp.c index 03164e30b3a5..a3ac09a90de4 100644 --- a/drivers/platform/x86/amd/hsmp/hsmp.c +++ b/drivers/platform/x86/amd/hsmp/hsmp.c @@ -8,7 +8,6 @@ */ #include <asm/amd_hsmp.h> -#include <asm/amd_nb.h> #include <linux/acpi.h> #include <linux/delay.h> diff --git a/drivers/platform/x86/amd/hsmp/hsmp.h b/drivers/platform/x86/amd/hsmp/hsmp.h index e852f0a947e4..af8b21f821d6 100644 --- a/drivers/platform/x86/amd/hsmp/hsmp.h +++ b/drivers/platform/x86/amd/hsmp/hsmp.h @@ -21,8 +21,6 @@ #define HSMP_ATTR_GRP_NAME_SIZE 10 -#define MAX_AMD_SOCKETS 8 - #define HSMP_CDEV_NAME "hsmp_cdev" #define HSMP_DEVNODE_NAME "hsmp" @@ -41,7 +39,6 @@ struct hsmp_socket { void __iomem *virt_base_addr; struct semaphore hsmp_sem; char name[HSMP_ATTR_GRP_NAME_SIZE]; - struct pci_dev *root; struct device *dev; u16 sock_ind; int (*amd_hsmp_rdwr)(struct hsmp_socket *sock, u32 off, u32 *val, bool rw); diff --git a/drivers/platform/x86/amd/hsmp/plat.c b/drivers/platform/x86/amd/hsmp/plat.c index 02ca85762b68..b9782a078dbd 100644 --- a/drivers/platform/x86/amd/hsmp/plat.c +++ b/drivers/platform/x86/amd/hsmp/plat.c @@ -10,14 +10,16 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <asm/amd_hsmp.h> -#include <asm/amd_nb.h> +#include <linux/build_bug.h> #include <linux/device.h> #include <linux/module.h> #include <linux/pci.h> #include <linux/platform_device.h> #include <linux/sysfs.h> +#include <asm/amd_node.h> + #include "hsmp.h" #define DRIVER_NAME "amd_hsmp" @@ -34,28 +36,12 @@ #define SMN_HSMP_MSG_RESP 0x0010980 #define SMN_HSMP_MSG_DATA 0x00109E0 -#define HSMP_INDEX_REG 0xc4 -#define HSMP_DATA_REG 0xc8 - static struct hsmp_plat_device *hsmp_pdev; static int amd_hsmp_pci_rdwr(struct hsmp_socket *sock, u32 offset, u32 *value, bool write) { - int ret; - - if (!sock->root) - return -ENODEV; - - ret = pci_write_config_dword(sock->root, HSMP_INDEX_REG, - sock->mbinfo.base_addr + offset); - if (ret) - return ret; - - ret = (write ? pci_write_config_dword(sock->root, HSMP_DATA_REG, *value) - : pci_read_config_dword(sock->root, HSMP_DATA_REG, value)); - - return ret; + return amd_smn_hsmp_rdwr(sock->sock_ind, sock->mbinfo.base_addr + offset, value, write); } static ssize_t hsmp_metric_tbl_plat_read(struct file *filp, struct kobject *kobj, @@ -95,7 +81,12 @@ static umode_t hsmp_is_sock_attr_visible(struct kobject *kobj, * Static array of 8 + 1(for NULL) elements is created below * to create sysfs groups for sockets. * is_bin_visible function is used to show / hide the necessary groups. + * + * Validate the maximum number against MAX_AMD_NUM_NODES. If this changes, + * then the attributes and groups below must be adjusted. */ +static_assert(MAX_AMD_NUM_NODES == 8); + #define HSMP_BIN_ATTR(index, _list) \ static const struct bin_attribute attr##index = { \ .attr = { .name = HSMP_METRICS_TABLE_NAME, .mode = 0444}, \ @@ -159,10 +150,7 @@ static int init_platform_device(struct device *dev) int ret, i; for (i = 0; i < hsmp_pdev->num_sockets; i++) { - if (!node_to_amd_nb(i)) - return -ENODEV; sock = &hsmp_pdev->sock[i]; - sock->root = node_to_amd_nb(i)->root; sock->sock_ind = i; sock->dev = dev; sock->mbinfo.base_addr = SMN_HSMP_BASE; @@ -305,11 +293,11 @@ static int __init hsmp_plt_init(void) return -ENOMEM; /* - * amd_nb_num() returns number of SMN/DF interfaces present in the system + * amd_num_nodes() returns number of SMN/DF interfaces present in the system * if we have N SMN/DF interfaces that ideally means N sockets */ - hsmp_pdev->num_sockets = amd_nb_num(); - if (hsmp_pdev->num_sockets == 0 || hsmp_pdev->num_sockets > MAX_AMD_SOCKETS) + hsmp_pdev->num_sockets = amd_num_nodes(); + if (hsmp_pdev->num_sockets == 0 || hsmp_pdev->num_sockets > MAX_AMD_NUM_NODES) return ret; ret = platform_driver_register(&amd_hsmp_driver); diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c index 77d75e1f14a9..5ccde3982314 100644 --- a/drivers/powercap/intel_rapl_common.c +++ b/drivers/powercap/intel_rapl_common.c @@ -1274,7 +1274,7 @@ static const struct x86_cpu_id rapl_ids[] __initconst = { X86_MATCH_VFM(INTEL_ATOM_SILVERMONT, &rapl_defaults_byt), X86_MATCH_VFM(INTEL_ATOM_AIRMONT, &rapl_defaults_cht), X86_MATCH_VFM(INTEL_ATOM_SILVERMONT_MID, &rapl_defaults_tng), - X86_MATCH_VFM(INTEL_ATOM_AIRMONT_MID, &rapl_defaults_ann), + X86_MATCH_VFM(INTEL_ATOM_SILVERMONT_MID2,&rapl_defaults_ann), X86_MATCH_VFM(INTEL_ATOM_GOLDMONT, &rapl_defaults_core), X86_MATCH_VFM(INTEL_ATOM_GOLDMONT_PLUS, &rapl_defaults_core), X86_MATCH_VFM(INTEL_ATOM_GOLDMONT_D, &rapl_defaults_core), diff --git a/drivers/staging/media/atomisp/include/linux/atomisp_platform.h b/drivers/staging/media/atomisp/include/linux/atomisp_platform.h index 049246774ced..6146555fe9cf 100644 --- a/drivers/staging/media/atomisp/include/linux/atomisp_platform.h +++ b/drivers/staging/media/atomisp/include/linux/atomisp_platform.h @@ -172,10 +172,10 @@ void atomisp_unregister_subdev(struct v4l2_subdev *subdev); #define IS_BYT __IS_SOC(INTEL_ATOM_SILVERMONT) #define IS_CHT __IS_SOC(INTEL_ATOM_AIRMONT) #define IS_MRFD __IS_SOC(INTEL_ATOM_SILVERMONT_MID) -#define IS_MOFD __IS_SOC(INTEL_ATOM_AIRMONT_MID) +#define IS_MOFD __IS_SOC(INTEL_ATOM_SILVERMONT_MID2) /* Both CHT and MOFD come with ISP2401 */ #define IS_ISP2401 __IS_SOCS(INTEL_ATOM_AIRMONT, \ - INTEL_ATOM_AIRMONT_MID) + INTEL_ATOM_SILVERMONT_MID2) #endif /* ATOMISP_PLATFORM_H_ */ diff --git a/drivers/thermal/intel/intel_tcc.c b/drivers/thermal/intel/intel_tcc.c index 817421508d5c..b2a615aea7c1 100644 --- a/drivers/thermal/intel/intel_tcc.c +++ b/drivers/thermal/intel/intel_tcc.c @@ -106,7 +106,7 @@ static const struct x86_cpu_id intel_tcc_cpu_ids[] __initconst = { X86_MATCH_VFM(INTEL_ATOM_SILVERMONT_D, &temp_broadwell), X86_MATCH_VFM(INTEL_ATOM_SILVERMONT_MID, &temp_broadwell), X86_MATCH_VFM(INTEL_ATOM_AIRMONT, &temp_broadwell), - X86_MATCH_VFM(INTEL_ATOM_AIRMONT_MID, &temp_broadwell), + X86_MATCH_VFM(INTEL_ATOM_SILVERMONT_MID2, &temp_broadwell), X86_MATCH_VFM(INTEL_ATOM_AIRMONT_NP, &temp_broadwell), X86_MATCH_VFM(INTEL_ATOM_GOLDMONT, &temp_goldmont), X86_MATCH_VFM(INTEL_ATOM_GOLDMONT_D, &temp_goldmont), diff --git a/drivers/virt/coco/sev-guest/sev-guest.c b/drivers/virt/coco/sev-guest/sev-guest.c index 70fbc9a3e703..cf3fb61f4d5b 100644 --- a/drivers/virt/coco/sev-guest/sev-guest.c +++ b/drivers/virt/coco/sev-guest/sev-guest.c @@ -23,6 +23,7 @@ #include <linux/cleanup.h> #include <linux/uuid.h> #include <linux/configfs.h> +#include <linux/mm.h> #include <uapi/linux/sev-guest.h> #include <uapi/linux/psp-sev.h> diff --git a/include/acpi/processor.h b/include/acpi/processor.h index a17e97e634a6..d0eccbd920e5 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -280,6 +280,7 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu, struct acpi_processor_cx *cx, struct acpi_power_register *reg); void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cstate); +void __noreturn acpi_processor_ffh_play_dead(struct acpi_processor_cx *cx); #else static inline void acpi_processor_power_init_bm_check(struct acpi_processor_flags @@ -300,6 +301,10 @@ static inline void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx { return; } +static inline void __noreturn acpi_processor_ffh_play_dead(struct acpi_processor_cx *cx) +{ + BUG(); +} #endif static inline int call_on_cpu(int cpu, long (*fn)(void *), void *arg, diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index c768de6f19a9..0755bc39b0d8 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -39,7 +39,7 @@ extern char __init_begin[], __init_end[]; extern char _sinittext[], _einittext[]; extern char __start_ro_after_init[], __end_ro_after_init[]; extern char _end[]; -extern char __per_cpu_load[], __per_cpu_start[], __per_cpu_end[]; +extern char __per_cpu_start[], __per_cpu_end[]; extern char __kprobes_text_start[], __kprobes_text_end[]; extern char __entry_text_start[], __entry_text_end[]; extern char __start_rodata[], __end_rodata[]; diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 0d5b186abee8..4925441bc471 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -385,6 +385,11 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG) . = ALIGN(PAGE_SIZE); \ __nosave_end = .; +#define CACHE_HOT_DATA(align) \ + . = ALIGN(align); \ + *(SORT_BY_ALIGNMENT(.data..hot.*)) \ + . = ALIGN(align); + #define PAGE_ALIGNED_DATA(page_align) \ . = ALIGN(page_align); \ *(.data..page_aligned) \ @@ -1062,10 +1067,13 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG) */ #define PERCPU_INPUT(cacheline) \ __per_cpu_start = .; \ - *(.data..percpu..first) \ . = ALIGN(PAGE_SIZE); \ *(.data..percpu..page_aligned) \ . = ALIGN(cacheline); \ + __per_cpu_hot_start = .; \ + *(SORT_BY_ALIGNMENT(.data..percpu..hot.*)) \ + __per_cpu_hot_end = .; \ + . = ALIGN(cacheline); \ *(.data..percpu..read_mostly) \ . = ALIGN(cacheline); \ *(.data..percpu) \ @@ -1074,52 +1082,17 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG) __per_cpu_end = .; /** - * PERCPU_VADDR - define output section for percpu area + * PERCPU_SECTION - define output section for percpu area * @cacheline: cacheline size - * @vaddr: explicit base address (optional) - * @phdr: destination PHDR (optional) * * Macro which expands to output section for percpu area. * * @cacheline is used to align subsections to avoid false cacheline * sharing between subsections for different purposes. - * - * If @vaddr is not blank, it specifies explicit base address and all - * percpu symbols will be offset from the given address. If blank, - * @vaddr always equals @laddr + LOAD_OFFSET. - * - * @phdr defines the output PHDR to use if not blank. Be warned that - * output PHDR is sticky. If @phdr is specified, the next output - * section in the linker script will go there too. @phdr should have - * a leading colon. - * - * Note that this macros defines __per_cpu_load as an absolute symbol. - * If there is no need to put the percpu section at a predetermined - * address, use PERCPU_SECTION. - */ -#define PERCPU_VADDR(cacheline, vaddr, phdr) \ - __per_cpu_load = .; \ - .data..percpu vaddr : AT(__per_cpu_load - LOAD_OFFSET) { \ - PERCPU_INPUT(cacheline) \ - } phdr \ - . = __per_cpu_load + SIZEOF(.data..percpu); - -/** - * PERCPU_SECTION - define output section for percpu area, simple version - * @cacheline: cacheline size - * - * Align to PAGE_SIZE and outputs output section for percpu area. This - * macro doesn't manipulate @vaddr or @phdr and __per_cpu_load and - * __per_cpu_start will be identical. - * - * This macro is equivalent to ALIGN(PAGE_SIZE); PERCPU_VADDR(@cacheline,,) - * except that __per_cpu_load is defined as a relative symbol against - * .data..percpu which is required for relocatable x86_32 configuration. */ #define PERCPU_SECTION(cacheline) \ . = ALIGN(PAGE_SIZE); \ .data..percpu : AT(ADDR(.data..percpu) - LOAD_OFFSET) { \ - __per_cpu_load = .; \ PERCPU_INPUT(cacheline) \ } @@ -1148,6 +1121,7 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG) INIT_TASK_DATA(inittask) \ NOSAVE_DATA \ PAGE_ALIGNED_DATA(pagealigned) \ + CACHE_HOT_DATA(cacheline) \ CACHELINE_ALIGNED_DATA(cacheline) \ READ_MOSTLY_DATA(cacheline) \ DATA_DATA \ diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 4e495b29c640..a70e62d69dc7 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -330,7 +330,6 @@ static inline bool acpi_sci_irq_valid(void) } extern int sbf_port; -extern unsigned long acpi_realmode_flags; int acpi_register_gsi (struct device *dev, u32 gsi, int triggering, int polarity); int acpi_gsi_to_irq (u32 gsi, unsigned int *irq); diff --git a/include/linux/cfi.h b/include/linux/cfi.h index f0df518e11dd..1db17ecbb86c 100644 --- a/include/linux/cfi.h +++ b/include/linux/cfi.h @@ -11,6 +11,8 @@ #include <linux/module.h> #include <asm/cfi.h> +extern bool cfi_warn; + #ifndef cfi_get_offset static inline int cfi_get_offset(void) { diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 6db889c9efcc..9fc30b6b80c9 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -228,6 +228,16 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, #endif /* __KERNEL__ */ +#if defined(CONFIG_CFI_CLANG) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) +/* + * Force a reference to the external symbol so the compiler generates + * __kcfi_typid. + */ +#define KCFI_REFERENCE(sym) __ADDRESSABLE(sym) +#else +#define KCFI_REFERENCE(sym) +#endif + /** * offset_to_ptr - convert a relative memory offset to an absolute pointer * @off: the address of the 32-bit offset value diff --git a/include/linux/execmem.h b/include/linux/execmem.h index 64130ae19690..65655a5d1be2 100644 --- a/include/linux/execmem.h +++ b/include/linux/execmem.h @@ -65,6 +65,37 @@ enum execmem_range_flags { * Architectures that use EXECMEM_ROX_CACHE must implement this. */ void execmem_fill_trapping_insns(void *ptr, size_t size, bool writable); + +/** + * execmem_make_temp_rw - temporarily remap region with read-write + * permissions + * @ptr: address of the region to remap + * @size: size of the region to remap + * + * Remaps a part of the cached large page in the ROX cache in the range + * [@ptr, @ptr + @size) as writable and not executable. The caller must + * have exclusive ownership of this range and ensure nothing will try to + * execute code in this range. + * + * Return: 0 on success or negative error code on failure. + */ +int execmem_make_temp_rw(void *ptr, size_t size); + +/** + * execmem_restore_rox - restore read-only-execute permissions + * @ptr: address of the region to remap + * @size: size of the region to remap + * + * Restores read-only-execute permissions on a range [@ptr, @ptr + @size) + * after it was temporarily remapped as writable. Relies on architecture + * implementation of set_memory_rox() to restore mapping using large pages. + * + * Return: 0 on success or negative error code on failure. + */ +int execmem_restore_rox(void *ptr, size_t size); +#else +static inline int execmem_make_temp_rw(void *ptr, size_t size) { return 0; } +static inline int execmem_restore_rox(void *ptr, size_t size) { return 0; } #endif /** diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index d67614f7b7f1..bd7e60c0b72f 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -692,6 +692,7 @@ struct x86_cpu_id { __u16 feature; /* bit index */ /* Solely for kernel-internal use: DO NOT EXPORT to userspace! */ __u16 flags; + __u8 type; kernel_ulong_t driver_data; }; @@ -703,6 +704,7 @@ struct x86_cpu_id { #define X86_STEP_MIN 0 #define X86_STEP_MAX 0xf #define X86_FEATURE_ANY 0 /* Same as FPU, you can't test for that */ +#define X86_CPU_TYPE_ANY 0 /* * Generic table type for matching CPU features. diff --git a/include/linux/module.h b/include/linux/module.h index 30e5b19bafa9..9937e71a3b5b 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -370,7 +370,6 @@ enum mod_mem_type { struct module_memory { void *base; - void *rw_copy; bool is_rox; unsigned int size; @@ -772,16 +771,6 @@ static inline bool is_livepatch_module(struct module *mod) void set_module_sig_enforced(void); -void *__module_writable_address(struct module *mod, void *loc); - -static inline void *module_writable_address(struct module *mod, void *loc) -{ - if (!IS_ENABLED(CONFIG_ARCH_HAS_EXECMEM_ROX) || !mod || - mod->state != MODULE_STATE_UNFORMED) - return loc; - return __module_writable_address(mod, loc); -} - #else /* !CONFIG_MODULES... */ static inline struct module *__module_address(unsigned long addr) @@ -889,11 +878,6 @@ static inline bool module_is_coming(struct module *mod) { return false; } - -static inline void *module_writable_address(struct module *mod, void *loc) -{ - return loc; -} #endif /* CONFIG_MODULES */ #ifdef CONFIG_SYSFS diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h index 1f5507ba5a12..e395461d59e5 100644 --- a/include/linux/moduleloader.h +++ b/include/linux/moduleloader.h @@ -108,10 +108,6 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod); -int module_post_finalize(const Elf_Ehdr *hdr, - const Elf_Shdr *sechdrs, - struct module *mod); - #ifdef CONFIG_MODULES void flush_module_init_free_work(void); #else diff --git a/include/linux/objtool.h b/include/linux/objtool.h index c722a921165b..3ca965a2ddc8 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -128,7 +128,7 @@ #define UNWIND_HINT(type, sp_reg, sp_offset, signal) "\n\t" #define STACK_FRAME_NON_STANDARD(func) #define STACK_FRAME_NON_STANDARD_FP(func) -#define __ASM_ANNOTATE(label, type) +#define __ASM_ANNOTATE(label, type) "" #define ASM_ANNOTATE(type) #else .macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 signal=0 @@ -147,6 +147,8 @@ * these relocations will never be used for indirect calls. */ #define ANNOTATE_NOENDBR ASM_ANNOTATE(ANNOTYPE_NOENDBR) +#define ANNOTATE_NOENDBR_SYM(sym) asm(__ASM_ANNOTATE(sym, ANNOTYPE_NOENDBR)) + /* * This should be used immediately before an indirect jump/call. It tells * objtool the subsequent indirect jump/call is vouched safe for retpoline diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 5b520fe86b60..0fcacb909778 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -26,13 +26,11 @@ #define PER_CPU_SHARED_ALIGNED_SECTION "..shared_aligned" #define PER_CPU_ALIGNED_SECTION "..shared_aligned" #endif -#define PER_CPU_FIRST_SECTION "..first" #else #define PER_CPU_SHARED_ALIGNED_SECTION "" #define PER_CPU_ALIGNED_SECTION "..shared_aligned" -#define PER_CPU_FIRST_SECTION "" #endif @@ -115,14 +113,17 @@ DEFINE_PER_CPU_SECTION(type, name, "") /* - * Declaration/definition used for per-CPU variables that must come first in - * the set of variables. + * Declaration/definition used for per-CPU variables that are frequently + * accessed and should be in a single cacheline. + * + * For use only by architecture and core code. Only use scalar or pointer + * types to maximize density. */ -#define DECLARE_PER_CPU_FIRST(type, name) \ - DECLARE_PER_CPU_SECTION(type, name, PER_CPU_FIRST_SECTION) +#define DECLARE_PER_CPU_CACHE_HOT(type, name) \ + DECLARE_PER_CPU_SECTION(type, name, "..hot.." #name) -#define DEFINE_PER_CPU_FIRST(type, name) \ - DEFINE_PER_CPU_SECTION(type, name, PER_CPU_FIRST_SECTION) +#define DEFINE_PER_CPU_CACHE_HOT(type, name) \ + DEFINE_PER_CPU_SECTION(type, name, "..hot.." #name) /* * Declaration/definition used for per-CPU variables that must be cacheline diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 3e9808f2b549..b0af8d4ef6e6 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -319,6 +319,7 @@ do { \ #ifdef CONFIG_PREEMPT_NOTIFIERS struct preempt_notifier; +struct task_struct; /** * preempt_ops - notifiers called when a task is preempted and rescheduled diff --git a/include/linux/sizes.h b/include/linux/sizes.h index c3a00b967d18..49039494076f 100644 --- a/include/linux/sizes.h +++ b/include/linux/sizes.h @@ -23,17 +23,25 @@ #define SZ_4K 0x00001000 #define SZ_8K 0x00002000 #define SZ_16K 0x00004000 +#define SZ_24K 0x00006000 #define SZ_32K 0x00008000 #define SZ_64K 0x00010000 #define SZ_128K 0x00020000 +#define SZ_192K 0x00030000 #define SZ_256K 0x00040000 +#define SZ_384K 0x00060000 #define SZ_512K 0x00080000 #define SZ_1M 0x00100000 #define SZ_2M 0x00200000 +#define SZ_3M 0x00300000 #define SZ_4M 0x00400000 +#define SZ_6M 0x00600000 #define SZ_8M 0x00800000 +#define SZ_12M 0x00c00000 #define SZ_16M 0x01000000 +#define SZ_18M 0x01200000 +#define SZ_24M 0x01800000 #define SZ_32M 0x02000000 #define SZ_64M 0x04000000 #define SZ_128M 0x08000000 diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index f70d0958095c..5a37cb2b6f93 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -151,6 +151,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, #ifdef CONFIG_X86 DIRECT_MAP_LEVEL2_SPLIT, DIRECT_MAP_LEVEL3_SPLIT, + DIRECT_MAP_LEVEL2_COLLAPSE, + DIRECT_MAP_LEVEL3_COLLAPSE, #endif #ifdef CONFIG_PER_VMA_LOCK_STATS VMA_LOCK_SUCCESS, diff --git a/init/Kconfig b/init/Kconfig index aeea36d11dda..681f38ee68db 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1880,11 +1880,6 @@ config KALLSYMS_ALL Say N unless you really need all symbols, or kernel live patching. -config KALLSYMS_ABSOLUTE_PERCPU - bool - depends on KALLSYMS - default X86_64 && SMP - # end of the "standard kernel features (expert users)" menu config ARCH_HAS_MEMBARRIER_CALLBACKS diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 60611df77957..6e604caa870c 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -21702,12 +21702,12 @@ patch_map_ops_generic: if (insn->imm == BPF_FUNC_get_smp_processor_id && verifier_inlines_helper_call(env, insn->imm)) { /* BPF_FUNC_get_smp_processor_id inlining is an - * optimization, so if pcpu_hot.cpu_number is ever + * optimization, so if cpu_number is ever * changed in some incompatible and hard to support * way, it's fine to back out this inlining logic */ #ifdef CONFIG_SMP - insn_buf[0] = BPF_MOV32_IMM(BPF_REG_0, (u32)(unsigned long)&pcpu_hot.cpu_number); + insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, (u32)(unsigned long)&cpu_number); insn_buf[1] = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0); insn_buf[2] = BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0); cnt = 3; diff --git a/kernel/cfi.c b/kernel/cfi.c index 08caad776717..19be79639542 100644 --- a/kernel/cfi.c +++ b/kernel/cfi.c @@ -7,6 +7,8 @@ #include <linux/cfi.h> +bool cfi_warn __ro_after_init = IS_ENABLED(CONFIG_CFI_PERMISSIVE); + enum bug_trap_type report_cfi_failure(struct pt_regs *regs, unsigned long addr, unsigned long *target, u32 type) { @@ -17,7 +19,7 @@ enum bug_trap_type report_cfi_failure(struct pt_regs *regs, unsigned long addr, pr_err("CFI failure at %pS (no target information)\n", (void *)addr); - if (IS_ENABLED(CONFIG_CFI_PERMISSIVE)) { + if (cfi_warn) { __warn(NULL, 0, (void *)addr, 0, regs, NULL); return BUG_TRAP_TYPE_WARN; } diff --git a/kernel/iomem.c b/kernel/iomem.c index dc2120776e1c..75e61c1c6bc0 100644 --- a/kernel/iomem.c +++ b/kernel/iomem.c @@ -6,7 +6,8 @@ #include <linux/ioremap.h> #ifndef arch_memremap_wb -static void *arch_memremap_wb(resource_size_t offset, unsigned long size) +static void *arch_memremap_wb(resource_size_t offset, unsigned long size, + unsigned long flags) { #ifdef ioremap_cache return (__force void *)ioremap_cache(offset, size); @@ -91,7 +92,7 @@ void *memremap(resource_size_t offset, size_t size, unsigned long flags) if (is_ram == REGION_INTERSECTS) addr = try_ram_remap(offset, size, flags); if (!addr) - addr = arch_memremap_wb(offset, size); + addr = arch_memremap_wb(offset, size, flags); } /* diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index a9a0ca605d4a..4198f30aac3c 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -148,16 +148,8 @@ static unsigned int get_symbol_offset(unsigned long pos) unsigned long kallsyms_sym_address(int idx) { - /* values are unsigned offsets if --absolute-percpu is not in effect */ - if (!IS_ENABLED(CONFIG_KALLSYMS_ABSOLUTE_PERCPU)) - return kallsyms_relative_base + (u32)kallsyms_offsets[idx]; - - /* ...otherwise, positive offsets are absolute values */ - if (kallsyms_offsets[idx] >= 0) - return kallsyms_offsets[idx]; - - /* ...and negative offsets are relative to kallsyms_relative_base - 1 */ - return kallsyms_relative_base - 1 - kallsyms_offsets[idx]; + /* values are unsigned offsets */ + return kallsyms_relative_base + (u32)kallsyms_offsets[idx]; } static unsigned int get_symbol_seq(int index) diff --git a/kernel/module/main.c b/kernel/module/main.c index 1fb9ad289a6f..a256cc919ad7 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -1221,18 +1221,6 @@ void __weak module_arch_freeing_init(struct module *mod) { } -void *__module_writable_address(struct module *mod, void *loc) -{ - for_class_mod_mem_type(type, text) { - struct module_memory *mem = &mod->mem[type]; - - if (loc >= mem->base && loc < mem->base + mem->size) - return loc + (mem->rw_copy - mem->base); - } - - return loc; -} - static int module_memory_alloc(struct module *mod, enum mod_mem_type type) { unsigned int size = PAGE_ALIGN(mod->mem[type].size); @@ -1250,21 +1238,15 @@ static int module_memory_alloc(struct module *mod, enum mod_mem_type type) if (!ptr) return -ENOMEM; - mod->mem[type].base = ptr; - if (execmem_is_rox(execmem_type)) { - ptr = vzalloc(size); + int err = execmem_make_temp_rw(ptr, size); - if (!ptr) { - execmem_free(mod->mem[type].base); + if (err) { + execmem_free(ptr); return -ENOMEM; } - mod->mem[type].rw_copy = ptr; mod->mem[type].is_rox = true; - } else { - mod->mem[type].rw_copy = mod->mem[type].base; - memset(mod->mem[type].base, 0, size); } /* @@ -1278,18 +1260,29 @@ static int module_memory_alloc(struct module *mod, enum mod_mem_type type) * *do* eventually get freed, but let's just keep things simple * and avoid *any* false positives. */ - kmemleak_not_leak(ptr); + if (!mod->mem[type].is_rox) + kmemleak_not_leak(ptr); + + memset(ptr, 0, size); + mod->mem[type].base = ptr; return 0; } +static void module_memory_restore_rox(struct module *mod) +{ + for_class_mod_mem_type(type, text) { + struct module_memory *mem = &mod->mem[type]; + + if (mem->is_rox) + execmem_restore_rox(mem->base, mem->size); + } +} + static void module_memory_free(struct module *mod, enum mod_mem_type type) { struct module_memory *mem = &mod->mem[type]; - if (mem->is_rox) - vfree(mem->rw_copy); - execmem_free(mem->base); } @@ -2642,7 +2635,6 @@ static int move_module(struct module *mod, struct load_info *info) for_each_mod_mem_type(type) { if (!mod->mem[type].size) { mod->mem[type].base = NULL; - mod->mem[type].rw_copy = NULL; continue; } @@ -2659,7 +2651,6 @@ static int move_module(struct module *mod, struct load_info *info) void *dest; Elf_Shdr *shdr = &info->sechdrs[i]; const char *sname; - unsigned long addr; if (!(shdr->sh_flags & SHF_ALLOC)) continue; @@ -2680,14 +2671,12 @@ static int move_module(struct module *mod, struct load_info *info) ret = PTR_ERR(dest); goto out_err; } - addr = (unsigned long)dest; codetag_section_found = true; } else { enum mod_mem_type type = shdr->sh_entsize >> SH_ENTSIZE_TYPE_SHIFT; unsigned long offset = shdr->sh_entsize & SH_ENTSIZE_OFFSET_MASK; - addr = (unsigned long)mod->mem[type].base + offset; - dest = mod->mem[type].rw_copy + offset; + dest = mod->mem[type].base + offset; } if (shdr->sh_type != SHT_NOBITS) { @@ -2710,13 +2699,14 @@ static int move_module(struct module *mod, struct load_info *info) * users of info can keep taking advantage and using the newly * minted official memory area. */ - shdr->sh_addr = addr; + shdr->sh_addr = (unsigned long)dest; pr_debug("\t0x%lx 0x%.8lx %s\n", (long)shdr->sh_addr, (long)shdr->sh_size, info->secstrings + shdr->sh_name); } return 0; out_err: + module_memory_restore_rox(mod); for (t--; t >= 0; t--) module_memory_free(mod, t); if (codetag_section_found) @@ -2863,17 +2853,8 @@ int __weak module_finalize(const Elf_Ehdr *hdr, return 0; } -int __weak module_post_finalize(const Elf_Ehdr *hdr, - const Elf_Shdr *sechdrs, - struct module *me) -{ - return 0; -} - static int post_relocation(struct module *mod, const struct load_info *info) { - int ret; - /* Sort exception table now relocations are done. */ sort_extable(mod->extable, mod->extable + mod->num_exentries); @@ -2885,24 +2866,7 @@ static int post_relocation(struct module *mod, const struct load_info *info) add_kallsyms(mod, info); /* Arch-specific module finalizing. */ - ret = module_finalize(info->hdr, info->sechdrs, mod); - if (ret) - return ret; - - for_each_mod_mem_type(type) { - struct module_memory *mem = &mod->mem[type]; - - if (mem->is_rox) { - if (!execmem_update_copy(mem->base, mem->rw_copy, - mem->size)) - return -ENOMEM; - - vfree(mem->rw_copy); - mem->rw_copy = NULL; - } - } - - return module_post_finalize(info->hdr, info->sechdrs, mod); + return module_finalize(info->hdr, info->sechdrs, mod); } /* Call module constructors. */ @@ -3499,6 +3463,7 @@ static int load_module(struct load_info *info, const char __user *uargs, mod->mem[type].size); } + module_memory_restore_rox(mod); module_deallocate(mod, info); free_copy: /* diff --git a/kernel/module/strict_rwx.c b/kernel/module/strict_rwx.c index 74834ba15615..03f4142cfbf4 100644 --- a/kernel/module/strict_rwx.c +++ b/kernel/module/strict_rwx.c @@ -9,6 +9,7 @@ #include <linux/mm.h> #include <linux/vmalloc.h> #include <linux/set_memory.h> +#include <linux/execmem.h> #include "internal.h" static int module_set_memory(const struct module *mod, enum mod_mem_type type, @@ -32,12 +33,12 @@ static int module_set_memory(const struct module *mod, enum mod_mem_type type, int module_enable_text_rox(const struct module *mod) { for_class_mod_mem_type(type, text) { + const struct module_memory *mem = &mod->mem[type]; int ret; - if (mod->mem[type].is_rox) - continue; - - if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX)) + if (mem->is_rox) + ret = execmem_restore_rox(mem->base, mem->size); + else if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX)) ret = module_set_memory(mod, type, set_memory_rox); else ret = module_set_memory(mod, type, set_memory_x); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 4484cdb504c7..4ebe6136b08d 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1824,16 +1824,6 @@ static const struct ctl_table kern_table[] = { .mode = 0444, .proc_handler = proc_dointvec, }, -#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) - { - .procname = "unknown_nmi_panic", - .data = &unknown_nmi_panic, - .maxlen = sizeof (int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, -#endif - #if (defined(CONFIG_X86_32) || defined(CONFIG_PARISC)) && \ defined(CONFIG_DEBUG_STACKOVERFLOW) { @@ -1844,43 +1834,6 @@ static const struct ctl_table kern_table[] = { .proc_handler = proc_dointvec, }, #endif -#if defined(CONFIG_X86) - { - .procname = "panic_on_unrecovered_nmi", - .data = &panic_on_unrecovered_nmi, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "panic_on_io_nmi", - .data = &panic_on_io_nmi, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "bootloader_type", - .data = &bootloader_type, - .maxlen = sizeof (int), - .mode = 0444, - .proc_handler = proc_dointvec, - }, - { - .procname = "bootloader_version", - .data = &bootloader_version, - .maxlen = sizeof (int), - .mode = 0444, - .proc_handler = proc_dointvec, - }, - { - .procname = "io_delay_type", - .data = &io_delay_type, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, -#endif #if defined(CONFIG_MMU) { .procname = "randomize_va_space", @@ -1899,15 +1852,6 @@ static const struct ctl_table kern_table[] = { .proc_handler = proc_dointvec, }, #endif -#if defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86) - { - .procname = "acpi_video_flags", - .data = &acpi_realmode_flags, - .maxlen = sizeof (unsigned long), - .mode = 0644, - .proc_handler = proc_doulongvec_minmax, - }, -#endif #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN { .procname = "ignore-unaligned-usertrap", diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index adc947587eb8..997fb2a47c92 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1038,27 +1038,14 @@ static const struct bpf_func_proto bpf_get_func_ip_proto_tracing = { .arg1_type = ARG_PTR_TO_CTX, }; -#ifdef CONFIG_X86_KERNEL_IBT -static unsigned long get_entry_ip(unsigned long fentry_ip) +static inline unsigned long get_entry_ip(unsigned long fentry_ip) { - u32 instr; - - /* We want to be extra safe in case entry ip is on the page edge, - * but otherwise we need to avoid get_kernel_nofault()'s overhead. - */ - if ((fentry_ip & ~PAGE_MASK) < ENDBR_INSN_SIZE) { - if (get_kernel_nofault(instr, (u32 *)(fentry_ip - ENDBR_INSN_SIZE))) - return fentry_ip; - } else { - instr = *(u32 *)(fentry_ip - ENDBR_INSN_SIZE); - } - if (is_endbr(instr)) +#ifdef CONFIG_X86_KERNEL_IBT + if (is_endbr((void *)(fentry_ip - ENDBR_INSN_SIZE))) fentry_ip -= ENDBR_INSN_SIZE; +#endif return fentry_ip; } -#else -#define get_entry_ip(fentry_ip) fentry_ip -#endif BPF_CALL_1(bpf_get_func_ip_kprobe, struct pt_regs *, regs) { diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c index 759ea1783cc5..d726068358c7 100644 --- a/lib/atomic64_test.c +++ b/lib/atomic64_test.c @@ -254,7 +254,7 @@ static __init int test_atomics_init(void) pr_info("passed for %s platform %s CX8 and %s SSE\n", #ifdef CONFIG_X86_64 "x86-64", -#elif defined(CONFIG_X86_CMPXCHG64) +#elif defined(CONFIG_X86_CX8) "i586+", #else "i386+", diff --git a/lib/zstd/common/portability_macros.h b/lib/zstd/common/portability_macros.h index 0e3b2c0a527d..0dde8bf56595 100644 --- a/lib/zstd/common/portability_macros.h +++ b/lib/zstd/common/portability_macros.h @@ -55,7 +55,7 @@ #ifndef DYNAMIC_BMI2 #if ((defined(__clang__) && __has_attribute(__target__)) \ || (defined(__GNUC__) \ - && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \ + && (__GNUC__ >= 11))) \ && (defined(__x86_64__) || defined(_M_X64)) \ && !defined(__BMI2__) # define DYNAMIC_BMI2 1 diff --git a/mm/execmem.c b/mm/execmem.c index 317b6a8d35be..e6c4f5076ca8 100644 --- a/mm/execmem.c +++ b/mm/execmem.c @@ -257,7 +257,6 @@ out_unlock: static int execmem_cache_populate(struct execmem_range *range, size_t size) { unsigned long vm_flags = VM_ALLOW_HUGE_VMAP; - unsigned long start, end; struct vm_struct *vm; size_t alloc_size; int err = -ENOMEM; @@ -275,26 +274,18 @@ static int execmem_cache_populate(struct execmem_range *range, size_t size) /* fill memory with instructions that will trap */ execmem_fill_trapping_insns(p, alloc_size, /* writable = */ true); - start = (unsigned long)p; - end = start + alloc_size; - - vunmap_range(start, end); - - err = execmem_set_direct_map_valid(vm, false); - if (err) - goto err_free_mem; - - err = vmap_pages_range_noflush(start, end, range->pgprot, vm->pages, - PMD_SHIFT); + err = set_memory_rox((unsigned long)p, vm->nr_pages); if (err) goto err_free_mem; err = execmem_cache_add(p, alloc_size); if (err) - goto err_free_mem; + goto err_reset_direct_map; return 0; +err_reset_direct_map: + execmem_set_direct_map_valid(vm, true); err_free_mem: vfree(p); return err; @@ -344,6 +335,28 @@ static bool execmem_cache_free(void *ptr) return true; } + +int execmem_make_temp_rw(void *ptr, size_t size) +{ + unsigned int nr = PAGE_ALIGN(size) >> PAGE_SHIFT; + unsigned long addr = (unsigned long)ptr; + int ret; + + ret = set_memory_nx(addr, nr); + if (ret) + return ret; + + return set_memory_rw(addr, nr); +} + +int execmem_restore_rox(void *ptr, size_t size) +{ + unsigned int nr = PAGE_ALIGN(size) >> PAGE_SHIFT; + unsigned long addr = (unsigned long)ptr; + + return set_memory_rox(addr, nr); +} + #else /* CONFIG_ARCH_HAS_EXECMEM_ROX */ static void *execmem_cache_alloc(struct execmem_range *range, size_t size) { diff --git a/mm/percpu.c b/mm/percpu.c index ac61e3fc5f15..7b5835356d1e 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -3071,7 +3071,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size, continue; } /* copy and return the unused part */ - memcpy(ptr, __per_cpu_load, ai->static_size); + memcpy(ptr, __per_cpu_start, ai->static_size); pcpu_fc_free(ptr + size_sum, ai->unit_size - size_sum); } } @@ -3240,7 +3240,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size, pcpu_fc_cpu_to_node_fn_t flush_cache_vmap_early(unit_addr, unit_addr + ai->unit_size); /* copy static data */ - memcpy((void *)unit_addr, __per_cpu_load, ai->static_size); + memcpy((void *)unit_addr, __per_cpu_start, ai->static_size); } /* we're ready, commit */ diff --git a/mm/vmstat.c b/mm/vmstat.c index 16bfe1c694dd..88998725f1c5 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1435,6 +1435,8 @@ const char * const vmstat_text[] = { #ifdef CONFIG_X86 "direct_map_level2_splits", "direct_map_level3_splits", + "direct_map_level2_collapses", + "direct_map_level3_collapses", #endif #ifdef CONFIG_PER_VMA_LOCK_STATS "vma_lock_success", diff --git a/scripts/gcc-x86_32-has-stack-protector.sh b/scripts/gcc-x86_32-has-stack-protector.sh deleted file mode 100755 index 9459ca4f0f11..000000000000 --- a/scripts/gcc-x86_32-has-stack-protector.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 - -# This requires GCC 8.1 or better. Specifically, we require -# -mstack-protector-guard-reg, added by -# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81708 - -echo "int foo(void) { char X[200]; return 3; }" | $* -S -x c -m32 -O0 -fstack-protector -mstack-protector-guard-reg=fs -mstack-protector-guard-symbol=__stack_chk_guard - -o - 2> /dev/null | grep -q "%fs" diff --git a/scripts/gcc-x86_64-has-stack-protector.sh b/scripts/gcc-x86_64-has-stack-protector.sh deleted file mode 100755 index f680bb01aeeb..000000000000 --- a/scripts/gcc-x86_64-has-stack-protector.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 - -echo "int foo(void) { char X[200]; return 3; }" | $* -S -x c -m64 -O0 -mcmodel=kernel -fno-PIE -fstack-protector - -o - 2> /dev/null | grep -q "%gs" diff --git a/scripts/gdb/linux/cpus.py b/scripts/gdb/linux/cpus.py index 13eb8b3901b8..8f7c4fb78c2c 100644 --- a/scripts/gdb/linux/cpus.py +++ b/scripts/gdb/linux/cpus.py @@ -164,7 +164,7 @@ def get_current_task(cpu): var_ptr = gdb.parse_and_eval("(struct task_struct *)cpu_tasks[0].task") return var_ptr.dereference() else: - var_ptr = gdb.parse_and_eval("&pcpu_hot.current_task") + var_ptr = gdb.parse_and_eval("¤t_task") return per_cpu(var_ptr, cpu).dereference() elif utils.is_target_arch("aarch64"): current_task_addr = gdb.parse_and_eval("(unsigned long)$SP_EL0") diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c index 03852da3d249..4b0234e4b12f 100644 --- a/scripts/kallsyms.c +++ b/scripts/kallsyms.c @@ -5,7 +5,7 @@ * This software may be used and distributed according to the terms * of the GNU General Public License, incorporated herein by reference. * - * Usage: kallsyms [--all-symbols] [--absolute-percpu] in.map > out.S + * Usage: kallsyms [--all-symbols] in.map > out.S * * Table compression uses all the unused char codes on the symbols and * maps these to the most used substrings (tokens). For instance, it might @@ -37,7 +37,6 @@ struct sym_entry { unsigned long long addr; unsigned int len; unsigned int seq; - bool percpu_absolute; unsigned char sym[]; }; @@ -55,14 +54,9 @@ static struct addr_range text_ranges[] = { #define text_range_text (&text_ranges[0]) #define text_range_inittext (&text_ranges[1]) -static struct addr_range percpu_range = { - "__per_cpu_start", "__per_cpu_end", -1ULL, 0 -}; - static struct sym_entry **table; static unsigned int table_size, table_cnt; static int all_symbols; -static int absolute_percpu; static int token_profit[0x10000]; @@ -73,7 +67,7 @@ static unsigned char best_table_len[256]; static void usage(void) { - fprintf(stderr, "Usage: kallsyms [--all-symbols] [--absolute-percpu] in.map > out.S\n"); + fprintf(stderr, "Usage: kallsyms [--all-symbols] in.map > out.S\n"); exit(1); } @@ -164,7 +158,6 @@ static struct sym_entry *read_symbol(FILE *in, char **buf, size_t *buf_len) return NULL; check_symbol_range(name, addr, text_ranges, ARRAY_SIZE(text_ranges)); - check_symbol_range(name, addr, &percpu_range, 1); /* include the type field in the symbol name, so that it gets * compressed together */ @@ -175,7 +168,6 @@ static struct sym_entry *read_symbol(FILE *in, char **buf, size_t *buf_len) sym->len = len; sym->sym[0] = type; strcpy(sym_name(sym), name); - sym->percpu_absolute = false; return sym; } @@ -319,11 +311,6 @@ static int expand_symbol(const unsigned char *data, int len, char *result) return total; } -static bool symbol_absolute(const struct sym_entry *s) -{ - return s->percpu_absolute; -} - static int compare_names(const void *a, const void *b) { int ret; @@ -455,22 +442,11 @@ static void write_src(void) */ long long offset; - bool overflow; - - if (!absolute_percpu) { - offset = table[i]->addr - relative_base; - overflow = offset < 0 || offset > UINT_MAX; - } else if (symbol_absolute(table[i])) { - offset = table[i]->addr; - overflow = offset < 0 || offset > INT_MAX; - } else { - offset = relative_base - table[i]->addr - 1; - overflow = offset < INT_MIN || offset >= 0; - } - if (overflow) { + + offset = table[i]->addr - relative_base; + if (offset < 0 || offset > UINT_MAX) { fprintf(stderr, "kallsyms failure: " - "%s symbol value %#llx out of range in relative mode\n", - symbol_absolute(table[i]) ? "absolute" : "relative", + "relative symbol value %#llx out of range\n", table[i]->addr); exit(EXIT_FAILURE); } @@ -725,36 +701,15 @@ static void sort_symbols(void) qsort(table, table_cnt, sizeof(table[0]), compare_symbols); } -static void make_percpus_absolute(void) -{ - unsigned int i; - - for (i = 0; i < table_cnt; i++) - if (symbol_in_range(table[i], &percpu_range, 1)) { - /* - * Keep the 'A' override for percpu symbols to - * ensure consistent behavior compared to older - * versions of this tool. - */ - table[i]->sym[0] = 'A'; - table[i]->percpu_absolute = true; - } -} - /* find the minimum non-absolute symbol address */ static void record_relative_base(void) { - unsigned int i; - - for (i = 0; i < table_cnt; i++) - if (!symbol_absolute(table[i])) { - /* - * The table is sorted by address. - * Take the first non-absolute symbol value. - */ - relative_base = table[i]->addr; - return; - } + /* + * The table is sorted by address. + * Take the first symbol value. + */ + if (table_cnt) + relative_base = table[0]->addr; } int main(int argc, char **argv) @@ -762,7 +717,6 @@ int main(int argc, char **argv) while (1) { static const struct option long_options[] = { {"all-symbols", no_argument, &all_symbols, 1}, - {"absolute-percpu", no_argument, &absolute_percpu, 1}, {}, }; @@ -779,8 +733,6 @@ int main(int argc, char **argv) read_map(argv[optind]); shrink_table(); - if (absolute_percpu) - make_percpus_absolute(); sort_symbols(); record_relative_base(); optimize_token_table(); diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index 56a077d204cf..67e66333bd2a 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -144,10 +144,6 @@ kallsyms() kallsymopt="${kallsymopt} --all-symbols" fi - if is_enabled CONFIG_KALLSYMS_ABSOLUTE_PERCPU; then - kallsymopt="${kallsymopt} --absolute-percpu" - fi - info KSYMS "${2}.S" scripts/kallsyms ${kallsymopt} "${1}" > "${2}.S" diff --git a/scripts/min-tool-version.sh b/scripts/min-tool-version.sh index 91c91201212c..787868183b84 100755 --- a/scripts/min-tool-version.sh +++ b/scripts/min-tool-version.sh @@ -19,12 +19,14 @@ binutils) gcc) if [ "$ARCH" = parisc64 ]; then echo 12.0.0 + elif [ "$SRCARCH" = x86 ]; then + echo 8.1.0 else echo 5.1.0 fi ;; llvm) - if [ "$SRCARCH" = s390 ]; then + if [ "$SRCARCH" = s390 -o "$SRCARCH" = x86 ]; then echo 15.0.0 elif [ "$SRCARCH" = loongarch ]; then echo 18.0.0 diff --git a/tools/arch/x86/include/asm/asm.h b/tools/arch/x86/include/asm/asm.h index 3ad3da9a7d97..dbe39b44256b 100644 --- a/tools/arch/x86/include/asm/asm.h +++ b/tools/arch/x86/include/asm/asm.h @@ -2,7 +2,7 @@ #ifndef _ASM_X86_ASM_H #define _ASM_X86_ASM_H -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ # define __ASM_FORM(x, ...) x,## __VA_ARGS__ # define __ASM_FORM_RAW(x, ...) x,## __VA_ARGS__ # define __ASM_FORM_COMMA(x, ...) x,## __VA_ARGS__, @@ -123,7 +123,7 @@ #ifdef __KERNEL__ /* Exception table entry */ -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ # define _ASM_EXTABLE_HANDLE(from, to, handler) \ .pushsection "__ex_table","a" ; \ .balign 4 ; \ @@ -154,7 +154,7 @@ # define _ASM_NOKPROBE(entry) # endif -#else /* ! __ASSEMBLY__ */ +#else /* ! __ASSEMBLER__ */ # define _EXPAND_EXTABLE_HANDLE(x) #x # define _ASM_EXTABLE_HANDLE(from, to, handler) \ " .pushsection \"__ex_table\",\"a\"\n" \ @@ -186,7 +186,7 @@ */ register unsigned long current_stack_pointer asm(_ASM_SP); #define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __KERNEL__ */ diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 17b6590748c0..c691481d59ce 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -2,14 +2,6 @@ #ifndef _ASM_X86_CPUFEATURES_H #define _ASM_X86_CPUFEATURES_H -#ifndef _ASM_X86_REQUIRED_FEATURES_H -#include <asm/required-features.h> -#endif - -#ifndef _ASM_X86_DISABLED_FEATURES_H -#include <asm/disabled-features.h> -#endif - /* * Defines x86 CPU feature bits */ diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h deleted file mode 100644 index c492bdc97b05..000000000000 --- a/tools/arch/x86/include/asm/disabled-features.h +++ /dev/null @@ -1,161 +0,0 @@ -#ifndef _ASM_X86_DISABLED_FEATURES_H -#define _ASM_X86_DISABLED_FEATURES_H - -/* These features, although they might be available in a CPU - * will not be used because the compile options to support - * them are not present. - * - * This code allows them to be checked and disabled at - * compile time without an explicit #ifdef. Use - * cpu_feature_enabled(). - */ - -#ifdef CONFIG_X86_UMIP -# define DISABLE_UMIP 0 -#else -# define DISABLE_UMIP (1<<(X86_FEATURE_UMIP & 31)) -#endif - -#ifdef CONFIG_X86_64 -# define DISABLE_VME (1<<(X86_FEATURE_VME & 31)) -# define DISABLE_K6_MTRR (1<<(X86_FEATURE_K6_MTRR & 31)) -# define DISABLE_CYRIX_ARR (1<<(X86_FEATURE_CYRIX_ARR & 31)) -# define DISABLE_CENTAUR_MCR (1<<(X86_FEATURE_CENTAUR_MCR & 31)) -# define DISABLE_PCID 0 -#else -# define DISABLE_VME 0 -# define DISABLE_K6_MTRR 0 -# define DISABLE_CYRIX_ARR 0 -# define DISABLE_CENTAUR_MCR 0 -# define DISABLE_PCID (1<<(X86_FEATURE_PCID & 31)) -#endif /* CONFIG_X86_64 */ - -#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS -# define DISABLE_PKU 0 -# define DISABLE_OSPKE 0 -#else -# define DISABLE_PKU (1<<(X86_FEATURE_PKU & 31)) -# define DISABLE_OSPKE (1<<(X86_FEATURE_OSPKE & 31)) -#endif /* CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS */ - -#ifdef CONFIG_X86_5LEVEL -# define DISABLE_LA57 0 -#else -# define DISABLE_LA57 (1<<(X86_FEATURE_LA57 & 31)) -#endif - -#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION -# define DISABLE_PTI 0 -#else -# define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31)) -#endif - -#ifdef CONFIG_MITIGATION_RETPOLINE -# define DISABLE_RETPOLINE 0 -#else -# define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \ - (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31))) -#endif - -#ifdef CONFIG_MITIGATION_RETHUNK -# define DISABLE_RETHUNK 0 -#else -# define DISABLE_RETHUNK (1 << (X86_FEATURE_RETHUNK & 31)) -#endif - -#ifdef CONFIG_MITIGATION_UNRET_ENTRY -# define DISABLE_UNRET 0 -#else -# define DISABLE_UNRET (1 << (X86_FEATURE_UNRET & 31)) -#endif - -#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING -# define DISABLE_CALL_DEPTH_TRACKING 0 -#else -# define DISABLE_CALL_DEPTH_TRACKING (1 << (X86_FEATURE_CALL_DEPTH & 31)) -#endif - -#ifdef CONFIG_ADDRESS_MASKING -# define DISABLE_LAM 0 -#else -# define DISABLE_LAM (1 << (X86_FEATURE_LAM & 31)) -#endif - -#ifdef CONFIG_INTEL_IOMMU_SVM -# define DISABLE_ENQCMD 0 -#else -# define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31)) -#endif - -#ifdef CONFIG_X86_SGX -# define DISABLE_SGX 0 -#else -# define DISABLE_SGX (1 << (X86_FEATURE_SGX & 31)) -#endif - -#ifdef CONFIG_XEN_PV -# define DISABLE_XENPV 0 -#else -# define DISABLE_XENPV (1 << (X86_FEATURE_XENPV & 31)) -#endif - -#ifdef CONFIG_INTEL_TDX_GUEST -# define DISABLE_TDX_GUEST 0 -#else -# define DISABLE_TDX_GUEST (1 << (X86_FEATURE_TDX_GUEST & 31)) -#endif - -#ifdef CONFIG_X86_USER_SHADOW_STACK -#define DISABLE_USER_SHSTK 0 -#else -#define DISABLE_USER_SHSTK (1 << (X86_FEATURE_USER_SHSTK & 31)) -#endif - -#ifdef CONFIG_X86_KERNEL_IBT -#define DISABLE_IBT 0 -#else -#define DISABLE_IBT (1 << (X86_FEATURE_IBT & 31)) -#endif - -#ifdef CONFIG_X86_FRED -# define DISABLE_FRED 0 -#else -# define DISABLE_FRED (1 << (X86_FEATURE_FRED & 31)) -#endif - -#ifdef CONFIG_KVM_AMD_SEV -#define DISABLE_SEV_SNP 0 -#else -#define DISABLE_SEV_SNP (1 << (X86_FEATURE_SEV_SNP & 31)) -#endif - -/* - * Make sure to add features to the correct mask - */ -#define DISABLED_MASK0 (DISABLE_VME) -#define DISABLED_MASK1 0 -#define DISABLED_MASK2 0 -#define DISABLED_MASK3 (DISABLE_CYRIX_ARR|DISABLE_CENTAUR_MCR|DISABLE_K6_MTRR) -#define DISABLED_MASK4 (DISABLE_PCID) -#define DISABLED_MASK5 0 -#define DISABLED_MASK6 0 -#define DISABLED_MASK7 (DISABLE_PTI) -#define DISABLED_MASK8 (DISABLE_XENPV|DISABLE_TDX_GUEST) -#define DISABLED_MASK9 (DISABLE_SGX) -#define DISABLED_MASK10 0 -#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET| \ - DISABLE_CALL_DEPTH_TRACKING|DISABLE_USER_SHSTK) -#define DISABLED_MASK12 (DISABLE_FRED|DISABLE_LAM) -#define DISABLED_MASK13 0 -#define DISABLED_MASK14 0 -#define DISABLED_MASK15 0 -#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP| \ - DISABLE_ENQCMD) -#define DISABLED_MASK17 0 -#define DISABLED_MASK18 (DISABLE_IBT) -#define DISABLED_MASK19 (DISABLE_SEV_SNP) -#define DISABLED_MASK20 0 -#define DISABLED_MASK21 0 -#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22) - -#endif /* _ASM_X86_DISABLED_FEATURES_H */ diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 3ae84c3b8e6d..dc1c1057f26e 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -25,6 +25,7 @@ #define _EFER_SVME 12 /* Enable virtualization */ #define _EFER_LMSLE 13 /* Long Mode Segment Limit Enable */ #define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */ +#define _EFER_TCE 15 /* Enable Translation Cache Extensions */ #define _EFER_AUTOIBRS 21 /* Enable Automatic IBRS */ #define EFER_SCE (1<<_EFER_SCE) @@ -34,6 +35,7 @@ #define EFER_SVME (1<<_EFER_SVME) #define EFER_LMSLE (1<<_EFER_LMSLE) #define EFER_FFXSR (1<<_EFER_FFXSR) +#define EFER_TCE (1<<_EFER_TCE) #define EFER_AUTOIBRS (1<<_EFER_AUTOIBRS) /* diff --git a/tools/arch/x86/include/asm/nops.h b/tools/arch/x86/include/asm/nops.h index 1c1b7550fa55..cd94221d8335 100644 --- a/tools/arch/x86/include/asm/nops.h +++ b/tools/arch/x86/include/asm/nops.h @@ -82,7 +82,7 @@ #define ASM_NOP7 _ASM_BYTES(BYTES_NOP7) #define ASM_NOP8 _ASM_BYTES(BYTES_NOP8) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ extern const unsigned char * const x86_nops[]; #endif diff --git a/tools/arch/x86/include/asm/orc_types.h b/tools/arch/x86/include/asm/orc_types.h index 46d7e06763c9..e0125afa53fb 100644 --- a/tools/arch/x86/include/asm/orc_types.h +++ b/tools/arch/x86/include/asm/orc_types.h @@ -45,7 +45,7 @@ #define ORC_TYPE_REGS 3 #define ORC_TYPE_REGS_PARTIAL 4 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/byteorder.h> /* @@ -73,6 +73,6 @@ struct orc_entry { #endif } __packed; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ORC_TYPES_H */ diff --git a/tools/arch/x86/include/asm/pvclock-abi.h b/tools/arch/x86/include/asm/pvclock-abi.h index 1436226efe3e..b9fece5fc96d 100644 --- a/tools/arch/x86/include/asm/pvclock-abi.h +++ b/tools/arch/x86/include/asm/pvclock-abi.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_PVCLOCK_ABI_H #define _ASM_X86_PVCLOCK_ABI_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * These structs MUST NOT be changed. @@ -44,5 +44,5 @@ struct pvclock_wall_clock { #define PVCLOCK_GUEST_STOPPED (1 << 1) /* PVCLOCK_COUNTS_FROM_ZERO broke ABI and can't be used anymore. */ #define PVCLOCK_COUNTS_FROM_ZERO (1 << 2) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_PVCLOCK_ABI_H */ diff --git a/tools/arch/x86/include/asm/required-features.h b/tools/arch/x86/include/asm/required-features.h deleted file mode 100644 index e9187ddd3d1f..000000000000 --- a/tools/arch/x86/include/asm/required-features.h +++ /dev/null @@ -1,105 +0,0 @@ -#ifndef _ASM_X86_REQUIRED_FEATURES_H -#define _ASM_X86_REQUIRED_FEATURES_H - -/* Define minimum CPUID feature set for kernel These bits are checked - really early to actually display a visible error message before the - kernel dies. Make sure to assign features to the proper mask! - - Some requirements that are not in CPUID yet are also in the - CONFIG_X86_MINIMUM_CPU_FAMILY which is checked too. - - The real information is in arch/x86/Kconfig.cpu, this just converts - the CONFIGs into a bitmask */ - -#ifndef CONFIG_MATH_EMULATION -# define NEED_FPU (1<<(X86_FEATURE_FPU & 31)) -#else -# define NEED_FPU 0 -#endif - -#if defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64) -# define NEED_PAE (1<<(X86_FEATURE_PAE & 31)) -#else -# define NEED_PAE 0 -#endif - -#ifdef CONFIG_X86_CMPXCHG64 -# define NEED_CX8 (1<<(X86_FEATURE_CX8 & 31)) -#else -# define NEED_CX8 0 -#endif - -#if defined(CONFIG_X86_CMOV) || defined(CONFIG_X86_64) -# define NEED_CMOV (1<<(X86_FEATURE_CMOV & 31)) -#else -# define NEED_CMOV 0 -#endif - -# define NEED_3DNOW 0 - -#if defined(CONFIG_X86_P6_NOP) || defined(CONFIG_X86_64) -# define NEED_NOPL (1<<(X86_FEATURE_NOPL & 31)) -#else -# define NEED_NOPL 0 -#endif - -#ifdef CONFIG_MATOM -# define NEED_MOVBE (1<<(X86_FEATURE_MOVBE & 31)) -#else -# define NEED_MOVBE 0 -#endif - -#ifdef CONFIG_X86_64 -#ifdef CONFIG_PARAVIRT_XXL -/* Paravirtualized systems may not have PSE or PGE available */ -#define NEED_PSE 0 -#define NEED_PGE 0 -#else -#define NEED_PSE (1<<(X86_FEATURE_PSE) & 31) -#define NEED_PGE (1<<(X86_FEATURE_PGE) & 31) -#endif -#define NEED_MSR (1<<(X86_FEATURE_MSR & 31)) -#define NEED_FXSR (1<<(X86_FEATURE_FXSR & 31)) -#define NEED_XMM (1<<(X86_FEATURE_XMM & 31)) -#define NEED_XMM2 (1<<(X86_FEATURE_XMM2 & 31)) -#define NEED_LM (1<<(X86_FEATURE_LM & 31)) -#else -#define NEED_PSE 0 -#define NEED_MSR 0 -#define NEED_PGE 0 -#define NEED_FXSR 0 -#define NEED_XMM 0 -#define NEED_XMM2 0 -#define NEED_LM 0 -#endif - -#define REQUIRED_MASK0 (NEED_FPU|NEED_PSE|NEED_MSR|NEED_PAE|\ - NEED_CX8|NEED_PGE|NEED_FXSR|NEED_CMOV|\ - NEED_XMM|NEED_XMM2) -#define SSE_MASK (NEED_XMM|NEED_XMM2) - -#define REQUIRED_MASK1 (NEED_LM|NEED_3DNOW) - -#define REQUIRED_MASK2 0 -#define REQUIRED_MASK3 (NEED_NOPL) -#define REQUIRED_MASK4 (NEED_MOVBE) -#define REQUIRED_MASK5 0 -#define REQUIRED_MASK6 0 -#define REQUIRED_MASK7 0 -#define REQUIRED_MASK8 0 -#define REQUIRED_MASK9 0 -#define REQUIRED_MASK10 0 -#define REQUIRED_MASK11 0 -#define REQUIRED_MASK12 0 -#define REQUIRED_MASK13 0 -#define REQUIRED_MASK14 0 -#define REQUIRED_MASK15 0 -#define REQUIRED_MASK16 0 -#define REQUIRED_MASK17 0 -#define REQUIRED_MASK18 0 -#define REQUIRED_MASK19 0 -#define REQUIRED_MASK20 0 -#define REQUIRED_MASK21 0 -#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22) - -#endif /* _ASM_X86_REQUIRED_FEATURES_H */ diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index fb9691a34d92..7567c893f45e 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -850,6 +850,7 @@ bool arch_is_rethunk(struct symbol *sym) bool arch_is_embedded_insn(struct symbol *sym) { return !strcmp(sym->name, "retbleed_return_thunk") || + !strcmp(sym->name, "srso_alias_safe_ret") || !strcmp(sym->name, "srso_safe_ret"); } diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 48d7bc5b4736..ca3435acc326 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1284,15 +1284,6 @@ static void annotate_call_site(struct objtool_file *file, if (!sym) sym = reloc->sym; - /* - * Alternative replacement code is just template code which is - * sometimes copied to the original instruction. For now, don't - * annotate it. (In the future we might consider annotating the - * original instruction if/when it ever makes sense to do so.) - */ - if (!strcmp(insn->sec->name, ".altinstr_replacement")) - return; - if (sym->static_call_tramp) { list_add_tail(&insn->call_node, &file->static_call_list); return; @@ -1350,7 +1341,8 @@ static void annotate_call_site(struct objtool_file *file, return; } - if (insn->type == INSN_CALL && !insn->sec->init) + if (insn->type == INSN_CALL && !insn->sec->init && + !insn->_call_dest->embedded_insn) list_add_tail(&insn->call_node, &file->call_list); if (!sibling && dead_end_function(file, sym)) diff --git a/tools/objtool/noreturns.h b/tools/objtool/noreturns.h index 6bb7edda3094..eacfe3b0a8d1 100644 --- a/tools/objtool/noreturns.h +++ b/tools/objtool/noreturns.h @@ -16,6 +16,7 @@ NORETURN(__tdx_hypercall_failed) NORETURN(__ubsan_handle_builtin_unreachable) NORETURN(__x64_sys_exit) NORETURN(__x64_sys_exit_group) +NORETURN(acpi_processor_ffh_play_dead) NORETURN(arch_cpu_idle_dead) NORETURN(bch2_trans_in_restart_error) NORETURN(bch2_trans_restart_error) @@ -34,6 +35,7 @@ NORETURN(kunit_try_catch_throw) NORETURN(machine_real_restart) NORETURN(make_task_dead) NORETURN(mpt_halt_firmware) +NORETURN(mwait_play_dead) NORETURN(nmi_panic_self_stop) NORETURN(panic) NORETURN(panic_smp_self_stop) diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index d3c6e10dce73..a4499e5a6f9c 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -26,8 +26,6 @@ FILES=( "include/linux/hash.h" "include/linux/list-sort.h" "include/uapi/linux/hw_breakpoint.h" - "arch/x86/include/asm/disabled-features.h" - "arch/x86/include/asm/required-features.h" "arch/x86/include/asm/cpufeatures.h" "arch/x86/include/asm/inat_types.h" "arch/x86/include/asm/emulate_prefix.h" diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 8d5011a0bf60..26057af6b5a1 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1056,7 +1056,7 @@ static const struct platform_data turbostat_pdata[] = { * Missing support for * INTEL_ICELAKE * INTEL_ATOM_SILVERMONT_MID - * INTEL_ATOM_AIRMONT_MID + * INTEL_ATOM_SILVERMONT_MID2 * INTEL_ATOM_AIRMONT_NP */ { 0, NULL }, diff --git a/tools/testing/selftests/x86/lam.c b/tools/testing/selftests/x86/lam.c index 4d4a76532dc9..18d736640ece 100644 --- a/tools/testing/selftests/x86/lam.c +++ b/tools/testing/selftests/x86/lam.c @@ -4,6 +4,7 @@ #include <stdlib.h> #include <string.h> #include <sys/syscall.h> +#include <sys/ioctl.h> #include <time.h> #include <signal.h> #include <setjmp.h> @@ -43,7 +44,15 @@ #define FUNC_INHERITE 0x20 #define FUNC_PASID 0x40 +/* get_user() pointer test cases */ +#define GET_USER_USER 0 +#define GET_USER_KERNEL_TOP 1 +#define GET_USER_KERNEL_BOT 2 +#define GET_USER_KERNEL 3 + #define TEST_MASK 0x7f +#define L5_SIGN_EXT_MASK (0xFFUL << 56) +#define L4_SIGN_EXT_MASK (0x1FFFFUL << 47) #define LOW_ADDR (0x1UL << 30) #define HIGH_ADDR (0x3UL << 48) @@ -115,23 +124,42 @@ static void segv_handler(int sig) siglongjmp(segv_env, 1); } -static inline int cpu_has_lam(void) +static inline int lam_is_available(void) { unsigned int cpuinfo[4]; + unsigned long bits = 0; + int ret; __cpuid_count(0x7, 1, cpuinfo[0], cpuinfo[1], cpuinfo[2], cpuinfo[3]); - return (cpuinfo[0] & (1 << 26)); + /* Check if cpu supports LAM */ + if (!(cpuinfo[0] & (1 << 26))) { + ksft_print_msg("LAM is not supported!\n"); + return 0; + } + + /* Return 0 if CONFIG_ADDRESS_MASKING is not set */ + ret = syscall(SYS_arch_prctl, ARCH_GET_MAX_TAG_BITS, &bits); + if (ret) { + ksft_print_msg("LAM is disabled in the kernel!\n"); + return 0; + } + + return 1; } -/* Check 5-level page table feature in CPUID.(EAX=07H, ECX=00H):ECX.[bit 16] */ -static inline int cpu_has_la57(void) +static inline int la57_enabled(void) { - unsigned int cpuinfo[4]; + int ret; + void *p; + + p = mmap((void *)HIGH_ADDR, PAGE_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); - __cpuid_count(0x7, 0, cpuinfo[0], cpuinfo[1], cpuinfo[2], cpuinfo[3]); + ret = p == MAP_FAILED ? 0 : 1; - return (cpuinfo[2] & (1 << 16)); + munmap(p, PAGE_SIZE); + return ret; } /* @@ -322,7 +350,7 @@ static int handle_mmap(struct testcases *test) flags, -1, 0); if (ptr == MAP_FAILED) { if (test->addr == HIGH_ADDR) - if (!cpu_has_la57()) + if (!la57_enabled()) return 3; /* unsupport LA57 */ return 1; } @@ -370,6 +398,78 @@ static int handle_syscall(struct testcases *test) return ret; } +static int get_user_syscall(struct testcases *test) +{ + uint64_t ptr_address, bitmask; + int fd, ret = 0; + void *ptr; + + if (la57_enabled()) { + bitmask = L5_SIGN_EXT_MASK; + ptr_address = HIGH_ADDR; + } else { + bitmask = L4_SIGN_EXT_MASK; + ptr_address = LOW_ADDR; + } + + ptr = mmap((void *)ptr_address, PAGE_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); + + if (ptr == MAP_FAILED) { + perror("failed to map byte to pass into get_user"); + return 1; + } + + if (set_lam(test->lam) != 0) { + ret = 2; + goto error; + } + + fd = memfd_create("lam_ioctl", 0); + if (fd == -1) { + munmap(ptr, PAGE_SIZE); + exit(EXIT_FAILURE); + } + + switch (test->later) { + case GET_USER_USER: + /* Control group - properly tagged user pointer */ + ptr = (void *)set_metadata((uint64_t)ptr, test->lam); + break; + case GET_USER_KERNEL_TOP: + /* Kernel address with top bit cleared */ + bitmask &= (bitmask >> 1); + ptr = (void *)((uint64_t)ptr | bitmask); + break; + case GET_USER_KERNEL_BOT: + /* Kernel address with bottom sign-extension bit cleared */ + bitmask &= (bitmask << 1); + ptr = (void *)((uint64_t)ptr | bitmask); + break; + case GET_USER_KERNEL: + /* Try to pass a kernel address */ + ptr = (void *)((uint64_t)ptr | bitmask); + break; + default: + printf("Invalid test case value passed!\n"); + break; + } + + /* + * Use FIOASYNC ioctl because it utilizes get_user() internally and is + * very non-invasive to the system. Pass differently tagged pointers to + * get_user() in order to verify that valid user pointers are going + * through and invalid kernel/non-canonical pointers are not. + */ + if (ioctl(fd, FIOASYNC, ptr) != 0) + ret = 1; + + close(fd); +error: + munmap(ptr, PAGE_SIZE); + return ret; +} + int sys_uring_setup(unsigned int entries, struct io_uring_params *p) { return (int)syscall(__NR_io_uring_setup, entries, p); @@ -596,8 +696,10 @@ int do_uring(unsigned long lam) fi->file_fd = file_fd; ring = malloc(sizeof(*ring)); - if (!ring) + if (!ring) { + free(fi); return 1; + } memset(ring, 0, sizeof(struct io_ring)); @@ -883,6 +985,33 @@ static struct testcases syscall_cases[] = { .test_func = handle_syscall, .msg = "SYSCALL:[Negative] Disable LAM. Dereferencing pointer with metadata.\n", }, + { + .later = GET_USER_USER, + .lam = LAM_U57_BITS, + .test_func = get_user_syscall, + .msg = "GET_USER: get_user() and pass a properly tagged user pointer.\n", + }, + { + .later = GET_USER_KERNEL_TOP, + .expected = 1, + .lam = LAM_U57_BITS, + .test_func = get_user_syscall, + .msg = "GET_USER:[Negative] get_user() with a kernel pointer and the top bit cleared.\n", + }, + { + .later = GET_USER_KERNEL_BOT, + .expected = 1, + .lam = LAM_U57_BITS, + .test_func = get_user_syscall, + .msg = "GET_USER:[Negative] get_user() with a kernel pointer and the bottom sign-extension bit cleared.\n", + }, + { + .later = GET_USER_KERNEL, + .expected = 1, + .lam = LAM_U57_BITS, + .test_func = get_user_syscall, + .msg = "GET_USER:[Negative] get_user() and pass a kernel pointer.\n", + }, }; static struct testcases mmap_cases[] = { @@ -1181,10 +1310,8 @@ int main(int argc, char **argv) tests_cnt = 0; - if (!cpu_has_lam()) { - ksft_print_msg("Unsupported LAM feature!\n"); + if (!lam_is_available()) return KSFT_SKIP; - } while ((c = getopt(argc, argv, "ht:")) != -1) { switch (c) { |