Diffstat (limited to 'arch')
548 files changed, 9213 insertions, 10927 deletions
diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index 16dca28ebf17..3fed97478058 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -509,3 +509,4 @@ 577 common open_tree_attr sys_open_tree_attr 578 common file_getattr sys_file_getattr 579 common file_setattr sys_file_setattr +580 common listns sys_listns diff --git a/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-fuji-data64.dts b/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-fuji-data64.dts index aa9576d8ab56..48ca25f57ef6 100644 --- a/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-fuji-data64.dts +++ b/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-fuji-data64.dts @@ -1254,3 +1254,17 @@ max-frequency = <25000000>; bus-width = <4>; }; + +/* + * FIXME: rgmii delay is introduced by MAC (configured in u-boot now) + * instead of PCB on fuji board, so the "phy-mode" should be updated to + * "rgmii-[tx|rx]id" when the aspeed-mac driver can handle the delay + * properly. + */ +&mac3 { + status = "okay"; + phy-mode = "rgmii"; + phy-handle = <ðphy3>; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_rgmii4_default>; +}; diff --git a/arch/arm/boot/dts/broadcom/bcm47189-luxul-xap-1440.dts b/arch/arm/boot/dts/broadcom/bcm47189-luxul-xap-1440.dts index ac44c745bdf8..a39a021a3910 100644 --- a/arch/arm/boot/dts/broadcom/bcm47189-luxul-xap-1440.dts +++ b/arch/arm/boot/dts/broadcom/bcm47189-luxul-xap-1440.dts @@ -55,8 +55,8 @@ mdio { /delete-node/ switch@1e; - bcm54210e: ethernet-phy@0 { - reg = <0>; + bcm54210e: ethernet-phy@25 { + reg = <25>; }; }; }; diff --git a/arch/arm/boot/dts/nxp/imx/imx51-zii-rdu1.dts b/arch/arm/boot/dts/nxp/imx/imx51-zii-rdu1.dts index 06545a6052f7..43ff5eafb2bb 100644 --- a/arch/arm/boot/dts/nxp/imx/imx51-zii-rdu1.dts +++ b/arch/arm/boot/dts/nxp/imx/imx51-zii-rdu1.dts @@ -259,7 +259,7 @@ pinctrl-0 = <&pinctrl_audmux>; status = "okay"; - ssi2 { + mux-ssi2 { fsl,audmux-port = <1>; fsl,port-config = < (IMX_AUDMUX_V2_PTCR_SYN | @@ -271,7 +271,7 @@ >; }; - aud3 { + mux-aud3 { fsl,audmux-port = <2>; fsl,port-config = < IMX_AUDMUX_V2_PTCR_SYN diff --git a/arch/arm/boot/dts/nxp/imx/imx6ul.dtsi b/arch/arm/boot/dts/nxp/imx/imx6ul.dtsi index 6de224dd2bb9..6eb80f867f50 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6ul.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6ul.dtsi @@ -339,7 +339,7 @@ #sound-dai-cells = <0>; compatible = "fsl,imx6ul-sai", "fsl,imx6sx-sai"; reg = <0x02030000 0x4000>; - interrupts = <GIC_SPI 24 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 25 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clks IMX6UL_CLK_SAI3_IPG>, <&clks IMX6UL_CLK_SAI3>, <&clks IMX6UL_CLK_DUMMY>, <&clks IMX6UL_CLK_DUMMY>; diff --git a/arch/arm/boot/dts/nxp/imx/imx6ull-engicam-microgea-rmm.dts b/arch/arm/boot/dts/nxp/imx/imx6ull-engicam-microgea-rmm.dts index 107b00b9a939..540642e99a41 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6ull-engicam-microgea-rmm.dts +++ b/arch/arm/boot/dts/nxp/imx/imx6ull-engicam-microgea-rmm.dts @@ -136,7 +136,7 @@ interrupt-parent = <&gpio2>; interrupts = <8 IRQ_TYPE_EDGE_FALLING>; reset-gpios = <&gpio2 14 GPIO_ACTIVE_LOW>; - report-rate-hz = <6>; + report-rate-hz = <60>; /* settings valid only for Hycon touchscreen */ touchscreen-size-x = <1280>; touchscreen-size-y = <800>; diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index c436eec22d86..f30d743df264 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -33,22 +33,6 @@ config CRYPTO_NHPOLY1305_NEON Architecture: arm using: - NEON (Advanced SIMD) extensions -config 
CRYPTO_BLAKE2B_NEON - tristate "Hash functions: BLAKE2b (NEON)" - depends on KERNEL_MODE_NEON - select CRYPTO_BLAKE2B - help - BLAKE2b cryptographic hash function (RFC 7693) - - Architecture: arm using - - NEON (Advanced SIMD) extensions - - BLAKE2b digest algorithm optimized with ARM NEON instructions. - On ARM processors that have NEON support but not the ARMv8 - Crypto Extensions, typically this BLAKE2b implementation is - much faster than the SHA-2 family and slightly faster than - SHA-1. - config CRYPTO_AES_ARM tristate "Ciphers: AES" select CRYPTO_ALGAPI diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index 6346a73effc0..86dd43313dbf 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -5,7 +5,6 @@ obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o -obj-$(CONFIG_CRYPTO_BLAKE2B_NEON) += blake2b-neon.o obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o @@ -13,7 +12,6 @@ obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o aes-arm-y := aes-cipher-core.o aes-cipher-glue.o aes-arm-bs-y := aes-neonbs-core.o aes-neonbs-glue.o -blake2b-neon-y := blake2b-neon-core.o blake2b-neon-glue.o aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o diff --git a/arch/arm/crypto/blake2b-neon-core.S b/arch/arm/crypto/blake2b-neon-core.S deleted file mode 100644 index 0406a186377f..000000000000 --- a/arch/arm/crypto/blake2b-neon-core.S +++ /dev/null @@ -1,347 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * BLAKE2b digest algorithm, NEON accelerated - * - * Copyright 2020 Google LLC - * - * Author: Eric Biggers <ebiggers@google.com> - */ - -#include <linux/linkage.h> - - .text - .fpu neon - - // The arguments to blake2b_compress_neon() - STATE .req r0 - BLOCK .req r1 - NBLOCKS .req r2 - INC .req r3 - - // Pointers to the rotation tables - ROR24_TABLE .req r4 - ROR16_TABLE .req r5 - - // The original stack pointer - ORIG_SP .req r6 - - // NEON registers which contain the message words of the current block. - // M_0-M_3 are occasionally used for other purposes too. - M_0 .req d16 - M_1 .req d17 - M_2 .req d18 - M_3 .req d19 - M_4 .req d20 - M_5 .req d21 - M_6 .req d22 - M_7 .req d23 - M_8 .req d24 - M_9 .req d25 - M_10 .req d26 - M_11 .req d27 - M_12 .req d28 - M_13 .req d29 - M_14 .req d30 - M_15 .req d31 - - .align 4 - // Tables for computing ror64(x, 24) and ror64(x, 16) using the vtbl.8 - // instruction. This is the most efficient way to implement these - // rotation amounts with NEON. (On Cortex-A53 it's the same speed as - // vshr.u64 + vsli.u64, while on Cortex-A7 it's faster.) -.Lror24_table: - .byte 3, 4, 5, 6, 7, 0, 1, 2 -.Lror16_table: - .byte 2, 3, 4, 5, 6, 7, 0, 1 - // The BLAKE2b initialization vector -.Lblake2b_IV: - .quad 0x6a09e667f3bcc908, 0xbb67ae8584caa73b - .quad 0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1 - .quad 0x510e527fade682d1, 0x9b05688c2b3e6c1f - .quad 0x1f83d9abfb41bd6b, 0x5be0cd19137e2179 - -// Execute one round of BLAKE2b by updating the state matrix v[0..15] in the -// NEON registers q0-q7. The message block is in q8..q15 (M_0-M_15). The stack -// pointer points to a 32-byte aligned buffer containing a copy of q8 and q9 -// (M_0-M_3), so that they can be reloaded if they are used as temporary -// registers. The macro arguments s0-s15 give the order in which the message -// words are used in this round. 
'final' is 1 if this is the final round. -.macro _blake2b_round s0, s1, s2, s3, s4, s5, s6, s7, \ - s8, s9, s10, s11, s12, s13, s14, s15, final=0 - - // Mix the columns: - // (v[0], v[4], v[8], v[12]), (v[1], v[5], v[9], v[13]), - // (v[2], v[6], v[10], v[14]), and (v[3], v[7], v[11], v[15]). - - // a += b + m[blake2b_sigma[r][2*i + 0]]; - vadd.u64 q0, q0, q2 - vadd.u64 q1, q1, q3 - vadd.u64 d0, d0, M_\s0 - vadd.u64 d1, d1, M_\s2 - vadd.u64 d2, d2, M_\s4 - vadd.u64 d3, d3, M_\s6 - - // d = ror64(d ^ a, 32); - veor q6, q6, q0 - veor q7, q7, q1 - vrev64.32 q6, q6 - vrev64.32 q7, q7 - - // c += d; - vadd.u64 q4, q4, q6 - vadd.u64 q5, q5, q7 - - // b = ror64(b ^ c, 24); - vld1.8 {M_0}, [ROR24_TABLE, :64] - veor q2, q2, q4 - veor q3, q3, q5 - vtbl.8 d4, {d4}, M_0 - vtbl.8 d5, {d5}, M_0 - vtbl.8 d6, {d6}, M_0 - vtbl.8 d7, {d7}, M_0 - - // a += b + m[blake2b_sigma[r][2*i + 1]]; - // - // M_0 got clobbered above, so we have to reload it if any of the four - // message words this step needs happens to be M_0. Otherwise we don't - // need to reload it here, as it will just get clobbered again below. -.if \s1 == 0 || \s3 == 0 || \s5 == 0 || \s7 == 0 - vld1.8 {M_0}, [sp, :64] -.endif - vadd.u64 q0, q0, q2 - vadd.u64 q1, q1, q3 - vadd.u64 d0, d0, M_\s1 - vadd.u64 d1, d1, M_\s3 - vadd.u64 d2, d2, M_\s5 - vadd.u64 d3, d3, M_\s7 - - // d = ror64(d ^ a, 16); - vld1.8 {M_0}, [ROR16_TABLE, :64] - veor q6, q6, q0 - veor q7, q7, q1 - vtbl.8 d12, {d12}, M_0 - vtbl.8 d13, {d13}, M_0 - vtbl.8 d14, {d14}, M_0 - vtbl.8 d15, {d15}, M_0 - - // c += d; - vadd.u64 q4, q4, q6 - vadd.u64 q5, q5, q7 - - // b = ror64(b ^ c, 63); - // - // This rotation amount isn't a multiple of 8, so it has to be - // implemented using a pair of shifts, which requires temporary - // registers. Use q8-q9 (M_0-M_3) for this, and reload them afterwards. - veor q8, q2, q4 - veor q9, q3, q5 - vshr.u64 q2, q8, #63 - vshr.u64 q3, q9, #63 - vsli.u64 q2, q8, #1 - vsli.u64 q3, q9, #1 - vld1.8 {q8-q9}, [sp, :256] - - // Mix the diagonals: - // (v[0], v[5], v[10], v[15]), (v[1], v[6], v[11], v[12]), - // (v[2], v[7], v[8], v[13]), and (v[3], v[4], v[9], v[14]). - // - // There are two possible ways to do this: use 'vext' instructions to - // shift the rows of the matrix so that the diagonals become columns, - // and undo it afterwards; or just use 64-bit operations on 'd' - // registers instead of 128-bit operations on 'q' registers. We use the - // latter approach, as it performs much better on Cortex-A7. 
- - // a += b + m[blake2b_sigma[r][2*i + 0]]; - vadd.u64 d0, d0, d5 - vadd.u64 d1, d1, d6 - vadd.u64 d2, d2, d7 - vadd.u64 d3, d3, d4 - vadd.u64 d0, d0, M_\s8 - vadd.u64 d1, d1, M_\s10 - vadd.u64 d2, d2, M_\s12 - vadd.u64 d3, d3, M_\s14 - - // d = ror64(d ^ a, 32); - veor d15, d15, d0 - veor d12, d12, d1 - veor d13, d13, d2 - veor d14, d14, d3 - vrev64.32 d15, d15 - vrev64.32 d12, d12 - vrev64.32 d13, d13 - vrev64.32 d14, d14 - - // c += d; - vadd.u64 d10, d10, d15 - vadd.u64 d11, d11, d12 - vadd.u64 d8, d8, d13 - vadd.u64 d9, d9, d14 - - // b = ror64(b ^ c, 24); - vld1.8 {M_0}, [ROR24_TABLE, :64] - veor d5, d5, d10 - veor d6, d6, d11 - veor d7, d7, d8 - veor d4, d4, d9 - vtbl.8 d5, {d5}, M_0 - vtbl.8 d6, {d6}, M_0 - vtbl.8 d7, {d7}, M_0 - vtbl.8 d4, {d4}, M_0 - - // a += b + m[blake2b_sigma[r][2*i + 1]]; -.if \s9 == 0 || \s11 == 0 || \s13 == 0 || \s15 == 0 - vld1.8 {M_0}, [sp, :64] -.endif - vadd.u64 d0, d0, d5 - vadd.u64 d1, d1, d6 - vadd.u64 d2, d2, d7 - vadd.u64 d3, d3, d4 - vadd.u64 d0, d0, M_\s9 - vadd.u64 d1, d1, M_\s11 - vadd.u64 d2, d2, M_\s13 - vadd.u64 d3, d3, M_\s15 - - // d = ror64(d ^ a, 16); - vld1.8 {M_0}, [ROR16_TABLE, :64] - veor d15, d15, d0 - veor d12, d12, d1 - veor d13, d13, d2 - veor d14, d14, d3 - vtbl.8 d12, {d12}, M_0 - vtbl.8 d13, {d13}, M_0 - vtbl.8 d14, {d14}, M_0 - vtbl.8 d15, {d15}, M_0 - - // c += d; - vadd.u64 d10, d10, d15 - vadd.u64 d11, d11, d12 - vadd.u64 d8, d8, d13 - vadd.u64 d9, d9, d14 - - // b = ror64(b ^ c, 63); - veor d16, d4, d9 - veor d17, d5, d10 - veor d18, d6, d11 - veor d19, d7, d8 - vshr.u64 q2, q8, #63 - vshr.u64 q3, q9, #63 - vsli.u64 q2, q8, #1 - vsli.u64 q3, q9, #1 - // Reloading q8-q9 can be skipped on the final round. -.if ! \final - vld1.8 {q8-q9}, [sp, :256] -.endif -.endm - -// -// void blake2b_compress_neon(struct blake2b_state *state, -// const u8 *block, size_t nblocks, u32 inc); -// -// Only the first three fields of struct blake2b_state are used: -// u64 h[8]; (inout) -// u64 t[2]; (inout) -// u64 f[2]; (in) -// - .align 5 -ENTRY(blake2b_compress_neon) - push {r4-r10} - - // Allocate a 32-byte stack buffer that is 32-byte aligned. - mov ORIG_SP, sp - sub ip, sp, #32 - bic ip, ip, #31 - mov sp, ip - - adr ROR24_TABLE, .Lror24_table - adr ROR16_TABLE, .Lror16_table - - mov ip, STATE - vld1.64 {q0-q1}, [ip]! // Load h[0..3] - vld1.64 {q2-q3}, [ip]! // Load h[4..7] -.Lnext_block: - adr r10, .Lblake2b_IV - vld1.64 {q14-q15}, [ip] // Load t[0..1] and f[0..1] - vld1.64 {q4-q5}, [r10]! // Load IV[0..3] - vmov r7, r8, d28 // Copy t[0] to (r7, r8) - vld1.64 {q6-q7}, [r10] // Load IV[4..7] - adds r7, r7, INC // Increment counter - bcs .Lslow_inc_ctr - vmov.i32 d28[0], r7 - vst1.64 {d28}, [ip] // Update t[0] -.Linc_ctr_done: - - // Load the next message block and finish initializing the state matrix - // 'v'. Fortunately, there are exactly enough NEON registers to fit the - // entire state matrix in q0-q7 and the entire message block in q8-15. - // - // However, _blake2b_round also needs some extra registers for rotates, - // so we have to spill some registers. It's better to spill the message - // registers than the state registers, as the message doesn't change. - // Therefore we store a copy of the first 32 bytes of the message block - // (q8-q9) in an aligned buffer on the stack so that they can be - // reloaded when needed. (We could just reload directly from the - // message buffer, but it's faster to use aligned loads.) - vld1.8 {q8-q9}, [BLOCK]! - veor q6, q6, q14 // v[12..13] = IV[4..5] ^ t[0..1] - vld1.8 {q10-q11}, [BLOCK]! 
- veor q7, q7, q15 // v[14..15] = IV[6..7] ^ f[0..1] - vld1.8 {q12-q13}, [BLOCK]! - vst1.8 {q8-q9}, [sp, :256] - mov ip, STATE - vld1.8 {q14-q15}, [BLOCK]! - - // Execute the rounds. Each round is provided the order in which it - // needs to use the message words. - _blake2b_round 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 - _blake2b_round 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 - _blake2b_round 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 - _blake2b_round 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 - _blake2b_round 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 - _blake2b_round 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 - _blake2b_round 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 - _blake2b_round 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 - _blake2b_round 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 - _blake2b_round 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 - _blake2b_round 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 - _blake2b_round 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 \ - final=1 - - // Fold the final state matrix into the hash chaining value: - // - // for (i = 0; i < 8; i++) - // h[i] ^= v[i] ^ v[i + 8]; - // - vld1.64 {q8-q9}, [ip]! // Load old h[0..3] - veor q0, q0, q4 // v[0..1] ^= v[8..9] - veor q1, q1, q5 // v[2..3] ^= v[10..11] - vld1.64 {q10-q11}, [ip] // Load old h[4..7] - veor q2, q2, q6 // v[4..5] ^= v[12..13] - veor q3, q3, q7 // v[6..7] ^= v[14..15] - veor q0, q0, q8 // v[0..1] ^= h[0..1] - veor q1, q1, q9 // v[2..3] ^= h[2..3] - mov ip, STATE - subs NBLOCKS, NBLOCKS, #1 // nblocks-- - vst1.64 {q0-q1}, [ip]! // Store new h[0..3] - veor q2, q2, q10 // v[4..5] ^= h[4..5] - veor q3, q3, q11 // v[6..7] ^= h[6..7] - vst1.64 {q2-q3}, [ip]! // Store new h[4..7] - - // Advance to the next block, if there is one. - bne .Lnext_block // nblocks != 0? - - mov sp, ORIG_SP - pop {r4-r10} - mov pc, lr - -.Lslow_inc_ctr: - // Handle the case where the counter overflowed its low 32 bits, by - // carrying the overflow bit into the full 128-bit counter. 
- vmov r9, r10, d29 - adcs r8, r8, #0 - adcs r9, r9, #0 - adc r10, r10, #0 - vmov d28, r7, r8 - vmov d29, r9, r10 - vst1.64 {q14}, [ip] // Update t[0] and t[1] - b .Linc_ctr_done -ENDPROC(blake2b_compress_neon) diff --git a/arch/arm/crypto/blake2b-neon-glue.c b/arch/arm/crypto/blake2b-neon-glue.c deleted file mode 100644 index 2ff443a91724..000000000000 --- a/arch/arm/crypto/blake2b-neon-glue.c +++ /dev/null @@ -1,104 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * BLAKE2b digest algorithm, NEON accelerated - * - * Copyright 2020 Google LLC - */ - -#include <crypto/internal/blake2b.h> -#include <crypto/internal/hash.h> - -#include <linux/module.h> -#include <linux/sizes.h> - -#include <asm/neon.h> -#include <asm/simd.h> - -asmlinkage void blake2b_compress_neon(struct blake2b_state *state, - const u8 *block, size_t nblocks, u32 inc); - -static void blake2b_compress_arch(struct blake2b_state *state, - const u8 *block, size_t nblocks, u32 inc) -{ - do { - const size_t blocks = min_t(size_t, nblocks, - SZ_4K / BLAKE2B_BLOCK_SIZE); - - kernel_neon_begin(); - blake2b_compress_neon(state, block, blocks, inc); - kernel_neon_end(); - - nblocks -= blocks; - block += blocks * BLAKE2B_BLOCK_SIZE; - } while (nblocks); -} - -static int crypto_blake2b_update_neon(struct shash_desc *desc, - const u8 *in, unsigned int inlen) -{ - return crypto_blake2b_update_bo(desc, in, inlen, blake2b_compress_arch); -} - -static int crypto_blake2b_finup_neon(struct shash_desc *desc, const u8 *in, - unsigned int inlen, u8 *out) -{ - return crypto_blake2b_finup(desc, in, inlen, out, - blake2b_compress_arch); -} - -#define BLAKE2B_ALG(name, driver_name, digest_size) \ - { \ - .base.cra_name = name, \ - .base.cra_driver_name = driver_name, \ - .base.cra_priority = 200, \ - .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY | \ - CRYPTO_AHASH_ALG_BLOCK_ONLY | \ - CRYPTO_AHASH_ALG_FINAL_NONZERO, \ - .base.cra_blocksize = BLAKE2B_BLOCK_SIZE, \ - .base.cra_ctxsize = sizeof(struct blake2b_tfm_ctx), \ - .base.cra_module = THIS_MODULE, \ - .digestsize = digest_size, \ - .setkey = crypto_blake2b_setkey, \ - .init = crypto_blake2b_init, \ - .update = crypto_blake2b_update_neon, \ - .finup = crypto_blake2b_finup_neon, \ - .descsize = sizeof(struct blake2b_state), \ - .statesize = BLAKE2B_STATE_SIZE, \ - } - -static struct shash_alg blake2b_neon_algs[] = { - BLAKE2B_ALG("blake2b-160", "blake2b-160-neon", BLAKE2B_160_HASH_SIZE), - BLAKE2B_ALG("blake2b-256", "blake2b-256-neon", BLAKE2B_256_HASH_SIZE), - BLAKE2B_ALG("blake2b-384", "blake2b-384-neon", BLAKE2B_384_HASH_SIZE), - BLAKE2B_ALG("blake2b-512", "blake2b-512-neon", BLAKE2B_512_HASH_SIZE), -}; - -static int __init blake2b_neon_mod_init(void) -{ - if (!(elf_hwcap & HWCAP_NEON)) - return -ENODEV; - - return crypto_register_shashes(blake2b_neon_algs, - ARRAY_SIZE(blake2b_neon_algs)); -} - -static void __exit blake2b_neon_mod_exit(void) -{ - crypto_unregister_shashes(blake2b_neon_algs, - ARRAY_SIZE(blake2b_neon_algs)); -} - -module_init(blake2b_neon_mod_init); -module_exit(blake2b_neon_mod_exit); - -MODULE_DESCRIPTION("BLAKE2b digest algorithm, NEON accelerated"); -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>"); -MODULE_ALIAS_CRYPTO("blake2b-160"); -MODULE_ALIAS_CRYPTO("blake2b-160-neon"); -MODULE_ALIAS_CRYPTO("blake2b-256"); -MODULE_ALIAS_CRYPTO("blake2b-256-neon"); -MODULE_ALIAS_CRYPTO("blake2b-384"); -MODULE_ALIAS_CRYPTO("blake2b-384-neon"); -MODULE_ALIAS_CRYPTO("blake2b-512"); -MODULE_ALIAS_CRYPTO("blake2b-512-neon"); diff --git 
a/arch/arm/include/asm/simd.h b/arch/arm/include/asm/simd.h index be08a8da046f..8549fa8b7253 100644 --- a/arch/arm/include/asm/simd.h +++ b/arch/arm/include/asm/simd.h @@ -2,14 +2,21 @@ #ifndef _ASM_SIMD_H #define _ASM_SIMD_H +#include <linux/cleanup.h> #include <linux/compiler_attributes.h> #include <linux/preempt.h> #include <linux/types.h> +#include <asm/neon.h> + static __must_check inline bool may_use_simd(void) { return IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && !in_hardirq() && !irqs_disabled(); } +DEFINE_LOCK_GUARD_0(ksimd, kernel_neon_begin(), kernel_neon_end()) + +#define scoped_ksimd() scoped_guard(ksimd) + #endif /* _ASM_SIMD_H */ diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index f90be312418e..d6ae80b5df36 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -283,10 +283,17 @@ extern int __put_user_8(void *, unsigned long long); __gu_err; \ }) +/* + * This is a type: either unsigned long, if the argument fits into + * that type, or otherwise unsigned long long. + */ +#define __long_type(x) \ + __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL)) + #define __get_user_err(x, ptr, err, __t) \ do { \ unsigned long __gu_addr = (unsigned long)(ptr); \ - unsigned long __gu_val; \ + __long_type(x) __gu_val; \ unsigned int __ua_flags; \ __chk_user_ptr(ptr); \ might_fault(); \ @@ -295,6 +302,7 @@ do { \ case 1: __get_user_asm_byte(__gu_val, __gu_addr, err, __t); break; \ case 2: __get_user_asm_half(__gu_val, __gu_addr, err, __t); break; \ case 4: __get_user_asm_word(__gu_val, __gu_addr, err, __t); break; \ + case 8: __get_user_asm_dword(__gu_val, __gu_addr, err, __t); break; \ default: (__gu_val) = __get_user_bad(); \ } \ uaccess_restore(__ua_flags); \ @@ -353,6 +361,22 @@ do { \ #define __get_user_asm_word(x, addr, err, __t) \ __get_user_asm(x, addr, err, "ldr" __t) +#ifdef __ARMEB__ +#define __WORD0_OFFS 4 +#define __WORD1_OFFS 0 +#else +#define __WORD0_OFFS 0 +#define __WORD1_OFFS 4 +#endif + +#define __get_user_asm_dword(x, addr, err, __t) \ + ({ \ + unsigned long __w0, __w1; \ + __get_user_asm(__w0, addr + __WORD0_OFFS, err, "ldr" __t); \ + __get_user_asm(__w1, addr + __WORD1_OFFS, err, "ldr" __t); \ + (x) = ((u64)__w1 << 32) | (u64) __w0; \ +}) + #define __put_user_switch(x, ptr, __err, __fn) \ do { \ const __typeof__(*(ptr)) __user *__pu_ptr = (ptr); \ diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index b07e699aaa3c..fd09afae72a2 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -484,3 +484,4 @@ 467 common open_tree_attr sys_open_tree_attr 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr +470 common listns sys_listns diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 6663ffd23f25..65db12f66b8f 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -47,7 +47,6 @@ config ARM64 select ARCH_HAS_SETUP_DMA_OPS select ARCH_HAS_SET_DIRECT_MAP select ARCH_HAS_SET_MEMORY - select ARCH_HAS_MEM_ENCRYPT select ARCH_HAS_FORCE_DMA_UNENCRYPTED select ARCH_STACKWALK select ARCH_HAS_STRICT_KERNEL_RWX @@ -2023,6 +2022,31 @@ config ARM64_TLB_RANGE ARMv8.4-TLBI provides TLBI invalidation instruction that apply to a range of input addresses. 
+config ARM64_MPAM + bool "Enable support for MPAM" + select ARM64_MPAM_DRIVER if EXPERT # does nothing yet + select ACPI_MPAM if ACPI + help + Memory System Resource Partitioning and Monitoring (MPAM) is an + optional extension to the Arm architecture that allows each + transaction issued to the memory system to be labelled with a + Partition identifier (PARTID) and Performance Monitoring Group + identifier (PMG). + + Memory system components, such as the caches, can be configured with + policies to control how much of various physical resources (such as + memory bandwidth or cache memory) the transactions labelled with each + PARTID can consume. Depending on the capabilities of the hardware, + the PARTID and PMG can also be used as filtering criteria to measure + the memory system resource consumption of different parts of a + workload. + + Use of this extension requires CPU support, support in the + Memory System Components (MSC), and a description from firmware + of where the MSCs are in the address space. + + MPAM is exposed to user-space via the resctrl pseudo filesystem. + endmenu # "ARMv8.4 architectural features" menu "ARMv8.5 architectural features" diff --git a/arch/arm64/boot/dts/broadcom/bcm2712-rpi-5-b.dts b/arch/arm64/boot/dts/broadcom/bcm2712-rpi-5-b.dts index b8f256545022..3e0319fdb93f 100644 --- a/arch/arm64/boot/dts/broadcom/bcm2712-rpi-5-b.dts +++ b/arch/arm64/boot/dts/broadcom/bcm2712-rpi-5-b.dts @@ -18,11 +18,21 @@ #include "bcm2712-rpi-5-b-ovl-rp1.dts" +/ { + aliases { + ethernet0 = &rp1_eth; + }; +}; + &pcie2 { #include "rp1-nexus.dtsi" }; &rp1_eth { + assigned-clocks = <&rp1_clocks RP1_CLK_ETH_TSU>, + <&rp1_clocks RP1_CLK_ETH>; + assigned-clock-rates = <50000000>, + <125000000>; status = "okay"; phy-mode = "rgmii-id"; phy-handle = <&phy1>; diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-img.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-img.dtsi index 2cf0f7208350..a72b2f1c4a1b 100644 --- a/arch/arm64/boot/dts/freescale/imx8-ss-img.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8-ss-img.dtsi @@ -67,7 +67,6 @@ img_subsys: bus@58000000 { power-domains = <&pd IMX_SC_R_CSI_0>; fsl,channel = <0>; fsl,num-irqs = <32>; - status = "disabled"; }; gpio0_mipi_csi0: gpio@58222000 { @@ -144,7 +143,6 @@ img_subsys: bus@58000000 { power-domains = <&pd IMX_SC_R_CSI_1>; fsl,channel = <0>; fsl,num-irqs = <32>; - status = "disabled"; }; gpio0_mipi_csi1: gpio@58242000 { diff --git a/arch/arm64/boot/dts/freescale/imx8dxl-ss-conn.dtsi b/arch/arm64/boot/dts/freescale/imx8dxl-ss-conn.dtsi index a66ba6d0a8c0..da33a35c6d46 100644 --- a/arch/arm64/boot/dts/freescale/imx8dxl-ss-conn.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8dxl-ss-conn.dtsi @@ -29,8 +29,8 @@ compatible = "nxp,imx8dxl-dwmac-eqos", "snps,dwmac-5.10a"; reg = <0x5b050000 0x10000>; interrupt-parent = <&gic>; - interrupts = <GIC_SPI 163 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 162 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 162 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 163 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "macirq", "eth_wake_irq"; clocks = <&eqos_lpcg IMX_LPCG_CLK_4>, <&eqos_lpcg IMX_LPCG_CLK_6>, diff --git a/arch/arm64/boot/dts/freescale/imx8dxl-ss-hsio.dtsi b/arch/arm64/boot/dts/freescale/imx8dxl-ss-hsio.dtsi index ec466e4d7df5..5c0d09c5c086 100644 --- a/arch/arm64/boot/dts/freescale/imx8dxl-ss-hsio.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8dxl-ss-hsio.dtsi @@ -54,3 +54,8 @@ interrupt-names = "dma"; }; }; + +&pcieb_ep { + interrupts = <GIC_SPI 46 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "dma"; +}; diff --git 
a/arch/arm64/boot/dts/freescale/imx8mp-kontron-bl-osm-s.dts b/arch/arm64/boot/dts/freescale/imx8mp-kontron-bl-osm-s.dts index 614b4ce330b1..0924ac50fd2d 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-kontron-bl-osm-s.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-kontron-bl-osm-s.dts @@ -16,11 +16,20 @@ ethernet1 = &eqos; }; - extcon_usbc: usbc { - compatible = "linux,extcon-usb-gpio"; + connector { + compatible = "gpio-usb-b-connector", "usb-b-connector"; + id-gpios = <&gpio1 10 GPIO_ACTIVE_HIGH>; + label = "Type-C"; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usb1_id>; - id-gpios = <&gpio1 10 GPIO_ACTIVE_HIGH>; + type = "micro"; + vbus-supply = <®_usb1_vbus>; + + port { + usb_dr_connector: endpoint { + remote-endpoint = <&usb3_dwc>; + }; + }; }; leds { @@ -244,9 +253,15 @@ hnp-disable; srp-disable; dr_mode = "otg"; - extcon = <&extcon_usbc>; usb-role-switch; + role-switch-default-mode = "peripheral"; status = "okay"; + + port { + usb3_dwc: endpoint { + remote-endpoint = <&usb_dr_connector>; + }; + }; }; &usb_dwc3_1 { @@ -273,7 +288,6 @@ }; &usb3_phy0 { - vbus-supply = <®_usb1_vbus>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/freescale/imx8qm-mek.dts b/arch/arm64/boot/dts/freescale/imx8qm-mek.dts index 202d5c67ac40..9c0b6b8d6459 100644 --- a/arch/arm64/boot/dts/freescale/imx8qm-mek.dts +++ b/arch/arm64/boot/dts/freescale/imx8qm-mek.dts @@ -217,8 +217,8 @@ compatible = "nxp,cbdtu02043", "gpio-sbu-mux"; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_typec_mux>; - select-gpios = <&lsio_gpio4 6 GPIO_ACTIVE_LOW>; - enable-gpios = <&lsio_gpio4 19 GPIO_ACTIVE_HIGH>; + select-gpios = <&lsio_gpio4 6 GPIO_ACTIVE_HIGH>; + enable-gpios = <&lsio_gpio4 19 GPIO_ACTIVE_LOW>; orientation-switch; port { diff --git a/arch/arm64/boot/dts/freescale/imx95.dtsi b/arch/arm64/boot/dts/freescale/imx95.dtsi index 1292677cbe4e..6da961eb3fe5 100644 --- a/arch/arm64/boot/dts/freescale/imx95.dtsi +++ b/arch/arm64/boot/dts/freescale/imx95.dtsi @@ -1886,7 +1886,7 @@ assigned-clock-rates = <3600000000>, <100000000>, <10000000>; assigned-clock-parents = <0>, <0>, <&scmi_clk IMX95_CLK_SYSPLL1_PFD1_DIV2>; - msi-map = <0x0 &its 0x98 0x1>; + msi-map = <0x0 &its 0x10 0x1>; power-domains = <&scmi_devpd IMX95_PD_HSIO_TOP>; status = "disabled"; }; @@ -1963,6 +1963,7 @@ assigned-clock-rates = <3600000000>, <100000000>, <10000000>; assigned-clock-parents = <0>, <0>, <&scmi_clk IMX95_CLK_SYSPLL1_PFD1_DIV2>; + msi-map = <0x0 &its 0x98 0x1>; power-domains = <&scmi_devpd IMX95_PD_HSIO_TOP>; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/nvidia/tegra194-p3668.dtsi b/arch/arm64/boot/dts/nvidia/tegra194-p3668.dtsi index a410fc335fa3..c0f17f8189fa 100644 --- a/arch/arm64/boot/dts/nvidia/tegra194-p3668.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra194-p3668.dtsi @@ -42,6 +42,7 @@ interrupt-parent = <&gpio>; interrupts = <TEGRA194_MAIN_GPIO(G, 4) IRQ_TYPE_LEVEL_LOW>; #phy-cells = <0>; + wakeup-source; }; }; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi index 283d9cbc4368..03b7c4313750 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi @@ -598,7 +598,6 @@ pinctrl-2 = <&otp_pin>; resets = <&cru SRST_TSADC>; reset-names = "tsadc-apb"; - rockchip,grf = <&grf>; rockchip,hw-tshut-temp = <100000>; #thermal-sensor-cells = <1>; status = "disabled"; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-op1.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-op1.dtsi index c4f4f1ff6117..9da6fd82e46b 100644 --- 
a/arch/arm64/boot/dts/rockchip/rk3399-op1.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-op1.dtsi @@ -3,7 +3,7 @@ * Copyright (c) 2016-2017 Fuzhou Rockchip Electronics Co., Ltd */ -#include "rk3399.dtsi" +#include "rk3399-base.dtsi" / { cluster0_opp: opp-table-0 { diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou-video-demo.dtso b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou-video-demo.dtso index 5e8f729c2cf2..141a921a06e4 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou-video-demo.dtso +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou-video-demo.dtso @@ -45,11 +45,11 @@ cam_dovdd_1v8: regulator-cam-dovdd-1v8 { compatible = "regulator-fixed"; - gpio = <&pca9670 3 GPIO_ACTIVE_LOW>; - regulator-max-microvolt = <1800000>; - regulator-min-microvolt = <1800000>; - regulator-name = "cam-dovdd-1v8"; - vin-supply = <&vcc1v8_video>; + gpio = <&pca9670 3 GPIO_ACTIVE_LOW>; + regulator-max-microvolt = <1800000>; + regulator-min-microvolt = <1800000>; + regulator-name = "cam-dovdd-1v8"; + vin-supply = <&vcc1v8_video>; }; cam_dvdd_1v2: regulator-cam-dvdd-1v2 { diff --git a/arch/arm64/boot/dts/rockchip/rk3566-bigtreetech-cb2.dtsi b/arch/arm64/boot/dts/rockchip/rk3566-bigtreetech-cb2.dtsi index 7f578c50b4ad..b6cf03a7ba66 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-bigtreetech-cb2.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3566-bigtreetech-cb2.dtsi @@ -120,7 +120,7 @@ compatible = "regulator-fixed"; regulator-name = "vcc3v3_pcie"; enable-active-high; - gpios = <&gpio0 RK_PB1 GPIO_ACTIVE_HIGH>; + gpios = <&gpio4 RK_PB1 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&pcie_drv>; regulator-always-on; @@ -187,7 +187,7 @@ vcc5v0_usb2b: regulator-vcc5v0-usb2b { compatible = "regulator-fixed"; enable-active-high; - gpio = <&gpio0 RK_PC4 GPIO_ACTIVE_HIGH>; + gpio = <&gpio4 RK_PC4 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&vcc5v0_usb2b_en>; regulator-name = "vcc5v0_usb2b"; @@ -199,7 +199,7 @@ vcc5v0_usb2t: regulator-vcc5v0-usb2t { compatible = "regulator-fixed"; enable-active-high; - gpios = <&gpio0 RK_PD5 GPIO_ACTIVE_HIGH>; + gpios = <&gpio3 RK_PD5 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&vcc5v0_usb2t_en>; regulator-name = "vcc5v0_usb2t"; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-pinetab2.dtsi b/arch/arm64/boot/dts/rockchip/rk3566-pinetab2.dtsi index d0e38412d56a..08bf40de17ea 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-pinetab2.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3566-pinetab2.dtsi @@ -789,7 +789,7 @@ vccio1-supply = <&vccio_acodec>; vccio2-supply = <&vcc_1v8>; vccio3-supply = <&vccio_sd>; - vccio4-supply = <&vcc_1v8>; + vccio4-supply = <&vcca1v8_pmu>; vccio5-supply = <&vcc_1v8>; vccio6-supply = <&vcc1v8_dvp>; vccio7-supply = <&vcc_3v3>; diff --git a/arch/arm64/boot/dts/rockchip/rk3568-odroid-m1.dts b/arch/arm64/boot/dts/rockchip/rk3568-odroid-m1.dts index 0f844806ec54..442a2bc43ba8 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-odroid-m1.dts +++ b/arch/arm64/boot/dts/rockchip/rk3568-odroid-m1.dts @@ -482,6 +482,8 @@ }; &i2s1_8ch { + pinctrl-names = "default"; + pinctrl-0 = <&i2s1m0_sclktx &i2s1m0_lrcktx &i2s1m0_sdi0 &i2s1m0_sdo0>; rockchip,trcm-sync-tx-only; status = "okay"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3576.dtsi b/arch/arm64/boot/dts/rockchip/rk3576.dtsi index fc4e9e07f1cf..a86fc6b4e8c4 100644 --- a/arch/arm64/boot/dts/rockchip/rk3576.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3576.dtsi @@ -276,12 +276,6 @@ opp-microvolt = <900000 900000 950000>; clock-latency-ns = <40000>; }; - - opp-2208000000 
{ - opp-hz = /bits/ 64 <2208000000>; - opp-microvolt = <950000 950000 950000>; - clock-latency-ns = <40000>; - }; }; cluster1_opp_table: opp-table-cluster1 { @@ -348,12 +342,6 @@ opp-microvolt = <925000 925000 950000>; clock-latency-ns = <40000>; }; - - opp-2304000000 { - opp-hz = /bits/ 64 <2304000000>; - opp-microvolt = <950000 950000 950000>; - clock-latency-ns = <40000>; - }; }; gpu_opp_table: opp-table-gpu { @@ -2561,8 +2549,6 @@ interrupts = <GIC_SPI 97 IRQ_TYPE_LEVEL_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&i2c9m0_xfer>; - resets = <&cru SRST_I2C9>, <&cru SRST_P_I2C9>; - reset-names = "i2c", "apb"; #address-cells = <1>; #size-cells = <0>; status = "disabled"; diff --git a/arch/arm64/boot/dts/rockchip/rk3588-opp.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-opp.dtsi index 0f1a77697351..b5d630d2c879 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-opp.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588-opp.dtsi @@ -115,7 +115,7 @@ }; }; - gpu_opp_table: opp-table { + gpu_opp_table: opp-table-gpu { compatible = "operating-points-v2"; opp-300000000 { diff --git a/arch/arm64/boot/dts/rockchip/rk3588-tiger.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-tiger.dtsi index b44e89e1bb15..365c1d958f2d 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-tiger.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588-tiger.dtsi @@ -382,14 +382,12 @@ cap-mmc-highspeed; mmc-ddr-1_8v; mmc-hs200-1_8v; - mmc-hs400-1_8v; - mmc-hs400-enhanced-strobe; mmc-pwrseq = <&emmc_pwrseq>; no-sdio; no-sd; non-removable; pinctrl-names = "default"; - pinctrl-0 = <&emmc_bus8 &emmc_cmd &emmc_clk &emmc_data_strobe>; + pinctrl-0 = <&emmc_bus8 &emmc_cmd &emmc_clk>; vmmc-supply = <&vcc_3v3_s3>; vqmmc-supply = <&vcc_1v8_s3>; status = "okay"; diff --git a/arch/arm64/boot/dts/rockchip/rk3588j.dtsi b/arch/arm64/boot/dts/rockchip/rk3588j.dtsi index 9884a5df47df..e1e0e3fc0ca7 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588j.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588j.dtsi @@ -66,7 +66,7 @@ }; }; - gpu_opp_table: opp-table { + gpu_opp_table: opp-table-gpu { compatible = "operating-points-v2"; opp-300000000 { diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts b/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts index ad6d04793b0a..83b9b6645a1e 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts @@ -14,8 +14,8 @@ gpios = <&gpio0 RK_PC5 GPIO_ACTIVE_HIGH>; regulator-name = "vcc3v3_pcie20"; regulator-boot-on; - regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <1800000>; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; startup-delay-us = <50000>; vin-supply = <&vcc5v0_sys>; }; diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index e3a2d37bd104..997fa7cd9de5 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -1341,7 +1341,7 @@ CONFIG_COMMON_CLK_RS9_PCIE=y CONFIG_COMMON_CLK_VC3=y CONFIG_COMMON_CLK_VC5=y CONFIG_COMMON_CLK_BD718XX=m -CONFIG_CLK_RASPBERRYPI=m +CONFIG_CLK_RASPBERRYPI=y CONFIG_CLK_IMX8MM=y CONFIG_CLK_IMX8MN=y CONFIG_CLK_IMX8MP=y @@ -1783,10 +1783,10 @@ CONFIG_CRYPTO_CHACHA20=m CONFIG_CRYPTO_BENCHMARK=m CONFIG_CRYPTO_ECHAINIV=y CONFIG_CRYPTO_MICHAEL_MIC=m +CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_ANSI_CPRNG=y CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_GHASH_ARM64_CE=y -CONFIG_CRYPTO_SHA3_ARM64=m CONFIG_CRYPTO_SM3_ARM64_CE=m CONFIG_CRYPTO_AES_ARM64_CE_BLK=y CONFIG_CRYPTO_AES_ARM64_BS=m diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 
91f3093eee6a..bdd276a6e540 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -25,17 +25,6 @@ config CRYPTO_NHPOLY1305_NEON Architecture: arm64 using: - NEON (Advanced SIMD) extensions -config CRYPTO_SHA3_ARM64 - tristate "Hash functions: SHA-3 (ARMv8.2 Crypto Extensions)" - depends on KERNEL_MODE_NEON - select CRYPTO_HASH - select CRYPTO_SHA3 - help - SHA-3 secure hash algorithms (FIPS 202) - - Architecture: arm64 using: - - ARMv8.2 Crypto Extensions - config CRYPTO_SM3_NEON tristate "Hash functions: SM3 (NEON)" depends on KERNEL_MODE_NEON @@ -58,16 +47,6 @@ config CRYPTO_SM3_ARM64_CE Architecture: arm64 using: - ARMv8.2 Crypto Extensions -config CRYPTO_POLYVAL_ARM64_CE - tristate "Hash functions: POLYVAL (ARMv8 Crypto Extensions)" - depends on KERNEL_MODE_NEON - select CRYPTO_POLYVAL - help - POLYVAL hash function for HCTR2 - - Architecture: arm64 using: - - ARMv8 Crypto Extensions - config CRYPTO_AES_ARM64 tristate "Ciphers: AES, modes: ECB, CBC, CTR, CTS, XCTR, XTS" select CRYPTO_AES diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index a8b2cdbe202c..1e330aa08d3f 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -5,9 +5,6 @@ # Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org> # -obj-$(CONFIG_CRYPTO_SHA3_ARM64) += sha3-ce.o -sha3-ce-y := sha3-ce-glue.o sha3-ce-core.o - obj-$(CONFIG_CRYPTO_SM3_NEON) += sm3-neon.o sm3-neon-y := sm3-neon-glue.o sm3-neon-core.o @@ -32,9 +29,6 @@ sm4-neon-y := sm4-neon-glue.o sm4-neon-core.o obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o -obj-$(CONFIG_CRYPTO_POLYVAL_ARM64_CE) += polyval-ce.o -polyval-ce-y := polyval-ce-glue.o polyval-ce-core.o - obj-$(CONFIG_CRYPTO_AES_ARM64_CE) += aes-ce-cipher.o aes-ce-cipher-y := aes-ce-core.o aes-ce-glue.o diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c index 2d791d51891b..c4fd648471f1 100644 --- a/arch/arm64/crypto/aes-ce-ccm-glue.c +++ b/arch/arm64/crypto/aes-ce-ccm-glue.c @@ -8,7 +8,6 @@ * Author: Ard Biesheuvel <ardb@kernel.org> */ -#include <asm/neon.h> #include <linux/unaligned.h> #include <crypto/aes.h> #include <crypto/scatterwalk.h> @@ -16,6 +15,8 @@ #include <crypto/internal/skcipher.h> #include <linux/module.h> +#include <asm/simd.h> + #include "aes-ce-setkey.h" MODULE_IMPORT_NS("CRYPTO_INTERNAL"); @@ -114,11 +115,8 @@ static u32 ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes, in += adv; abytes -= adv; - if (unlikely(rem)) { - kernel_neon_end(); - kernel_neon_begin(); + if (unlikely(rem)) macp = 0; - } } else { u32 l = min(AES_BLOCK_SIZE - macp, abytes); @@ -187,40 +185,38 @@ static int ccm_encrypt(struct aead_request *req) if (unlikely(err)) return err; - kernel_neon_begin(); + scoped_ksimd() { + if (req->assoclen) + ccm_calculate_auth_mac(req, mac); - if (req->assoclen) - ccm_calculate_auth_mac(req, mac); + do { + u32 tail = walk.nbytes % AES_BLOCK_SIZE; + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + u8 buf[AES_BLOCK_SIZE]; + u8 *final_iv = NULL; - do { - u32 tail = walk.nbytes % AES_BLOCK_SIZE; - const u8 *src = walk.src.virt.addr; - u8 *dst = walk.dst.virt.addr; - u8 buf[AES_BLOCK_SIZE]; - u8 *final_iv = NULL; - - if (walk.nbytes == walk.total) { - tail = 0; - final_iv = orig_iv; - } - - if (unlikely(walk.nbytes < AES_BLOCK_SIZE)) - src = dst = memcpy(&buf[sizeof(buf) - walk.nbytes], - src, walk.nbytes); + if (walk.nbytes == walk.total) { + tail = 0; + final_iv = orig_iv; + } - ce_aes_ccm_encrypt(dst, src, 
walk.nbytes - tail, - ctx->key_enc, num_rounds(ctx), - mac, walk.iv, final_iv); + if (unlikely(walk.nbytes < AES_BLOCK_SIZE)) + src = dst = memcpy(&buf[sizeof(buf) - walk.nbytes], + src, walk.nbytes); - if (unlikely(walk.nbytes < AES_BLOCK_SIZE)) - memcpy(walk.dst.virt.addr, dst, walk.nbytes); + ce_aes_ccm_encrypt(dst, src, walk.nbytes - tail, + ctx->key_enc, num_rounds(ctx), + mac, walk.iv, final_iv); - if (walk.nbytes) { - err = skcipher_walk_done(&walk, tail); - } - } while (walk.nbytes); + if (unlikely(walk.nbytes < AES_BLOCK_SIZE)) + memcpy(walk.dst.virt.addr, dst, walk.nbytes); - kernel_neon_end(); + if (walk.nbytes) { + err = skcipher_walk_done(&walk, tail); + } + } while (walk.nbytes); + } if (unlikely(err)) return err; @@ -254,40 +250,38 @@ static int ccm_decrypt(struct aead_request *req) if (unlikely(err)) return err; - kernel_neon_begin(); - - if (req->assoclen) - ccm_calculate_auth_mac(req, mac); + scoped_ksimd() { + if (req->assoclen) + ccm_calculate_auth_mac(req, mac); - do { - u32 tail = walk.nbytes % AES_BLOCK_SIZE; - const u8 *src = walk.src.virt.addr; - u8 *dst = walk.dst.virt.addr; - u8 buf[AES_BLOCK_SIZE]; - u8 *final_iv = NULL; - - if (walk.nbytes == walk.total) { - tail = 0; - final_iv = orig_iv; - } + do { + u32 tail = walk.nbytes % AES_BLOCK_SIZE; + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + u8 buf[AES_BLOCK_SIZE]; + u8 *final_iv = NULL; - if (unlikely(walk.nbytes < AES_BLOCK_SIZE)) - src = dst = memcpy(&buf[sizeof(buf) - walk.nbytes], - src, walk.nbytes); + if (walk.nbytes == walk.total) { + tail = 0; + final_iv = orig_iv; + } - ce_aes_ccm_decrypt(dst, src, walk.nbytes - tail, - ctx->key_enc, num_rounds(ctx), - mac, walk.iv, final_iv); + if (unlikely(walk.nbytes < AES_BLOCK_SIZE)) + src = dst = memcpy(&buf[sizeof(buf) - walk.nbytes], + src, walk.nbytes); - if (unlikely(walk.nbytes < AES_BLOCK_SIZE)) - memcpy(walk.dst.virt.addr, dst, walk.nbytes); + ce_aes_ccm_decrypt(dst, src, walk.nbytes - tail, + ctx->key_enc, num_rounds(ctx), + mac, walk.iv, final_iv); - if (walk.nbytes) { - err = skcipher_walk_done(&walk, tail); - } - } while (walk.nbytes); + if (unlikely(walk.nbytes < AES_BLOCK_SIZE)) + memcpy(walk.dst.virt.addr, dst, walk.nbytes); - kernel_neon_end(); + if (walk.nbytes) { + err = skcipher_walk_done(&walk, tail); + } + } while (walk.nbytes); + } if (unlikely(err)) return err; diff --git a/arch/arm64/crypto/aes-ce-glue.c b/arch/arm64/crypto/aes-ce-glue.c index 00b8749013c5..a4dad370991d 100644 --- a/arch/arm64/crypto/aes-ce-glue.c +++ b/arch/arm64/crypto/aes-ce-glue.c @@ -52,9 +52,8 @@ static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[]) return; } - kernel_neon_begin(); - __aes_ce_encrypt(ctx->key_enc, dst, src, num_rounds(ctx)); - kernel_neon_end(); + scoped_ksimd() + __aes_ce_encrypt(ctx->key_enc, dst, src, num_rounds(ctx)); } static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[]) @@ -66,9 +65,8 @@ static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[]) return; } - kernel_neon_begin(); - __aes_ce_decrypt(ctx->key_dec, dst, src, num_rounds(ctx)); - kernel_neon_end(); + scoped_ksimd() + __aes_ce_decrypt(ctx->key_dec, dst, src, num_rounds(ctx)); } int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key, @@ -94,47 +92,48 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key, for (i = 0; i < kwords; i++) ctx->key_enc[i] = get_unaligned_le32(in_key + i * sizeof(u32)); - kernel_neon_begin(); - for (i = 0; i < sizeof(rcon); i++) { - u32 
*rki = ctx->key_enc + (i * kwords); - u32 *rko = rki + kwords; - - rko[0] = ror32(__aes_ce_sub(rki[kwords - 1]), 8) ^ rcon[i] ^ rki[0]; - rko[1] = rko[0] ^ rki[1]; - rko[2] = rko[1] ^ rki[2]; - rko[3] = rko[2] ^ rki[3]; - - if (key_len == AES_KEYSIZE_192) { - if (i >= 7) - break; - rko[4] = rko[3] ^ rki[4]; - rko[5] = rko[4] ^ rki[5]; - } else if (key_len == AES_KEYSIZE_256) { - if (i >= 6) - break; - rko[4] = __aes_ce_sub(rko[3]) ^ rki[4]; - rko[5] = rko[4] ^ rki[5]; - rko[6] = rko[5] ^ rki[6]; - rko[7] = rko[6] ^ rki[7]; + scoped_ksimd() { + for (i = 0; i < sizeof(rcon); i++) { + u32 *rki = ctx->key_enc + (i * kwords); + u32 *rko = rki + kwords; + + rko[0] = ror32(__aes_ce_sub(rki[kwords - 1]), 8) ^ + rcon[i] ^ rki[0]; + rko[1] = rko[0] ^ rki[1]; + rko[2] = rko[1] ^ rki[2]; + rko[3] = rko[2] ^ rki[3]; + + if (key_len == AES_KEYSIZE_192) { + if (i >= 7) + break; + rko[4] = rko[3] ^ rki[4]; + rko[5] = rko[4] ^ rki[5]; + } else if (key_len == AES_KEYSIZE_256) { + if (i >= 6) + break; + rko[4] = __aes_ce_sub(rko[3]) ^ rki[4]; + rko[5] = rko[4] ^ rki[5]; + rko[6] = rko[5] ^ rki[6]; + rko[7] = rko[6] ^ rki[7]; + } } - } - /* - * Generate the decryption keys for the Equivalent Inverse Cipher. - * This involves reversing the order of the round keys, and applying - * the Inverse Mix Columns transformation on all but the first and - * the last one. - */ - key_enc = (struct aes_block *)ctx->key_enc; - key_dec = (struct aes_block *)ctx->key_dec; - j = num_rounds(ctx); - - key_dec[0] = key_enc[j]; - for (i = 1, j--; j > 0; i++, j--) - __aes_ce_invert(key_dec + i, key_enc + j); - key_dec[i] = key_enc[0]; + /* + * Generate the decryption keys for the Equivalent Inverse + * Cipher. This involves reversing the order of the round + * keys, and applying the Inverse Mix Columns transformation on + * all but the first and the last one. 
+ */ + key_enc = (struct aes_block *)ctx->key_enc; + key_dec = (struct aes_block *)ctx->key_dec; + j = num_rounds(ctx); + + key_dec[0] = key_enc[j]; + for (i = 1, j--; j > 0; i++, j--) + __aes_ce_invert(key_dec + i, key_enc + j); + key_dec[i] = key_enc[0]; + } - kernel_neon_end(); return 0; } EXPORT_SYMBOL(ce_aes_expandkey); diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index 5e207ff34482..b087b900d279 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -5,8 +5,6 @@ * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org> */ -#include <asm/hwcap.h> -#include <asm/neon.h> #include <crypto/aes.h> #include <crypto/ctr.h> #include <crypto/internal/hash.h> @@ -20,6 +18,9 @@ #include <linux/module.h> #include <linux/string.h> +#include <asm/hwcap.h> +#include <asm/simd.h> + #include "aes-ce-setkey.h" #ifdef USE_V8_CRYPTO_EXTENSIONS @@ -186,10 +187,9 @@ static int __maybe_unused ecb_encrypt(struct skcipher_request *req) err = skcipher_walk_virt(&walk, req, false); while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { - kernel_neon_begin(); - aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key_enc, rounds, blocks); - kernel_neon_end(); + scoped_ksimd() + aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key_enc, rounds, blocks); err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } return err; @@ -206,10 +206,9 @@ static int __maybe_unused ecb_decrypt(struct skcipher_request *req) err = skcipher_walk_virt(&walk, req, false); while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { - kernel_neon_begin(); - aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key_dec, rounds, blocks); - kernel_neon_end(); + scoped_ksimd() + aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key_dec, rounds, blocks); err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } return err; @@ -224,10 +223,9 @@ static int cbc_encrypt_walk(struct skcipher_request *req, unsigned int blocks; while ((blocks = (walk->nbytes / AES_BLOCK_SIZE))) { - kernel_neon_begin(); - aes_cbc_encrypt(walk->dst.virt.addr, walk->src.virt.addr, - ctx->key_enc, rounds, blocks, walk->iv); - kernel_neon_end(); + scoped_ksimd() + aes_cbc_encrypt(walk->dst.virt.addr, walk->src.virt.addr, + ctx->key_enc, rounds, blocks, walk->iv); err = skcipher_walk_done(walk, walk->nbytes % AES_BLOCK_SIZE); } return err; @@ -253,10 +251,9 @@ static int cbc_decrypt_walk(struct skcipher_request *req, unsigned int blocks; while ((blocks = (walk->nbytes / AES_BLOCK_SIZE))) { - kernel_neon_begin(); - aes_cbc_decrypt(walk->dst.virt.addr, walk->src.virt.addr, - ctx->key_dec, rounds, blocks, walk->iv); - kernel_neon_end(); + scoped_ksimd() + aes_cbc_decrypt(walk->dst.virt.addr, walk->src.virt.addr, + ctx->key_dec, rounds, blocks, walk->iv); err = skcipher_walk_done(walk, walk->nbytes % AES_BLOCK_SIZE); } return err; @@ -322,10 +319,9 @@ static int cts_cbc_encrypt(struct skcipher_request *req) if (err) return err; - kernel_neon_begin(); - aes_cbc_cts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key_enc, rounds, walk.nbytes, walk.iv); - kernel_neon_end(); + scoped_ksimd() + aes_cbc_cts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key_enc, rounds, walk.nbytes, walk.iv); return skcipher_walk_done(&walk, 0); } @@ -379,10 +375,9 @@ static int cts_cbc_decrypt(struct skcipher_request *req) if (err) return err; - kernel_neon_begin(); - aes_cbc_cts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key_dec, rounds, walk.nbytes, 
walk.iv); - kernel_neon_end(); + scoped_ksimd() + aes_cbc_cts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key_dec, rounds, walk.nbytes, walk.iv); return skcipher_walk_done(&walk, 0); } @@ -399,11 +394,11 @@ static int __maybe_unused essiv_cbc_encrypt(struct skcipher_request *req) blocks = walk.nbytes / AES_BLOCK_SIZE; if (blocks) { - kernel_neon_begin(); - aes_essiv_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key1.key_enc, rounds, blocks, - req->iv, ctx->key2.key_enc); - kernel_neon_end(); + scoped_ksimd() + aes_essiv_cbc_encrypt(walk.dst.virt.addr, + walk.src.virt.addr, + ctx->key1.key_enc, rounds, blocks, + req->iv, ctx->key2.key_enc); err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } return err ?: cbc_encrypt_walk(req, &walk); @@ -421,11 +416,11 @@ static int __maybe_unused essiv_cbc_decrypt(struct skcipher_request *req) blocks = walk.nbytes / AES_BLOCK_SIZE; if (blocks) { - kernel_neon_begin(); - aes_essiv_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key1.key_dec, rounds, blocks, - req->iv, ctx->key2.key_enc); - kernel_neon_end(); + scoped_ksimd() + aes_essiv_cbc_decrypt(walk.dst.virt.addr, + walk.src.virt.addr, + ctx->key1.key_dec, rounds, blocks, + req->iv, ctx->key2.key_enc); err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } return err ?: cbc_decrypt_walk(req, &walk); @@ -461,10 +456,9 @@ static int __maybe_unused xctr_encrypt(struct skcipher_request *req) else if (nbytes < walk.total) nbytes &= ~(AES_BLOCK_SIZE - 1); - kernel_neon_begin(); - aes_xctr_encrypt(dst, src, ctx->key_enc, rounds, nbytes, - walk.iv, byte_ctr); - kernel_neon_end(); + scoped_ksimd() + aes_xctr_encrypt(dst, src, ctx->key_enc, rounds, nbytes, + walk.iv, byte_ctr); if (unlikely(nbytes < AES_BLOCK_SIZE)) memcpy(walk.dst.virt.addr, @@ -506,10 +500,9 @@ static int __maybe_unused ctr_encrypt(struct skcipher_request *req) else if (nbytes < walk.total) nbytes &= ~(AES_BLOCK_SIZE - 1); - kernel_neon_begin(); - aes_ctr_encrypt(dst, src, ctx->key_enc, rounds, nbytes, - walk.iv); - kernel_neon_end(); + scoped_ksimd() + aes_ctr_encrypt(dst, src, ctx->key_enc, rounds, nbytes, + walk.iv); if (unlikely(nbytes < AES_BLOCK_SIZE)) memcpy(walk.dst.virt.addr, @@ -562,11 +555,10 @@ static int __maybe_unused xts_encrypt(struct skcipher_request *req) if (walk.nbytes < walk.total) nbytes &= ~(AES_BLOCK_SIZE - 1); - kernel_neon_begin(); - aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key1.key_enc, rounds, nbytes, - ctx->key2.key_enc, walk.iv, first); - kernel_neon_end(); + scoped_ksimd() + aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key1.key_enc, rounds, nbytes, + ctx->key2.key_enc, walk.iv, first); err = skcipher_walk_done(&walk, walk.nbytes - nbytes); } @@ -584,11 +576,10 @@ static int __maybe_unused xts_encrypt(struct skcipher_request *req) if (err) return err; - kernel_neon_begin(); - aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key1.key_enc, rounds, walk.nbytes, - ctx->key2.key_enc, walk.iv, first); - kernel_neon_end(); + scoped_ksimd() + aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key1.key_enc, rounds, walk.nbytes, + ctx->key2.key_enc, walk.iv, first); return skcipher_walk_done(&walk, 0); } @@ -634,11 +625,10 @@ static int __maybe_unused xts_decrypt(struct skcipher_request *req) if (walk.nbytes < walk.total) nbytes &= ~(AES_BLOCK_SIZE - 1); - kernel_neon_begin(); - aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key1.key_dec, rounds, nbytes, - ctx->key2.key_enc, walk.iv, 
first); - kernel_neon_end(); + scoped_ksimd() + aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key1.key_dec, rounds, nbytes, + ctx->key2.key_enc, walk.iv, first); err = skcipher_walk_done(&walk, walk.nbytes - nbytes); } @@ -657,11 +647,10 @@ static int __maybe_unused xts_decrypt(struct skcipher_request *req) return err; - kernel_neon_begin(); - aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key1.key_dec, rounds, walk.nbytes, - ctx->key2.key_enc, walk.iv, first); - kernel_neon_end(); + scoped_ksimd() + aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key1.key_dec, rounds, walk.nbytes, + ctx->key2.key_enc, walk.iv, first); return skcipher_walk_done(&walk, 0); } @@ -808,10 +797,9 @@ static int cmac_setkey(struct crypto_shash *tfm, const u8 *in_key, return err; /* encrypt the zero vector */ - kernel_neon_begin(); - aes_ecb_encrypt(ctx->consts, (u8[AES_BLOCK_SIZE]){}, ctx->key.key_enc, - rounds, 1); - kernel_neon_end(); + scoped_ksimd() + aes_ecb_encrypt(ctx->consts, (u8[AES_BLOCK_SIZE]){}, + ctx->key.key_enc, rounds, 1); cmac_gf128_mul_by_x(consts, consts); cmac_gf128_mul_by_x(consts + 1, consts); @@ -837,10 +825,10 @@ static int xcbc_setkey(struct crypto_shash *tfm, const u8 *in_key, if (err) return err; - kernel_neon_begin(); - aes_ecb_encrypt(key, ks[0], ctx->key.key_enc, rounds, 1); - aes_ecb_encrypt(ctx->consts, ks[1], ctx->key.key_enc, rounds, 2); - kernel_neon_end(); + scoped_ksimd() { + aes_ecb_encrypt(key, ks[0], ctx->key.key_enc, rounds, 1); + aes_ecb_encrypt(ctx->consts, ks[1], ctx->key.key_enc, rounds, 2); + } return cbcmac_setkey(tfm, key, sizeof(key)); } @@ -860,10 +848,9 @@ static void mac_do_update(struct crypto_aes_ctx *ctx, u8 const in[], int blocks, int rem; do { - kernel_neon_begin(); - rem = aes_mac_update(in, ctx->key_enc, rounds, blocks, - dg, enc_before, !enc_before); - kernel_neon_end(); + scoped_ksimd() + rem = aes_mac_update(in, ctx->key_enc, rounds, blocks, + dg, enc_before, !enc_before); in += (blocks - rem) * AES_BLOCK_SIZE; blocks = rem; } while (blocks); diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c index c4a623e86593..d496effb0a5b 100644 --- a/arch/arm64/crypto/aes-neonbs-glue.c +++ b/arch/arm64/crypto/aes-neonbs-glue.c @@ -85,9 +85,8 @@ static int aesbs_setkey(struct crypto_skcipher *tfm, const u8 *in_key, ctx->rounds = 6 + key_len / 4; - kernel_neon_begin(); - aesbs_convert_key(ctx->rk, rk.key_enc, ctx->rounds); - kernel_neon_end(); + scoped_ksimd() + aesbs_convert_key(ctx->rk, rk.key_enc, ctx->rounds); return 0; } @@ -110,10 +109,9 @@ static int __ecb_crypt(struct skcipher_request *req, blocks = round_down(blocks, walk.stride / AES_BLOCK_SIZE); - kernel_neon_begin(); - fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->rk, - ctx->rounds, blocks); - kernel_neon_end(); + scoped_ksimd() + fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->rk, + ctx->rounds, blocks); err = skcipher_walk_done(&walk, walk.nbytes - blocks * AES_BLOCK_SIZE); } @@ -146,9 +144,8 @@ static int aesbs_cbc_ctr_setkey(struct crypto_skcipher *tfm, const u8 *in_key, memcpy(ctx->enc, rk.key_enc, sizeof(ctx->enc)); - kernel_neon_begin(); - aesbs_convert_key(ctx->key.rk, rk.key_enc, ctx->key.rounds); - kernel_neon_end(); + scoped_ksimd() + aesbs_convert_key(ctx->key.rk, rk.key_enc, ctx->key.rounds); memzero_explicit(&rk, sizeof(rk)); return 0; @@ -167,11 +164,11 @@ static int cbc_encrypt(struct skcipher_request *req) unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; /* fall back to the non-bitsliced NEON 
implementation */ - kernel_neon_begin(); - neon_aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->enc, ctx->key.rounds, blocks, - walk.iv); - kernel_neon_end(); + scoped_ksimd() + neon_aes_cbc_encrypt(walk.dst.virt.addr, + walk.src.virt.addr, + ctx->enc, ctx->key.rounds, blocks, + walk.iv); err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } return err; @@ -193,11 +190,10 @@ static int cbc_decrypt(struct skcipher_request *req) blocks = round_down(blocks, walk.stride / AES_BLOCK_SIZE); - kernel_neon_begin(); - aesbs_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key.rk, ctx->key.rounds, blocks, - walk.iv); - kernel_neon_end(); + scoped_ksimd() + aesbs_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key.rk, ctx->key.rounds, blocks, + walk.iv); err = skcipher_walk_done(&walk, walk.nbytes - blocks * AES_BLOCK_SIZE); } @@ -220,30 +216,32 @@ static int ctr_encrypt(struct skcipher_request *req) const u8 *src = walk.src.virt.addr; u8 *dst = walk.dst.virt.addr; - kernel_neon_begin(); - if (blocks >= 8) { - aesbs_ctr_encrypt(dst, src, ctx->key.rk, ctx->key.rounds, - blocks, walk.iv); - dst += blocks * AES_BLOCK_SIZE; - src += blocks * AES_BLOCK_SIZE; - } - if (nbytes && walk.nbytes == walk.total) { - u8 buf[AES_BLOCK_SIZE]; - u8 *d = dst; - - if (unlikely(nbytes < AES_BLOCK_SIZE)) - src = dst = memcpy(buf + sizeof(buf) - nbytes, - src, nbytes); - - neon_aes_ctr_encrypt(dst, src, ctx->enc, ctx->key.rounds, - nbytes, walk.iv); + scoped_ksimd() { + if (blocks >= 8) { + aesbs_ctr_encrypt(dst, src, ctx->key.rk, + ctx->key.rounds, blocks, + walk.iv); + dst += blocks * AES_BLOCK_SIZE; + src += blocks * AES_BLOCK_SIZE; + } + if (nbytes && walk.nbytes == walk.total) { + u8 buf[AES_BLOCK_SIZE]; + u8 *d = dst; + + if (unlikely(nbytes < AES_BLOCK_SIZE)) + src = dst = memcpy(buf + sizeof(buf) - + nbytes, src, nbytes); + + neon_aes_ctr_encrypt(dst, src, ctx->enc, + ctx->key.rounds, nbytes, + walk.iv); - if (unlikely(nbytes < AES_BLOCK_SIZE)) - memcpy(d, dst, nbytes); + if (unlikely(nbytes < AES_BLOCK_SIZE)) + memcpy(d, dst, nbytes); - nbytes = 0; + nbytes = 0; + } } - kernel_neon_end(); err = skcipher_walk_done(&walk, nbytes); } return err; @@ -320,33 +318,33 @@ static int __xts_crypt(struct skcipher_request *req, bool encrypt, in = walk.src.virt.addr; nbytes = walk.nbytes; - kernel_neon_begin(); - if (blocks >= 8) { - if (first == 1) - neon_aes_ecb_encrypt(walk.iv, walk.iv, - ctx->twkey, - ctx->key.rounds, 1); - first = 2; - - fn(out, in, ctx->key.rk, ctx->key.rounds, blocks, - walk.iv); - - out += blocks * AES_BLOCK_SIZE; - in += blocks * AES_BLOCK_SIZE; - nbytes -= blocks * AES_BLOCK_SIZE; + scoped_ksimd() { + if (blocks >= 8) { + if (first == 1) + neon_aes_ecb_encrypt(walk.iv, walk.iv, + ctx->twkey, + ctx->key.rounds, 1); + first = 2; + + fn(out, in, ctx->key.rk, ctx->key.rounds, blocks, + walk.iv); + + out += blocks * AES_BLOCK_SIZE; + in += blocks * AES_BLOCK_SIZE; + nbytes -= blocks * AES_BLOCK_SIZE; + } + if (walk.nbytes == walk.total && nbytes > 0) { + if (encrypt) + neon_aes_xts_encrypt(out, in, ctx->cts.key_enc, + ctx->key.rounds, nbytes, + ctx->twkey, walk.iv, first); + else + neon_aes_xts_decrypt(out, in, ctx->cts.key_dec, + ctx->key.rounds, nbytes, + ctx->twkey, walk.iv, first); + nbytes = first = 0; + } } - if (walk.nbytes == walk.total && nbytes > 0) { - if (encrypt) - neon_aes_xts_encrypt(out, in, ctx->cts.key_enc, - ctx->key.rounds, nbytes, - ctx->twkey, walk.iv, first); - else - neon_aes_xts_decrypt(out, in, ctx->cts.key_dec, - ctx->key.rounds, 
nbytes, - ctx->twkey, walk.iv, first); - nbytes = first = 0; - } - kernel_neon_end(); err = skcipher_walk_done(&walk, nbytes); } @@ -369,14 +367,16 @@ static int __xts_crypt(struct skcipher_request *req, bool encrypt, in = walk.src.virt.addr; nbytes = walk.nbytes; - kernel_neon_begin(); - if (encrypt) - neon_aes_xts_encrypt(out, in, ctx->cts.key_enc, ctx->key.rounds, - nbytes, ctx->twkey, walk.iv, first); - else - neon_aes_xts_decrypt(out, in, ctx->cts.key_dec, ctx->key.rounds, - nbytes, ctx->twkey, walk.iv, first); - kernel_neon_end(); + scoped_ksimd() { + if (encrypt) + neon_aes_xts_encrypt(out, in, ctx->cts.key_enc, + ctx->key.rounds, nbytes, ctx->twkey, + walk.iv, first); + else + neon_aes_xts_decrypt(out, in, ctx->cts.key_dec, + ctx->key.rounds, nbytes, ctx->twkey, + walk.iv, first); + } return skcipher_walk_done(&walk, 0); } diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c index 4995b6e22335..7951557a285a 100644 --- a/arch/arm64/crypto/ghash-ce-glue.c +++ b/arch/arm64/crypto/ghash-ce-glue.c @@ -5,7 +5,6 @@ * Copyright (C) 2014 - 2018 Linaro Ltd. <ard.biesheuvel@linaro.org> */ -#include <asm/neon.h> #include <crypto/aes.h> #include <crypto/b128ops.h> #include <crypto/gcm.h> @@ -22,6 +21,8 @@ #include <linux/string.h> #include <linux/unaligned.h> +#include <asm/simd.h> + MODULE_DESCRIPTION("GHASH and AES-GCM using ARMv8 Crypto Extensions"); MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); MODULE_LICENSE("GPL v2"); @@ -74,9 +75,8 @@ void ghash_do_simd_update(int blocks, u64 dg[], const char *src, u64 const h[][2], const char *head)) { - kernel_neon_begin(); - simd_update(blocks, dg, src, key->h, head); - kernel_neon_end(); + scoped_ksimd() + simd_update(blocks, dg, src, key->h, head); } /* avoid hogging the CPU for too long */ @@ -329,11 +329,10 @@ static int gcm_encrypt(struct aead_request *req, char *iv, int assoclen) tag = NULL; } - kernel_neon_begin(); - pmull_gcm_encrypt(nbytes, dst, src, ctx->ghash_key.h, - dg, iv, ctx->aes_key.key_enc, nrounds, - tag); - kernel_neon_end(); + scoped_ksimd() + pmull_gcm_encrypt(nbytes, dst, src, ctx->ghash_key.h, + dg, iv, ctx->aes_key.key_enc, nrounds, + tag); if (unlikely(!nbytes)) break; @@ -399,11 +398,11 @@ static int gcm_decrypt(struct aead_request *req, char *iv, int assoclen) tag = NULL; } - kernel_neon_begin(); - ret = pmull_gcm_decrypt(nbytes, dst, src, ctx->ghash_key.h, - dg, iv, ctx->aes_key.key_enc, - nrounds, tag, otag, authsize); - kernel_neon_end(); + scoped_ksimd() + ret = pmull_gcm_decrypt(nbytes, dst, src, + ctx->ghash_key.h, + dg, iv, ctx->aes_key.key_enc, + nrounds, tag, otag, authsize); if (unlikely(!nbytes)) break; diff --git a/arch/arm64/crypto/nhpoly1305-neon-glue.c b/arch/arm64/crypto/nhpoly1305-neon-glue.c index e4a0b463f080..013de6ac569a 100644 --- a/arch/arm64/crypto/nhpoly1305-neon-glue.c +++ b/arch/arm64/crypto/nhpoly1305-neon-glue.c @@ -25,9 +25,8 @@ static int nhpoly1305_neon_update(struct shash_desc *desc, do { unsigned int n = min_t(unsigned int, srclen, SZ_4K); - kernel_neon_begin(); - crypto_nhpoly1305_update_helper(desc, src, n, nh_neon); - kernel_neon_end(); + scoped_ksimd() + crypto_nhpoly1305_update_helper(desc, src, n, nh_neon); src += n; srclen -= n; } while (srclen); diff --git a/arch/arm64/crypto/polyval-ce-core.S b/arch/arm64/crypto/polyval-ce-core.S deleted file mode 100644 index b5326540d2e3..000000000000 --- a/arch/arm64/crypto/polyval-ce-core.S +++ /dev/null @@ -1,361 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Implementation of POLYVAL using 
ARMv8 Crypto Extensions. - * - * Copyright 2021 Google LLC - */ -/* - * This is an efficient implementation of POLYVAL using ARMv8 Crypto Extensions - * It works on 8 blocks at a time, by precomputing the first 8 keys powers h^8, - * ..., h^1 in the POLYVAL finite field. This precomputation allows us to split - * finite field multiplication into two steps. - * - * In the first step, we consider h^i, m_i as normal polynomials of degree less - * than 128. We then compute p(x) = h^8m_0 + ... + h^1m_7 where multiplication - * is simply polynomial multiplication. - * - * In the second step, we compute the reduction of p(x) modulo the finite field - * modulus g(x) = x^128 + x^127 + x^126 + x^121 + 1. - * - * This two step process is equivalent to computing h^8m_0 + ... + h^1m_7 where - * multiplication is finite field multiplication. The advantage is that the - * two-step process only requires 1 finite field reduction for every 8 - * polynomial multiplications. Further parallelism is gained by interleaving the - * multiplications and polynomial reductions. - */ - -#include <linux/linkage.h> -#define STRIDE_BLOCKS 8 - -KEY_POWERS .req x0 -MSG .req x1 -BLOCKS_LEFT .req x2 -ACCUMULATOR .req x3 -KEY_START .req x10 -EXTRA_BYTES .req x11 -TMP .req x13 - -M0 .req v0 -M1 .req v1 -M2 .req v2 -M3 .req v3 -M4 .req v4 -M5 .req v5 -M6 .req v6 -M7 .req v7 -KEY8 .req v8 -KEY7 .req v9 -KEY6 .req v10 -KEY5 .req v11 -KEY4 .req v12 -KEY3 .req v13 -KEY2 .req v14 -KEY1 .req v15 -PL .req v16 -PH .req v17 -TMP_V .req v18 -LO .req v20 -MI .req v21 -HI .req v22 -SUM .req v23 -GSTAR .req v24 - - .text - - .arch armv8-a+crypto - .align 4 - -.Lgstar: - .quad 0xc200000000000000, 0xc200000000000000 - -/* - * Computes the product of two 128-bit polynomials in X and Y and XORs the - * components of the 256-bit product into LO, MI, HI. - * - * Given: - * X = [X_1 : X_0] - * Y = [Y_1 : Y_0] - * - * We compute: - * LO += X_0 * Y_0 - * MI += (X_0 + X_1) * (Y_0 + Y_1) - * HI += X_1 * Y_1 - * - * Later, the 256-bit result can be extracted as: - * [HI_1 : HI_0 + HI_1 + MI_1 + LO_1 : LO_1 + HI_0 + MI_0 + LO_0 : LO_0] - * This step is done when computing the polynomial reduction for efficiency - * reasons. - * - * Karatsuba multiplication is used instead of Schoolbook multiplication because - * it was found to be slightly faster on ARM64 CPUs. - * - */ -.macro karatsuba1 X Y - X .req \X - Y .req \Y - ext v25.16b, X.16b, X.16b, #8 - ext v26.16b, Y.16b, Y.16b, #8 - eor v25.16b, v25.16b, X.16b - eor v26.16b, v26.16b, Y.16b - pmull2 v28.1q, X.2d, Y.2d - pmull v29.1q, X.1d, Y.1d - pmull v27.1q, v25.1d, v26.1d - eor HI.16b, HI.16b, v28.16b - eor LO.16b, LO.16b, v29.16b - eor MI.16b, MI.16b, v27.16b - .unreq X - .unreq Y -.endm - -/* - * Same as karatsuba1, except overwrites HI, LO, MI rather than XORing into - * them. - */ -.macro karatsuba1_store X Y - X .req \X - Y .req \Y - ext v25.16b, X.16b, X.16b, #8 - ext v26.16b, Y.16b, Y.16b, #8 - eor v25.16b, v25.16b, X.16b - eor v26.16b, v26.16b, Y.16b - pmull2 HI.1q, X.2d, Y.2d - pmull LO.1q, X.1d, Y.1d - pmull MI.1q, v25.1d, v26.1d - .unreq X - .unreq Y -.endm - -/* - * Computes the 256-bit polynomial represented by LO, HI, MI. Stores - * the result in PL, PH. 
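/*
 * Illustrative C sketch of the Karatsuba split described in the comments
 * above, with a plain bit-loop standing in for the PMULL instructions.
 * Every name here is invented for the example; none of it is kernel API.
 */
#include <stdint.h>

/* 64x64 -> 128-bit carry-less (GF(2) polynomial) multiply. */
static void clmul64(uint64_t a, uint64_t b, uint64_t *hi, uint64_t *lo)
{
	uint64_t rh = 0, rl = 0;
	int i;

	for (i = 0; i < 64; i++) {
		if ((b >> i) & 1) {
			rl ^= a << i;
			if (i)
				rh ^= a >> (64 - i);
		}
	}
	*hi = rh;
	*lo = rl;
}

/*
 * 128x128 -> 256-bit carry-less product using three 64x64 multiplies
 * instead of four: LO = X_0*Y_0, HI = X_1*Y_1, MI = (X_0+X_1)*(Y_0+Y_1),
 * then fold LO and HI into MI, matching the karatsuba1/karatsuba2 macros.
 */
static void karatsuba128(const uint64_t x[2], const uint64_t y[2],
			 uint64_t r[4])
{
	uint64_t lo_h, lo_l, hi_h, hi_l, mi_h, mi_l;

	clmul64(x[0], y[0], &lo_h, &lo_l);
	clmul64(x[1], y[1], &hi_h, &hi_l);
	clmul64(x[0] ^ x[1], y[0] ^ y[1], &mi_h, &mi_l);

	mi_l ^= lo_l ^ hi_l;		/* MI += LO + HI (XOR in GF(2)) */
	mi_h ^= lo_h ^ hi_h;

	r[0] = lo_l;			/* LO_0 */
	r[1] = lo_h ^ mi_l;		/* LO_1 + HI_0 + MI_0 + LO_0 */
	r[2] = hi_l ^ mi_h;		/* HI_0 + HI_1 + MI_1 + LO_1 */
	r[3] = hi_h;			/* HI_1 */
}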
- * [PH : PL] = - * [HI_1 : HI_1 + HI_0 + MI_1 + LO_1 : HI_0 + MI_0 + LO_1 + LO_0 : LO_0] - */ -.macro karatsuba2 - // v4 = [HI_1 + MI_1 : HI_0 + MI_0] - eor v4.16b, HI.16b, MI.16b - // v4 = [HI_1 + MI_1 + LO_1 : HI_0 + MI_0 + LO_0] - eor v4.16b, v4.16b, LO.16b - // v5 = [HI_0 : LO_1] - ext v5.16b, LO.16b, HI.16b, #8 - // v4 = [HI_1 + HI_0 + MI_1 + LO_1 : HI_0 + MI_0 + LO_1 + LO_0] - eor v4.16b, v4.16b, v5.16b - // HI = [HI_0 : HI_1] - ext HI.16b, HI.16b, HI.16b, #8 - // LO = [LO_0 : LO_1] - ext LO.16b, LO.16b, LO.16b, #8 - // PH = [HI_1 : HI_1 + HI_0 + MI_1 + LO_1] - ext PH.16b, v4.16b, HI.16b, #8 - // PL = [HI_0 + MI_0 + LO_1 + LO_0 : LO_0] - ext PL.16b, LO.16b, v4.16b, #8 -.endm - -/* - * Computes the 128-bit reduction of PH : PL. Stores the result in dest. - * - * This macro computes p(x) mod g(x) where p(x) is in montgomery form and g(x) = - * x^128 + x^127 + x^126 + x^121 + 1. - * - * We have a 256-bit polynomial PH : PL = P_3 : P_2 : P_1 : P_0 that is the - * product of two 128-bit polynomials in Montgomery form. We need to reduce it - * mod g(x). Also, since polynomials in Montgomery form have an "extra" factor - * of x^128, this product has two extra factors of x^128. To get it back into - * Montgomery form, we need to remove one of these factors by dividing by x^128. - * - * To accomplish both of these goals, we add multiples of g(x) that cancel out - * the low 128 bits P_1 : P_0, leaving just the high 128 bits. Since the low - * bits are zero, the polynomial division by x^128 can be done by right - * shifting. - * - * Since the only nonzero term in the low 64 bits of g(x) is the constant term, - * the multiple of g(x) needed to cancel out P_0 is P_0 * g(x). The CPU can - * only do 64x64 bit multiplications, so split P_0 * g(x) into x^128 * P_0 + - * x^64 * g*(x) * P_0 + P_0, where g*(x) is bits 64-127 of g(x). Adding this to - * the original polynomial gives P_3 : P_2 + P_0 + T_1 : P_1 + T_0 : 0, where T - * = T_1 : T_0 = g*(x) * P_0. Thus, bits 0-63 got "folded" into bits 64-191. - * - * Repeating this same process on the next 64 bits "folds" bits 64-127 into bits - * 128-255, giving the answer in bits 128-255. This time, we need to cancel P_1 - * + T_0 in bits 64-127. The multiple of g(x) required is (P_1 + T_0) * g(x) * - * x^64. Adding this to our previous computation gives P_3 + P_1 + T_0 + V_1 : - * P_2 + P_0 + T_1 + V_0 : 0 : 0, where V = V_1 : V_0 = g*(x) * (P_1 + T_0). - * - * So our final computation is: - * T = T_1 : T_0 = g*(x) * P_0 - * V = V_1 : V_0 = g*(x) * (P_1 + T_0) - * p(x) / x^{128} mod g(x) = P_3 + P_1 + T_0 + V_1 : P_2 + P_0 + T_1 + V_0 - * - * The implementation below saves a XOR instruction by computing P_1 + T_0 : P_0 - * + T_1 and XORing into dest, rather than separately XORing P_1 : P_0 and T_0 : - * T_1 into dest. This allows us to reuse P_1 + T_0 when computing V. - */ -.macro montgomery_reduction dest - DEST .req \dest - // TMP_V = T_1 : T_0 = P_0 * g*(x) - pmull TMP_V.1q, PL.1d, GSTAR.1d - // TMP_V = T_0 : T_1 - ext TMP_V.16b, TMP_V.16b, TMP_V.16b, #8 - // TMP_V = P_1 + T_0 : P_0 + T_1 - eor TMP_V.16b, PL.16b, TMP_V.16b - // PH = P_3 + P_1 + T_0 : P_2 + P_0 + T_1 - eor PH.16b, PH.16b, TMP_V.16b - // TMP_V = V_1 : V_0 = (P_1 + T_0) * g*(x) - pmull2 TMP_V.1q, TMP_V.2d, GSTAR.2d - eor DEST.16b, PH.16b, TMP_V.16b - .unreq DEST -.endm - -/* - * Compute Polyval on 8 blocks. - * - * If reduce is set, also computes the montgomery reduction of the - * previous full_stride call and XORs with the first message block. - * (m_0 + REDUCE(PL, PH))h^8 + ... 
+ m_7h^1. - * I.e., the first multiplication uses m_0 + REDUCE(PL, PH) instead of m_0. - * - * Sets PL, PH. - */ -.macro full_stride reduce - eor LO.16b, LO.16b, LO.16b - eor MI.16b, MI.16b, MI.16b - eor HI.16b, HI.16b, HI.16b - - ld1 {M0.16b, M1.16b, M2.16b, M3.16b}, [MSG], #64 - ld1 {M4.16b, M5.16b, M6.16b, M7.16b}, [MSG], #64 - - karatsuba1 M7 KEY1 - .if \reduce - pmull TMP_V.1q, PL.1d, GSTAR.1d - .endif - - karatsuba1 M6 KEY2 - .if \reduce - ext TMP_V.16b, TMP_V.16b, TMP_V.16b, #8 - .endif - - karatsuba1 M5 KEY3 - .if \reduce - eor TMP_V.16b, PL.16b, TMP_V.16b - .endif - - karatsuba1 M4 KEY4 - .if \reduce - eor PH.16b, PH.16b, TMP_V.16b - .endif - - karatsuba1 M3 KEY5 - .if \reduce - pmull2 TMP_V.1q, TMP_V.2d, GSTAR.2d - .endif - - karatsuba1 M2 KEY6 - .if \reduce - eor SUM.16b, PH.16b, TMP_V.16b - .endif - - karatsuba1 M1 KEY7 - eor M0.16b, M0.16b, SUM.16b - - karatsuba1 M0 KEY8 - karatsuba2 -.endm - -/* - * Handle any extra blocks after full_stride loop. - */ -.macro partial_stride - add KEY_POWERS, KEY_START, #(STRIDE_BLOCKS << 4) - sub KEY_POWERS, KEY_POWERS, BLOCKS_LEFT, lsl #4 - ld1 {KEY1.16b}, [KEY_POWERS], #16 - - ld1 {TMP_V.16b}, [MSG], #16 - eor SUM.16b, SUM.16b, TMP_V.16b - karatsuba1_store KEY1 SUM - sub BLOCKS_LEFT, BLOCKS_LEFT, #1 - - tst BLOCKS_LEFT, #4 - beq .Lpartial4BlocksDone - ld1 {M0.16b, M1.16b, M2.16b, M3.16b}, [MSG], #64 - ld1 {KEY8.16b, KEY7.16b, KEY6.16b, KEY5.16b}, [KEY_POWERS], #64 - karatsuba1 M0 KEY8 - karatsuba1 M1 KEY7 - karatsuba1 M2 KEY6 - karatsuba1 M3 KEY5 -.Lpartial4BlocksDone: - tst BLOCKS_LEFT, #2 - beq .Lpartial2BlocksDone - ld1 {M0.16b, M1.16b}, [MSG], #32 - ld1 {KEY8.16b, KEY7.16b}, [KEY_POWERS], #32 - karatsuba1 M0 KEY8 - karatsuba1 M1 KEY7 -.Lpartial2BlocksDone: - tst BLOCKS_LEFT, #1 - beq .LpartialDone - ld1 {M0.16b}, [MSG], #16 - ld1 {KEY8.16b}, [KEY_POWERS], #16 - karatsuba1 M0 KEY8 -.LpartialDone: - karatsuba2 - montgomery_reduction SUM -.endm - -/* - * Perform montgomery multiplication in GF(2^128) and store result in op1. - * - * Computes op1*op2*x^{-128} mod x^128 + x^127 + x^126 + x^121 + 1 - * If op1, op2 are in montgomery form, this computes the montgomery - * form of op1*op2. - * - * void pmull_polyval_mul(u8 *op1, const u8 *op2); - */ -SYM_FUNC_START(pmull_polyval_mul) - adr TMP, .Lgstar - ld1 {GSTAR.2d}, [TMP] - ld1 {v0.16b}, [x0] - ld1 {v1.16b}, [x1] - karatsuba1_store v0 v1 - karatsuba2 - montgomery_reduction SUM - st1 {SUM.16b}, [x0] - ret -SYM_FUNC_END(pmull_polyval_mul) - -/* - * Perform polynomial evaluation as specified by POLYVAL. This computes: - * h^n * accumulator + h^n * m_0 + ... + h^1 * m_{n-1} - * where n=nblocks, h is the hash key, and m_i are the message blocks. - * - * x0 - pointer to precomputed key powers h^8 ... 
h^1 - * x1 - pointer to message blocks - * x2 - number of blocks to hash - * x3 - pointer to accumulator - * - * void pmull_polyval_update(const struct polyval_ctx *ctx, const u8 *in, - * size_t nblocks, u8 *accumulator); - */ -SYM_FUNC_START(pmull_polyval_update) - adr TMP, .Lgstar - mov KEY_START, KEY_POWERS - ld1 {GSTAR.2d}, [TMP] - ld1 {SUM.16b}, [ACCUMULATOR] - subs BLOCKS_LEFT, BLOCKS_LEFT, #STRIDE_BLOCKS - blt .LstrideLoopExit - ld1 {KEY8.16b, KEY7.16b, KEY6.16b, KEY5.16b}, [KEY_POWERS], #64 - ld1 {KEY4.16b, KEY3.16b, KEY2.16b, KEY1.16b}, [KEY_POWERS], #64 - full_stride 0 - subs BLOCKS_LEFT, BLOCKS_LEFT, #STRIDE_BLOCKS - blt .LstrideLoopExitReduce -.LstrideLoop: - full_stride 1 - subs BLOCKS_LEFT, BLOCKS_LEFT, #STRIDE_BLOCKS - bge .LstrideLoop -.LstrideLoopExitReduce: - montgomery_reduction SUM -.LstrideLoopExit: - adds BLOCKS_LEFT, BLOCKS_LEFT, #STRIDE_BLOCKS - beq .LskipPartial - partial_stride -.LskipPartial: - st1 {SUM.16b}, [ACCUMULATOR] - ret -SYM_FUNC_END(pmull_polyval_update) diff --git a/arch/arm64/crypto/polyval-ce-glue.c b/arch/arm64/crypto/polyval-ce-glue.c deleted file mode 100644 index c4e653688ea0..000000000000 --- a/arch/arm64/crypto/polyval-ce-glue.c +++ /dev/null @@ -1,158 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Glue code for POLYVAL using ARMv8 Crypto Extensions - * - * Copyright (c) 2007 Nokia Siemens Networks - Mikko Herranen <mh1@iki.fi> - * Copyright (c) 2009 Intel Corp. - * Author: Huang Ying <ying.huang@intel.com> - * Copyright 2021 Google LLC - */ - -/* - * Glue code based on ghash-clmulni-intel_glue.c. - * - * This implementation of POLYVAL uses montgomery multiplication accelerated by - * ARMv8 Crypto Extensions instructions to implement the finite field operations. - */ - -#include <asm/neon.h> -#include <crypto/internal/hash.h> -#include <crypto/polyval.h> -#include <crypto/utils.h> -#include <linux/cpufeature.h> -#include <linux/errno.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/string.h> - -#define NUM_KEY_POWERS 8 - -struct polyval_tfm_ctx { - /* - * These powers must be in the order h^8, ..., h^1. 
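/*
 * For reference, the per-block definition that the 8-block stride above
 * accelerates: acc = (acc ^ m_i) * h for each 16-byte block m_i.
 * gf128_mul_polyval() is an assumed helper for the POLYVAL field
 * multiplication (not provided by this patch); the function name below
 * is likewise only for illustration.
 */
#include <stddef.h>
#include <stdint.h>

void gf128_mul_polyval(uint8_t op1[16], const uint8_t op2[16]);	/* assumed */

static void polyval_update_ref(const uint8_t h[16], const uint8_t *in,
			       size_t nblocks, uint8_t acc[16])
{
	size_t i;

	while (nblocks--) {
		for (i = 0; i < 16; i++)
			acc[i] ^= in[i];
		gf128_mul_polyval(acc, h);	/* acc = (acc + m_i) * h */
		in += 16;
	}
}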
- */ - u8 key_powers[NUM_KEY_POWERS][POLYVAL_BLOCK_SIZE]; -}; - -struct polyval_desc_ctx { - u8 buffer[POLYVAL_BLOCK_SIZE]; -}; - -asmlinkage void pmull_polyval_update(const struct polyval_tfm_ctx *keys, - const u8 *in, size_t nblocks, u8 *accumulator); -asmlinkage void pmull_polyval_mul(u8 *op1, const u8 *op2); - -static void internal_polyval_update(const struct polyval_tfm_ctx *keys, - const u8 *in, size_t nblocks, u8 *accumulator) -{ - kernel_neon_begin(); - pmull_polyval_update(keys, in, nblocks, accumulator); - kernel_neon_end(); -} - -static void internal_polyval_mul(u8 *op1, const u8 *op2) -{ - kernel_neon_begin(); - pmull_polyval_mul(op1, op2); - kernel_neon_end(); -} - -static int polyval_arm64_setkey(struct crypto_shash *tfm, - const u8 *key, unsigned int keylen) -{ - struct polyval_tfm_ctx *tctx = crypto_shash_ctx(tfm); - int i; - - if (keylen != POLYVAL_BLOCK_SIZE) - return -EINVAL; - - memcpy(tctx->key_powers[NUM_KEY_POWERS-1], key, POLYVAL_BLOCK_SIZE); - - for (i = NUM_KEY_POWERS-2; i >= 0; i--) { - memcpy(tctx->key_powers[i], key, POLYVAL_BLOCK_SIZE); - internal_polyval_mul(tctx->key_powers[i], - tctx->key_powers[i+1]); - } - - return 0; -} - -static int polyval_arm64_init(struct shash_desc *desc) -{ - struct polyval_desc_ctx *dctx = shash_desc_ctx(desc); - - memset(dctx, 0, sizeof(*dctx)); - - return 0; -} - -static int polyval_arm64_update(struct shash_desc *desc, - const u8 *src, unsigned int srclen) -{ - struct polyval_desc_ctx *dctx = shash_desc_ctx(desc); - const struct polyval_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm); - unsigned int nblocks; - - do { - /* allow rescheduling every 4K bytes */ - nblocks = min(srclen, 4096U) / POLYVAL_BLOCK_SIZE; - internal_polyval_update(tctx, src, nblocks, dctx->buffer); - srclen -= nblocks * POLYVAL_BLOCK_SIZE; - src += nblocks * POLYVAL_BLOCK_SIZE; - } while (srclen >= POLYVAL_BLOCK_SIZE); - - return srclen; -} - -static int polyval_arm64_finup(struct shash_desc *desc, const u8 *src, - unsigned int len, u8 *dst) -{ - struct polyval_desc_ctx *dctx = shash_desc_ctx(desc); - const struct polyval_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm); - - if (len) { - crypto_xor(dctx->buffer, src, len); - internal_polyval_mul(dctx->buffer, - tctx->key_powers[NUM_KEY_POWERS-1]); - } - - memcpy(dst, dctx->buffer, POLYVAL_BLOCK_SIZE); - - return 0; -} - -static struct shash_alg polyval_alg = { - .digestsize = POLYVAL_DIGEST_SIZE, - .init = polyval_arm64_init, - .update = polyval_arm64_update, - .finup = polyval_arm64_finup, - .setkey = polyval_arm64_setkey, - .descsize = sizeof(struct polyval_desc_ctx), - .base = { - .cra_name = "polyval", - .cra_driver_name = "polyval-ce", - .cra_priority = 200, - .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, - .cra_blocksize = POLYVAL_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct polyval_tfm_ctx), - .cra_module = THIS_MODULE, - }, -}; - -static int __init polyval_ce_mod_init(void) -{ - return crypto_register_shash(&polyval_alg); -} - -static void __exit polyval_ce_mod_exit(void) -{ - crypto_unregister_shash(&polyval_alg); -} - -module_cpu_feature_match(PMULL, polyval_ce_mod_init) -module_exit(polyval_ce_mod_exit); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("POLYVAL hash function accelerated by ARMv8 Crypto Extensions"); -MODULE_ALIAS_CRYPTO("polyval"); -MODULE_ALIAS_CRYPTO("polyval-ce"); diff --git a/arch/arm64/crypto/sha3-ce-core.S b/arch/arm64/crypto/sha3-ce-core.S deleted file mode 100644 index 9c77313f5a60..000000000000 --- a/arch/arm64/crypto/sha3-ce-core.S +++ /dev/null @@ -1,212 +0,0 @@ -/* 
SPDX-License-Identifier: GPL-2.0 */ -/* - * sha3-ce-core.S - core SHA-3 transform using v8.2 Crypto Extensions - * - * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/linkage.h> -#include <asm/assembler.h> - - .irp b,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 - .set .Lv\b\().2d, \b - .set .Lv\b\().16b, \b - .endr - - /* - * ARMv8.2 Crypto Extensions instructions - */ - .macro eor3, rd, rn, rm, ra - .inst 0xce000000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16) - .endm - - .macro rax1, rd, rn, rm - .inst 0xce608c00 | .L\rd | (.L\rn << 5) | (.L\rm << 16) - .endm - - .macro bcax, rd, rn, rm, ra - .inst 0xce200000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16) - .endm - - .macro xar, rd, rn, rm, imm6 - .inst 0xce800000 | .L\rd | (.L\rn << 5) | ((\imm6) << 10) | (.L\rm << 16) - .endm - - /* - * int sha3_ce_transform(u64 *st, const u8 *data, int blocks, int dg_size) - */ - .text -SYM_FUNC_START(sha3_ce_transform) - /* load state */ - add x8, x0, #32 - ld1 { v0.1d- v3.1d}, [x0] - ld1 { v4.1d- v7.1d}, [x8], #32 - ld1 { v8.1d-v11.1d}, [x8], #32 - ld1 {v12.1d-v15.1d}, [x8], #32 - ld1 {v16.1d-v19.1d}, [x8], #32 - ld1 {v20.1d-v23.1d}, [x8], #32 - ld1 {v24.1d}, [x8] - -0: sub w2, w2, #1 - mov w8, #24 - adr_l x9, .Lsha3_rcon - - /* load input */ - ld1 {v25.8b-v28.8b}, [x1], #32 - ld1 {v29.8b-v31.8b}, [x1], #24 - eor v0.8b, v0.8b, v25.8b - eor v1.8b, v1.8b, v26.8b - eor v2.8b, v2.8b, v27.8b - eor v3.8b, v3.8b, v28.8b - eor v4.8b, v4.8b, v29.8b - eor v5.8b, v5.8b, v30.8b - eor v6.8b, v6.8b, v31.8b - - tbnz x3, #6, 2f // SHA3-512 - - ld1 {v25.8b-v28.8b}, [x1], #32 - ld1 {v29.8b-v30.8b}, [x1], #16 - eor v7.8b, v7.8b, v25.8b - eor v8.8b, v8.8b, v26.8b - eor v9.8b, v9.8b, v27.8b - eor v10.8b, v10.8b, v28.8b - eor v11.8b, v11.8b, v29.8b - eor v12.8b, v12.8b, v30.8b - - tbnz x3, #4, 1f // SHA3-384 or SHA3-224 - - // SHA3-256 - ld1 {v25.8b-v28.8b}, [x1], #32 - eor v13.8b, v13.8b, v25.8b - eor v14.8b, v14.8b, v26.8b - eor v15.8b, v15.8b, v27.8b - eor v16.8b, v16.8b, v28.8b - b 3f - -1: tbz x3, #2, 3f // bit 2 cleared? 
SHA-384 - - // SHA3-224 - ld1 {v25.8b-v28.8b}, [x1], #32 - ld1 {v29.8b}, [x1], #8 - eor v13.8b, v13.8b, v25.8b - eor v14.8b, v14.8b, v26.8b - eor v15.8b, v15.8b, v27.8b - eor v16.8b, v16.8b, v28.8b - eor v17.8b, v17.8b, v29.8b - b 3f - - // SHA3-512 -2: ld1 {v25.8b-v26.8b}, [x1], #16 - eor v7.8b, v7.8b, v25.8b - eor v8.8b, v8.8b, v26.8b - -3: sub w8, w8, #1 - - eor3 v29.16b, v4.16b, v9.16b, v14.16b - eor3 v26.16b, v1.16b, v6.16b, v11.16b - eor3 v28.16b, v3.16b, v8.16b, v13.16b - eor3 v25.16b, v0.16b, v5.16b, v10.16b - eor3 v27.16b, v2.16b, v7.16b, v12.16b - eor3 v29.16b, v29.16b, v19.16b, v24.16b - eor3 v26.16b, v26.16b, v16.16b, v21.16b - eor3 v28.16b, v28.16b, v18.16b, v23.16b - eor3 v25.16b, v25.16b, v15.16b, v20.16b - eor3 v27.16b, v27.16b, v17.16b, v22.16b - - rax1 v30.2d, v29.2d, v26.2d // bc[0] - rax1 v26.2d, v26.2d, v28.2d // bc[2] - rax1 v28.2d, v28.2d, v25.2d // bc[4] - rax1 v25.2d, v25.2d, v27.2d // bc[1] - rax1 v27.2d, v27.2d, v29.2d // bc[3] - - eor v0.16b, v0.16b, v30.16b - xar v29.2d, v1.2d, v25.2d, (64 - 1) - xar v1.2d, v6.2d, v25.2d, (64 - 44) - xar v6.2d, v9.2d, v28.2d, (64 - 20) - xar v9.2d, v22.2d, v26.2d, (64 - 61) - xar v22.2d, v14.2d, v28.2d, (64 - 39) - xar v14.2d, v20.2d, v30.2d, (64 - 18) - xar v31.2d, v2.2d, v26.2d, (64 - 62) - xar v2.2d, v12.2d, v26.2d, (64 - 43) - xar v12.2d, v13.2d, v27.2d, (64 - 25) - xar v13.2d, v19.2d, v28.2d, (64 - 8) - xar v19.2d, v23.2d, v27.2d, (64 - 56) - xar v23.2d, v15.2d, v30.2d, (64 - 41) - xar v15.2d, v4.2d, v28.2d, (64 - 27) - xar v28.2d, v24.2d, v28.2d, (64 - 14) - xar v24.2d, v21.2d, v25.2d, (64 - 2) - xar v8.2d, v8.2d, v27.2d, (64 - 55) - xar v4.2d, v16.2d, v25.2d, (64 - 45) - xar v16.2d, v5.2d, v30.2d, (64 - 36) - xar v5.2d, v3.2d, v27.2d, (64 - 28) - xar v27.2d, v18.2d, v27.2d, (64 - 21) - xar v3.2d, v17.2d, v26.2d, (64 - 15) - xar v25.2d, v11.2d, v25.2d, (64 - 10) - xar v26.2d, v7.2d, v26.2d, (64 - 6) - xar v30.2d, v10.2d, v30.2d, (64 - 3) - - bcax v20.16b, v31.16b, v22.16b, v8.16b - bcax v21.16b, v8.16b, v23.16b, v22.16b - bcax v22.16b, v22.16b, v24.16b, v23.16b - bcax v23.16b, v23.16b, v31.16b, v24.16b - bcax v24.16b, v24.16b, v8.16b, v31.16b - - ld1r {v31.2d}, [x9], #8 - - bcax v17.16b, v25.16b, v19.16b, v3.16b - bcax v18.16b, v3.16b, v15.16b, v19.16b - bcax v19.16b, v19.16b, v16.16b, v15.16b - bcax v15.16b, v15.16b, v25.16b, v16.16b - bcax v16.16b, v16.16b, v3.16b, v25.16b - - bcax v10.16b, v29.16b, v12.16b, v26.16b - bcax v11.16b, v26.16b, v13.16b, v12.16b - bcax v12.16b, v12.16b, v14.16b, v13.16b - bcax v13.16b, v13.16b, v29.16b, v14.16b - bcax v14.16b, v14.16b, v26.16b, v29.16b - - bcax v7.16b, v30.16b, v9.16b, v4.16b - bcax v8.16b, v4.16b, v5.16b, v9.16b - bcax v9.16b, v9.16b, v6.16b, v5.16b - bcax v5.16b, v5.16b, v30.16b, v6.16b - bcax v6.16b, v6.16b, v4.16b, v30.16b - - bcax v3.16b, v27.16b, v0.16b, v28.16b - bcax v4.16b, v28.16b, v1.16b, v0.16b - bcax v0.16b, v0.16b, v2.16b, v1.16b - bcax v1.16b, v1.16b, v27.16b, v2.16b - bcax v2.16b, v2.16b, v28.16b, v27.16b - - eor v0.16b, v0.16b, v31.16b - - cbnz w8, 3b - cond_yield 4f, x8, x9 - cbnz w2, 0b - - /* save state */ -4: st1 { v0.1d- v3.1d}, [x0], #32 - st1 { v4.1d- v7.1d}, [x0], #32 - st1 { v8.1d-v11.1d}, [x0], #32 - st1 {v12.1d-v15.1d}, [x0], #32 - st1 {v16.1d-v19.1d}, [x0], #32 - st1 {v20.1d-v23.1d}, [x0], #32 - st1 {v24.1d}, [x0] - mov w0, w2 - ret -SYM_FUNC_END(sha3_ce_transform) - - .section ".rodata", "a" - .align 8 -.Lsha3_rcon: - .quad 0x0000000000000001, 0x0000000000008082, 0x800000000000808a - .quad 0x8000000080008000, 0x000000000000808b, 
0x0000000080000001 - .quad 0x8000000080008081, 0x8000000000008009, 0x000000000000008a - .quad 0x0000000000000088, 0x0000000080008009, 0x000000008000000a - .quad 0x000000008000808b, 0x800000000000008b, 0x8000000000008089 - .quad 0x8000000000008003, 0x8000000000008002, 0x8000000000000080 - .quad 0x000000000000800a, 0x800000008000000a, 0x8000000080008081 - .quad 0x8000000000008080, 0x0000000080000001, 0x8000000080008008 diff --git a/arch/arm64/crypto/sha3-ce-glue.c b/arch/arm64/crypto/sha3-ce-glue.c deleted file mode 100644 index b4f1001046c9..000000000000 --- a/arch/arm64/crypto/sha3-ce-glue.c +++ /dev/null @@ -1,151 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * sha3-ce-glue.c - core SHA-3 transform using v8.2 Crypto Extensions - * - * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <asm/hwcap.h> -#include <asm/neon.h> -#include <asm/simd.h> -#include <crypto/internal/hash.h> -#include <crypto/sha3.h> -#include <linux/cpufeature.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/string.h> -#include <linux/unaligned.h> - -MODULE_DESCRIPTION("SHA3 secure hash using ARMv8 Crypto Extensions"); -MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); -MODULE_LICENSE("GPL v2"); -MODULE_ALIAS_CRYPTO("sha3-224"); -MODULE_ALIAS_CRYPTO("sha3-256"); -MODULE_ALIAS_CRYPTO("sha3-384"); -MODULE_ALIAS_CRYPTO("sha3-512"); - -asmlinkage int sha3_ce_transform(u64 *st, const u8 *data, int blocks, - int md_len); - -static int sha3_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - struct sha3_state *sctx = shash_desc_ctx(desc); - struct crypto_shash *tfm = desc->tfm; - unsigned int bs, ds; - int blocks; - - ds = crypto_shash_digestsize(tfm); - bs = crypto_shash_blocksize(tfm); - blocks = len / bs; - len -= blocks * bs; - do { - int rem; - - kernel_neon_begin(); - rem = sha3_ce_transform(sctx->st, data, blocks, ds); - kernel_neon_end(); - data += (blocks - rem) * bs; - blocks = rem; - } while (blocks); - return len; -} - -static int sha3_finup(struct shash_desc *desc, const u8 *src, unsigned int len, - u8 *out) -{ - struct sha3_state *sctx = shash_desc_ctx(desc); - struct crypto_shash *tfm = desc->tfm; - __le64 *digest = (__le64 *)out; - u8 block[SHA3_224_BLOCK_SIZE]; - unsigned int bs, ds; - int i; - - ds = crypto_shash_digestsize(tfm); - bs = crypto_shash_blocksize(tfm); - memcpy(block, src, len); - - block[len++] = 0x06; - memset(block + len, 0, bs - len); - block[bs - 1] |= 0x80; - - kernel_neon_begin(); - sha3_ce_transform(sctx->st, block, 1, ds); - kernel_neon_end(); - memzero_explicit(block , sizeof(block)); - - for (i = 0; i < ds / 8; i++) - put_unaligned_le64(sctx->st[i], digest++); - - if (ds & 4) - put_unaligned_le32(sctx->st[i], (__le32 *)digest); - - return 0; -} - -static struct shash_alg algs[] = { { - .digestsize = SHA3_224_DIGEST_SIZE, - .init = crypto_sha3_init, - .update = sha3_update, - .finup = sha3_finup, - .descsize = SHA3_STATE_SIZE, - .base.cra_name = "sha3-224", - .base.cra_driver_name = "sha3-224-ce", - .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, - .base.cra_blocksize = SHA3_224_BLOCK_SIZE, - .base.cra_module = THIS_MODULE, - .base.cra_priority = 200, -}, { - .digestsize = SHA3_256_DIGEST_SIZE, - .init = crypto_sha3_init, - .update = sha3_update, - .finup = sha3_finup, - .descsize = SHA3_STATE_SIZE, - 
.base.cra_name = "sha3-256", - .base.cra_driver_name = "sha3-256-ce", - .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, - .base.cra_blocksize = SHA3_256_BLOCK_SIZE, - .base.cra_module = THIS_MODULE, - .base.cra_priority = 200, -}, { - .digestsize = SHA3_384_DIGEST_SIZE, - .init = crypto_sha3_init, - .update = sha3_update, - .finup = sha3_finup, - .descsize = SHA3_STATE_SIZE, - .base.cra_name = "sha3-384", - .base.cra_driver_name = "sha3-384-ce", - .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, - .base.cra_blocksize = SHA3_384_BLOCK_SIZE, - .base.cra_module = THIS_MODULE, - .base.cra_priority = 200, -}, { - .digestsize = SHA3_512_DIGEST_SIZE, - .init = crypto_sha3_init, - .update = sha3_update, - .finup = sha3_finup, - .descsize = SHA3_STATE_SIZE, - .base.cra_name = "sha3-512", - .base.cra_driver_name = "sha3-512-ce", - .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, - .base.cra_blocksize = SHA3_512_BLOCK_SIZE, - .base.cra_module = THIS_MODULE, - .base.cra_priority = 200, -} }; - -static int __init sha3_neon_mod_init(void) -{ - return crypto_register_shashes(algs, ARRAY_SIZE(algs)); -} - -static void __exit sha3_neon_mod_fini(void) -{ - crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); -} - -module_cpu_feature_match(SHA3, sha3_neon_mod_init); -module_exit(sha3_neon_mod_fini); diff --git a/arch/arm64/crypto/sm3-ce-glue.c b/arch/arm64/crypto/sm3-ce-glue.c index eac6f5fa0abe..24c1fcfae072 100644 --- a/arch/arm64/crypto/sm3-ce-glue.c +++ b/arch/arm64/crypto/sm3-ce-glue.c @@ -5,7 +5,6 @@ * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org> */ -#include <asm/neon.h> #include <crypto/internal/hash.h> #include <crypto/sm3.h> #include <crypto/sm3_base.h> @@ -13,6 +12,8 @@ #include <linux/kernel.h> #include <linux/module.h> +#include <asm/simd.h> + MODULE_DESCRIPTION("SM3 secure hash using ARMv8 Crypto Extensions"); MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); MODULE_LICENSE("GPL v2"); @@ -25,18 +26,18 @@ static int sm3_ce_update(struct shash_desc *desc, const u8 *data, { int remain; - kernel_neon_begin(); - remain = sm3_base_do_update_blocks(desc, data, len, sm3_ce_transform); - kernel_neon_end(); + scoped_ksimd() { + remain = sm3_base_do_update_blocks(desc, data, len, sm3_ce_transform); + } return remain; } static int sm3_ce_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { - kernel_neon_begin(); - sm3_base_do_finup(desc, data, len, sm3_ce_transform); - kernel_neon_end(); + scoped_ksimd() { + sm3_base_do_finup(desc, data, len, sm3_ce_transform); + } return sm3_base_finish(desc, out); } diff --git a/arch/arm64/crypto/sm3-neon-glue.c b/arch/arm64/crypto/sm3-neon-glue.c index 6c4611a503a3..15f30cc24f32 100644 --- a/arch/arm64/crypto/sm3-neon-glue.c +++ b/arch/arm64/crypto/sm3-neon-glue.c @@ -5,7 +5,7 @@ * Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com> */ -#include <asm/neon.h> +#include <asm/simd.h> #include <crypto/internal/hash.h> #include <crypto/sm3.h> #include <crypto/sm3_base.h> @@ -20,20 +20,16 @@ asmlinkage void sm3_neon_transform(struct sm3_state *sst, u8 const *src, static int sm3_neon_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - int remain; - - kernel_neon_begin(); - remain = sm3_base_do_update_blocks(desc, data, len, sm3_neon_transform); - kernel_neon_end(); - return remain; + scoped_ksimd() + return sm3_base_do_update_blocks(desc, data, len, + sm3_neon_transform); } static int sm3_neon_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { - kernel_neon_begin(); - 
sm3_base_do_finup(desc, data, len, sm3_neon_transform); - kernel_neon_end(); + scoped_ksimd() + sm3_base_do_finup(desc, data, len, sm3_neon_transform); return sm3_base_finish(desc, out); } diff --git a/arch/arm64/crypto/sm4-ce-ccm-glue.c b/arch/arm64/crypto/sm4-ce-ccm-glue.c index e9cc1c1364ec..332f02167a96 100644 --- a/arch/arm64/crypto/sm4-ce-ccm-glue.c +++ b/arch/arm64/crypto/sm4-ce-ccm-glue.c @@ -11,7 +11,7 @@ #include <linux/crypto.h> #include <linux/kernel.h> #include <linux/cpufeature.h> -#include <asm/neon.h> +#include <asm/simd.h> #include <crypto/scatterwalk.h> #include <crypto/internal/aead.h> #include <crypto/internal/skcipher.h> @@ -35,10 +35,9 @@ static int ccm_setkey(struct crypto_aead *tfm, const u8 *key, if (key_len != SM4_KEY_SIZE) return -EINVAL; - kernel_neon_begin(); - sm4_ce_expand_key(key, ctx->rkey_enc, ctx->rkey_dec, - crypto_sm4_fk, crypto_sm4_ck); - kernel_neon_end(); + scoped_ksimd() + sm4_ce_expand_key(key, ctx->rkey_enc, ctx->rkey_dec, + crypto_sm4_fk, crypto_sm4_ck); return 0; } @@ -167,39 +166,23 @@ static int ccm_crypt(struct aead_request *req, struct skcipher_walk *walk, memcpy(ctr0, walk->iv, SM4_BLOCK_SIZE); crypto_inc(walk->iv, SM4_BLOCK_SIZE); - kernel_neon_begin(); + scoped_ksimd() { + if (req->assoclen) + ccm_calculate_auth_mac(req, mac); - if (req->assoclen) - ccm_calculate_auth_mac(req, mac); - - while (walk->nbytes && walk->nbytes != walk->total) { - unsigned int tail = walk->nbytes % SM4_BLOCK_SIZE; - - sm4_ce_ccm_crypt(rkey_enc, walk->dst.virt.addr, - walk->src.virt.addr, walk->iv, - walk->nbytes - tail, mac); - - kernel_neon_end(); - - err = skcipher_walk_done(walk, tail); - - kernel_neon_begin(); - } - - if (walk->nbytes) { - sm4_ce_ccm_crypt(rkey_enc, walk->dst.virt.addr, - walk->src.virt.addr, walk->iv, - walk->nbytes, mac); + while (walk->nbytes) { + unsigned int tail = walk->nbytes % SM4_BLOCK_SIZE; - sm4_ce_ccm_final(rkey_enc, ctr0, mac); + if (walk->nbytes == walk->total) + tail = 0; - kernel_neon_end(); + sm4_ce_ccm_crypt(rkey_enc, walk->dst.virt.addr, + walk->src.virt.addr, walk->iv, + walk->nbytes - tail, mac); - err = skcipher_walk_done(walk, 0); - } else { + err = skcipher_walk_done(walk, tail); + } sm4_ce_ccm_final(rkey_enc, ctr0, mac); - - kernel_neon_end(); } return err; diff --git a/arch/arm64/crypto/sm4-ce-cipher-glue.c b/arch/arm64/crypto/sm4-ce-cipher-glue.c index c31d76fb5a17..bceec833ef4e 100644 --- a/arch/arm64/crypto/sm4-ce-cipher-glue.c +++ b/arch/arm64/crypto/sm4-ce-cipher-glue.c @@ -32,9 +32,8 @@ static void sm4_ce_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) if (!crypto_simd_usable()) { sm4_crypt_block(ctx->rkey_enc, out, in); } else { - kernel_neon_begin(); - sm4_ce_do_crypt(ctx->rkey_enc, out, in); - kernel_neon_end(); + scoped_ksimd() + sm4_ce_do_crypt(ctx->rkey_enc, out, in); } } @@ -45,9 +44,8 @@ static void sm4_ce_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) if (!crypto_simd_usable()) { sm4_crypt_block(ctx->rkey_dec, out, in); } else { - kernel_neon_begin(); - sm4_ce_do_crypt(ctx->rkey_dec, out, in); - kernel_neon_end(); + scoped_ksimd() + sm4_ce_do_crypt(ctx->rkey_dec, out, in); } } diff --git a/arch/arm64/crypto/sm4-ce-gcm-glue.c b/arch/arm64/crypto/sm4-ce-gcm-glue.c index c2ea3d5f690b..ef06f4f768a1 100644 --- a/arch/arm64/crypto/sm4-ce-gcm-glue.c +++ b/arch/arm64/crypto/sm4-ce-gcm-glue.c @@ -11,7 +11,7 @@ #include <linux/crypto.h> #include <linux/kernel.h> #include <linux/cpufeature.h> -#include <asm/neon.h> +#include <asm/simd.h> #include <crypto/b128ops.h> #include <crypto/scatterwalk.h> 
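/*
 * The shape of the conversion applied throughout these glue files:
 * scoped_ksimd() replaces a paired kernel_neon_begin()/kernel_neon_end()
 * and covers either a single statement or a braced block when several
 * calls share one SIMD section. my_simd_transform() is a placeholder,
 * not a kernel symbol.
 */
static void example_before(u8 *dst, const u8 *src, int nblocks)
{
	kernel_neon_begin();
	my_simd_transform(dst, src, nblocks);
	kernel_neon_end();
}

static void example_after(u8 *dst, const u8 *src, int nblocks)
{
	scoped_ksimd()
		my_simd_transform(dst, src, nblocks);
}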
#include <crypto/internal/aead.h> @@ -48,13 +48,11 @@ static int gcm_setkey(struct crypto_aead *tfm, const u8 *key, if (key_len != SM4_KEY_SIZE) return -EINVAL; - kernel_neon_begin(); - - sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec, - crypto_sm4_fk, crypto_sm4_ck); - sm4_ce_pmull_ghash_setup(ctx->key.rkey_enc, ctx->ghash_table); - - kernel_neon_end(); + scoped_ksimd() { + sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec, + crypto_sm4_fk, crypto_sm4_ck); + sm4_ce_pmull_ghash_setup(ctx->key.rkey_enc, ctx->ghash_table); + } return 0; } @@ -149,44 +147,28 @@ static int gcm_crypt(struct aead_request *req, struct skcipher_walk *walk, memcpy(iv, req->iv, GCM_IV_SIZE); put_unaligned_be32(2, iv + GCM_IV_SIZE); - kernel_neon_begin(); + scoped_ksimd() { + if (req->assoclen) + gcm_calculate_auth_mac(req, ghash); - if (req->assoclen) - gcm_calculate_auth_mac(req, ghash); + do { + unsigned int tail = walk->nbytes % SM4_BLOCK_SIZE; + const u8 *src = walk->src.virt.addr; + u8 *dst = walk->dst.virt.addr; + const u8 *l = NULL; - while (walk->nbytes) { - unsigned int tail = walk->nbytes % SM4_BLOCK_SIZE; - const u8 *src = walk->src.virt.addr; - u8 *dst = walk->dst.virt.addr; + if (walk->nbytes == walk->total) { + l = (const u8 *)&lengths; + tail = 0; + } - if (walk->nbytes == walk->total) { sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, dst, src, iv, - walk->nbytes, ghash, - ctx->ghash_table, - (const u8 *)&lengths); - - kernel_neon_end(); - - return skcipher_walk_done(walk, 0); - } + walk->nbytes - tail, ghash, + ctx->ghash_table, l); - sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, dst, src, iv, - walk->nbytes - tail, ghash, - ctx->ghash_table, NULL); - - kernel_neon_end(); - - err = skcipher_walk_done(walk, tail); - - kernel_neon_begin(); + err = skcipher_walk_done(walk, tail); + } while (walk->nbytes); } - - sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, NULL, NULL, iv, - walk->nbytes, ghash, ctx->ghash_table, - (const u8 *)&lengths); - - kernel_neon_end(); - return err; } diff --git a/arch/arm64/crypto/sm4-ce-glue.c b/arch/arm64/crypto/sm4-ce-glue.c index 7a60e7b559dc..5569cece5a0b 100644 --- a/arch/arm64/crypto/sm4-ce-glue.c +++ b/arch/arm64/crypto/sm4-ce-glue.c @@ -8,7 +8,7 @@ * Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com> */ -#include <asm/neon.h> +#include <asm/simd.h> #include <crypto/b128ops.h> #include <crypto/internal/hash.h> #include <crypto/internal/skcipher.h> @@ -74,10 +74,9 @@ static int sm4_setkey(struct crypto_skcipher *tfm, const u8 *key, if (key_len != SM4_KEY_SIZE) return -EINVAL; - kernel_neon_begin(); - sm4_ce_expand_key(key, ctx->rkey_enc, ctx->rkey_dec, - crypto_sm4_fk, crypto_sm4_ck); - kernel_neon_end(); + scoped_ksimd() + sm4_ce_expand_key(key, ctx->rkey_enc, ctx->rkey_dec, + crypto_sm4_fk, crypto_sm4_ck); return 0; } @@ -94,12 +93,12 @@ static int sm4_xts_setkey(struct crypto_skcipher *tfm, const u8 *key, if (ret) return ret; - kernel_neon_begin(); - sm4_ce_expand_key(key, ctx->key1.rkey_enc, - ctx->key1.rkey_dec, crypto_sm4_fk, crypto_sm4_ck); - sm4_ce_expand_key(&key[SM4_KEY_SIZE], ctx->key2.rkey_enc, - ctx->key2.rkey_dec, crypto_sm4_fk, crypto_sm4_ck); - kernel_neon_end(); + scoped_ksimd() { + sm4_ce_expand_key(key, ctx->key1.rkey_enc, + ctx->key1.rkey_dec, crypto_sm4_fk, crypto_sm4_ck); + sm4_ce_expand_key(&key[SM4_KEY_SIZE], ctx->key2.rkey_enc, + ctx->key2.rkey_dec, crypto_sm4_fk, crypto_sm4_ck); + } return 0; } @@ -117,16 +116,14 @@ static int sm4_ecb_do_crypt(struct skcipher_request *req, const u32 *rkey) u8 *dst = walk.dst.virt.addr; 
unsigned int nblks; - kernel_neon_begin(); - - nblks = BYTES2BLKS(nbytes); - if (nblks) { - sm4_ce_crypt(rkey, dst, src, nblks); - nbytes -= nblks * SM4_BLOCK_SIZE; + scoped_ksimd() { + nblks = BYTES2BLKS(nbytes); + if (nblks) { + sm4_ce_crypt(rkey, dst, src, nblks); + nbytes -= nblks * SM4_BLOCK_SIZE; + } } - kernel_neon_end(); - err = skcipher_walk_done(&walk, nbytes); } @@ -167,16 +164,14 @@ static int sm4_cbc_crypt(struct skcipher_request *req, nblocks = nbytes / SM4_BLOCK_SIZE; if (nblocks) { - kernel_neon_begin(); - - if (encrypt) - sm4_ce_cbc_enc(ctx->rkey_enc, dst, src, - walk.iv, nblocks); - else - sm4_ce_cbc_dec(ctx->rkey_dec, dst, src, - walk.iv, nblocks); - - kernel_neon_end(); + scoped_ksimd() { + if (encrypt) + sm4_ce_cbc_enc(ctx->rkey_enc, dst, src, + walk.iv, nblocks); + else + sm4_ce_cbc_dec(ctx->rkey_dec, dst, src, + walk.iv, nblocks); + } } err = skcipher_walk_done(&walk, nbytes % SM4_BLOCK_SIZE); @@ -249,16 +244,14 @@ static int sm4_cbc_cts_crypt(struct skcipher_request *req, bool encrypt) if (err) return err; - kernel_neon_begin(); - - if (encrypt) - sm4_ce_cbc_cts_enc(ctx->rkey_enc, walk.dst.virt.addr, - walk.src.virt.addr, walk.iv, walk.nbytes); - else - sm4_ce_cbc_cts_dec(ctx->rkey_dec, walk.dst.virt.addr, - walk.src.virt.addr, walk.iv, walk.nbytes); - - kernel_neon_end(); + scoped_ksimd() { + if (encrypt) + sm4_ce_cbc_cts_enc(ctx->rkey_enc, walk.dst.virt.addr, + walk.src.virt.addr, walk.iv, walk.nbytes); + else + sm4_ce_cbc_cts_dec(ctx->rkey_dec, walk.dst.virt.addr, + walk.src.virt.addr, walk.iv, walk.nbytes); + } return skcipher_walk_done(&walk, 0); } @@ -288,28 +281,26 @@ static int sm4_ctr_crypt(struct skcipher_request *req) u8 *dst = walk.dst.virt.addr; unsigned int nblks; - kernel_neon_begin(); - - nblks = BYTES2BLKS(nbytes); - if (nblks) { - sm4_ce_ctr_enc(ctx->rkey_enc, dst, src, walk.iv, nblks); - dst += nblks * SM4_BLOCK_SIZE; - src += nblks * SM4_BLOCK_SIZE; - nbytes -= nblks * SM4_BLOCK_SIZE; - } - - /* tail */ - if (walk.nbytes == walk.total && nbytes > 0) { - u8 keystream[SM4_BLOCK_SIZE]; - - sm4_ce_crypt_block(ctx->rkey_enc, keystream, walk.iv); - crypto_inc(walk.iv, SM4_BLOCK_SIZE); - crypto_xor_cpy(dst, src, keystream, nbytes); - nbytes = 0; + scoped_ksimd() { + nblks = BYTES2BLKS(nbytes); + if (nblks) { + sm4_ce_ctr_enc(ctx->rkey_enc, dst, src, walk.iv, nblks); + dst += nblks * SM4_BLOCK_SIZE; + src += nblks * SM4_BLOCK_SIZE; + nbytes -= nblks * SM4_BLOCK_SIZE; + } + + /* tail */ + if (walk.nbytes == walk.total && nbytes > 0) { + u8 keystream[SM4_BLOCK_SIZE]; + + sm4_ce_crypt_block(ctx->rkey_enc, keystream, walk.iv); + crypto_inc(walk.iv, SM4_BLOCK_SIZE); + crypto_xor_cpy(dst, src, keystream, nbytes); + nbytes = 0; + } } - kernel_neon_end(); - err = skcipher_walk_done(&walk, nbytes); } @@ -359,18 +350,16 @@ static int sm4_xts_crypt(struct skcipher_request *req, bool encrypt) if (nbytes < walk.total) nbytes &= ~(SM4_BLOCK_SIZE - 1); - kernel_neon_begin(); - - if (encrypt) - sm4_ce_xts_enc(ctx->key1.rkey_enc, walk.dst.virt.addr, - walk.src.virt.addr, walk.iv, nbytes, - rkey2_enc); - else - sm4_ce_xts_dec(ctx->key1.rkey_dec, walk.dst.virt.addr, - walk.src.virt.addr, walk.iv, nbytes, - rkey2_enc); - - kernel_neon_end(); + scoped_ksimd() { + if (encrypt) + sm4_ce_xts_enc(ctx->key1.rkey_enc, walk.dst.virt.addr, + walk.src.virt.addr, walk.iv, nbytes, + rkey2_enc); + else + sm4_ce_xts_dec(ctx->key1.rkey_dec, walk.dst.virt.addr, + walk.src.virt.addr, walk.iv, nbytes, + rkey2_enc); + } rkey2_enc = NULL; @@ -395,18 +384,16 @@ static int sm4_xts_crypt(struct 
skcipher_request *req, bool encrypt) if (err) return err; - kernel_neon_begin(); - - if (encrypt) - sm4_ce_xts_enc(ctx->key1.rkey_enc, walk.dst.virt.addr, - walk.src.virt.addr, walk.iv, walk.nbytes, - rkey2_enc); - else - sm4_ce_xts_dec(ctx->key1.rkey_dec, walk.dst.virt.addr, - walk.src.virt.addr, walk.iv, walk.nbytes, - rkey2_enc); - - kernel_neon_end(); + scoped_ksimd() { + if (encrypt) + sm4_ce_xts_enc(ctx->key1.rkey_enc, walk.dst.virt.addr, + walk.src.virt.addr, walk.iv, walk.nbytes, + rkey2_enc); + else + sm4_ce_xts_dec(ctx->key1.rkey_dec, walk.dst.virt.addr, + walk.src.virt.addr, walk.iv, walk.nbytes, + rkey2_enc); + } return skcipher_walk_done(&walk, 0); } @@ -510,11 +497,9 @@ static int sm4_cbcmac_setkey(struct crypto_shash *tfm, const u8 *key, if (key_len != SM4_KEY_SIZE) return -EINVAL; - kernel_neon_begin(); - sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec, - crypto_sm4_fk, crypto_sm4_ck); - kernel_neon_end(); - + scoped_ksimd() + sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec, + crypto_sm4_fk, crypto_sm4_ck); return 0; } @@ -530,15 +515,13 @@ static int sm4_cmac_setkey(struct crypto_shash *tfm, const u8 *key, memset(consts, 0, SM4_BLOCK_SIZE); - kernel_neon_begin(); - - sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec, - crypto_sm4_fk, crypto_sm4_ck); + scoped_ksimd() { + sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec, + crypto_sm4_fk, crypto_sm4_ck); - /* encrypt the zero block */ - sm4_ce_crypt_block(ctx->key.rkey_enc, (u8 *)consts, (const u8 *)consts); - - kernel_neon_end(); + /* encrypt the zero block */ + sm4_ce_crypt_block(ctx->key.rkey_enc, (u8 *)consts, (const u8 *)consts); + } /* gf(2^128) multiply zero-ciphertext with u and u^2 */ a = be64_to_cpu(consts[0].a); @@ -568,18 +551,16 @@ static int sm4_xcbc_setkey(struct crypto_shash *tfm, const u8 *key, if (key_len != SM4_KEY_SIZE) return -EINVAL; - kernel_neon_begin(); - - sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec, - crypto_sm4_fk, crypto_sm4_ck); + scoped_ksimd() { + sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec, + crypto_sm4_fk, crypto_sm4_ck); - sm4_ce_crypt_block(ctx->key.rkey_enc, key2, ks[0]); - sm4_ce_crypt(ctx->key.rkey_enc, ctx->consts, ks[1], 2); + sm4_ce_crypt_block(ctx->key.rkey_enc, key2, ks[0]); + sm4_ce_crypt(ctx->key.rkey_enc, ctx->consts, ks[1], 2); - sm4_ce_expand_key(key2, ctx->key.rkey_enc, ctx->key.rkey_dec, - crypto_sm4_fk, crypto_sm4_ck); - - kernel_neon_end(); + sm4_ce_expand_key(key2, ctx->key.rkey_enc, ctx->key.rkey_dec, + crypto_sm4_fk, crypto_sm4_ck); + } return 0; } @@ -600,10 +581,9 @@ static int sm4_mac_update(struct shash_desc *desc, const u8 *p, unsigned int nblocks = len / SM4_BLOCK_SIZE; len %= SM4_BLOCK_SIZE; - kernel_neon_begin(); - sm4_ce_mac_update(tctx->key.rkey_enc, ctx->digest, p, - nblocks, false, true); - kernel_neon_end(); + scoped_ksimd() + sm4_ce_mac_update(tctx->key.rkey_enc, ctx->digest, p, + nblocks, false, true); return len; } @@ -619,10 +599,9 @@ static int sm4_cmac_finup(struct shash_desc *desc, const u8 *src, ctx->digest[len] ^= 0x80; consts += SM4_BLOCK_SIZE; } - kernel_neon_begin(); - sm4_ce_mac_update(tctx->key.rkey_enc, ctx->digest, consts, 1, - false, true); - kernel_neon_end(); + scoped_ksimd() + sm4_ce_mac_update(tctx->key.rkey_enc, ctx->digest, consts, 1, + false, true); memcpy(out, ctx->digest, SM4_BLOCK_SIZE); return 0; } @@ -635,10 +614,9 @@ static int sm4_cbcmac_finup(struct shash_desc *desc, const u8 *src, if (len) { crypto_xor(ctx->digest, src, len); - kernel_neon_begin(); - 
sm4_ce_crypt_block(tctx->key.rkey_enc, ctx->digest, - ctx->digest); - kernel_neon_end(); + scoped_ksimd() + sm4_ce_crypt_block(tctx->key.rkey_enc, ctx->digest, + ctx->digest); } memcpy(out, ctx->digest, SM4_BLOCK_SIZE); return 0; diff --git a/arch/arm64/crypto/sm4-neon-glue.c b/arch/arm64/crypto/sm4-neon-glue.c index e3500aca2d18..e944c2a2efb0 100644 --- a/arch/arm64/crypto/sm4-neon-glue.c +++ b/arch/arm64/crypto/sm4-neon-glue.c @@ -48,11 +48,8 @@ static int sm4_ecb_do_crypt(struct skcipher_request *req, const u32 *rkey) nblocks = nbytes / SM4_BLOCK_SIZE; if (nblocks) { - kernel_neon_begin(); - - sm4_neon_crypt(rkey, dst, src, nblocks); - - kernel_neon_end(); + scoped_ksimd() + sm4_neon_crypt(rkey, dst, src, nblocks); } err = skcipher_walk_done(&walk, nbytes % SM4_BLOCK_SIZE); @@ -126,12 +123,9 @@ static int sm4_cbc_decrypt(struct skcipher_request *req) nblocks = nbytes / SM4_BLOCK_SIZE; if (nblocks) { - kernel_neon_begin(); - - sm4_neon_cbc_dec(ctx->rkey_dec, dst, src, - walk.iv, nblocks); - - kernel_neon_end(); + scoped_ksimd() + sm4_neon_cbc_dec(ctx->rkey_dec, dst, src, + walk.iv, nblocks); } err = skcipher_walk_done(&walk, nbytes % SM4_BLOCK_SIZE); @@ -157,12 +151,9 @@ static int sm4_ctr_crypt(struct skcipher_request *req) nblocks = nbytes / SM4_BLOCK_SIZE; if (nblocks) { - kernel_neon_begin(); - - sm4_neon_ctr_crypt(ctx->rkey_enc, dst, src, - walk.iv, nblocks); - - kernel_neon_end(); + scoped_ksimd() + sm4_neon_ctr_crypt(ctx->rkey_enc, dst, src, + walk.iv, nblocks); dst += nblocks * SM4_BLOCK_SIZE; src += nblocks * SM4_BLOCK_SIZE; diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h index c8c77f9e36d6..862416624852 100644 --- a/arch/arm64/include/asm/alternative-macros.h +++ b/arch/arm64/include/asm/alternative-macros.h @@ -19,7 +19,7 @@ #error "cpucaps have overflown ARM64_CB_BIT" #endif -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/stringify.h> @@ -207,7 +207,7 @@ alternative_endif #define _ALTERNATIVE_CFG(insn1, insn2, cap, cfg, ...) \ alternative_insn insn1, insn2, cap, IS_ENABLED(cfg) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ /* * Usage: asm(ALTERNATIVE(oldinstr, newinstr, cpucap)); @@ -219,7 +219,7 @@ alternative_endif #define ALTERNATIVE(oldinstr, newinstr, ...) 
\ _ALTERNATIVE_CFG(oldinstr, newinstr, __VA_ARGS__, 1) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> @@ -263,6 +263,6 @@ l_yes: return true; } -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_ALTERNATIVE_MACROS_H */ diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index 00d97b8a757f..621aa8550174 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -4,7 +4,7 @@ #include <asm/alternative-macros.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/init.h> #include <linux/types.h> @@ -26,13 +26,16 @@ void __init apply_alternatives_all(void); bool alternative_is_applied(u16 cpucap); #ifdef CONFIG_MODULES -void apply_alternatives_module(void *start, size_t length); +int apply_alternatives_module(void *start, size_t length); #else -static inline void apply_alternatives_module(void *start, size_t length) { } +static inline int apply_alternatives_module(void *start, size_t length) +{ + return 0; +} #endif void alt_cb_patch_nops(struct alt_instr *alt, __le32 *origptr, __le32 *updptr, int nr_inst); -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_ALTERNATIVE_H */ diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index 9e96f024b2f1..d20b03931a8d 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -9,7 +9,7 @@ #include <asm/sysreg.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/irqchip/arm-gic-common.h> #include <linux/stringify.h> @@ -188,5 +188,5 @@ static inline bool gic_has_relaxed_pmr_sync(void) return cpus_have_cap(ARM64_HAS_GIC_PRIO_RELAXED_SYNC); } -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_ARCH_GICV3_H */ diff --git a/arch/arm64/include/asm/asm-extable.h b/arch/arm64/include/asm/asm-extable.h index 292f2687a12e..d67e2fdd1aee 100644 --- a/arch/arm64/include/asm/asm-extable.h +++ b/arch/arm64/include/asm/asm-extable.h @@ -27,7 +27,7 @@ /* Data fields for EX_TYPE_UACCESS_CPY */ #define EX_DATA_UACCESS_WRITE BIT(0) -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define __ASM_EXTABLE_RAW(insn, fixup, type, data) \ .pushsection __ex_table, "a"; \ @@ -77,7 +77,7 @@ __ASM_EXTABLE_RAW(\insn, \fixup, EX_TYPE_UACCESS_CPY, \uaccess_is_write) .endm -#else /* __ASSEMBLY__ */ +#else /* __ASSEMBLER__ */ #include <linux/stringify.h> @@ -132,6 +132,6 @@ EX_DATA_REG(ADDR, addr) \ ")") -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_ASM_EXTABLE_H */ diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 23be85d93348..f0ca7196f6fa 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -5,7 +5,7 @@ * Copyright (C) 1996-2000 Russell King * Copyright (C) 2012 ARM Ltd. */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #error "Only include this from assembly code" #endif @@ -325,14 +325,14 @@ alternative_cb_end * tcr_set_t0sz - update TCR.T0SZ so that we can load the ID map */ .macro tcr_set_t0sz, valreg, t0sz - bfi \valreg, \t0sz, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH + bfi \valreg, \t0sz, #TCR_EL1_T0SZ_SHIFT, #TCR_EL1_T0SZ_WIDTH .endm /* * tcr_set_t1sz - update TCR.T1SZ */ .macro tcr_set_t1sz, valreg, t1sz - bfi \valreg, \t1sz, #TCR_T1SZ_OFFSET, #TCR_TxSZ_WIDTH + bfi \valreg, \t1sz, #TCR_EL1_T1SZ_SHIFT, #TCR_EL1_T1SZ_WIDTH .endm /* @@ -371,7 +371,7 @@ alternative_endif * [start, end) with dcache line size explicitly provided. 
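/*
 * What the tcr_set_t0sz bit-field insert above does, expressed in C with
 * the renamed TCR_EL1_* constants. A kernel-style sketch for illustration
 * only; the function name is made up.
 */
#include <linux/bits.h>
#include <linux/types.h>

static inline u64 tcr_set_t0sz_c(u64 tcr, u64 t0sz)
{
	u64 mask = GENMASK(TCR_EL1_T0SZ_SHIFT + TCR_EL1_T0SZ_WIDTH - 1,
			   TCR_EL1_T0SZ_SHIFT);

	return (tcr & ~mask) | ((t0sz << TCR_EL1_T0SZ_SHIFT) & mask);
}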
* * op: operation passed to dc instruction - * domain: domain used in dsb instruciton + * domain: domain used in dsb instruction * start: starting virtual address of the region * end: end virtual address of the region * linesz: dcache line size @@ -412,7 +412,7 @@ alternative_endif * [start, end) * * op: operation passed to dc instruction - * domain: domain used in dsb instruciton + * domain: domain used in dsb instruction * start: starting virtual address of the region * end: end virtual address of the region * fixup: optional label to branch to on user fault @@ -589,7 +589,7 @@ alternative_endif .macro offset_ttbr1, ttbr, tmp #if defined(CONFIG_ARM64_VA_BITS_52) && !defined(CONFIG_ARM64_LPA2) mrs \tmp, tcr_el1 - and \tmp, \tmp, #TCR_T1SZ_MASK + and \tmp, \tmp, #TCR_EL1_T1SZ_MASK cmp \tmp, #TCR_T1SZ(VA_BITS_MIN) orr \tmp, \ttbr, #TTBR1_BADDR_4852_OFFSET csel \ttbr, \tmp, \ttbr, eq diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index 87f568a94e55..afad1849c4cf 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -103,17 +103,17 @@ static __always_inline void __lse_atomic_and(int i, atomic_t *v) return __lse_atomic_andnot(~i, v); } -#define ATOMIC_FETCH_OP_AND(name, mb, cl...) \ +#define ATOMIC_FETCH_OP_AND(name) \ static __always_inline int \ __lse_atomic_fetch_and##name(int i, atomic_t *v) \ { \ return __lse_atomic_fetch_andnot##name(~i, v); \ } -ATOMIC_FETCH_OP_AND(_relaxed, ) -ATOMIC_FETCH_OP_AND(_acquire, a, "memory") -ATOMIC_FETCH_OP_AND(_release, l, "memory") -ATOMIC_FETCH_OP_AND( , al, "memory") +ATOMIC_FETCH_OP_AND(_relaxed) +ATOMIC_FETCH_OP_AND(_acquire) +ATOMIC_FETCH_OP_AND(_release) +ATOMIC_FETCH_OP_AND( ) #undef ATOMIC_FETCH_OP_AND @@ -210,17 +210,17 @@ static __always_inline void __lse_atomic64_and(s64 i, atomic64_t *v) return __lse_atomic64_andnot(~i, v); } -#define ATOMIC64_FETCH_OP_AND(name, mb, cl...) 
\ +#define ATOMIC64_FETCH_OP_AND(name) \ static __always_inline long \ __lse_atomic64_fetch_and##name(s64 i, atomic64_t *v) \ { \ return __lse_atomic64_fetch_andnot##name(~i, v); \ } -ATOMIC64_FETCH_OP_AND(_relaxed, ) -ATOMIC64_FETCH_OP_AND(_acquire, a, "memory") -ATOMIC64_FETCH_OP_AND(_release, l, "memory") -ATOMIC64_FETCH_OP_AND( , al, "memory") +ATOMIC64_FETCH_OP_AND(_relaxed) +ATOMIC64_FETCH_OP_AND(_acquire) +ATOMIC64_FETCH_OP_AND(_release) +ATOMIC64_FETCH_OP_AND( ) #undef ATOMIC64_FETCH_OP_AND diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index f5801b0ba9e9..9495c4441a46 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -7,7 +7,7 @@ #ifndef __ASM_BARRIER_H #define __ASM_BARRIER_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/kasan-checks.h> @@ -221,6 +221,6 @@ do { \ #include <asm-generic/barrier.h> -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_BARRIER_H */ diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index 09963004ceea..dd2c8586a725 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -35,7 +35,7 @@ #define ARCH_DMA_MINALIGN (128) #define ARCH_KMALLOC_MINALIGN (8) -#if !defined(__ASSEMBLY__) && !defined(BUILD_VDSO) +#if !defined(__ASSEMBLER__) && !defined(BUILD_VDSO) #include <linux/bitops.h> #include <linux/kasan-enabled.h> @@ -135,6 +135,6 @@ static inline u32 __attribute_const__ read_cpuid_effective_cachetype(void) return ctr; } -#endif /* !defined(__ASSEMBLY__) && !defined(BUILD_VDSO) */ +#endif /* !defined(__ASSEMBLER__) && !defined(BUILD_VDSO) */ #endif diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 9d769291a306..2c8029472ad4 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -5,7 +5,7 @@ #include <asm/cpucap-defs.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> /* * Check whether a cpucap is possible at compiletime. @@ -77,6 +77,6 @@ cpucap_is_possible(const unsigned int cap) return true; } -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index e223cbf350e4..4de51f8d92cb 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -19,7 +19,7 @@ #define ARM64_SW_FEATURE_OVERRIDE_HVHE 4 #define ARM64_SW_FEATURE_OVERRIDE_RODATA_OFF 8 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/bug.h> #include <linux/jump_label.h> @@ -199,7 +199,7 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; * registers (e.g, SCTLR, TCR etc.) or patching the kernel via * alternatives. The kernel patching is batched and performed at later * point. The actions are always initiated only after the capability - * is finalised. This is usally denoted by "enabling" the capability. + * is finalised. This is usually denoted by "enabling" the capability. * The actions are initiated as follows : * a) Action is triggered on all online CPUs, after the capability is * finalised, invoked within the stop_machine() context from @@ -251,7 +251,7 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; #define ARM64_CPUCAP_SCOPE_LOCAL_CPU ((u16)BIT(0)) #define ARM64_CPUCAP_SCOPE_SYSTEM ((u16)BIT(1)) /* - * The capabilitiy is detected on the Boot CPU and is used by kernel + * The capability is detected on the Boot CPU and is used by kernel * during early boot. 
i.e, the capability should be "detected" and * "enabled" as early as possibly on all booting CPUs. */ @@ -1078,6 +1078,6 @@ static inline bool cpu_has_lpa2(void) #endif } -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 9b00b75acbf2..08860d482e60 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -247,9 +247,9 @@ /* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */ #define MIDR_FUJITSU_ERRATUM_010001 MIDR_FUJITSU_A64FX #define MIDR_FUJITSU_ERRATUM_010001_MASK (~MIDR_CPU_VAR_REV(1, 0)) -#define TCR_CLEAR_FUJITSU_ERRATUM_010001 (TCR_NFD1 | TCR_NFD0) +#define TCR_CLEAR_FUJITSU_ERRATUM_010001 (TCR_EL1_NFD1 | TCR_EL1_NFD0) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/sysreg.h> @@ -328,6 +328,6 @@ static inline u32 __attribute_const__ read_cpuid_cachetype(void) { return read_cpuid(CTR_EL0); } -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/arm64/include/asm/current.h b/arch/arm64/include/asm/current.h index 54ceae0874c7..c92912eaf186 100644 --- a/arch/arm64/include/asm/current.h +++ b/arch/arm64/include/asm/current.h @@ -4,7 +4,7 @@ #include <linux/compiler.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct task_struct; @@ -23,7 +23,7 @@ static __always_inline struct task_struct *get_current(void) #define current get_current() -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_CURRENT_H */ diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index f5e3ed2420ce..8d5f92418838 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -48,7 +48,7 @@ #define AARCH32_BREAK_THUMB2_LO 0xf7f0 #define AARCH32_BREAK_THUMB2_HI 0xa000 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct task_struct; #define DBG_ARCH_ID_RESERVED 0 /* In case of ptrace ABI updates. */ @@ -88,5 +88,5 @@ static inline bool try_step_suspended_breakpoints(struct pt_regs *regs) bool try_handle_aarch32_break(struct pt_regs *regs); -#endif /* __ASSEMBLY */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_DEBUG_MONITORS_H */ diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index bcd5622aa096..aa91165ca140 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -126,21 +126,14 @@ static inline void efi_set_pgd(struct mm_struct *mm) if (mm != current->active_mm) { /* * Update the current thread's saved ttbr0 since it is - * restored as part of a return from exception. Enable - * access to the valid TTBR0_EL1 and invoke the errata - * workaround directly since there is no return from - * exception when invoking the EFI run-time services. + * restored as part of a return from exception. */ update_saved_ttbr0(current, mm); - uaccess_ttbr0_enable(); - post_ttbr_update_workaround(); } else { /* - * Defer the switch to the current thread's TTBR0_EL1 - * until uaccess_enable(). 
Restore the current - * thread's saved ttbr0 corresponding to its active_mm + * Restore the current thread's saved ttbr0 + * corresponding to its active_mm */ - uaccess_ttbr0_disable(); update_saved_ttbr0(current, current->active_mm); } } diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index 99a7c0235e6d..cacd20df1786 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -7,7 +7,7 @@ #ifndef __ARM_KVM_INIT_H__ #define __ARM_KVM_INIT_H__ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #error Assembly-only header #endif @@ -24,7 +24,7 @@ * ID_AA64MMFR4_EL1.E2H0 < 0. On such CPUs HCR_EL2.E2H is RES1, but it * can reset into an UNKNOWN state and might not read as 1 until it has * been initialized explicitly. - * Initalize HCR_EL2.E2H so that later code can rely upon HCR_EL2.E2H + * Initialize HCR_EL2.E2H so that later code can rely upon HCR_EL2.E2H * indicating whether the CPU is running in E2H mode. */ mrs_s x1, SYS_ID_AA64MMFR4_EL1 diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index 3f93f4eef953..d2779d604c7b 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -133,7 +133,7 @@ #define ELF_ET_DYN_BASE (2 * DEFAULT_MAP_WINDOW_64 / 3) #endif /* CONFIG_ARM64_FORCE_52BIT */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <uapi/linux/elf.h> #include <linux/bug.h> @@ -293,6 +293,6 @@ static inline int arch_check_elf(void *ehdr, bool has_interp, return 0; } -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index e1deed824464..4975a92cbd17 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -431,7 +431,7 @@ #define ESR_ELx_IT_GCSPOPCX 6 #define ESR_ELx_IT_GCSPOPX 7 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/types.h> static inline unsigned long esr_brk_comment(unsigned long esr) @@ -534,6 +534,6 @@ static inline bool esr_iss_is_eretab(unsigned long esr) } const char *esr_get_class_string(unsigned long esr); -#endif /* __ASSEMBLY */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_ESR_H */ diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h index 635a43c4ec85..65555284446e 100644 --- a/arch/arm64/include/asm/fixmap.h +++ b/arch/arm64/include/asm/fixmap.h @@ -15,7 +15,7 @@ #ifndef _ASM_ARM64_FIXMAP_H #define _ASM_ARM64_FIXMAP_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/kernel.h> #include <linux/math.h> #include <linux/sizes.h> @@ -117,5 +117,5 @@ extern void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t pr #include <asm-generic/fixmap.h> -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_ARM64_FIXMAP_H */ diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index b8cf0ea43cc0..1d2e33559bd5 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -12,7 +12,7 @@ #include <asm/sigcontext.h> #include <asm/sysreg.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/bitmap.h> #include <linux/build_bug.h> diff --git a/arch/arm64/include/asm/fpu.h b/arch/arm64/include/asm/fpu.h index 2ae50bdce59b..751e88a96734 100644 --- a/arch/arm64/include/asm/fpu.h +++ b/arch/arm64/include/asm/fpu.h @@ -6,10 +6,22 @@ #ifndef __ASM_FPU_H #define __ASM_FPU_H +#include <linux/preempt.h> #include <asm/neon.h> #define kernel_fpu_available() cpu_has_neon() -#define kernel_fpu_begin() kernel_neon_begin() -#define 
kernel_fpu_end() kernel_neon_end() + +static inline void kernel_fpu_begin(void) +{ + BUG_ON(!in_task()); + preempt_disable(); + kernel_neon_begin(NULL); +} + +static inline void kernel_fpu_end(void) +{ + kernel_neon_end(NULL); + preempt_enable(); +} #endif /* ! __ASM_FPU_H */ diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h index ba7cf7fec5e9..1621c84f44b3 100644 --- a/arch/arm64/include/asm/ftrace.h +++ b/arch/arm64/include/asm/ftrace.h @@ -37,7 +37,7 @@ */ #define ARCH_FTRACE_SHIFT_STACK_TRACER 1 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/compat.h> extern void _mcount(unsigned long); @@ -217,9 +217,9 @@ static inline bool arch_syscall_match_sym_name(const char *sym, */ return !strcmp(sym + 8, name); } -#endif /* ifndef __ASSEMBLY__ */ +#endif /* ifndef __ASSEMBLER__ */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifdef CONFIG_FUNCTION_GRAPH_TRACER void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent, diff --git a/arch/arm64/include/asm/gpr-num.h b/arch/arm64/include/asm/gpr-num.h index 05da4a7c5788..a114e4f8209b 100644 --- a/arch/arm64/include/asm/gpr-num.h +++ b/arch/arm64/include/asm/gpr-num.h @@ -2,7 +2,7 @@ #ifndef __ASM_GPR_NUM_H #define __ASM_GPR_NUM_H -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30 .equ .L__gpr_num_x\num, \num @@ -11,7 +11,7 @@ .equ .L__gpr_num_xzr, 31 .equ .L__gpr_num_wzr, 31 -#else /* __ASSEMBLY__ */ +#else /* __ASSEMBLER__ */ #define __DEFINE_ASM_GPR_NUMS \ " .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" \ @@ -21,6 +21,6 @@ " .equ .L__gpr_num_xzr, 31\n" \ " .equ .L__gpr_num_wzr, 31\n" -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_GPR_NUM_H */ diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 6d567265467c..1f63814ae6c4 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -46,7 +46,7 @@ #define COMPAT_HWCAP2_SB (1 << 5) #define COMPAT_HWCAP2_SSBS (1 << 6) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/log2.h> /* diff --git a/arch/arm64/include/asm/image.h b/arch/arm64/include/asm/image.h index c09cf942dc92..9ba85173f857 100644 --- a/arch/arm64/include/asm/image.h +++ b/arch/arm64/include/asm/image.h @@ -20,7 +20,7 @@ #define ARM64_IMAGE_FLAG_PAGE_SIZE_64K 3 #define ARM64_IMAGE_FLAG_PHYS_BASE 1 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define arm64_image_flag_field(flags, field) \ (((flags) >> field##_SHIFT) & field##_MASK) @@ -54,6 +54,6 @@ struct arm64_image_header { __le32 res5; }; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_IMAGE_H */ diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index 18c7811774d3..e1d30ba99d01 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -12,7 +12,7 @@ #include <asm/insn-def.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ enum aarch64_insn_hint_cr_op { AARCH64_INSN_HINT_NOP = 0x0 << 5, @@ -730,6 +730,6 @@ u32 aarch32_insn_mcr_extract_crm(u32 insn); typedef bool (pstate_check_t)(unsigned long); extern pstate_check_t * const aarch32_opcode_cond_checks[16]; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_INSN_H */ diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h index 424ed421cd97..0cb211d3607d 100644 --- a/arch/arm64/include/asm/jump_label.h +++ b/arch/arm64/include/asm/jump_label.h @@ 
-8,7 +8,7 @@ #ifndef __ASM_JUMP_LABEL_H #define __ASM_JUMP_LABEL_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> #include <asm/insn.h> @@ -58,5 +58,5 @@ l_yes: return true; } -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_JUMP_LABEL_H */ diff --git a/arch/arm64/include/asm/kasan.h b/arch/arm64/include/asm/kasan.h index e1b57c13f8a4..b167e9d3da91 100644 --- a/arch/arm64/include/asm/kasan.h +++ b/arch/arm64/include/asm/kasan.h @@ -2,7 +2,7 @@ #ifndef __ASM_KASAN_H #define __ASM_KASAN_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/linkage.h> #include <asm/memory.h> diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h index 4d9cc7a76d9c..892e5bebda95 100644 --- a/arch/arm64/include/asm/kexec.h +++ b/arch/arm64/include/asm/kexec.h @@ -25,7 +25,7 @@ #define KEXEC_ARCH KEXEC_ARCH_AARCH64 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /** * crash_setup_regs() - save registers for the panic kernel @@ -130,6 +130,6 @@ extern int load_other_segments(struct kimage *image, char *cmdline); #endif -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/arm64/include/asm/kfence.h b/arch/arm64/include/asm/kfence.h index a81937fae9f6..21dbc9dda747 100644 --- a/arch/arm64/include/asm/kfence.h +++ b/arch/arm64/include/asm/kfence.h @@ -10,8 +10,6 @@ #include <asm/set_memory.h> -static inline bool arch_kfence_init_pool(void) { return true; } - static inline bool kfence_protect_page(unsigned long addr, bool protect) { set_memory_valid(addr, 1, !protect); @@ -25,6 +23,7 @@ static inline bool arm64_kfence_can_set_direct_map(void) { return !kfence_early_init; } +bool arch_kfence_init_pool(void); #else /* CONFIG_KFENCE */ static inline bool arm64_kfence_can_set_direct_map(void) { return false; } #endif /* CONFIG_KFENCE */ diff --git a/arch/arm64/include/asm/kgdb.h b/arch/arm64/include/asm/kgdb.h index 3184f5d1e3ae..67ef1c5532ae 100644 --- a/arch/arm64/include/asm/kgdb.h +++ b/arch/arm64/include/asm/kgdb.h @@ -14,7 +14,7 @@ #include <linux/ptrace.h> #include <asm/debug-monitors.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ static inline void arch_kgdb_breakpoint(void) { @@ -36,7 +36,7 @@ static inline int kgdb_single_step_handler(struct pt_regs *regs, } #endif -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* * gdb remote procotol (well most versions of it) expects the following diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 9da54d4ee49e..4b34f7b7ed2f 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -46,7 +46,7 @@ #define __KVM_HOST_SMCCC_FUNC___kvm_hyp_init 0 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/mm.h> @@ -303,7 +303,7 @@ void kvm_compute_final_ctr_el0(struct alt_instr *alt, void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, u64 elr_virt, u64 elr_phys, u64 par, uintptr_t vcpu, u64 far, u64 hpfar); -#else /* __ASSEMBLY__ */ +#else /* __ASSEMBLER__ */ .macro get_host_ctxt reg, tmp adr_this_cpu \reg, kvm_host_data, \tmp diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index e4069f2ce642..2dc5e6e742bb 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -49,7 +49,7 @@ * mappings, and none of this applies in that case. 
*/ -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #include <asm/alternative.h> @@ -396,5 +396,5 @@ void kvm_s2_ptdump_create_debugfs(struct kvm *kvm); static inline void kvm_s2_ptdump_create_debugfs(struct kvm *kvm) {} #endif /* CONFIG_PTDUMP_STAGE2_DEBUGFS */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ARM64_KVM_MMU_H__ */ diff --git a/arch/arm64/include/asm/kvm_mte.h b/arch/arm64/include/asm/kvm_mte.h index de002636eb1f..3171963ad25c 100644 --- a/arch/arm64/include/asm/kvm_mte.h +++ b/arch/arm64/include/asm/kvm_mte.h @@ -5,7 +5,7 @@ #ifndef __ASM_KVM_MTE_H #define __ASM_KVM_MTE_H -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #include <asm/sysreg.h> @@ -62,5 +62,5 @@ alternative_else_nop_endif .endm #endif /* CONFIG_ARM64_MTE */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_KVM_MTE_H */ diff --git a/arch/arm64/include/asm/kvm_ptrauth.h b/arch/arm64/include/asm/kvm_ptrauth.h index 6199c9f7ec6e..e50987b32483 100644 --- a/arch/arm64/include/asm/kvm_ptrauth.h +++ b/arch/arm64/include/asm/kvm_ptrauth.h @@ -8,7 +8,7 @@ #ifndef __ASM_KVM_PTRAUTH_H #define __ASM_KVM_PTRAUTH_H -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #include <asm/sysreg.h> @@ -100,7 +100,7 @@ alternative_else_nop_endif .endm #endif /* CONFIG_ARM64_PTR_AUTH */ -#else /* !__ASSEMBLY */ +#else /* !__ASSEMBLER__ */ #define __ptrauth_save_key(ctxt, key) \ do { \ @@ -120,5 +120,5 @@ alternative_else_nop_endif __ptrauth_save_key(ctxt, APGA); \ } while(0) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_KVM_PTRAUTH_H */ diff --git a/arch/arm64/include/asm/linkage.h b/arch/arm64/include/asm/linkage.h index d3acd9c87509..40bd17add539 100644 --- a/arch/arm64/include/asm/linkage.h +++ b/arch/arm64/include/asm/linkage.h @@ -1,7 +1,7 @@ #ifndef __ASM_LINKAGE_H #define __ASM_LINKAGE_H -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #include <asm/assembler.h> #endif diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index f1505c4acb38..9d54b2ea49d6 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -207,7 +207,7 @@ */ #define TRAMP_SWAPPER_OFFSET (2 * PAGE_SIZE) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/bitops.h> #include <linux/compiler.h> @@ -392,7 +392,6 @@ static inline unsigned long virt_to_pfn(const void *kaddr) * virt_to_page(x) convert a _valid_ virtual address to struct page * * virt_addr_valid(x) indicates whether a virtual address is valid */ -#define ARCH_PFN_OFFSET ((unsigned long)PHYS_PFN_OFFSET) #if defined(CONFIG_DEBUG_VIRTUAL) #define page_to_virt(x) ({ \ @@ -422,7 +421,7 @@ static inline unsigned long virt_to_pfn(const void *kaddr) }) void dump_mem_limit(void); -#endif /* !ASSEMBLY */ +#endif /* !__ASSEMBLER__ */ /* * Given that the GIC architecture permits ITS implementations that can only be diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 78a4dbf75e60..137a173df1ff 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -12,7 +12,7 @@ #define USER_ASID_FLAG (UL(1) << USER_ASID_BIT) #define TTBR_ASID_MASK (UL(0xffff) << 48) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/refcount.h> #include <asm/cpufeature.h> @@ -112,5 +112,5 @@ void kpti_install_ng_mappings(void); static inline void kpti_install_ng_mappings(void) {} #endif -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 0dbe3b29049b..cc80af59c69e 100644 --- 
a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -8,7 +8,7 @@ #ifndef __ASM_MMU_CONTEXT_H #define __ASM_MMU_CONTEXT_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/compiler.h> #include <linux/sched.h> @@ -62,29 +62,21 @@ static inline void cpu_switch_mm(pgd_t *pgd, struct mm_struct *mm) } /* - * TCR.T0SZ value to use when the ID map is active. - */ -#define idmap_t0sz TCR_T0SZ(IDMAP_VA_BITS) - -/* * Ensure TCR.T0SZ is set to the provided value. */ static inline void __cpu_set_tcr_t0sz(unsigned long t0sz) { unsigned long tcr = read_sysreg(tcr_el1); - if ((tcr & TCR_T0SZ_MASK) == t0sz) + if ((tcr & TCR_EL1_T0SZ_MASK) == t0sz) return; - tcr &= ~TCR_T0SZ_MASK; + tcr &= ~TCR_EL1_T0SZ_MASK; tcr |= t0sz; write_sysreg(tcr, tcr_el1); isb(); } -#define cpu_set_default_tcr_t0sz() __cpu_set_tcr_t0sz(TCR_T0SZ(vabits_actual)) -#define cpu_set_idmap_tcr_t0sz() __cpu_set_tcr_t0sz(idmap_t0sz) - /* * Remove the idmap from TTBR0_EL1 and install the pgd of the active mm. * @@ -103,7 +95,7 @@ static inline void cpu_uninstall_idmap(void) cpu_set_reserved_ttbr0(); local_flush_tlb_all(); - cpu_set_default_tcr_t0sz(); + __cpu_set_tcr_t0sz(TCR_T0SZ(vabits_actual)); if (mm != &init_mm && !system_uses_ttbr0_pan()) cpu_switch_mm(mm->pgd, mm); @@ -113,7 +105,7 @@ static inline void cpu_install_idmap(void) { cpu_set_reserved_ttbr0(); local_flush_tlb_all(); - cpu_set_idmap_tcr_t0sz(); + __cpu_set_tcr_t0sz(TCR_T0SZ(IDMAP_VA_BITS)); cpu_switch_mm(lm_alias(idmap_pg_dir), &init_mm); } @@ -330,6 +322,6 @@ static inline void deactivate_mm(struct task_struct *tsk, #include <asm-generic/mmu_context.h> -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* !__ASM_MMU_CONTEXT_H */ diff --git a/arch/arm64/include/asm/mte-kasan.h b/arch/arm64/include/asm/mte-kasan.h index 0f9b08e8fb8d..352139271918 100644 --- a/arch/arm64/include/asm/mte-kasan.h +++ b/arch/arm64/include/asm/mte-kasan.h @@ -9,7 +9,7 @@ #include <asm/cputype.h> #include <asm/mte-def.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> @@ -259,6 +259,6 @@ static inline int mte_enable_kernel_store_only(void) #endif /* CONFIG_ARM64_MTE */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_MTE_KASAN_H */ diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h index 3b5069f4683d..6d4a78b9dc3e 100644 --- a/arch/arm64/include/asm/mte.h +++ b/arch/arm64/include/asm/mte.h @@ -8,7 +8,7 @@ #include <asm/compiler.h> #include <asm/mte-def.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/bitfield.h> #include <linux/kasan-enabled.h> @@ -282,5 +282,5 @@ static inline void mte_check_tfsr_exit(void) } #endif /* CONFIG_KASAN_HW_TAGS */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_MTE_H */ diff --git a/arch/arm64/include/asm/neon.h b/arch/arm64/include/asm/neon.h index d4b1d172a79b..acebee4605b5 100644 --- a/arch/arm64/include/asm/neon.h +++ b/arch/arm64/include/asm/neon.h @@ -13,7 +13,7 @@ #define cpu_has_neon() system_supports_fpsimd() -void kernel_neon_begin(void); -void kernel_neon_end(void); +void kernel_neon_begin(struct user_fpsimd_state *); +void kernel_neon_end(struct user_fpsimd_state *); #endif /* ! 
__ASM_NEON_H */ diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index 2312e6ee595f..00f117ff4f7a 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -10,7 +10,7 @@ #include <asm/page-def.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/personality.h> /* for READ_IMPLIES_EXEC */ #include <linux/types.h> /* for gfp_t */ @@ -33,8 +33,8 @@ struct folio *vma_alloc_zeroed_movable_folio(struct vm_area_struct *vma, unsigned long vaddr); #define vma_alloc_zeroed_movable_folio vma_alloc_zeroed_movable_folio -void tag_clear_highpage(struct page *to); -#define __HAVE_ARCH_TAG_CLEAR_HIGHPAGE +bool tag_clear_highpages(struct page *to, int numpages); +#define __HAVE_ARCH_TAG_CLEAR_HIGHPAGES #define clear_user_page(page, vaddr, pg) clear_page(page) #define copy_user_page(to, from, vaddr, pg) copy_page(to, from) @@ -45,7 +45,7 @@ int pfn_is_map_memory(unsigned long pfn); #include <asm/memory.h> -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #define VM_DATA_DEFAULT_FLAGS (VM_DATA_FLAGS_TSK_EXEC | VM_MTE_ALLOWED) diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h index 9abcc8ef3087..b57b2bb00967 100644 --- a/arch/arm64/include/asm/percpu.h +++ b/arch/arm64/include/asm/percpu.h @@ -77,7 +77,7 @@ __percpu_##name##_case_##sz(void *ptr, unsigned long val) \ " stxr" #sfx "\t%w[loop], %" #w "[tmp], %[ptr]\n" \ " cbnz %w[loop], 1b", \ /* LSE atomics */ \ - #op_lse "\t%" #w "[val], %[ptr]\n" \ + #op_lse "\t%" #w "[val], %" #w "[tmp], %[ptr]\n" \ __nops(3)) \ : [loop] "=&r" (loop), [tmp] "=&r" (tmp), \ [ptr] "+Q"(*(u##sz *)ptr) \ @@ -124,9 +124,16 @@ PERCPU_RW_OPS(8) PERCPU_RW_OPS(16) PERCPU_RW_OPS(32) PERCPU_RW_OPS(64) -PERCPU_OP(add, add, stadd) -PERCPU_OP(andnot, bic, stclr) -PERCPU_OP(or, orr, stset) + +/* + * Use value-returning atomics for CPU-local ops as they are more likely + * to execute "near" to the CPU (e.g. in L1$). + * + * https://lore.kernel.org/r/e7d539ed-ced0-4b96-8ecd-048a5b803b85@paulmck-laptop + */ +PERCPU_OP(add, add, ldadd) +PERCPU_OP(andnot, bic, ldclr) +PERCPU_OP(or, orr, ldset) PERCPU_RET_OP(add, add, ldadd) #undef PERCPU_RW_OPS diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index f3b77deedfa2..d49180bb7cb3 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -228,102 +228,53 @@ /* * TCR flags. 
*/ -#define TCR_T0SZ_OFFSET 0 -#define TCR_T1SZ_OFFSET 16 -#define TCR_T0SZ(x) ((UL(64) - (x)) << TCR_T0SZ_OFFSET) -#define TCR_T1SZ(x) ((UL(64) - (x)) << TCR_T1SZ_OFFSET) -#define TCR_TxSZ(x) (TCR_T0SZ(x) | TCR_T1SZ(x)) -#define TCR_TxSZ_WIDTH 6 -#define TCR_T0SZ_MASK (((UL(1) << TCR_TxSZ_WIDTH) - 1) << TCR_T0SZ_OFFSET) -#define TCR_T1SZ_MASK (((UL(1) << TCR_TxSZ_WIDTH) - 1) << TCR_T1SZ_OFFSET) - -#define TCR_EPD0_SHIFT 7 -#define TCR_EPD0_MASK (UL(1) << TCR_EPD0_SHIFT) -#define TCR_IRGN0_SHIFT 8 -#define TCR_IRGN0_MASK (UL(3) << TCR_IRGN0_SHIFT) -#define TCR_IRGN0_NC (UL(0) << TCR_IRGN0_SHIFT) -#define TCR_IRGN0_WBWA (UL(1) << TCR_IRGN0_SHIFT) -#define TCR_IRGN0_WT (UL(2) << TCR_IRGN0_SHIFT) -#define TCR_IRGN0_WBnWA (UL(3) << TCR_IRGN0_SHIFT) - -#define TCR_EPD1_SHIFT 23 -#define TCR_EPD1_MASK (UL(1) << TCR_EPD1_SHIFT) -#define TCR_IRGN1_SHIFT 24 -#define TCR_IRGN1_MASK (UL(3) << TCR_IRGN1_SHIFT) -#define TCR_IRGN1_NC (UL(0) << TCR_IRGN1_SHIFT) -#define TCR_IRGN1_WBWA (UL(1) << TCR_IRGN1_SHIFT) -#define TCR_IRGN1_WT (UL(2) << TCR_IRGN1_SHIFT) -#define TCR_IRGN1_WBnWA (UL(3) << TCR_IRGN1_SHIFT) - -#define TCR_IRGN_NC (TCR_IRGN0_NC | TCR_IRGN1_NC) -#define TCR_IRGN_WBWA (TCR_IRGN0_WBWA | TCR_IRGN1_WBWA) -#define TCR_IRGN_WT (TCR_IRGN0_WT | TCR_IRGN1_WT) -#define TCR_IRGN_WBnWA (TCR_IRGN0_WBnWA | TCR_IRGN1_WBnWA) -#define TCR_IRGN_MASK (TCR_IRGN0_MASK | TCR_IRGN1_MASK) - - -#define TCR_ORGN0_SHIFT 10 -#define TCR_ORGN0_MASK (UL(3) << TCR_ORGN0_SHIFT) -#define TCR_ORGN0_NC (UL(0) << TCR_ORGN0_SHIFT) -#define TCR_ORGN0_WBWA (UL(1) << TCR_ORGN0_SHIFT) -#define TCR_ORGN0_WT (UL(2) << TCR_ORGN0_SHIFT) -#define TCR_ORGN0_WBnWA (UL(3) << TCR_ORGN0_SHIFT) - -#define TCR_ORGN1_SHIFT 26 -#define TCR_ORGN1_MASK (UL(3) << TCR_ORGN1_SHIFT) -#define TCR_ORGN1_NC (UL(0) << TCR_ORGN1_SHIFT) -#define TCR_ORGN1_WBWA (UL(1) << TCR_ORGN1_SHIFT) -#define TCR_ORGN1_WT (UL(2) << TCR_ORGN1_SHIFT) -#define TCR_ORGN1_WBnWA (UL(3) << TCR_ORGN1_SHIFT) - -#define TCR_ORGN_NC (TCR_ORGN0_NC | TCR_ORGN1_NC) -#define TCR_ORGN_WBWA (TCR_ORGN0_WBWA | TCR_ORGN1_WBWA) -#define TCR_ORGN_WT (TCR_ORGN0_WT | TCR_ORGN1_WT) -#define TCR_ORGN_WBnWA (TCR_ORGN0_WBnWA | TCR_ORGN1_WBnWA) -#define TCR_ORGN_MASK (TCR_ORGN0_MASK | TCR_ORGN1_MASK) - -#define TCR_SH0_SHIFT 12 -#define TCR_SH0_MASK (UL(3) << TCR_SH0_SHIFT) -#define TCR_SH0_INNER (UL(3) << TCR_SH0_SHIFT) - -#define TCR_SH1_SHIFT 28 -#define TCR_SH1_MASK (UL(3) << TCR_SH1_SHIFT) -#define TCR_SH1_INNER (UL(3) << TCR_SH1_SHIFT) -#define TCR_SHARED (TCR_SH0_INNER | TCR_SH1_INNER) - -#define TCR_TG0_SHIFT 14 -#define TCR_TG0_MASK (UL(3) << TCR_TG0_SHIFT) -#define TCR_TG0_4K (UL(0) << TCR_TG0_SHIFT) -#define TCR_TG0_64K (UL(1) << TCR_TG0_SHIFT) -#define TCR_TG0_16K (UL(2) << TCR_TG0_SHIFT) - -#define TCR_TG1_SHIFT 30 -#define TCR_TG1_MASK (UL(3) << TCR_TG1_SHIFT) -#define TCR_TG1_16K (UL(1) << TCR_TG1_SHIFT) -#define TCR_TG1_4K (UL(2) << TCR_TG1_SHIFT) -#define TCR_TG1_64K (UL(3) << TCR_TG1_SHIFT) - -#define TCR_IPS_SHIFT 32 -#define TCR_IPS_MASK (UL(7) << TCR_IPS_SHIFT) -#define TCR_A1 (UL(1) << 22) -#define TCR_ASID16 (UL(1) << 36) -#define TCR_TBI0 (UL(1) << 37) -#define TCR_TBI1 (UL(1) << 38) -#define TCR_HA (UL(1) << 39) -#define TCR_HD (UL(1) << 40) -#define TCR_HPD0_SHIFT 41 -#define TCR_HPD0 (UL(1) << TCR_HPD0_SHIFT) -#define TCR_HPD1_SHIFT 42 -#define TCR_HPD1 (UL(1) << TCR_HPD1_SHIFT) -#define TCR_TBID0 (UL(1) << 51) -#define TCR_TBID1 (UL(1) << 52) -#define TCR_NFD0 (UL(1) << 53) -#define TCR_NFD1 (UL(1) << 54) -#define TCR_E0PD0 (UL(1) << 55) -#define TCR_E0PD1 (UL(1) << 
56) -#define TCR_TCMA0 (UL(1) << 57) -#define TCR_TCMA1 (UL(1) << 58) -#define TCR_DS (UL(1) << 59) +#define TCR_T0SZ(x) ((UL(64) - (x)) << TCR_EL1_T0SZ_SHIFT) +#define TCR_T1SZ(x) ((UL(64) - (x)) << TCR_EL1_T1SZ_SHIFT) + +#define TCR_T0SZ_MASK TCR_EL1_T0SZ_MASK +#define TCR_T1SZ_MASK TCR_EL1_T1SZ_MASK + +#define TCR_EPD0_MASK TCR_EL1_EPD0_MASK +#define TCR_EPD1_MASK TCR_EL1_EPD1_MASK + +#define TCR_IRGN0_MASK TCR_EL1_IRGN0_MASK +#define TCR_IRGN0_WBWA (TCR_EL1_IRGN0_WBWA << TCR_EL1_IRGN0_SHIFT) + +#define TCR_ORGN0_MASK TCR_EL1_ORGN0_MASK +#define TCR_ORGN0_WBWA (TCR_EL1_ORGN0_WBWA << TCR_EL1_ORGN0_SHIFT) + +#define TCR_SH0_MASK TCR_EL1_SH0_MASK +#define TCR_SH0_INNER (TCR_EL1_SH0_INNER << TCR_EL1_SH0_SHIFT) + +#define TCR_SH1_MASK TCR_EL1_SH1_MASK + +#define TCR_TG0_SHIFT TCR_EL1_TG0_SHIFT +#define TCR_TG0_MASK TCR_EL1_TG0_MASK +#define TCR_TG0_4K (TCR_EL1_TG0_4K << TCR_EL1_TG0_SHIFT) +#define TCR_TG0_64K (TCR_EL1_TG0_64K << TCR_EL1_TG0_SHIFT) +#define TCR_TG0_16K (TCR_EL1_TG0_16K << TCR_EL1_TG0_SHIFT) + +#define TCR_TG1_SHIFT TCR_EL1_TG1_SHIFT +#define TCR_TG1_MASK TCR_EL1_TG1_MASK +#define TCR_TG1_16K (TCR_EL1_TG1_16K << TCR_EL1_TG1_SHIFT) +#define TCR_TG1_4K (TCR_EL1_TG1_4K << TCR_EL1_TG1_SHIFT) +#define TCR_TG1_64K (TCR_EL1_TG1_64K << TCR_EL1_TG1_SHIFT) + +#define TCR_IPS_SHIFT TCR_EL1_IPS_SHIFT +#define TCR_IPS_MASK TCR_EL1_IPS_MASK +#define TCR_A1 TCR_EL1_A1 +#define TCR_ASID16 TCR_EL1_AS +#define TCR_TBI0 TCR_EL1_TBI0 +#define TCR_TBI1 TCR_EL1_TBI1 +#define TCR_HA TCR_EL1_HA +#define TCR_HD TCR_EL1_HD +#define TCR_HPD0 TCR_EL1_HPD0 +#define TCR_HPD1 TCR_EL1_HPD1 +#define TCR_TBID0 TCR_EL1_TBID0 +#define TCR_TBID1 TCR_EL1_TBID1 +#define TCR_E0PD0 TCR_EL1_E0PD0 +#define TCR_E0PD1 TCR_EL1_E0PD1 +#define TCR_DS TCR_EL1_DS /* * TTBR. diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 85dceb1c66f4..161e8660eddd 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -62,7 +62,7 @@ #define _PAGE_READONLY_EXEC (_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN) #define _PAGE_EXECONLY (_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/cpufeature.h> #include <asm/pgtable-types.h> @@ -84,7 +84,7 @@ extern unsigned long prot_ns_shared; #else static inline bool __pure lpa2_is_enabled(void) { - return read_tcr() & TCR_DS; + return read_tcr() & TCR_EL1_DS; } #define PTE_MAYBE_SHARED (lpa2_is_enabled() ? 0 : PTE_SHARED) @@ -127,7 +127,7 @@ static inline bool __pure lpa2_is_enabled(void) #define PAGE_READONLY_EXEC __pgprot(_PAGE_READONLY_EXEC) #define PAGE_EXECONLY __pgprot(_PAGE_EXECONLY) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #define pte_pi_index(pte) ( \ ((pte & BIT(PTE_PI_IDX_3)) >> (PTE_PI_IDX_3 - 3)) | \ diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 0944e296dd4a..64d5f1d9cce9 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -30,7 +30,7 @@ #define vmemmap ((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT)) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/cmpxchg.h> #include <asm/fixmap.h> @@ -130,12 +130,16 @@ static inline void arch_leave_lazy_mmu_mode(void) #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ /* - * Outside of a few very special situations (e.g. 
hibernation), we always - * use broadcast TLB invalidation instructions, therefore a spurious page - * fault on one CPU which has been handled concurrently by another CPU - * does not need to perform additional invalidation. + * We use local TLB invalidation instruction when reusing page in + * write protection fault handler to avoid TLBI broadcast in the hot + * path. This will cause spurious page faults if stale read-only TLB + * entries exist. */ -#define flush_tlb_fix_spurious_fault(vma, address, ptep) do { } while (0) +#define flush_tlb_fix_spurious_fault(vma, address, ptep) \ + local_flush_tlb_page_nonotify(vma, address) + +#define flush_tlb_fix_spurious_fault_pmd(vma, address, pmdp) \ + local_flush_tlb_page_nonotify(vma, address) /* * ZERO_PAGE is a global shared page that is always zero: used @@ -433,7 +437,7 @@ bool pgattr_change_is_safe(pteval_t old, pteval_t new); * 1 0 | 1 0 1 * 1 1 | 0 1 x * - * When hardware DBM is not present, the sofware PTE_DIRTY bit is updated via + * When hardware DBM is not present, the software PTE_DIRTY bit is updated via * the page fault mechanism. Checking the dirty status of a pte becomes: * * PTE_DIRTY || (PTE_WRITE && !PTE_RDONLY) @@ -599,7 +603,7 @@ static inline int pte_protnone(pte_t pte) /* * pte_present_invalid() tells us that the pte is invalid from HW * perspective but present from SW perspective, so the fields are to be - * interpretted as per the HW layout. The second 2 checks are the unique + * interpreted as per the HW layout. The second 2 checks are the unique * encoding that we use for PROT_NONE. It is insufficient to only use * the first check because we share the same encoding scheme with pmds * which support pmd_mkinvalid(), so can be present-invalid without @@ -1949,6 +1953,6 @@ static inline void clear_young_dirty_ptes(struct vm_area_struct *vma, #endif /* CONFIG_ARM64_CONTPTE */ -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* __ASM_PGTABLE_H */ diff --git a/arch/arm64/include/asm/proc-fns.h b/arch/arm64/include/asm/proc-fns.h index 0d5d1f0525eb..ab78a78821a2 100644 --- a/arch/arm64/include/asm/proc-fns.h +++ b/arch/arm64/include/asm/proc-fns.h @@ -9,7 +9,7 @@ #ifndef __ASM_PROCFNS_H #define __ASM_PROCFNS_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/page.h> @@ -21,5 +21,5 @@ extern u64 cpu_do_resume(phys_addr_t ptr, u64 idmap_ttbr); #include <asm/memory.h> -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_PROCFNS_H */ diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 61d62bfd5a7b..e30c4c8e3a7a 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -25,7 +25,7 @@ #define MTE_CTRL_STORE_ONLY (1UL << 19) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/build_bug.h> #include <linux/cache.h> @@ -172,7 +172,12 @@ struct thread_struct { unsigned long fault_code; /* ESR_EL1 value */ struct debug_info debug; /* debugging */ - struct user_fpsimd_state kernel_fpsimd_state; + /* + * Set [cleared] by kernel_neon_begin() [kernel_neon_end()] to the + * address of a caller provided buffer that will be used to preserve a + * task's kernel mode FPSIMD state while it is scheduled out. 
+ */ + struct user_fpsimd_state *kernel_fpsimd_state; unsigned int kernel_fpsimd_cpu; #ifdef CONFIG_ARM64_PTR_AUTH struct ptrauth_keys_user keys_user; @@ -437,5 +442,5 @@ int set_tsc_mode(unsigned int val); #define GET_TSC_CTL(adr) get_tsc_mode((adr)) #define SET_TSC_CTL(val) set_tsc_mode((val)) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_PROCESSOR_H */ diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 65b053a24d82..39582511ad72 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -94,7 +94,7 @@ */ #define NO_SYSCALL (-1) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/bug.h> #include <linux/types.h> @@ -361,5 +361,5 @@ static inline void procedure_link_pointer_set(struct pt_regs *regs, extern unsigned long profile_pc(struct pt_regs *regs); -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/arm64/include/asm/rsi_smc.h b/arch/arm64/include/asm/rsi_smc.h index 6cb070eca9e9..e19253f96c94 100644 --- a/arch/arm64/include/asm/rsi_smc.h +++ b/arch/arm64/include/asm/rsi_smc.h @@ -122,7 +122,7 @@ */ #define SMC_RSI_ATTESTATION_TOKEN_CONTINUE SMC_RSI_FID(0x195) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct realm_config { union { @@ -142,7 +142,7 @@ struct realm_config { */ } __aligned(0x1000); -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ /* * Read configuration for the current Realm. diff --git a/arch/arm64/include/asm/rwonce.h b/arch/arm64/include/asm/rwonce.h index 97d9256d33c9..78beceec10cd 100644 --- a/arch/arm64/include/asm/rwonce.h +++ b/arch/arm64/include/asm/rwonce.h @@ -5,7 +5,7 @@ #ifndef __ASM_RWONCE_H #define __ASM_RWONCE_H -#if defined(CONFIG_LTO) && !defined(__ASSEMBLY__) +#if defined(CONFIG_LTO) && !defined(__ASSEMBLER__) #include <linux/compiler_types.h> #include <asm/alternative-macros.h> @@ -62,7 +62,7 @@ }) #endif /* !BUILD_VDSO */ -#endif /* CONFIG_LTO && !__ASSEMBLY__ */ +#endif /* CONFIG_LTO && !__ASSEMBLER__ */ #include <asm-generic/rwonce.h> diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h index a76f9b387a26..0fbc2e7867d3 100644 --- a/arch/arm64/include/asm/scs.h +++ b/arch/arm64/include/asm/scs.h @@ -2,7 +2,7 @@ #ifndef _ASM_SCS_H #define _ASM_SCS_H -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #include <asm/asm-offsets.h> #include <asm/sysreg.h> @@ -53,8 +53,8 @@ enum { EDYNSCS_INVALID_CFA_OPCODE = 4, }; -int __pi_scs_patch(const u8 eh_frame[], int size); +int __pi_scs_patch(const u8 eh_frame[], int size, bool skip_dry_run); -#endif /* __ASSEMBLY __ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_SCS_H */ diff --git a/arch/arm64/include/asm/sdei.h b/arch/arm64/include/asm/sdei.h index 484cb6972e99..b2248bd3cb58 100644 --- a/arch/arm64/include/asm/sdei.h +++ b/arch/arm64/include/asm/sdei.h @@ -9,7 +9,7 @@ #define SDEI_STACK_SIZE IRQ_STACK_SIZE -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/linkage.h> #include <linux/preempt.h> @@ -49,5 +49,5 @@ unsigned long do_sdei_event(struct pt_regs *regs, unsigned long sdei_arch_get_entry_point(int conduit); #define sdei_arch_get_entry_point(x) sdei_arch_get_entry_point(x) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_SDEI_H */ diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h index 8e86c9e70e48..0941f6f58a14 100644 --- a/arch/arm64/include/asm/simd.h +++ b/arch/arm64/include/asm/simd.h @@ -6,12 +6,15 @@ #ifndef __ASM_SIMD_H #define __ASM_SIMD_H +#include <linux/cleanup.h> #include <linux/compiler.h> 
#include <linux/irqflags.h> #include <linux/percpu.h> #include <linux/preempt.h> #include <linux/types.h> +#include <asm/neon.h> + #ifdef CONFIG_KERNEL_MODE_NEON /* @@ -29,7 +32,7 @@ static __must_check inline bool may_use_simd(void) */ return !WARN_ON(!system_capabilities_finalized()) && system_supports_fpsimd() && - !in_hardirq() && !irqs_disabled() && !in_nmi(); + !in_hardirq() && !in_nmi(); } #else /* ! CONFIG_KERNEL_MODE_NEON */ @@ -40,4 +43,11 @@ static __must_check inline bool may_use_simd(void) { #endif /* ! CONFIG_KERNEL_MODE_NEON */ +DEFINE_LOCK_GUARD_1(ksimd, + struct user_fpsimd_state, + kernel_neon_begin(_T->lock), + kernel_neon_end(_T->lock)) + +#define scoped_ksimd() scoped_guard(ksimd, &(struct user_fpsimd_state){}) + #endif diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index d48ef6d5abcc..10ea4f543069 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -23,7 +23,7 @@ #define CPU_STUCK_REASON_52_BIT_VA (UL(1) << CPU_STUCK_REASON_SHIFT) #define CPU_STUCK_REASON_NO_GRAN (UL(2) << CPU_STUCK_REASON_SHIFT) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/threads.h> #include <linux/cpumask.h> @@ -155,6 +155,6 @@ bool cpus_are_stuck_in_kernel(void); extern void crash_smp_send_stop(void); extern bool smp_crash_stop_failed(void); -#endif /* ifndef __ASSEMBLY__ */ +#endif /* ifndef __ASSEMBLER__ */ #endif /* ifndef __ASM_SMP_H */ diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h index 8fef12626090..296ae3420bfd 100644 --- a/arch/arm64/include/asm/spectre.h +++ b/arch/arm64/include/asm/spectre.h @@ -12,7 +12,7 @@ #define BP_HARDEN_EL2_SLOTS 4 #define __BP_HARDEN_HYP_VECS_SZ ((BP_HARDEN_EL2_SLOTS - 1) * SZ_2K) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/smp.h> #include <asm/percpu.h> @@ -117,6 +117,7 @@ void spectre_bhb_patch_wa3(struct alt_instr *alt, __le32 *origptr, __le32 *updptr, int nr_inst); void spectre_bhb_patch_clearbhb(struct alt_instr *alt, __le32 *origptr, __le32 *updptr, int nr_inst); +void spectre_print_disabled_mitigations(void); -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_SPECTRE_H */ diff --git a/arch/arm64/include/asm/stacktrace/frame.h b/arch/arm64/include/asm/stacktrace/frame.h index 0ee0f6ba0fd8..796797b8db7e 100644 --- a/arch/arm64/include/asm/stacktrace/frame.h +++ b/arch/arm64/include/asm/stacktrace/frame.h @@ -25,7 +25,7 @@ #define FRAME_META_TYPE_FINAL 1 #define FRAME_META_TYPE_PT_REGS 2 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * A standard AAPCS64 frame record. */ @@ -43,6 +43,6 @@ struct frame_record_meta { struct frame_record record; u64 type; }; -#endif /* __ASSEMBLY */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_STACKTRACE_FRAME_H */ diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h index 0cde2f473971..e65f33edf9d6 100644 --- a/arch/arm64/include/asm/suspend.h +++ b/arch/arm64/include/asm/suspend.h @@ -23,7 +23,7 @@ struct cpu_suspend_ctx { * __cpu_suspend_enter()'s caller, and populated by __cpu_suspend_enter(). * This data must survive until cpu_resume() is called. * - * This struct desribes the size and the layout of the saved cpu state. + * This struct describes the size and the layout of the saved cpu state. * The layout of the callee_saved_regs is defined by the implementation * of __cpu_suspend_enter(), and cpu_resume(). 
This struct must be passed * in by the caller as __cpu_suspend_enter()'s stack-frame is gone once it diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index c231d2a3e515..9df51accbb02 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -52,7 +52,7 @@ #ifndef CONFIG_BROKEN_GAS_INST -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ // The space separator is omitted so that __emit_inst(x) can be parsed as // either an assembler directive or an assembler macro argument. #define __emit_inst(x) .inst(x) @@ -71,11 +71,11 @@ (((x) >> 24) & 0x000000ff)) #endif /* CONFIG_CPU_BIG_ENDIAN */ -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define __emit_inst(x) .long __INSTR_BSWAP(x) -#else /* __ASSEMBLY__ */ +#else /* __ASSEMBLER__ */ #define __emit_inst(x) ".long " __stringify(__INSTR_BSWAP(x)) "\n\t" -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* CONFIG_BROKEN_GAS_INST */ @@ -1129,9 +1129,7 @@ #define gicr_insn(insn) read_sysreg_s(GICV5_OP_GICR_##insn) #define gic_insn(v, insn) write_sysreg_s(v, GICV5_OP_GIC_##insn) -#define ARM64_FEATURE_FIELD_BITS 4 - -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ .macro mrs_s, rt, sreg __emit_inst(0xd5200000|(\sreg)|(.L__gpr_num_\rt)) diff --git a/arch/arm64/include/asm/system_misc.h b/arch/arm64/include/asm/system_misc.h index 344b1c1a4bbb..d316a804eb38 100644 --- a/arch/arm64/include/asm/system_misc.h +++ b/arch/arm64/include/asm/system_misc.h @@ -7,7 +7,7 @@ #ifndef __ASM_SYSTEM_MISC_H #define __ASM_SYSTEM_MISC_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/compiler.h> #include <linux/linkage.h> @@ -28,6 +28,6 @@ void arm64_notify_die(const char *str, struct pt_regs *regs, struct mm_struct; extern void __show_regs(struct pt_regs *); -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_SYSTEM_MISC_H */ diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index f241b8601ebd..a803b887b0b4 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -10,7 +10,7 @@ #include <linux/compiler.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct task_struct; diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index 18a5dc0c9a54..a2d65d7d6aae 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -8,7 +8,7 @@ #ifndef __ASM_TLBFLUSH_H #define __ASM_TLBFLUSH_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/bitfield.h> #include <linux/mm_types.h> @@ -249,6 +249,19 @@ static inline unsigned long get_trans_granule(void) * cannot be easily determined, the value TLBI_TTL_UNKNOWN will * perform a non-hinted invalidation. * + * local_flush_tlb_page(vma, addr) + * Local variant of flush_tlb_page(). Stale TLB entries may + * remain in remote CPUs. + * + * local_flush_tlb_page_nonotify(vma, addr) + * Same as local_flush_tlb_page() except MMU notifier will not be + * called. + * + * local_flush_tlb_contpte(vma, addr) + * Invalidate the virtual-address range + * '[addr, addr+CONT_PTE_SIZE)' mapped with contpte on local CPU + * for the user address space corresponding to 'vma->mm'. Stale + * TLB entries may remain in remote CPUs. 
* * Finally, take a look at asm/tlb.h to see how tlb_flush() is implemented * on top of these routines, since that is our interface to the mmu_gather @@ -282,6 +295,33 @@ static inline void flush_tlb_mm(struct mm_struct *mm) mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL); } +static inline void __local_flush_tlb_page_nonotify_nosync(struct mm_struct *mm, + unsigned long uaddr) +{ + unsigned long addr; + + dsb(nshst); + addr = __TLBI_VADDR(uaddr, ASID(mm)); + __tlbi(vale1, addr); + __tlbi_user(vale1, addr); +} + +static inline void local_flush_tlb_page_nonotify(struct vm_area_struct *vma, + unsigned long uaddr) +{ + __local_flush_tlb_page_nonotify_nosync(vma->vm_mm, uaddr); + dsb(nsh); +} + +static inline void local_flush_tlb_page(struct vm_area_struct *vma, + unsigned long uaddr) +{ + __local_flush_tlb_page_nonotify_nosync(vma->vm_mm, uaddr); + mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, uaddr & PAGE_MASK, + (uaddr & PAGE_MASK) + PAGE_SIZE); + dsb(nsh); +} + static inline void __flush_tlb_page_nosync(struct mm_struct *mm, unsigned long uaddr) { @@ -472,6 +512,22 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma, dsb(ish); } +static inline void local_flush_tlb_contpte(struct vm_area_struct *vma, + unsigned long addr) +{ + unsigned long asid; + + addr = round_down(addr, CONT_PTE_SIZE); + + dsb(nshst); + asid = ASID(vma->vm_mm); + __flush_tlb_range_op(vale1, addr, CONT_PTES, PAGE_SIZE, asid, + 3, true, lpa2_is_enabled()); + mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, addr, + addr + CONT_PTE_SIZE); + dsb(nsh); +} + static inline void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { @@ -524,6 +580,33 @@ static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *b { __flush_tlb_range_nosync(mm, start, end, PAGE_SIZE, true, 3); } + +static inline bool __pte_flags_need_flush(ptdesc_t oldval, ptdesc_t newval) +{ + ptdesc_t diff = oldval ^ newval; + + /* invalid to valid transition requires no flush */ + if (!(oldval & PTE_VALID)) + return false; + + /* Transition in the SW bits requires no flush */ + diff &= ~PTE_SWBITS_MASK; + + return diff; +} + +static inline bool pte_needs_flush(pte_t oldpte, pte_t newpte) +{ + return __pte_flags_need_flush(pte_val(oldpte), pte_val(newpte)); +} +#define pte_needs_flush pte_needs_flush + +static inline bool huge_pmd_needs_flush(pmd_t oldpmd, pmd_t newpmd) +{ + return __pte_flags_need_flush(pmd_val(oldpmd), pmd_val(newpmd)); +} +#define huge_pmd_needs_flush huge_pmd_needs_flush + #endif #endif diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 1aa4ecb73429..6490930deef8 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -422,9 +422,9 @@ static __must_check __always_inline bool user_access_begin(const void __user *pt } #define user_access_begin(a,b) user_access_begin(a,b) #define user_access_end() uaccess_ttbr0_disable() -#define unsafe_put_user(x, ptr, label) \ +#define arch_unsafe_put_user(x, ptr, label) \ __raw_put_mem("sttr", x, uaccess_mask_ptr(ptr), label, U) -#define unsafe_get_user(x, ptr, label) \ +#define arch_unsafe_get_user(x, ptr, label) \ __raw_get_mem("ldtr", x, uaccess_mask_ptr(ptr), label, U) /* diff --git a/arch/arm64/include/asm/vdso.h b/arch/arm64/include/asm/vdso.h index 61679070f595..232b46969088 100644 --- a/arch/arm64/include/asm/vdso.h +++ b/arch/arm64/include/asm/vdso.h @@ -7,7 +7,7 @@ #define __VDSO_PAGES 4 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include 
<generated/vdso-offsets.h> @@ -19,6 +19,6 @@ extern char vdso_start[], vdso_end[]; extern char vdso32_start[], vdso32_end[]; -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* __ASM_VDSO_H */ diff --git a/arch/arm64/include/asm/vdso/compat_barrier.h b/arch/arm64/include/asm/vdso/compat_barrier.h index 6d75e03d3827..d7ebe7ceefa0 100644 --- a/arch/arm64/include/asm/vdso/compat_barrier.h +++ b/arch/arm64/include/asm/vdso/compat_barrier.h @@ -5,7 +5,7 @@ #ifndef __COMPAT_BARRIER_H #define __COMPAT_BARRIER_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * Warning: This code is meant to be used from the compat vDSO only. */ @@ -31,6 +31,6 @@ #define smp_rmb() aarch32_smp_rmb() #define smp_wmb() aarch32_smp_wmb() -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* __COMPAT_BARRIER_H */ diff --git a/arch/arm64/include/asm/vdso/compat_gettimeofday.h b/arch/arm64/include/asm/vdso/compat_gettimeofday.h index 7d1a116549b1..0d513f924321 100644 --- a/arch/arm64/include/asm/vdso/compat_gettimeofday.h +++ b/arch/arm64/include/asm/vdso/compat_gettimeofday.h @@ -5,7 +5,7 @@ #ifndef __ASM_VDSO_COMPAT_GETTIMEOFDAY_H #define __ASM_VDSO_COMPAT_GETTIMEOFDAY_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/barrier.h> #include <asm/unistd_compat_32.h> @@ -161,6 +161,6 @@ static inline bool vdso_clocksource_ok(const struct vdso_clock *vc) } #define vdso_clocksource_ok vdso_clocksource_ok -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* __ASM_VDSO_COMPAT_GETTIMEOFDAY_H */ diff --git a/arch/arm64/include/asm/vdso/getrandom.h b/arch/arm64/include/asm/vdso/getrandom.h index a2197da1951b..da1d58bbfabe 100644 --- a/arch/arm64/include/asm/vdso/getrandom.h +++ b/arch/arm64/include/asm/vdso/getrandom.h @@ -3,7 +3,7 @@ #ifndef __ASM_VDSO_GETRANDOM_H #define __ASM_VDSO_GETRANDOM_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/unistd.h> #include <asm/vdso/vsyscall.h> @@ -33,6 +33,6 @@ static __always_inline ssize_t getrandom_syscall(void *_buffer, size_t _len, uns return ret; } -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* __ASM_VDSO_GETRANDOM_H */ diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h index c59e84105b43..3658a757e255 100644 --- a/arch/arm64/include/asm/vdso/gettimeofday.h +++ b/arch/arm64/include/asm/vdso/gettimeofday.h @@ -7,7 +7,7 @@ #ifdef __aarch64__ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/alternative.h> #include <asm/arch_timer.h> @@ -96,7 +96,7 @@ static __always_inline const struct vdso_time_data *__arch_get_vdso_u_time_data( #define __arch_get_vdso_u_time_data __arch_get_vdso_u_time_data #endif /* IS_ENABLED(CONFIG_CC_IS_GCC) && IS_ENABLED(CONFIG_PAGE_SIZE_64KB) */ -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #else /* !__aarch64__ */ diff --git a/arch/arm64/include/asm/vdso/processor.h b/arch/arm64/include/asm/vdso/processor.h index ff830b766ad2..7abb0cc81cd6 100644 --- a/arch/arm64/include/asm/vdso/processor.h +++ b/arch/arm64/include/asm/vdso/processor.h @@ -5,13 +5,13 @@ #ifndef __ASM_VDSO_PROCESSOR_H #define __ASM_VDSO_PROCESSOR_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ static inline void cpu_relax(void) { asm volatile("yield" ::: "memory"); } -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_VDSO_PROCESSOR_H */ diff --git a/arch/arm64/include/asm/vdso/vsyscall.h b/arch/arm64/include/asm/vdso/vsyscall.h index 417aae5763a8..3f3c8eb74e2e 100644 --- a/arch/arm64/include/asm/vdso/vsyscall.h +++ 
b/arch/arm64/include/asm/vdso/vsyscall.h @@ -2,7 +2,7 @@ #ifndef __ASM_VDSO_VSYSCALL_H #define __ASM_VDSO_VSYSCALL_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <vdso/datapage.h> @@ -22,6 +22,6 @@ void __arch_update_vdso_clock(struct vdso_clock *vc) /* The asm-generic header needs to be included after the definitions above */ #include <asm-generic/vdso/vsyscall.h> -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* __ASM_VDSO_VSYSCALL_H */ diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index aa280f356b96..530af9620fdb 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -56,7 +56,7 @@ */ #define BOOT_CPU_FLAG_E2H BIT_ULL(32) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/ptrace.h> #include <asm/sections.h> @@ -161,6 +161,6 @@ static inline bool is_hyp_nvhe(void) return is_hyp_mode_available() && !is_kernel_in_hyp_mode(); } -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* ! __ASM__VIRT_H */ diff --git a/arch/arm64/include/asm/vmap_stack.h b/arch/arm64/include/asm/vmap_stack.h index 20873099c035..75daee1a07e9 100644 --- a/arch/arm64/include/asm/vmap_stack.h +++ b/arch/arm64/include/asm/vmap_stack.h @@ -3,9 +3,7 @@ #ifndef __ASM_VMAP_STACK_H #define __ASM_VMAP_STACK_H -#include <linux/bug.h> #include <linux/gfp.h> -#include <linux/kconfig.h> #include <linux/vmalloc.h> #include <linux/pgtable.h> #include <asm/memory.h> @@ -19,8 +17,6 @@ static inline unsigned long *arch_alloc_vmap_stack(size_t stack_size, int node) { void *p; - BUILD_BUG_ON(!IS_ENABLED(CONFIG_VMAP_STACK)); - p = __vmalloc_node(stack_size, THREAD_ALIGN, THREADINFO_GFP, node, __builtin_return_address(0)); return kasan_reset_tag(p); diff --git a/arch/arm64/include/asm/xor.h b/arch/arm64/include/asm/xor.h index befcd8a7abc9..c38e3d017a79 100644 --- a/arch/arm64/include/asm/xor.h +++ b/arch/arm64/include/asm/xor.h @@ -9,7 +9,7 @@ #include <linux/hardirq.h> #include <asm-generic/xor.h> #include <asm/hwcap.h> -#include <asm/neon.h> +#include <asm/simd.h> #ifdef CONFIG_KERNEL_MODE_NEON @@ -19,9 +19,8 @@ static void xor_neon_2(unsigned long bytes, unsigned long * __restrict p1, const unsigned long * __restrict p2) { - kernel_neon_begin(); - xor_block_inner_neon.do_2(bytes, p1, p2); - kernel_neon_end(); + scoped_ksimd() + xor_block_inner_neon.do_2(bytes, p1, p2); } static void @@ -29,9 +28,8 @@ xor_neon_3(unsigned long bytes, unsigned long * __restrict p1, const unsigned long * __restrict p2, const unsigned long * __restrict p3) { - kernel_neon_begin(); - xor_block_inner_neon.do_3(bytes, p1, p2, p3); - kernel_neon_end(); + scoped_ksimd() + xor_block_inner_neon.do_3(bytes, p1, p2, p3); } static void @@ -40,9 +38,8 @@ xor_neon_4(unsigned long bytes, unsigned long * __restrict p1, const unsigned long * __restrict p3, const unsigned long * __restrict p4) { - kernel_neon_begin(); - xor_block_inner_neon.do_4(bytes, p1, p2, p3, p4); - kernel_neon_end(); + scoped_ksimd() + xor_block_inner_neon.do_4(bytes, p1, p2, p3, p4); } static void @@ -52,9 +49,8 @@ xor_neon_5(unsigned long bytes, unsigned long * __restrict p1, const unsigned long * __restrict p4, const unsigned long * __restrict p5) { - kernel_neon_begin(); - xor_block_inner_neon.do_5(bytes, p1, p2, p3, p4, p5); - kernel_neon_end(); + scoped_ksimd() + xor_block_inner_neon.do_5(bytes, p1, p2, p3, p4, p5); } static struct xor_block_template xor_block_arm64 = { diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index ed5f3892674c..a792a599b9d6 100644 
--- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -31,7 +31,7 @@ #define KVM_SPSR_FIQ 4 #define KVM_NR_SPSR 5 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/psci.h> #include <linux/types.h> #include <asm/ptrace.h> diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index 0f39ba4f3efd..6fed93fb2536 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h @@ -80,7 +80,7 @@ #define PTRACE_PEEKMTETAGS 33 #define PTRACE_POKEMTETAGS 34 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * User structures for general purpose, floating point and debug registers. @@ -332,6 +332,6 @@ struct user_gcs { __u64 gcspr_el0; }; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _UAPI__ASM_PTRACE_H */ diff --git a/arch/arm64/include/uapi/asm/sigcontext.h b/arch/arm64/include/uapi/asm/sigcontext.h index d42f7a92238b..e29bf3e2d0cc 100644 --- a/arch/arm64/include/uapi/asm/sigcontext.h +++ b/arch/arm64/include/uapi/asm/sigcontext.h @@ -17,7 +17,7 @@ #ifndef _UAPI__ASM_SIGCONTEXT_H #define _UAPI__ASM_SIGCONTEXT_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> @@ -192,7 +192,7 @@ struct gcs_context { __u64 reserved; }; -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #include <asm/sve_context.h> diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index 7aca29e1d30b..af90128cfed5 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -133,7 +133,7 @@ static int __init acpi_fadt_sanity_check(void) /* * FADT is required on arm64; retrieve it to check its presence - * and carry out revision and ACPI HW reduced compliancy tests + * and carry out revision and ACPI HW reduced compliance tests */ status = acpi_get_table(ACPI_SIG_FADT, 0, &table); if (ACPI_FAILURE(status)) { @@ -197,8 +197,6 @@ out: */ void __init acpi_boot_table_init(void) { - int ret; - /* * Enable ACPI instead of device tree unless * - ACPI has been disabled explicitly (acpi=off), or @@ -252,12 +250,8 @@ done: * behaviour, use acpi=nospcr to disable console in ACPI SPCR * table as default serial console. */ - ret = acpi_parse_spcr(earlycon_acpi_spcr_enable, + acpi_parse_spcr(earlycon_acpi_spcr_enable, !param_acpi_nospcr); - if (!ret || param_acpi_nospcr || !IS_ENABLED(CONFIG_ACPI_SPCR_TABLE)) - pr_info("Use ACPI SPCR as default console: No\n"); - else - pr_info("Use ACPI SPCR as default console: Yes\n"); if (IS_ENABLED(CONFIG_ACPI_BGRT)) acpi_table_parse(ACPI_SIG_BGRT, acpi_parse_bgrt); @@ -357,16 +351,6 @@ void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) * as long as we take care not to create a writable * mapping for executable code. */ - fallthrough; - - case EFI_ACPI_MEMORY_NVS: - /* - * ACPI NVS marks an area reserved for use by the - * firmware, even after exiting the boot service. - * This may be used by the firmware for sharing dynamic - * tables/data (e.g., ACPI CCEL) with the OS. Map it - * as read-only. 
- */ prot = PAGE_KERNEL_RO; break; @@ -439,7 +423,7 @@ int apei_claim_sea(struct pt_regs *regs) irq_work_run(); __irq_exit(); } else { - pr_warn_ratelimited("APEI work queued but not completed"); + pr_warn_ratelimited("APEI work queued but not completed\n"); err = -EINPROGRESS; } } diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index 8ff6610af496..f5ec7e7c1d3f 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -139,9 +139,9 @@ static noinstr void clean_dcache_range_nopatch(u64 start, u64 end) } while (cur += d_size, cur < end); } -static void __apply_alternatives(const struct alt_region *region, - bool is_module, - unsigned long *cpucap_mask) +static int __apply_alternatives(const struct alt_region *region, + bool is_module, + unsigned long *cpucap_mask) { struct alt_instr *alt; __le32 *origptr, *updptr; @@ -166,10 +166,13 @@ static void __apply_alternatives(const struct alt_region *region, updptr = is_module ? origptr : lm_alias(origptr); nr_inst = alt->orig_len / AARCH64_INSN_SIZE; - if (ALT_HAS_CB(alt)) + if (ALT_HAS_CB(alt)) { alt_cb = ALT_REPL_PTR(alt); - else + if (is_module && !core_kernel_text((unsigned long)alt_cb)) + return -ENOEXEC; + } else { alt_cb = patch_alternative; + } alt_cb(alt, origptr, updptr, nr_inst); @@ -193,6 +196,8 @@ static void __apply_alternatives(const struct alt_region *region, bitmap_and(applied_alternatives, applied_alternatives, system_cpucaps, ARM64_NCAPS); } + + return 0; } static void __init apply_alternatives_vdso(void) @@ -277,7 +282,7 @@ void __init apply_boot_alternatives(void) } #ifdef CONFIG_MODULES -void apply_alternatives_module(void *start, size_t length) +int apply_alternatives_module(void *start, size_t length) { struct alt_region region = { .begin = start, @@ -287,7 +292,7 @@ void apply_alternatives_module(void *start, size_t length) bitmap_fill(all_capabilities, ARM64_NCAPS); - __apply_alternatives(®ion, true, &all_capabilities[0]); + return __apply_alternatives(®ion, true, &all_capabilities[0]); } #endif diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 5ed401ff79e3..42b182cfa404 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -95,6 +95,7 @@ #include <asm/vectors.h> #include <asm/virt.h> +#include <asm/spectre.h> /* Kernel representation of AT_HWCAP and AT_HWCAP2 */ static DECLARE_BITMAP(elf_hwcap, MAX_CPU_FEATURES) __read_mostly; @@ -1002,7 +1003,7 @@ static void __init sort_ftr_regs(void) /* * Initialise the CPU feature register from Boot CPU values. - * Also initiliases the strict_mask for the register. + * Also initialises the strict_mask for the register. * Any bits that are not covered by an arm64_ftr_bits entry are considered * RES0 for the system-wide value, and must strictly match. 
*/ @@ -1969,7 +1970,7 @@ static struct cpumask dbm_cpus __read_mostly; static inline void __cpu_enable_hw_dbm(void) { - u64 tcr = read_sysreg(tcr_el1) | TCR_HD; + u64 tcr = read_sysreg(tcr_el1) | TCR_EL1_HD; write_sysreg(tcr, tcr_el1); isb(); @@ -2255,7 +2256,7 @@ static bool has_generic_auth(const struct arm64_cpu_capabilities *entry, static void cpu_enable_e0pd(struct arm64_cpu_capabilities const *cap) { if (this_cpu_has_cap(ARM64_HAS_E0PD)) - sysreg_clear_set(tcr_el1, 0, TCR_E0PD1); + sysreg_clear_set(tcr_el1, 0, TCR_EL1_E0PD1); } #endif /* CONFIG_ARM64_E0PD */ @@ -3875,6 +3876,11 @@ static void __init setup_system_capabilities(void) */ if (system_uses_ttbr0_pan()) pr_info("emulated: Privileged Access Never (PAN) using TTBR0_EL1 switching\n"); + + /* + * Report Spectre mitigations status. + */ + spectre_print_disabled_mitigations(); } void __init setup_system_features(void) diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 6c371b158b99..a81cb4aa4738 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -10,6 +10,7 @@ #include <linux/efi.h> #include <linux/init.h> #include <linux/kmemleak.h> +#include <linux/kthread.h> #include <linux/screen_info.h> #include <linux/vmalloc.h> @@ -165,20 +166,53 @@ asmlinkage efi_status_t efi_handle_corrupted_x18(efi_status_t s, const char *f) return s; } -static DEFINE_RAW_SPINLOCK(efi_rt_lock); - void arch_efi_call_virt_setup(void) { - efi_virtmap_load(); - raw_spin_lock(&efi_rt_lock); + efi_runtime_assert_lock_held(); + + if (preemptible() && (current->flags & PF_KTHREAD)) { + /* + * Disable migration to ensure that a preempted EFI runtime + * service call will be resumed on the same CPU. This avoids + * potential issues with EFI runtime calls that are preempted + * while polling for an asynchronous completion of a secure + * firmware call, which may not permit the CPU to change. + */ + migrate_disable(); + kthread_use_mm(&efi_mm); + } else { + efi_virtmap_load(); + } + + /* + * Enable access to the valid TTBR0_EL1 and invoke the errata + * workaround directly since there is no return from exception when + * invoking the EFI run-time services. + */ + uaccess_ttbr0_enable(); + post_ttbr_update_workaround(); + __efi_fpsimd_begin(); } void arch_efi_call_virt_teardown(void) { __efi_fpsimd_end(); - raw_spin_unlock(&efi_rt_lock); - efi_virtmap_unload(); + + /* + * Defer the switch to the current thread's TTBR0_EL1 until + * uaccess_enable(). Do so before efi_virtmap_unload() updates the + * saved TTBR0 value, so the userland page tables are not activated + * inadvertently over the back of an exception. + */ + uaccess_ttbr0_disable(); + + if (preemptible() && (current->flags & PF_KTHREAD)) { + kthread_unuse_mm(&efi_mm); + migrate_enable(); + } else { + efi_virtmap_unload(); + } } asmlinkage u64 *efi_rt_stack_top __ro_after_init; diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index a9c81715ce59..3625797e9ee8 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -34,20 +34,12 @@ * Handle IRQ/context state management when entering from kernel mode. * Before this function is called it is not safe to call regular kernel code, * instrumentable code, or any code which may trigger an exception. - * - * This is intended to match the logic in irqentry_enter(), handling the kernel - * mode transitions only. 
*/ -static __always_inline irqentry_state_t __enter_from_kernel_mode(struct pt_regs *regs) -{ - return irqentry_enter(regs); -} - static noinstr irqentry_state_t enter_from_kernel_mode(struct pt_regs *regs) { irqentry_state_t state; - state = __enter_from_kernel_mode(regs); + state = irqentry_enter(regs); mte_check_tfsr_entry(); mte_disable_tco_entry(current); @@ -58,21 +50,12 @@ static noinstr irqentry_state_t enter_from_kernel_mode(struct pt_regs *regs) * Handle IRQ/context state management when exiting to kernel mode. * After this function returns it is not safe to call regular kernel code, * instrumentable code, or any code which may trigger an exception. - * - * This is intended to match the logic in irqentry_exit(), handling the kernel - * mode transitions only, and with preemption handled elsewhere. */ -static __always_inline void __exit_to_kernel_mode(struct pt_regs *regs, - irqentry_state_t state) -{ - irqentry_exit(regs, state); -} - static void noinstr exit_to_kernel_mode(struct pt_regs *regs, irqentry_state_t state) { mte_check_tfsr_exit(); - __exit_to_kernel_mode(regs, state); + irqentry_exit(regs, state); } /* @@ -80,17 +63,12 @@ static void noinstr exit_to_kernel_mode(struct pt_regs *regs, * Before this function is called it is not safe to call regular kernel code, * instrumentable code, or any code which may trigger an exception. */ -static __always_inline void __enter_from_user_mode(struct pt_regs *regs) +static __always_inline void arm64_enter_from_user_mode(struct pt_regs *regs) { enter_from_user_mode(regs); mte_disable_tco_entry(current); } -static __always_inline void arm64_enter_from_user_mode(struct pt_regs *regs) -{ - __enter_from_user_mode(regs); -} - /* * Handle IRQ/context state management when exiting to user mode. * After this function returns it is not safe to call regular kernel code, @@ -100,7 +78,7 @@ static __always_inline void arm64_enter_from_user_mode(struct pt_regs *regs) static __always_inline void arm64_exit_to_user_mode(struct pt_regs *regs) { local_irq_disable(); - exit_to_user_mode_prepare(regs); + exit_to_user_mode_prepare_legacy(regs); local_daif_mask(); mte_check_tfsr_exit(); exit_to_user_mode(); diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S index 169ccf600066..025140caafe7 100644 --- a/arch/arm64/kernel/entry-ftrace.S +++ b/arch/arm64/kernel/entry-ftrace.S @@ -94,7 +94,7 @@ SYM_CODE_START(ftrace_caller) stp x29, x30, [sp, #FREGS_SIZE] add x29, sp, #FREGS_SIZE - /* Prepare arguments for the the tracer func */ + /* Prepare arguments for the tracer func */ sub x0, x30, #AARCH64_INSN_SIZE // ip (callsite's BL insn) mov x1, x9 // parent_ip (callsite's LR) mov x3, sp // regs diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index e3f8f51748bc..c154f72634e0 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -225,10 +225,21 @@ static void fpsimd_bind_task_to_cpu(void); */ static void get_cpu_fpsimd_context(void) { - if (!IS_ENABLED(CONFIG_PREEMPT_RT)) - local_bh_disable(); - else + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) { + /* + * The softirq subsystem lacks a true unmask/mask API, and + * re-enabling softirq processing using local_bh_enable() will + * not only unmask softirqs, it will also result in immediate + * delivery of any pending softirqs. + * This is undesirable when running with IRQs disabled, but in + * that case, there is no need to mask softirqs in the first + * place, so only bother doing so when IRQs are enabled. 
+ */ + if (!irqs_disabled()) + local_bh_disable(); + } else { preempt_disable(); + } } /* @@ -240,10 +251,12 @@ static void get_cpu_fpsimd_context(void) */ static void put_cpu_fpsimd_context(void) { - if (!IS_ENABLED(CONFIG_PREEMPT_RT)) - local_bh_enable(); - else + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) { + if (!irqs_disabled()) + local_bh_enable(); + } else { preempt_enable(); + } } unsigned int task_get_vl(const struct task_struct *task, enum vec_type type) @@ -1489,21 +1502,23 @@ static void fpsimd_load_kernel_state(struct task_struct *task) * Elide the load if this CPU holds the most recent kernel mode * FPSIMD context of the current task. */ - if (last->st == &task->thread.kernel_fpsimd_state && + if (last->st == task->thread.kernel_fpsimd_state && task->thread.kernel_fpsimd_cpu == smp_processor_id()) return; - fpsimd_load_state(&task->thread.kernel_fpsimd_state); + fpsimd_load_state(task->thread.kernel_fpsimd_state); } static void fpsimd_save_kernel_state(struct task_struct *task) { struct cpu_fp_state cpu_fp_state = { - .st = &task->thread.kernel_fpsimd_state, + .st = task->thread.kernel_fpsimd_state, .to_save = FP_STATE_FPSIMD, }; - fpsimd_save_state(&task->thread.kernel_fpsimd_state); + BUG_ON(!cpu_fp_state.st); + + fpsimd_save_state(task->thread.kernel_fpsimd_state); fpsimd_bind_state_to_cpu(&cpu_fp_state); task->thread.kernel_fpsimd_cpu = smp_processor_id(); @@ -1774,6 +1789,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state) void fpsimd_flush_task_state(struct task_struct *t) { t->thread.fpsimd_cpu = NR_CPUS; + t->thread.kernel_fpsimd_state = NULL; /* * If we don't support fpsimd, bail out after we have * reset the fpsimd_cpu for this task and clear the @@ -1833,12 +1849,19 @@ void fpsimd_save_and_flush_cpu_state(void) * * The caller may freely use the FPSIMD registers until kernel_neon_end() is * called. + * + * Unless called from non-preemptible task context, @state must point to a + * caller provided buffer that will be used to preserve the task's kernel mode + * FPSIMD context when it is scheduled out, or if it is interrupted by kernel + * mode FPSIMD occurring in softirq context. May be %NULL otherwise. */ -void kernel_neon_begin(void) +void kernel_neon_begin(struct user_fpsimd_state *state) { if (WARN_ON(!system_supports_fpsimd())) return; + WARN_ON((preemptible() || in_serving_softirq()) && !state); + BUG_ON(!may_use_simd()); get_cpu_fpsimd_context(); @@ -1846,7 +1869,7 @@ void kernel_neon_begin(void) /* Save unsaved fpsimd state, if any: */ if (test_thread_flag(TIF_KERNEL_FPSTATE)) { BUG_ON(IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq()); - fpsimd_save_kernel_state(current); + fpsimd_save_state(state); } else { fpsimd_save_user_state(); @@ -1867,8 +1890,16 @@ void kernel_neon_begin(void) * mode in task context. So in this case, setting the flag here * is always appropriate. */ - if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq()) + if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq()) { + /* + * Record the caller provided buffer as the kernel mode + * FP/SIMD buffer for this task, so that the state can + * be preserved and restored on a context switch. 
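
[ Editorial aside, not part of the hunk above: kernel_neon_begin() and kernel_neon_end() now take a caller-supplied struct user_fpsimd_state buffer rather than relying on state embedded in the task. A minimal sketch of how a preemptible caller might adapt to the new prototypes; the function name and the on-stack buffer are illustrative assumptions, not code from this series:

        #include <asm/fpsimd.h>
        #include <asm/neon.h>
        #include <asm/simd.h>

        static void do_neon_work(void)
        {
                /*
                 * Caller-provided backing store; it must stay valid until
                 * kernel_neon_end(), since the task's kernel-mode FP/SIMD
                 * state may be saved into it on a context switch.
                 */
                struct user_fpsimd_state simd_state;

                if (!may_use_simd())
                        return;         /* fall back to a scalar path */

                kernel_neon_begin(&simd_state);
                /* ... ASIMD/NEON computation ... */
                kernel_neon_end(&simd_state);
        }

Per the kerneldoc in this hunk, NULL remains acceptable from non-preemptible task context, and the pointer passed to kernel_neon_end() must match the one given to kernel_neon_begin(). ]
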
+ */ + WARN_ON(current->thread.kernel_fpsimd_state != NULL); + current->thread.kernel_fpsimd_state = state; set_thread_flag(TIF_KERNEL_FPSTATE); + } } /* Invalidate any task state remaining in the fpsimd regs: */ @@ -1886,22 +1917,30 @@ EXPORT_SYMBOL_GPL(kernel_neon_begin); * * The caller must not use the FPSIMD registers after this function is called, * unless kernel_neon_begin() is called again in the meantime. + * + * The value of @state must match the value passed to the preceding call to + * kernel_neon_begin(). */ -void kernel_neon_end(void) +void kernel_neon_end(struct user_fpsimd_state *state) { if (!system_supports_fpsimd()) return; + if (!test_thread_flag(TIF_KERNEL_FPSTATE)) + return; + /* * If we are returning from a nested use of kernel mode FPSIMD, restore * the task context kernel mode FPSIMD state. This can only happen when * running in softirq context on non-PREEMPT_RT. */ - if (!IS_ENABLED(CONFIG_PREEMPT_RT) && in_serving_softirq() && - test_thread_flag(TIF_KERNEL_FPSTATE)) - fpsimd_load_kernel_state(current); - else + if (!IS_ENABLED(CONFIG_PREEMPT_RT) && in_serving_softirq()) { + fpsimd_load_state(state); + } else { clear_thread_flag(TIF_KERNEL_FPSTATE); + WARN_ON(current->thread.kernel_fpsimd_state != state); + current->thread.kernel_fpsimd_state = NULL; + } } EXPORT_SYMBOL_GPL(kernel_neon_end); @@ -1934,11 +1973,11 @@ void __efi_fpsimd_begin(void) if (!system_supports_fpsimd()) return; - WARN_ON(preemptible()); - if (may_use_simd()) { - kernel_neon_begin(); + kernel_neon_begin(&efi_fpsimd_state); } else { + WARN_ON(preemptible()); + /* * If !efi_sve_state, SVE can't be in use yet and doesn't need * preserving: @@ -1986,7 +2025,7 @@ void __efi_fpsimd_end(void) return; if (!efi_fpsimd_state_used) { - kernel_neon_end(); + kernel_neon_end(&efi_fpsimd_state); } else { if (system_supports_sve() && efi_sve_state_used) { bool ffr = true; diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 5adad37ab4fa..5a1554a44162 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -492,7 +492,7 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, return ret; /* - * When using mcount, callsites in modules may have been initalized to + * When using mcount, callsites in modules may have been initialized to * call an arbitrary module PLT (which redirects to the _mcount stub) * rather than the ftrace PLT we'll use at runtime (which redirects to * the ftrace trampoline). We can ignore the old PLT when initializing diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index c0065a1d77cf..15dedb385b9e 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -62,7 +62,7 @@ static void __init init_irq_stacks(void) } } -#ifndef CONFIG_PREEMPT_RT +#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK static void ____do_softirq(struct pt_regs *regs) { __do_softirq(); diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index 6f121a0164a4..239c16e3d02f 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -251,7 +251,7 @@ void crash_post_resume(void) * marked as Reserved as memory was allocated via memblock_reserve(). * * In hibernation, the pages which are Reserved and yet "nosave" are excluded - * from the hibernation iamge. crash_is_nosave() does thich check for crash + * from the hibernation image. crash_is_nosave() does thich check for crash * dump kernel and will reduce the total size of hibernation image. 
*/ diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index d6d443c4a01a..24adb581af0e 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -489,16 +489,29 @@ int module_finalize(const Elf_Ehdr *hdr, int ret; s = find_section(hdr, sechdrs, ".altinstructions"); - if (s) - apply_alternatives_module((void *)s->sh_addr, s->sh_size); + if (s) { + ret = apply_alternatives_module((void *)s->sh_addr, s->sh_size); + if (ret < 0) { + pr_err("module %s: error occurred when applying alternatives\n", me->name); + return ret; + } + } if (scs_is_dynamic()) { s = find_section(hdr, sechdrs, ".init.eh_frame"); if (s) { - ret = __pi_scs_patch((void *)s->sh_addr, s->sh_size); - if (ret) + /* + * Because we can reject modules that are malformed + * so SCS patching fails, skip dry run and try to patch + * it in place. If patching fails, the module would not + * be loaded anyway. + */ + ret = __pi_scs_patch((void *)s->sh_addr, s->sh_size, true); + if (ret) { pr_err("module %s: error occurred during dynamic SCS patching (%d)\n", me->name, ret); + return -ENOEXEC; + } } } diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index 43f7a2f39403..32148bf09c1d 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -476,7 +476,8 @@ static int __access_remote_tags(struct mm_struct *mm, unsigned long addr, folio = page_folio(page); if (folio_test_hugetlb(folio)) - WARN_ON_ONCE(!folio_test_hugetlb_mte_tagged(folio)); + WARN_ON_ONCE(!folio_test_hugetlb_mte_tagged(folio) && + !is_huge_zero_folio(folio)); else WARN_ON_ONCE(!page_mte_tagged(page) && !is_zero_page(page)); diff --git a/arch/arm64/kernel/pi/map_kernel.c b/arch/arm64/kernel/pi/map_kernel.c index e8ddbde31a83..a852264958c3 100644 --- a/arch/arm64/kernel/pi/map_kernel.c +++ b/arch/arm64/kernel/pi/map_kernel.c @@ -104,7 +104,7 @@ static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level) if (enable_scs) { scs_patch(__eh_frame_start + va_offset, - __eh_frame_end - __eh_frame_start); + __eh_frame_end - __eh_frame_start, false); asm("ic ialluis"); dynamic_scs_is_enabled = true; @@ -141,13 +141,13 @@ static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level) static void noinline __section(".idmap.text") set_ttbr0_for_lpa2(phys_addr_t ttbr) { u64 sctlr = read_sysreg(sctlr_el1); - u64 tcr = read_sysreg(tcr_el1) | TCR_DS; + u64 tcr = read_sysreg(tcr_el1) | TCR_EL1_DS; u64 mmfr0 = read_sysreg(id_aa64mmfr0_el1); u64 parange = cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_EL1_PARANGE_SHIFT); - tcr &= ~TCR_IPS_MASK; - tcr |= parange << TCR_IPS_SHIFT; + tcr &= ~TCR_EL1_IPS_MASK; + tcr |= parange << TCR_EL1_IPS_SHIFT; asm(" msr sctlr_el1, %0 ;" " isb ;" @@ -263,7 +263,7 @@ asmlinkage void __init early_map_kernel(u64 boot_status, phys_addr_t fdt) } if (va_bits > VA_BITS_MIN) - sysreg_clear_set(tcr_el1, TCR_T1SZ_MASK, TCR_T1SZ(va_bits)); + sysreg_clear_set(tcr_el1, TCR_EL1_T1SZ_MASK, TCR_T1SZ(va_bits)); /* * The virtual KASLR displacement modulo 2MiB is decided by the diff --git a/arch/arm64/kernel/pi/patch-scs.c b/arch/arm64/kernel/pi/patch-scs.c index 55d0cd64ef71..bbe7d30ed12b 100644 --- a/arch/arm64/kernel/pi/patch-scs.c +++ b/arch/arm64/kernel/pi/patch-scs.c @@ -225,7 +225,7 @@ static int scs_handle_fde_frame(const struct eh_frame *frame, return 0; } -int scs_patch(const u8 eh_frame[], int size) +int scs_patch(const u8 eh_frame[], int size, bool skip_dry_run) { int code_alignment_factor = 1; bool fde_use_sdata8 = false; @@ -277,11 +277,13 @@ int scs_patch(const 
u8 eh_frame[], int size) } } else { ret = scs_handle_fde_frame(frame, code_alignment_factor, - fde_use_sdata8, true); + fde_use_sdata8, !skip_dry_run); if (ret) return ret; - scs_handle_fde_frame(frame, code_alignment_factor, - fde_use_sdata8, false); + + if (!skip_dry_run) + scs_handle_fde_frame(frame, code_alignment_factor, + fde_use_sdata8, false); } p += sizeof(frame->size) + frame->size; diff --git a/arch/arm64/kernel/pi/pi.h b/arch/arm64/kernel/pi/pi.h index 08ef9f80456b..aec3172d4003 100644 --- a/arch/arm64/kernel/pi/pi.h +++ b/arch/arm64/kernel/pi/pi.h @@ -27,7 +27,7 @@ extern pgd_t init_pg_dir[], init_pg_end[]; void init_feature_override(u64 boot_status, const void *fdt, int chosen); u64 kaslr_early_init(void *fdt, int chosen); void relocate_kernel(u64 offset); -int scs_patch(const u8 eh_frame[], int size); +int scs_patch(const u8 eh_frame[], int size, bool skip_dry_run); void map_range(phys_addr_t *pte, u64 start, u64 end, phys_addr_t pa, pgprot_t prot, int level, pte_t *tbl, bool may_use_cont, diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index 8ab6104a4883..43a0361a8bf0 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -49,7 +49,10 @@ void *alloc_insn_page(void) addr = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE); if (!addr) return NULL; - set_memory_rox((unsigned long)addr, 1); + if (set_memory_rox((unsigned long)addr, 1)) { + execmem_free(addr); + return NULL; + } return addr; } diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c index 2799bdb2fb82..941668800aea 100644 --- a/arch/arm64/kernel/probes/uprobes.c +++ b/arch/arm64/kernel/probes/uprobes.c @@ -131,7 +131,7 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) struct uprobe_task *utask = current->utask; /* - * Task has received a fatal signal, so reset back to probbed + * Task has received a fatal signal, so reset back to probed * address. */ instruction_pointer_set(regs, utask->vaddr); diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index f9a32dfde006..80a580e019c5 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -91,12 +91,7 @@ early_param("nospectre_v2", parse_spectre_v2_param); static bool spectre_v2_mitigations_off(void) { - bool ret = __nospectre_v2 || cpu_mitigations_off(); - - if (ret) - pr_info_once("spectre-v2 mitigation disabled by command line option\n"); - - return ret; + return __nospectre_v2 || cpu_mitigations_off(); } static const char *get_bhb_affected_string(enum mitigation_state bhb_state) @@ -421,13 +416,8 @@ early_param("ssbd", parse_spectre_v4_param); */ static bool spectre_v4_mitigations_off(void) { - bool ret = cpu_mitigations_off() || - __spectre_v4_policy == SPECTRE_V4_POLICY_MITIGATION_DISABLED; - - if (ret) - pr_info_once("spectre-v4 mitigation disabled by command-line option\n"); - - return ret; + return cpu_mitigations_off() || + __spectre_v4_policy == SPECTRE_V4_POLICY_MITIGATION_DISABLED; } /* Do we need to toggle the mitigation state on entry to/exit from the kernel? */ @@ -1043,9 +1033,7 @@ void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry) if (arm64_get_spectre_v2_state() == SPECTRE_VULNERABLE) { /* No point mitigating Spectre-BHB alone. 
*/ } else if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY)) { - pr_info_once("spectre-bhb mitigation disabled by compile time option\n"); - } else if (cpu_mitigations_off() || __nospectre_bhb) { - pr_info_once("spectre-bhb mitigation disabled by command line option\n"); + /* Do nothing */ } else if (supports_ecbhb(SCOPE_LOCAL_CPU)) { state = SPECTRE_MITIGATED; set_bit(BHB_HW, &system_bhb_mitigations); @@ -1199,3 +1187,18 @@ void unpriv_ebpf_notify(int new_state) pr_err("WARNING: %s", EBPF_WARN); } #endif + +void spectre_print_disabled_mitigations(void) +{ + /* Keep a single copy of the common message suffix to avoid duplication. */ + const char *spectre_disabled_suffix = "mitigation disabled by command-line option\n"; + + if (spectre_v2_mitigations_off()) + pr_info("spectre-v2 %s", spectre_disabled_suffix); + + if (spectre_v4_mitigations_off()) + pr_info("spectre-v4 %s", spectre_disabled_suffix); + + if (__nospectre_bhb || cpu_mitigations_off()) + pr_info("spectre-bhb %s", spectre_disabled_suffix); +} diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 4b001121c72d..b9bdd83fbbca 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -912,13 +912,39 @@ static int sve_set_common(struct task_struct *target, return -EINVAL; /* - * Apart from SVE_PT_REGS_MASK, all SVE_PT_* flags are consumed by - * vec_set_vector_length(), which will also validate them for us: + * On systems without SVE we accept FPSIMD format writes with + * a VL of 0 to allow exiting streaming mode, otherwise a VL + * is required. */ - ret = vec_set_vector_length(target, type, header.vl, - ((unsigned long)header.flags & ~SVE_PT_REGS_MASK) << 16); - if (ret) - return ret; + if (header.vl) { + /* + * If the system does not support SVE we can't + * configure a SVE VL. + */ + if (!system_supports_sve() && type == ARM64_VEC_SVE) + return -EINVAL; + + /* + * Apart from SVE_PT_REGS_MASK, all SVE_PT_* flags are + * consumed by vec_set_vector_length(), which will + * also validate them for us: + */ + ret = vec_set_vector_length(target, type, header.vl, + ((unsigned long)header.flags & ~SVE_PT_REGS_MASK) << 16); + if (ret) + return ret; + } else { + /* If the system supports SVE we require a VL. */ + if (system_supports_sve()) + return -EINVAL; + + /* + * Only FPSIMD formatted data with no flags set is + * supported. 
+ */ + if (header.flags != SVE_PT_REGS_FPSIMD) + return -EINVAL; + } /* Allocate SME storage if necessary, preserving any existing ZA/ZT state */ if (type == ARM64_VEC_SME) { @@ -1016,7 +1042,7 @@ static int sve_set(struct task_struct *target, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf) { - if (!system_supports_sve()) + if (!system_supports_sve() && !system_supports_sme()) return -EINVAL; return sve_set_common(target, regset, pos, count, kbuf, ubuf, diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c index 95169f7b6531..778f2a1faac8 100644 --- a/arch/arm64/kernel/sdei.c +++ b/arch/arm64/kernel/sdei.c @@ -63,8 +63,6 @@ static void free_sdei_stacks(void) { int cpu; - BUILD_BUG_ON(!IS_ENABLED(CONFIG_VMAP_STACK)); - for_each_possible_cpu(cpu) { _free_sdei_stack(&sdei_stack_normal_ptr, cpu); _free_sdei_stack(&sdei_stack_critical_ptr, cpu); @@ -88,8 +86,6 @@ static int init_sdei_stacks(void) int cpu; int err = 0; - BUILD_BUG_ON(!IS_ENABLED(CONFIG_VMAP_STACK)); - for_each_possible_cpu(cpu) { err = _init_sdei_stack(&sdei_stack_normal_ptr, cpu); if (err) @@ -202,7 +198,7 @@ out_err: /* * do_sdei_event() returns one of: * SDEI_EV_HANDLED - success, return to the interrupted context. - * SDEI_EV_FAILED - failure, return this error code to firmare. + * SDEI_EV_FAILED - failure, return this error code to firmware. * virtual-address - success, return to this address. */ unsigned long __kprobes do_sdei_event(struct pt_regs *regs, diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 68cea3a4a35c..1aa324104afb 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -350,7 +350,7 @@ void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) /* * Now that the dying CPU is beyond the point of no return w.r.t. - * in-kernel synchronisation, try to get the firwmare to help us to + * in-kernel synchronisation, try to get the firmware to help us to * verify that it has really left the kernel before we consider * clobbering anything it might still be using. */ @@ -523,7 +523,7 @@ int arch_register_cpu(int cpu) /* * Availability of the acpi handle is sufficient to establish - * that _STA has aleady been checked. No need to recheck here. + * that _STA has already been checked. No need to recheck here. */ c->hotpluggable = arch_cpu_is_hotpluggable(cpu); @@ -1094,7 +1094,7 @@ static void ipi_setup_sgi(int ipi) irq = ipi_irq_base + ipi; if (ipi_should_be_nmi(ipi)) { - err = request_percpu_nmi(irq, ipi_handler, "IPI", &irq_stat); + err = request_percpu_nmi(irq, ipi_handler, "IPI", NULL, &irq_stat); WARN(err, "Could not request IRQ %d as NMI, err=%d\n", irq, err); } else { err = request_percpu_irq(irq, ipi_handler, "IPI", &irq_stat); diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index aba7ca6bca2d..c062badd1a56 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -96,7 +96,7 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, * (Similarly for HVC and SMC elsewhere.) */ - if (flags & _TIF_MTE_ASYNC_FAULT) { + if (unlikely(flags & _TIF_MTE_ASYNC_FAULT)) { /* * Process the asynchronous tag check fault before the actual * syscall. 
do_notify_resume() will send a signal to userspace diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 681939ef5d16..914282016069 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -922,7 +922,7 @@ void __noreturn panic_bad_stack(struct pt_regs *regs, unsigned long esr, unsigne __show_regs(regs); /* - * We use nmi_panic to limit the potential for recusive overflows, and + * We use nmi_panic to limit the potential for recursive overflows, and * to get a better stack trace. */ nmi_panic(NULL, "kernel stack overflow"); diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index ffa3536581f6..9d0efed91414 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -63,7 +63,7 @@ VDSO_CFLAGS += -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ $(filter -Werror,$(KBUILD_CPPFLAGS)) \ -Werror-implicit-function-declaration \ -Wno-format-security \ - -std=gnu11 + -std=gnu11 -fms-extensions VDSO_CFLAGS += -O2 # Some useful compiler-dependent flags from top-level Makefile VDSO_CFLAGS += $(call cc32-option,-Wno-pointer-sign) @@ -71,6 +71,7 @@ VDSO_CFLAGS += -fno-strict-overflow VDSO_CFLAGS += $(call cc32-option,-Werror=strict-prototypes) VDSO_CFLAGS += -Werror=date-time VDSO_CFLAGS += $(call cc32-option,-Werror=incompatible-pointer-types) +VDSO_CFLAGS += $(if $(CONFIG_CC_IS_CLANG),-Wno-microsoft-anon-tag) # Compile as THUMB2 or ARM. Unwinding via frame-pointers in THUMB2 is # unreliable. diff --git a/arch/arm64/kernel/vmcore_info.c b/arch/arm64/kernel/vmcore_info.c index b19d5d6cb8b3..9619ece66b79 100644 --- a/arch/arm64/kernel/vmcore_info.c +++ b/arch/arm64/kernel/vmcore_info.c @@ -14,7 +14,7 @@ static inline u64 get_tcr_el1_t1sz(void); static inline u64 get_tcr_el1_t1sz(void) { - return (read_sysreg(tcr_el1) & TCR_T1SZ_MASK) >> TCR_T1SZ_OFFSET; + return (read_sysreg(tcr_el1) & TCR_EL1_T1SZ_MASK) >> TCR_EL1_T1SZ_SHIFT; } void arch_crash_save_vmcoreinfo(void) diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 3f675875abea..99a07972068d 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -806,7 +806,7 @@ static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map) tpt = tpc = true; /* - * For the poor sods that could not correctly substract one value + * For the poor sods that could not correctly subtract one value * from another, trap the full virtual timer and counter. 
*/ if (has_broken_cntvoff() && timer_get_offset(map->direct_vtimer)) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 870953b4a8a7..12acc024f7a8 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -624,6 +624,7 @@ nommu: kvm_timer_vcpu_load(vcpu); kvm_vgic_load(vcpu); kvm_vcpu_load_debug(vcpu); + kvm_vcpu_load_fgt(vcpu); if (has_vhe()) kvm_vcpu_load_vhe(vcpu); kvm_arch_vcpu_load_fp(vcpu); @@ -642,7 +643,6 @@ nommu: vcpu->arch.hcr_el2 |= HCR_TWI; vcpu_set_pauth_traps(vcpu); - kvm_vcpu_load_fgt(vcpu); if (is_protected_kvm_enabled()) { kvm_call_hyp_nvhe(__pkvm_vcpu_load, @@ -2448,7 +2448,7 @@ static void kvm_hyp_init_symbols(void) kvm_nvhe_sym(__icache_flags) = __icache_flags; kvm_nvhe_sym(kvm_arm_vmid_bits) = kvm_arm_vmid_bits; - /* Propagate the FGT state to the the nVHE side */ + /* Propagate the FGT state to the nVHE side */ kvm_nvhe_sym(hfgrtr_masks) = hfgrtr_masks; kvm_nvhe_sym(hfgwtr_masks) = hfgwtr_masks; kvm_nvhe_sym(hfgitr_masks) = hfgitr_masks; diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c index 4e16f9b96f63..f731cc4c3f28 100644 --- a/arch/arm64/kvm/hyp/nvhe/ffa.c +++ b/arch/arm64/kvm/hyp/nvhe/ffa.c @@ -115,7 +115,7 @@ static void ffa_set_retval(struct kvm_cpu_context *ctxt, * * FFA-1.3 introduces 64-bit variants of the CPU cycle management * interfaces. Moreover, FF-A 1.3 clarifies that SMC32 direct requests - * complete with SMC32 direct reponses which *should* allow us use the + * complete with SMC32 direct responses which *should* allow us use the * function ID sent by the caller to determine whether to return x8-x17. * * Note that we also cannot rely on function IDs in the response. @@ -479,7 +479,7 @@ static void __do_ffa_mem_xfer(const u64 func_id, struct ffa_mem_region_attributes *ep_mem_access; struct ffa_composite_mem_region *reg; struct ffa_mem_region *buf; - u32 offset, nr_ranges; + u32 offset, nr_ranges, checked_offset; int ret = 0; if (addr_mbz || npages_mbz || fraglen > len || @@ -516,7 +516,12 @@ static void __do_ffa_mem_xfer(const u64 func_id, goto out_unlock; } - if (fraglen < offset + sizeof(struct ffa_composite_mem_region)) { + if (check_add_overflow(offset, sizeof(struct ffa_composite_mem_region), &checked_offset)) { + ret = FFA_RET_INVALID_PARAMETERS; + goto out_unlock; + } + + if (fraglen < checked_offset) { ret = FFA_RET_INVALID_PARAMETERS; goto out_unlock; } diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index ddc8beb55eee..49db32f3ddf7 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -367,6 +367,19 @@ static int host_stage2_unmap_dev_all(void) return kvm_pgtable_stage2_unmap(pgt, addr, BIT(pgt->ia_bits) - addr); } +/* + * Ensure the PFN range is contained within PA-range. + * + * This check is also robust to overflows and is therefore a requirement before + * using a pfn/nr_pages pair from an untrusted source. 
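
[ Editorial aside between hunks: the check_add_overflow() guard added to the FF-A memory-transfer path and the pfn_range_is_valid() helper introduced just below are two instances of the same pattern, namely validating an untrusted (base, count) pair without ever forming base + count in a way that can wrap. A standalone, compile-and-run illustration of that pattern in plain userspace C; the names and limits here are made up for the example:

        #include <stdbool.h>
        #include <stdint.h>
        #include <stdio.h>

        /* Same shape as pfn_range_is_valid(): subtract instead of add. */
        static bool range_is_valid(uint64_t pfn, uint64_t nr_pages, uint64_t limit)
        {
                return pfn < limit && (limit - pfn) >= nr_pages;
        }

        /* Mirrors the check_add_overflow() idea: reject on wrap-around. */
        static bool offset_is_valid(uint32_t offset, uint32_t len, uint32_t fraglen)
        {
                uint32_t end;

                if (__builtin_add_overflow(offset, len, &end))
                        return false;
                return fraglen >= end;
        }

        int main(void)
        {
                uint64_t limit = UINT64_C(1) << 20;     /* pretend PA limit, in pages */

                printf("%d\n", range_is_valid(10, 5, limit));                   /* 1 */
                printf("%d\n", range_is_valid(limit - 1, 2, limit));            /* 0 */
                /* A naive "pfn + nr_pages <= limit" would wrap here and pass: */
                printf("%d\n", range_is_valid(UINT64_MAX - 1, 4, limit));       /* 0 */
                printf("%d\n", offset_is_valid(UINT32_MAX - 2, 8, 16));         /* 0 */
                return 0;
        }

The subtract form is exact because once pfn < limit holds, limit - pfn cannot underflow, so comparing it against nr_pages needs no widening or extra overflow check. ]
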
+ */ +static bool pfn_range_is_valid(u64 pfn, u64 nr_pages) +{ + u64 limit = BIT(kvm_phys_shift(&host_mmu.arch.mmu) - PAGE_SHIFT); + + return pfn < limit && ((limit - pfn) >= nr_pages); +} + struct kvm_mem_range { u64 start; u64 end; @@ -776,6 +789,9 @@ int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages) void *virt = __hyp_va(phys); int ret; + if (!pfn_range_is_valid(pfn, nr_pages)) + return -EINVAL; + host_lock_component(); hyp_lock_component(); @@ -804,6 +820,9 @@ int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages) u64 virt = (u64)__hyp_va(phys); int ret; + if (!pfn_range_is_valid(pfn, nr_pages)) + return -EINVAL; + host_lock_component(); hyp_lock_component(); @@ -887,6 +906,9 @@ int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages) u64 size = PAGE_SIZE * nr_pages; int ret; + if (!pfn_range_is_valid(pfn, nr_pages)) + return -EINVAL; + host_lock_component(); ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED); if (!ret) @@ -902,6 +924,9 @@ int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages) u64 size = PAGE_SIZE * nr_pages; int ret; + if (!pfn_range_is_valid(pfn, nr_pages)) + return -EINVAL; + host_lock_component(); ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED); if (!ret) @@ -945,6 +970,9 @@ int __pkvm_host_share_guest(u64 pfn, u64 gfn, u64 nr_pages, struct pkvm_hyp_vcpu if (prot & ~KVM_PGTABLE_PROT_RWX) return -EINVAL; + if (!pfn_range_is_valid(pfn, nr_pages)) + return -EINVAL; + ret = __guest_check_transition_size(phys, ipa, nr_pages, &size); if (ret) return ret; diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 7cc964af8d30..9a6a80c3fbe5 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1755,7 +1755,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, /* * Check if this is non-struct page memory PFN, and cannot support - * CMOs. It could potentially be unsafe to access as cachable. + * CMOs. It could potentially be unsafe to access as cacheable. */ if (vm_flags & (VM_PFNMAP | VM_MIXEDMAP) && !pfn_is_map_memory(pfn)) { if (is_vma_cacheable) { diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index f04cda40545b..be6bbd167770 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -85,7 +85,7 @@ int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu) /* * Let's treat memory allocation failures as benign: If we fail to * allocate anything, return an error and keep the allocated array - * alive. Userspace may try to recover by intializing the vcpu + * alive. Userspace may try to recover by initializing the vcpu * again, and there is no reason to affect the whole VM for this. */ num_mmus = atomic_read(&kvm->online_vcpus) * S2_MMU_PER_VCPU; diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index e67eb39ddc11..ec3fbe0b8d52 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2595,19 +2595,23 @@ static bool bad_redir_trap(struct kvm_vcpu *vcpu, .val = 0, \ } -/* sys_reg_desc initialiser for known cpufeature ID registers */ -#define AA32_ID_SANITISED(name) { \ - ID_DESC(name), \ - .visibility = aa32_id_visibility, \ - .val = 0, \ -} - /* sys_reg_desc initialiser for writable ID registers */ #define ID_WRITABLE(name, mask) { \ ID_DESC(name), \ .val = mask, \ } +/* + * 32bit ID regs are fully writable when the guest is 32bit + * capable. Nothing in the KVM code should rely on 32bit features + * anyway, only 64bit, so let the VMM do its worse. 
+ */ +#define AA32_ID_WRITABLE(name) { \ + ID_DESC(name), \ + .visibility = aa32_id_visibility, \ + .val = GENMASK(31, 0), \ +} + /* sys_reg_desc initialiser for cpufeature ID registers that need filtering */ #define ID_FILTERED(sysreg, name, mask) { \ ID_DESC(sysreg), \ @@ -3128,40 +3132,39 @@ static const struct sys_reg_desc sys_reg_descs[] = { /* AArch64 mappings of the AArch32 ID registers */ /* CRm=1 */ - AA32_ID_SANITISED(ID_PFR0_EL1), - AA32_ID_SANITISED(ID_PFR1_EL1), + AA32_ID_WRITABLE(ID_PFR0_EL1), + AA32_ID_WRITABLE(ID_PFR1_EL1), { SYS_DESC(SYS_ID_DFR0_EL1), .access = access_id_reg, .get_user = get_id_reg, .set_user = set_id_dfr0_el1, .visibility = aa32_id_visibility, .reset = read_sanitised_id_dfr0_el1, - .val = ID_DFR0_EL1_PerfMon_MASK | - ID_DFR0_EL1_CopDbg_MASK, }, + .val = GENMASK(31, 0) }, ID_HIDDEN(ID_AFR0_EL1), - AA32_ID_SANITISED(ID_MMFR0_EL1), - AA32_ID_SANITISED(ID_MMFR1_EL1), - AA32_ID_SANITISED(ID_MMFR2_EL1), - AA32_ID_SANITISED(ID_MMFR3_EL1), + AA32_ID_WRITABLE(ID_MMFR0_EL1), + AA32_ID_WRITABLE(ID_MMFR1_EL1), + AA32_ID_WRITABLE(ID_MMFR2_EL1), + AA32_ID_WRITABLE(ID_MMFR3_EL1), /* CRm=2 */ - AA32_ID_SANITISED(ID_ISAR0_EL1), - AA32_ID_SANITISED(ID_ISAR1_EL1), - AA32_ID_SANITISED(ID_ISAR2_EL1), - AA32_ID_SANITISED(ID_ISAR3_EL1), - AA32_ID_SANITISED(ID_ISAR4_EL1), - AA32_ID_SANITISED(ID_ISAR5_EL1), - AA32_ID_SANITISED(ID_MMFR4_EL1), - AA32_ID_SANITISED(ID_ISAR6_EL1), + AA32_ID_WRITABLE(ID_ISAR0_EL1), + AA32_ID_WRITABLE(ID_ISAR1_EL1), + AA32_ID_WRITABLE(ID_ISAR2_EL1), + AA32_ID_WRITABLE(ID_ISAR3_EL1), + AA32_ID_WRITABLE(ID_ISAR4_EL1), + AA32_ID_WRITABLE(ID_ISAR5_EL1), + AA32_ID_WRITABLE(ID_MMFR4_EL1), + AA32_ID_WRITABLE(ID_ISAR6_EL1), /* CRm=3 */ - AA32_ID_SANITISED(MVFR0_EL1), - AA32_ID_SANITISED(MVFR1_EL1), - AA32_ID_SANITISED(MVFR2_EL1), + AA32_ID_WRITABLE(MVFR0_EL1), + AA32_ID_WRITABLE(MVFR1_EL1), + AA32_ID_WRITABLE(MVFR2_EL1), ID_UNALLOCATED(3,3), - AA32_ID_SANITISED(ID_PFR2_EL1), + AA32_ID_WRITABLE(ID_PFR2_EL1), ID_HIDDEN(ID_DFR1_EL1), - AA32_ID_SANITISED(ID_MMFR5_EL1), + AA32_ID_WRITABLE(ID_MMFR5_EL1), ID_UNALLOCATED(3,7), /* AArch64 ID registers */ @@ -5606,11 +5609,17 @@ int kvm_finalize_sys_regs(struct kvm_vcpu *vcpu) guard(mutex)(&kvm->arch.config_lock); - if (!(static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif) && - irqchip_in_kernel(kvm) && - kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)) { - kvm->arch.id_regs[IDREG_IDX(SYS_ID_AA64PFR0_EL1)] &= ~ID_AA64PFR0_EL1_GIC_MASK; - kvm->arch.id_regs[IDREG_IDX(SYS_ID_PFR1_EL1)] &= ~ID_PFR1_EL1_GIC_MASK; + /* + * This hacks into the ID registers, so only perform it when the + * first vcpu runs, or the kvm_set_vm_id_reg() helper will scream. 
+ */ + if (!irqchip_in_kernel(kvm) && !kvm_vm_has_ran_once(kvm)) { + u64 val; + + val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1) & ~ID_AA64PFR0_EL1_GIC; + kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1, val); + val = kvm_read_vm_id_reg(kvm, SYS_ID_PFR1_EL1) & ~ID_PFR1_EL1_GIC; + kvm_set_vm_id_reg(kvm, SYS_ID_PFR1_EL1, val); } if (vcpu_has_nv(vcpu)) { diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c index 4c1209261b65..bb92853d1fd3 100644 --- a/arch/arm64/kvm/vgic/vgic-debug.c +++ b/arch/arm64/kvm/vgic/vgic-debug.c @@ -64,29 +64,37 @@ static void iter_next(struct kvm *kvm, struct vgic_state_iter *iter) static int iter_mark_lpis(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; + unsigned long intid, flags; struct vgic_irq *irq; - unsigned long intid; int nr_lpis = 0; + xa_lock_irqsave(&dist->lpi_xa, flags); + xa_for_each(&dist->lpi_xa, intid, irq) { if (!vgic_try_get_irq_ref(irq)) continue; - xa_set_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER); + __xa_set_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER); nr_lpis++; } + xa_unlock_irqrestore(&dist->lpi_xa, flags); + return nr_lpis; } static void iter_unmark_lpis(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; + unsigned long intid, flags; struct vgic_irq *irq; - unsigned long intid; xa_for_each_marked(&dist->lpi_xa, intid, irq, LPI_XA_MARK_DEBUG_ITER) { - xa_clear_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER); + xa_lock_irqsave(&dist->lpi_xa, flags); + __xa_clear_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER); + xa_unlock_irqrestore(&dist->lpi_xa, flags); + + /* vgic_put_irq() expects to be called outside of the xa_lock */ vgic_put_irq(kvm, irq); } } diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index 1796b1a22a72..da62edbc1205 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -53,7 +53,7 @@ void kvm_vgic_early_init(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; - xa_init(&dist->lpi_xa); + xa_init_flags(&dist->lpi_xa, XA_FLAGS_LOCK_IRQ); } /* CREATION */ @@ -71,6 +71,7 @@ static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu, u32 type); int kvm_vgic_create(struct kvm *kvm, u32 type) { struct kvm_vcpu *vcpu; + u64 aa64pfr0, pfr1; unsigned long i; int ret; @@ -161,10 +162,19 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; - if (type == KVM_DEV_TYPE_ARM_VGIC_V2) + aa64pfr0 = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1) & ~ID_AA64PFR0_EL1_GIC; + pfr1 = kvm_read_vm_id_reg(kvm, SYS_ID_PFR1_EL1) & ~ID_PFR1_EL1_GIC; + + if (type == KVM_DEV_TYPE_ARM_VGIC_V2) { kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; - else + } else { INIT_LIST_HEAD(&kvm->arch.vgic.rd_regions); + aa64pfr0 |= SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, GIC, IMP); + pfr1 |= SYS_FIELD_PREP_ENUM(ID_PFR1_EL1, GIC, GICv3); + } + + kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1, aa64pfr0); + kvm_set_vm_id_reg(kvm, SYS_ID_PFR1_EL1, pfr1); if (type == KVM_DEV_TYPE_ARM_VGIC_V3) kvm->arch.vgic.nassgicap = system_supports_direct_sgis(); diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index ce3e3ed3f29f..3f1c4b10fed9 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -78,6 +78,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid, { struct vgic_dist *dist = &kvm->arch.vgic; struct vgic_irq *irq = vgic_get_irq(kvm, intid), *oldirq; + unsigned long flags; int ret; /* In this case there is no put, since we keep 
the reference. */ @@ -88,7 +89,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid, if (!irq) return ERR_PTR(-ENOMEM); - ret = xa_reserve(&dist->lpi_xa, intid, GFP_KERNEL_ACCOUNT); + ret = xa_reserve_irq(&dist->lpi_xa, intid, GFP_KERNEL_ACCOUNT); if (ret) { kfree(irq); return ERR_PTR(ret); @@ -103,7 +104,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid, irq->target_vcpu = vcpu; irq->group = 1; - xa_lock(&dist->lpi_xa); + xa_lock_irqsave(&dist->lpi_xa, flags); /* * There could be a race with another vgic_add_lpi(), so we need to @@ -114,21 +115,18 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid, /* Someone was faster with adding this LPI, lets use that. */ kfree(irq); irq = oldirq; - - goto out_unlock; + } else { + ret = xa_err(__xa_store(&dist->lpi_xa, intid, irq, 0)); } - ret = xa_err(__xa_store(&dist->lpi_xa, intid, irq, 0)); + xa_unlock_irqrestore(&dist->lpi_xa, flags); + if (ret) { xa_release(&dist->lpi_xa, intid); kfree(irq); - } - -out_unlock: - xa_unlock(&dist->lpi_xa); - if (ret) return ERR_PTR(ret); + } /* * We "cache" the configuration table entries in our struct vgic_irq's. diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index 6fbb4b099855..2f75ef14d339 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -301,7 +301,8 @@ void vcpu_set_ich_hcr(struct kvm_vcpu *vcpu) return; /* Hide GICv3 sysreg if necessary */ - if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) { + if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2 || + !irqchip_in_kernel(vcpu->kvm)) { vgic_v3->vgic_hcr |= (ICH_HCR_EL2_TALL0 | ICH_HCR_EL2_TALL1 | ICH_HCR_EL2_TC); return; diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index 6dd5a10081e2..8d20c53faef0 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -28,7 +28,7 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = { * kvm->arch.config_lock (mutex) * its->cmd_lock (mutex) * its->its_lock (mutex) - * vgic_dist->lpi_xa.xa_lock + * vgic_dist->lpi_xa.xa_lock must be taken with IRQs disabled * vgic_cpu->ap_list_lock must be taken with IRQs disabled * vgic_irq->irq_lock must be taken with IRQs disabled * @@ -141,32 +141,39 @@ static __must_check bool vgic_put_irq_norelease(struct kvm *kvm, struct vgic_irq void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq) { struct vgic_dist *dist = &kvm->arch.vgic; + unsigned long flags; - if (irq->intid >= VGIC_MIN_LPI) - might_lock(&dist->lpi_xa.xa_lock); + /* + * Normally the lock is only taken when the refcount drops to 0. + * Acquire/release it early on lockdep kernels to make locking issues + * in rare release paths a bit more obvious. 
+ */ + if (IS_ENABLED(CONFIG_LOCKDEP) && irq->intid >= VGIC_MIN_LPI) { + guard(spinlock_irqsave)(&dist->lpi_xa.xa_lock); + } if (!__vgic_put_irq(kvm, irq)) return; - xa_lock(&dist->lpi_xa); + xa_lock_irqsave(&dist->lpi_xa, flags); vgic_release_lpi_locked(dist, irq); - xa_unlock(&dist->lpi_xa); + xa_unlock_irqrestore(&dist->lpi_xa, flags); } static void vgic_release_deleted_lpis(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; - unsigned long intid; + unsigned long flags, intid; struct vgic_irq *irq; - xa_lock(&dist->lpi_xa); + xa_lock_irqsave(&dist->lpi_xa, flags); xa_for_each(&dist->lpi_xa, intid, irq) { if (irq->pending_release) vgic_release_lpi_locked(dist, irq); } - xa_unlock(&dist->lpi_xa); + xa_unlock_irqrestore(&dist->lpi_xa, flags); } void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/mm/contpte.c b/arch/arm64/mm/contpte.c index c0557945939c..589bcf878938 100644 --- a/arch/arm64/mm/contpte.c +++ b/arch/arm64/mm/contpte.c @@ -622,8 +622,7 @@ int contpte_ptep_set_access_flags(struct vm_area_struct *vma, __ptep_set_access_flags(vma, addr, ptep, entry, 0); if (dirty) - __flush_tlb_range(vma, start_addr, addr, - PAGE_SIZE, true, 3); + local_flush_tlb_contpte(vma, start_addr); } else { __contpte_try_unfold(vma->vm_mm, addr, ptep, orig_pte); __ptep_set_access_flags(vma, addr, ptep, entry, dirty); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index d816ff44faff..be9dab2c7d6a 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -233,9 +233,13 @@ int __ptep_set_access_flags(struct vm_area_struct *vma, pteval = cmpxchg_relaxed(&pte_val(*ptep), old_pteval, pteval); } while (pteval != old_pteval); - /* Invalidate a stale read-only entry */ + /* + * Invalidate the local stale read-only entry. Remote stale entries + * may still cause page faults and be invalidated via + * flush_tlb_fix_spurious_fault(). + */ if (dirty) - flush_tlb_page(vma, address); + local_flush_tlb_page(vma, address); return 1; } @@ -967,10 +971,21 @@ struct folio *vma_alloc_zeroed_movable_folio(struct vm_area_struct *vma, return vma_alloc_folio(flags, 0, vma, vaddr); } -void tag_clear_highpage(struct page *page) +bool tag_clear_highpages(struct page *page, int numpages) { - /* Newly allocated page, shouldn't have been tagged yet */ - WARN_ON_ONCE(!try_page_mte_tagging(page)); - mte_zero_clear_page_tags(page_address(page)); - set_page_mte_tagged(page); + /* + * Check if MTE is supported and fall back to clear_highpage(). + * get_huge_zero_folio() unconditionally passes __GFP_ZEROTAGS and + * post_alloc_hook() will invoke tag_clear_highpages(). 
+ */ + if (!system_supports_mte()) + return false; + + /* Newly allocated pages, shouldn't have been tagged yet */ + for (int i = 0; i < numpages; i++, page++) { + WARN_ON_ONCE(!try_page_mte_tagging(page)); + mte_zero_clear_page_tags(page_address(page)); + set_page_mte_tagged(page); + } + return true; } diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index b8d37eb037fc..c8d24b7cdd62 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -49,6 +49,8 @@ #define NO_CONT_MAPPINGS BIT(1) #define NO_EXEC_MAPPINGS BIT(2) /* assumes FEAT_HPDS is not used */ +#define INVALID_PHYS_ADDR (-1ULL) + DEFINE_STATIC_KEY_FALSE(arm64_ptdump_lock_key); u64 kimage_voffset __ro_after_init; @@ -194,11 +196,11 @@ static void init_pte(pte_t *ptep, unsigned long addr, unsigned long end, } while (ptep++, addr += PAGE_SIZE, addr != end); } -static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, - unsigned long end, phys_addr_t phys, - pgprot_t prot, - phys_addr_t (*pgtable_alloc)(enum pgtable_type), - int flags) +static int alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, + unsigned long end, phys_addr_t phys, + pgprot_t prot, + phys_addr_t (*pgtable_alloc)(enum pgtable_type), + int flags) { unsigned long next; pmd_t pmd = READ_ONCE(*pmdp); @@ -213,6 +215,8 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, pmdval |= PMD_TABLE_PXN; BUG_ON(!pgtable_alloc); pte_phys = pgtable_alloc(TABLE_PTE); + if (pte_phys == INVALID_PHYS_ADDR) + return -ENOMEM; ptep = pte_set_fixmap(pte_phys); init_clear_pgtable(ptep); ptep += pte_index(addr); @@ -244,11 +248,13 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, * walker. */ pte_clear_fixmap(); + + return 0; } -static void init_pmd(pmd_t *pmdp, unsigned long addr, unsigned long end, - phys_addr_t phys, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(enum pgtable_type), int flags) +static int init_pmd(pmd_t *pmdp, unsigned long addr, unsigned long end, + phys_addr_t phys, pgprot_t prot, + phys_addr_t (*pgtable_alloc)(enum pgtable_type), int flags) { unsigned long next; @@ -269,22 +275,29 @@ static void init_pmd(pmd_t *pmdp, unsigned long addr, unsigned long end, BUG_ON(!pgattr_change_is_safe(pmd_val(old_pmd), READ_ONCE(pmd_val(*pmdp)))); } else { - alloc_init_cont_pte(pmdp, addr, next, phys, prot, - pgtable_alloc, flags); + int ret; + + ret = alloc_init_cont_pte(pmdp, addr, next, phys, prot, + pgtable_alloc, flags); + if (ret) + return ret; BUG_ON(pmd_val(old_pmd) != 0 && pmd_val(old_pmd) != READ_ONCE(pmd_val(*pmdp))); } phys += next - addr; } while (pmdp++, addr = next, addr != end); + + return 0; } -static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, - unsigned long end, phys_addr_t phys, - pgprot_t prot, - phys_addr_t (*pgtable_alloc)(enum pgtable_type), - int flags) +static int alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, + unsigned long end, phys_addr_t phys, + pgprot_t prot, + phys_addr_t (*pgtable_alloc)(enum pgtable_type), + int flags) { + int ret; unsigned long next; pud_t pud = READ_ONCE(*pudp); pmd_t *pmdp; @@ -301,6 +314,8 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, pudval |= PUD_TABLE_PXN; BUG_ON(!pgtable_alloc); pmd_phys = pgtable_alloc(TABLE_PMD); + if (pmd_phys == INVALID_PHYS_ADDR) + return -ENOMEM; pmdp = pmd_set_fixmap(pmd_phys); init_clear_pgtable(pmdp); pmdp += pmd_index(addr); @@ -320,20 +335,26 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, (flags & NO_CONT_MAPPINGS) == 0) __prot = __pgprot(pgprot_val(prot) | PTE_CONT); - init_pmd(pmdp, 
addr, next, phys, __prot, pgtable_alloc, flags); + ret = init_pmd(pmdp, addr, next, phys, __prot, pgtable_alloc, flags); + if (ret) + goto out; pmdp += pmd_index(next) - pmd_index(addr); phys += next - addr; } while (addr = next, addr != end); +out: pmd_clear_fixmap(); + + return ret; } -static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end, - phys_addr_t phys, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(enum pgtable_type), - int flags) +static int alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end, + phys_addr_t phys, pgprot_t prot, + phys_addr_t (*pgtable_alloc)(enum pgtable_type), + int flags) { + int ret = 0; unsigned long next; p4d_t p4d = READ_ONCE(*p4dp); pud_t *pudp; @@ -346,6 +367,8 @@ static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end, p4dval |= P4D_TABLE_PXN; BUG_ON(!pgtable_alloc); pud_phys = pgtable_alloc(TABLE_PUD); + if (pud_phys == INVALID_PHYS_ADDR) + return -ENOMEM; pudp = pud_set_fixmap(pud_phys); init_clear_pgtable(pudp); pudp += pud_index(addr); @@ -375,8 +398,10 @@ static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end, BUG_ON(!pgattr_change_is_safe(pud_val(old_pud), READ_ONCE(pud_val(*pudp)))); } else { - alloc_init_cont_pmd(pudp, addr, next, phys, prot, - pgtable_alloc, flags); + ret = alloc_init_cont_pmd(pudp, addr, next, phys, prot, + pgtable_alloc, flags); + if (ret) + goto out; BUG_ON(pud_val(old_pud) != 0 && pud_val(old_pud) != READ_ONCE(pud_val(*pudp))); @@ -384,14 +409,18 @@ static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end, phys += next - addr; } while (pudp++, addr = next, addr != end); +out: pud_clear_fixmap(); + + return ret; } -static void alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end, - phys_addr_t phys, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(enum pgtable_type), - int flags) +static int alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end, + phys_addr_t phys, pgprot_t prot, + phys_addr_t (*pgtable_alloc)(enum pgtable_type), + int flags) { + int ret; unsigned long next; pgd_t pgd = READ_ONCE(*pgdp); p4d_t *p4dp; @@ -404,6 +433,8 @@ static void alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end, pgdval |= PGD_TABLE_PXN; BUG_ON(!pgtable_alloc); p4d_phys = pgtable_alloc(TABLE_P4D); + if (p4d_phys == INVALID_PHYS_ADDR) + return -ENOMEM; p4dp = p4d_set_fixmap(p4d_phys); init_clear_pgtable(p4dp); p4dp += p4d_index(addr); @@ -418,8 +449,10 @@ static void alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end, next = p4d_addr_end(addr, end); - alloc_init_pud(p4dp, addr, next, phys, prot, - pgtable_alloc, flags); + ret = alloc_init_pud(p4dp, addr, next, phys, prot, + pgtable_alloc, flags); + if (ret) + goto out; BUG_ON(p4d_val(old_p4d) != 0 && p4d_val(old_p4d) != READ_ONCE(p4d_val(*p4dp))); @@ -427,15 +460,19 @@ static void alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end, phys += next - addr; } while (p4dp++, addr = next, addr != end); +out: p4d_clear_fixmap(); + + return ret; } -static void __create_pgd_mapping_locked(pgd_t *pgdir, phys_addr_t phys, - unsigned long virt, phys_addr_t size, - pgprot_t prot, - phys_addr_t (*pgtable_alloc)(enum pgtable_type), - int flags) +static int __create_pgd_mapping_locked(pgd_t *pgdir, phys_addr_t phys, + unsigned long virt, phys_addr_t size, + pgprot_t prot, + phys_addr_t (*pgtable_alloc)(enum pgtable_type), + int flags) { + int ret; unsigned long addr, end, next; pgd_t *pgdp = pgd_offset_pgd(pgdir, virt); @@ -444,7 +481,7 @@ 
static void __create_pgd_mapping_locked(pgd_t *pgdir, phys_addr_t phys, * within a page, we cannot map the region as the caller expects. */ if (WARN_ON((phys ^ virt) & ~PAGE_MASK)) - return; + return -EINVAL; phys &= PAGE_MASK; addr = virt & PAGE_MASK; @@ -452,25 +489,45 @@ static void __create_pgd_mapping_locked(pgd_t *pgdir, phys_addr_t phys, do { next = pgd_addr_end(addr, end); - alloc_init_p4d(pgdp, addr, next, phys, prot, pgtable_alloc, - flags); + ret = alloc_init_p4d(pgdp, addr, next, phys, prot, pgtable_alloc, + flags); + if (ret) + return ret; phys += next - addr; } while (pgdp++, addr = next, addr != end); + + return 0; } -static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, - unsigned long virt, phys_addr_t size, - pgprot_t prot, - phys_addr_t (*pgtable_alloc)(enum pgtable_type), - int flags) +static int __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, + unsigned long virt, phys_addr_t size, + pgprot_t prot, + phys_addr_t (*pgtable_alloc)(enum pgtable_type), + int flags) { + int ret; + mutex_lock(&fixmap_lock); - __create_pgd_mapping_locked(pgdir, phys, virt, size, prot, - pgtable_alloc, flags); + ret = __create_pgd_mapping_locked(pgdir, phys, virt, size, prot, + pgtable_alloc, flags); mutex_unlock(&fixmap_lock); + + return ret; } -#define INVALID_PHYS_ADDR (-1ULL) +static void early_create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, + unsigned long virt, phys_addr_t size, + pgprot_t prot, + phys_addr_t (*pgtable_alloc)(enum pgtable_type), + int flags) +{ + int ret; + + ret = __create_pgd_mapping(pgdir, phys, virt, size, prot, pgtable_alloc, + flags); + if (ret) + panic("Failed to create page tables\n"); +} static phys_addr_t __pgd_pgtable_alloc(struct mm_struct *mm, gfp_t gfp, enum pgtable_type pgtable_type) @@ -503,7 +560,7 @@ static phys_addr_t __pgd_pgtable_alloc(struct mm_struct *mm, gfp_t gfp, } static phys_addr_t -try_pgd_pgtable_alloc_init_mm(enum pgtable_type pgtable_type, gfp_t gfp) +pgd_pgtable_alloc_init_mm_gfp(enum pgtable_type pgtable_type, gfp_t gfp) { return __pgd_pgtable_alloc(&init_mm, gfp, pgtable_type); } @@ -511,21 +568,13 @@ try_pgd_pgtable_alloc_init_mm(enum pgtable_type pgtable_type, gfp_t gfp) static phys_addr_t __maybe_unused pgd_pgtable_alloc_init_mm(enum pgtable_type pgtable_type) { - phys_addr_t pa; - - pa = __pgd_pgtable_alloc(&init_mm, GFP_PGTABLE_KERNEL, pgtable_type); - BUG_ON(pa == INVALID_PHYS_ADDR); - return pa; + return pgd_pgtable_alloc_init_mm_gfp(pgtable_type, GFP_PGTABLE_KERNEL); } static phys_addr_t pgd_pgtable_alloc_special_mm(enum pgtable_type pgtable_type) { - phys_addr_t pa; - - pa = __pgd_pgtable_alloc(NULL, GFP_PGTABLE_KERNEL, pgtable_type); - BUG_ON(pa == INVALID_PHYS_ADDR); - return pa; + return __pgd_pgtable_alloc(NULL, GFP_PGTABLE_KERNEL, pgtable_type); } static void split_contpte(pte_t *ptep) @@ -546,7 +595,7 @@ static int split_pmd(pmd_t *pmdp, pmd_t pmd, gfp_t gfp, bool to_cont) pte_t *ptep; int i; - pte_phys = try_pgd_pgtable_alloc_init_mm(TABLE_PTE, gfp); + pte_phys = pgd_pgtable_alloc_init_mm_gfp(TABLE_PTE, gfp); if (pte_phys == INVALID_PHYS_ADDR) return -ENOMEM; ptep = (pte_t *)phys_to_virt(pte_phys); @@ -591,7 +640,7 @@ static int split_pud(pud_t *pudp, pud_t pud, gfp_t gfp, bool to_cont) pmd_t *pmdp; int i; - pmd_phys = try_pgd_pgtable_alloc_init_mm(TABLE_PMD, gfp); + pmd_phys = pgd_pgtable_alloc_init_mm_gfp(TABLE_PMD, gfp); if (pmd_phys == INVALID_PHYS_ADDR) return -ENOMEM; pmdp = (pmd_t *)phys_to_virt(pmd_phys); @@ -708,6 +757,30 @@ out: return ret; } +static inline bool force_pte_mapping(void) +{ + 
const bool bbml2 = system_capabilities_finalized() ? + system_supports_bbml2_noabort() : cpu_supports_bbml2_noabort(); + + if (debug_pagealloc_enabled()) + return true; + if (bbml2) + return false; + return rodata_full || arm64_kfence_can_set_direct_map() || is_realm_world(); +} + +static inline bool split_leaf_mapping_possible(void) +{ + /* + * !BBML2_NOABORT systems should never run into scenarios where we would + * have to split. So exit early and let calling code detect it and raise + * a warning. + */ + if (!system_supports_bbml2_noabort()) + return false; + return !force_pte_mapping(); +} + static DEFINE_MUTEX(pgtable_split_lock); int split_kernel_leaf_mapping(unsigned long start, unsigned long end) @@ -715,12 +788,11 @@ int split_kernel_leaf_mapping(unsigned long start, unsigned long end) int ret; /* - * !BBML2_NOABORT systems should not be trying to change permissions on - * anything that is not pte-mapped in the first place. Just return early - * and let the permission change code raise a warning if not already - * pte-mapped. + * Exit early if the region is within a pte-mapped area or if we can't + * split. For the latter case, the permission change code will raise a + * warning if not already pte-mapped. */ - if (!system_supports_bbml2_noabort()) + if (!split_leaf_mapping_possible() || is_kfence_address((void *)start)) return 0; /* @@ -758,30 +830,30 @@ int split_kernel_leaf_mapping(unsigned long start, unsigned long end) return ret; } -static int __init split_to_ptes_pud_entry(pud_t *pudp, unsigned long addr, - unsigned long next, - struct mm_walk *walk) +static int split_to_ptes_pud_entry(pud_t *pudp, unsigned long addr, + unsigned long next, struct mm_walk *walk) { + gfp_t gfp = *(gfp_t *)walk->private; pud_t pud = pudp_get(pudp); int ret = 0; if (pud_leaf(pud)) - ret = split_pud(pudp, pud, GFP_ATOMIC, false); + ret = split_pud(pudp, pud, gfp, false); return ret; } -static int __init split_to_ptes_pmd_entry(pmd_t *pmdp, unsigned long addr, - unsigned long next, - struct mm_walk *walk) +static int split_to_ptes_pmd_entry(pmd_t *pmdp, unsigned long addr, + unsigned long next, struct mm_walk *walk) { + gfp_t gfp = *(gfp_t *)walk->private; pmd_t pmd = pmdp_get(pmdp); int ret = 0; if (pmd_leaf(pmd)) { if (pmd_cont(pmd)) split_contpmd(pmdp); - ret = split_pmd(pmdp, pmd, GFP_ATOMIC, false); + ret = split_pmd(pmdp, pmd, gfp, false); /* * We have split the pmd directly to ptes so there is no need to @@ -793,9 +865,8 @@ static int __init split_to_ptes_pmd_entry(pmd_t *pmdp, unsigned long addr, return ret; } -static int __init split_to_ptes_pte_entry(pte_t *ptep, unsigned long addr, - unsigned long next, - struct mm_walk *walk) +static int split_to_ptes_pte_entry(pte_t *ptep, unsigned long addr, + unsigned long next, struct mm_walk *walk) { pte_t pte = __ptep_get(ptep); @@ -805,12 +876,24 @@ static int __init split_to_ptes_pte_entry(pte_t *ptep, unsigned long addr, return 0; } -static const struct mm_walk_ops split_to_ptes_ops __initconst = { +static const struct mm_walk_ops split_to_ptes_ops = { .pud_entry = split_to_ptes_pud_entry, .pmd_entry = split_to_ptes_pmd_entry, .pte_entry = split_to_ptes_pte_entry, }; +static int range_split_to_ptes(unsigned long start, unsigned long end, gfp_t gfp) +{ + int ret; + + arch_enter_lazy_mmu_mode(); + ret = walk_kernel_page_table_range_lockless(start, end, + &split_to_ptes_ops, NULL, &gfp); + arch_leave_lazy_mmu_mode(); + + return ret; +} + static bool linear_map_requires_bbml2 __initdata; u32 idmap_kpti_bbml2_flag; @@ -847,11 +930,9 @@ static int 
__init linear_map_split_to_ptes(void *__unused) * PTE. The kernel alias remains static throughout runtime so * can continue to be safely mapped with large mappings. */ - ret = walk_kernel_page_table_range_lockless(lstart, kstart, - &split_to_ptes_ops, NULL, NULL); + ret = range_split_to_ptes(lstart, kstart, GFP_ATOMIC); if (!ret) - ret = walk_kernel_page_table_range_lockless(kend, lend, - &split_to_ptes_ops, NULL, NULL); + ret = range_split_to_ptes(kend, lend, GFP_ATOMIC); if (ret) panic("Failed to split linear map\n"); flush_tlb_kernel_range(lstart, lend); @@ -903,8 +984,8 @@ void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt, &phys, virt); return; } - __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, - NO_CONT_MAPPINGS); + early_create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, + NO_CONT_MAPPINGS); } void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, @@ -918,8 +999,8 @@ void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, if (page_mappings_only) flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; - __create_pgd_mapping(mm->pgd, phys, virt, size, prot, - pgd_pgtable_alloc_special_mm, flags); + early_create_pgd_mapping(mm->pgd, phys, virt, size, prot, + pgd_pgtable_alloc_special_mm, flags); } static void update_mapping_prot(phys_addr_t phys, unsigned long virt, @@ -931,8 +1012,8 @@ static void update_mapping_prot(phys_addr_t phys, unsigned long virt, return; } - __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, - NO_CONT_MAPPINGS); + early_create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, + NO_CONT_MAPPINGS); /* flush the TLBs after updating live kernel mappings */ flush_tlb_kernel_range(virt, virt + size); @@ -941,8 +1022,8 @@ static void update_mapping_prot(phys_addr_t phys, unsigned long virt, static void __init __map_memblock(pgd_t *pgdp, phys_addr_t start, phys_addr_t end, pgprot_t prot, int flags) { - __create_pgd_mapping(pgdp, start, __phys_to_virt(start), end - start, - prot, early_pgtable_alloc, flags); + early_create_pgd_mapping(pgdp, start, __phys_to_virt(start), end - start, + prot, early_pgtable_alloc, flags); } void __init mark_linear_text_alias_ro(void) @@ -1002,6 +1083,33 @@ static void __init arm64_kfence_map_pool(phys_addr_t kfence_pool, pgd_t *pgdp) memblock_clear_nomap(kfence_pool, KFENCE_POOL_SIZE); __kfence_pool = phys_to_virt(kfence_pool); } + +bool arch_kfence_init_pool(void) +{ + unsigned long start = (unsigned long)__kfence_pool; + unsigned long end = start + KFENCE_POOL_SIZE; + int ret; + + /* Exit early if we know the linear map is already pte-mapped. */ + if (!split_leaf_mapping_possible()) + return true; + + /* Kfence pool is already pte-mapped for the early init case. */ + if (kfence_early_init) + return true; + + mutex_lock(&pgtable_split_lock); + ret = range_split_to_ptes(start, end, GFP_PGTABLE_KERNEL); + mutex_unlock(&pgtable_split_lock); + + /* + * Since the system supports bbml2_noabort, tlb invalidation is not + * required here; the pgtable mappings have been split to pte but larger + * entries may safely linger in the TLB. + */ + + return !ret; +} #else /* CONFIG_KFENCE */ static inline phys_addr_t arm64_kfence_alloc_pool(void) { return 0; } @@ -1009,16 +1117,6 @@ static inline void arm64_kfence_map_pool(phys_addr_t kfence_pool, pgd_t *pgdp) { #endif /* CONFIG_KFENCE */ -static inline bool force_pte_mapping(void) -{ - bool bbml2 = system_capabilities_finalized() ? 
- system_supports_bbml2_noabort() : cpu_supports_bbml2_noabort(); - - return (!bbml2 && (rodata_full || arm64_kfence_can_set_direct_map() || - is_realm_world())) || - debug_pagealloc_enabled(); -} - static void __init map_mem(pgd_t *pgdp) { static const u64 direct_map_end = _PAGE_END(VA_BITS_MIN); @@ -1158,6 +1256,8 @@ static int __init __kpti_install_ng_mappings(void *__unused) remap_fn = (void *)__pa_symbol(idmap_kpti_install_ng_mappings); if (!cpu) { + int ret; + alloc = __get_free_pages(GFP_ATOMIC | __GFP_ZERO, order); kpti_ng_temp_pgd = (pgd_t *)(alloc + (levels - 1) * PAGE_SIZE); kpti_ng_temp_alloc = kpti_ng_temp_pgd_pa = __pa(kpti_ng_temp_pgd); @@ -1178,9 +1278,11 @@ static int __init __kpti_install_ng_mappings(void *__unused) // covers the PTE[] page itself, the remaining entries are free // to be used as a ad-hoc fixmap. // - __create_pgd_mapping_locked(kpti_ng_temp_pgd, __pa(alloc), - KPTI_NG_TEMP_VA, PAGE_SIZE, PAGE_KERNEL, - kpti_ng_pgd_alloc, 0); + ret = __create_pgd_mapping_locked(kpti_ng_temp_pgd, __pa(alloc), + KPTI_NG_TEMP_VA, PAGE_SIZE, PAGE_KERNEL, + kpti_ng_pgd_alloc, 0); + if (ret) + panic("Failed to create page tables\n"); } cpu_install_idmap(); @@ -1233,9 +1335,9 @@ static int __init map_entry_trampoline(void) /* Map only the text into the trampoline page table */ memset(tramp_pg_dir, 0, PGD_SIZE); - __create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS, - entry_tramp_text_size(), prot, - pgd_pgtable_alloc_init_mm, NO_BLOCK_MAPPINGS); + early_create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS, + entry_tramp_text_size(), prot, + pgd_pgtable_alloc_init_mm, NO_BLOCK_MAPPINGS); /* Map both the text and data into the kernel page table */ for (i = 0; i < DIV_ROUND_UP(entry_tramp_text_size(), PAGE_SIZE); i++) @@ -1877,23 +1979,28 @@ int arch_add_memory(int nid, u64 start, u64 size, if (force_pte_mapping()) flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; - __create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start), - size, params->pgprot, pgd_pgtable_alloc_init_mm, - flags); + ret = __create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start), + size, params->pgprot, pgd_pgtable_alloc_init_mm, + flags); + if (ret) + goto err; memblock_clear_nomap(start, size); ret = __add_pages(nid, start >> PAGE_SHIFT, size >> PAGE_SHIFT, params); if (ret) - __remove_pgd_mapping(swapper_pg_dir, - __phys_to_virt(start), size); - else { - /* Address of hotplugged memory can be smaller */ - max_pfn = max(max_pfn, PFN_UP(start + size)); - max_low_pfn = max_pfn; - } + goto err; + + /* Address of hotplugged memory can be smaller */ + max_pfn = max(max_pfn, PFN_UP(start + size)); + max_low_pfn = max_pfn; + + return 0; +err: + __remove_pgd_mapping(swapper_pg_dir, + __phys_to_virt(start), size); return ret; } diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index 5135f2d66958..f0e784b963e6 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -148,7 +148,7 @@ static int change_memory_common(unsigned long addr, int numpages, unsigned long size = PAGE_SIZE * numpages; unsigned long end = start + size; struct vm_struct *area; - int i; + int ret; if (!PAGE_ALIGNED(addr)) { start &= PAGE_MASK; @@ -184,9 +184,13 @@ static int change_memory_common(unsigned long addr, int numpages, */ if (rodata_full && (pgprot_val(set_mask) == PTE_RDONLY || pgprot_val(clear_mask) == PTE_RDONLY)) { - for (i = 0; i < area->nr_pages; i++) { - __change_memory_common((u64)page_address(area->pages[i]), - PAGE_SIZE, set_mask, clear_mask); + unsigned long idx = (start - 
(unsigned long)kasan_reset_tag(area->addr)) + >> PAGE_SHIFT; + for (; numpages; idx++, numpages--) { + ret = __change_memory_common((u64)page_address(area->pages[idx]), + PAGE_SIZE, set_mask, clear_mask); + if (ret) + return ret; } } diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c index 8160cff35089..bf5110b91e2f 100644 --- a/arch/arm64/mm/pgd.c +++ b/arch/arm64/mm/pgd.c @@ -56,7 +56,7 @@ void __init pgtable_cache_init(void) * With 52-bit physical addresses, the architecture requires the * top-level table to be aligned to at least 64 bytes. */ - BUILD_BUG_ON(PGD_SIZE < 64); + BUILD_BUG_ON(!IS_ALIGNED(PGD_SIZE, 64)); #endif /* diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 86818511962b..01e868116448 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -23,15 +23,18 @@ #include <asm/sysreg.h> #ifdef CONFIG_ARM64_64K_PAGES -#define TCR_TG_FLAGS TCR_TG0_64K | TCR_TG1_64K +#define TCR_TG_FLAGS ((TCR_EL1_TG0_64K << TCR_EL1_TG0_SHIFT) |\ + (TCR_EL1_TG1_64K << TCR_EL1_TG1_SHIFT)) #elif defined(CONFIG_ARM64_16K_PAGES) -#define TCR_TG_FLAGS TCR_TG0_16K | TCR_TG1_16K +#define TCR_TG_FLAGS ((TCR_EL1_TG0_16K << TCR_EL1_TG0_SHIFT) |\ + (TCR_EL1_TG1_16K << TCR_EL1_TG1_SHIFT)) #else /* CONFIG_ARM64_4K_PAGES */ -#define TCR_TG_FLAGS TCR_TG0_4K | TCR_TG1_4K +#define TCR_TG_FLAGS ((TCR_EL1_TG0_4K << TCR_EL1_TG0_SHIFT) |\ + (TCR_EL1_TG1_4K << TCR_EL1_TG1_SHIFT)) #endif #ifdef CONFIG_RANDOMIZE_BASE -#define TCR_KASLR_FLAGS TCR_NFD1 +#define TCR_KASLR_FLAGS TCR_EL1_NFD1 #else #define TCR_KASLR_FLAGS 0 #endif @@ -40,23 +43,30 @@ #define TCR_CACHE_FLAGS TCR_IRGN_WBWA | TCR_ORGN_WBWA #ifdef CONFIG_KASAN_SW_TAGS -#define TCR_KASAN_SW_FLAGS TCR_TBI1 | TCR_TBID1 +#define TCR_KASAN_SW_FLAGS TCR_EL1_TBI1 | TCR_EL1_TBID1 #else #define TCR_KASAN_SW_FLAGS 0 #endif #ifdef CONFIG_KASAN_HW_TAGS -#define TCR_MTE_FLAGS TCR_TCMA1 | TCR_TBI1 | TCR_TBID1 +#define TCR_MTE_FLAGS TCR_EL1_TCMA1 | TCR_EL1_TBI1 | TCR_EL1_TBID1 #elif defined(CONFIG_ARM64_MTE) /* * The mte_zero_clear_page_tags() implementation uses DC GZVA, which relies on * TBI being enabled at EL1. */ -#define TCR_MTE_FLAGS TCR_TBI1 | TCR_TBID1 +#define TCR_MTE_FLAGS TCR_EL1_TBI1 | TCR_EL1_TBID1 #else #define TCR_MTE_FLAGS 0 #endif +#define TCR_IRGN_WBWA ((TCR_EL1_IRGN0_WBWA << TCR_EL1_IRGN0_SHIFT) |\ + (TCR_EL1_IRGN1_WBWA << TCR_EL1_IRGN1_SHIFT)) +#define TCR_ORGN_WBWA ((TCR_EL1_ORGN0_WBWA << TCR_EL1_ORGN0_SHIFT) |\ + (TCR_EL1_ORGN1_WBWA << TCR_EL1_ORGN1_SHIFT)) +#define TCR_SHARED ((TCR_EL1_SH0_INNER << TCR_EL1_SH0_SHIFT) |\ + (TCR_EL1_SH1_INNER << TCR_EL1_SH1_SHIFT)) + /* * Default MAIR_EL1. MT_NORMAL_TAGGED is initially mapped as Normal memory and * changed during mte_cpu_setup to Normal Tagged if the system supports MTE. 
@@ -129,7 +139,7 @@ SYM_FUNC_START(cpu_do_resume) /* Don't change t0sz here, mask those bits when restoring */ mrs x7, tcr_el1 - bfi x8, x7, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH + bfi x8, x7, TCR_EL1_T0SZ_SHIFT, TCR_EL1_T0SZ_WIDTH msr tcr_el1, x8 msr vbar_el1, x9 @@ -481,8 +491,8 @@ SYM_FUNC_START(__cpu_setup) tcr2 .req x15 mov_q mair, MAIR_EL1_SET mov_q tcr, TCR_T0SZ(IDMAP_VA_BITS) | TCR_T1SZ(VA_BITS_MIN) | TCR_CACHE_FLAGS | \ - TCR_SHARED | TCR_TG_FLAGS | TCR_KASLR_FLAGS | TCR_ASID16 | \ - TCR_TBI0 | TCR_A1 | TCR_KASAN_SW_FLAGS | TCR_MTE_FLAGS + TCR_SHARED | TCR_TG_FLAGS | TCR_KASLR_FLAGS | TCR_EL1_AS | \ + TCR_EL1_TBI0 | TCR_EL1_A1 | TCR_KASAN_SW_FLAGS | TCR_MTE_FLAGS mov tcr2, xzr tcr_clear_errata_bits tcr, x9, x5 @@ -492,7 +502,7 @@ SYM_FUNC_START(__cpu_setup) alternative_if ARM64_HAS_VA52 tcr_set_t1sz tcr, x9 #ifdef CONFIG_ARM64_LPA2 - orr tcr, tcr, #TCR_DS + orr tcr, tcr, #TCR_EL1_DS #endif alternative_else_nop_endif #endif @@ -500,7 +510,7 @@ alternative_else_nop_endif /* * Set the IPS bits in TCR_EL1. */ - tcr_compute_pa_size tcr, #TCR_IPS_SHIFT, x5, x6 + tcr_compute_pa_size tcr, #TCR_EL1_IPS_SHIFT, x5, x6 #ifdef CONFIG_ARM64_HW_AFDBM /* * Enable hardware update of the Access Flags bit. @@ -510,7 +520,7 @@ alternative_else_nop_endif mrs x9, ID_AA64MMFR1_EL1 ubfx x9, x9, ID_AA64MMFR1_EL1_HAFDBS_SHIFT, #4 cbz x9, 1f - orr tcr, tcr, #TCR_HA // hardware Access flag update + orr tcr, tcr, #TCR_EL1_HA // hardware Access flag update #ifdef CONFIG_ARM64_HAFT cmp x9, ID_AA64MMFR1_EL1_HAFDBS_HAFT b.lt 1f diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 0c9a50a1e73e..afd05b41ea9e 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -3054,7 +3054,7 @@ bool bpf_jit_supports_exceptions(void) /* We unwind through both kernel frames starting from within bpf_throw * call and BPF frames. Therefore we require FP unwinder to be enabled * to walk kernel frames and reach BPF frames in the stack trace. 
- * ARM64 kernel is aways compiled with CONFIG_FRAME_POINTER=y + * ARM64 kernel is always compiled with CONFIG_FRAME_POINTER=y */ return true; } diff --git a/arch/arm64/tools/gen-sysreg.awk b/arch/arm64/tools/gen-sysreg.awk index bbbb812603e8..86860ab672dc 100755 --- a/arch/arm64/tools/gen-sysreg.awk +++ b/arch/arm64/tools/gen-sysreg.awk @@ -44,21 +44,38 @@ function expect_fields(nf) { # Print a CPP macro definition, padded with spaces so that the macro bodies # line up in a column -function define(name, val) { - printf "%-56s%s\n", "#define " name, val +function define(prefix, name, val) { + printf "%-56s%s\n", "#define " prefix name, val +} + +# Same as above, but without a prefix +function define_reg(name, val) { + define(null, name, val) } # Print standard BITMASK/SHIFT/WIDTH CPP definitions for a field -function define_field(reg, field, msb, lsb) { - define(reg "_" field, "GENMASK(" msb ", " lsb ")") - define(reg "_" field "_MASK", "GENMASK(" msb ", " lsb ")") - define(reg "_" field "_SHIFT", lsb) - define(reg "_" field "_WIDTH", msb - lsb + 1) +function define_field(prefix, reg, field, msb, lsb) { + define(prefix, reg "_" field, "GENMASK(" msb ", " lsb ")") + define(prefix, reg "_" field "_MASK", "GENMASK(" msb ", " lsb ")") + define(prefix, reg "_" field "_SHIFT", lsb) + define(prefix, reg "_" field "_WIDTH", msb - lsb + 1) } # Print a field _SIGNED definition for a field -function define_field_sign(reg, field, sign) { - define(reg "_" field "_SIGNED", sign) +function define_field_sign(prefix, reg, field, sign) { + define(prefix, reg "_" field "_SIGNED", sign) +} + +# Print the Res0, Res1, Unkn masks +function define_resx_unkn(prefix, reg, res0, res1, unkn) { + if (res0 != null) + define(prefix, reg "_RES0", "(" res0 ")") + if (res1 != null) + define(prefix, reg "_RES1", "(" res1 ")") + if (unkn != null) + define(prefix, reg "_UNKN", "(" unkn ")") + if (res0 != null || res1 != null || unkn != null) + print "" } # Parse a "<msb>[:<lsb>]" string into the global variables @msb and @lsb @@ -128,18 +145,17 @@ $1 == "SysregFields" && block_current() == "Root" { next_bit = 63 + delete seen_prefixes + next } $1 == "EndSysregFields" && block_current() == "SysregFields" { expect_fields(1) - if (next_bit > 0) + if (next_bit >= 0) fatal("Unspecified bits in " reg) - define(reg "_RES0", "(" res0 ")") - define(reg "_RES1", "(" res1 ")") - define(reg "_UNKN", "(" unkn ")") - print "" + define_resx_unkn(prefix, reg, res0, res1, unkn) reg = null res0 = null @@ -170,35 +186,31 @@ $1 == "Sysreg" && block_current() == "Root" { fatal("Duplicate Sysreg definition for " reg) defined_regs[reg] = 1 - define("REG_" reg, "S" op0 "_" op1 "_C" crn "_C" crm "_" op2) - define("SYS_" reg, "sys_reg(" op0 ", " op1 ", " crn ", " crm ", " op2 ")") + define_reg("REG_" reg, "S" op0 "_" op1 "_C" crn "_C" crm "_" op2) + define_reg("SYS_" reg, "sys_reg(" op0 ", " op1 ", " crn ", " crm ", " op2 ")") - define("SYS_" reg "_Op0", op0) - define("SYS_" reg "_Op1", op1) - define("SYS_" reg "_CRn", crn) - define("SYS_" reg "_CRm", crm) - define("SYS_" reg "_Op2", op2) + define_reg("SYS_" reg "_Op0", op0) + define_reg("SYS_" reg "_Op1", op1) + define_reg("SYS_" reg "_CRn", crn) + define_reg("SYS_" reg "_CRm", crm) + define_reg("SYS_" reg "_Op2", op2) print "" + prefix = null next_bit = 63 + delete seen_prefixes + next } $1 == "EndSysreg" && block_current() == "Sysreg" { expect_fields(1) - if (next_bit > 0) + if (next_bit >= 0) fatal("Unspecified bits in " reg) - if (res0 != null) - define(reg "_RES0", "(" res0 ")") - if (res1 != 
null) - define(reg "_RES1", "(" res1 ")") - if (unkn != null) - define(reg "_UNKN", "(" unkn ")") - if (res0 != null || res1 != null || unkn != null) - print "" + define_resx_unkn(prefix, reg, res0, res1, unkn) reg = null op0 = null @@ -209,6 +221,7 @@ $1 == "EndSysreg" && block_current() == "Sysreg" { res0 = null res1 = null unkn = null + prefix = null block_pop() next @@ -225,7 +238,7 @@ $1 == "EndSysreg" && block_current() == "Sysreg" { print "/* For " reg " fields see " $2 " */" print "" - next_bit = 0 + next_bit = -1 res0 = null res1 = null unkn = null @@ -233,8 +246,7 @@ $1 == "EndSysreg" && block_current() == "Sysreg" { next } - -$1 == "Res0" && (block_current() == "Sysreg" || block_current() == "SysregFields") { +$1 == "Res0" && (block_current() == "Sysreg" || block_current() == "SysregFields" || block_current() == "Prefix") { expect_fields(2) parse_bitdef(reg, "RES0", $2) field = "RES0_" msb "_" lsb @@ -244,7 +256,7 @@ $1 == "Res0" && (block_current() == "Sysreg" || block_current() == "SysregFields next } -$1 == "Res1" && (block_current() == "Sysreg" || block_current() == "SysregFields") { +$1 == "Res1" && (block_current() == "Sysreg" || block_current() == "SysregFields" || block_current() == "Prefix") { expect_fields(2) parse_bitdef(reg, "RES1", $2) field = "RES1_" msb "_" lsb @@ -254,7 +266,7 @@ $1 == "Res1" && (block_current() == "Sysreg" || block_current() == "SysregFields next } -$1 == "Unkn" && (block_current() == "Sysreg" || block_current() == "SysregFields") { +$1 == "Unkn" && (block_current() == "Sysreg" || block_current() == "SysregFields" || block_current() == "Prefix") { expect_fields(2) parse_bitdef(reg, "UNKN", $2) field = "UNKN_" msb "_" lsb @@ -264,62 +276,62 @@ $1 == "Unkn" && (block_current() == "Sysreg" || block_current() == "SysregFields next } -$1 == "Field" && (block_current() == "Sysreg" || block_current() == "SysregFields") { +$1 == "Field" && (block_current() == "Sysreg" || block_current() == "SysregFields" || block_current() == "Prefix") { expect_fields(3) field = $3 parse_bitdef(reg, field, $2) - define_field(reg, field, msb, lsb) + define_field(prefix, reg, field, msb, lsb) print "" next } -$1 == "Raz" && (block_current() == "Sysreg" || block_current() == "SysregFields") { +$1 == "Raz" && (block_current() == "Sysreg" || block_current() == "SysregFields" || block_current() == "Prefix") { expect_fields(2) parse_bitdef(reg, field, $2) next } -$1 == "SignedEnum" && (block_current() == "Sysreg" || block_current() == "SysregFields") { +$1 == "SignedEnum" && (block_current() == "Sysreg" || block_current() == "SysregFields" || block_current() == "Prefix") { block_push("Enum") expect_fields(3) field = $3 parse_bitdef(reg, field, $2) - define_field(reg, field, msb, lsb) - define_field_sign(reg, field, "true") + define_field(prefix, reg, field, msb, lsb) + define_field_sign(prefix, reg, field, "true") delete seen_enum_vals next } -$1 == "UnsignedEnum" && (block_current() == "Sysreg" || block_current() == "SysregFields") { +$1 == "UnsignedEnum" && (block_current() == "Sysreg" || block_current() == "SysregFields" || block_current() == "Prefix") { block_push("Enum") expect_fields(3) field = $3 parse_bitdef(reg, field, $2) - define_field(reg, field, msb, lsb) - define_field_sign(reg, field, "false") + define_field(prefix, reg, field, msb, lsb) + define_field_sign(prefix, reg, field, "false") delete seen_enum_vals next } -$1 == "Enum" && (block_current() == "Sysreg" || block_current() == "SysregFields") { +$1 == "Enum" && (block_current() == "Sysreg" || block_current() 
== "SysregFields" || block_current() == "Prefix") { block_push("Enum") expect_fields(3) field = $3 parse_bitdef(reg, field, $2) - define_field(reg, field, msb, lsb) + define_field(prefix, reg, field, msb, lsb) delete seen_enum_vals @@ -349,7 +361,47 @@ $1 == "EndEnum" && block_current() == "Enum" { fatal("Duplicate Enum value " val " for " name) seen_enum_vals[val] = 1 - define(reg "_" field "_" name, "UL(" val ")") + define(prefix, reg "_" field "_" name, "UL(" val ")") + next +} + +$1 == "Prefix" && (block_current() == "Sysreg" || block_current() == "SysregFields") { + block_push("Prefix") + + expect_fields(2) + + if (next_bit < 63) + fatal("Prefixed fields must precede non-prefixed fields (" reg ")") + + prefix = $2 "_" + + if (prefix in seen_prefixes) + fatal("Duplicate prefix " prefix " for " reg) + seen_prefixes[prefix] = 1 + + res0 = "UL(0)" + res1 = "UL(0)" + unkn = "UL(0)" + next_bit = 63 + + next +} + +$1 == "EndPrefix" && block_current() == "Prefix" { + expect_fields(1) + if (next_bit >= 0) + fatal("Unspecified bits in prefix " prefix " for " reg) + + define_resx_unkn(prefix, reg, res0, res1, unkn) + + prefix = null + res0 = "UL(0)" + res1 = "UL(0)" + unkn = "UL(0)" + next_bit = 63 + + block_pop() + next } diff --git a/arch/arm64/tools/syscall_32.tbl b/arch/arm64/tools/syscall_32.tbl index 8d9088bc577d..8cdfe5d4dac9 100644 --- a/arch/arm64/tools/syscall_32.tbl +++ b/arch/arm64/tools/syscall_32.tbl @@ -481,3 +481,4 @@ 467 common open_tree_attr sys_open_tree_attr 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr +470 common listns sys_listns diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 1c6cdf9d54bb..8921b51866d6 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -4669,6 +4669,27 @@ Field 1 V3 Field 0 En EndSysreg +Sysreg ICH_VMCR_EL2 3 4 12 11 7 +Prefix FEAT_GCIE +Res0 63:32 +Field 31:27 VPMR +Res0 26:1 +Field 0 EN +EndPrefix +Res0 63:32 +Field 31:24 VPMR +Field 23:21 VBPR0 +Field 20:18 VBPR1 +Res0 17:10 +Field 9 VEOIM +Res0 8:5 +Field 4 VCBPR +Field 3 VFIQEn +Field 2 VAckCtl +Field 1 VENG1 +Field 0 VENG0 +EndSysreg + Sysreg CONTEXTIDR_EL2 3 4 13 0 1 Fields CONTEXTIDR_ELx EndSysreg diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig index 3e838c229cd5..50e1304e7a6f 100644 --- a/arch/loongarch/configs/loongson3_defconfig +++ b/arch/loongarch/configs/loongson3_defconfig @@ -917,7 +917,6 @@ CONFIG_MMC=y CONFIG_MMC_LOONGSON2=m CONFIG_INFINIBAND=m CONFIG_EDAC=y -# CONFIG_EDAC_LEGACY_SYSFS is not set CONFIG_EDAC_LOONGSON=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_EFI=y diff --git a/arch/loongarch/include/asm/cpu-features.h b/arch/loongarch/include/asm/cpu-features.h index fc83bb32f9f0..bd5f0457ad21 100644 --- a/arch/loongarch/include/asm/cpu-features.h +++ b/arch/loongarch/include/asm/cpu-features.h @@ -67,6 +67,8 @@ #define cpu_has_hypervisor cpu_opt(LOONGARCH_CPU_HYPERVISOR) #define cpu_has_ptw cpu_opt(LOONGARCH_CPU_PTW) #define cpu_has_lspw cpu_opt(LOONGARCH_CPU_LSPW) +#define cpu_has_msgint cpu_opt(LOONGARCH_CPU_MSGINT) #define cpu_has_avecint cpu_opt(LOONGARCH_CPU_AVECINT) +#define cpu_has_redirectint cpu_opt(LOONGARCH_CPU_REDIRECTINT) #endif /* __ASM_CPU_FEATURES_H */ diff --git a/arch/loongarch/include/asm/cpu.h b/arch/loongarch/include/asm/cpu.h index dfb982fe8701..f3efb00b6141 100644 --- a/arch/loongarch/include/asm/cpu.h +++ b/arch/loongarch/include/asm/cpu.h @@ -55,6 +55,27 @@ enum cpu_type_enum { CPU_LAST }; +static inline char *id_to_core_name(unsigned int 
id) +{ + if ((id & PRID_COMP_MASK) != PRID_COMP_LOONGSON) + return "Unknown"; + + switch (id & PRID_SERIES_MASK) { + case PRID_SERIES_LA132: + return "LA132"; + case PRID_SERIES_LA264: + return "LA264"; + case PRID_SERIES_LA364: + return "LA364"; + case PRID_SERIES_LA464: + return "LA464"; + case PRID_SERIES_LA664: + return "LA664"; + default: + return "Unknown"; + } +} + #endif /* !__ASSEMBLER__ */ /* @@ -101,7 +122,9 @@ enum cpu_type_enum { #define CPU_FEATURE_HYPERVISOR 26 /* CPU has hypervisor (running in VM) */ #define CPU_FEATURE_PTW 27 /* CPU has hardware page table walker */ #define CPU_FEATURE_LSPW 28 /* CPU has LSPW (lddir/ldpte instructions) */ -#define CPU_FEATURE_AVECINT 29 /* CPU has AVEC interrupt */ +#define CPU_FEATURE_MSGINT 29 /* CPU has MSG interrupt */ +#define CPU_FEATURE_AVECINT 30 /* CPU has AVEC interrupt */ +#define CPU_FEATURE_REDIRECTINT 31 /* CPU has interrupt remapping */ #define LOONGARCH_CPU_CPUCFG BIT_ULL(CPU_FEATURE_CPUCFG) #define LOONGARCH_CPU_LAM BIT_ULL(CPU_FEATURE_LAM) @@ -132,6 +155,8 @@ enum cpu_type_enum { #define LOONGARCH_CPU_HYPERVISOR BIT_ULL(CPU_FEATURE_HYPERVISOR) #define LOONGARCH_CPU_PTW BIT_ULL(CPU_FEATURE_PTW) #define LOONGARCH_CPU_LSPW BIT_ULL(CPU_FEATURE_LSPW) +#define LOONGARCH_CPU_MSGINT BIT_ULL(CPU_FEATURE_MSGINT) #define LOONGARCH_CPU_AVECINT BIT_ULL(CPU_FEATURE_AVECINT) +#define LOONGARCH_CPU_REDIRECTINT BIT_ULL(CPU_FEATURE_REDIRECTINT) #endif /* _ASM_CPU_H */ diff --git a/arch/loongarch/include/asm/hw_breakpoint.h b/arch/loongarch/include/asm/hw_breakpoint.h index 13b2462f3d8c..5faa97a87a9e 100644 --- a/arch/loongarch/include/asm/hw_breakpoint.h +++ b/arch/loongarch/include/asm/hw_breakpoint.h @@ -134,13 +134,13 @@ static inline void hw_breakpoint_thread_switch(struct task_struct *next) /* Determine number of BRP registers available. */ static inline int get_num_brps(void) { - return csr_read64(LOONGARCH_CSR_FWPC) & CSR_FWPC_NUM; + return csr_read32(LOONGARCH_CSR_FWPC) & CSR_FWPC_NUM; } /* Determine number of WRP registers available. 
*/ static inline int get_num_wrps(void) { - return csr_read64(LOONGARCH_CSR_MWPC) & CSR_MWPC_NUM; + return csr_read32(LOONGARCH_CSR_MWPC) & CSR_MWPC_NUM; } #endif /* __KERNEL__ */ diff --git a/arch/loongarch/include/asm/io.h b/arch/loongarch/include/asm/io.h index eaff72b38dc8..0130185e0349 100644 --- a/arch/loongarch/include/asm/io.h +++ b/arch/loongarch/include/asm/io.h @@ -14,7 +14,7 @@ #include <asm/pgtable-bits.h> #include <asm/string.h> -extern void __init __iomem *early_ioremap(u64 phys_addr, unsigned long size); +extern void __init __iomem *early_ioremap(phys_addr_t phys_addr, unsigned long size); extern void __init early_iounmap(void __iomem *addr, unsigned long size); #define early_memremap early_ioremap @@ -25,6 +25,9 @@ extern void __init early_iounmap(void __iomem *addr, unsigned long size); static inline void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size, pgprot_t prot) { + if (offset > TO_PHYS_MASK) + return NULL; + switch (pgprot_val(prot) & _CACHE_MASK) { case _CACHE_CC: return (void __iomem *)(unsigned long)(CACHE_BASE + offset); diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h index 09dfd7eb406e..3de03cb864b2 100644 --- a/arch/loongarch/include/asm/loongarch.h +++ b/arch/loongarch/include/asm/loongarch.h @@ -128,6 +128,7 @@ #define CPUCFG6_PMNUM GENMASK(7, 4) #define CPUCFG6_PMNUM_SHIFT 4 #define CPUCFG6_PMBITS GENMASK(13, 8) +#define CPUCFG6_PMBITS_SHIFT 8 #define CPUCFG6_UPM BIT(14) #define LOONGARCH_CPUCFG16 0x10 @@ -1137,6 +1138,7 @@ #define IOCSRF_FLATMODE BIT_ULL(10) #define IOCSRF_VM BIT_ULL(11) #define IOCSRF_AVEC BIT_ULL(15) +#define IOCSRF_REDIRECT BIT_ULL(16) #define LOONGARCH_IOCSR_VENDOR 0x10 diff --git a/arch/loongarch/include/asm/pgalloc.h b/arch/loongarch/include/asm/pgalloc.h index 1c63a9d9a6d3..08dcc698ec18 100644 --- a/arch/loongarch/include/asm/pgalloc.h +++ b/arch/loongarch/include/asm/pgalloc.h @@ -88,7 +88,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) { pud_t *pud; - struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL & ~__GFP_HIGHMEM, 0); + struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL, 0); if (!ptdesc) return NULL; diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index bd128696e96d..03fb60432fde 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -424,6 +424,9 @@ static inline unsigned long pte_accessible(struct mm_struct *mm, pte_t a) static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { + if (pte_val(pte) & _PAGE_DIRTY) + pte_val(pte) |= _PAGE_MODIFIED; + return __pte((pte_val(pte) & _PAGE_CHG_MASK) | (pgprot_val(newprot) & ~_PAGE_CHG_MASK)); } @@ -547,9 +550,11 @@ static inline struct page *pmd_page(pmd_t pmd) static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) { - pmd_val(pmd) = (pmd_val(pmd) & _HPAGE_CHG_MASK) | - (pgprot_val(newprot) & ~_HPAGE_CHG_MASK); - return pmd; + if (pmd_val(pmd) & _PAGE_DIRTY) + pmd_val(pmd) |= _PAGE_MODIFIED; + + return __pmd((pmd_val(pmd) & _HPAGE_CHG_MASK) | + (pgprot_val(newprot) & ~_HPAGE_CHG_MASK)); } static inline pmd_t pmd_mkinvalid(pmd_t pmd) diff --git a/arch/loongarch/include/uapi/asm/ptrace.h b/arch/loongarch/include/uapi/asm/ptrace.h index aafb3cd9e943..215e0f9e8aa3 100644 --- a/arch/loongarch/include/uapi/asm/ptrace.h +++ b/arch/loongarch/include/uapi/asm/ptrace.h @@ -10,10 +10,6 @@ #include <linux/types.h> -#ifndef 
__KERNEL__ -#include <stdint.h> -#endif - /* * For PTRACE_{POKE,PEEK}USR. 0 - 31 are GPRs, * 32 is syscall's original ARG0, 33 is PC, 34 is BADVADDR. @@ -41,44 +37,44 @@ struct user_pt_regs { } __attribute__((aligned(8))); struct user_fp_state { - uint64_t fpr[32]; - uint64_t fcc; - uint32_t fcsr; + __u64 fpr[32]; + __u64 fcc; + __u32 fcsr; }; struct user_lsx_state { /* 32 registers, 128 bits width per register. */ - uint64_t vregs[32*2]; + __u64 vregs[32*2]; }; struct user_lasx_state { /* 32 registers, 256 bits width per register. */ - uint64_t vregs[32*4]; + __u64 vregs[32*4]; }; struct user_lbt_state { - uint64_t scr[4]; - uint32_t eflags; - uint32_t ftop; + __u64 scr[4]; + __u32 eflags; + __u32 ftop; }; struct user_watch_state { - uint64_t dbg_info; + __u64 dbg_info; struct { - uint64_t addr; - uint64_t mask; - uint32_t ctrl; - uint32_t pad; + __u64 addr; + __u64 mask; + __u32 ctrl; + __u32 pad; } dbg_regs[8]; }; struct user_watch_state_v2 { - uint64_t dbg_info; + __u64 dbg_info; struct { - uint64_t addr; - uint64_t mask; - uint32_t ctrl; - uint32_t pad; + __u64 addr; + __u64 mask; + __u32 ctrl; + __u32 pad; } dbg_regs[14]; }; diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c index cbfce2872d71..a2060a24b39f 100644 --- a/arch/loongarch/kernel/cpu-probe.c +++ b/arch/loongarch/kernel/cpu-probe.c @@ -157,6 +157,8 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c) c->options |= LOONGARCH_CPU_TLB; if (config & CPUCFG1_IOCSR) c->options |= LOONGARCH_CPU_IOCSR; + if (config & CPUCFG1_MSGINT) + c->options |= LOONGARCH_CPU_MSGINT; if (config & CPUCFG1_UAL) { c->options |= LOONGARCH_CPU_UAL; elf_hwcap |= HWCAP_LOONGARCH_UAL; @@ -275,7 +277,7 @@ static inline void cpu_probe_loongson(struct cpuinfo_loongarch *c, unsigned int uint32_t config; uint64_t *vendor = (void *)(&cpu_full_name[VENDOR_OFFSET]); uint64_t *cpuname = (void *)(&cpu_full_name[CPUNAME_OFFSET]); - const char *core_name = "Unknown"; + const char *core_name = id_to_core_name(c->processor_id); switch (BIT(fls(c->isa_level) - 1)) { case LOONGARCH_CPU_ISA_LA32R: @@ -289,35 +291,23 @@ static inline void cpu_probe_loongson(struct cpuinfo_loongarch *c, unsigned int break; } - switch (c->processor_id & PRID_SERIES_MASK) { - case PRID_SERIES_LA132: - core_name = "LA132"; - break; - case PRID_SERIES_LA264: - core_name = "LA264"; - break; - case PRID_SERIES_LA364: - core_name = "LA364"; - break; - case PRID_SERIES_LA464: - core_name = "LA464"; - break; - case PRID_SERIES_LA664: - core_name = "LA664"; - break; - } - pr_info("%s Processor probed (%s Core)\n", __cpu_family[cpu], core_name); - if (!cpu_has_iocsr) + if (!cpu_has_iocsr) { + __cpu_full_name[cpu] = "Unknown"; return; - - if (!__cpu_full_name[cpu]) - __cpu_full_name[cpu] = cpu_full_name; + } *vendor = iocsr_read64(LOONGARCH_IOCSR_VENDOR); *cpuname = iocsr_read64(LOONGARCH_IOCSR_CPUNAME); + if (!__cpu_full_name[cpu]) { + if (((char *)vendor)[0] == 0) + __cpu_full_name[cpu] = "Unknown"; + else + __cpu_full_name[cpu] = cpu_full_name; + } + config = iocsr_read32(LOONGARCH_IOCSR_FEATURES); if (config & IOCSRF_CSRIPI) c->options |= LOONGARCH_CPU_CSRIPI; @@ -331,6 +321,8 @@ static inline void cpu_probe_loongson(struct cpuinfo_loongarch *c, unsigned int c->options |= LOONGARCH_CPU_EIODECODE; if (config & IOCSRF_AVEC) c->options |= LOONGARCH_CPU_AVECINT; + if (config & IOCSRF_REDIRECT) + c->options |= LOONGARCH_CPU_REDIRECTINT; if (config & IOCSRF_VM) c->options |= LOONGARCH_CPU_HYPERVISOR; } diff --git a/arch/loongarch/kernel/kexec_efi.c 
b/arch/loongarch/kernel/kexec_efi.c index 45121b914f8f..5ee78ebb1546 100644 --- a/arch/loongarch/kernel/kexec_efi.c +++ b/arch/loongarch/kernel/kexec_efi.c @@ -42,7 +42,7 @@ static void *efi_kexec_load(struct kimage *image, { int ret; unsigned long text_offset, kernel_segment_number; - struct kexec_buf kbuf; + struct kexec_buf kbuf = {}; struct kexec_segment *kernel_segment; struct loongarch_image_header *h; diff --git a/arch/loongarch/kernel/kexec_elf.c b/arch/loongarch/kernel/kexec_elf.c index 97b2f049801a..1b6b64744c7f 100644 --- a/arch/loongarch/kernel/kexec_elf.c +++ b/arch/loongarch/kernel/kexec_elf.c @@ -59,7 +59,7 @@ static void *elf_kexec_load(struct kimage *image, int ret; unsigned long text_offset, kernel_segment_number; struct elfhdr ehdr; - struct kexec_buf kbuf; + struct kexec_buf kbuf = {}; struct kexec_elf_info elf_info; struct kexec_segment *kernel_segment; diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c index e4b2bbc47e62..d7fafda1d541 100644 --- a/arch/loongarch/kernel/machine_kexec.c +++ b/arch/loongarch/kernel/machine_kexec.c @@ -39,34 +39,12 @@ static unsigned long systable_ptr; static unsigned long start_addr; static unsigned long first_ind_entry; -static void kexec_image_info(const struct kimage *kimage) -{ - unsigned long i; - - pr_debug("kexec kimage info:\n"); - pr_debug("\ttype: %d\n", kimage->type); - pr_debug("\tstart: %lx\n", kimage->start); - pr_debug("\thead: %lx\n", kimage->head); - pr_debug("\tnr_segments: %lu\n", kimage->nr_segments); - - for (i = 0; i < kimage->nr_segments; i++) { - pr_debug("\t segment[%lu]: %016lx - %016lx", i, - kimage->segment[i].mem, - kimage->segment[i].mem + kimage->segment[i].memsz); - pr_debug("\t\t0x%lx bytes, %lu pages\n", - (unsigned long)kimage->segment[i].memsz, - (unsigned long)kimage->segment[i].memsz / PAGE_SIZE); - } -} - int machine_kexec_prepare(struct kimage *kimage) { int i; char *bootloader = "kexec"; void *cmdline_ptr = (void *)KEXEC_CMDLINE_ADDR; - kexec_image_info(kimage); - kimage->arch.efi_boot = fw_arg0; kimage->arch.systable_ptr = fw_arg2; @@ -259,6 +237,7 @@ void machine_crash_shutdown(struct pt_regs *regs) #ifdef CONFIG_SMP crash_smp_send_stop(); #endif + machine_kexec_mask_interrupts(); cpumask_set_cpu(crashing_cpu, &cpus_in_crash); pr_info("Starting crashdump kernel...\n"); @@ -296,6 +275,7 @@ void machine_kexec(struct kimage *image) /* We do not want to be bothered. 
*/ local_irq_disable(); + machine_kexec_mask_interrupts(); pr_notice("EFI boot flag: 0x%lx\n", efi_boot); pr_notice("Command line addr: 0x%lx\n", cmdline_ptr); diff --git a/arch/loongarch/kernel/machine_kexec_file.c b/arch/loongarch/kernel/machine_kexec_file.c index dda236b51a88..fb57026f5f25 100644 --- a/arch/loongarch/kernel/machine_kexec_file.c +++ b/arch/loongarch/kernel/machine_kexec_file.c @@ -143,7 +143,7 @@ int load_other_segments(struct kimage *image, unsigned long initrd_load_addr = 0; unsigned long orig_segments = image->nr_segments; char *modified_cmdline = NULL; - struct kexec_buf kbuf; + struct kexec_buf kbuf = {}; kbuf.image = image; /* Don't allocate anything below the kernel */ diff --git a/arch/loongarch/kernel/mem.c b/arch/loongarch/kernel/mem.c index aed901c57fb4..8ab1ffedc52c 100644 --- a/arch/loongarch/kernel/mem.c +++ b/arch/loongarch/kernel/mem.c @@ -13,7 +13,7 @@ void __init memblock_init(void) { u32 mem_type; - u64 mem_start, mem_end, mem_size; + u64 mem_start, mem_size; efi_memory_desc_t *md; /* Parse memory information */ @@ -21,7 +21,6 @@ void __init memblock_init(void) mem_type = md->type; mem_start = md->phys_addr; mem_size = md->num_pages << EFI_PAGE_SHIFT; - mem_end = mem_start + mem_size; switch (mem_type) { case EFI_LOADER_CODE: @@ -31,8 +30,6 @@ void __init memblock_init(void) case EFI_PERSISTENT_MEMORY: case EFI_CONVENTIONAL_MEMORY: memblock_add(mem_start, mem_size); - if (max_low_pfn < (mem_end >> PAGE_SHIFT)) - max_low_pfn = mem_end >> PAGE_SHIFT; break; case EFI_PAL_CODE: case EFI_UNUSABLE_MEMORY: @@ -49,6 +46,8 @@ void __init memblock_init(void) } } + max_pfn = PFN_DOWN(memblock_end_of_DRAM()); + max_low_pfn = min(PFN_DOWN(HIGHMEM_START), max_pfn); memblock_set_current_limit(PFN_PHYS(max_low_pfn)); /* Reserve the first 2MB */ diff --git a/arch/loongarch/kernel/numa.c b/arch/loongarch/kernel/numa.c index d6e73e8f9c0b..8b89898e20df 100644 --- a/arch/loongarch/kernel/numa.c +++ b/arch/loongarch/kernel/numa.c @@ -158,35 +158,9 @@ static void __init node_mem_init(unsigned int node) #ifdef CONFIG_ACPI_NUMA -/* - * add_numamem_region - * - * Add a uasable memory region described by BIOS. The - * routine gets each intersection between BIOS's region - * and node's region, and adds them into node's memblock - * pool. 
- * - */ -static void __init add_numamem_region(u64 start, u64 end, u32 type) -{ - u32 node = pa_to_nid(start); - u64 size = end - start; - static unsigned long num_physpages; - - if (start >= end) { - pr_debug("Invalid region: %016llx-%016llx\n", start, end); - return; - } - - num_physpages += (size >> PAGE_SHIFT); - pr_info("Node%d: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx Bytes\n", - node, type, start, size); - pr_info(" start_pfn:0x%llx, end_pfn:0x%llx, num_physpages:0x%lx\n", - start >> PAGE_SHIFT, end >> PAGE_SHIFT, num_physpages); - memblock_set_node(start, size, &memblock.memory, node); -} +static unsigned long num_physpages; -static void __init init_node_memblock(void) +static void __init info_node_memblock(void) { u32 mem_type; u64 mem_end, mem_start, mem_size; @@ -206,12 +180,20 @@ static void __init init_node_memblock(void) case EFI_BOOT_SERVICES_DATA: case EFI_PERSISTENT_MEMORY: case EFI_CONVENTIONAL_MEMORY: - add_numamem_region(mem_start, mem_end, mem_type); + num_physpages += (mem_size >> PAGE_SHIFT); + pr_info("Node%d: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx Bytes\n", + (u32)pa_to_nid(mem_start), mem_type, mem_start, mem_size); + pr_info(" start_pfn:0x%llx, end_pfn:0x%llx, num_physpages:0x%lx\n", + mem_start >> PAGE_SHIFT, mem_end >> PAGE_SHIFT, num_physpages); break; case EFI_PAL_CODE: case EFI_UNUSABLE_MEMORY: case EFI_ACPI_RECLAIM_MEMORY: - add_numamem_region(mem_start, mem_end, mem_type); + num_physpages += (mem_size >> PAGE_SHIFT); + pr_info("Node%d: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx Bytes\n", + (u32)pa_to_nid(mem_start), mem_type, mem_start, mem_size); + pr_info(" start_pfn:0x%llx, end_pfn:0x%llx, num_physpages:0x%lx\n", + mem_start >> PAGE_SHIFT, mem_end >> PAGE_SHIFT, num_physpages); fallthrough; case EFI_RESERVED_TYPE: case EFI_RUNTIME_SERVICES_CODE: @@ -249,22 +231,16 @@ int __init init_numa_memory(void) for (i = 0; i < NR_CPUS; i++) set_cpuid_to_node(i, NUMA_NO_NODE); - numa_reset_distance(); - nodes_clear(numa_nodes_parsed); - nodes_clear(node_possible_map); - nodes_clear(node_online_map); - WARN_ON(memblock_clear_hotplug(0, PHYS_ADDR_MAX)); - /* Parse SRAT and SLIT if provided by firmware. */ - ret = acpi_disabled ? 
fake_numa_init() : acpi_numa_init(); + if (!acpi_disabled) + ret = numa_memblks_init(acpi_numa_init, false); + else + ret = numa_memblks_init(fake_numa_init, false); + if (ret < 0) return ret; - node_possible_map = numa_nodes_parsed; - if (WARN_ON(nodes_empty(node_possible_map))) - return -EINVAL; - - init_node_memblock(); + info_node_memblock(); if (!memblock_validate_numa_coverage(SZ_1M)) return -EINVAL; @@ -272,7 +248,8 @@ int __init init_numa_memory(void) node_mem_init(node); node_set_online(node); } - max_low_pfn = PHYS_PFN(memblock_end_of_DRAM()); + max_pfn = PFN_DOWN(memblock_end_of_DRAM()); + max_low_pfn = min(PFN_DOWN(HIGHMEM_START), max_pfn); setup_nr_node_ids(); loongson_sysconf.nr_nodes = nr_node_ids; @@ -283,26 +260,6 @@ int __init init_numa_memory(void) #endif -void __init paging_init(void) -{ - unsigned int node; - unsigned long zones_size[MAX_NR_ZONES] = {0, }; - - for_each_online_node(node) { - unsigned long start_pfn, end_pfn; - - get_pfn_range_for_nid(node, &start_pfn, &end_pfn); - - if (end_pfn > max_low_pfn) - max_low_pfn = end_pfn; - } -#ifdef CONFIG_ZONE_DMA32 - zones_size[ZONE_DMA32] = MAX_DMA32_PFN; -#endif - zones_size[ZONE_NORMAL] = max_low_pfn; - free_area_init(zones_size); -} - int pcibus_to_node(struct pci_bus *bus) { return dev_to_node(&bus->dev); diff --git a/arch/loongarch/kernel/perf_event.c b/arch/loongarch/kernel/perf_event.c index 8ad098703488..9d257c8519c9 100644 --- a/arch/loongarch/kernel/perf_event.c +++ b/arch/loongarch/kernel/perf_event.c @@ -845,13 +845,14 @@ static const struct loongarch_perf_event *loongarch_pmu_map_raw_event(u64 config static int __init init_hw_perf_events(void) { - int counters; + int bits, counters; if (!cpu_has_pmp) return -ENODEV; pr_info("Performance counters: "); - counters = ((read_cpucfg(LOONGARCH_CPUCFG6) & CPUCFG6_PMNUM) >> 4) + 1; + bits = ((read_cpucfg(LOONGARCH_CPUCFG6) & CPUCFG6_PMBITS) >> CPUCFG6_PMBITS_SHIFT) + 1; + counters = ((read_cpucfg(LOONGARCH_CPUCFG6) & CPUCFG6_PMNUM) >> CPUCFG6_PMNUM_SHIFT) + 1; loongarch_pmu.num_counters = counters; loongarch_pmu.max_period = (1ULL << 63) - 1; @@ -867,7 +868,7 @@ static int __init init_hw_perf_events(void) on_each_cpu(reset_counters, NULL, 1); pr_cont("%s PMU enabled, %d %d-bit counters available to each CPU.\n", - loongarch_pmu.name, counters, 64); + loongarch_pmu.name, counters, bits); perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW); diff --git a/arch/loongarch/kernel/proc.c b/arch/loongarch/kernel/proc.c index cea30768ae92..63d2b7e7e844 100644 --- a/arch/loongarch/kernel/proc.c +++ b/arch/loongarch/kernel/proc.c @@ -17,6 +17,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) { unsigned long n = (unsigned long) v - 1; unsigned int isa = cpu_data[n].isa_level; + unsigned int prid = cpu_data[n].processor_id; unsigned int version = cpu_data[n].processor_id & 0xff; unsigned int fp_version = cpu_data[n].fpu_vers; @@ -37,6 +38,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "global_id\t\t: %d\n", cpu_data[n].global_id); seq_printf(m, "CPU Family\t\t: %s\n", __cpu_family[n]); seq_printf(m, "Model Name\t\t: %s\n", __cpu_full_name[n]); + seq_printf(m, "PRID\t\t\t: %s (%08x)\n", id_to_core_name(prid), prid); seq_printf(m, "CPU Revision\t\t: 0x%02x\n", version); seq_printf(m, "FPU Revision\t\t: 0x%02x\n", fp_version); seq_printf(m, "CPU MHz\t\t\t: %llu.%02llu\n", diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index 69c17d162fff..25a87378e48e 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c 
@@ -294,8 +294,6 @@ static void __init fdt_setup(void) early_init_dt_scan(fdt_pointer, __pa(fdt_pointer)); early_init_fdt_reserve_self(); - - max_low_pfn = PFN_PHYS(memblock_end_of_DRAM()); #endif } @@ -390,7 +388,8 @@ static void __init check_kernel_sections_mem(void) static void __init arch_mem_init(char **cmdline_p) { /* Recalculate max_low_pfn for "mem=xxx" */ - max_pfn = max_low_pfn = PHYS_PFN(memblock_end_of_DRAM()); + max_pfn = PFN_DOWN(memblock_end_of_DRAM()); + max_low_pfn = min(PFN_DOWN(HIGHMEM_START), max_pfn); if (usermem) pr_info("User-defined physical RAM map overwrite\n"); diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index 3d9be6ca7ec5..da5926fead4a 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -1131,8 +1131,8 @@ static void configure_exception_vector(void) tlbrentry = (unsigned long)exception_handlers + 80*VECSIZE; csr_write64(eentry, LOONGARCH_CSR_EENTRY); - csr_write64(eentry, LOONGARCH_CSR_MERRENTRY); - csr_write64(tlbrentry, LOONGARCH_CSR_TLBRENTRY); + csr_write64(__pa(eentry), LOONGARCH_CSR_MERRENTRY); + csr_write64(__pa(tlbrentry), LOONGARCH_CSR_TLBRENTRY); } void per_cpu_trap_init(int cpu) diff --git a/arch/loongarch/kvm/intc/eiointc.c b/arch/loongarch/kvm/intc/eiointc.c index c32333695381..a1cc116b4dac 100644 --- a/arch/loongarch/kvm/intc/eiointc.c +++ b/arch/loongarch/kvm/intc/eiointc.c @@ -439,7 +439,7 @@ static int kvm_eiointc_ctrl_access(struct kvm_device *dev, spin_lock_irqsave(&s->lock, flags); switch (type) { case KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_NUM_CPU: - if (val >= EIOINTC_ROUTE_MAX_VCPUS) + if (val > EIOINTC_ROUTE_MAX_VCPUS) ret = -EINVAL; else s->num_cpu = val; diff --git a/arch/loongarch/kvm/mmu.c b/arch/loongarch/kvm/mmu.c index 7c8143e79c12..a7fa458e3360 100644 --- a/arch/loongarch/kvm/mmu.c +++ b/arch/loongarch/kvm/mmu.c @@ -857,7 +857,7 @@ retry: if (writeable) { prot_bits = kvm_pte_mkwriteable(prot_bits); - if (write) + if (write || !kvm_slot_dirty_track_enabled(memslot)) prot_bits = kvm_pte_mkdirty(prot_bits); } diff --git a/arch/loongarch/kvm/timer.c b/arch/loongarch/kvm/timer.c index 32dc213374be..29c2aaba63c3 100644 --- a/arch/loongarch/kvm/timer.c +++ b/arch/loongarch/kvm/timer.c @@ -4,6 +4,7 @@ */ #include <linux/kvm_host.h> +#include <asm/delay.h> #include <asm/kvm_csr.h> #include <asm/kvm_vcpu.h> @@ -95,6 +96,7 @@ void kvm_restore_timer(struct kvm_vcpu *vcpu) * and set CSR TVAL with -1 */ write_gcsr_timertick(0); + __delay(2); /* Wait cycles until timer interrupt injected */ /* * Writing CSR_TINTCLR_TI to LOONGARCH_CSR_TINTCLR will clear diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index 30e3b089a596..1245a6b35896 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -132,6 +132,9 @@ static void kvm_lose_pmu(struct kvm_vcpu *vcpu) * Clear KVM_LARCH_PMU if the guest is not using PMU CSRs when * exiting the guest, so that the next time trap into the guest. * We don't need to deal with PMU CSRs contexts. 
+ * + * Otherwise set the request bit KVM_REQ_PMU to restore guest PMU + * before entering guest VM */ val = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL0); val |= kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL1); @@ -139,16 +142,12 @@ static void kvm_lose_pmu(struct kvm_vcpu *vcpu) val |= kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL3); if (!(val & KVM_PMU_EVENT_ENABLED)) vcpu->arch.aux_inuse &= ~KVM_LARCH_PMU; + else + kvm_make_request(KVM_REQ_PMU, vcpu); kvm_restore_host_pmu(vcpu); } -static void kvm_restore_pmu(struct kvm_vcpu *vcpu) -{ - if ((vcpu->arch.aux_inuse & KVM_LARCH_PMU)) - kvm_make_request(KVM_REQ_PMU, vcpu); -} - static void kvm_check_pmu(struct kvm_vcpu *vcpu) { if (kvm_check_request(KVM_REQ_PMU, vcpu)) { @@ -299,7 +298,10 @@ static int kvm_pre_enter_guest(struct kvm_vcpu *vcpu) vcpu->arch.aux_inuse &= ~KVM_LARCH_SWCSR_LATEST; if (kvm_request_pending(vcpu) || xfer_to_guest_mode_work_pending()) { - kvm_lose_pmu(vcpu); + if (vcpu->arch.aux_inuse & KVM_LARCH_PMU) { + kvm_lose_pmu(vcpu); + kvm_make_request(KVM_REQ_PMU, vcpu); + } /* make sure the vcpu mode has been written */ smp_store_mb(vcpu->mode, OUTSIDE_GUEST_MODE); local_irq_enable(); @@ -1604,9 +1606,6 @@ static int _kvm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) kvm_restore_timer(vcpu); kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); - /* Restore hardware PMU CSRs */ - kvm_restore_pmu(vcpu); - /* Don't bother restoring registers multiple times unless necessary */ if (vcpu->arch.aux_inuse & KVM_LARCH_HWCSR_USABLE) return 0; diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c index c3e4586a7975..6bfd4b8dad1b 100644 --- a/arch/loongarch/mm/init.c +++ b/arch/loongarch/mm/init.c @@ -60,7 +60,6 @@ int __ref page_is_ram(unsigned long pfn) return memblock_is_memory(addr) && !memblock_is_reserved(addr); } -#ifndef CONFIG_NUMA void __init paging_init(void) { unsigned long max_zone_pfns[MAX_NR_ZONES]; @@ -72,7 +71,6 @@ void __init paging_init(void) free_area_init(max_zone_pfns); } -#endif /* !CONFIG_NUMA */ void __ref free_initmem(void) { diff --git a/arch/loongarch/mm/ioremap.c b/arch/loongarch/mm/ioremap.c index df949a3d0f34..27c336959fe8 100644 --- a/arch/loongarch/mm/ioremap.c +++ b/arch/loongarch/mm/ioremap.c @@ -6,7 +6,7 @@ #include <asm/io.h> #include <asm-generic/early_ioremap.h> -void __init __iomem *early_ioremap(u64 phys_addr, unsigned long size) +void __init __iomem *early_ioremap(phys_addr_t phys_addr, unsigned long size) { return ((void __iomem *)TO_CACHE(phys_addr)); } diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index cbe53d0b7fb0..f97dc9936401 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -1624,6 +1624,9 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i /* Direct jump skips 5 NOP instructions */ else if (is_bpf_text_address((unsigned long)orig_call)) orig_call += LOONGARCH_BPF_FENTRY_NBYTES; + /* Module tracing not supported - cause kernel lockups */ + else if (is_module_text_address((unsigned long)orig_call)) + return -ENOTSUPP; if (flags & BPF_TRAMP_F_CALL_ORIG) { move_addr(ctx, LOONGARCH_GPR_A0, (const u64)im); diff --git a/arch/loongarch/pci/pci.c b/arch/loongarch/pci/pci.c index 5bc9627a6cf9..d9fc5d520b37 100644 --- a/arch/loongarch/pci/pci.c +++ b/arch/loongarch/pci/pci.c @@ -50,11 +50,11 @@ static int __init pcibios_init(void) */ lsize = cpu_last_level_cache_line_size(); - BUG_ON(!lsize); + if (lsize) { + pci_dfl_cache_line_size = lsize >> 2; - pci_dfl_cache_line_size = lsize >> 2; - - pr_debug("PCI: 
pci_cache_line_size set to %d bytes\n", lsize); + pr_debug("PCI: pci_cache_line_size set to %d bytes\n", lsize); + } return 0; } diff --git a/arch/loongarch/vdso/Makefile b/arch/loongarch/vdso/Makefile index d8316f993482..c0cc3ca5da9f 100644 --- a/arch/loongarch/vdso/Makefile +++ b/arch/loongarch/vdso/Makefile @@ -19,7 +19,7 @@ ccflags-vdso := \ cflags-vdso := $(ccflags-vdso) \ -isystem $(shell $(CC) -print-file-name=include) \ $(filter -W%,$(filter-out -Wa$(comma)%,$(KBUILD_CFLAGS))) \ - -std=gnu11 -O2 -g -fno-strict-aliasing -fno-common -fno-builtin \ + -std=gnu11 -fms-extensions -O2 -g -fno-strict-aliasing -fno-common -fno-builtin \ -fno-stack-protector -fno-jump-tables -DDISABLE_BRANCH_PROFILING \ $(call cc-option, -fno-asynchronous-unwind-tables) \ $(call cc-option, -fno-stack-protector) diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index fba8089c9fb3..e717099b77cc 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -265,7 +265,6 @@ CONFIG_BRIDGE_EBT_SNAT=m CONFIG_BRIDGE_EBT_LOG=m CONFIG_BRIDGE_EBT_NFLOG=m CONFIG_IP_SCTP=m -CONFIG_SCTP_COOKIE_HMAC_SHA1=y CONFIG_RDS=m CONFIG_RDS_TCP=m CONFIG_L2TP=m @@ -281,7 +280,6 @@ CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m CONFIG_DNS_RESOLVER=y CONFIG_BATMAN_ADV=m # CONFIG_BATMAN_ADV_BATMAN_V is not set -CONFIG_BATMAN_ADV_NC=y CONFIG_NETLINK_DIAG=m CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m @@ -583,6 +581,7 @@ CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m +CONFIG_CRYPTO_SHA1=m CONFIG_CRYPTO_SM3_GENERIC=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_XCBC=m diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index 6af37716384c..6b0bab752035 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -261,7 +261,6 @@ CONFIG_BRIDGE_EBT_SNAT=m CONFIG_BRIDGE_EBT_LOG=m CONFIG_BRIDGE_EBT_NFLOG=m CONFIG_IP_SCTP=m -CONFIG_SCTP_COOKIE_HMAC_SHA1=y CONFIG_RDS=m CONFIG_RDS_TCP=m CONFIG_L2TP=m @@ -277,7 +276,6 @@ CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m CONFIG_DNS_RESOLVER=y CONFIG_BATMAN_ADV=m # CONFIG_BATMAN_ADV_BATMAN_V is not set -CONFIG_BATMAN_ADV_NC=y CONFIG_NETLINK_DIAG=m CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m @@ -540,6 +538,7 @@ CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m +CONFIG_CRYPTO_SHA1=m CONFIG_CRYPTO_SM3_GENERIC=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_XCBC=m diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index 471f4ec3730d..bb59e3f06ed5 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -268,7 +268,6 @@ CONFIG_BRIDGE_EBT_SNAT=m CONFIG_BRIDGE_EBT_LOG=m CONFIG_BRIDGE_EBT_NFLOG=m CONFIG_IP_SCTP=m -CONFIG_SCTP_COOKIE_HMAC_SHA1=y CONFIG_RDS=m CONFIG_RDS_TCP=m CONFIG_L2TP=m @@ -284,7 +283,6 @@ CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m CONFIG_DNS_RESOLVER=y CONFIG_BATMAN_ADV=m # CONFIG_BATMAN_ADV_BATMAN_V is not set -CONFIG_BATMAN_ADV_NC=y CONFIG_NETLINK_DIAG=m CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m @@ -560,6 +558,7 @@ CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m +CONFIG_CRYPTO_SHA1=m CONFIG_CRYPTO_SM3_GENERIC=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_XCBC=m diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index 28492ef51457..ed85517ff01c 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -258,7 +258,6 @@ CONFIG_BRIDGE_EBT_SNAT=m CONFIG_BRIDGE_EBT_LOG=m 
CONFIG_BRIDGE_EBT_NFLOG=m CONFIG_IP_SCTP=m -CONFIG_SCTP_COOKIE_HMAC_SHA1=y CONFIG_RDS=m CONFIG_RDS_TCP=m CONFIG_L2TP=m @@ -274,7 +273,6 @@ CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m CONFIG_DNS_RESOLVER=y CONFIG_BATMAN_ADV=m # CONFIG_BATMAN_ADV_BATMAN_V is not set -CONFIG_BATMAN_ADV_NC=y CONFIG_NETLINK_DIAG=m CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m @@ -532,6 +530,7 @@ CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m +CONFIG_CRYPTO_SHA1=m CONFIG_CRYPTO_SM3_GENERIC=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_XCBC=m diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index 2fbefb16b72e..0217a6982856 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -260,7 +260,6 @@ CONFIG_BRIDGE_EBT_SNAT=m CONFIG_BRIDGE_EBT_LOG=m CONFIG_BRIDGE_EBT_NFLOG=m CONFIG_IP_SCTP=m -CONFIG_SCTP_COOKIE_HMAC_SHA1=y CONFIG_RDS=m CONFIG_RDS_TCP=m CONFIG_L2TP=m @@ -276,7 +275,6 @@ CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m CONFIG_DNS_RESOLVER=y CONFIG_BATMAN_ADV=m # CONFIG_BATMAN_ADV_BATMAN_V is not set -CONFIG_BATMAN_ADV_NC=y CONFIG_NETLINK_DIAG=m CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m @@ -542,6 +540,7 @@ CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m +CONFIG_CRYPTO_SHA1=m CONFIG_CRYPTO_SM3_GENERIC=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_XCBC=m diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index deec5df3f35a..a9ba431a9408 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -259,7 +259,6 @@ CONFIG_BRIDGE_EBT_SNAT=m CONFIG_BRIDGE_EBT_LOG=m CONFIG_BRIDGE_EBT_NFLOG=m CONFIG_IP_SCTP=m -CONFIG_SCTP_COOKIE_HMAC_SHA1=y CONFIG_RDS=m CONFIG_RDS_TCP=m CONFIG_L2TP=m @@ -275,7 +274,6 @@ CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m CONFIG_DNS_RESOLVER=y CONFIG_BATMAN_ADV=m # CONFIG_BATMAN_ADV_BATMAN_V is not set -CONFIG_BATMAN_ADV_NC=y CONFIG_NETLINK_DIAG=m CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m @@ -559,6 +557,7 @@ CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m +CONFIG_CRYPTO_SHA1=m CONFIG_CRYPTO_SM3_GENERIC=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_XCBC=m diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index 301a05c12577..f0d0d120e144 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -279,7 +279,6 @@ CONFIG_BRIDGE_EBT_SNAT=m CONFIG_BRIDGE_EBT_LOG=m CONFIG_BRIDGE_EBT_NFLOG=m CONFIG_IP_SCTP=m -CONFIG_SCTP_COOKIE_HMAC_SHA1=y CONFIG_RDS=m CONFIG_RDS_TCP=m CONFIG_L2TP=m @@ -295,7 +294,6 @@ CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m CONFIG_DNS_RESOLVER=y CONFIG_BATMAN_ADV=m # CONFIG_BATMAN_ADV_BATMAN_V is not set -CONFIG_BATMAN_ADV_NC=y CONFIG_NETLINK_DIAG=m CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m @@ -646,6 +644,7 @@ CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m +CONFIG_CRYPTO_SHA1=m CONFIG_CRYPTO_SM3_GENERIC=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_XCBC=m diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index 0d401db0e8f8..baf0fc7a66d9 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -257,7 +257,6 @@ CONFIG_BRIDGE_EBT_SNAT=m CONFIG_BRIDGE_EBT_LOG=m CONFIG_BRIDGE_EBT_NFLOG=m CONFIG_IP_SCTP=m -CONFIG_SCTP_COOKIE_HMAC_SHA1=y CONFIG_RDS=m CONFIG_RDS_TCP=m CONFIG_L2TP=m @@ -273,7 +272,6 @@ CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m CONFIG_DNS_RESOLVER=y CONFIG_BATMAN_ADV=m # CONFIG_BATMAN_ADV_BATMAN_V is not set 
-CONFIG_BATMAN_ADV_NC=y CONFIG_NETLINK_DIAG=m CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m @@ -532,6 +530,7 @@ CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m +CONFIG_CRYPTO_SHA1=m CONFIG_CRYPTO_SM3_GENERIC=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_XCBC=m diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index 90fb5b6bcf83..9cb732f0c6ce 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -258,7 +258,6 @@ CONFIG_BRIDGE_EBT_SNAT=m CONFIG_BRIDGE_EBT_LOG=m CONFIG_BRIDGE_EBT_NFLOG=m CONFIG_IP_SCTP=m -CONFIG_SCTP_COOKIE_HMAC_SHA1=y CONFIG_RDS=m CONFIG_RDS_TCP=m CONFIG_L2TP=m @@ -274,7 +273,6 @@ CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m CONFIG_DNS_RESOLVER=y CONFIG_BATMAN_ADV=m # CONFIG_BATMAN_ADV_BATMAN_V is not set -CONFIG_BATMAN_ADV_NC=y CONFIG_NETLINK_DIAG=m CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m @@ -533,6 +531,7 @@ CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m +CONFIG_CRYPTO_SHA1=m CONFIG_CRYPTO_SM3_GENERIC=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_XCBC=m diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index b89b0f7fe2da..6b3295e4c5ae 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -259,7 +259,6 @@ CONFIG_BRIDGE_EBT_SNAT=m CONFIG_BRIDGE_EBT_LOG=m CONFIG_BRIDGE_EBT_NFLOG=m CONFIG_IP_SCTP=m -CONFIG_SCTP_COOKIE_HMAC_SHA1=y CONFIG_RDS=m CONFIG_RDS_TCP=m CONFIG_L2TP=m @@ -275,7 +274,6 @@ CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m CONFIG_DNS_RESOLVER=y CONFIG_BATMAN_ADV=m # CONFIG_BATMAN_ADV_BATMAN_V is not set -CONFIG_BATMAN_ADV_NC=y CONFIG_NETLINK_DIAG=m CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m @@ -549,6 +547,7 @@ CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m +CONFIG_CRYPTO_SHA1=m CONFIG_CRYPTO_SM3_GENERIC=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_XCBC=m diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index 8cc372c4df72..1f3a2fad6d2f 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -254,7 +254,6 @@ CONFIG_BRIDGE_EBT_SNAT=m CONFIG_BRIDGE_EBT_LOG=m CONFIG_BRIDGE_EBT_NFLOG=m CONFIG_IP_SCTP=m -CONFIG_SCTP_COOKIE_HMAC_SHA1=y CONFIG_RDS=m CONFIG_RDS_TCP=m CONFIG_L2TP=m @@ -270,7 +269,6 @@ CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m CONFIG_DNS_RESOLVER=y CONFIG_BATMAN_ADV=m # CONFIG_BATMAN_ADV_BATMAN_V is not set -CONFIG_BATMAN_ADV_NC=y CONFIG_NETLINK_DIAG=m CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m @@ -530,6 +528,7 @@ CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m +CONFIG_CRYPTO_SHA1=m CONFIG_CRYPTO_SM3_GENERIC=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_XCBC=m diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index f4569f64c6e4..d8ef36564d50 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -255,7 +255,6 @@ CONFIG_BRIDGE_EBT_SNAT=m CONFIG_BRIDGE_EBT_LOG=m CONFIG_BRIDGE_EBT_NFLOG=m CONFIG_IP_SCTP=m -CONFIG_SCTP_COOKIE_HMAC_SHA1=y CONFIG_RDS=m CONFIG_RDS_TCP=m CONFIG_L2TP=m @@ -271,7 +270,6 @@ CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m CONFIG_DNS_RESOLVER=y CONFIG_BATMAN_ADV=m # CONFIG_BATMAN_ADV_BATMAN_V is not set -CONFIG_BATMAN_ADV_NC=y CONFIG_NETLINK_DIAG=m CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m @@ -530,6 +528,7 @@ CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m +CONFIG_CRYPTO_SHA1=m CONFIG_CRYPTO_SM3_GENERIC=m CONFIG_CRYPTO_WP512=m 
CONFIG_CRYPTO_XCBC=m diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index f41d38dfbf13..871a5d67bf41 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -469,3 +469,4 @@ 467 common open_tree_attr sys_open_tree_attr 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr +470 common listns sys_listns diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index 580af574fe73..022fc85d94b3 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -475,3 +475,4 @@ 467 common open_tree_attr sys_open_tree_attr 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr +470 common listns sys_listns diff --git a/arch/mips/boot/dts/econet/en751221.dtsi b/arch/mips/boot/dts/econet/en751221.dtsi index 66197e73d4f0..2abeef5b744a 100644 --- a/arch/mips/boot/dts/econet/en751221.dtsi +++ b/arch/mips/boot/dts/econet/en751221.dtsi @@ -18,7 +18,7 @@ cpu@0 { device_type = "cpu"; - compatible = "mips,mips24KEc"; + compatible = "mips,mips34Kc"; reg = <0>; }; }; diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 29191fa1801e..a3101f2268c6 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -692,7 +692,7 @@ unsigned long mips_stack_top(void) /* Space for the VDSO, data page & GIC user page */ if (current->thread.abi) { top -= PAGE_ALIGN(current->thread.abi->vdso->size); - top -= PAGE_SIZE; + top -= VDSO_NR_PAGES * PAGE_SIZE; top -= mips_gic_present() ? PAGE_SIZE : 0; /* Space to randomize the VDSO base */ diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index d824ffe9a014..8cedc83c3266 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -408,3 +408,4 @@ 467 n32 open_tree_attr sys_open_tree_attr 468 n32 file_getattr sys_file_getattr 469 n32 file_setattr sys_file_setattr +470 n32 listns sys_listns diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index 7a7049c2c307..9b92bddf06b5 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -384,3 +384,4 @@ 467 n64 open_tree_attr sys_open_tree_attr 468 n64 file_getattr sys_file_getattr 469 n64 file_setattr sys_file_setattr +470 n64 listns sys_listns diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index d330274f0601..f810b8a55716 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -457,3 +457,4 @@ 467 o32 open_tree_attr sys_open_tree_attr 468 o32 file_getattr sys_file_getattr 469 o32 file_setattr sys_file_setattr +470 o32 listns sys_listns diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c index 347126dc010d..44a662536148 100644 --- a/arch/mips/mm/tlb-r4k.c +++ b/arch/mips/mm/tlb-r4k.c @@ -12,9 +12,11 @@ #include <linux/init.h> #include <linux/sched.h> #include <linux/smp.h> +#include <linux/memblock.h> #include <linux/mm.h> #include <linux/hugetlb.h> #include <linux/export.h> +#include <linux/sort.h> #include <asm/cpu.h> #include <asm/cpu-type.h> @@ -508,58 +510,95 @@ static int __init set_ntlb(char *str) __setup("ntlb=", set_ntlb); -/* Initialise all TLB entries with unique values */ -static void r4k_tlb_uniquify(void) + +/* Comparison function for EntryHi VPN fields. 
*/ +static int r4k_vpn_cmp(const void *a, const void *b) { - int entry = num_wired_entries(); + long v = *(unsigned long *)a - *(unsigned long *)b; + int s = sizeof(long) > sizeof(int) ? sizeof(long) * 8 - 1: 0; + return s ? (v != 0) | v >> s : v; +} + +/* + * Initialise all TLB entries with unique values that do not clash with + * what we have been handed over and what we'll be using ourselves. + */ +static void __ref r4k_tlb_uniquify(void) +{ + int tlbsize = current_cpu_data.tlbsize; + bool use_slab = slab_is_available(); + int start = num_wired_entries(); + phys_addr_t tlb_vpn_size; + unsigned long *tlb_vpns; + unsigned long vpn_mask; + int cnt, ent, idx, i; + + vpn_mask = GENMASK(cpu_vmbits - 1, 13); + vpn_mask |= IS_ENABLED(CONFIG_64BIT) ? 3ULL << 62 : 1 << 31; + + tlb_vpn_size = tlbsize * sizeof(*tlb_vpns); + tlb_vpns = (use_slab ? + kmalloc(tlb_vpn_size, GFP_KERNEL) : + memblock_alloc_raw(tlb_vpn_size, sizeof(*tlb_vpns))); + if (WARN_ON(!tlb_vpns)) + return; /* Pray local_flush_tlb_all() is good enough. */ htw_stop(); + + for (i = start, cnt = 0; i < tlbsize; i++, cnt++) { + unsigned long vpn; + + write_c0_index(i); + mtc0_tlbr_hazard(); + tlb_read(); + tlb_read_hazard(); + vpn = read_c0_entryhi(); + vpn &= vpn_mask & PAGE_MASK; + tlb_vpns[cnt] = vpn; + + /* Prevent any large pages from overlapping regular ones. */ + write_c0_pagemask(read_c0_pagemask() & PM_DEFAULT_MASK); + mtc0_tlbw_hazard(); + tlb_write_indexed(); + tlbw_use_hazard(); + } + + sort(tlb_vpns, cnt, sizeof(tlb_vpns[0]), r4k_vpn_cmp, NULL); + + write_c0_pagemask(PM_DEFAULT_MASK); write_c0_entrylo0(0); write_c0_entrylo1(0); - while (entry < current_cpu_data.tlbsize) { - unsigned long asid_mask = cpu_asid_mask(&current_cpu_data); - unsigned long asid = 0; - int idx; + idx = 0; + ent = tlbsize; + for (i = start; i < tlbsize; i++) + while (1) { + unsigned long entryhi, vpn; - /* Skip wired MMID to make ginvt_mmid work */ - if (cpu_has_mmid) - asid = MMID_KERNEL_WIRED + 1; + entryhi = UNIQUE_ENTRYHI(ent); + vpn = entryhi & vpn_mask & PAGE_MASK; - /* Check for match before using UNIQUE_ENTRYHI */ - do { - if (cpu_has_mmid) { - write_c0_memorymapid(asid); - write_c0_entryhi(UNIQUE_ENTRYHI(entry)); + if (idx >= cnt || vpn < tlb_vpns[idx]) { + write_c0_entryhi(entryhi); + write_c0_index(i); + mtc0_tlbw_hazard(); + tlb_write_indexed(); + ent++; + break; + } else if (vpn == tlb_vpns[idx]) { + ent++; } else { - write_c0_entryhi(UNIQUE_ENTRYHI(entry) | asid); + idx++; } - mtc0_tlbw_hazard(); - tlb_probe(); - tlb_probe_hazard(); - idx = read_c0_index(); - /* No match or match is on current entry */ - if (idx < 0 || idx == entry) - break; - /* - * If we hit a match, we need to try again with - * a different ASID. - */ - asid++; - } while (asid < asid_mask); - - if (idx >= 0 && idx != entry) - panic("Unable to uniquify TLB entry %d", idx); - - write_c0_index(entry); - mtc0_tlbw_hazard(); - tlb_write_indexed(); - entry++; - } + } tlbw_use_hazard(); htw_start(); flush_micro_tlb(); + if (use_slab) + kfree(tlb_vpns); + else + memblock_free(tlb_vpns, tlb_vpn_size); } /* @@ -602,6 +641,7 @@ static void r4k_tlb_configure(void) /* From this point on the ARC firmware is dead. */ r4k_tlb_uniquify(); + local_flush_tlb_all(); /* Did I tell you that ARC SUCKS?
*/ } diff --git a/arch/mips/mti-malta/malta-init.c b/arch/mips/mti-malta/malta-init.c index 000d6d50520a..82b0fd8576a2 100644 --- a/arch/mips/mti-malta/malta-init.c +++ b/arch/mips/mti-malta/malta-init.c @@ -241,16 +241,22 @@ mips_pci_controller: #endif /* - * Setup the Malta max (2GB) memory for PCI DMA in host bridge - * in transparent addressing mode. + * Set up memory mapping in host bridge for PCI DMA masters, + * in transparent addressing mode. For EVA use the Malta + * maximum of 2 GiB memory in the alias space at 0x80000000 + * as per PHYS_OFFSET. Otherwise use 256 MiB of memory in + * the regular space, avoiding mapping the PCI MMIO window + * for DMA as it seems to confuse the system controller's + * logic, causing PCI MMIO to stop working. */ - mask = PHYS_OFFSET | PCI_BASE_ADDRESS_MEM_PREFETCH; - MSC_WRITE(MSC01_PCI_BAR0, mask); - MSC_WRITE(MSC01_PCI_HEAD4, mask); + mask = PHYS_OFFSET ? PHYS_OFFSET : 0xf0000000; + MSC_WRITE(MSC01_PCI_BAR0, + mask | PCI_BASE_ADDRESS_MEM_PREFETCH); + MSC_WRITE(MSC01_PCI_HEAD4, + PHYS_OFFSET | PCI_BASE_ADDRESS_MEM_PREFETCH); - mask &= MSC01_PCI_BAR0_SIZE_MSK; MSC_WRITE(MSC01_PCI_P2SCMSKL, mask); - MSC_WRITE(MSC01_PCI_P2SCMAPL, mask); + MSC_WRITE(MSC01_PCI_P2SCMAPL, PHYS_OFFSET); /* Don't handle target retries indefinitely. */ if ((data & MSC01_PCI_CFG_MAXRTRY_MSK) == diff --git a/arch/parisc/boot/compressed/Makefile b/arch/parisc/boot/compressed/Makefile index 17c42d718eb3..f8481e4e9d21 100644 --- a/arch/parisc/boot/compressed/Makefile +++ b/arch/parisc/boot/compressed/Makefile @@ -18,7 +18,7 @@ KBUILD_CFLAGS += -fno-PIE -mno-space-regs -mdisable-fpregs -Os ifndef CONFIG_64BIT KBUILD_CFLAGS += -mfast-indirect-calls endif -KBUILD_CFLAGS += -std=gnu11 +KBUILD_CFLAGS += -std=gnu11 -fms-extensions LDFLAGS_vmlinux := -X -e startup --as-needed -T $(obj)/vmlinux: $(obj)/vmlinux.lds $(addprefix $(obj)/, $(OBJECTS)) $(LIBGCC) FORCE diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index 88a788a7b18d..39bdacaa530b 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -468,3 +468,4 @@ 467 common open_tree_attr sys_open_tree_attr 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr +470 common listns sys_listns diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index e24f4d88885a..9537a61ebae0 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -137,6 +137,7 @@ config PPC select ARCH_HAS_DMA_OPS if PPC64 select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL + select ARCH_HAS_GIGANTIC_PAGE if ARCH_SUPPORTS_HUGETLBFS select ARCH_HAS_KCOV select ARCH_HAS_KERNEL_FPU_SUPPORT if PPC64 && PPC_FPU select ARCH_HAS_MEMBARRIER_CALLBACKS diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index c47b78c1d3e7..f1a4761ebd44 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -70,7 +70,7 @@ BOOTCPPFLAGS := -nostdinc $(LINUXINCLUDE) BOOTCPPFLAGS += -isystem $(shell $(BOOTCC) -print-file-name=include) BOOTCFLAGS := $(BOOTTARGETFLAGS) \ - -std=gnu11 \ + -std=gnu11 -fms-extensions \ -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ -fno-strict-aliasing -O2 \ -msoft-float -mno-altivec -mno-vsx \ @@ -86,6 +86,7 @@ BOOTARFLAGS := -crD ifdef CONFIG_CC_IS_CLANG BOOTCFLAGS += $(CLANG_FLAGS) +BOOTCFLAGS += -Wno-microsoft-anon-tag BOOTAFLAGS += $(CLANG_FLAGS) endif diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 4f5a46a77fa2..784a00e681fa 100644 --- 
a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -451,7 +451,7 @@ user_write_access_begin(const void __user *ptr, size_t len) #define user_write_access_begin user_write_access_begin #define user_write_access_end prevent_current_write_to_user -#define unsafe_get_user(x, p, e) do { \ +#define arch_unsafe_get_user(x, p, e) do { \ __long_type(*(p)) __gu_val; \ __typeof__(*(p)) __user *__gu_addr = (p); \ \ @@ -459,7 +459,7 @@ user_write_access_begin(const void __user *ptr, size_t len) (x) = (__typeof__(*(p)))__gu_val; \ } while (0) -#define unsafe_put_user(x, p, e) \ +#define arch_unsafe_put_user(x, p, e) \ __put_user_size_goto((__typeof__(*(p)))(x), (p), sizeof(*(p)), e) #define unsafe_copy_from_user(d, s, l, e) \ @@ -504,11 +504,11 @@ do { \ unsafe_put_user(*(u8*)(_src + _i), (u8 __user *)(_dst + _i), e); \ } while (0) -#define __get_kernel_nofault(dst, src, type, err_label) \ +#define arch_get_kernel_nofault(dst, src, type, err_label) \ __get_user_size_goto(*((type *)(dst)), \ (__force type __user *)(src), sizeof(type), err_label) -#define __put_kernel_nofault(dst, src, type, err_label) \ +#define arch_put_kernel_nofault(dst, src, type, err_label) \ __put_user_size_goto(*((type *)(src)), \ (__force type __user *)(dst), sizeof(type), err_label) diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index b453e80dfc00..ec4458cdb97b 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -560,3 +560,4 @@ 467 common open_tree_attr sys_open_tree_attr 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr +470 common listns sys_listns diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 7b527d18aa5e..4c321a8ea896 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -423,7 +423,6 @@ config PPC_64S_HASH_MMU config PPC_RADIX_MMU bool "Radix MMU Support" depends on PPC_BOOK3S_64 - select ARCH_HAS_GIGANTIC_PAGE default y help Enable support for the Power ISA 3.0 Radix style MMU. Currently this diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 7ec60290abe6..78c4b6ce5f13 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -267,22 +267,11 @@ spufs_mkdir(struct inode *dir, struct dentry *dentry, unsigned int flags, static int spufs_context_open(const struct path *path) { - int ret; - struct file *filp; - - ret = get_unused_fd_flags(0); - if (ret < 0) - return ret; - - filp = dentry_open(path, O_RDONLY, current_cred()); - if (IS_ERR(filp)) { - put_unused_fd(ret); - return PTR_ERR(filp); - } - - filp->f_op = &spufs_context_fops; - fd_install(ret, filp); - return ret; + FD_PREPARE(fdf, 0, dentry_open(path, O_RDONLY, current_cred())); + if (fdf.err) + return fdf.err; + fd_prepare_file(fdf)->f_op = &spufs_context_fops; + return fd_publish(fdf); } static struct spu_context * @@ -508,26 +497,15 @@ static const struct file_operations spufs_gang_fops = { static int spufs_gang_open(const struct path *path) { - int ret; - struct file *filp; - - ret = get_unused_fd_flags(0); - if (ret < 0) - return ret; - /* * get references for dget and mntget, will be released * in error path of *_open(). 
*/ - filp = dentry_open(path, O_RDONLY, current_cred()); - if (IS_ERR(filp)) { - put_unused_fd(ret); - return PTR_ERR(filp); - } - - filp->f_op = &spufs_gang_fops; - fd_install(ret, filp); - return ret; + FD_PREPARE(fdf, 0, dentry_open(path, O_RDONLY, current_cred())); + if (fdf.err) + return fdf.err; + fd_prepare_file(fdf)->f_op = &spufs_gang_fops; + return fd_publish(fdf); } static int spufs_create_gang(struct inode *inode, diff --git a/arch/powerpc/platforms/pseries/papr-hvpipe.c b/arch/powerpc/platforms/pseries/papr-hvpipe.c index 21a2f447c43f..dd7b668799d9 100644 --- a/arch/powerpc/platforms/pseries/papr-hvpipe.c +++ b/arch/powerpc/platforms/pseries/papr-hvpipe.c @@ -479,10 +479,7 @@ static const struct file_operations papr_hvpipe_handle_ops = { static int papr_hvpipe_dev_create_handle(u32 srcID) { - struct hvpipe_source_info *src_info; - struct file *file; - long err; - int fd; + struct hvpipe_source_info *src_info __free(kfree) = NULL; spin_lock(&hvpipe_src_list_lock); /* @@ -506,20 +503,13 @@ static int papr_hvpipe_dev_create_handle(u32 srcID) src_info->tsk = current; init_waitqueue_head(&src_info->recv_wqh); - fd = get_unused_fd_flags(O_RDONLY | O_CLOEXEC); - if (fd < 0) { - err = fd; - goto free_buf; - } - - file = anon_inode_getfile("[papr-hvpipe]", - &papr_hvpipe_handle_ops, (void *)src_info, - O_RDWR); - if (IS_ERR(file)) { - err = PTR_ERR(file); - goto free_fd; - } + FD_PREPARE(fdf, O_RDONLY | O_CLOEXEC, + anon_inode_getfile("[papr-hvpipe]", &papr_hvpipe_handle_ops, + (void *)src_info, O_RDWR)); + if (fdf.err) + return fdf.err; + retain_and_null_ptr(src_info); spin_lock(&hvpipe_src_list_lock); /* * If two processes are executing ioctl() for the same @@ -528,22 +518,11 @@ static int papr_hvpipe_dev_create_handle(u32 srcID) */ if (hvpipe_find_source(srcID)) { spin_unlock(&hvpipe_src_list_lock); - err = -EALREADY; - goto free_file; + return -EALREADY; } list_add(&src_info->list, &hvpipe_src_list); spin_unlock(&hvpipe_src_list_lock); - - fd_install(fd, file); - return fd; - -free_file: - fput(file); -free_fd: - put_unused_fd(fd); -free_buf: - kfree(src_info); - return err; + return fd_publish(fdf); } /* diff --git a/arch/powerpc/platforms/pseries/papr-platform-dump.c b/arch/powerpc/platforms/pseries/papr-platform-dump.c index f8d55eccdb6b..be633c9a0e75 100644 --- a/arch/powerpc/platforms/pseries/papr-platform-dump.c +++ b/arch/powerpc/platforms/pseries/papr-platform-dump.c @@ -303,8 +303,6 @@ static long papr_platform_dump_create_handle(u64 dump_tag) { struct ibm_platform_dump_params *params; u64 param_dump_tag; - struct file *file; - long err; int fd; /* @@ -334,34 +332,22 @@ static long papr_platform_dump_create_handle(u64 dump_tag) params->dump_tag_lo = (u32)(dump_tag & 0x00000000ffffffffULL); params->status = RTAS_IBM_PLATFORM_DUMP_START; - fd = get_unused_fd_flags(O_RDONLY | O_CLOEXEC); + fd = FD_ADD(O_RDONLY | O_CLOEXEC, + anon_inode_getfile_fmode("[papr-platform-dump]", + &papr_platform_dump_handle_ops, + (void *)params, O_RDONLY, + FMODE_LSEEK | FMODE_PREAD)); if (fd < 0) { - err = fd; - goto free_area; - } - - file = anon_inode_getfile_fmode("[papr-platform-dump]", - &papr_platform_dump_handle_ops, - (void *)params, O_RDONLY, - FMODE_LSEEK | FMODE_PREAD); - if (IS_ERR(file)) { - err = PTR_ERR(file); - goto put_fd; + rtas_work_area_free(params->work_area); + kfree(params); + return fd; } - fd_install(fd, file); - list_add(&params->list, &platform_dump_list); pr_info("%s (%d) initiated platform dump for dump tag %llu\n", current->comm, current->pid, dump_tag); return fd;
-put_fd: - put_unused_fd(fd); -free_area: - rtas_work_area_free(params->work_area); - kfree(params); - return err; } /* diff --git a/arch/powerpc/platforms/pseries/papr-rtas-common.c b/arch/powerpc/platforms/pseries/papr-rtas-common.c index 33c606e3378a..1630e0cd210c 100644 --- a/arch/powerpc/platforms/pseries/papr-rtas-common.c +++ b/arch/powerpc/platforms/pseries/papr-rtas-common.c @@ -205,35 +205,18 @@ long papr_rtas_setup_file_interface(struct papr_rtas_sequence *seq, char *name) { const struct papr_rtas_blob *blob; - struct file *file; - long ret; int fd; blob = papr_rtas_retrieve(seq); if (IS_ERR(blob)) return PTR_ERR(blob); - fd = get_unused_fd_flags(O_RDONLY | O_CLOEXEC); - if (fd < 0) { - ret = fd; - goto free_blob; - } - - file = anon_inode_getfile_fmode(name, fops, (void *)blob, - O_RDONLY, FMODE_LSEEK | FMODE_PREAD); - if (IS_ERR(file)) { - ret = PTR_ERR(file); - goto put_fd; - } - - fd_install(fd, file); + fd = FD_ADD(O_RDONLY | O_CLOEXEC, + anon_inode_getfile_fmode(name, fops, (void *)blob, O_RDONLY, + FMODE_LSEEK | FMODE_PREAD)); + if (fd < 0) + papr_rtas_blob_free(blob); return fd; - -put_fd: - put_unused_fd(fd); -free_blob: - papr_rtas_blob_free(blob); - return ret; } /* diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 22cda9c452d2..fadec20b87a8 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -367,7 +367,7 @@ config RISCV_NONSTANDARD_CACHE_OPS systems to handle cache management. config AS_HAS_INSN - def_bool $(as-instr,.insn r 51$(comma) 0$(comma) 0$(comma) t0$(comma) t0$(comma) zero) + def_bool $(as-instr,.insn 0x100000f) config AS_HAS_OPTION_ARCH # https://github.com/llvm/llvm-project/commit/9e8ed3403c191ab9c4903e8eeb8f732ff8a43cb4 diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index ecf2fcce2d92..4c6de57f65ef 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -134,21 +134,6 @@ endif CHECKFLAGS += -D__riscv -D__riscv_xlen=$(BITS) # Default target when executing plain make -boot := arch/riscv/boot -ifeq ($(CONFIG_XIP_KERNEL),y) -KBUILD_IMAGE := $(boot)/xipImage -else -ifeq ($(CONFIG_RISCV_M_MODE)$(CONFIG_SOC_CANAAN_K210),yy) -KBUILD_IMAGE := $(boot)/loader.bin -else -ifeq ($(CONFIG_EFI_ZBOOT),) -KBUILD_IMAGE := $(boot)/Image.gz -else -KBUILD_IMAGE := $(boot)/vmlinuz.efi -endif -endif -endif - boot := arch/riscv/boot boot-image-y := Image boot-image-$(CONFIG_KERNEL_BZIP2) := Image.bz2 @@ -159,7 +144,7 @@ boot-image-$(CONFIG_KERNEL_LZO) := Image.lzo boot-image-$(CONFIG_KERNEL_ZSTD) := Image.zst boot-image-$(CONFIG_KERNEL_XZ) := Image.xz ifdef CONFIG_RISCV_M_MODE -boot-image-$(CONFIG_ARCH_CANAAN) := loader.bin +boot-image-$(CONFIG_SOC_CANAAN_K210) := loader.bin endif boot-image-$(CONFIG_EFI_ZBOOT) := vmlinuz.efi boot-image-$(CONFIG_XIP_KERNEL) := xipImage diff --git a/arch/riscv/boot/dts/allwinner/sun20i-d1s.dtsi b/arch/riscv/boot/dts/allwinner/sun20i-d1s.dtsi index 6367112e614a..a7442a508433 100644 --- a/arch/riscv/boot/dts/allwinner/sun20i-d1s.dtsi +++ b/arch/riscv/boot/dts/allwinner/sun20i-d1s.dtsi @@ -28,7 +28,7 @@ riscv,isa-base = "rv64i"; riscv,isa-extensions = "i", "m", "a", "f", "d", "c", "zicntr", "zicsr", "zifencei", "zihpm", "xtheadvector"; - thead,vlenb = <128>; + thead,vlenb = <16>; #cooling-cells = <2>; cpu0_intc: interrupt-controller { diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h index f5f4f7f85543..36bba6720c26 100644 --- a/arch/riscv/include/asm/uaccess.h +++ b/arch/riscv/include/asm/uaccess.h @@ -437,10 +437,10 @@ unsigned long __must_check clear_user(void __user *to, unsigned long 
n) __clear_user(untagged_addr(to), n) : n; } -#define __get_kernel_nofault(dst, src, type, err_label) \ +#define arch_get_kernel_nofault(dst, src, type, err_label) \ __get_user_nocheck(*((type *)(dst)), (__force __user type *)(src), err_label) -#define __put_kernel_nofault(dst, src, type, err_label) \ +#define arch_put_kernel_nofault(dst, src, type, err_label) \ __put_user_nocheck(*((type *)(src)), (__force __user type *)(dst), err_label) static __must_check __always_inline bool user_access_begin(const void __user *ptr, size_t len) @@ -460,10 +460,10 @@ static inline void user_access_restore(unsigned long enabled) { } * We want the unsafe accessors to always be inlined and use * the error labels - thus the macro games. */ -#define unsafe_put_user(x, ptr, label) \ +#define arch_unsafe_put_user(x, ptr, label) \ __put_user_nocheck(x, (ptr), label) -#define unsafe_get_user(x, ptr, label) do { \ +#define arch_unsafe_get_user(x, ptr, label) do { \ __inttype(*(ptr)) __gu_val; \ __get_user_nocheck(__gu_val, (ptr), label); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ diff --git a/arch/riscv/include/asm/vendorid_list.h b/arch/riscv/include/asm/vendorid_list.h index 3b09874d7a6d..7f5030ee1fcf 100644 --- a/arch/riscv/include/asm/vendorid_list.h +++ b/arch/riscv/include/asm/vendorid_list.h @@ -7,8 +7,8 @@ #define ANDES_VENDOR_ID 0x31e #define MICROCHIP_VENDOR_ID 0x029 +#define MIPS_VENDOR_ID 0x127 #define SIFIVE_VENDOR_ID 0x489 #define THEAD_VENDOR_ID 0x5b7 -#define MIPS_VENDOR_ID 0x722 #endif diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c index 5e8cde055264..c443337056ab 100644 --- a/arch/riscv/kernel/sbi.c +++ b/arch/riscv/kernel/sbi.c @@ -648,9 +648,9 @@ int sbi_debug_console_read(char *bytes, unsigned int num_bytes) void __init sbi_init(void) { + bool srst_power_off = false; int ret; - sbi_set_power_off(); ret = sbi_get_spec_version(); if (ret > 0) sbi_spec_version = ret; @@ -683,6 +683,7 @@ void __init sbi_init(void) sbi_probe_extension(SBI_EXT_SRST)) { pr_info("SBI SRST extension detected\n"); register_platform_power_off(sbi_srst_power_off); + srst_power_off = true; sbi_srst_reboot_nb.notifier_call = sbi_srst_reboot; sbi_srst_reboot_nb.priority = 192; register_restart_handler(&sbi_srst_reboot_nb); @@ -702,4 +703,7 @@ void __init sbi_init(void) __sbi_send_ipi = __sbi_send_ipi_v01; __sbi_rfence = __sbi_rfence_v01; } + + if (!srst_power_off) + sbi_set_power_off(); } diff --git a/arch/riscv/kvm/aia_imsic.c b/arch/riscv/kvm/aia_imsic.c index fda0346f0ea1..11422cb95a64 100644 --- a/arch/riscv/kvm/aia_imsic.c +++ b/arch/riscv/kvm/aia_imsic.c @@ -689,8 +689,20 @@ bool kvm_riscv_vcpu_aia_imsic_has_interrupt(struct kvm_vcpu *vcpu) */ read_lock_irqsave(&imsic->vsfile_lock, flags); - if (imsic->vsfile_cpu > -1) - ret = !!(csr_read(CSR_HGEIP) & BIT(imsic->vsfile_hgei)); + if (imsic->vsfile_cpu > -1) { + /* + * This function is typically called from kvm_vcpu_block() via + * kvm_arch_vcpu_runnable() upon WFI trap. The kvm_vcpu_block() + * can be preempted and the blocking VCPU might resume on a + * different CPU. This means it is possible that current CPU + * does not match the imsic->vsfile_cpu hence this function + * must check imsic->vsfile_cpu before accessing HGEIP CSR. 
+ */ + if (imsic->vsfile_cpu != vcpu->cpu) + ret = true; + else + ret = !!(csr_read(CSR_HGEIP) & BIT(imsic->vsfile_hgei)); + } read_unlock_irqrestore(&imsic->vsfile_lock, flags); return ret; diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c index 525fb5a330c0..58f5f3536ffd 100644 --- a/arch/riscv/kvm/mmu.c +++ b/arch/riscv/kvm/mmu.c @@ -171,7 +171,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, enum kvm_mr_change change) { hva_t hva, reg_end, size; - gpa_t base_gpa; bool writable; int ret = 0; @@ -190,15 +189,13 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, hva = new->userspace_addr; size = new->npages << PAGE_SHIFT; reg_end = hva + size; - base_gpa = new->base_gfn << PAGE_SHIFT; writable = !(new->flags & KVM_MEM_READONLY); mmap_read_lock(current->mm); /* * A memory region could potentially cover multiple VMAs, and - * any holes between them, so iterate over all of them to find - * out if we can map any of them right now. + * any holes between them, so iterate over all of them. * * +--------------------------------------------+ * +---------------+----------------+ +----------------+ @@ -209,7 +206,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, */ do { struct vm_area_struct *vma; - hva_t vm_start, vm_end; + hva_t vm_end; vma = find_vma_intersection(current->mm, hva, reg_end); if (!vma) @@ -225,36 +222,18 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, } /* Take the intersection of this VMA with the memory region */ - vm_start = max(hva, vma->vm_start); vm_end = min(reg_end, vma->vm_end); if (vma->vm_flags & VM_PFNMAP) { - gpa_t gpa = base_gpa + (vm_start - hva); - phys_addr_t pa; - - pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT; - pa += vm_start - vma->vm_start; - /* IO region dirty page logging not allowed */ if (new->flags & KVM_MEM_LOG_DIRTY_PAGES) { ret = -EINVAL; goto out; } - - ret = kvm_riscv_mmu_ioremap(kvm, gpa, pa, vm_end - vm_start, - writable, false); - if (ret) - break; } hva = vm_end; } while (hva < reg_end); - if (change == KVM_MR_FLAGS_ONLY) - goto out; - - if (ret) - kvm_riscv_mmu_iounmap(kvm, base_gpa, size); - out: mmap_read_unlock(current->mm); return ret; diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index bccb919ca615..5ce35aba6069 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -212,7 +212,7 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { - return (kvm_riscv_vcpu_has_interrupts(vcpu, -1UL) && + return (kvm_riscv_vcpu_has_interrupts(vcpu, -1ULL) && !kvm_riscv_vcpu_stopped(vcpu) && !vcpu->arch.pause); } diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index df22b10d9141..938e5df75b2d 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -69,6 +69,9 @@ config CC_HAS_ASM_AOR_FORMAT_FLAGS Clang versions before 19.1.0 do not support A, O, and R inline assembly format flags. 
+config CC_HAS_STACKPROTECTOR_GLOBAL + def_bool $(cc-option, -mstack-protector-guard=global -mstack-protector-guard-record) + config S390 def_bool y # @@ -140,7 +143,6 @@ config S390 select ARCH_INLINE_WRITE_UNLOCK_IRQ select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE select ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE - select ARCH_MODULE_NEEDS_WEAK_PER_CPU select ARCH_STACKWALK select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_DEBUG_PAGEALLOC @@ -245,6 +247,7 @@ config S390 select HAVE_SAMPLE_FTRACE_DIRECT_MULTI select HAVE_SETUP_PER_CPU_AREA select HAVE_SOFTIRQ_ON_OWN_STACK + select HAVE_STACKPROTECTOR if CC_HAS_STACKPROTECTOR_GLOBAL select HAVE_SYSCALL_TRACEPOINTS select HAVE_VIRT_CPU_ACCOUNTING select HAVE_VIRT_CPU_ACCOUNTING_IDLE @@ -504,22 +507,6 @@ config COMMAND_LINE_SIZE This allows you to specify the maximum length of the kernel command line. -config COMPAT - def_bool n - prompt "Kernel support for 31 bit emulation" - select ARCH_WANT_OLD_COMPAT_IPC - select COMPAT_OLD_SIGACTION - select HAVE_UID16 - depends on MULTIUSER - depends on !CC_IS_CLANG && !LD_IS_LLD - help - Select this option if you want to enable your system kernel to - handle system-calls from ELF binaries for 31 bit ESA. This option - (and some other stuff like libraries and such) is needed for - executing 31 bit applications. - - If unsure say N. - config SMP def_bool y diff --git a/arch/s390/Makefile b/arch/s390/Makefile index b4769241332b..d78ad6885ca2 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -22,7 +22,7 @@ KBUILD_AFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -D__ASSEMBLY__ ifndef CONFIG_AS_IS_LLVM KBUILD_AFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),$(aflags_dwarf)) endif -KBUILD_CFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -O2 -mpacked-stack -std=gnu11 +KBUILD_CFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -O2 -mpacked-stack -std=gnu11 -fms-extensions KBUILD_CFLAGS_DECOMPRESSOR += -DDISABLE_BRANCH_PROFILING -D__NO_FORTIFY KBUILD_CFLAGS_DECOMPRESSOR += -D__DECOMPRESSOR KBUILD_CFLAGS_DECOMPRESSOR += -Wno-pointer-sign @@ -35,6 +35,7 @@ KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-disable-warning, address-of-packed-membe KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),-g) KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO_DWARF4), $(call cc-option, -gdwarf-4,)) KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_CC_NO_ARRAY_BOUNDS),-Wno-array-bounds) +KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_CC_IS_CLANG),-Wno-microsoft-anon-tag) UTS_MACHINE := s390x STACK_SIZE := $(if $(CONFIG_KASAN),65536,$(if $(CONFIG_KMSAN),65536,16384)) @@ -89,6 +90,10 @@ ifdef CONFIG_EXPOLINE aflags-y += -DCC_USING_EXPOLINE endif +ifeq ($(CONFIG_STACKPROTECTOR),y) + KBUILD_CFLAGS += -mstack-protector-guard=global -mstack-protector-guard-record +endif + ifdef CONFIG_FUNCTION_TRACER ifeq ($(call cc-option,-mfentry -mnop-mcount),) # make use of hotpatch feature if the compiler supports it @@ -134,10 +139,9 @@ zfcpdump: $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ archheaders: - $(Q)$(MAKE) $(build)=$(syscalls) uapi + $(Q)$(MAKE) $(build)=$(syscalls) all archprepare: - $(Q)$(MAKE) $(build)=$(syscalls) kapi $(Q)$(MAKE) $(build)=$(tools) kapi $(extra_tools) ifeq ($(KBUILD_EXTMOD),) # We need to generate vdso-offsets.h before compiling certain files in kernel/. @@ -148,12 +152,9 @@ ifeq ($(KBUILD_EXTMOD),) # this hack. 
prepare: vdso_prepare vdso_prepare: prepare0 - $(Q)$(MAKE) $(build)=arch/s390/kernel/vdso64 include/generated/vdso64-offsets.h - $(if $(CONFIG_COMPAT),$(Q)$(MAKE) \ - $(build)=arch/s390/kernel/vdso32 include/generated/vdso32-offsets.h) + $(Q)$(MAKE) $(build)=arch/s390/kernel/vdso include/generated/vdso-offsets.h -vdso-install-y += arch/s390/kernel/vdso64/vdso64.so.dbg -vdso-install-$(CONFIG_COMPAT) += arch/s390/kernel/vdso32/vdso32.so.dbg +vdso-install-y += arch/s390/kernel/vdso/vdso.so.dbg endif diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index ad2b0baa527c..feb43db63f30 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -9,8 +9,7 @@ * Author: Gerald Schaefer <gerald.schaefer@de.ibm.com> */ -#define KMSG_COMPONENT "appldata" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "appldata: " fmt #include <linux/export.h> #include <linux/module.h> diff --git a/arch/s390/appldata/appldata_os.c b/arch/s390/appldata/appldata_os.c index a363d30ce739..137d4e7e1e9a 100644 --- a/arch/s390/appldata/appldata_os.c +++ b/arch/s390/appldata/appldata_os.c @@ -8,8 +8,7 @@ * Author: Gerald Schaefer <gerald.schaefer@de.ibm.com> */ -#define KMSG_COMPONENT "appldata" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "appldata: " fmt #include <linux/module.h> #include <linux/init.h> diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index 02f2cf082748..490167faba7a 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -32,6 +32,7 @@ obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o obj-y += $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) info.o obj-$(CONFIG_KERNEL_ZSTD) += clz_ctz.o obj-$(CONFIG_KMSAN) += kmsan.o +obj-$(CONFIG_STACKPROTECTOR) += stackprotector.o obj-all := $(obj-y) piggy.o syms.o targets := bzImage section_cmp.boot.data section_cmp.boot.preserved.data $(obj-y) diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h index 37d5b097ede5..61a205b489fb 100644 --- a/arch/s390/boot/boot.h +++ b/arch/s390/boot/boot.h @@ -28,6 +28,10 @@ struct vmlinux_info { unsigned long invalid_pg_dir_off; unsigned long alt_instructions; unsigned long alt_instructions_end; +#ifdef CONFIG_STACKPROTECTOR + unsigned long stack_prot_start; + unsigned long stack_prot_end; +#endif #ifdef CONFIG_KASAN unsigned long kasan_early_shadow_page_off; unsigned long kasan_early_shadow_pte_off; diff --git a/arch/s390/boot/ipl_data.c b/arch/s390/boot/ipl_data.c index c4130a80b058..b0fd8a526b42 100644 --- a/arch/s390/boot/ipl_data.c +++ b/arch/s390/boot/ipl_data.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -#include <linux/compat.h> #include <linux/ptrace.h> #include <asm/cio.h> #include <asm/asm-offsets.h> @@ -12,7 +11,7 @@ #define PSW_MASK_DISABLED (PSW_MASK_WAIT | PSW_MASK_EA | PSW_MASK_BA) struct ipl_lowcore { - psw_t32 ipl_psw; /* 0x0000 */ + psw32_t ipl_psw; /* 0x0000 */ struct ccw0 ccwpgm[2]; /* 0x0008 */ u8 fill[56]; /* 0x0018 */ struct ccw0 ccwpgmcc[20]; /* 0x0050 */ diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index f584d7da29cb..6bc950b92be7 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -3,6 +3,7 @@ #include <linux/init.h> #include <linux/ctype.h> #include <linux/pgtable.h> +#include <asm/arch-stackprotector.h> #include <asm/abs_lowcore.h> #include <asm/page-states.h> #include <asm/machine.h> @@ -294,6 +295,11 @@ void parse_boot_command_line(void) cmma_flag = 0; } +#ifdef CONFIG_STACKPROTECTOR + if (!strcmp(param, 
"debug_stackprotector")) + stack_protector_debug = 1; +#endif + #if IS_ENABLED(CONFIG_KVM) if (!strcmp(param, "prot_virt")) { rc = kstrtobool(val, &enabled); diff --git a/arch/s390/boot/stackprotector.c b/arch/s390/boot/stackprotector.c new file mode 100644 index 000000000000..68494940c12a --- /dev/null +++ b/arch/s390/boot/stackprotector.c @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define boot_fmt(fmt) "stackprot: " fmt + +#include "boot.h" +#include "../kernel/stackprotector.c" diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 3fbd25b9498f..f77067dfc2a8 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -20,6 +20,9 @@ #include <asm/uv.h> #include <asm/abs_lowcore.h> #include <asm/physmem_info.h> +#include <asm/stacktrace.h> +#include <asm/asm-offsets.h> +#include <asm/arch-stackprotector.h> #include "decompressor.h" #include "boot.h" #include "uv.h" @@ -477,6 +480,10 @@ static void kaslr_adjust_vmlinux_info(long offset) vmlinux.invalid_pg_dir_off += offset; vmlinux.alt_instructions += offset; vmlinux.alt_instructions_end += offset; +#ifdef CONFIG_STACKPROTECTOR + vmlinux.stack_prot_start += offset; + vmlinux.stack_prot_end += offset; +#endif #ifdef CONFIG_KASAN vmlinux.kasan_early_shadow_page_off += offset; vmlinux.kasan_early_shadow_pte_off += offset; @@ -622,6 +629,7 @@ void startup_kernel(void) __apply_alternatives((struct alt_instr *)_vmlinux_info.alt_instructions, (struct alt_instr *)_vmlinux_info.alt_instructions_end, ALT_CTX_EARLY); + stack_protector_apply_early(text_lma); /* * Save KASLR offset for early dumps, before vmcore_info is set. diff --git a/arch/s390/configs/compat.config b/arch/s390/configs/compat.config deleted file mode 100644 index 6fd051453ae8..000000000000 --- a/arch/s390/configs/compat.config +++ /dev/null @@ -1,3 +0,0 @@ -# Help: Enable compat support -CONFIG_COMPAT=y -CONFIG_COMPAT_32BIT_TIME=y diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 8433f769f7e1..1df484ed6329 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -796,6 +796,7 @@ CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MD5=y CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m +CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_SM3_GENERIC=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_XCBC=m @@ -809,8 +810,6 @@ CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m -CONFIG_CRYPTO_SHA3_256_S390=m -CONFIG_CRYPTO_SHA3_512_S390=m CONFIG_CRYPTO_GHASH_S390=m CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_DES_S390=m diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 4414dabd04a6..df89105dd520 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -780,6 +780,7 @@ CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MD5=y CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m +CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_SM3_GENERIC=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_XCBC=m @@ -794,8 +795,6 @@ CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m -CONFIG_CRYPTO_SHA3_256_S390=m -CONFIG_CRYPTO_SHA3_512_S390=m CONFIG_CRYPTO_GHASH_S390=m CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_DES_S390=m diff --git a/arch/s390/crypto/Kconfig b/arch/s390/crypto/Kconfig index 03f73fbd38b6..f838ca055f6d 100644 --- a/arch/s390/crypto/Kconfig +++ b/arch/s390/crypto/Kconfig @@ -2,26 +2,6 @@ menu "Accelerated Cryptographic Algorithms for CPU (s390)" -config CRYPTO_SHA3_256_S390 - tristate "Hash 
functions: SHA3-224 and SHA3-256" - select CRYPTO_HASH - help - SHA3-224 and SHA3-256 secure hash algorithms (FIPS 202) - - Architecture: s390 - - It is available as of z14. - -config CRYPTO_SHA3_512_S390 - tristate "Hash functions: SHA3-384 and SHA3-512" - select CRYPTO_HASH - help - SHA3-384 and SHA3-512 secure hash algorithms (FIPS 202) - - Architecture: s390 - - It is available as of z14. - config CRYPTO_GHASH_S390 tristate "Hash functions: GHASH" select CRYPTO_HASH diff --git a/arch/s390/crypto/Makefile b/arch/s390/crypto/Makefile index 998f4b656b18..387a229e1038 100644 --- a/arch/s390/crypto/Makefile +++ b/arch/s390/crypto/Makefile @@ -3,8 +3,6 @@ # Cryptographic API # -obj-$(CONFIG_CRYPTO_SHA3_256_S390) += sha3_256_s390.o sha_common.o -obj-$(CONFIG_CRYPTO_SHA3_512_S390) += sha3_512_s390.o sha_common.o obj-$(CONFIG_CRYPTO_DES_S390) += des_s390.o obj-$(CONFIG_CRYPTO_AES_S390) += aes_s390.o obj-$(CONFIG_CRYPTO_PAES_S390) += paes_s390.o diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index 5d36f4020dfa..d0a295435680 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -14,8 +14,7 @@ * Derived from "crypto/aes_generic.c" */ -#define KMSG_COMPONENT "aes_s390" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "aes_s390: " fmt #include <crypto/aes.h> #include <crypto/algapi.h> diff --git a/arch/s390/crypto/hmac_s390.c b/arch/s390/crypto/hmac_s390.c index 58444da9b004..f8cd09f341d4 100644 --- a/arch/s390/crypto/hmac_s390.c +++ b/arch/s390/crypto/hmac_s390.c @@ -5,8 +5,7 @@ * s390 specific HMAC support. */ -#define KMSG_COMPONENT "hmac_s390" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "hmac_s390: " fmt #include <asm/cpacf.h> #include <crypto/internal/hash.h> diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c index a624a43a2b54..64aef7eb2030 100644 --- a/arch/s390/crypto/paes_s390.c +++ b/arch/s390/crypto/paes_s390.c @@ -10,8 +10,7 @@ * Harald Freudenberger <freude@de.ibm.com> */ -#define KMSG_COMPONENT "paes_s390" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "paes_s390: " fmt #include <linux/atomic.h> #include <linux/cpufeature.h> diff --git a/arch/s390/crypto/phmac_s390.c b/arch/s390/crypto/phmac_s390.c index 89f3e6d8fd89..88342bd4c37a 100644 --- a/arch/s390/crypto/phmac_s390.c +++ b/arch/s390/crypto/phmac_s390.c @@ -5,8 +5,7 @@ * s390 specific HMAC support for protected keys. */ -#define KMSG_COMPONENT "phmac_s390" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "phmac_s390: " fmt #include <asm/cpacf.h> #include <asm/pkey.h> diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c index 2becd77df741..84e3d3c6ba09 100644 --- a/arch/s390/crypto/prng.c +++ b/arch/s390/crypto/prng.c @@ -6,8 +6,7 @@ * Driver for the s390 pseudo random number generator */ -#define KMSG_COMPONENT "prng" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "prng: " fmt #include <linux/fs.h> #include <linux/fips.h> diff --git a/arch/s390/crypto/sha.h b/arch/s390/crypto/sha.h deleted file mode 100644 index b9cd9572dd35..000000000000 --- a/arch/s390/crypto/sha.h +++ /dev/null @@ -1,51 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0+ */ -/* - * Cryptographic API. - * - * s390 generic implementation of the SHA Secure Hash Algorithms. - * - * Copyright IBM Corp. 
2007 - * Author(s): Jan Glauber (jang@de.ibm.com) - */ -#ifndef _CRYPTO_ARCH_S390_SHA_H -#define _CRYPTO_ARCH_S390_SHA_H - -#include <crypto/hash.h> -#include <crypto/sha2.h> -#include <crypto/sha3.h> -#include <linux/build_bug.h> -#include <linux/types.h> - -/* must be big enough for the largest SHA variant */ -#define CPACF_MAX_PARMBLOCK_SIZE SHA3_STATE_SIZE -#define SHA_MAX_BLOCK_SIZE SHA3_224_BLOCK_SIZE - -struct s390_sha_ctx { - u64 count; /* message length in bytes */ - union { - u32 state[CPACF_MAX_PARMBLOCK_SIZE / sizeof(u32)]; - struct { - u64 state[SHA512_DIGEST_SIZE / sizeof(u64)]; - u64 count_hi; - } sha512; - struct { - __le64 state[SHA3_STATE_SIZE / sizeof(u64)]; - } sha3; - }; - int func; /* KIMD function to use */ - bool first_message_part; -}; - -struct shash_desc; - -int s390_sha_update_blocks(struct shash_desc *desc, const u8 *data, - unsigned int len); -int s390_sha_finup(struct shash_desc *desc, const u8 *src, unsigned int len, - u8 *out); - -static inline void __check_s390_sha_ctx_size(void) -{ - BUILD_BUG_ON(S390_SHA_CTX_SIZE != sizeof(struct s390_sha_ctx)); -} - -#endif diff --git a/arch/s390/crypto/sha3_256_s390.c b/arch/s390/crypto/sha3_256_s390.c deleted file mode 100644 index 03bb4f4bab70..000000000000 --- a/arch/s390/crypto/sha3_256_s390.c +++ /dev/null @@ -1,157 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * Cryptographic API. - * - * s390 implementation of the SHA256 and SHA224 Secure Hash Algorithm. - * - * s390 Version: - * Copyright IBM Corp. 2019 - * Author(s): Joerg Schmidbauer (jschmidb@de.ibm.com) - */ -#include <asm/cpacf.h> -#include <crypto/internal/hash.h> -#include <crypto/sha3.h> -#include <linux/cpufeature.h> -#include <linux/errno.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/string.h> - -#include "sha.h" - -static int sha3_256_init(struct shash_desc *desc) -{ - struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - - sctx->first_message_part = test_facility(86); - if (!sctx->first_message_part) - memset(sctx->state, 0, sizeof(sctx->state)); - sctx->count = 0; - sctx->func = CPACF_KIMD_SHA3_256; - - return 0; -} - -static int sha3_256_export(struct shash_desc *desc, void *out) -{ - struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - union { - u8 *u8; - u64 *u64; - } p = { .u8 = out }; - int i; - - if (sctx->first_message_part) { - memset(out, 0, SHA3_STATE_SIZE); - return 0; - } - for (i = 0; i < SHA3_STATE_SIZE / 8; i++) - put_unaligned(le64_to_cpu(sctx->sha3.state[i]), p.u64++); - return 0; -} - -static int sha3_256_import(struct shash_desc *desc, const void *in) -{ - struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - union { - const u8 *u8; - const u64 *u64; - } p = { .u8 = in }; - int i; - - for (i = 0; i < SHA3_STATE_SIZE / 8; i++) - sctx->sha3.state[i] = cpu_to_le64(get_unaligned(p.u64++)); - sctx->count = 0; - sctx->first_message_part = 0; - sctx->func = CPACF_KIMD_SHA3_256; - - return 0; -} - -static int sha3_224_import(struct shash_desc *desc, const void *in) -{ - struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - - sha3_256_import(desc, in); - sctx->func = CPACF_KIMD_SHA3_224; - return 0; -} - -static struct shash_alg sha3_256_alg = { - .digestsize = SHA3_256_DIGEST_SIZE, /* = 32 */ - .init = sha3_256_init, - .update = s390_sha_update_blocks, - .finup = s390_sha_finup, - .export = sha3_256_export, - .import = sha3_256_import, - .descsize = S390_SHA_CTX_SIZE, - .statesize = SHA3_STATE_SIZE, - .base = { - .cra_name = "sha3-256", - .cra_driver_name = "sha3-256-s390", - .cra_priority = 300, - .cra_flags = 
CRYPTO_AHASH_ALG_BLOCK_ONLY, - .cra_blocksize = SHA3_256_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -}; - -static int sha3_224_init(struct shash_desc *desc) -{ - struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - - sha3_256_init(desc); - sctx->func = CPACF_KIMD_SHA3_224; - return 0; -} - -static struct shash_alg sha3_224_alg = { - .digestsize = SHA3_224_DIGEST_SIZE, - .init = sha3_224_init, - .update = s390_sha_update_blocks, - .finup = s390_sha_finup, - .export = sha3_256_export, /* same as for 256 */ - .import = sha3_224_import, /* function code different! */ - .descsize = S390_SHA_CTX_SIZE, - .statesize = SHA3_STATE_SIZE, - .base = { - .cra_name = "sha3-224", - .cra_driver_name = "sha3-224-s390", - .cra_priority = 300, - .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, - .cra_blocksize = SHA3_224_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -}; - -static int __init sha3_256_s390_init(void) -{ - int ret; - - if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA3_256)) - return -ENODEV; - - ret = crypto_register_shash(&sha3_256_alg); - if (ret < 0) - goto out; - - ret = crypto_register_shash(&sha3_224_alg); - if (ret < 0) - crypto_unregister_shash(&sha3_256_alg); -out: - return ret; -} - -static void __exit sha3_256_s390_fini(void) -{ - crypto_unregister_shash(&sha3_224_alg); - crypto_unregister_shash(&sha3_256_alg); -} - -module_cpu_feature_match(S390_CPU_FEATURE_MSA, sha3_256_s390_init); -module_exit(sha3_256_s390_fini); - -MODULE_ALIAS_CRYPTO("sha3-256"); -MODULE_ALIAS_CRYPTO("sha3-224"); -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("SHA3-256 and SHA3-224 Secure Hash Algorithm"); diff --git a/arch/s390/crypto/sha3_512_s390.c b/arch/s390/crypto/sha3_512_s390.c deleted file mode 100644 index a5c9690eecb1..000000000000 --- a/arch/s390/crypto/sha3_512_s390.c +++ /dev/null @@ -1,157 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * Cryptographic API. - * - * s390 implementation of the SHA512 and SHA384 Secure Hash Algorithm. - * - * Copyright IBM Corp. 
2019 - * Author(s): Joerg Schmidbauer (jschmidb@de.ibm.com) - */ -#include <asm/cpacf.h> -#include <crypto/internal/hash.h> -#include <crypto/sha3.h> -#include <linux/cpufeature.h> -#include <linux/errno.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/string.h> - -#include "sha.h" - -static int sha3_512_init(struct shash_desc *desc) -{ - struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - - sctx->first_message_part = test_facility(86); - if (!sctx->first_message_part) - memset(sctx->state, 0, sizeof(sctx->state)); - sctx->count = 0; - sctx->func = CPACF_KIMD_SHA3_512; - - return 0; -} - -static int sha3_512_export(struct shash_desc *desc, void *out) -{ - struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - union { - u8 *u8; - u64 *u64; - } p = { .u8 = out }; - int i; - - if (sctx->first_message_part) { - memset(out, 0, SHA3_STATE_SIZE); - return 0; - } - for (i = 0; i < SHA3_STATE_SIZE / 8; i++) - put_unaligned(le64_to_cpu(sctx->sha3.state[i]), p.u64++); - return 0; -} - -static int sha3_512_import(struct shash_desc *desc, const void *in) -{ - struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - union { - const u8 *u8; - const u64 *u64; - } p = { .u8 = in }; - int i; - - for (i = 0; i < SHA3_STATE_SIZE / 8; i++) - sctx->sha3.state[i] = cpu_to_le64(get_unaligned(p.u64++)); - sctx->count = 0; - sctx->first_message_part = 0; - sctx->func = CPACF_KIMD_SHA3_512; - - return 0; -} - -static int sha3_384_import(struct shash_desc *desc, const void *in) -{ - struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - - sha3_512_import(desc, in); - sctx->func = CPACF_KIMD_SHA3_384; - return 0; -} - -static struct shash_alg sha3_512_alg = { - .digestsize = SHA3_512_DIGEST_SIZE, - .init = sha3_512_init, - .update = s390_sha_update_blocks, - .finup = s390_sha_finup, - .export = sha3_512_export, - .import = sha3_512_import, - .descsize = S390_SHA_CTX_SIZE, - .statesize = SHA3_STATE_SIZE, - .base = { - .cra_name = "sha3-512", - .cra_driver_name = "sha3-512-s390", - .cra_priority = 300, - .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, - .cra_blocksize = SHA3_512_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -}; - -MODULE_ALIAS_CRYPTO("sha3-512"); - -static int sha3_384_init(struct shash_desc *desc) -{ - struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - - sha3_512_init(desc); - sctx->func = CPACF_KIMD_SHA3_384; - return 0; -} - -static struct shash_alg sha3_384_alg = { - .digestsize = SHA3_384_DIGEST_SIZE, - .init = sha3_384_init, - .update = s390_sha_update_blocks, - .finup = s390_sha_finup, - .export = sha3_512_export, /* same as for 512 */ - .import = sha3_384_import, /* function code different! 
*/ - .descsize = S390_SHA_CTX_SIZE, - .statesize = SHA3_STATE_SIZE, - .base = { - .cra_name = "sha3-384", - .cra_driver_name = "sha3-384-s390", - .cra_priority = 300, - .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, - .cra_blocksize = SHA3_384_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct s390_sha_ctx), - .cra_module = THIS_MODULE, - } -}; - -MODULE_ALIAS_CRYPTO("sha3-384"); - -static int __init init(void) -{ - int ret; - - if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA3_512)) - return -ENODEV; - ret = crypto_register_shash(&sha3_512_alg); - if (ret < 0) - goto out; - ret = crypto_register_shash(&sha3_384_alg); - if (ret < 0) - crypto_unregister_shash(&sha3_512_alg); -out: - return ret; -} - -static void __exit fini(void) -{ - crypto_unregister_shash(&sha3_512_alg); - crypto_unregister_shash(&sha3_384_alg); -} - -module_cpu_feature_match(S390_CPU_FEATURE_MSA, init); -module_exit(fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("SHA3-512 and SHA3-384 Secure Hash Algorithm"); diff --git a/arch/s390/crypto/sha_common.c b/arch/s390/crypto/sha_common.c deleted file mode 100644 index d6f839618794..000000000000 --- a/arch/s390/crypto/sha_common.c +++ /dev/null @@ -1,117 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * Cryptographic API. - * - * s390 generic implementation of the SHA Secure Hash Algorithms. - * - * Copyright IBM Corp. 2007 - * Author(s): Jan Glauber (jang@de.ibm.com) - */ - -#include <crypto/internal/hash.h> -#include <linux/export.h> -#include <linux/module.h> -#include <asm/cpacf.h> -#include "sha.h" - -int s390_sha_update_blocks(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - unsigned int bsize = crypto_shash_blocksize(desc->tfm); - struct s390_sha_ctx *ctx = shash_desc_ctx(desc); - unsigned int n; - int fc; - - fc = ctx->func; - if (ctx->first_message_part) - fc |= CPACF_KIMD_NIP; - - /* process as many blocks as possible */ - n = (len / bsize) * bsize; - ctx->count += n; - switch (ctx->func) { - case CPACF_KLMD_SHA_512: - case CPACF_KLMD_SHA3_384: - if (ctx->count < n) - ctx->sha512.count_hi++; - break; - } - cpacf_kimd(fc, ctx->state, data, n); - ctx->first_message_part = 0; - return len - n; -} -EXPORT_SYMBOL_GPL(s390_sha_update_blocks); - -static int s390_crypto_shash_parmsize(int func) -{ - switch (func) { - case CPACF_KLMD_SHA_1: - return 20; - case CPACF_KLMD_SHA_256: - return 32; - case CPACF_KLMD_SHA_512: - return 64; - case CPACF_KLMD_SHA3_224: - case CPACF_KLMD_SHA3_256: - case CPACF_KLMD_SHA3_384: - case CPACF_KLMD_SHA3_512: - return 200; - default: - return -EINVAL; - } -} - -int s390_sha_finup(struct shash_desc *desc, const u8 *src, unsigned int len, - u8 *out) -{ - struct s390_sha_ctx *ctx = shash_desc_ctx(desc); - int mbl_offset, fc; - u64 bits; - - ctx->count += len; - - bits = ctx->count * 8; - mbl_offset = s390_crypto_shash_parmsize(ctx->func); - if (mbl_offset < 0) - return -EINVAL; - - mbl_offset = mbl_offset / sizeof(u32); - - /* set total msg bit length (mbl) in CPACF parmblock */ - switch (ctx->func) { - case CPACF_KLMD_SHA_512: - /* The SHA512 parmblock has a 128-bit mbl field. 
*/ - if (ctx->count < len) - ctx->sha512.count_hi++; - ctx->sha512.count_hi <<= 3; - ctx->sha512.count_hi |= ctx->count >> 61; - mbl_offset += sizeof(u64) / sizeof(u32); - fallthrough; - case CPACF_KLMD_SHA_1: - case CPACF_KLMD_SHA_256: - memcpy(ctx->state + mbl_offset, &bits, sizeof(bits)); - break; - case CPACF_KLMD_SHA3_224: - case CPACF_KLMD_SHA3_256: - case CPACF_KLMD_SHA3_384: - case CPACF_KLMD_SHA3_512: - break; - default: - return -EINVAL; - } - - fc = ctx->func; - fc |= test_facility(86) ? CPACF_KLMD_DUFOP : 0; - if (ctx->first_message_part) - fc |= CPACF_KLMD_NIP; - cpacf_klmd(fc, ctx->state, src, len); - - /* copy digest to out */ - memcpy(out, ctx->state, crypto_shash_digestsize(desc->tfm)); - - return 0; -} -EXPORT_SYMBOL_GPL(s390_sha_finup); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("s390 SHA cipher common functions"); diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c index c8af67d20994..08777f57bd24 100644 --- a/arch/s390/hypfs/hypfs_diag.c +++ b/arch/s390/hypfs/hypfs_diag.c @@ -7,8 +7,7 @@ * Author(s): Michael Holzheu <holzheu@de.ibm.com> */ -#define KMSG_COMPONENT "hypfs" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "hypfs: " fmt #include <linux/types.h> #include <linux/errno.h> diff --git a/arch/s390/hypfs/hypfs_diag_fs.c b/arch/s390/hypfs/hypfs_diag_fs.c index ede951dc0085..013da4ff9802 100644 --- a/arch/s390/hypfs/hypfs_diag_fs.c +++ b/arch/s390/hypfs/hypfs_diag_fs.c @@ -7,8 +7,7 @@ * Author(s): Michael Holzheu <holzheu@de.ibm.com> */ -#define KMSG_COMPONENT "hypfs" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "hypfs: " fmt #include <linux/types.h> #include <linux/errno.h> diff --git a/arch/s390/hypfs/hypfs_sprp.c b/arch/s390/hypfs/hypfs_sprp.c index a2952ed5518b..a72576221cab 100644 --- a/arch/s390/hypfs/hypfs_sprp.c +++ b/arch/s390/hypfs/hypfs_sprp.c @@ -7,7 +7,6 @@ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> */ -#include <linux/compat.h> #include <linux/errno.h> #include <linux/gfp.h> #include <linux/string.h> @@ -116,10 +115,7 @@ static long hypfs_sprp_ioctl(struct file *file, unsigned int cmd, if (!capable(CAP_SYS_ADMIN)) return -EACCES; - if (is_compat_task()) - argp = compat_ptr(arg); - else - argp = (void __user *) arg; + argp = (void __user *)arg; switch (cmd) { case HYPFS_DIAG304: return __hypfs_sprp_ioctl(argp); diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 96409573c75d..ee5cfa8f71a0 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -6,8 +6,7 @@ * Author(s): Michael Holzheu <holzheu@de.ibm.com> */ -#define KMSG_COMPONENT "hypfs" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "hypfs: " fmt #include <linux/types.h> #include <linux/errno.h> diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h index 56817990c73d..b24459f692fa 100644 --- a/arch/s390/include/asm/ap.h +++ b/arch/s390/include/asm/ap.h @@ -38,16 +38,30 @@ typedef unsigned int ap_qid_t; * The ap queue status word is returned by all three AP functions * (PQAP, NQAP and DQAP). There's a set of flags in the first * byte, followed by a 1 byte response code. + * + * For convenience the 'value' field is a 32 bit access of the + * whole status and the 'status_bits' and 'rc' fields comprise + * the leftmost 8 status bits and the response_code. 
*/ struct ap_queue_status { - unsigned int queue_empty : 1; - unsigned int replies_waiting : 1; - unsigned int queue_full : 1; - unsigned int : 3; - unsigned int async : 1; - unsigned int irq_enabled : 1; - unsigned int response_code : 8; - unsigned int : 16; + union { + unsigned int value : 32; + struct { + unsigned int status_bits : 8; + unsigned int rc : 8; + unsigned int : 16; + }; + struct { + unsigned int queue_empty : 1; + unsigned int replies_waiting : 1; + unsigned int queue_full : 1; + unsigned int : 3; + unsigned int async : 1; + unsigned int irq_enabled : 1; + unsigned int response_code : 8; + unsigned int : 16; + }; + }; }; /* diff --git a/arch/s390/include/asm/arch-stackprotector.h b/arch/s390/include/asm/arch-stackprotector.h new file mode 100644 index 000000000000..953627259e91 --- /dev/null +++ b/arch/s390/include/asm/arch-stackprotector.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _ASM_S390_ARCH_STACKPROTECTOR_H +#define _ASM_S390_ARCH_STACKPROTECTOR_H + +extern unsigned long __stack_chk_guard; +extern int stack_protector_debug; + +void __stack_protector_apply_early(unsigned long kernel_start); +int __stack_protector_apply(unsigned long *start, unsigned long *end, unsigned long kernel_start); + +static inline void stack_protector_apply_early(unsigned long kernel_start) +{ + if (IS_ENABLED(CONFIG_STACKPROTECTOR)) + __stack_protector_apply_early(kernel_start); +} + +static inline int stack_protector_apply(unsigned long *start, unsigned long *end) +{ + if (IS_ENABLED(CONFIG_STACKPROTECTOR)) + return __stack_protector_apply(start, end, 0); + return 0; +} + +#endif /* _ASM_S390_ARCH_STACKPROTECTOR_H */ diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h index b6b619f340a5..0a82ae2300b6 100644 --- a/arch/s390/include/asm/cio.h +++ b/arch/s390/include/asm/cio.h @@ -18,6 +18,8 @@ #include <asm/scsw.h> +#define CCW_MAX_BYTE_COUNT 65535 + /** * struct ccw1 - channel command word * @cmd_code: command code diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h deleted file mode 100644 index 3cb9d813f022..000000000000 --- a/arch/s390/include/asm/compat.h +++ /dev/null @@ -1,140 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_S390X_COMPAT_H -#define _ASM_S390X_COMPAT_H -/* - * Architecture specific compatibility types - */ -#include <linux/types.h> -#include <linux/sched.h> -#include <linux/sched/task_stack.h> -#include <linux/thread_info.h> -#include <asm/ptrace.h> - -#define compat_mode_t compat_mode_t -typedef u16 compat_mode_t; - -#define __compat_uid_t __compat_uid_t -typedef u16 __compat_uid_t; -typedef u16 __compat_gid_t; - -#define compat_dev_t compat_dev_t -typedef u16 compat_dev_t; - -#define compat_ipc_pid_t compat_ipc_pid_t -typedef u16 compat_ipc_pid_t; - -#define compat_statfs compat_statfs - -#include <asm-generic/compat.h> - -#define __TYPE_IS_PTR(t) (!__builtin_types_compatible_p( \ - typeof(0?(__force t)0:0ULL), u64)) - -#define __SC_DELOUSE(t,v) ({ \ - BUILD_BUG_ON(sizeof(t) > 4 && !__TYPE_IS_PTR(t)); \ - (__force t)(__TYPE_IS_PTR(t) ? 
((v) & 0x7fffffff) : (v)); \ -}) - -#define PSW32_MASK_USER 0x0000FF00UL - -#define PSW32_USER_BITS (PSW32_MASK_DAT | PSW32_MASK_IO | PSW32_MASK_EXT | \ - PSW32_DEFAULT_KEY | PSW32_MASK_BASE | \ - PSW32_MASK_MCHECK | PSW32_MASK_PSTATE | \ - PSW32_ASC_PRIMARY) - -#define COMPAT_UTS_MACHINE "s390\0\0\0\0" - -typedef u16 compat_nlink_t; - -typedef struct { - u32 mask; - u32 addr; -} __aligned(8) psw_compat_t; - -typedef struct { - psw_compat_t psw; - u32 gprs[NUM_GPRS]; - u32 acrs[NUM_ACRS]; - u32 orig_gpr2; -} s390_compat_regs; - -typedef struct { - u32 gprs_high[NUM_GPRS]; -} s390_compat_regs_high; - -struct compat_stat { - compat_dev_t st_dev; - u16 __pad1; - compat_ino_t st_ino; - compat_mode_t st_mode; - compat_nlink_t st_nlink; - __compat_uid_t st_uid; - __compat_gid_t st_gid; - compat_dev_t st_rdev; - u16 __pad2; - u32 st_size; - u32 st_blksize; - u32 st_blocks; - u32 st_atime; - u32 st_atime_nsec; - u32 st_mtime; - u32 st_mtime_nsec; - u32 st_ctime; - u32 st_ctime_nsec; - u32 __unused4; - u32 __unused5; -}; - -struct compat_statfs { - u32 f_type; - u32 f_bsize; - u32 f_blocks; - u32 f_bfree; - u32 f_bavail; - u32 f_files; - u32 f_ffree; - compat_fsid_t f_fsid; - u32 f_namelen; - u32 f_frsize; - u32 f_flags; - u32 f_spare[4]; -}; - -struct compat_statfs64 { - u32 f_type; - u32 f_bsize; - u64 f_blocks; - u64 f_bfree; - u64 f_bavail; - u64 f_files; - u64 f_ffree; - compat_fsid_t f_fsid; - u32 f_namelen; - u32 f_frsize; - u32 f_flags; - u32 f_spare[5]; -}; - -/* - * A pointer passed in from user mode. This should not - * be used for syscall parameters, just declare them - * as pointers because the syscall entry code will have - * appropriately converted them already. - */ - -static inline void __user *compat_ptr(compat_uptr_t uptr) -{ - return (void __user *)(unsigned long)(uptr & 0x7fffffffUL); -} -#define compat_ptr(uptr) compat_ptr(uptr) - -#ifdef CONFIG_COMPAT - -static inline int is_compat_task(void) -{ - return test_thread_flag(TIF_31BIT); -} - -#endif - -#endif /* _ASM_S390X_COMPAT_H */ diff --git a/arch/s390/include/asm/cpufeature.h b/arch/s390/include/asm/cpufeature.h index 6c6a99660e78..d6fb999c8c6d 100644 --- a/arch/s390/include/asm/cpufeature.h +++ b/arch/s390/include/asm/cpufeature.h @@ -27,7 +27,6 @@ int cpu_have_feature(unsigned int nr); #define cpu_has_edat1() test_facility(8) #define cpu_has_edat2() test_facility(78) #define cpu_has_gs() test_facility(133) -#define cpu_has_idte() test_facility(3) #define cpu_has_nx() test_facility(130) #define cpu_has_rdp() test_facility(194) #define cpu_has_seq_insn() test_facility(85) diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index a03df312081e..bb63fa4d20bb 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -162,8 +162,6 @@ enum { * ELF register definitions.. 
*/ -#include <linux/compat.h> - #include <asm/ptrace.h> #include <asm/syscall.h> #include <asm/user.h> @@ -171,9 +169,6 @@ enum { typedef s390_fp_regs elf_fpregset_t; typedef s390_regs elf_gregset_t; -typedef s390_fp_regs compat_elf_fpregset_t; -typedef s390_compat_regs compat_elf_gregset_t; - #include <linux/sched/mm.h> /* for task_struct */ #include <asm/mmu_context.h> @@ -183,10 +178,6 @@ typedef s390_compat_regs compat_elf_gregset_t; #define elf_check_arch(x) \ (((x)->e_machine == EM_S390 || (x)->e_machine == EM_S390_OLD) \ && (x)->e_ident[EI_CLASS] == ELF_CLASS) -#define compat_elf_check_arch(x) \ - (((x)->e_machine == EM_S390 || (x)->e_machine == EM_S390_OLD) \ - && (x)->e_ident[EI_CLASS] == ELF_CLASS) -#define compat_start_thread start_thread31 /* For SVR4/S390 the function pointer to be registered with `atexit` is passed in R14. */ @@ -203,9 +194,7 @@ typedef s390_compat_regs compat_elf_gregset_t; the loader. We need to make sure that it is out of the way of the program that it will "exec", and that there is sufficient room for the brk. 64-bit tasks are aligned to 4GB. */ -#define ELF_ET_DYN_BASE (is_compat_task() ? \ - (STACK_TOP / 3 * 2) : \ - (STACK_TOP / 3 * 2) & ~((1UL << 32) - 1)) +#define ELF_ET_DYN_BASE ((STACK_TOP / 3 * 2) & ~((1UL << 32) - 1)) /* This yields a mask that user programs can use to figure out what instruction set this CPU supports. */ @@ -224,43 +213,21 @@ extern unsigned long elf_hwcap; extern char elf_platform[]; #define ELF_PLATFORM (elf_platform) -#ifndef CONFIG_COMPAT #define SET_PERSONALITY(ex) \ do { \ set_personality(PER_LINUX | \ (current->personality & (~PER_MASK))); \ - current->thread.sys_call_table = sys_call_table; \ -} while (0) -#else /* CONFIG_COMPAT */ -#define SET_PERSONALITY(ex) \ -do { \ - if (personality(current->personality) != PER_LINUX32) \ - set_personality(PER_LINUX | \ - (current->personality & ~PER_MASK)); \ - if ((ex).e_ident[EI_CLASS] == ELFCLASS32) { \ - set_thread_flag(TIF_31BIT); \ - current->thread.sys_call_table = \ - sys_call_table_emu; \ - } else { \ - clear_thread_flag(TIF_31BIT); \ - current->thread.sys_call_table = \ - sys_call_table; \ - } \ } while (0) -#endif /* CONFIG_COMPAT */ /* * Cache aliasing on the latest machines calls for a mapping granularity - * of 512KB for the anonymous mapping base. For 64-bit processes use a - * 512KB alignment and a randomization of up to 1GB. For 31-bit processes - * the virtual address space is limited, use no alignment and limit the - * randomization to 8MB. - * For the additional randomization of the program break use 32MB for - * 64-bit and 8MB for 31-bit. + * of 512KB for the anonymous mapping base. Use a 512KB alignment and a + * randomization of up to 1GB. + * For the additional randomization of the program break use 32MB. */ -#define BRK_RND_MASK (is_compat_task() ? 0x7ffUL : 0x1fffUL) -#define MMAP_RND_MASK (is_compat_task() ? 0x7ffUL : 0x3ff80UL) -#define MMAP_ALIGN_MASK (is_compat_task() ? 
0 : 0x7fUL) +#define BRK_RND_MASK (0x1fffUL) +#define MMAP_RND_MASK (0x3ff80UL) +#define MMAP_ALIGN_MASK (0x7fUL) #define STACK_RND_MASK MMAP_RND_MASK /* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */ diff --git a/arch/s390/include/asm/fpu-insn.h b/arch/s390/include/asm/fpu-insn.h index e99f8bca8e08..96727f3bd0dc 100644 --- a/arch/s390/include/asm/fpu-insn.h +++ b/arch/s390/include/asm/fpu-insn.h @@ -12,6 +12,7 @@ #ifndef __ASSEMBLER__ #include <linux/instrumented.h> +#include <linux/kmsan.h> #include <asm/asm-extable.h> asm(".include \"asm/fpu-insn-asm.h\"\n"); @@ -393,6 +394,7 @@ static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr) : [vxr] "=Q" (*(u8 *)vxr) : [index] "d" (index), [v1] "I" (v1) : "memory"); + kmsan_unpoison_memory(vxr, size); } #else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ @@ -409,6 +411,7 @@ static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr) : [vxr] "=R" (*(u8 *)vxr) : [index] "d" (index), [v1] "I" (v1) : "memory", "1"); + kmsan_unpoison_memory(vxr, size); } #endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h index bee2d16c2951..692c484ec163 100644 --- a/arch/s390/include/asm/ftrace.h +++ b/arch/s390/include/asm/ftrace.h @@ -105,28 +105,11 @@ static inline void arch_ftrace_set_direct_caller(struct ftrace_regs *fregs, unsi } #endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ -/* - * Even though the system call numbers are identical for s390/s390x a - * different system call table is used for compat tasks. This may lead - * to e.g. incorrect or missing trace event sysfs files. - * Therefore simply do not trace compat system calls at all. - * See kernel/trace/trace_syscalls.c. - */ -#define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS -static inline bool arch_trace_is_compat_syscall(struct pt_regs *regs) -{ - return is_compat_task(); -} - #define ARCH_HAS_SYSCALL_MATCH_SYM_NAME static inline bool arch_syscall_match_sym_name(const char *sym, const char *name) { - /* - * Skip __s390_ and __s390x_ prefix - due to compat wrappers - * and aliasing some symbols of 64 bit system call functions - * may get the __s390_ prefix instead of the __s390x_ prefix. - */ + /* Skip the __s390x_ prefix. */ return !strcmp(sym + 7, name) || !strcmp(sym + 8, name); } diff --git a/arch/s390/include/asm/idals.h b/arch/s390/include/asm/idals.h index ac68c657b28c..e5000ee6cdc6 100644 --- a/arch/s390/include/asm/idals.h +++ b/arch/s390/include/asm/idals.h @@ -181,6 +181,82 @@ static inline void idal_buffer_free(struct idal_buffer *ib) } /* + * Allocate an array of IDAL buffers to cover a total data size of @size. The + * resulting array is null-terminated. + * + * The amount of individual IDAL buffers is determined based on @size. + * Each IDAL buffer can have a maximum size of @CCW_MAX_BYTE_COUNT. 
+ */ +static inline struct idal_buffer **idal_buffer_array_alloc(size_t size, int page_order) +{ + struct idal_buffer **ibs; + size_t ib_size; /* Size of a single idal buffer */ + int count; /* Amount of individual idal buffers */ + int i; + + count = (size + CCW_MAX_BYTE_COUNT - 1) / CCW_MAX_BYTE_COUNT; + ibs = kmalloc_array(count + 1, sizeof(*ibs), GFP_KERNEL); + for (i = 0; i < count; i++) { + /* Determine size for the current idal buffer */ + ib_size = min(size, CCW_MAX_BYTE_COUNT); + size -= ib_size; + ibs[i] = idal_buffer_alloc(ib_size, page_order); + if (IS_ERR(ibs[i])) { + while (i--) + idal_buffer_free(ibs[i]); + kfree(ibs); + ibs = NULL; + return ERR_PTR(-ENOMEM); + } + } + ibs[i] = NULL; + return ibs; +} + +/* + * Free array of IDAL buffers + */ +static inline void idal_buffer_array_free(struct idal_buffer ***ibs) +{ + struct idal_buffer **p; + + if (!ibs || !*ibs) + return; + for (p = *ibs; *p; p++) + idal_buffer_free(*p); + kfree(*ibs); + *ibs = NULL; +} + +/* + * Determine size of IDAL buffer array + */ +static inline int idal_buffer_array_size(struct idal_buffer **ibs) +{ + int size = 0; + + while (ibs && *ibs) { + size++; + ibs++; + } + return size; +} + +/* + * Determine total data size covered by IDAL buffer array + */ +static inline size_t idal_buffer_array_datasize(struct idal_buffer **ibs) +{ + size_t size = 0; + + while (ibs && *ibs) { + size += (*ibs)->size; + ibs++; + } + return size; +} + +/* * Test if a idal list is really needed. */ static inline bool __idal_buffer_is_needed(struct idal_buffer *ib) diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index d9c853db9a40..50ffe75adeb4 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -100,7 +100,8 @@ struct lowcore { /* Save areas. */ __u64 save_area[8]; /* 0x0200 */ - __u8 pad_0x0240[0x0280-0x0240]; /* 0x0240 */ + __u64 stack_canary; /* 0x0240 */ + __u8 pad_0x0248[0x0280-0x0248]; /* 0x0248 */ __u64 save_area_restart[1]; /* 0x0280 */ __u64 pcpu; /* 0x0288 */ diff --git a/arch/s390/include/asm/pai.h b/arch/s390/include/asm/pai.h index ebeabd0aaa51..534d0320e2aa 100644 --- a/arch/s390/include/asm/pai.h +++ b/arch/s390/include/asm/pai.h @@ -77,6 +77,7 @@ static __always_inline void pai_kernel_exit(struct pt_regs *regs) #define PAI_SAVE_AREA(x) ((x)->hw.event_base) #define PAI_CPU_MASK(x) ((x)->hw.addr_filters) +#define PAI_PMU_IDX(x) ((x)->hw.last_tag) #define PAI_SWLIST(x) (&(x)->hw.tp_list) #endif diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h index 965886dfe954..5899f57f17d1 100644 --- a/arch/s390/include/asm/percpu.h +++ b/arch/s390/include/asm/percpu.h @@ -13,14 +13,6 @@ #define __my_cpu_offset get_lowcore()->percpu_offset /* - * For 64 bit module code, the module may be more than 4G above the - * per cpu area, use weak definitions to force the compiler to - * generate external references. - * Therefore, we have enabled CONFIG_ARCH_MODULE_NEEDS_WEAK_PER_CPU - * in the Kconfig. - */ - -/* * We use a compare-and-swap loop since that uses less cpu cycles than * disabling and enabling interrupts like the generic variant would do. 
*/ diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index b7100c6a4054..bca9b29778c3 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -648,18 +648,6 @@ static inline int mm_uses_skeys(struct mm_struct *mm) return 0; } -static inline void csp(unsigned int *ptr, unsigned int old, unsigned int new) -{ - union register_pair r1 = { .even = old, .odd = new, }; - unsigned long address = (unsigned long)ptr | 1; - - asm volatile( - " csp %[r1],%[address]" - : [r1] "+&d" (r1.pair), "+m" (*ptr) - : [address] "d" (address) - : "cc"); -} - /** * cspg() - Compare and Swap and Purge (CSPG) * @ptr: Pointer to the value to be exchanged @@ -1154,17 +1142,15 @@ static inline pte_t pte_mkhuge(pte_t pte) #define IPTE_NODAT 0x400 #define IPTE_GUEST_ASCE 0x800 -static __always_inline void __ptep_rdp(unsigned long addr, pte_t *ptep, - unsigned long opt, unsigned long asce, - int local) +static __always_inline void __ptep_rdp(unsigned long addr, pte_t *ptep, int local) { unsigned long pto; pto = __pa(ptep) & ~(PTRS_PER_PTE * sizeof(pte_t) - 1); - asm volatile(".insn rrf,0xb98b0000,%[r1],%[r2],%[asce],%[m4]" + asm volatile(".insn rrf,0xb98b0000,%[r1],%[r2],%%r0,%[m4]" : "+m" (*ptep) - : [r1] "a" (pto), [r2] "a" ((addr & PAGE_MASK) | opt), - [asce] "a" (asce), [m4] "i" (local)); + : [r1] "a" (pto), [r2] "a" (addr & PAGE_MASK), + [m4] "i" (local)); } static __always_inline void __ptep_ipte(unsigned long address, pte_t *ptep, @@ -1348,7 +1334,7 @@ static inline void flush_tlb_fix_spurious_fault(struct vm_area_struct *vma, * A local RDP can be used to do the flush. */ if (cpu_has_rdp() && !(pte_val(*ptep) & _PAGE_PROTECT)) - __ptep_rdp(address, ptep, 0, 0, 1); + __ptep_rdp(address, ptep, 1); } #define flush_tlb_fix_spurious_fault flush_tlb_fix_spurious_fault @@ -1402,7 +1388,6 @@ int set_pgste_bits(struct mm_struct *mm, unsigned long addr, int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep); int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc, unsigned long *oldpte, unsigned long *oldpgste); -void gmap_pmdp_csp(struct mm_struct *mm, unsigned long vmaddr); void gmap_pmdp_invalidate(struct mm_struct *mm, unsigned long vmaddr); void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr); void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr); @@ -1692,10 +1677,10 @@ static inline pmd_t mk_pmd_phys(unsigned long physpage, pgprot_t pgprot) #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLB_PAGE */ -static inline void __pmdp_csp(pmd_t *pmdp) +static inline void __pmdp_cspg(pmd_t *pmdp) { - csp((unsigned int *)pmdp + 1, pmd_val(*pmdp), - pmd_val(*pmdp) | _SEGMENT_ENTRY_INVALID); + cspg((unsigned long *)pmdp, pmd_val(*pmdp), + pmd_val(*pmdp) | _SEGMENT_ENTRY_INVALID); } #define IDTE_GLOBAL 0 diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 93e1034485d7..3affba95845b 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -119,18 +119,12 @@ extern void execve_tail(void); unsigned long vdso_text_size(void); unsigned long vdso_size(void); -/* - * User space process size: 2GB for 31 bit, 4TB or 8PT for 64 bit. - */ - -#define TASK_SIZE (test_thread_flag(TIF_31BIT) ? \ - _REGION3_SIZE : TASK_SIZE_MAX) -#define TASK_UNMAPPED_BASE (test_thread_flag(TIF_31BIT) ? 
\ - (_REGION3_SIZE >> 1) : (_REGION2_SIZE >> 1)) +#define TASK_SIZE (TASK_SIZE_MAX) +#define TASK_UNMAPPED_BASE (_REGION2_SIZE >> 1) #define TASK_SIZE_MAX (-PAGE_SIZE) #define VDSO_BASE (STACK_TOP + PAGE_SIZE) -#define VDSO_LIMIT (test_thread_flag(TIF_31BIT) ? _REGION3_SIZE : _REGION2_SIZE) +#define VDSO_LIMIT (_REGION2_SIZE) #define STACK_TOP (VDSO_LIMIT - vdso_size() - PAGE_SIZE) #define STACK_TOP_MAX (_REGION2_SIZE - vdso_size() - PAGE_SIZE) @@ -181,7 +175,6 @@ struct thread_struct { unsigned long system_timer; /* task cputime in kernel space */ unsigned long hardirq_timer; /* task cputime in hardirq context */ unsigned long softirq_timer; /* task cputime in softirq context */ - const sys_call_ptr_t *sys_call_table; /* system call table address */ union teid gmap_teid; /* address and flags of last gmap fault */ unsigned int gmap_int_code; /* int code of last gmap fault */ int ufpu_flags; /* user fpu flags */ @@ -379,14 +372,19 @@ static inline void local_mcck_enable(void) /* * Rewind PSW instruction address by specified number of bytes. */ -static inline unsigned long __rewind_psw(psw_t psw, unsigned long ilc) +static inline unsigned long __rewind_psw(psw_t psw, long ilen) { unsigned long mask; mask = (psw.mask & PSW_MASK_EA) ? -1UL : (psw.mask & PSW_MASK_BA) ? (1UL << 31) - 1 : (1UL << 24) - 1; - return (psw.addr - ilc) & mask; + return (psw.addr - ilen) & mask; +} + +static inline unsigned long __forward_psw(psw_t psw, long ilen) +{ + return __rewind_psw(psw, -ilen); } /* diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index dfa770b15fad..962cf042c66d 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -8,16 +8,19 @@ #define _S390_PTRACE_H #include <linux/bits.h> +#include <linux/typecheck.h> #include <uapi/asm/ptrace.h> #include <asm/thread_info.h> #include <asm/tpi.h> #define PIF_SYSCALL 0 /* inside a system call */ +#define PIF_PSW_ADDR_ADJUSTED 1 /* psw address has been adjusted */ #define PIF_SYSCALL_RET_SET 2 /* return value was set via ptrace */ #define PIF_GUEST_FAULT 3 /* indicates program check in sie64a */ #define PIF_FTRACE_FULL_REGS 4 /* all register contents valid (ftrace) */ #define _PIF_SYSCALL BIT(PIF_SYSCALL) +#define _PIF_ADDR_PSW_ADJUSTED BIT(PIF_PSW_ADDR_ADJUSTED) #define _PIF_SYSCALL_RET_SET BIT(PIF_SYSCALL_RET_SET) #define _PIF_GUEST_FAULT BIT(PIF_GUEST_FAULT) #define _PIF_FTRACE_FULL_REGS BIT(PIF_FTRACE_FULL_REGS) @@ -99,7 +102,7 @@ enum { typedef struct { unsigned int mask; unsigned int addr; -} psw_t32 __aligned(8); +} psw32_t __aligned(8); #define PGM_INT_CODE_MASK 0x7f #define PGM_INT_CODE_PER 0x80 diff --git a/arch/s390/include/asm/seccomp.h b/arch/s390/include/asm/seccomp.h index 71d46f0ba97b..f904b674fee0 100644 --- a/arch/s390/include/asm/seccomp.h +++ b/arch/s390/include/asm/seccomp.h @@ -19,10 +19,5 @@ #define SECCOMP_ARCH_NATIVE AUDIT_ARCH_S390X #define SECCOMP_ARCH_NATIVE_NR NR_syscalls #define SECCOMP_ARCH_NATIVE_NAME "s390x" -#ifdef CONFIG_COMPAT -# define SECCOMP_ARCH_COMPAT AUDIT_ARCH_S390 -# define SECCOMP_ARCH_COMPAT_NR NR_syscalls -# define SECCOMP_ARCH_COMPAT_NAME "s390" -#endif #endif /* _ASM_S390_SECCOMP_H */ diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index 03f4d01664f8..fb2bdbf35da5 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -43,7 +43,7 @@ extern int __cpu_up(unsigned int cpu, struct task_struct *tidle); extern void arch_send_call_function_single_ipi(int cpu); extern void arch_send_call_function_ipi_mask(const struct 
cpumask *mask); -extern void smp_call_ipl_cpu(void (*func)(void *), void *); +extern void __noreturn smp_call_ipl_cpu(void (*func)(void *), void *data); extern void smp_emergency_stop(void); extern int smp_find_processor_id(u16 address); diff --git a/arch/s390/include/asm/stackprotector.h b/arch/s390/include/asm/stackprotector.h new file mode 100644 index 000000000000..0497850103dd --- /dev/null +++ b/arch/s390/include/asm/stackprotector.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _ASM_S390_STACKPROTECTOR_H +#define _ASM_S390_STACKPROTECTOR_H + +#include <linux/sched.h> +#include <asm/current.h> +#include <asm/lowcore.h> + +static __always_inline void boot_init_stack_canary(void) +{ + current->stack_canary = get_random_canary(); + get_lowcore()->stack_canary = current->stack_canary; +} + +#endif /* _ASM_S390_STACKPROTECTOR_H */ diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index 10ce5c4ccbd6..4271e4169f45 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -15,7 +15,6 @@ #include <asm/ptrace.h> extern const sys_call_ptr_t sys_call_table[]; -extern const sys_call_ptr_t sys_call_table_emu[]; static inline long syscall_get_nr(struct task_struct *task, struct pt_regs *regs) @@ -46,15 +45,7 @@ static inline long syscall_get_error(struct task_struct *task, struct pt_regs *regs) { unsigned long error = regs->gprs[2]; -#ifdef CONFIG_COMPAT - if (test_tsk_thread_flag(task, TIF_31BIT)) { - /* - * Sign-extend the value so (int)-EFOO becomes (long)-EFOO - * and will match correctly in comparisons. - */ - error = (long)(int)error; - } -#endif + return IS_ERR_VALUE(error) ? error : 0; } @@ -78,10 +69,6 @@ static inline void syscall_get_arguments(struct task_struct *task, { unsigned long mask = -1UL; -#ifdef CONFIG_COMPAT - if (test_tsk_thread_flag(task, TIF_31BIT)) - mask = 0xffffffff; -#endif for (int i = 1; i < 6; i++) args[i] = regs->gprs[2 + i] & mask; @@ -99,10 +86,6 @@ static inline void syscall_set_arguments(struct task_struct *task, static inline int syscall_get_arch(struct task_struct *task) { -#ifdef CONFIG_COMPAT - if (test_tsk_thread_flag(task, TIF_31BIT)) - return AUDIT_ARCH_S390; -#endif return AUDIT_ARCH_S390X; } diff --git a/arch/s390/include/asm/syscall_wrapper.h b/arch/s390/include/asm/syscall_wrapper.h index 35c1d1b860d8..9eb58d5348d8 100644 --- a/arch/s390/include/asm/syscall_wrapper.h +++ b/arch/s390/include/asm/syscall_wrapper.h @@ -13,101 +13,12 @@ ,, regs->orig_gpr2,, regs->gprs[3],, regs->gprs[4] \ ,, regs->gprs[5],, regs->gprs[6],, regs->gprs[7]) -#ifdef CONFIG_COMPAT - -#define __SC_COMPAT_CAST(t, a) \ -({ \ - long __ReS = a; \ - \ - BUILD_BUG_ON((sizeof(t) > 4) && !__TYPE_IS_L(t) && \ - !__TYPE_IS_UL(t) && !__TYPE_IS_PTR(t) && \ - !__TYPE_IS_LL(t)); \ - if (__TYPE_IS_L(t)) \ - __ReS = (s32)a; \ - if (__TYPE_IS_UL(t)) \ - __ReS = (u32)a; \ - if (__TYPE_IS_PTR(t)) \ - __ReS = a & 0x7fffffff; \ - if (__TYPE_IS_LL(t)) \ - return -ENOSYS; \ - (t)__ReS; \ -}) - -/* - * To keep the naming coherent, re-define SYSCALL_DEFINE0 to create an alias - * named __s390x_sys_*() - */ -#define COMPAT_SYSCALL_DEFINE0(sname) \ - long __s390_compat_sys_##sname(void); \ - ALLOW_ERROR_INJECTION(__s390_compat_sys_##sname, ERRNO); \ - long __s390_compat_sys_##sname(void) - -#define SYSCALL_DEFINE0(sname) \ - SYSCALL_METADATA(_##sname, 0); \ - long __s390_sys_##sname(void); \ - ALLOW_ERROR_INJECTION(__s390_sys_##sname, ERRNO); \ - long __s390x_sys_##sname(void); \ - ALLOW_ERROR_INJECTION(__s390x_sys_##sname, 
ERRNO); \ - static inline long __do_sys_##sname(void); \ - long __s390_sys_##sname(void) \ - { \ - return __do_sys_##sname(); \ - } \ - long __s390x_sys_##sname(void) \ - { \ - return __do_sys_##sname(); \ - } \ - static inline long __do_sys_##sname(void) - -#define COND_SYSCALL(name) \ - cond_syscall(__s390x_sys_##name); \ - cond_syscall(__s390_sys_##name) - -#define COMPAT_SYSCALL_DEFINEx(x, name, ...) \ - long __s390_compat_sys##name(struct pt_regs *regs); \ - ALLOW_ERROR_INJECTION(__s390_compat_sys##name, ERRNO); \ - static inline long __se_compat_sys##name(__MAP(x, __SC_LONG, __VA_ARGS__)); \ - static inline long __do_compat_sys##name(__MAP(x, __SC_DECL, __VA_ARGS__)); \ - long __s390_compat_sys##name(struct pt_regs *regs) \ - { \ - return __se_compat_sys##name(SC_S390_REGS_TO_ARGS(x, __VA_ARGS__)); \ - } \ - static inline long __se_compat_sys##name(__MAP(x, __SC_LONG, __VA_ARGS__)) \ - { \ - __MAP(x, __SC_TEST, __VA_ARGS__); \ - return __do_compat_sys##name(__MAP(x, __SC_DELOUSE, __VA_ARGS__)); \ - } \ - static inline long __do_compat_sys##name(__MAP(x, __SC_DECL, __VA_ARGS__)) - -/* - * As some compat syscalls may not be implemented, we need to expand - * COND_SYSCALL_COMPAT in kernel/sys_ni.c to cover this case as well. - */ -#define COND_SYSCALL_COMPAT(name) \ - cond_syscall(__s390_compat_sys_##name) - -#define __S390_SYS_STUBx(x, name, ...) \ - long __s390_sys##name(struct pt_regs *regs); \ - ALLOW_ERROR_INJECTION(__s390_sys##name, ERRNO); \ - static inline long ___se_sys##name(__MAP(x, __SC_LONG, __VA_ARGS__)); \ - long __s390_sys##name(struct pt_regs *regs) \ - { \ - return ___se_sys##name(SC_S390_REGS_TO_ARGS(x, __VA_ARGS__)); \ - } \ - static inline long ___se_sys##name(__MAP(x, __SC_LONG, __VA_ARGS__)) \ - { \ - __MAP(x, __SC_TEST, __VA_ARGS__); \ - return __do_sys##name(__MAP(x, __SC_COMPAT_CAST, __VA_ARGS__)); \ - } - -#else /* CONFIG_COMPAT */ - #define SYSCALL_DEFINE0(sname) \ SYSCALL_METADATA(_##sname, 0); \ - long __s390x_sys_##sname(void); \ + long __s390x_sys_##sname(struct pt_regs *__unused); \ ALLOW_ERROR_INJECTION(__s390x_sys_##sname, ERRNO); \ static inline long __do_sys_##sname(void); \ - long __s390x_sys_##sname(void) \ + long __s390x_sys_##sname(struct pt_regs *__unused) \ { \ return __do_sys_##sname(); \ } \ @@ -118,8 +29,6 @@ #define __S390_SYS_STUBx(x, fullname, name, ...) -#endif /* CONFIG_COMPAT */ - #define __SYSCALL_DEFINEx(x, name, ...) 
\ long __s390x_sys##name(struct pt_regs *regs); \ ALLOW_ERROR_INJECTION(__s390x_sys##name, ERRNO); \ diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index 7878e9bfbf07..6a548a819400 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -69,7 +69,6 @@ void arch_setup_new_exec(void); #define TIF_GUARDED_STORAGE 17 /* load guarded storage control block */ #define TIF_ISOLATE_BP_GUEST 18 /* Run KVM guests with isolated BP */ #define TIF_PER_TRAP 19 /* Need to handle PER trap on exit to usermode */ -#define TIF_31BIT 20 /* 32bit process */ #define TIF_SINGLE_STEP 21 /* This task is single stepped */ #define TIF_BLOCK_STEP 22 /* This task is block stepped */ #define TIF_UPROBE_SINGLESTEP 23 /* This task is uprobe single stepped */ @@ -78,7 +77,6 @@ void arch_setup_new_exec(void); #define _TIF_GUARDED_STORAGE BIT(TIF_GUARDED_STORAGE) #define _TIF_ISOLATE_BP_GUEST BIT(TIF_ISOLATE_BP_GUEST) #define _TIF_PER_TRAP BIT(TIF_PER_TRAP) -#define _TIF_31BIT BIT(TIF_31BIT) #define _TIF_SINGLE_STEP BIT(TIF_SINGLE_STEP) #define _TIF_BLOCK_STEP BIT(TIF_BLOCK_STEP) #define _TIF_UPROBE_SINGLESTEP BIT(TIF_UPROBE_SINGLESTEP) diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index 75491baa2197..163ccbbe8c47 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -35,9 +35,9 @@ static inline void __tlb_flush_idte(unsigned long asce) */ static inline void __tlb_flush_global(void) { - unsigned int dummy = 0; + unsigned long dummy = 0; - csp(&dummy, 0, 0); + cspg(&dummy, 0, 0); } /* @@ -54,7 +54,7 @@ static inline void __tlb_flush_mm(struct mm_struct *mm) cpumask_copy(mm_cpumask(mm), &mm->context.cpu_attach_mask); barrier(); gmap_asce = READ_ONCE(mm->context.gmap_asce); - if (cpu_has_idte() && gmap_asce != -1UL) { + if (gmap_asce != -1UL) { if (gmap_asce) __tlb_flush_idte(gmap_asce); __tlb_flush_idte(mm->context.asce); @@ -68,10 +68,7 @@ static inline void __tlb_flush_mm(struct mm_struct *mm) static inline void __tlb_flush_kernel(void) { - if (cpu_has_idte()) - __tlb_flush_idte(init_mm.context.asce); - else - __tlb_flush_global(); + __tlb_flush_idte(init_mm.context.asce); } static inline void __tlb_flush_mm_lazy(struct mm_struct * mm) @@ -86,7 +83,6 @@ static inline void __tlb_flush_mm_lazy(struct mm_struct * mm) /* * TLB flushing: - * flush_tlb() - flushes the current mm struct TLBs * flush_tlb_all() - flushes all processes TLBs * flush_tlb_mm(mm) - flushes the specified mm context TLB's * flush_tlb_page(vma, vmaddr) - flushes one page @@ -102,7 +98,6 @@ static inline void __tlb_flush_mm_lazy(struct mm_struct * mm) * only one user. At the end of the update the flush_tlb_mm and * flush_tlb_range functions need to do the flush. */ -#define flush_tlb() do { } while (0) #define flush_tlb_all() do { } while (0) #define flush_tlb_page(vma, addr) do { } while (0) diff --git a/arch/s390/include/asm/trace/ap.h b/arch/s390/include/asm/trace/ap.h new file mode 100644 index 000000000000..5c2e6c664b4d --- /dev/null +++ b/arch/s390/include/asm/trace/ap.h @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Tracepoint definitions for s390 ap bus related trace events + * + * There are two AP bus related tracepoint events defined here: + * There is a tracepoint s390_ap_nqap event immediately after a request + * has been pushed into the AP firmware queue with the NQAP AP command. 
+ * The other tracepoint s390_ap_dqap event fires immediately after a
+ * reply has been pulled out of the AP firmware queue via DQAP AP command.
+ * The idea of these two trace events focuses on performance to measure
+ * the runtime of a crypto request/reply as close as possible at the
+ * firmware level. In combination with the two zcrypt tracepoints (see the
+ * zcrypt.h trace event definition file) this gives measurement data about
+ * the runtime of a request/reply within the zcrypt and AP bus layer.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM s390
+
+#if !defined(_TRACE_S390_AP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_S390_AP_H
+
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(s390_ap_nqapdqap_template,
+	TP_PROTO(u16 card, u16 dom, u32 status, u64 psmid),
+	TP_ARGS(card, dom, status, psmid),
+	TP_STRUCT__entry(
+		__field(u16, card)
+		__field(u16, dom)
+		__field(u32, status)
+		__field(u64, psmid)),
+	TP_fast_assign(
+		__entry->card = card;
+		__entry->dom = dom;
+		__entry->status = status;
+		__entry->psmid = psmid;),
+	TP_printk("card=%u dom=%u status=0x%08x psmid=0x%016lx",
+		  (unsigned short)__entry->card,
+		  (unsigned short)__entry->dom,
+		  (unsigned int)__entry->status,
+		  (unsigned long)__entry->psmid)
+);
+
+/**
+ * trace_s390_ap_nqap - ap msg nqap tracepoint function
+ * @card: Crypto card number addressed.
+ * @dom: Domain within the crypto card addressed.
+ * @status: AP queue status (GR1 on return of nqap).
+ * @psmid: Unique id identifying this request/reply.
+ *
+ * Called immediately after a request has been enqueued into
+ * the AP firmware queue with the NQAP command.
+ */
+DEFINE_EVENT(s390_ap_nqapdqap_template,
+	s390_ap_nqap,
+	TP_PROTO(u16 card, u16 dom, u32 status, u64 psmid),
+	TP_ARGS(card, dom, status, psmid)
+);
+
+/**
+ * trace_s390_ap_dqap - ap msg dqap tracepoint function
+ * @card: Crypto card number addressed.
+ * @dom: Domain within the crypto card addressed.
+ * @status: AP queue status (GR1 on return of dqap).
+ * @psmid: Unique id identifying this request/reply.
+ *
+ * Called immediately after a reply has been dequeued from
+ * the AP firmware queue with the DQAP command.
+ */
+DEFINE_EVENT(s390_ap_nqapdqap_template,
+	s390_ap_dqap,
+	TP_PROTO(u16 card, u16 dom, u32 status, u64 psmid),
+	TP_ARGS(card, dom, status, psmid)
+);
+
+#endif /* _TRACE_S390_AP_H */
+
+/* This part must be outside protection */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+
+#define TRACE_INCLUDE_PATH asm/trace
+#define TRACE_INCLUDE_FILE ap
+
+#include <trace/define_trace.h>
diff --git a/arch/s390/include/asm/trace/zcrypt.h b/arch/s390/include/asm/trace/zcrypt.h
index 457ddaa99e19..bfb01e335f31 100644
--- a/arch/s390/include/asm/trace/zcrypt.h
+++ b/arch/s390/include/asm/trace/zcrypt.h
@@ -2,7 +2,7 @@
 /*
  * Tracepoint definitions for the s390 zcrypt device driver
  *
- * Copyright IBM Corp. 2016
+ * Copyright IBM Corp. 2016,2025
  * Author(s): Harald Freudenberger <freude@de.ibm.com>
  *
  * Currently there are two tracepoint events defined here.
@@ -73,14 +73,15 @@ TRACE_EVENT(s390_zcrypt_req,
 /**
  * trace_s390_zcrypt_rep - zcrypt reply tracepoint function
- * @ptr: Address of the local buffer where the request from userspace
- *	is stored. Can be used as a unique id to match together
- *	request and reply.
- * @fc: Function code.
- * @rc: The bare returncode as returned by the device driver ioctl
- *	function.
- * @dev: The adapter nr where this request was actually processed.
- * @dom: Domain id of the device where this request was processed. 
+ * @ptr: Address of the local buffer where the request from userspace + * is stored. Can be used as a unique id to match together + * request and reply. + * @fc: Function code. + * @rc: The bare returncode as returned by the device driver ioctl + * function. + * @card: The adapter nr where this request was actually processed. + * @dom: Domain id of the device where this request was processed. + * @psmid: Unique id identifying this request/reply. * * Called upon recognising the reply from the crypto adapter. This * message may act as the exit timestamp for the request but also @@ -88,26 +89,29 @@ TRACE_EVENT(s390_zcrypt_req, * and the returncode from the device driver. */ TRACE_EVENT(s390_zcrypt_rep, - TP_PROTO(void *ptr, u32 fc, u32 rc, u16 dev, u16 dom), - TP_ARGS(ptr, fc, rc, dev, dom), + TP_PROTO(void *ptr, u32 fc, u32 rc, u16 card, u16 dom, u64 psmid), + TP_ARGS(ptr, fc, rc, card, dom, psmid), TP_STRUCT__entry( __field(void *, ptr) __field(u32, fc) __field(u32, rc) - __field(u16, device) - __field(u16, domain)), + __field(u16, card) + __field(u16, dom) + __field(u64, psmid)), TP_fast_assign( __entry->ptr = ptr; __entry->fc = fc; __entry->rc = rc; - __entry->device = dev; - __entry->domain = dom;), - TP_printk("ptr=%p fc=0x%04x rc=%d dev=0x%02hx domain=0x%04hx", + __entry->card = card; + __entry->dom = dom; + __entry->psmid = psmid;), + TP_printk("ptr=%p fc=0x%04x rc=%d card=%u dom=%u psmid=0x%016lx", __entry->ptr, - (unsigned int) __entry->fc, - (int) __entry->rc, - (unsigned short) __entry->device, - (unsigned short) __entry->domain) + (unsigned int)__entry->fc, + (int)__entry->rc, + (unsigned short)__entry->card, + (unsigned short)__entry->dom, + (unsigned long)__entry->psmid) ); #endif /* _TRACE_S390_ZCRYPT_H */ diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 3e5b8b677057..c5e02addcd67 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -468,8 +468,8 @@ do { \ #endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT && CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ -#define __get_kernel_nofault __mvc_kernel_nofault -#define __put_kernel_nofault __mvc_kernel_nofault +#define arch_get_kernel_nofault __mvc_kernel_nofault +#define arch_put_kernel_nofault __mvc_kernel_nofault void __cmpxchg_user_key_called_with_bad_pointer(void); diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h index 70fc671397da..921c3fb3586b 100644 --- a/arch/s390/include/asm/unistd.h +++ b/arch/s390/include/asm/unistd.h @@ -8,7 +8,8 @@ #define _ASM_S390_UNISTD_H_ #include <uapi/asm/unistd.h> -#include <asm/unistd_nr.h> + +#define NR_syscalls (__NR_syscalls) #define __ARCH_WANT_NEW_STAT #define __ARCH_WANT_OLD_READDIR @@ -27,11 +28,6 @@ #define __ARCH_WANT_SYS_OLDUMOUNT #define __ARCH_WANT_SYS_SIGPENDING #define __ARCH_WANT_SYS_SIGPROCMASK -# ifdef CONFIG_COMPAT -# define __ARCH_WANT_COMPAT_STAT -# define __ARCH_WANT_SYS_TIME32 -# define __ARCH_WANT_SYS_UTIME32 -# endif #define __ARCH_WANT_SYS_FORK #define __ARCH_WANT_SYS_VFORK #define __ARCH_WANT_SYS_CLONE diff --git a/arch/s390/include/asm/vdso-symbols.h b/arch/s390/include/asm/vdso-symbols.h index 0df17574d788..e3561e67c4e3 100644 --- a/arch/s390/include/asm/vdso-symbols.h +++ b/arch/s390/include/asm/vdso-symbols.h @@ -2,16 +2,8 @@ #ifndef __S390_VDSO_SYMBOLS_H__ #define __S390_VDSO_SYMBOLS_H__ -#include <generated/vdso64-offsets.h> -#ifdef CONFIG_COMPAT -#include <generated/vdso32-offsets.h> -#endif +#include <generated/vdso-offsets.h> -#define VDSO64_SYMBOL(tsk, name) 
((tsk)->mm->context.vdso_base + (vdso64_offset_##name)) -#ifdef CONFIG_COMPAT -#define VDSO32_SYMBOL(tsk, name) ((tsk)->mm->context.vdso_base + (vdso32_offset_##name)) -#else -#define VDSO32_SYMBOL(tsk, name) (-1UL) -#endif +#define VDSO_SYMBOL(tsk, name) ((tsk)->mm->context.vdso_base + (vdso_offset_##name)) #endif /* __S390_VDSO_SYMBOLS_H__ */ diff --git a/arch/s390/include/uapi/asm/bitsperlong.h b/arch/s390/include/uapi/asm/bitsperlong.h index cceaf47b021a..7af27a985f25 100644 --- a/arch/s390/include/uapi/asm/bitsperlong.h +++ b/arch/s390/include/uapi/asm/bitsperlong.h @@ -2,11 +2,7 @@ #ifndef __ASM_S390_BITSPERLONG_H #define __ASM_S390_BITSPERLONG_H -#ifndef __s390x__ -#define __BITS_PER_LONG 32 -#else #define __BITS_PER_LONG 64 -#endif #include <asm-generic/bitsperlong.h> diff --git a/arch/s390/include/uapi/asm/ipcbuf.h b/arch/s390/include/uapi/asm/ipcbuf.h index 1030cd186899..9277e76d6d72 100644 --- a/arch/s390/include/uapi/asm/ipcbuf.h +++ b/arch/s390/include/uapi/asm/ipcbuf.h @@ -24,9 +24,6 @@ struct ipc64_perm __kernel_mode_t mode; unsigned short __pad1; unsigned short seq; -#ifndef __s390x__ - unsigned short __pad2; -#endif /* ! __s390x__ */ unsigned long __unused1; unsigned long __unused2; }; diff --git a/arch/s390/include/uapi/asm/posix_types.h b/arch/s390/include/uapi/asm/posix_types.h index 1913613e71b6..ad5ab940d192 100644 --- a/arch/s390/include/uapi/asm/posix_types.h +++ b/arch/s390/include/uapi/asm/posix_types.h @@ -26,17 +26,6 @@ typedef unsigned short __kernel_old_gid_t; #define __kernel_old_uid_t __kernel_old_uid_t #endif -#ifndef __s390x__ - -typedef unsigned long __kernel_ino_t; -typedef unsigned short __kernel_mode_t; -typedef unsigned short __kernel_ipc_pid_t; -typedef unsigned short __kernel_uid_t; -typedef unsigned short __kernel_gid_t; -typedef int __kernel_ptrdiff_t; - -#else /* __s390x__ */ - typedef unsigned int __kernel_ino_t; typedef unsigned int __kernel_mode_t; typedef int __kernel_ipc_pid_t; @@ -45,8 +34,6 @@ typedef unsigned int __kernel_gid_t; typedef long __kernel_ptrdiff_t; typedef unsigned long __kernel_sigset_t; /* at least 32 bits */ -#endif /* __s390x__ */ - #define __kernel_ino_t __kernel_ino_t #define __kernel_mode_t __kernel_mode_t #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/arch/s390/include/uapi/asm/ptrace.h b/arch/s390/include/uapi/asm/ptrace.h index ea202072f1ad..ea29ba470e5a 100644 --- a/arch/s390/include/uapi/asm/ptrace.h +++ b/arch/s390/include/uapi/asm/ptrace.h @@ -14,94 +14,6 @@ * Offsets in the user_regs_struct. They are used for the ptrace * system call and in entry.S */ -#ifndef __s390x__ - -#define PT_PSWMASK 0x00 -#define PT_PSWADDR 0x04 -#define PT_GPR0 0x08 -#define PT_GPR1 0x0C -#define PT_GPR2 0x10 -#define PT_GPR3 0x14 -#define PT_GPR4 0x18 -#define PT_GPR5 0x1C -#define PT_GPR6 0x20 -#define PT_GPR7 0x24 -#define PT_GPR8 0x28 -#define PT_GPR9 0x2C -#define PT_GPR10 0x30 -#define PT_GPR11 0x34 -#define PT_GPR12 0x38 -#define PT_GPR13 0x3C -#define PT_GPR14 0x40 -#define PT_GPR15 0x44 -#define PT_ACR0 0x48 -#define PT_ACR1 0x4C -#define PT_ACR2 0x50 -#define PT_ACR3 0x54 -#define PT_ACR4 0x58 -#define PT_ACR5 0x5C -#define PT_ACR6 0x60 -#define PT_ACR7 0x64 -#define PT_ACR8 0x68 -#define PT_ACR9 0x6C -#define PT_ACR10 0x70 -#define PT_ACR11 0x74 -#define PT_ACR12 0x78 -#define PT_ACR13 0x7C -#define PT_ACR14 0x80 -#define PT_ACR15 0x84 -#define PT_ORIGGPR2 0x88 -#define PT_FPC 0x90 -/* - * A nasty fact of life that the ptrace api - * only supports passing of longs. 
- */ -#define PT_FPR0_HI 0x98 -#define PT_FPR0_LO 0x9C -#define PT_FPR1_HI 0xA0 -#define PT_FPR1_LO 0xA4 -#define PT_FPR2_HI 0xA8 -#define PT_FPR2_LO 0xAC -#define PT_FPR3_HI 0xB0 -#define PT_FPR3_LO 0xB4 -#define PT_FPR4_HI 0xB8 -#define PT_FPR4_LO 0xBC -#define PT_FPR5_HI 0xC0 -#define PT_FPR5_LO 0xC4 -#define PT_FPR6_HI 0xC8 -#define PT_FPR6_LO 0xCC -#define PT_FPR7_HI 0xD0 -#define PT_FPR7_LO 0xD4 -#define PT_FPR8_HI 0xD8 -#define PT_FPR8_LO 0XDC -#define PT_FPR9_HI 0xE0 -#define PT_FPR9_LO 0xE4 -#define PT_FPR10_HI 0xE8 -#define PT_FPR10_LO 0xEC -#define PT_FPR11_HI 0xF0 -#define PT_FPR11_LO 0xF4 -#define PT_FPR12_HI 0xF8 -#define PT_FPR12_LO 0xFC -#define PT_FPR13_HI 0x100 -#define PT_FPR13_LO 0x104 -#define PT_FPR14_HI 0x108 -#define PT_FPR14_LO 0x10C -#define PT_FPR15_HI 0x110 -#define PT_FPR15_LO 0x114 -#define PT_CR_9 0x118 -#define PT_CR_10 0x11C -#define PT_CR_11 0x120 -#define PT_IEEE_IP 0x13C -#define PT_LASTOFF PT_IEEE_IP -#define PT_ENDREGS 0x140-1 - -#define GPR_SIZE 4 -#define CR_SIZE 4 - -#define STACK_FRAME_OVERHEAD 96 /* size of minimum stack frame */ - -#else /* __s390x__ */ - #define PT_PSWMASK 0x00 #define PT_PSWADDR 0x08 #define PT_GPR0 0x10 @@ -166,38 +78,6 @@ #define STACK_FRAME_OVERHEAD 160 /* size of minimum stack frame */ -#endif /* __s390x__ */ - -#ifndef __s390x__ - -#define PSW_MASK_PER _AC(0x40000000, UL) -#define PSW_MASK_DAT _AC(0x04000000, UL) -#define PSW_MASK_IO _AC(0x02000000, UL) -#define PSW_MASK_EXT _AC(0x01000000, UL) -#define PSW_MASK_KEY _AC(0x00F00000, UL) -#define PSW_MASK_BASE _AC(0x00080000, UL) /* always one */ -#define PSW_MASK_MCHECK _AC(0x00040000, UL) -#define PSW_MASK_WAIT _AC(0x00020000, UL) -#define PSW_MASK_PSTATE _AC(0x00010000, UL) -#define PSW_MASK_ASC _AC(0x0000C000, UL) -#define PSW_MASK_CC _AC(0x00003000, UL) -#define PSW_MASK_PM _AC(0x00000F00, UL) -#define PSW_MASK_RI _AC(0x00000000, UL) -#define PSW_MASK_EA _AC(0x00000000, UL) -#define PSW_MASK_BA _AC(0x00000000, UL) - -#define PSW_MASK_USER _AC(0x0000FF00, UL) - -#define PSW_ADDR_AMODE _AC(0x80000000, UL) -#define PSW_ADDR_INSN _AC(0x7FFFFFFF, UL) - -#define PSW_ASC_PRIMARY _AC(0x00000000, UL) -#define PSW_ASC_ACCREG _AC(0x00004000, UL) -#define PSW_ASC_SECONDARY _AC(0x00008000, UL) -#define PSW_ASC_HOME _AC(0x0000C000, UL) - -#else /* __s390x__ */ - #define PSW_MASK_PER _AC(0x4000000000000000, UL) #define PSW_MASK_DAT _AC(0x0400000000000000, UL) #define PSW_MASK_IO _AC(0x0200000000000000, UL) @@ -224,8 +104,6 @@ #define PSW_ASC_SECONDARY _AC(0x0000800000000000, UL) #define PSW_ASC_HOME _AC(0x0000C00000000000, UL) -#endif /* __s390x__ */ - #define NUM_GPRS 16 #define NUM_FPRS 16 #define NUM_CRS 16 @@ -308,9 +186,7 @@ typedef struct { #define PER_EM_MASK 0xE8000000UL typedef struct { -#ifdef __s390x__ unsigned : 32; -#endif /* __s390x__ */ unsigned em_branching : 1; unsigned em_instruction_fetch : 1; /* diff --git a/arch/s390/include/uapi/asm/sigcontext.h b/arch/s390/include/uapi/asm/sigcontext.h index 8b35033334c4..7c90b30c50fd 100644 --- a/arch/s390/include/uapi/asm/sigcontext.h +++ b/arch/s390/include/uapi/asm/sigcontext.h @@ -17,24 +17,12 @@ #define __NUM_VXRS_LOW 16 #define __NUM_VXRS_HIGH 16 -#ifndef __s390x__ - -/* Has to be at least _NSIG_WORDS from asm/signal.h */ -#define _SIGCONTEXT_NSIG 64 -#define _SIGCONTEXT_NSIG_BPW 32 -/* Size of stack frame allocated when calling signal handler. 
*/ -#define __SIGNAL_FRAMESIZE 96 - -#else /* __s390x__ */ - /* Has to be at least _NSIG_WORDS from asm/signal.h */ #define _SIGCONTEXT_NSIG 64 #define _SIGCONTEXT_NSIG_BPW 64 /* Size of stack frame allocated when calling signal handler. */ #define __SIGNAL_FRAMESIZE 160 -#endif /* __s390x__ */ - #define _SIGCONTEXT_NSIG_WORDS (_SIGCONTEXT_NSIG / _SIGCONTEXT_NSIG_BPW) #define _SIGMASK_COPY_SIZE (sizeof(unsigned long)*_SIGCONTEXT_NSIG_WORDS) @@ -66,9 +54,6 @@ typedef struct typedef struct { -#ifndef __s390x__ - unsigned long gprs_high[__NUM_GPRS]; -#endif unsigned long long vxrs_low[__NUM_VXRS_LOW]; __vector128 vxrs_high[__NUM_VXRS_HIGH]; unsigned char __reserved[128]; diff --git a/arch/s390/include/uapi/asm/stat.h b/arch/s390/include/uapi/asm/stat.h index ac253d23606b..21599298c2f5 100644 --- a/arch/s390/include/uapi/asm/stat.h +++ b/arch/s390/include/uapi/asm/stat.h @@ -8,74 +8,6 @@ #ifndef _S390_STAT_H #define _S390_STAT_H -#ifndef __s390x__ -struct __old_kernel_stat { - unsigned short st_dev; - unsigned short st_ino; - unsigned short st_mode; - unsigned short st_nlink; - unsigned short st_uid; - unsigned short st_gid; - unsigned short st_rdev; - unsigned long st_size; - unsigned long st_atime; - unsigned long st_mtime; - unsigned long st_ctime; -}; - -struct stat { - unsigned short st_dev; - unsigned short __pad1; - unsigned long st_ino; - unsigned short st_mode; - unsigned short st_nlink; - unsigned short st_uid; - unsigned short st_gid; - unsigned short st_rdev; - unsigned short __pad2; - unsigned long st_size; - unsigned long st_blksize; - unsigned long st_blocks; - unsigned long st_atime; - unsigned long st_atime_nsec; - unsigned long st_mtime; - unsigned long st_mtime_nsec; - unsigned long st_ctime; - unsigned long st_ctime_nsec; - unsigned long __unused4; - unsigned long __unused5; -}; - -/* This matches struct stat64 in glibc2.1, hence the absolutely - * insane amounts of padding around dev_t's. - */ -struct stat64 { - unsigned long long st_dev; - unsigned int __pad1; -#define STAT64_HAS_BROKEN_ST_INO 1 - unsigned long __st_ino; - unsigned int st_mode; - unsigned int st_nlink; - unsigned long st_uid; - unsigned long st_gid; - unsigned long long st_rdev; - unsigned int __pad3; - long long st_size; - unsigned long st_blksize; - unsigned char __pad4[4]; - unsigned long __pad5; /* future possible st_blocks high bits */ - unsigned long st_blocks; /* Number 512-byte blocks allocated. 
*/ - unsigned long st_atime; - unsigned long st_atime_nsec; - unsigned long st_mtime; - unsigned long st_mtime_nsec; - unsigned long st_ctime; - unsigned long st_ctime_nsec; /* will be high 32 bits of ctime someday */ - unsigned long long st_ino; -}; - -#else /* __s390x__ */ - struct stat { unsigned long st_dev; unsigned long st_ino; @@ -97,8 +29,6 @@ struct stat { unsigned long __unused[3]; }; -#endif /* __s390x__ */ - #define STAT_HAVE_NSEC 1 #endif diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h index 01b5fe8b9db6..b0c5afe19db2 100644 --- a/arch/s390/include/uapi/asm/unistd.h +++ b/arch/s390/include/uapi/asm/unistd.h @@ -8,10 +8,6 @@ #ifndef _UAPI_ASM_S390_UNISTD_H_ #define _UAPI_ASM_S390_UNISTD_H_ -#ifdef __s390x__ #include <asm/unistd_64.h> -#else -#include <asm/unistd_32.h> -#endif #endif /* _UAPI_ASM_S390_UNISTD_H_ */ diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index eb06ff888314..42c83d60d6fa 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -36,7 +36,7 @@ CFLAGS_stacktrace.o += -fno-optimize-sibling-calls CFLAGS_dumpstack.o += -fno-optimize-sibling-calls CFLAGS_unwind_bc.o += -fno-optimize-sibling-calls -obj-y := head64.o traps.o time.o process.o early.o setup.o idle.o vtime.o +obj-y := head.o traps.o time.o process.o early.o setup.o idle.o vtime.o obj-y += processor.o syscall.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o obj-y += debug.o irq.o ipl.o dis.o vdso.o cpufeature.o obj-y += sysinfo.o lgr.o os_info.o ctlreg.o @@ -56,9 +56,6 @@ obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_SCHED_TOPOLOGY) += topology.o hiperdispatch.o obj-$(CONFIG_NUMA) += numa.o obj-$(CONFIG_AUDIT) += audit.o -compat-obj-$(CONFIG_AUDIT) += compat_audit.o -obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o -obj-$(CONFIG_COMPAT) += $(compat-obj-y) obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_KPROBES) += mcount.o @@ -70,7 +67,7 @@ obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o obj-$(CONFIG_VMCORE_INFO) += vmcore_info.o obj-$(CONFIG_UPROBES) += uprobes.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o - +obj-$(CONFIG_STACKPROTECTOR) += stackprotector.o obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file.o kexec_image.o obj-$(CONFIG_KEXEC_FILE) += kexec_elf.o obj-$(CONFIG_CERT_STORE) += cert_store.o @@ -79,10 +76,9 @@ obj-$(CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT) += ima_arch.o obj-$(CONFIG_PERF_EVENTS) += perf_event.o obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf.o perf_cpum_sf.o obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o perf_regs.o -obj-$(CONFIG_PERF_EVENTS) += perf_pai_crypto.o perf_pai_ext.o +obj-$(CONFIG_PERF_EVENTS) += perf_pai.o obj-$(CONFIG_TRACEPOINTS) += trace.o # vdso -obj-y += vdso64/ -obj-$(CONFIG_COMPAT) += vdso32/ +obj-y += vdso/ diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index a8915663e917..cfe27f6579e3 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -21,6 +21,9 @@ int main(void) OFFSET(__TASK_stack, task_struct, stack); OFFSET(__TASK_thread, task_struct, thread); OFFSET(__TASK_pid, task_struct, pid); +#ifdef CONFIG_STACKPROTECTOR + OFFSET(__TASK_stack_canary, task_struct, stack_canary); +#endif BLANK(); /* thread struct offsets */ OFFSET(__THREAD_ksp, thread_struct, ksp); @@ -139,6 +142,7 @@ int main(void) OFFSET(__LC_CURRENT_PID, lowcore, current_pid); OFFSET(__LC_LAST_BREAK, lowcore, last_break); /* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */ + 
OFFSET(__LC_STACK_CANARY, lowcore, stack_canary); OFFSET(__LC_DUMP_REIPL, lowcore, ipib); OFFSET(__LC_VMCORE_INFO, lowcore, vmcore_info); OFFSET(__LC_OS_INFO, lowcore, os_info); diff --git a/arch/s390/kernel/audit.c b/arch/s390/kernel/audit.c index 02051a596b87..7897d9411e13 100644 --- a/arch/s390/kernel/audit.c +++ b/arch/s390/kernel/audit.c @@ -3,7 +3,6 @@ #include <linux/types.h> #include <linux/audit.h> #include <asm/unistd.h> -#include "audit.h" static unsigned dir_class[] = { #include <asm-generic/audit_dir_write.h> @@ -32,19 +31,11 @@ static unsigned signal_class[] = { int audit_classify_arch(int arch) { -#ifdef CONFIG_COMPAT - if (arch == AUDIT_ARCH_S390) - return 1; -#endif return 0; } int audit_classify_syscall(int abi, unsigned syscall) { -#ifdef CONFIG_COMPAT - if (abi == AUDIT_ARCH_S390) - return s390_classify_syscall(syscall); -#endif switch(syscall) { case __NR_open: return AUDITSC_OPEN; @@ -63,13 +54,6 @@ int audit_classify_syscall(int abi, unsigned syscall) static int __init audit_classes_init(void) { -#ifdef CONFIG_COMPAT - audit_register_class(AUDIT_CLASS_WRITE_32, s390_write_class); - audit_register_class(AUDIT_CLASS_READ_32, s390_read_class); - audit_register_class(AUDIT_CLASS_DIR_WRITE_32, s390_dir_class); - audit_register_class(AUDIT_CLASS_CHATTR_32, s390_chattr_class); - audit_register_class(AUDIT_CLASS_SIGNAL_32, s390_signal_class); -#endif audit_register_class(AUDIT_CLASS_WRITE, write_class); audit_register_class(AUDIT_CLASS_READ, read_class); audit_register_class(AUDIT_CLASS_DIR_WRITE, dir_class); diff --git a/arch/s390/kernel/audit.h b/arch/s390/kernel/audit.h deleted file mode 100644 index 4d4b596412ec..000000000000 --- a/arch/s390/kernel/audit.h +++ /dev/null @@ -1,16 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ARCH_S390_KERNEL_AUDIT_H -#define __ARCH_S390_KERNEL_AUDIT_H - -#include <linux/types.h> - -#ifdef CONFIG_COMPAT -extern int s390_classify_syscall(unsigned); -extern __u32 s390_dir_class[]; -extern __u32 s390_write_class[]; -extern __u32 s390_read_class[]; -extern __u32 s390_chattr_class[]; -extern __u32 s390_signal_class[]; -#endif /* CONFIG_COMPAT */ - -#endif /* __ARCH_S390_KERNEL_AUDIT_H */ diff --git a/arch/s390/kernel/compat_audit.c b/arch/s390/kernel/compat_audit.c deleted file mode 100644 index a7c46e8310f0..000000000000 --- a/arch/s390/kernel/compat_audit.c +++ /dev/null @@ -1,48 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#undef __s390x__ -#include <linux/audit_arch.h> -#include <asm/unistd.h> -#include "audit.h" - -unsigned s390_dir_class[] = { -#include <asm-generic/audit_dir_write.h> -~0U -}; - -unsigned s390_chattr_class[] = { -#include <asm-generic/audit_change_attr.h> -~0U -}; - -unsigned s390_write_class[] = { -#include <asm-generic/audit_write.h> -~0U -}; - -unsigned s390_read_class[] = { -#include <asm-generic/audit_read.h> -~0U -}; - -unsigned s390_signal_class[] = { -#include <asm-generic/audit_signal.h> -~0U -}; - -int s390_classify_syscall(unsigned syscall) -{ - switch(syscall) { - case __NR_open: - return AUDITSC_OPEN; - case __NR_openat: - return AUDITSC_OPENAT; - case __NR_socketcall: - return AUDITSC_SOCKETCALL; - case __NR_execve: - return AUDITSC_EXECVE; - case __NR_openat2: - return AUDITSC_OPENAT2; - default: - return AUDITSC_COMPAT; - } -} diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c deleted file mode 100644 index f9d418d1b619..000000000000 --- a/arch/s390/kernel/compat_linux.c +++ /dev/null @@ -1,289 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * S390 version - * 
Copyright IBM Corp. 2000 - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), - * Gerhard Tonn (ton@de.ibm.com) - * Thomas Spatzier (tspat@de.ibm.com) - * - * Conversion between 31bit and 64bit native syscalls. - * - * Heavily inspired by the 32-bit Sparc compat code which is - * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) - * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) - * - */ - - -#include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/fs.h> -#include <linux/mm.h> -#include <linux/file.h> -#include <linux/signal.h> -#include <linux/resource.h> -#include <linux/times.h> -#include <linux/smp.h> -#include <linux/sem.h> -#include <linux/msg.h> -#include <linux/shm.h> -#include <linux/uio.h> -#include <linux/quota.h> -#include <linux/poll.h> -#include <linux/personality.h> -#include <linux/stat.h> -#include <linux/filter.h> -#include <linux/highmem.h> -#include <linux/mman.h> -#include <linux/ipv6.h> -#include <linux/in.h> -#include <linux/icmpv6.h> -#include <linux/syscalls.h> -#include <linux/sysctl.h> -#include <linux/binfmts.h> -#include <linux/capability.h> -#include <linux/compat.h> -#include <linux/vfs.h> -#include <linux/ptrace.h> -#include <linux/fadvise.h> -#include <linux/ipc.h> -#include <linux/slab.h> - -#include <asm/types.h> -#include <linux/uaccess.h> - -#include <net/scm.h> -#include <net/sock.h> - -#include "compat_linux.h" - -#ifdef CONFIG_SYSVIPC -COMPAT_SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, compat_ulong_t, second, - compat_ulong_t, third, compat_uptr_t, ptr) -{ - if (call >> 16) /* hack for backward compatibility */ - return -EINVAL; - return compat_ksys_ipc(call, first, second, third, ptr, third); -} -#endif - -COMPAT_SYSCALL_DEFINE3(s390_truncate64, const char __user *, path, u32, high, u32, low) -{ - return ksys_truncate(path, (unsigned long)high << 32 | low); -} - -COMPAT_SYSCALL_DEFINE3(s390_ftruncate64, unsigned int, fd, u32, high, u32, low) -{ - return ksys_ftruncate(fd, (unsigned long)high << 32 | low); -} - -COMPAT_SYSCALL_DEFINE5(s390_pread64, unsigned int, fd, char __user *, ubuf, - compat_size_t, count, u32, high, u32, low) -{ - if ((compat_ssize_t) count < 0) - return -EINVAL; - return ksys_pread64(fd, ubuf, count, (unsigned long)high << 32 | low); -} - -COMPAT_SYSCALL_DEFINE5(s390_pwrite64, unsigned int, fd, const char __user *, ubuf, - compat_size_t, count, u32, high, u32, low) -{ - if ((compat_ssize_t) count < 0) - return -EINVAL; - return ksys_pwrite64(fd, ubuf, count, (unsigned long)high << 32 | low); -} - -COMPAT_SYSCALL_DEFINE4(s390_readahead, int, fd, u32, high, u32, low, s32, count) -{ - return ksys_readahead(fd, (unsigned long)high << 32 | low, count); -} - -struct stat64_emu31 { - unsigned long long st_dev; - unsigned int __pad1; -#define STAT64_HAS_BROKEN_ST_INO 1 - u32 __st_ino; - unsigned int st_mode; - unsigned int st_nlink; - u32 st_uid; - u32 st_gid; - unsigned long long st_rdev; - unsigned int __pad3; - long st_size; - u32 st_blksize; - unsigned char __pad4[4]; - u32 __pad5; /* future possible st_blocks high bits */ - u32 st_blocks; /* Number 512-byte blocks allocated. 
*/ - u32 st_atime; - u32 __pad6; - u32 st_mtime; - u32 __pad7; - u32 st_ctime; - u32 __pad8; /* will be high 32 bits of ctime someday */ - unsigned long st_ino; -}; - -static int cp_stat64(struct stat64_emu31 __user *ubuf, struct kstat *stat) -{ - struct stat64_emu31 tmp; - - memset(&tmp, 0, sizeof(tmp)); - - tmp.st_dev = huge_encode_dev(stat->dev); - tmp.st_ino = stat->ino; - tmp.__st_ino = (u32)stat->ino; - tmp.st_mode = stat->mode; - tmp.st_nlink = (unsigned int)stat->nlink; - tmp.st_uid = from_kuid_munged(current_user_ns(), stat->uid); - tmp.st_gid = from_kgid_munged(current_user_ns(), stat->gid); - tmp.st_rdev = huge_encode_dev(stat->rdev); - tmp.st_size = stat->size; - tmp.st_blksize = (u32)stat->blksize; - tmp.st_blocks = (u32)stat->blocks; - tmp.st_atime = (u32)stat->atime.tv_sec; - tmp.st_mtime = (u32)stat->mtime.tv_sec; - tmp.st_ctime = (u32)stat->ctime.tv_sec; - - return copy_to_user(ubuf,&tmp,sizeof(tmp)) ? -EFAULT : 0; -} - -COMPAT_SYSCALL_DEFINE2(s390_stat64, const char __user *, filename, struct stat64_emu31 __user *, statbuf) -{ - struct kstat stat; - int ret = vfs_stat(filename, &stat); - if (!ret) - ret = cp_stat64(statbuf, &stat); - return ret; -} - -COMPAT_SYSCALL_DEFINE2(s390_lstat64, const char __user *, filename, struct stat64_emu31 __user *, statbuf) -{ - struct kstat stat; - int ret = vfs_lstat(filename, &stat); - if (!ret) - ret = cp_stat64(statbuf, &stat); - return ret; -} - -COMPAT_SYSCALL_DEFINE2(s390_fstat64, unsigned int, fd, struct stat64_emu31 __user *, statbuf) -{ - struct kstat stat; - int ret = vfs_fstat(fd, &stat); - if (!ret) - ret = cp_stat64(statbuf, &stat); - return ret; -} - -COMPAT_SYSCALL_DEFINE4(s390_fstatat64, unsigned int, dfd, const char __user *, filename, - struct stat64_emu31 __user *, statbuf, int, flag) -{ - struct kstat stat; - int error; - - error = vfs_fstatat(dfd, filename, &stat, flag); - if (error) - return error; - return cp_stat64(statbuf, &stat); -} - -/* - * Linux/i386 didn't use to be able to handle more than - * 4 system call parameters, so these system calls used a memory - * block for parameter passing.. - */ - -struct mmap_arg_struct_emu31 { - compat_ulong_t addr; - compat_ulong_t len; - compat_ulong_t prot; - compat_ulong_t flags; - compat_ulong_t fd; - compat_ulong_t offset; -}; - -COMPAT_SYSCALL_DEFINE1(s390_old_mmap, struct mmap_arg_struct_emu31 __user *, arg) -{ - struct mmap_arg_struct_emu31 a; - - if (copy_from_user(&a, arg, sizeof(a))) - return -EFAULT; - if (a.offset & ~PAGE_MASK) - return -EINVAL; - return ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, - a.offset >> PAGE_SHIFT); -} - -COMPAT_SYSCALL_DEFINE1(s390_mmap2, struct mmap_arg_struct_emu31 __user *, arg) -{ - struct mmap_arg_struct_emu31 a; - - if (copy_from_user(&a, arg, sizeof(a))) - return -EFAULT; - return ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset); -} - -COMPAT_SYSCALL_DEFINE3(s390_read, unsigned int, fd, char __user *, buf, compat_size_t, count) -{ - if ((compat_ssize_t) count < 0) - return -EINVAL; - - return ksys_read(fd, buf, count); -} - -COMPAT_SYSCALL_DEFINE3(s390_write, unsigned int, fd, const char __user *, buf, compat_size_t, count) -{ - if ((compat_ssize_t) count < 0) - return -EINVAL; - - return ksys_write(fd, buf, count); -} - -/* - * 31 bit emulation wrapper functions for sys_fadvise64/fadvise64_64. - * These need to rewrite the advise values for POSIX_FADV_{DONTNEED,NOREUSE} - * because the 31 bit values differ from the 64 bit values. 
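
As the comment above notes, 31-bit userspace passes the generic advice values 4 and 5 for POSIX_FADV_DONTNEED and POSIX_FADV_NOREUSE, while the 64-bit s390 build defines them differently (historically 6 and 7 in the s390x uapi fadvise header), so the wrapper has to translate before calling the common helper. A minimal standalone sketch of that translation (the FADV31_*/FADV64_* names and literal values are illustrative stand-ins rather than the real uapi macros):

/* Advice values as seen from 31-bit userspace ... */
#define FADV31_DONTNEED	4
#define FADV31_NOREUSE	5
/* ... and the values used by the 64-bit s390 kernel side. */
#define FADV64_DONTNEED	6
#define FADV64_NOREUSE	7

static int fixup_fadvise_advice(int advice)
{
	if (advice == FADV31_DONTNEED)
		return FADV64_DONTNEED;
	if (advice == FADV31_NOREUSE)
		return FADV64_NOREUSE;
	return advice;	/* all other advice values are identical */
}
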
- */ - -COMPAT_SYSCALL_DEFINE5(s390_fadvise64, int, fd, u32, high, u32, low, compat_size_t, len, int, advise) -{ - if (advise == 4) - advise = POSIX_FADV_DONTNEED; - else if (advise == 5) - advise = POSIX_FADV_NOREUSE; - return ksys_fadvise64_64(fd, (unsigned long)high << 32 | low, len, - advise); -} - -struct fadvise64_64_args { - int fd; - long long offset; - long long len; - int advice; -}; - -COMPAT_SYSCALL_DEFINE1(s390_fadvise64_64, struct fadvise64_64_args __user *, args) -{ - struct fadvise64_64_args a; - - if ( copy_from_user(&a, args, sizeof(a)) ) - return -EFAULT; - if (a.advice == 4) - a.advice = POSIX_FADV_DONTNEED; - else if (a.advice == 5) - a.advice = POSIX_FADV_NOREUSE; - return ksys_fadvise64_64(a.fd, a.offset, a.len, a.advice); -} - -COMPAT_SYSCALL_DEFINE6(s390_sync_file_range, int, fd, u32, offhigh, u32, offlow, - u32, nhigh, u32, nlow, unsigned int, flags) -{ - return ksys_sync_file_range(fd, ((loff_t)offhigh << 32) + offlow, - ((u64)nhigh << 32) + nlow, flags); -} - -COMPAT_SYSCALL_DEFINE6(s390_fallocate, int, fd, int, mode, u32, offhigh, u32, offlow, - u32, lenhigh, u32, lenlow) -{ - return ksys_fallocate(fd, mode, ((loff_t)offhigh << 32) + offlow, - ((u64)lenhigh << 32) + lenlow); -} diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h deleted file mode 100644 index ef23739b277c..000000000000 --- a/arch/s390/kernel/compat_linux.h +++ /dev/null @@ -1,101 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_S390X_S390_H -#define _ASM_S390X_S390_H - -#include <linux/compat.h> -#include <linux/socket.h> -#include <linux/syscalls.h> -#include <asm/ptrace.h> - -/* - * Macro that masks the high order bit of a 32 bit pointer and - * converts it to a 64 bit pointer. - */ -#define A(__x) ((unsigned long)((__x) & 0x7FFFFFFFUL)) -#define AA(__x) ((unsigned long)(__x)) - -/* Now 32bit compatibility types */ -struct ipc_kludge_32 { - __u32 msgp; /* pointer */ - __s32 msgtyp; -}; - -/* asm/sigcontext.h */ -typedef union { - __u64 d; - __u32 f; -} freg_t32; - -typedef struct { - unsigned int fpc; - unsigned int pad; - freg_t32 fprs[__NUM_FPRS]; -} _s390_fp_regs32; - -typedef struct { - psw_t32 psw; - __u32 gprs[__NUM_GPRS]; - __u32 acrs[__NUM_ACRS]; -} _s390_regs_common32; - -typedef struct { - _s390_regs_common32 regs; - _s390_fp_regs32 fpregs; -} _sigregs32; - -typedef struct { - __u32 gprs_high[__NUM_GPRS]; - __u64 vxrs_low[__NUM_VXRS_LOW]; - __vector128 vxrs_high[__NUM_VXRS_HIGH]; - __u8 __reserved[128]; -} _sigregs_ext32; - -#define _SIGCONTEXT_NSIG32 64 -#define _SIGCONTEXT_NSIG_BPW32 32 -#define __SIGNAL_FRAMESIZE32 96 -#define _SIGMASK_COPY_SIZE32 (sizeof(u32) * 2) - -struct sigcontext32 { - __u32 oldmask[_COMPAT_NSIG_WORDS]; - __u32 sregs; /* pointer */ -}; - -/* asm/signal.h */ - -/* asm/ucontext.h */ -struct ucontext32 { - __u32 uc_flags; - __u32 uc_link; /* pointer */ - compat_stack_t uc_stack; - _sigregs32 uc_mcontext; - compat_sigset_t uc_sigmask; - /* Allow for uc_sigmask growth. Glibc uses a 1024-bit sigset_t. 
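
The A() macro deleted above exists because a 31-bit user pointer carries the addressing-mode bit in bit 31; it has to be cleared before the value can be used as a 64-bit kernel address. A standalone sketch of the same conversion (ptr31_to_addr is an illustrative name, not a kernel helper):

#include <stdint.h>

/* Convert a 31-bit user pointer to a usable 64-bit address by masking
 * off the high-order (addressing-mode) bit, as A(__x) did above.
 */
static unsigned long ptr31_to_addr(uint32_t uptr)
{
	return (unsigned long)(uptr & 0x7FFFFFFFUL);
}
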
*/ - unsigned char __unused[128 - sizeof(compat_sigset_t)]; - _sigregs_ext32 uc_mcontext_ext; -}; - -struct stat64_emu31; -struct mmap_arg_struct_emu31; -struct fadvise64_64_args; - -long compat_sys_s390_truncate64(const char __user *path, u32 high, u32 low); -long compat_sys_s390_ftruncate64(unsigned int fd, u32 high, u32 low); -long compat_sys_s390_pread64(unsigned int fd, char __user *ubuf, compat_size_t count, u32 high, u32 low); -long compat_sys_s390_pwrite64(unsigned int fd, const char __user *ubuf, compat_size_t count, u32 high, u32 low); -long compat_sys_s390_readahead(int fd, u32 high, u32 low, s32 count); -long compat_sys_s390_stat64(const char __user *filename, struct stat64_emu31 __user *statbuf); -long compat_sys_s390_lstat64(const char __user *filename, struct stat64_emu31 __user *statbuf); -long compat_sys_s390_fstat64(unsigned int fd, struct stat64_emu31 __user *statbuf); -long compat_sys_s390_fstatat64(unsigned int dfd, const char __user *filename, struct stat64_emu31 __user *statbuf, int flag); -long compat_sys_s390_old_mmap(struct mmap_arg_struct_emu31 __user *arg); -long compat_sys_s390_mmap2(struct mmap_arg_struct_emu31 __user *arg); -long compat_sys_s390_read(unsigned int fd, char __user *buf, compat_size_t count); -long compat_sys_s390_write(unsigned int fd, const char __user *buf, compat_size_t count); -long compat_sys_s390_fadvise64(int fd, u32 high, u32 low, compat_size_t len, int advise); -long compat_sys_s390_fadvise64_64(struct fadvise64_64_args __user *args); -long compat_sys_s390_sync_file_range(int fd, u32 offhigh, u32 offlow, u32 nhigh, u32 nlow, unsigned int flags); -long compat_sys_s390_fallocate(int fd, int mode, u32 offhigh, u32 offlow, u32 lenhigh, u32 lenlow); -long compat_sys_sigreturn(void); -long compat_sys_rt_sigreturn(void); - -#endif /* _ASM_S390X_S390_H */ diff --git a/arch/s390/kernel/compat_ptrace.h b/arch/s390/kernel/compat_ptrace.h deleted file mode 100644 index 3c400fc7e987..000000000000 --- a/arch/s390/kernel/compat_ptrace.h +++ /dev/null @@ -1,64 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _PTRACE32_H -#define _PTRACE32_H - -#include <asm/ptrace.h> /* needed for NUM_CR_WORDS */ -#include "compat_linux.h" /* needed for psw_compat_t */ - -struct compat_per_struct_kernel { - __u32 cr9; /* PER control bits */ - __u32 cr10; /* PER starting address */ - __u32 cr11; /* PER ending address */ - __u32 bits; /* Obsolete software bits */ - __u32 starting_addr; /* User specified start address */ - __u32 ending_addr; /* User specified end address */ - __u16 perc_atmid; /* PER trap ATMID */ - __u32 address; /* PER trap instruction address */ - __u8 access_id; /* PER trap access identification */ -}; - -struct compat_user_regs_struct -{ - psw_compat_t psw; - u32 gprs[NUM_GPRS]; - u32 acrs[NUM_ACRS]; - u32 orig_gpr2; - /* nb: there's a 4-byte hole here */ - s390_fp_regs fp_regs; - /* - * These per registers are in here so that gdb can modify them - * itself as there is no "official" ptrace interface for hardware - * watchpoints. This is the way intel does it. - */ - struct compat_per_struct_kernel per_info; - u32 ieee_instruction_pointer; /* obsolete, always 0 */ -}; - -struct compat_user { - /* We start with the registers, to mimic the way that "memory" - is returned from the ptrace(3,...) function. */ - struct compat_user_regs_struct regs; - /* The rest of this junk is to help gdb figure out what goes where */ - u32 u_tsize; /* Text segment size (pages). */ - u32 u_dsize; /* Data segment size (pages). 
*/ - u32 u_ssize; /* Stack segment size (pages). */ - u32 start_code; /* Starting virtual address of text. */ - u32 start_stack; /* Starting virtual address of stack area. - This is actually the bottom of the stack, - the top of the stack is always found in the - esp register. */ - s32 signal; /* Signal that caused the core dump. */ - u32 u_ar0; /* Used by gdb to help find the values for */ - /* the registers. */ - u32 magic; /* To uniquely identify a core file */ - char u_comm[32]; /* User command that was responsible */ -}; - -typedef struct -{ - __u32 len; - __u32 kernel_addr; - __u32 process_addr; -} compat_ptrace_area; - -#endif /* _PTRACE32_H */ diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c deleted file mode 100644 index 5a86b9d1da71..000000000000 --- a/arch/s390/kernel/compat_signal.c +++ /dev/null @@ -1,420 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright IBM Corp. 2000, 2006 - * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com) - * Gerhard Tonn (ton@de.ibm.com) - * - * Copyright (C) 1991, 1992 Linus Torvalds - * - * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson - */ - -#include <linux/compat.h> -#include <linux/sched.h> -#include <linux/sched/task_stack.h> -#include <linux/mm.h> -#include <linux/smp.h> -#include <linux/kernel.h> -#include <linux/signal.h> -#include <linux/errno.h> -#include <linux/wait.h> -#include <linux/ptrace.h> -#include <linux/unistd.h> -#include <linux/stddef.h> -#include <linux/tty.h> -#include <linux/personality.h> -#include <linux/binfmts.h> -#include <asm/vdso-symbols.h> -#include <asm/access-regs.h> -#include <asm/ucontext.h> -#include <linux/uaccess.h> -#include <asm/lowcore.h> -#include <asm/fpu.h> -#include "compat_linux.h" -#include "compat_ptrace.h" -#include "entry.h" - -typedef struct -{ - __u8 callee_used_stack[__SIGNAL_FRAMESIZE32]; - struct sigcontext32 sc; - _sigregs32 sregs; - int signo; - _sigregs_ext32 sregs_ext; - __u16 svc_insn; /* Offset of svc_insn is NOT fixed! 
*/ -} sigframe32; - -typedef struct -{ - __u8 callee_used_stack[__SIGNAL_FRAMESIZE32]; - __u16 svc_insn; - compat_siginfo_t info; - struct ucontext32 uc; -} rt_sigframe32; - -/* Store registers needed to create the signal frame */ -static void store_sigregs(void) -{ - save_access_regs(current->thread.acrs); - save_user_fpu_regs(); -} - -/* Load registers after signal return */ -static void load_sigregs(void) -{ - restore_access_regs(current->thread.acrs); -} - -static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs) -{ - _sigregs32 user_sregs; - int i; - - user_sregs.regs.psw.mask = (__u32)(regs->psw.mask >> 32); - user_sregs.regs.psw.mask &= PSW32_MASK_USER | PSW32_MASK_RI; - user_sregs.regs.psw.mask |= PSW32_USER_BITS; - user_sregs.regs.psw.addr = (__u32) regs->psw.addr | - (__u32)(regs->psw.mask & PSW_MASK_BA); - for (i = 0; i < NUM_GPRS; i++) - user_sregs.regs.gprs[i] = (__u32) regs->gprs[i]; - memcpy(&user_sregs.regs.acrs, current->thread.acrs, - sizeof(user_sregs.regs.acrs)); - fpregs_store((_s390_fp_regs *) &user_sregs.fpregs, ¤t->thread.ufpu); - if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs32))) - return -EFAULT; - return 0; -} - -static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs) -{ - _sigregs32 user_sregs; - int i; - - /* Always make any pending restarted system call return -EINTR */ - current->restart_block.fn = do_no_restart_syscall; - - if (__copy_from_user(&user_sregs, &sregs->regs, sizeof(user_sregs))) - return -EFAULT; - - if (!is_ri_task(current) && (user_sregs.regs.psw.mask & PSW32_MASK_RI)) - return -EINVAL; - - /* Use regs->psw.mask instead of PSW_USER_BITS to preserve PER bit. */ - regs->psw.mask = (regs->psw.mask & ~(PSW_MASK_USER | PSW_MASK_RI)) | - (__u64)(user_sregs.regs.psw.mask & PSW32_MASK_USER) << 32 | - (__u64)(user_sregs.regs.psw.mask & PSW32_MASK_RI) << 32 | - (__u64)(user_sregs.regs.psw.addr & PSW32_ADDR_AMODE); - /* Check for invalid user address space control. 
*/ - if ((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_HOME) - regs->psw.mask = PSW_ASC_PRIMARY | - (regs->psw.mask & ~PSW_MASK_ASC); - regs->psw.addr = (__u64)(user_sregs.regs.psw.addr & PSW32_ADDR_INSN); - for (i = 0; i < NUM_GPRS; i++) - regs->gprs[i] = (__u64) user_sregs.regs.gprs[i]; - memcpy(¤t->thread.acrs, &user_sregs.regs.acrs, - sizeof(current->thread.acrs)); - fpregs_load((_s390_fp_regs *)&user_sregs.fpregs, ¤t->thread.ufpu); - - clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */ - return 0; -} - -static int save_sigregs_ext32(struct pt_regs *regs, - _sigregs_ext32 __user *sregs_ext) -{ - __u32 gprs_high[NUM_GPRS]; - __u64 vxrs[__NUM_VXRS_LOW]; - int i; - - /* Save high gprs to signal stack */ - for (i = 0; i < NUM_GPRS; i++) - gprs_high[i] = regs->gprs[i] >> 32; - if (__copy_to_user(&sregs_ext->gprs_high, &gprs_high, - sizeof(sregs_ext->gprs_high))) - return -EFAULT; - - /* Save vector registers to signal stack */ - if (cpu_has_vx()) { - for (i = 0; i < __NUM_VXRS_LOW; i++) - vxrs[i] = current->thread.ufpu.vxrs[i].low; - if (__copy_to_user(&sregs_ext->vxrs_low, vxrs, - sizeof(sregs_ext->vxrs_low)) || - __copy_to_user(&sregs_ext->vxrs_high, - current->thread.ufpu.vxrs + __NUM_VXRS_LOW, - sizeof(sregs_ext->vxrs_high))) - return -EFAULT; - } - return 0; -} - -static int restore_sigregs_ext32(struct pt_regs *regs, - _sigregs_ext32 __user *sregs_ext) -{ - __u32 gprs_high[NUM_GPRS]; - __u64 vxrs[__NUM_VXRS_LOW]; - int i; - - /* Restore high gprs from signal stack */ - if (__copy_from_user(&gprs_high, &sregs_ext->gprs_high, - sizeof(sregs_ext->gprs_high))) - return -EFAULT; - for (i = 0; i < NUM_GPRS; i++) - *(__u32 *)®s->gprs[i] = gprs_high[i]; - - /* Restore vector registers from signal stack */ - if (cpu_has_vx()) { - if (__copy_from_user(vxrs, &sregs_ext->vxrs_low, - sizeof(sregs_ext->vxrs_low)) || - __copy_from_user(current->thread.ufpu.vxrs + __NUM_VXRS_LOW, - &sregs_ext->vxrs_high, - sizeof(sregs_ext->vxrs_high))) - return -EFAULT; - for (i = 0; i < __NUM_VXRS_LOW; i++) - current->thread.ufpu.vxrs[i].low = vxrs[i]; - } - return 0; -} - -COMPAT_SYSCALL_DEFINE0(sigreturn) -{ - struct pt_regs *regs = task_pt_regs(current); - sigframe32 __user *frame = (sigframe32 __user *)regs->gprs[15]; - sigset_t set; - - if (get_compat_sigset(&set, (compat_sigset_t __user *)frame->sc.oldmask)) - goto badframe; - set_current_blocked(&set); - save_user_fpu_regs(); - if (restore_sigregs32(regs, &frame->sregs)) - goto badframe; - if (restore_sigregs_ext32(regs, &frame->sregs_ext)) - goto badframe; - load_sigregs(); - return regs->gprs[2]; -badframe: - force_sig(SIGSEGV); - return 0; -} - -COMPAT_SYSCALL_DEFINE0(rt_sigreturn) -{ - struct pt_regs *regs = task_pt_regs(current); - rt_sigframe32 __user *frame = (rt_sigframe32 __user *)regs->gprs[15]; - sigset_t set; - - if (get_compat_sigset(&set, &frame->uc.uc_sigmask)) - goto badframe; - set_current_blocked(&set); - if (compat_restore_altstack(&frame->uc.uc_stack)) - goto badframe; - save_user_fpu_regs(); - if (restore_sigregs32(regs, &frame->uc.uc_mcontext)) - goto badframe; - if (restore_sigregs_ext32(regs, &frame->uc.uc_mcontext_ext)) - goto badframe; - load_sigregs(); - return regs->gprs[2]; -badframe: - force_sig(SIGSEGV); - return 0; -} - -/* - * Set up a signal frame. - */ - - -/* - * Determine which stack to use.. 
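
save_sigregs_ext32()/restore_sigregs_ext32() above split each 64-bit general purpose register into a 32-bit low half (kept in the regular sigregs) and a high half stored in gprs_high[]; on restore the high word is written through a __u32 pointer, which lands in the upper half of the register because s390 is big-endian. A small, endian-independent sketch of the same split and merge (plain C, illustrative names):

#include <stdint.h>

/* Split a 64-bit GPR into the two 32-bit halves kept in the 31-bit
 * signal frame, and merge them back on restore.
 */
static void split_gpr(uint64_t gpr, uint32_t *high, uint32_t *low)
{
	*high = gpr >> 32;
	*low  = (uint32_t)gpr;
}

static uint64_t merge_gpr(uint32_t high, uint32_t low)
{
	return (uint64_t)high << 32 | low;
}
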
- */ -static inline void __user * -get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size) -{ - unsigned long sp; - - /* Default to using normal stack */ - sp = (unsigned long) A(regs->gprs[15]); - - /* Overflow on alternate signal stack gives SIGSEGV. */ - if (on_sig_stack(sp) && !on_sig_stack((sp - frame_size) & -8UL)) - return (void __user *) -1UL; - - /* This is the X/Open sanctioned signal stack switching. */ - if (ka->sa.sa_flags & SA_ONSTACK) { - if (! sas_ss_flags(sp)) - sp = current->sas_ss_sp + current->sas_ss_size; - } - - return (void __user *)((sp - frame_size) & -8ul); -} - -static int setup_frame32(struct ksignal *ksig, sigset_t *set, - struct pt_regs *regs) -{ - int sig = ksig->sig; - sigframe32 __user *frame; - unsigned long restorer; - size_t frame_size; - - /* - * gprs_high are always present for 31-bit compat tasks. - * The space for vector registers is only allocated if - * the machine supports it - */ - frame_size = sizeof(*frame) - sizeof(frame->sregs_ext.__reserved); - if (!cpu_has_vx()) - frame_size -= sizeof(frame->sregs_ext.vxrs_low) + - sizeof(frame->sregs_ext.vxrs_high); - frame = get_sigframe(&ksig->ka, regs, frame_size); - if (frame == (void __user *) -1UL) - return -EFAULT; - - /* Set up backchain. */ - if (__put_user(regs->gprs[15], (unsigned int __user *) frame)) - return -EFAULT; - - /* Create struct sigcontext32 on the signal stack */ - if (put_compat_sigset((compat_sigset_t __user *)frame->sc.oldmask, - set, sizeof(compat_sigset_t))) - return -EFAULT; - if (__put_user(ptr_to_compat(&frame->sregs), &frame->sc.sregs)) - return -EFAULT; - - /* Store registers needed to create the signal frame */ - store_sigregs(); - - /* Create _sigregs32 on the signal stack */ - if (save_sigregs32(regs, &frame->sregs)) - return -EFAULT; - - /* Place signal number on stack to allow backtrace from handler. */ - if (__put_user(regs->gprs[2], (int __force __user *) &frame->signo)) - return -EFAULT; - - /* Create _sigregs_ext32 on the signal stack */ - if (save_sigregs_ext32(regs, &frame->sregs_ext)) - return -EFAULT; - - /* Set up to return from userspace. If provided, use a stub - already in userspace. */ - if (ksig->ka.sa.sa_flags & SA_RESTORER) { - restorer = (unsigned long __force) - ksig->ka.sa.sa_restorer | PSW32_ADDR_AMODE; - } else { - restorer = VDSO32_SYMBOL(current, sigreturn); - } - - /* Set up registers for signal handler */ - regs->gprs[14] = restorer; - regs->gprs[15] = (__force __u64) frame; - /* Force 31 bit amode and default user address space control. */ - regs->psw.mask = PSW_MASK_BA | - (PSW_USER_BITS & PSW_MASK_ASC) | - (regs->psw.mask & ~PSW_MASK_ASC); - regs->psw.addr = (__force __u64) ksig->ka.sa.sa_handler; - - regs->gprs[2] = sig; - regs->gprs[3] = (__force __u64) &frame->sc; - - /* We forgot to include these in the sigcontext. - To avoid breaking binary compatibility, they are passed as args. */ - if (sig == SIGSEGV || sig == SIGBUS || sig == SIGILL || - sig == SIGTRAP || sig == SIGFPE) { - /* set extra registers only for synchronous signals */ - regs->gprs[4] = regs->int_code & 127; - regs->gprs[5] = regs->int_parm_long; - regs->gprs[6] = current->thread.last_break; - } - - return 0; -} - -static int setup_rt_frame32(struct ksignal *ksig, sigset_t *set, - struct pt_regs *regs) -{ - rt_sigframe32 __user *frame; - unsigned long restorer; - size_t frame_size; - u32 uc_flags; - - frame_size = sizeof(*frame) - - sizeof(frame->uc.uc_mcontext_ext.__reserved); - /* - * gprs_high are always present for 31-bit compat tasks. 
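
get_sigframe() above picks where the 31-bit signal frame goes: normally just below the current user stack pointer, but at the top of the alternate signal stack when the handler was installed with SA_ONSTACK and the task is not already running on it, always rounded down to an 8-byte boundary. A condensed standalone sketch of that decision (it ignores the overflow and SS_DISABLE checks; the parameter names stand in for on_sig_stack()/sas_ss_flags()/SA_ONSTACK):

/* Sketch only: choose the user stack address for a signal frame. */
static unsigned long pick_sigframe(unsigned long usp, unsigned long frame_size,
				   int handler_wants_altstack,
				   int already_on_altstack,
				   unsigned long altstack_base,
				   unsigned long altstack_size)
{
	unsigned long sp = usp;

	if (handler_wants_altstack && !already_on_altstack)
		sp = altstack_base + altstack_size;	/* top of alternate stack */

	return (sp - frame_size) & ~7UL;		/* 8-byte aligned, grows down */
}
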
- * The space for vector registers is only allocated if - * the machine supports it - */ - uc_flags = UC_GPRS_HIGH; - if (cpu_has_vx()) { - uc_flags |= UC_VXRS; - } else { - frame_size -= sizeof(frame->uc.uc_mcontext_ext.vxrs_low) + - sizeof(frame->uc.uc_mcontext_ext.vxrs_high); - } - frame = get_sigframe(&ksig->ka, regs, frame_size); - if (frame == (void __user *) -1UL) - return -EFAULT; - - /* Set up backchain. */ - if (__put_user(regs->gprs[15], (unsigned int __force __user *) frame)) - return -EFAULT; - - /* Set up to return from userspace. If provided, use a stub - already in userspace. */ - if (ksig->ka.sa.sa_flags & SA_RESTORER) { - restorer = (unsigned long __force) - ksig->ka.sa.sa_restorer | PSW32_ADDR_AMODE; - } else { - restorer = VDSO32_SYMBOL(current, rt_sigreturn); - } - - /* Create siginfo on the signal stack */ - if (copy_siginfo_to_user32(&frame->info, &ksig->info)) - return -EFAULT; - - /* Store registers needed to create the signal frame */ - store_sigregs(); - - /* Create ucontext on the signal stack. */ - if (__put_user(uc_flags, &frame->uc.uc_flags) || - __put_user(0, &frame->uc.uc_link) || - __compat_save_altstack(&frame->uc.uc_stack, regs->gprs[15]) || - save_sigregs32(regs, &frame->uc.uc_mcontext) || - put_compat_sigset(&frame->uc.uc_sigmask, set, sizeof(compat_sigset_t)) || - save_sigregs_ext32(regs, &frame->uc.uc_mcontext_ext)) - return -EFAULT; - - /* Set up registers for signal handler */ - regs->gprs[14] = restorer; - regs->gprs[15] = (__force __u64) frame; - /* Force 31 bit amode and default user address space control. */ - regs->psw.mask = PSW_MASK_BA | - (PSW_USER_BITS & PSW_MASK_ASC) | - (regs->psw.mask & ~PSW_MASK_ASC); - regs->psw.addr = (__u64 __force) ksig->ka.sa.sa_handler; - - regs->gprs[2] = ksig->sig; - regs->gprs[3] = (__force __u64) &frame->info; - regs->gprs[4] = (__force __u64) &frame->uc; - regs->gprs[5] = current->thread.last_break; - return 0; -} - -/* - * OK, we're invoking a handler - */ - -void handle_signal32(struct ksignal *ksig, sigset_t *oldset, - struct pt_regs *regs) -{ - int ret; - - /* Set up the stack frame */ - if (ksig->ka.sa.sa_flags & SA_SIGINFO) - ret = setup_rt_frame32(ksig, oldset, regs); - else - ret = setup_frame32(ksig, oldset, regs); - - signal_setup_done(ret, ksig, test_thread_flag(TIF_SINGLE_STEP)); -} - diff --git a/arch/s390/kernel/cpacf.c b/arch/s390/kernel/cpacf.c index 3bebc47beeab..9d85b4bc7036 100644 --- a/arch/s390/kernel/cpacf.c +++ b/arch/s390/kernel/cpacf.c @@ -3,8 +3,7 @@ * Copyright IBM Corp. 
2024 */ -#define KMSG_COMPONENT "cpacf" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "cpacf: " fmt #include <linux/cpu.h> #include <linux/device.h> diff --git a/arch/s390/kernel/cpcmd.c b/arch/s390/kernel/cpcmd.c index 2f4174b961de..ab611764642a 100644 --- a/arch/s390/kernel/cpcmd.c +++ b/arch/s390/kernel/cpcmd.c @@ -6,8 +6,7 @@ * Christian Borntraeger (cborntra@de.ibm.com), */ -#define KMSG_COMPONENT "cpcmd" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "cpcmd: " fmt #include <linux/kernel.h> #include <linux/export.h> diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 6a26f202441d..71cdb6845dd7 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -10,8 +10,7 @@ * Bugreports to: <Linux390@de.ibm.com> */ -#define KMSG_COMPONENT "s390dbf" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "s390dbf: " fmt #include <linux/stddef.h> #include <linux/kernel.h> diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index 63a1d4226ff8..1cec93895b3a 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -503,24 +503,27 @@ static int copy_from_regs(struct pt_regs *regs, void *dst, void *src, int len) void show_code(struct pt_regs *regs) { char *mode = user_mode(regs) ? "User" : "Krnl"; + unsigned long addr, pswaddr; unsigned char code[64]; char buffer[128], *ptr; - unsigned long addr; int start, end, opsize, hops, i; + pswaddr = regs->psw.addr; + if (test_pt_regs_flag(regs, PIF_PSW_ADDR_ADJUSTED)) + pswaddr = __forward_psw(regs->psw, regs->int_code >> 16); /* Get a snapshot of the 64 bytes surrounding the fault address. */ - for (start = 32; start && regs->psw.addr >= 34 - start; start -= 2) { - addr = regs->psw.addr - 34 + start; + for (start = 32; start && pswaddr >= 34 - start; start -= 2) { + addr = pswaddr - 34 + start; if (copy_from_regs(regs, code + start - 2, (void *)addr, 2)) break; } for (end = 32; end < 64; end += 2) { - addr = regs->psw.addr + end - 32; + addr = pswaddr + end - 32; if (copy_from_regs(regs, code + end, (void *)addr, 2)) break; } /* Code snapshot usable ? */ - if ((regs->psw.addr & 1) || start >= end) { + if ((pswaddr & 1) || start >= end) { printk("%s Code: Bad PSW.\n", mode); return; } @@ -543,12 +546,12 @@ void show_code(struct pt_regs *regs) while (start < end && hops < 8) { opsize = insn_length(code[start]); if (start + opsize == 32) - *ptr++ = '#'; + *ptr++ = '*'; else if (start == 32) *ptr++ = '>'; else *ptr++ = ' '; - addr = regs->psw.addr + start - 32; + addr = pswaddr + start - 32; ptr += sprintf(ptr, "%px: ", (void *)addr); if (start + opsize >= end) break; diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index dd410962ecbe..f9d52e05e01e 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -155,12 +155,16 @@ static void show_last_breaking_event(struct pt_regs *regs) void show_registers(struct pt_regs *regs) { struct psw_bits *psw = &psw_bits(regs->psw); + unsigned long pswaddr; char *mode; + pswaddr = regs->psw.addr; + if (test_pt_regs_flag(regs, PIF_PSW_ADDR_ADJUSTED)) + pswaddr = __forward_psw(regs->psw, regs->int_code >> 16); mode = user_mode(regs) ? 
"User" : "Krnl"; - printk("%s PSW : %px %px", mode, (void *)regs->psw.mask, (void *)regs->psw.addr); + printk("%s PSW : %px %px", mode, (void *)regs->psw.mask, (void *)pswaddr); if (!user_mode(regs)) - pr_cont(" (%pSR)", (void *)regs->psw.addr); + pr_cont(" (%pSR)", (void *)pswaddr); pr_cont("\n"); printk(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x " "P:%x AS:%x CC:%x PM:%x", psw->per, psw->dat, psw->io, psw->ext, diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 544e5403dd91..b27239c03d79 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -4,8 +4,7 @@ * Author(s): Hongjie Yang <hongjie@us.ibm.com>, */ -#define KMSG_COMPONENT "setup" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "setup: " fmt #include <linux/sched/debug.h> #include <linux/cpufeature.h> @@ -120,21 +119,21 @@ static noinline __init void setup_arch_string(void) EBCASC(mach->type, sizeof(mach->type)); EBCASC(mach->model, sizeof(mach->model)); EBCASC(mach->model_capacity, sizeof(mach->model_capacity)); - sprintf(mstr, "%-16.16s %-4.4s %-16.16s %-16.16s", - mach->manufacturer, mach->type, - mach->model, mach->model_capacity); + scnprintf(mstr, sizeof(mstr), "%-16.16s %-4.4s %-16.16s %-16.16s", + mach->manufacturer, mach->type, + mach->model, mach->model_capacity); strim_all(mstr); if (stsi(vm, 3, 2, 2) == 0 && vm->count) { EBCASC(vm->vm[0].cpi, sizeof(vm->vm[0].cpi)); - sprintf(hvstr, "%-16.16s", vm->vm[0].cpi); + scnprintf(hvstr, sizeof(hvstr), "%-16.16s", vm->vm[0].cpi); strim_all(hvstr); } else { - sprintf(hvstr, "%s", - machine_is_lpar() ? "LPAR" : - machine_is_vm() ? "z/VM" : - machine_is_kvm() ? "KVM" : "unknown"); + scnprintf(hvstr, sizeof(hvstr), "%s", + machine_is_lpar() ? "LPAR" : + machine_is_vm() ? "z/VM" : + machine_is_kvm() ? 
"KVM" : "unknown"); } - sprintf(arch_hw_string, "HW: %s (%s)", mstr, hvstr); + scnprintf(arch_hw_string, sizeof(arch_hw_string), "HW: %s (%s)", mstr, hvstr); dump_stack_set_arch_desc("%s (%s)", mstr, hvstr); } diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 75b0fbb236d0..c360087807d8 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -162,9 +162,13 @@ SYM_FUNC_START(__switch_to_asm) stg %r3,__LC_CURRENT(%r13) # store task struct of next stg %r15,__LC_KERNEL_STACK(%r13) # store end of kernel stack lg %r15,__THREAD_ksp(%r1,%r3) # load kernel stack of next - aghi %r3,__TASK_pid - mvc __LC_CURRENT_PID(4,%r13),0(%r3) # store pid of next + lay %r4,__TASK_pid(%r3) + mvc __LC_CURRENT_PID(4,%r13),0(%r4) # store pid of next ALTERNATIVE "nop", "lpp _LPP_OFFSET(%r13)", ALT_FACILITY(40) +#ifdef CONFIG_STACKPROTECTOR + lg %r3,__TASK_stack_canary(%r3) + stg %r3,__LC_STACK_CANARY(%r13) +#endif lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task BR_EX %r14 SYM_FUNC_END(__switch_to_asm) @@ -606,20 +610,3 @@ SYM_DATA_START_LOCAL(daton_psw) .quad PSW_KERNEL_BITS .quad .Ldaton SYM_DATA_END(daton_psw) - - .section .rodata, "a" - .balign 8 -#define SYSCALL(esame,emu) .quad __s390x_ ## esame -SYM_DATA_START(sys_call_table) -#include <asm/syscall_table.h> -SYM_DATA_END(sys_call_table) -#undef SYSCALL - -#ifdef CONFIG_COMPAT - -#define SYSCALL(esame,emu) .quad __s390_ ## emu -SYM_DATA_START(sys_call_table_emu) -#include <asm/syscall_table.h> -SYM_DATA_END(sys_call_table_emu) -#undef SYSCALL -#endif diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head.S index 7edb9ded199c..7edb9ded199c 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head.S diff --git a/arch/s390/kernel/hiperdispatch.c b/arch/s390/kernel/hiperdispatch.c index 2507bc3f7757..217206522266 100644 --- a/arch/s390/kernel/hiperdispatch.c +++ b/arch/s390/kernel/hiperdispatch.c @@ -3,8 +3,7 @@ * Copyright IBM Corp. 
2024 */ -#define KMSG_COMPONENT "hd" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "hd: " fmt /* * Hiperdispatch: @@ -65,7 +64,7 @@ #define HD_DELAY_FACTOR (4) #define HD_DELAY_INTERVAL (HZ / 4) -#define HD_STEAL_THRESHOLD 30 +#define HD_STEAL_THRESHOLD 10 #define HD_STEAL_AVG_WEIGHT 16 static cpumask_t hd_vl_coremask; /* Mask containing all vertical low COREs */ diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index 91e207b50394..9d1f8a50f5a4 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -22,12 +22,14 @@ #include <linux/bug.h> #include <linux/memory.h> #include <linux/execmem.h> +#include <asm/arch-stackprotector.h> #include <asm/alternative.h> #include <asm/nospec-branch.h> #include <asm/facility.h> #include <asm/ftrace.lds.h> #include <asm/set_memory.h> #include <asm/setup.h> +#include <asm/asm-offsets.h> #if 0 #define DEBUGP printk @@ -495,9 +497,7 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *s; char *secstrings, *secname; void *aseg; -#ifdef CONFIG_FUNCTION_TRACER - int ret; -#endif + int rc = 0; if (IS_ENABLED(CONFIG_EXPOLINE) && !nospec_disable && me->arch.plt_size) { @@ -527,14 +527,21 @@ int module_finalize(const Elf_Ehdr *hdr, (str_has_prefix(secname, ".s390_return"))) nospec_revert(aseg, aseg + s->sh_size); + if (IS_ENABLED(CONFIG_STACKPROTECTOR) && + (str_has_prefix(secname, "__stack_protector_loc"))) { + rc = stack_protector_apply(aseg, aseg + s->sh_size); + if (rc) + break; + } + #ifdef CONFIG_FUNCTION_TRACER if (!strcmp(FTRACE_CALLSITE_SECTION, secname)) { - ret = module_alloc_ftrace_hotpatch_trampolines(me, s); - if (ret < 0) - return ret; + rc = module_alloc_ftrace_hotpatch_trampolines(me, s); + if (rc) + break; } #endif /* CONFIG_FUNCTION_TRACER */ } - return 0; + return rc; } diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 11f33243a23f..a55abbf65333 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -184,7 +184,7 @@ static notrace void nmi_print_info(void) sclp_emergency_printk(message); } -static notrace void s390_handle_damage(void) +static notrace void __noreturn s390_handle_damage(void) { struct lowcore *lc = get_lowcore(); union ctlreg0 cr0, cr0_new; @@ -214,7 +214,6 @@ static notrace void s390_handle_damage(void) lc->mcck_new_psw = psw_save; local_ctl_load(0, &cr0.reg); disabled_wait(); - while (1); } NOKPROBE_SYMBOL(s390_handle_damage); diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c index c2a468986212..94fa44776d0c 100644 --- a/arch/s390/kernel/os_info.c +++ b/arch/s390/kernel/os_info.c @@ -6,8 +6,7 @@ * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com> */ -#define KMSG_COMPONENT "os_info" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "os_info: " fmt #include <linux/crash_dump.h> #include <linux/kernel.h> diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 04457d88e589..408ab93112bf 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -6,8 +6,7 @@ * Author(s): Hendrik Brueckner <brueckner@linux.ibm.com> * Thomas Richter <tmricht@linux.ibm.com> */ -#define KMSG_COMPONENT "cpum_cf" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "cpum_cf: " fmt #include <linux/kernel.h> #include <linux/kernel_stat.h> @@ -1206,7 +1205,7 @@ static int __init cpumf_pmu_init(void) } /* Setup s390dbf facility */ - cf_dbg = debug_register(KMSG_COMPONENT, 2, 1, 128); + cf_dbg = debug_register("cpum_cf", 2, 1, 128); if (!cf_dbg) { 
pr_err("Registration of s390dbf(cpum_cf) failed\n"); rc = -ENOMEM; @@ -1689,7 +1688,6 @@ static const struct file_operations cfset_fops = { .open = cfset_open, .release = cfset_release, .unlocked_ioctl = cfset_ioctl, - .compat_ioctl = cfset_ioctl, }; static struct miscdevice cfset_dev = { diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index f432869f8921..459af23a47a5 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -5,8 +5,7 @@ * Copyright IBM Corp. 2013, 2018 * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> */ -#define KMSG_COMPONENT "cpum_sf" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "cpum_sf: " fmt #include <linux/kernel.h> #include <linux/kernel_stat.h> @@ -1093,7 +1092,7 @@ static void perf_event_count_update(struct perf_event *event, u64 count) * combined-sampling data entry consists of a basic- and a diagnostic-sampling * data entry. The sampling function is determined by the flags in the perf * event hardware structure. The function always works with a combined-sampling - * data entry but ignores the the diagnostic portion if it is not available. + * data entry but ignores the diagnostic portion if it is not available. * * Note that the implementation focuses on basic-sampling data entries and, if * such an entry is not valid, the entire combined-sampling data entry is @@ -2070,7 +2069,7 @@ static int __init init_cpum_sampling_pmu(void) CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG); } - sfdbg = debug_register(KMSG_COMPONENT, 2, 1, 80); + sfdbg = debug_register("cpum_sf", 2, 1, 80); if (!sfdbg) { pr_err("Registering for s390dbf failed\n"); return -ENOMEM; diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index 91b8716c883a..606750bae508 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -5,8 +5,7 @@ * Copyright IBM Corp. 2012, 2013 * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> */ -#define KMSG_COMPONENT "perf" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "perf: " fmt #include <linux/kernel.h> #include <linux/perf_event.h> @@ -15,7 +14,6 @@ #include <linux/seq_file.h> #include <linux/spinlock.h> #include <linux/uaccess.h> -#include <linux/compat.h> #include <linux/sysfs.h> #include <asm/stacktrace.h> #include <asm/irq.h> diff --git a/arch/s390/kernel/perf_pai.c b/arch/s390/kernel/perf_pai.c new file mode 100644 index 000000000000..810f5b6c5e01 --- /dev/null +++ b/arch/s390/kernel/perf_pai.c @@ -0,0 +1,1230 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Performance event support - Processor Activity Instrumentation Facility + * + * Copyright IBM Corp. 2026 + * Author(s): Thomas Richter <tmricht@linux.ibm.com> + */ +#define pr_fmt(fmt) "pai: " fmt + +#include <linux/kernel.h> +#include <linux/kernel_stat.h> +#include <linux/percpu.h> +#include <linux/notifier.h> +#include <linux/init.h> +#include <linux/io.h> +#include <linux/perf_event.h> +#include <asm/ctlreg.h> +#include <asm/pai.h> +#include <asm/debug.h> + +static debug_info_t *paidbg; + +DEFINE_STATIC_KEY_FALSE(pai_key); + +enum { + PAI_PMU_CRYPTO, /* Index of PMU pai_crypto */ + PAI_PMU_EXT, /* Index of PMU pai_ext */ + PAI_PMU_MAX /* # of PAI PMUs */ +}; + +enum { + PAIE1_CB_SZ = 0x200, /* Size of PAIE1 control block */ + PAIE1_CTRBLOCK_SZ = 0x400 /* Size of PAIE1 counter blocks */ +}; + +struct pai_userdata { + u16 num; + u64 value; +} __packed; + +/* Create the PAI extension 1 control block area. 
+ * The PAI extension control block 1 is pointed to by lowcore + * address 0x1508 for each CPU. This control block is 512 bytes in size + * and requires a 512 byte boundary alignment. + */ +struct paiext_cb { /* PAI extension 1 control block */ + u64 header; /* Not used */ + u64 reserved1; + u64 acc; /* Addr to analytics counter control block */ + u8 reserved2[PAIE1_CTRBLOCK_SZ - 3 * sizeof(u64)]; +} __packed; + +struct pai_map { + unsigned long *area; /* Area for CPU to store counters */ + struct pai_userdata *save; /* Page to store no-zero counters */ + unsigned int active_events; /* # of PAI crypto users */ + refcount_t refcnt; /* Reference count mapped buffers */ + struct perf_event *event; /* Perf event for sampling */ + struct list_head syswide_list; /* List system-wide sampling events */ + struct paiext_cb *paiext_cb; /* PAI extension control block area */ + bool fullpage; /* True: counter area is a full page */ +}; + +struct pai_mapptr { + struct pai_map *mapptr; +}; + +static struct pai_root { /* Anchor to per CPU data */ + refcount_t refcnt; /* Overall active events */ + struct pai_mapptr __percpu *mapptr; +} pai_root[PAI_PMU_MAX]; + +/* This table defines the different parameters of the PAI PMUs. During + * initialization the machine dependent values are extracted and saved. + * However most of the values are static and do not change. + * There is one table entry per PAI PMU. + */ +struct pai_pmu { /* Define PAI PMU characteristics */ + const char *pmuname; /* Name of PMU */ + const int facility_nr; /* Facility number to check for support */ + unsigned int num_avail; /* # Counters defined by hardware */ + unsigned int num_named; /* # Counters known by name */ + unsigned long base; /* Counter set base number */ + unsigned long kernel_offset; /* Offset to kernel part in counter page */ + unsigned long area_size; /* Size of counter area */ + const char * const *names; /* List of counter names */ + struct pmu *pmu; /* Ptr to supporting PMU */ + int (*init)(struct pai_pmu *p); /* PMU support init function */ + void (*exit)(struct pai_pmu *p); /* PMU support exit function */ + struct attribute_group *event_group; /* Ptr to attribute of events */ +}; + +static struct pai_pmu pai_pmu[]; /* Forward declaration */ + +/* Free per CPU data when the last event is removed. */ +static void pai_root_free(int idx) +{ + if (refcount_dec_and_test(&pai_root[idx].refcnt)) { + free_percpu(pai_root[idx].mapptr); + pai_root[idx].mapptr = NULL; + } + debug_sprintf_event(paidbg, 5, "%s root[%d].refcount %d\n", __func__, + idx, refcount_read(&pai_root[idx].refcnt)); +} + +/* + * On initialization of first event also allocate per CPU data dynamically. + * Start with an array of pointers, the array size is the maximum number of + * CPUs possible, which might be larger than the number of CPUs currently + * online. + */ +static int pai_root_alloc(int idx) +{ + if (!refcount_inc_not_zero(&pai_root[idx].refcnt)) { + /* The memory is already zeroed. 
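
pai_root_free() above, and the pai_root_alloc() this comment describes, follow the usual "refcount_inc_not_zero(), else allocate and set the count to 1" idiom: the first event allocates the per-CPU pointer array and the last one tears it down, with pai_reserve_mutex serializing both paths. A simplified standalone model of that idiom (plain C, single-threaded stand-ins for the refcount and percpu helpers; callers are assumed to be balanced, as the mutex guarantees in the real code):

#include <stdlib.h>

struct root_sketch {
	unsigned int refcnt;	/* stand-in for refcount_t */
	void *percpu_data;	/* stand-in for the alloc_percpu() result */
};

/* First caller allocates; later callers only take a reference. */
static int root_get(struct root_sketch *r, size_t size)
{
	if (r->refcnt) {		/* refcount_inc_not_zero() */
		r->refcnt++;
		return 0;
	}
	r->percpu_data = calloc(1, size);
	if (!r->percpu_data)
		return -1;
	r->refcnt = 1;			/* refcount_set(..., 1) */
	return 0;
}

/* Last caller frees. */
static void root_put(struct root_sketch *r)
{
	if (--r->refcnt == 0) {		/* refcount_dec_and_test() */
		free(r->percpu_data);
		r->percpu_data = NULL;
	}
}
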
*/ + pai_root[idx].mapptr = alloc_percpu(struct pai_mapptr); + if (!pai_root[idx].mapptr) + return -ENOMEM; + refcount_set(&pai_root[idx].refcnt, 1); + } + return 0; +} + +/* Release the PMU if event is the last perf event */ +static DEFINE_MUTEX(pai_reserve_mutex); + +/* Free all memory allocated for event counting/sampling setup */ +static void pai_free(struct pai_mapptr *mp) +{ + if (mp->mapptr->fullpage) + free_page((unsigned long)mp->mapptr->area); + else + kfree(mp->mapptr->area); + kfree(mp->mapptr->paiext_cb); + kvfree(mp->mapptr->save); + kfree(mp->mapptr); + mp->mapptr = NULL; +} + +/* Adjust usage counters and remove allocated memory when all users are + * gone. + */ +static void pai_event_destroy_cpu(struct perf_event *event, int cpu) +{ + int idx = PAI_PMU_IDX(event); + struct pai_mapptr *mp = per_cpu_ptr(pai_root[idx].mapptr, cpu); + struct pai_map *cpump = mp->mapptr; + + mutex_lock(&pai_reserve_mutex); + debug_sprintf_event(paidbg, 5, "%s event %#llx idx %d cpu %d users %d " + "refcnt %u\n", __func__, event->attr.config, idx, + event->cpu, cpump->active_events, + refcount_read(&cpump->refcnt)); + if (refcount_dec_and_test(&cpump->refcnt)) + pai_free(mp); + pai_root_free(idx); + mutex_unlock(&pai_reserve_mutex); +} + +static void pai_event_destroy(struct perf_event *event) +{ + int cpu; + + free_page(PAI_SAVE_AREA(event)); + if (event->cpu == -1) { + struct cpumask *mask = PAI_CPU_MASK(event); + + for_each_cpu(cpu, mask) + pai_event_destroy_cpu(event, cpu); + kfree(mask); + } else { + pai_event_destroy_cpu(event, event->cpu); + } +} + +static void paicrypt_event_destroy(struct perf_event *event) +{ + static_branch_dec(&pai_key); + pai_event_destroy(event); +} + +static u64 pai_getctr(unsigned long *page, int nr, unsigned long offset) +{ + if (offset) + nr += offset / sizeof(*page); + return page[nr]; +} + +/* Read the counter values. Return value from location in CMP. For base + * event xxx_ALL sum up all events. Returns counter value. + */ +static u64 pai_getdata(struct perf_event *event, bool kernel) +{ + int idx = PAI_PMU_IDX(event); + struct pai_mapptr *mp = this_cpu_ptr(pai_root[idx].mapptr); + struct pai_pmu *pp = &pai_pmu[idx]; + struct pai_map *cpump = mp->mapptr; + unsigned int i; + u64 sum = 0; + + if (event->attr.config != pp->base) { + return pai_getctr(cpump->area, + event->attr.config - pp->base, + kernel ? pp->kernel_offset : 0); + } + + for (i = 1; i <= pp->num_avail; i++) { + u64 val = pai_getctr(cpump->area, i, + kernel ? pp->kernel_offset : 0); + + if (!val) + continue; + sum += val; + } + return sum; +} + +static u64 paicrypt_getall(struct perf_event *event) +{ + u64 sum = 0; + + if (!event->attr.exclude_kernel) + sum += pai_getdata(event, true); + if (!event->attr.exclude_user) + sum += pai_getdata(event, false); + + return sum; +} + +/* Check concurrent access of counting and sampling for crypto events. + * This function is called in process context and it is save to block. + * When the event initialization functions fails, no other call back will + * be invoked. + * + * Allocate the memory for the event. 
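
pai_getdata()/paicrypt_getall() above either read one specific counter or, for the CRYPTO_ALL/NNPA_ALL base event, sum every available counter, adding the kernel and user instances of the counter area according to the event's exclude_kernel/exclude_user attributes. A standalone sketch of that summation (illustrative layout with two separate arrays instead of one page with a kernel offset; counters start at index 1):

#include <stdint.h>
#include <stdbool.h>

/* Sum counters 1..n from the user and/or kernel halves of the counter
 * area, mirroring how the _ALL base event is accumulated.
 */
static uint64_t sum_all(const uint64_t *user_ctr, const uint64_t *kernel_ctr,
			unsigned int n, bool exclude_user, bool exclude_kernel)
{
	uint64_t sum = 0;

	for (unsigned int i = 1; i <= n; i++) {
		if (!exclude_user)
			sum += user_ctr[i];
		if (!exclude_kernel)
			sum += kernel_ctr[i];
	}
	return sum;
}
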
+ */ +static int pai_alloc_cpu(struct perf_event *event, int cpu) +{ + int rc, idx = PAI_PMU_IDX(event); + struct pai_map *cpump = NULL; + bool need_paiext_cb = false; + struct pai_mapptr *mp; + + mutex_lock(&pai_reserve_mutex); + /* Allocate root node */ + rc = pai_root_alloc(idx); + if (rc) + goto unlock; + + /* Allocate node for this event */ + mp = per_cpu_ptr(pai_root[idx].mapptr, cpu); + cpump = mp->mapptr; + if (!cpump) { /* Paicrypt_map allocated? */ + rc = -ENOMEM; + cpump = kzalloc(sizeof(*cpump), GFP_KERNEL); + if (!cpump) + goto undo; + /* Allocate memory for counter page and counter extraction. + * Only the first counting event has to allocate a page. + */ + mp->mapptr = cpump; + if (idx == PAI_PMU_CRYPTO) { + cpump->area = (unsigned long *)get_zeroed_page(GFP_KERNEL); + /* free_page() can handle 0x0 address */ + cpump->fullpage = true; + } else { /* PAI_PMU_EXT */ + /* + * Allocate memory for counter area and counter extraction. + * These are + * - a 512 byte block and requires 512 byte boundary + * alignment. + * - a 1KB byte block and requires 1KB boundary + * alignment. + * Only the first counting event has to allocate the area. + * + * Note: This works with commit 59bb47985c1d by default. + * Backporting this to kernels without this commit might + * needs adjustment. + */ + cpump->area = kzalloc(pai_pmu[idx].area_size, GFP_KERNEL); + cpump->paiext_cb = kzalloc(PAIE1_CB_SZ, GFP_KERNEL); + need_paiext_cb = true; + } + cpump->save = kvmalloc_array(pai_pmu[idx].num_avail + 1, + sizeof(struct pai_userdata), + GFP_KERNEL); + if (!cpump->area || !cpump->save || + (need_paiext_cb && !cpump->paiext_cb)) { + pai_free(mp); + goto undo; + } + INIT_LIST_HEAD(&cpump->syswide_list); + refcount_set(&cpump->refcnt, 1); + rc = 0; + } else { + refcount_inc(&cpump->refcnt); + } + +undo: + if (rc) { + /* Error in allocation of event, decrement anchor. Since + * the event in not created, its destroy() function is never + * invoked. Adjust the reference counter for the anchor. + */ + pai_root_free(idx); + } +unlock: + mutex_unlock(&pai_reserve_mutex); + /* If rc is non-zero, no increment of counter/sampler was done. */ + return rc; +} + +static int pai_alloc(struct perf_event *event) +{ + struct cpumask *maskptr; + int cpu, rc = -ENOMEM; + + maskptr = kzalloc(sizeof(*maskptr), GFP_KERNEL); + if (!maskptr) + goto out; + + for_each_online_cpu(cpu) { + rc = pai_alloc_cpu(event, cpu); + if (rc) { + for_each_cpu(cpu, maskptr) + pai_event_destroy_cpu(event, cpu); + kfree(maskptr); + goto out; + } + cpumask_set_cpu(cpu, maskptr); + } + + /* + * On error all cpumask are freed and all events have been destroyed. + * Save of which CPUs data structures have been allocated for. + * Release them in pai_event_destroy call back function + * for this event. + */ + PAI_CPU_MASK(event) = maskptr; + rc = 0; +out: + return rc; +} + +/* Validate event number and return error if event is not supported. + * On successful return, PAI_PMU_IDX(event) is set to the index of + * the supporting paing_support[] array element. 
+ */ +static int pai_event_valid(struct perf_event *event, int idx) +{ + struct perf_event_attr *a = &event->attr; + struct pai_pmu *pp = &pai_pmu[idx]; + + /* PAI crypto PMU registered as PERF_TYPE_RAW, check event type */ + if (a->type != PERF_TYPE_RAW && event->pmu->type != a->type) + return -ENOENT; + /* Allow only CRYPTO_ALL/NNPA_ALL for sampling */ + if (a->sample_period && a->config != pp->base) + return -EINVAL; + /* PAI crypto event must be in valid range, try others if not */ + if (a->config < pp->base || a->config > pp->base + pp->num_avail) + return -ENOENT; + if (idx == PAI_PMU_EXT && a->exclude_user) + return -EINVAL; + PAI_PMU_IDX(event) = idx; + return 0; +} + +/* Might be called on different CPU than the one the event is intended for. */ +static int pai_event_init(struct perf_event *event, int idx) +{ + struct perf_event_attr *a = &event->attr; + int rc; + + /* PAI event must be valid and in supported range */ + rc = pai_event_valid(event, idx); + if (rc) + goto out; + /* Get a page to store last counter values for sampling */ + if (a->sample_period) { + PAI_SAVE_AREA(event) = get_zeroed_page(GFP_KERNEL); + if (!PAI_SAVE_AREA(event)) { + rc = -ENOMEM; + goto out; + } + } + + if (event->cpu >= 0) + rc = pai_alloc_cpu(event, event->cpu); + else + rc = pai_alloc(event); + if (rc) { + free_page(PAI_SAVE_AREA(event)); + goto out; + } + + if (a->sample_period) { + a->sample_period = 1; + a->freq = 0; + /* Register for paicrypt_sched_task() to be called */ + event->attach_state |= PERF_ATTACH_SCHED_CB; + /* Add raw data which contain the memory mapped counters */ + a->sample_type |= PERF_SAMPLE_RAW; + /* Turn off inheritance */ + a->inherit = 0; + } +out: + return rc; +} + +static int paicrypt_event_init(struct perf_event *event) +{ + int rc = pai_event_init(event, PAI_PMU_CRYPTO); + + if (!rc) { + event->destroy = paicrypt_event_destroy; + static_branch_inc(&pai_key); + } + return rc; +} + +static void pai_read(struct perf_event *event, + u64 (*fct)(struct perf_event *event)) +{ + u64 prev, new, delta; + + prev = local64_read(&event->hw.prev_count); + new = fct(event); + local64_set(&event->hw.prev_count, new); + delta = (prev <= new) ? 
new - prev : (-1ULL - prev) + new + 1; + local64_add(delta, &event->count); +} + +static void paicrypt_read(struct perf_event *event) +{ + pai_read(event, paicrypt_getall); +} + +static void pai_start(struct perf_event *event, int flags, + u64 (*fct)(struct perf_event *event)) +{ + int idx = PAI_PMU_IDX(event); + struct pai_pmu *pp = &pai_pmu[idx]; + struct pai_mapptr *mp = this_cpu_ptr(pai_root[idx].mapptr); + struct pai_map *cpump = mp->mapptr; + u64 sum; + + if (!event->attr.sample_period) { /* Counting */ + sum = fct(event); /* Get current value */ + local64_set(&event->hw.prev_count, sum); + } else { /* Sampling */ + memcpy((void *)PAI_SAVE_AREA(event), cpump->area, pp->area_size); + /* Enable context switch callback for system-wide sampling */ + if (!(event->attach_state & PERF_ATTACH_TASK)) { + list_add_tail(PAI_SWLIST(event), &cpump->syswide_list); + perf_sched_cb_inc(event->pmu); + } else { + cpump->event = event; + } + } +} + +static void paicrypt_start(struct perf_event *event, int flags) +{ + pai_start(event, flags, paicrypt_getall); +} + +static int pai_add(struct perf_event *event, int flags) +{ + int idx = PAI_PMU_IDX(event); + struct pai_mapptr *mp = this_cpu_ptr(pai_root[idx].mapptr); + struct pai_map *cpump = mp->mapptr; + struct paiext_cb *pcb = cpump->paiext_cb; + unsigned long ccd; + + if (++cpump->active_events == 1) { + if (!pcb) { /* PAI crypto */ + ccd = virt_to_phys(cpump->area) | PAI_CRYPTO_KERNEL_OFFSET; + WRITE_ONCE(get_lowcore()->ccd, ccd); + local_ctl_set_bit(0, CR0_CRYPTOGRAPHY_COUNTER_BIT); + } else { /* PAI extension 1 */ + ccd = virt_to_phys(pcb); + WRITE_ONCE(get_lowcore()->aicd, ccd); + pcb->acc = virt_to_phys(cpump->area) | 0x1; + /* Enable CPU instruction lookup for PAIE1 control block */ + local_ctl_set_bit(0, CR0_PAI_EXTENSION_BIT); + } + } + if (flags & PERF_EF_START) + pai_pmu[idx].pmu->start(event, PERF_EF_RELOAD); + event->hw.state = 0; + return 0; +} + +static int paicrypt_add(struct perf_event *event, int flags) +{ + return pai_add(event, flags); +} + +static void pai_have_sample(struct perf_event *, struct pai_map *); +static void pai_stop(struct perf_event *event, int flags) +{ + int idx = PAI_PMU_IDX(event); + struct pai_mapptr *mp = this_cpu_ptr(pai_root[idx].mapptr); + struct pai_map *cpump = mp->mapptr; + + if (!event->attr.sample_period) { /* Counting */ + pai_pmu[idx].pmu->read(event); + } else { /* Sampling */ + if (!(event->attach_state & PERF_ATTACH_TASK)) { + perf_sched_cb_dec(event->pmu); + list_del(PAI_SWLIST(event)); + } else { + pai_have_sample(event, cpump); + cpump->event = NULL; + } + } + event->hw.state = PERF_HES_STOPPED; +} + +static void paicrypt_stop(struct perf_event *event, int flags) +{ + pai_stop(event, flags); +} + +static void pai_del(struct perf_event *event, int flags) +{ + int idx = PAI_PMU_IDX(event); + struct pai_mapptr *mp = this_cpu_ptr(pai_root[idx].mapptr); + struct pai_map *cpump = mp->mapptr; + struct paiext_cb *pcb = cpump->paiext_cb; + + pai_pmu[idx].pmu->stop(event, PERF_EF_UPDATE); + if (--cpump->active_events == 0) { + if (!pcb) { /* PAI crypto */ + local_ctl_clear_bit(0, CR0_CRYPTOGRAPHY_COUNTER_BIT); + WRITE_ONCE(get_lowcore()->ccd, 0); + } else { /* PAI extension 1 */ + /* Disable CPU instruction lookup for PAIE1 control block */ + local_ctl_clear_bit(0, CR0_PAI_EXTENSION_BIT); + pcb->acc = 0; + WRITE_ONCE(get_lowcore()->aicd, 0); + } + } +} + +static void paicrypt_del(struct perf_event *event, int flags) +{ + pai_del(event, flags); +} + +/* Create raw data and save it in buffer. 
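
pai_read() above (and pai_copy() a little further down) compute counter deltas the same way: if the new reading is not smaller than the previous one it is a plain subtraction, otherwise the counter wrapped and the delta is taken modulo 2^64. A standalone sketch of that computation (counter_delta is an illustrative name):

#include <stdint.h>

/* Delta between two readings of a free-running 64-bit counter,
 * accounting for a single wraparound, as in pai_read()/pai_copy().
 */
static uint64_t counter_delta(uint64_t prev, uint64_t new_val)
{
	if (prev <= new_val)
		return new_val - prev;
	return (UINT64_MAX - prev) + new_val + 1;
}

Because unsigned subtraction is already modular, "new_val - prev" alone would give the same result; the explicit branch simply documents the wraparound case.
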
Calculate the delta for each + * counter between this invocation and the last invocation. + * Returns number of bytes copied. + * Saves only entries with positive counter difference of the form + * 2 bytes: Number of counter + * 8 bytes: Value of counter + */ +static size_t pai_copy(struct pai_userdata *userdata, unsigned long *page, + struct pai_pmu *pp, unsigned long *page_old, + bool exclude_user, bool exclude_kernel) +{ + int i, outidx = 0; + + for (i = 1; i <= pp->num_avail; i++) { + u64 val = 0, val_old = 0; + + if (!exclude_kernel) { + val += pai_getctr(page, i, pp->kernel_offset); + val_old += pai_getctr(page_old, i, pp->kernel_offset); + } + if (!exclude_user) { + val += pai_getctr(page, i, 0); + val_old += pai_getctr(page_old, i, 0); + } + if (val >= val_old) + val -= val_old; + else + val = (~0ULL - val_old) + val + 1; + if (val) { + userdata[outidx].num = i; + userdata[outidx].value = val; + outidx++; + } + } + return outidx * sizeof(*userdata); +} + +/* Write sample when one or more counters values are nonzero. + * + * Note: The function paicrypt_sched_task() and pai_push_sample() are not + * invoked after function paicrypt_del() has been called because of function + * perf_sched_cb_dec(). Both functions are only + * called when sampling is active. Function perf_sched_cb_inc() + * has been invoked to install function paicrypt_sched_task() as call back + * to run at context switch time. + * + * This causes function perf_event_context_sched_out() and + * perf_event_context_sched_in() to check whether the PMU has installed an + * sched_task() callback. That callback is not active after paicrypt_del() + * returns and has deleted the event on that CPU. + */ +static int pai_push_sample(size_t rawsize, struct pai_map *cpump, + struct perf_event *event) +{ + int idx = PAI_PMU_IDX(event); + struct pai_pmu *pp = &pai_pmu[idx]; + struct perf_sample_data data; + struct perf_raw_record raw; + struct pt_regs regs; + int overflow; + + /* Setup perf sample */ + memset(®s, 0, sizeof(regs)); + memset(&raw, 0, sizeof(raw)); + memset(&data, 0, sizeof(data)); + perf_sample_data_init(&data, 0, event->hw.last_period); + if (event->attr.sample_type & PERF_SAMPLE_TID) { + data.tid_entry.pid = task_tgid_nr(current); + data.tid_entry.tid = task_pid_nr(current); + } + if (event->attr.sample_type & PERF_SAMPLE_TIME) + data.time = event->clock(); + if (event->attr.sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER)) + data.id = event->id; + if (event->attr.sample_type & PERF_SAMPLE_CPU) { + data.cpu_entry.cpu = smp_processor_id(); + data.cpu_entry.reserved = 0; + } + if (event->attr.sample_type & PERF_SAMPLE_RAW) { + raw.frag.size = rawsize; + raw.frag.data = cpump->save; + perf_sample_save_raw_data(&data, event, &raw); + } + + overflow = perf_event_overflow(event, &data, ®s); + perf_event_update_userpage(event); + /* Save crypto counter lowcore page after reading event data. */ + memcpy((void *)PAI_SAVE_AREA(event), cpump->area, pp->area_size); + return overflow; +} + +/* Check if there is data to be saved on schedule out of a task. 
*/ +static void pai_have_sample(struct perf_event *event, struct pai_map *cpump) +{ + struct pai_pmu *pp; + size_t rawsize; + + if (!event) /* No event active */ + return; + pp = &pai_pmu[PAI_PMU_IDX(event)]; + rawsize = pai_copy(cpump->save, cpump->area, pp, + (unsigned long *)PAI_SAVE_AREA(event), + event->attr.exclude_user, + event->attr.exclude_kernel); + if (rawsize) /* No incremented counters */ + pai_push_sample(rawsize, cpump, event); +} + +/* Check if there is data to be saved on schedule out of a task. */ +static void pai_have_samples(int idx) +{ + struct pai_mapptr *mp = this_cpu_ptr(pai_root[idx].mapptr); + struct pai_map *cpump = mp->mapptr; + struct perf_event *event; + + list_for_each_entry(event, &cpump->syswide_list, hw.tp_list) + pai_have_sample(event, cpump); +} + +/* Called on schedule-in and schedule-out. No access to event structure, + * but for sampling only event CRYPTO_ALL is allowed. + */ +static void paicrypt_sched_task(struct perf_event_pmu_context *pmu_ctx, + struct task_struct *task, bool sched_in) +{ + /* We started with a clean page on event installation. So read out + * results on schedule_out and if page was dirty, save old values. + */ + if (!sched_in) + pai_have_samples(PAI_PMU_CRYPTO); +} + +/* ============================= paiext ====================================*/ + +static void paiext_event_destroy(struct perf_event *event) +{ + pai_event_destroy(event); +} + +/* Might be called on different CPU than the one the event is intended for. */ +static int paiext_event_init(struct perf_event *event) +{ + int rc = pai_event_init(event, PAI_PMU_EXT); + + if (!rc) { + event->attr.exclude_kernel = true; /* No kernel space part */ + event->destroy = paiext_event_destroy; + /* Offset of NNPA in paiext_cb */ + event->hw.config_base = offsetof(struct paiext_cb, acc); + } + return rc; +} + +static u64 paiext_getall(struct perf_event *event) +{ + return pai_getdata(event, false); +} + +static void paiext_read(struct perf_event *event) +{ + pai_read(event, paiext_getall); +} + +static void paiext_start(struct perf_event *event, int flags) +{ + pai_start(event, flags, paiext_getall); +} + +static int paiext_add(struct perf_event *event, int flags) +{ + return pai_add(event, flags); +} + +static void paiext_stop(struct perf_event *event, int flags) +{ + pai_stop(event, flags); +} + +static void paiext_del(struct perf_event *event, int flags) +{ + pai_del(event, flags); +} + +/* Called on schedule-in and schedule-out. No access to event structure, + * but for sampling only event NNPA_ALL is allowed. + */ +static void paiext_sched_task(struct perf_event_pmu_context *pmu_ctx, + struct task_struct *task, bool sched_in) +{ + /* We started with a clean page on event installation. So read out + * results on schedule_out and if page was dirty, save old values. + */ + if (!sched_in) + pai_have_samples(PAI_PMU_EXT); +} + +/* Attribute definitions for paicrypt interface. As with other CPU + * Measurement Facilities, there is one attribute per mapped counter. + * The number of mapped counters may vary per machine generation. Use + * the QUERY PROCESSOR ACTIVITY COUNTER INFORMATION (QPACI) instruction + * to determine the number of mapped counters. The instructions returns + * a positive number, which is the highest number of supported counters. + * All counters less than this number are also supported, there are no + * holes. A returned number of zero means no support for mapped counters. + * + * The identification of the counter is a unique number. 
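
For sampling, pai_start()/pai_stop() above distinguish per-task events, whose pointer is kept in the map and flushed directly when the event stops, from system-wide events, which are chained on syswide_list and rely on perf_sched_cb_inc()/perf_sched_cb_dec() so that paicrypt_sched_task()/paiext_sched_task() run at context switch and push a sample for every listed event on schedule-out. A condensed standalone model of that bookkeeping (illustrative types and a single-linked list instead of the real perf API and list_head):

#include <stdbool.h>
#include <stddef.h>

struct sampler { bool system_wide; struct sampler *next; };

struct cpu_state {
	struct sampler *syswide_list;	/* system-wide samplers */
	struct sampler *task_event;	/* current per-task sampler */
	int sched_cb_users;		/* perf_sched_cb_inc()/dec() stand-in */
};

/* Placeholder for pai_have_sample() in the real code. */
static void push_sample_if_dirty(struct sampler *s) { (void)s; }

static void sampler_start(struct cpu_state *c, struct sampler *s)
{
	if (s->system_wide) {
		s->next = c->syswide_list;	/* list_add_tail() stand-in */
		c->syswide_list = s;
		c->sched_cb_users++;		/* sched_task() now runs on switches */
	} else {
		c->task_event = s;		/* flushed directly on stop */
	}
}

static void on_schedule_out(struct cpu_state *c)
{
	for (struct sampler *s = c->syswide_list; s; s = s->next)
		push_sample_if_dirty(s);
}
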
The chosen range + * is 0x1000 + offset in mapped kernel page. + * All CPU Measurement Facility counters identifiers must be unique and + * the numbers from 0 to 496 are already used for the CPU Measurement + * Counter facility. Numbers 0xb0000, 0xbc000 and 0xbd000 are already + * used for the CPU Measurement Sampling facility. + */ +PMU_FORMAT_ATTR(event, "config:0-63"); + +static struct attribute *paicrypt_format_attr[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group paicrypt_events_group = { + .name = "events", + .attrs = NULL /* Filled in attr_event_init() */ +}; + +static struct attribute_group paicrypt_format_group = { + .name = "format", + .attrs = paicrypt_format_attr, +}; + +static const struct attribute_group *paicrypt_attr_groups[] = { + &paicrypt_events_group, + &paicrypt_format_group, + NULL, +}; + +/* Performance monitoring unit for mapped counters */ +static struct pmu paicrypt = { + .task_ctx_nr = perf_hw_context, + .event_init = paicrypt_event_init, + .add = paicrypt_add, + .del = paicrypt_del, + .start = paicrypt_start, + .stop = paicrypt_stop, + .read = paicrypt_read, + .sched_task = paicrypt_sched_task, + .attr_groups = paicrypt_attr_groups +}; + +/* List of symbolic PAI counter names. */ +static const char * const paicrypt_ctrnames[] = { + [0] = "CRYPTO_ALL", + [1] = "KM_DEA", + [2] = "KM_TDEA_128", + [3] = "KM_TDEA_192", + [4] = "KM_ENCRYPTED_DEA", + [5] = "KM_ENCRYPTED_TDEA_128", + [6] = "KM_ENCRYPTED_TDEA_192", + [7] = "KM_AES_128", + [8] = "KM_AES_192", + [9] = "KM_AES_256", + [10] = "KM_ENCRYPTED_AES_128", + [11] = "KM_ENCRYPTED_AES_192", + [12] = "KM_ENCRYPTED_AES_256", + [13] = "KM_XTS_AES_128", + [14] = "KM_XTS_AES_256", + [15] = "KM_XTS_ENCRYPTED_AES_128", + [16] = "KM_XTS_ENCRYPTED_AES_256", + [17] = "KMC_DEA", + [18] = "KMC_TDEA_128", + [19] = "KMC_TDEA_192", + [20] = "KMC_ENCRYPTED_DEA", + [21] = "KMC_ENCRYPTED_TDEA_128", + [22] = "KMC_ENCRYPTED_TDEA_192", + [23] = "KMC_AES_128", + [24] = "KMC_AES_192", + [25] = "KMC_AES_256", + [26] = "KMC_ENCRYPTED_AES_128", + [27] = "KMC_ENCRYPTED_AES_192", + [28] = "KMC_ENCRYPTED_AES_256", + [29] = "KMC_PRNG", + [30] = "KMA_GCM_AES_128", + [31] = "KMA_GCM_AES_192", + [32] = "KMA_GCM_AES_256", + [33] = "KMA_GCM_ENCRYPTED_AES_128", + [34] = "KMA_GCM_ENCRYPTED_AES_192", + [35] = "KMA_GCM_ENCRYPTED_AES_256", + [36] = "KMF_DEA", + [37] = "KMF_TDEA_128", + [38] = "KMF_TDEA_192", + [39] = "KMF_ENCRYPTED_DEA", + [40] = "KMF_ENCRYPTED_TDEA_128", + [41] = "KMF_ENCRYPTED_TDEA_192", + [42] = "KMF_AES_128", + [43] = "KMF_AES_192", + [44] = "KMF_AES_256", + [45] = "KMF_ENCRYPTED_AES_128", + [46] = "KMF_ENCRYPTED_AES_192", + [47] = "KMF_ENCRYPTED_AES_256", + [48] = "KMCTR_DEA", + [49] = "KMCTR_TDEA_128", + [50] = "KMCTR_TDEA_192", + [51] = "KMCTR_ENCRYPTED_DEA", + [52] = "KMCTR_ENCRYPTED_TDEA_128", + [53] = "KMCTR_ENCRYPTED_TDEA_192", + [54] = "KMCTR_AES_128", + [55] = "KMCTR_AES_192", + [56] = "KMCTR_AES_256", + [57] = "KMCTR_ENCRYPTED_AES_128", + [58] = "KMCTR_ENCRYPTED_AES_192", + [59] = "KMCTR_ENCRYPTED_AES_256", + [60] = "KMO_DEA", + [61] = "KMO_TDEA_128", + [62] = "KMO_TDEA_192", + [63] = "KMO_ENCRYPTED_DEA", + [64] = "KMO_ENCRYPTED_TDEA_128", + [65] = "KMO_ENCRYPTED_TDEA_192", + [66] = "KMO_AES_128", + [67] = "KMO_AES_192", + [68] = "KMO_AES_256", + [69] = "KMO_ENCRYPTED_AES_128", + [70] = "KMO_ENCRYPTED_AES_192", + [71] = "KMO_ENCRYPTED_AES_256", + [72] = "KIMD_SHA_1", + [73] = "KIMD_SHA_256", + [74] = "KIMD_SHA_512", + [75] = "KIMD_SHA3_224", + [76] = "KIMD_SHA3_256", + [77] = "KIMD_SHA3_384", + 
[78] = "KIMD_SHA3_512", + [79] = "KIMD_SHAKE_128", + [80] = "KIMD_SHAKE_256", + [81] = "KIMD_GHASH", + [82] = "KLMD_SHA_1", + [83] = "KLMD_SHA_256", + [84] = "KLMD_SHA_512", + [85] = "KLMD_SHA3_224", + [86] = "KLMD_SHA3_256", + [87] = "KLMD_SHA3_384", + [88] = "KLMD_SHA3_512", + [89] = "KLMD_SHAKE_128", + [90] = "KLMD_SHAKE_256", + [91] = "KMAC_DEA", + [92] = "KMAC_TDEA_128", + [93] = "KMAC_TDEA_192", + [94] = "KMAC_ENCRYPTED_DEA", + [95] = "KMAC_ENCRYPTED_TDEA_128", + [96] = "KMAC_ENCRYPTED_TDEA_192", + [97] = "KMAC_AES_128", + [98] = "KMAC_AES_192", + [99] = "KMAC_AES_256", + [100] = "KMAC_ENCRYPTED_AES_128", + [101] = "KMAC_ENCRYPTED_AES_192", + [102] = "KMAC_ENCRYPTED_AES_256", + [103] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_DEA", + [104] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_TDEA_128", + [105] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_TDEA_192", + [106] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_DEA", + [107] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_TDEA_128", + [108] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_TDEA_192", + [109] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_AES_128", + [110] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_AES_192", + [111] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_AES_256", + [112] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_128", + [113] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_192", + [114] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_256", + [115] = "PCC_COMPUTE_XTS_PARAMETER_USING_AES_128", + [116] = "PCC_COMPUTE_XTS_PARAMETER_USING_AES_256", + [117] = "PCC_COMPUTE_XTS_PARAMETER_USING_ENCRYPTED_AES_128", + [118] = "PCC_COMPUTE_XTS_PARAMETER_USING_ENCRYPTED_AES_256", + [119] = "PCC_SCALAR_MULTIPLY_P256", + [120] = "PCC_SCALAR_MULTIPLY_P384", + [121] = "PCC_SCALAR_MULTIPLY_P521", + [122] = "PCC_SCALAR_MULTIPLY_ED25519", + [123] = "PCC_SCALAR_MULTIPLY_ED448", + [124] = "PCC_SCALAR_MULTIPLY_X25519", + [125] = "PCC_SCALAR_MULTIPLY_X448", + [126] = "PRNO_SHA_512_DRNG", + [127] = "PRNO_TRNG_QUERY_RAW_TO_CONDITIONED_RATIO", + [128] = "PRNO_TRNG", + [129] = "KDSA_ECDSA_VERIFY_P256", + [130] = "KDSA_ECDSA_VERIFY_P384", + [131] = "KDSA_ECDSA_VERIFY_P521", + [132] = "KDSA_ECDSA_SIGN_P256", + [133] = "KDSA_ECDSA_SIGN_P384", + [134] = "KDSA_ECDSA_SIGN_P521", + [135] = "KDSA_ENCRYPTED_ECDSA_SIGN_P256", + [136] = "KDSA_ENCRYPTED_ECDSA_SIGN_P384", + [137] = "KDSA_ENCRYPTED_ECDSA_SIGN_P521", + [138] = "KDSA_EDDSA_VERIFY_ED25519", + [139] = "KDSA_EDDSA_VERIFY_ED448", + [140] = "KDSA_EDDSA_SIGN_ED25519", + [141] = "KDSA_EDDSA_SIGN_ED448", + [142] = "KDSA_ENCRYPTED_EDDSA_SIGN_ED25519", + [143] = "KDSA_ENCRYPTED_EDDSA_SIGN_ED448", + [144] = "PCKMO_ENCRYPT_DEA_KEY", + [145] = "PCKMO_ENCRYPT_TDEA_128_KEY", + [146] = "PCKMO_ENCRYPT_TDEA_192_KEY", + [147] = "PCKMO_ENCRYPT_AES_128_KEY", + [148] = "PCKMO_ENCRYPT_AES_192_KEY", + [149] = "PCKMO_ENCRYPT_AES_256_KEY", + [150] = "PCKMO_ENCRYPT_ECC_P256_KEY", + [151] = "PCKMO_ENCRYPT_ECC_P384_KEY", + [152] = "PCKMO_ENCRYPT_ECC_P521_KEY", + [153] = "PCKMO_ENCRYPT_ECC_ED25519_KEY", + [154] = "PCKMO_ENCRYPT_ECC_ED448_KEY", + [155] = "IBM_RESERVED_155", + [156] = "IBM_RESERVED_156", + [157] = "KM_FULL_XTS_AES_128", + [158] = "KM_FULL_XTS_AES_256", + [159] = "KM_FULL_XTS_ENCRYPTED_AES_128", + [160] = "KM_FULL_XTS_ENCRYPTED_AES_256", + [161] = "KMAC_HMAC_SHA_224", + [162] = "KMAC_HMAC_SHA_256", + [163] = "KMAC_HMAC_SHA_384", + [164] = "KMAC_HMAC_SHA_512", + [165] = "KMAC_HMAC_ENCRYPTED_SHA_224", + [166] = "KMAC_HMAC_ENCRYPTED_SHA_256", + [167] = "KMAC_HMAC_ENCRYPTED_SHA_384", + [168] = "KMAC_HMAC_ENCRYPTED_SHA_512", + [169] = 
"PCKMO_ENCRYPT_HMAC_512_KEY", + [170] = "PCKMO_ENCRYPT_HMAC_1024_KEY", + [171] = "PCKMO_ENCRYPT_AES_XTS_128", + [172] = "PCKMO_ENCRYPT_AES_XTS_256", +}; + +static struct attribute *paiext_format_attr[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group paiext_events_group = { + .name = "events", + .attrs = NULL, /* Filled in attr_event_init() */ +}; + +static struct attribute_group paiext_format_group = { + .name = "format", + .attrs = paiext_format_attr, +}; + +static const struct attribute_group *paiext_attr_groups[] = { + &paiext_events_group, + &paiext_format_group, + NULL, +}; + +/* Performance monitoring unit for mapped counters */ +static struct pmu paiext = { + .task_ctx_nr = perf_hw_context, + .event_init = paiext_event_init, + .add = paiext_add, + .del = paiext_del, + .start = paiext_start, + .stop = paiext_stop, + .read = paiext_read, + .sched_task = paiext_sched_task, + .attr_groups = paiext_attr_groups, +}; + +/* List of symbolic PAI extension 1 NNPA counter names. */ +static const char * const paiext_ctrnames[] = { + [0] = "NNPA_ALL", + [1] = "NNPA_ADD", + [2] = "NNPA_SUB", + [3] = "NNPA_MUL", + [4] = "NNPA_DIV", + [5] = "NNPA_MIN", + [6] = "NNPA_MAX", + [7] = "NNPA_LOG", + [8] = "NNPA_EXP", + [9] = "NNPA_IBM_RESERVED_9", + [10] = "NNPA_RELU", + [11] = "NNPA_TANH", + [12] = "NNPA_SIGMOID", + [13] = "NNPA_SOFTMAX", + [14] = "NNPA_BATCHNORM", + [15] = "NNPA_MAXPOOL2D", + [16] = "NNPA_AVGPOOL2D", + [17] = "NNPA_LSTMACT", + [18] = "NNPA_GRUACT", + [19] = "NNPA_CONVOLUTION", + [20] = "NNPA_MATMUL_OP", + [21] = "NNPA_MATMUL_OP_BCAST23", + [22] = "NNPA_SMALLBATCH", + [23] = "NNPA_LARGEDIM", + [24] = "NNPA_SMALLTENSOR", + [25] = "NNPA_1MFRAME", + [26] = "NNPA_2GFRAME", + [27] = "NNPA_ACCESSEXCEPT", + [28] = "NNPA_TRANSFORM", + [29] = "NNPA_GELU", + [30] = "NNPA_MOMENTS", + [31] = "NNPA_LAYERNORM", + [32] = "NNPA_MATMUL_OP_BCAST1", + [33] = "NNPA_SQRT", + [34] = "NNPA_INVSQRT", + [35] = "NNPA_NORM", + [36] = "NNPA_REDUCE", +}; + +static void __init attr_event_free(struct attribute **attrs) +{ + struct perf_pmu_events_attr *pa; + unsigned int i; + + for (i = 0; attrs[i]; i++) { + struct device_attribute *dap; + + dap = container_of(attrs[i], struct device_attribute, attr); + pa = container_of(dap, struct perf_pmu_events_attr, attr); + kfree(pa); + } + kfree(attrs); +} + +static struct attribute * __init attr_event_init_one(int num, + unsigned long base, + const char *name) +{ + struct perf_pmu_events_attr *pa; + + pa = kzalloc(sizeof(*pa), GFP_KERNEL); + if (!pa) + return NULL; + + sysfs_attr_init(&pa->attr.attr); + pa->id = base + num; + pa->attr.attr.name = name; + pa->attr.attr.mode = 0444; + pa->attr.show = cpumf_events_sysfs_show; + pa->attr.store = NULL; + return &pa->attr.attr; +} + +static struct attribute ** __init attr_event_init(struct pai_pmu *p) +{ + unsigned int min_attr = min_t(unsigned int, p->num_named, p->num_avail); + struct attribute **attrs; + unsigned int i; + + attrs = kmalloc_array(min_attr + 1, sizeof(*attrs), GFP_KERNEL | __GFP_ZERO); + if (!attrs) + goto out; + for (i = 0; i < min_attr; i++) { + attrs[i] = attr_event_init_one(i, p->base, p->names[i]); + if (!attrs[i]) { + attr_event_free(attrs); + attrs = NULL; + goto out; + } + } + attrs[i] = NULL; +out: + return attrs; +} + +static void __init pai_pmu_exit(struct pai_pmu *p) +{ + attr_event_free(p->event_group->attrs); + p->event_group->attrs = NULL; +} + +/* Add a PMU. Install its events and register the PMU device driver + * call back functions. 
+ */ +static int __init pai_pmu_init(struct pai_pmu *p) +{ + int rc = -ENOMEM; + + + /* Export known PAI events */ + p->event_group->attrs = attr_event_init(p); + if (!p->event_group->attrs) { + pr_err("Creation of PMU %s /sysfs failed\n", p->pmuname); + goto out; + } + + rc = perf_pmu_register(p->pmu, p->pmuname, -1); + if (rc) { + pai_pmu_exit(p); + pr_err("Registering PMU %s failed with rc=%i\n", p->pmuname, + rc); + } +out: + return rc; +} + +/* PAI PMU characteristics table */ +static struct pai_pmu pai_pmu[] __refdata = { + [PAI_PMU_CRYPTO] = { + .pmuname = "pai_crypto", + .facility_nr = 196, + .num_named = ARRAY_SIZE(paicrypt_ctrnames), + .names = paicrypt_ctrnames, + .base = PAI_CRYPTO_BASE, + .kernel_offset = PAI_CRYPTO_KERNEL_OFFSET, + .area_size = PAGE_SIZE, + .init = pai_pmu_init, + .exit = pai_pmu_exit, + .pmu = &paicrypt, + .event_group = &paicrypt_events_group + }, + [PAI_PMU_EXT] = { + .pmuname = "pai_ext", + .facility_nr = 197, + .num_named = ARRAY_SIZE(paiext_ctrnames), + .names = paiext_ctrnames, + .base = PAI_NNPA_BASE, + .kernel_offset = 0, + .area_size = PAIE1_CTRBLOCK_SZ, + .init = pai_pmu_init, + .exit = pai_pmu_exit, + .pmu = &paiext, + .event_group = &paiext_events_group + } +}; + +/* + * Check if the PMU (via facility) is supported by machine. Try all of the + * supported PAI PMUs. + * Return number of successfully installed PMUs. + */ +static int __init paipmu_setup(void) +{ + struct qpaci_info_block ib; + int install_ok = 0, rc; + struct pai_pmu *p; + size_t i; + + for (i = 0; i < ARRAY_SIZE(pai_pmu); ++i) { + p = &pai_pmu[i]; + + if (!test_facility(p->facility_nr)) + continue; + + qpaci(&ib); + switch (i) { + case PAI_PMU_CRYPTO: + p->num_avail = ib.num_cc; + if (p->num_avail >= PAI_CRYPTO_MAXCTR) { + pr_err("Too many PMU %s counters %d\n", + p->pmuname, p->num_avail); + continue; + } + break; + case PAI_PMU_EXT: + p->num_avail = ib.num_nnpa; + break; + } + p->num_avail += 1; /* Add xxx_ALL event */ + if (p->init) { + rc = p->init(p); + if (!rc) + ++install_ok; + } + } + return install_ok; +} + +static int __init pai_init(void) +{ + /* Setup s390dbf facility */ + paidbg = debug_register("pai", 32, 256, 128); + if (!paidbg) { + pr_err("Registration of s390dbf pai failed\n"); + return -ENOMEM; + } + debug_register_view(paidbg, &debug_sprintf_view); + + if (!paipmu_setup()) { + /* No PMU registration, no need for debug buffer */ + debug_unregister_view(paidbg, &debug_sprintf_view); + debug_unregister(paidbg); + return -ENODEV; + } + return 0; +} + +device_initcall(pai_init); diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c deleted file mode 100644 index 62bf8a15bf32..000000000000 --- a/arch/s390/kernel/perf_pai_crypto.c +++ /dev/null @@ -1,843 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Performance event support - Processor Activity Instrumentation Facility - * - * Copyright IBM Corp. 
2022 - * Author(s): Thomas Richter <tmricht@linux.ibm.com> - */ -#define KMSG_COMPONENT "pai_crypto" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt - -#include <linux/kernel.h> -#include <linux/kernel_stat.h> -#include <linux/percpu.h> -#include <linux/notifier.h> -#include <linux/init.h> -#include <linux/io.h> -#include <linux/perf_event.h> -#include <asm/ctlreg.h> -#include <asm/pai.h> -#include <asm/debug.h> - -static debug_info_t *cfm_dbg; -static unsigned int paicrypt_cnt; /* Size of the mapped counter sets */ - /* extracted with QPACI instruction */ - -DEFINE_STATIC_KEY_FALSE(pai_key); - -struct pai_userdata { - u16 num; - u64 value; -} __packed; - -struct paicrypt_map { - unsigned long *page; /* Page for CPU to store counters */ - struct pai_userdata *save; /* Page to store no-zero counters */ - unsigned int active_events; /* # of PAI crypto users */ - refcount_t refcnt; /* Reference count mapped buffers */ - struct perf_event *event; /* Perf event for sampling */ - struct list_head syswide_list; /* List system-wide sampling events */ -}; - -struct paicrypt_mapptr { - struct paicrypt_map *mapptr; -}; - -static struct paicrypt_root { /* Anchor to per CPU data */ - refcount_t refcnt; /* Overall active events */ - struct paicrypt_mapptr __percpu *mapptr; -} paicrypt_root; - -/* Free per CPU data when the last event is removed. */ -static void paicrypt_root_free(void) -{ - if (refcount_dec_and_test(&paicrypt_root.refcnt)) { - free_percpu(paicrypt_root.mapptr); - paicrypt_root.mapptr = NULL; - } - debug_sprintf_event(cfm_dbg, 5, "%s root.refcount %d\n", __func__, - refcount_read(&paicrypt_root.refcnt)); -} - -/* - * On initialization of first event also allocate per CPU data dynamically. - * Start with an array of pointers, the array size is the maximum number of - * CPUs possible, which might be larger than the number of CPUs currently - * online. - */ -static int paicrypt_root_alloc(void) -{ - if (!refcount_inc_not_zero(&paicrypt_root.refcnt)) { - /* The memory is already zeroed. */ - paicrypt_root.mapptr = alloc_percpu(struct paicrypt_mapptr); - if (!paicrypt_root.mapptr) - return -ENOMEM; - refcount_set(&paicrypt_root.refcnt, 1); - } - return 0; -} - -/* Release the PMU if event is the last perf event */ -static DEFINE_MUTEX(pai_reserve_mutex); - -/* Free all memory allocated for event counting/sampling setup */ -static void paicrypt_free(struct paicrypt_mapptr *mp) -{ - free_page((unsigned long)mp->mapptr->page); - kvfree(mp->mapptr->save); - kfree(mp->mapptr); - mp->mapptr = NULL; -} - -/* Adjust usage counters and remove allocated memory when all users are - * gone. 
- */ -static void paicrypt_event_destroy_cpu(struct perf_event *event, int cpu) -{ - struct paicrypt_mapptr *mp = per_cpu_ptr(paicrypt_root.mapptr, cpu); - struct paicrypt_map *cpump = mp->mapptr; - - mutex_lock(&pai_reserve_mutex); - debug_sprintf_event(cfm_dbg, 5, "%s event %#llx cpu %d users %d " - "refcnt %u\n", __func__, event->attr.config, - event->cpu, cpump->active_events, - refcount_read(&cpump->refcnt)); - if (refcount_dec_and_test(&cpump->refcnt)) - paicrypt_free(mp); - paicrypt_root_free(); - mutex_unlock(&pai_reserve_mutex); -} - -static void paicrypt_event_destroy(struct perf_event *event) -{ - int cpu; - - static_branch_dec(&pai_key); - free_page(PAI_SAVE_AREA(event)); - if (event->cpu == -1) { - struct cpumask *mask = PAI_CPU_MASK(event); - - for_each_cpu(cpu, mask) - paicrypt_event_destroy_cpu(event, cpu); - kfree(mask); - } else { - paicrypt_event_destroy_cpu(event, event->cpu); - } -} - -static u64 paicrypt_getctr(unsigned long *page, int nr, bool kernel) -{ - if (kernel) - nr += PAI_CRYPTO_MAXCTR; - return page[nr]; -} - -/* Read the counter values. Return value from location in CMP. For event - * CRYPTO_ALL sum up all events. - */ -static u64 paicrypt_getdata(struct perf_event *event, bool kernel) -{ - struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr); - struct paicrypt_map *cpump = mp->mapptr; - u64 sum = 0; - int i; - - if (event->attr.config != PAI_CRYPTO_BASE) { - return paicrypt_getctr(cpump->page, - event->attr.config - PAI_CRYPTO_BASE, - kernel); - } - - for (i = 1; i <= paicrypt_cnt; i++) { - u64 val = paicrypt_getctr(cpump->page, i, kernel); - - if (!val) - continue; - sum += val; - } - return sum; -} - -static u64 paicrypt_getall(struct perf_event *event) -{ - u64 sum = 0; - - if (!event->attr.exclude_kernel) - sum += paicrypt_getdata(event, true); - if (!event->attr.exclude_user) - sum += paicrypt_getdata(event, false); - - return sum; -} - -/* Check concurrent access of counting and sampling for crypto events. - * This function is called in process context and it is save to block. - * When the event initialization functions fails, no other call back will - * be invoked. - * - * Allocate the memory for the event. - */ -static int paicrypt_alloc_cpu(struct perf_event *event, int cpu) -{ - struct paicrypt_map *cpump = NULL; - struct paicrypt_mapptr *mp; - int rc; - - mutex_lock(&pai_reserve_mutex); - /* Allocate root node */ - rc = paicrypt_root_alloc(); - if (rc) - goto unlock; - - /* Allocate node for this event */ - mp = per_cpu_ptr(paicrypt_root.mapptr, cpu); - cpump = mp->mapptr; - if (!cpump) { /* Paicrypt_map allocated? */ - rc = -ENOMEM; - cpump = kzalloc(sizeof(*cpump), GFP_KERNEL); - if (!cpump) - goto undo; - /* Allocate memory for counter page and counter extraction. - * Only the first counting event has to allocate a page. - */ - mp->mapptr = cpump; - cpump->page = (unsigned long *)get_zeroed_page(GFP_KERNEL); - cpump->save = kvmalloc_array(paicrypt_cnt + 1, - sizeof(struct pai_userdata), - GFP_KERNEL); - if (!cpump->page || !cpump->save) { - paicrypt_free(mp); - goto undo; - } - INIT_LIST_HEAD(&cpump->syswide_list); - refcount_set(&cpump->refcnt, 1); - rc = 0; - } else { - refcount_inc(&cpump->refcnt); - } - -undo: - if (rc) { - /* Error in allocation of event, decrement anchor. Since - * the event in not created, its destroy() function is never - * invoked. Adjust the reference counter for the anchor. 
- */ - paicrypt_root_free(); - } -unlock: - mutex_unlock(&pai_reserve_mutex); - return rc; -} - -static int paicrypt_alloc(struct perf_event *event) -{ - struct cpumask *maskptr; - int cpu, rc = -ENOMEM; - - maskptr = kzalloc(sizeof(*maskptr), GFP_KERNEL); - if (!maskptr) - goto out; - - for_each_online_cpu(cpu) { - rc = paicrypt_alloc_cpu(event, cpu); - if (rc) { - for_each_cpu(cpu, maskptr) - paicrypt_event_destroy_cpu(event, cpu); - kfree(maskptr); - goto out; - } - cpumask_set_cpu(cpu, maskptr); - } - - /* - * On error all cpumask are freed and all events have been destroyed. - * Save of which CPUs data structures have been allocated for. - * Release them in paicrypt_event_destroy call back function - * for this event. - */ - PAI_CPU_MASK(event) = maskptr; - rc = 0; -out: - return rc; -} - -/* Might be called on different CPU than the one the event is intended for. */ -static int paicrypt_event_init(struct perf_event *event) -{ - struct perf_event_attr *a = &event->attr; - int rc = 0; - - /* PAI crypto PMU registered as PERF_TYPE_RAW, check event type */ - if (a->type != PERF_TYPE_RAW && event->pmu->type != a->type) - return -ENOENT; - /* PAI crypto event must be in valid range, try others if not */ - if (a->config < PAI_CRYPTO_BASE || - a->config > PAI_CRYPTO_BASE + paicrypt_cnt) - return -ENOENT; - /* Allow only CRYPTO_ALL for sampling */ - if (a->sample_period && a->config != PAI_CRYPTO_BASE) - return -EINVAL; - /* Get a page to store last counter values for sampling */ - if (a->sample_period) { - PAI_SAVE_AREA(event) = get_zeroed_page(GFP_KERNEL); - if (!PAI_SAVE_AREA(event)) { - rc = -ENOMEM; - goto out; - } - } - - if (event->cpu >= 0) - rc = paicrypt_alloc_cpu(event, event->cpu); - else - rc = paicrypt_alloc(event); - if (rc) { - free_page(PAI_SAVE_AREA(event)); - goto out; - } - event->destroy = paicrypt_event_destroy; - - if (a->sample_period) { - a->sample_period = 1; - a->freq = 0; - /* Register for paicrypt_sched_task() to be called */ - event->attach_state |= PERF_ATTACH_SCHED_CB; - /* Add raw data which contain the memory mapped counters */ - a->sample_type |= PERF_SAMPLE_RAW; - /* Turn off inheritance */ - a->inherit = 0; - } - - static_branch_inc(&pai_key); -out: - return rc; -} - -static void paicrypt_read(struct perf_event *event) -{ - u64 prev, new, delta; - - prev = local64_read(&event->hw.prev_count); - new = paicrypt_getall(event); - local64_set(&event->hw.prev_count, new); - delta = (prev <= new) ? 
new - prev - : (-1ULL - prev) + new + 1; /* overflow */ - local64_add(delta, &event->count); -} - -static void paicrypt_start(struct perf_event *event, int flags) -{ - struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr); - struct paicrypt_map *cpump = mp->mapptr; - u64 sum; - - if (!event->attr.sample_period) { /* Counting */ - sum = paicrypt_getall(event); /* Get current value */ - local64_set(&event->hw.prev_count, sum); - } else { /* Sampling */ - memcpy((void *)PAI_SAVE_AREA(event), cpump->page, PAGE_SIZE); - /* Enable context switch callback for system-wide sampling */ - if (!(event->attach_state & PERF_ATTACH_TASK)) { - list_add_tail(PAI_SWLIST(event), &cpump->syswide_list); - perf_sched_cb_inc(event->pmu); - } else { - cpump->event = event; - } - } -} - -static int paicrypt_add(struct perf_event *event, int flags) -{ - struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr); - struct paicrypt_map *cpump = mp->mapptr; - unsigned long ccd; - - if (++cpump->active_events == 1) { - ccd = virt_to_phys(cpump->page) | PAI_CRYPTO_KERNEL_OFFSET; - WRITE_ONCE(get_lowcore()->ccd, ccd); - local_ctl_set_bit(0, CR0_CRYPTOGRAPHY_COUNTER_BIT); - } - if (flags & PERF_EF_START) - paicrypt_start(event, PERF_EF_RELOAD); - event->hw.state = 0; - return 0; -} - -static void paicrypt_have_sample(struct perf_event *, struct paicrypt_map *); -static void paicrypt_stop(struct perf_event *event, int flags) -{ - struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr); - struct paicrypt_map *cpump = mp->mapptr; - - if (!event->attr.sample_period) { /* Counting */ - paicrypt_read(event); - } else { /* Sampling */ - if (!(event->attach_state & PERF_ATTACH_TASK)) { - perf_sched_cb_dec(event->pmu); - list_del(PAI_SWLIST(event)); - } else { - paicrypt_have_sample(event, cpump); - cpump->event = NULL; - } - } - event->hw.state = PERF_HES_STOPPED; -} - -static void paicrypt_del(struct perf_event *event, int flags) -{ - struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr); - struct paicrypt_map *cpump = mp->mapptr; - - paicrypt_stop(event, PERF_EF_UPDATE); - if (--cpump->active_events == 0) { - local_ctl_clear_bit(0, CR0_CRYPTOGRAPHY_COUNTER_BIT); - WRITE_ONCE(get_lowcore()->ccd, 0); - } -} - -/* Create raw data and save it in buffer. Calculate the delta for each - * counter between this invocation and the last invocation. - * Returns number of bytes copied. 
- * Saves only entries with positive counter difference of the form - * 2 bytes: Number of counter - * 8 bytes: Value of counter - */ -static size_t paicrypt_copy(struct pai_userdata *userdata, unsigned long *page, - unsigned long *page_old, bool exclude_user, - bool exclude_kernel) -{ - int i, outidx = 0; - - for (i = 1; i <= paicrypt_cnt; i++) { - u64 val = 0, val_old = 0; - - if (!exclude_kernel) { - val += paicrypt_getctr(page, i, true); - val_old += paicrypt_getctr(page_old, i, true); - } - if (!exclude_user) { - val += paicrypt_getctr(page, i, false); - val_old += paicrypt_getctr(page_old, i, false); - } - if (val >= val_old) - val -= val_old; - else - val = (~0ULL - val_old) + val + 1; - if (val) { - userdata[outidx].num = i; - userdata[outidx].value = val; - outidx++; - } - } - return outidx * sizeof(struct pai_userdata); -} - -static int paicrypt_push_sample(size_t rawsize, struct paicrypt_map *cpump, - struct perf_event *event) -{ - struct perf_sample_data data; - struct perf_raw_record raw; - struct pt_regs regs; - int overflow; - - /* Setup perf sample */ - memset(&regs, 0, sizeof(regs)); - memset(&raw, 0, sizeof(raw)); - memset(&data, 0, sizeof(data)); - perf_sample_data_init(&data, 0, event->hw.last_period); - if (event->attr.sample_type & PERF_SAMPLE_TID) { - data.tid_entry.pid = task_tgid_nr(current); - data.tid_entry.tid = task_pid_nr(current); - } - if (event->attr.sample_type & PERF_SAMPLE_TIME) - data.time = event->clock(); - if (event->attr.sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER)) - data.id = event->id; - if (event->attr.sample_type & PERF_SAMPLE_CPU) { - data.cpu_entry.cpu = smp_processor_id(); - data.cpu_entry.reserved = 0; - } - if (event->attr.sample_type & PERF_SAMPLE_RAW) { - raw.frag.size = rawsize; - raw.frag.data = cpump->save; - perf_sample_save_raw_data(&data, event, &raw); - } - - overflow = perf_event_overflow(event, &data, &regs); - perf_event_update_userpage(event); - /* Save crypto counter lowcore page after reading event data. */ - memcpy((void *)PAI_SAVE_AREA(event), cpump->page, PAGE_SIZE); - return overflow; -} - -/* Check if there is data to be saved on schedule out of a task. */ -static void paicrypt_have_sample(struct perf_event *event, - struct paicrypt_map *cpump) -{ - size_t rawsize; - - if (!event) /* No event active */ - return; - rawsize = paicrypt_copy(cpump->save, cpump->page, - (unsigned long *)PAI_SAVE_AREA(event), - event->attr.exclude_user, - event->attr.exclude_kernel); - if (rawsize) /* No incremented counters */ - paicrypt_push_sample(rawsize, cpump, event); -} - -/* Check if there is data to be saved on schedule out of a task. */ -static void paicrypt_have_samples(void) -{ - struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr); - struct paicrypt_map *cpump = mp->mapptr; - struct perf_event *event; - - list_for_each_entry(event, &cpump->syswide_list, hw.tp_list) - paicrypt_have_sample(event, cpump); -} - -/* Called on schedule-in and schedule-out. No access to event structure, - * but for sampling only event CRYPTO_ALL is allowed. - */ -static void paicrypt_sched_task(struct perf_event_pmu_context *pmu_ctx, - struct task_struct *task, bool sched_in) -{ - /* We started with a clean page on event installation. So read out - * results on schedule_out and if page was dirty, save old values. - */ - if (!sched_in) - paicrypt_have_samples(); -} - -/* Attribute definitions for paicrypt interface. As with other CPU - * Measurement Facilities, there is one attribute per mapped counter. 
- * The number of mapped counters may vary per machine generation. Use - * the QUERY PROCESSOR ACTIVITY COUNTER INFORMATION (QPACI) instruction - * to determine the number of mapped counters. The instructions returns - * a positive number, which is the highest number of supported counters. - * All counters less than this number are also supported, there are no - * holes. A returned number of zero means no support for mapped counters. - * - * The identification of the counter is a unique number. The chosen range - * is 0x1000 + offset in mapped kernel page. - * All CPU Measurement Facility counters identifiers must be unique and - * the numbers from 0 to 496 are already used for the CPU Measurement - * Counter facility. Numbers 0xb0000, 0xbc000 and 0xbd000 are already - * used for the CPU Measurement Sampling facility. - */ -PMU_FORMAT_ATTR(event, "config:0-63"); - -static struct attribute *paicrypt_format_attr[] = { - &format_attr_event.attr, - NULL, -}; - -static struct attribute_group paicrypt_events_group = { - .name = "events", - .attrs = NULL /* Filled in attr_event_init() */ -}; - -static struct attribute_group paicrypt_format_group = { - .name = "format", - .attrs = paicrypt_format_attr, -}; - -static const struct attribute_group *paicrypt_attr_groups[] = { - &paicrypt_events_group, - &paicrypt_format_group, - NULL, -}; - -/* Performance monitoring unit for mapped counters */ -static struct pmu paicrypt = { - .task_ctx_nr = perf_hw_context, - .event_init = paicrypt_event_init, - .add = paicrypt_add, - .del = paicrypt_del, - .start = paicrypt_start, - .stop = paicrypt_stop, - .read = paicrypt_read, - .sched_task = paicrypt_sched_task, - .attr_groups = paicrypt_attr_groups -}; - -/* List of symbolic PAI counter names. */ -static const char * const paicrypt_ctrnames[] = { - [0] = "CRYPTO_ALL", - [1] = "KM_DEA", - [2] = "KM_TDEA_128", - [3] = "KM_TDEA_192", - [4] = "KM_ENCRYPTED_DEA", - [5] = "KM_ENCRYPTED_TDEA_128", - [6] = "KM_ENCRYPTED_TDEA_192", - [7] = "KM_AES_128", - [8] = "KM_AES_192", - [9] = "KM_AES_256", - [10] = "KM_ENCRYPTED_AES_128", - [11] = "KM_ENCRYPTED_AES_192", - [12] = "KM_ENCRYPTED_AES_256", - [13] = "KM_XTS_AES_128", - [14] = "KM_XTS_AES_256", - [15] = "KM_XTS_ENCRYPTED_AES_128", - [16] = "KM_XTS_ENCRYPTED_AES_256", - [17] = "KMC_DEA", - [18] = "KMC_TDEA_128", - [19] = "KMC_TDEA_192", - [20] = "KMC_ENCRYPTED_DEA", - [21] = "KMC_ENCRYPTED_TDEA_128", - [22] = "KMC_ENCRYPTED_TDEA_192", - [23] = "KMC_AES_128", - [24] = "KMC_AES_192", - [25] = "KMC_AES_256", - [26] = "KMC_ENCRYPTED_AES_128", - [27] = "KMC_ENCRYPTED_AES_192", - [28] = "KMC_ENCRYPTED_AES_256", - [29] = "KMC_PRNG", - [30] = "KMA_GCM_AES_128", - [31] = "KMA_GCM_AES_192", - [32] = "KMA_GCM_AES_256", - [33] = "KMA_GCM_ENCRYPTED_AES_128", - [34] = "KMA_GCM_ENCRYPTED_AES_192", - [35] = "KMA_GCM_ENCRYPTED_AES_256", - [36] = "KMF_DEA", - [37] = "KMF_TDEA_128", - [38] = "KMF_TDEA_192", - [39] = "KMF_ENCRYPTED_DEA", - [40] = "KMF_ENCRYPTED_TDEA_128", - [41] = "KMF_ENCRYPTED_TDEA_192", - [42] = "KMF_AES_128", - [43] = "KMF_AES_192", - [44] = "KMF_AES_256", - [45] = "KMF_ENCRYPTED_AES_128", - [46] = "KMF_ENCRYPTED_AES_192", - [47] = "KMF_ENCRYPTED_AES_256", - [48] = "KMCTR_DEA", - [49] = "KMCTR_TDEA_128", - [50] = "KMCTR_TDEA_192", - [51] = "KMCTR_ENCRYPTED_DEA", - [52] = "KMCTR_ENCRYPTED_TDEA_128", - [53] = "KMCTR_ENCRYPTED_TDEA_192", - [54] = "KMCTR_AES_128", - [55] = "KMCTR_AES_192", - [56] = "KMCTR_AES_256", - [57] = "KMCTR_ENCRYPTED_AES_128", - [58] = "KMCTR_ENCRYPTED_AES_192", - [59] = "KMCTR_ENCRYPTED_AES_256", 
- [60] = "KMO_DEA", - [61] = "KMO_TDEA_128", - [62] = "KMO_TDEA_192", - [63] = "KMO_ENCRYPTED_DEA", - [64] = "KMO_ENCRYPTED_TDEA_128", - [65] = "KMO_ENCRYPTED_TDEA_192", - [66] = "KMO_AES_128", - [67] = "KMO_AES_192", - [68] = "KMO_AES_256", - [69] = "KMO_ENCRYPTED_AES_128", - [70] = "KMO_ENCRYPTED_AES_192", - [71] = "KMO_ENCRYPTED_AES_256", - [72] = "KIMD_SHA_1", - [73] = "KIMD_SHA_256", - [74] = "KIMD_SHA_512", - [75] = "KIMD_SHA3_224", - [76] = "KIMD_SHA3_256", - [77] = "KIMD_SHA3_384", - [78] = "KIMD_SHA3_512", - [79] = "KIMD_SHAKE_128", - [80] = "KIMD_SHAKE_256", - [81] = "KIMD_GHASH", - [82] = "KLMD_SHA_1", - [83] = "KLMD_SHA_256", - [84] = "KLMD_SHA_512", - [85] = "KLMD_SHA3_224", - [86] = "KLMD_SHA3_256", - [87] = "KLMD_SHA3_384", - [88] = "KLMD_SHA3_512", - [89] = "KLMD_SHAKE_128", - [90] = "KLMD_SHAKE_256", - [91] = "KMAC_DEA", - [92] = "KMAC_TDEA_128", - [93] = "KMAC_TDEA_192", - [94] = "KMAC_ENCRYPTED_DEA", - [95] = "KMAC_ENCRYPTED_TDEA_128", - [96] = "KMAC_ENCRYPTED_TDEA_192", - [97] = "KMAC_AES_128", - [98] = "KMAC_AES_192", - [99] = "KMAC_AES_256", - [100] = "KMAC_ENCRYPTED_AES_128", - [101] = "KMAC_ENCRYPTED_AES_192", - [102] = "KMAC_ENCRYPTED_AES_256", - [103] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_DEA", - [104] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_TDEA_128", - [105] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_TDEA_192", - [106] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_DEA", - [107] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_TDEA_128", - [108] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_TDEA_192", - [109] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_AES_128", - [110] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_AES_192", - [111] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_AES_256", - [112] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_128", - [113] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_192", - [114] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_256", - [115] = "PCC_COMPUTE_XTS_PARAMETER_USING_AES_128", - [116] = "PCC_COMPUTE_XTS_PARAMETER_USING_AES_256", - [117] = "PCC_COMPUTE_XTS_PARAMETER_USING_ENCRYPTED_AES_128", - [118] = "PCC_COMPUTE_XTS_PARAMETER_USING_ENCRYPTED_AES_256", - [119] = "PCC_SCALAR_MULTIPLY_P256", - [120] = "PCC_SCALAR_MULTIPLY_P384", - [121] = "PCC_SCALAR_MULTIPLY_P521", - [122] = "PCC_SCALAR_MULTIPLY_ED25519", - [123] = "PCC_SCALAR_MULTIPLY_ED448", - [124] = "PCC_SCALAR_MULTIPLY_X25519", - [125] = "PCC_SCALAR_MULTIPLY_X448", - [126] = "PRNO_SHA_512_DRNG", - [127] = "PRNO_TRNG_QUERY_RAW_TO_CONDITIONED_RATIO", - [128] = "PRNO_TRNG", - [129] = "KDSA_ECDSA_VERIFY_P256", - [130] = "KDSA_ECDSA_VERIFY_P384", - [131] = "KDSA_ECDSA_VERIFY_P521", - [132] = "KDSA_ECDSA_SIGN_P256", - [133] = "KDSA_ECDSA_SIGN_P384", - [134] = "KDSA_ECDSA_SIGN_P521", - [135] = "KDSA_ENCRYPTED_ECDSA_SIGN_P256", - [136] = "KDSA_ENCRYPTED_ECDSA_SIGN_P384", - [137] = "KDSA_ENCRYPTED_ECDSA_SIGN_P521", - [138] = "KDSA_EDDSA_VERIFY_ED25519", - [139] = "KDSA_EDDSA_VERIFY_ED448", - [140] = "KDSA_EDDSA_SIGN_ED25519", - [141] = "KDSA_EDDSA_SIGN_ED448", - [142] = "KDSA_ENCRYPTED_EDDSA_SIGN_ED25519", - [143] = "KDSA_ENCRYPTED_EDDSA_SIGN_ED448", - [144] = "PCKMO_ENCRYPT_DEA_KEY", - [145] = "PCKMO_ENCRYPT_TDEA_128_KEY", - [146] = "PCKMO_ENCRYPT_TDEA_192_KEY", - [147] = "PCKMO_ENCRYPT_AES_128_KEY", - [148] = "PCKMO_ENCRYPT_AES_192_KEY", - [149] = "PCKMO_ENCRYPT_AES_256_KEY", - [150] = "PCKMO_ENCRYPT_ECC_P256_KEY", - [151] = "PCKMO_ENCRYPT_ECC_P384_KEY", - [152] = "PCKMO_ENCRYPT_ECC_P521_KEY", - [153] = "PCKMO_ENCRYPT_ECC_ED25519_KEY", - [154] = "PCKMO_ENCRYPT_ECC_ED448_KEY", - [155] = 
"IBM_RESERVED_155", - [156] = "IBM_RESERVED_156", - [157] = "KM_FULL_XTS_AES_128", - [158] = "KM_FULL_XTS_AES_256", - [159] = "KM_FULL_XTS_ENCRYPTED_AES_128", - [160] = "KM_FULL_XTS_ENCRYPTED_AES_256", - [161] = "KMAC_HMAC_SHA_224", - [162] = "KMAC_HMAC_SHA_256", - [163] = "KMAC_HMAC_SHA_384", - [164] = "KMAC_HMAC_SHA_512", - [165] = "KMAC_HMAC_ENCRYPTED_SHA_224", - [166] = "KMAC_HMAC_ENCRYPTED_SHA_256", - [167] = "KMAC_HMAC_ENCRYPTED_SHA_384", - [168] = "KMAC_HMAC_ENCRYPTED_SHA_512", - [169] = "PCKMO_ENCRYPT_HMAC_512_KEY", - [170] = "PCKMO_ENCRYPT_HMAC_1024_KEY", - [171] = "PCKMO_ENCRYPT_AES_XTS_128", - [172] = "PCKMO_ENCRYPT_AES_XTS_256", -}; - -static void __init attr_event_free(struct attribute **attrs, int num) -{ - struct perf_pmu_events_attr *pa; - int i; - - for (i = 0; i < num; i++) { - struct device_attribute *dap; - - dap = container_of(attrs[i], struct device_attribute, attr); - pa = container_of(dap, struct perf_pmu_events_attr, attr); - kfree(pa); - } - kfree(attrs); -} - -static int __init attr_event_init_one(struct attribute **attrs, int num) -{ - struct perf_pmu_events_attr *pa; - - /* Index larger than array_size, no counter name available */ - if (num >= ARRAY_SIZE(paicrypt_ctrnames)) { - attrs[num] = NULL; - return 0; - } - - pa = kzalloc(sizeof(*pa), GFP_KERNEL); - if (!pa) - return -ENOMEM; - - sysfs_attr_init(&pa->attr.attr); - pa->id = PAI_CRYPTO_BASE + num; - pa->attr.attr.name = paicrypt_ctrnames[num]; - pa->attr.attr.mode = 0444; - pa->attr.show = cpumf_events_sysfs_show; - pa->attr.store = NULL; - attrs[num] = &pa->attr.attr; - return 0; -} - -/* Create PMU sysfs event attributes on the fly. */ -static int __init attr_event_init(void) -{ - struct attribute **attrs; - int ret, i; - - attrs = kmalloc_array(paicrypt_cnt + 2, sizeof(*attrs), GFP_KERNEL); - if (!attrs) - return -ENOMEM; - for (i = 0; i <= paicrypt_cnt; i++) { - ret = attr_event_init_one(attrs, i); - if (ret) { - attr_event_free(attrs, i); - return ret; - } - } - attrs[i] = NULL; - paicrypt_events_group.attrs = attrs; - return 0; -} - -static int __init paicrypt_init(void) -{ - struct qpaci_info_block ib; - int rc; - - if (!test_facility(196)) - return 0; - - qpaci(&ib); - paicrypt_cnt = ib.num_cc; - if (paicrypt_cnt == 0) - return 0; - if (paicrypt_cnt >= PAI_CRYPTO_MAXCTR) { - pr_err("Too many PMU pai_crypto counters %d\n", paicrypt_cnt); - return -E2BIG; - } - - rc = attr_event_init(); /* Export known PAI crypto events */ - if (rc) { - pr_err("Creation of PMU pai_crypto /sysfs failed\n"); - return rc; - } - - /* Setup s390dbf facility */ - cfm_dbg = debug_register(KMSG_COMPONENT, 2, 256, 128); - if (!cfm_dbg) { - pr_err("Registration of s390dbf pai_crypto failed\n"); - return -ENOMEM; - } - debug_register_view(cfm_dbg, &debug_sprintf_view); - - rc = perf_pmu_register(&paicrypt, "pai_crypto", -1); - if (rc) { - pr_err("Registering the pai_crypto PMU failed with rc=%i\n", - rc); - debug_unregister_view(cfm_dbg, &debug_sprintf_view); - debug_unregister(cfm_dbg); - return rc; - } - return 0; -} - -device_initcall(paicrypt_init); diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c deleted file mode 100644 index 7b32935273ce..000000000000 --- a/arch/s390/kernel/perf_pai_ext.c +++ /dev/null @@ -1,756 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Performance event support - Processor Activity Instrumentation Extension - * Facility - * - * Copyright IBM Corp. 
2022 - * Author(s): Thomas Richter <tmricht@linux.ibm.com> - */ -#define KMSG_COMPONENT "pai_ext" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt - -#include <linux/kernel.h> -#include <linux/kernel_stat.h> -#include <linux/percpu.h> -#include <linux/notifier.h> -#include <linux/init.h> -#include <linux/io.h> -#include <linux/perf_event.h> -#include <asm/ctlreg.h> -#include <asm/pai.h> -#include <asm/debug.h> - -#define PAIE1_CB_SZ 0x200 /* Size of PAIE1 control block */ -#define PAIE1_CTRBLOCK_SZ 0x400 /* Size of PAIE1 counter blocks */ - -static debug_info_t *paiext_dbg; -static unsigned int paiext_cnt; /* Extracted with QPACI instruction */ - -struct pai_userdata { - u16 num; - u64 value; -} __packed; - -/* Create the PAI extension 1 control block area. - * The PAI extension control block 1 is pointed to by lowcore - * address 0x1508 for each CPU. This control block is 512 bytes in size - * and requires a 512 byte boundary alignment. - */ -struct paiext_cb { /* PAI extension 1 control block */ - u64 header; /* Not used */ - u64 reserved1; - u64 acc; /* Addr to analytics counter control block */ - u8 reserved2[488]; -} __packed; - -struct paiext_map { - unsigned long *area; /* Area for CPU to store counters */ - struct pai_userdata *save; /* Area to store non-zero counters */ - unsigned int active_events; /* # of PAI Extension users */ - refcount_t refcnt; - struct perf_event *event; /* Perf event for sampling */ - struct paiext_cb *paiext_cb; /* PAI extension control block area */ - struct list_head syswide_list; /* List system-wide sampling events */ -}; - -struct paiext_mapptr { - struct paiext_map *mapptr; -}; - -static struct paiext_root { /* Anchor to per CPU data */ - refcount_t refcnt; /* Overall active events */ - struct paiext_mapptr __percpu *mapptr; -} paiext_root; - -/* Free per CPU data when the last event is removed. */ -static void paiext_root_free(void) -{ - if (refcount_dec_and_test(&paiext_root.refcnt)) { - free_percpu(paiext_root.mapptr); - paiext_root.mapptr = NULL; - } - debug_sprintf_event(paiext_dbg, 5, "%s root.refcount %d\n", __func__, - refcount_read(&paiext_root.refcnt)); -} - -/* On initialization of first event also allocate per CPU data dynamically. - * Start with an array of pointers, the array size is the maximum number of - * CPUs possible, which might be larger than the number of CPUs currently - * online. - */ -static int paiext_root_alloc(void) -{ - if (!refcount_inc_not_zero(&paiext_root.refcnt)) { - /* The memory is already zeroed. */ - paiext_root.mapptr = alloc_percpu(struct paiext_mapptr); - if (!paiext_root.mapptr) { - /* Returning without refcnt adjustment is ok. The - * error code is handled by paiext_alloc() which - * decrements refcnt when an event can not be - * created. - */ - return -ENOMEM; - } - refcount_set(&paiext_root.refcnt, 1); - } - return 0; -} - -/* Protects against concurrent increment of sampler and counter member - * increments at the same time and prohibits concurrent execution of - * counting and sampling events. - * Ensures that analytics counter block is deallocated only when the - * sampling and counting on that cpu is zero. - * For details see paiext_alloc(). 
- */ -static DEFINE_MUTEX(paiext_reserve_mutex); - -/* Free all memory allocated for event counting/sampling setup */ -static void paiext_free(struct paiext_mapptr *mp) -{ - kfree(mp->mapptr->area); - kfree(mp->mapptr->paiext_cb); - kvfree(mp->mapptr->save); - kfree(mp->mapptr); - mp->mapptr = NULL; -} - -/* Release the PMU if event is the last perf event */ -static void paiext_event_destroy_cpu(struct perf_event *event, int cpu) -{ - struct paiext_mapptr *mp = per_cpu_ptr(paiext_root.mapptr, cpu); - struct paiext_map *cpump = mp->mapptr; - - mutex_lock(&paiext_reserve_mutex); - if (refcount_dec_and_test(&cpump->refcnt)) /* Last reference gone */ - paiext_free(mp); - paiext_root_free(); - mutex_unlock(&paiext_reserve_mutex); -} - -static void paiext_event_destroy(struct perf_event *event) -{ - int cpu; - - free_page(PAI_SAVE_AREA(event)); - if (event->cpu == -1) { - struct cpumask *mask = PAI_CPU_MASK(event); - - for_each_cpu(cpu, mask) - paiext_event_destroy_cpu(event, cpu); - kfree(mask); - } else { - paiext_event_destroy_cpu(event, event->cpu); - } - debug_sprintf_event(paiext_dbg, 4, "%s cpu %d\n", __func__, - event->cpu); -} - -/* Used to avoid races in checking concurrent access of counting and - * sampling for pai_extension events. - * - * Only one instance of event pai_ext/NNPA_ALL/ for sampling is - * allowed and when this event is running, no counting event is allowed. - * Several counting events are allowed in parallel, but no sampling event - * is allowed while one (or more) counting events are running. - * - * This function is called in process context and it is safe to block. - * When the event initialization functions fails, no other call back will - * be invoked. - * - * Allocate the memory for the event. - */ -static int paiext_alloc_cpu(struct perf_event *event, int cpu) -{ - struct paiext_mapptr *mp; - struct paiext_map *cpump; - int rc; - - mutex_lock(&paiext_reserve_mutex); - rc = paiext_root_alloc(); - if (rc) - goto unlock; - - mp = per_cpu_ptr(paiext_root.mapptr, cpu); - cpump = mp->mapptr; - if (!cpump) { /* Paiext_map allocated? */ - rc = -ENOMEM; - cpump = kzalloc(sizeof(*cpump), GFP_KERNEL); - if (!cpump) - goto undo; - - /* Allocate memory for counter area and counter extraction. - * These are - * - a 512 byte block and requires 512 byte boundary alignment. - * - a 1KB byte block and requires 1KB boundary alignment. - * Only the first counting event has to allocate the area. - * - * Note: This works with commit 59bb47985c1d by default. - * Backporting this to kernels without this commit might - * need adjustment. - */ - mp->mapptr = cpump; - cpump->area = kzalloc(PAIE1_CTRBLOCK_SZ, GFP_KERNEL); - cpump->paiext_cb = kzalloc(PAIE1_CB_SZ, GFP_KERNEL); - cpump->save = kvmalloc_array(paiext_cnt + 1, - sizeof(struct pai_userdata), - GFP_KERNEL); - if (!cpump->save || !cpump->area || !cpump->paiext_cb) { - paiext_free(mp); - goto undo; - } - INIT_LIST_HEAD(&cpump->syswide_list); - refcount_set(&cpump->refcnt, 1); - rc = 0; - } else { - refcount_inc(&cpump->refcnt); - } - -undo: - if (rc) { - /* Error in allocation of event, decrement anchor. Since - * the event in not created, its destroy() function is never - * invoked. Adjust the reference counter for the anchor. - */ - paiext_root_free(); - } -unlock: - mutex_unlock(&paiext_reserve_mutex); - /* If rc is non-zero, no increment of counter/sampler was done. 
*/ - return rc; -} - -static int paiext_alloc(struct perf_event *event) -{ - struct cpumask *maskptr; - int cpu, rc = -ENOMEM; - - maskptr = kzalloc(sizeof(*maskptr), GFP_KERNEL); - if (!maskptr) - goto out; - - for_each_online_cpu(cpu) { - rc = paiext_alloc_cpu(event, cpu); - if (rc) { - for_each_cpu(cpu, maskptr) - paiext_event_destroy_cpu(event, cpu); - kfree(maskptr); - goto out; - } - cpumask_set_cpu(cpu, maskptr); - } - - /* - * On error all cpumask are freed and all events have been destroyed. - * Save of which CPUs data structures have been allocated for. - * Release them in paicrypt_event_destroy call back function - * for this event. - */ - PAI_CPU_MASK(event) = maskptr; - rc = 0; -out: - return rc; -} - -/* The PAI extension 1 control block supports up to 128 entries. Return - * the index within PAIE1_CB given the event number. Also validate event - * number. - */ -static int paiext_event_valid(struct perf_event *event) -{ - u64 cfg = event->attr.config; - - if (cfg >= PAI_NNPA_BASE && cfg <= PAI_NNPA_BASE + paiext_cnt) { - /* Offset NNPA in paiext_cb */ - event->hw.config_base = offsetof(struct paiext_cb, acc); - return 0; - } - return -ENOENT; -} - -/* Might be called on different CPU than the one the event is intended for. */ -static int paiext_event_init(struct perf_event *event) -{ - struct perf_event_attr *a = &event->attr; - int rc; - - /* PMU pai_ext registered as PERF_TYPE_RAW, check event type */ - if (a->type != PERF_TYPE_RAW && event->pmu->type != a->type) - return -ENOENT; - /* PAI extension event must be valid and in supported range */ - rc = paiext_event_valid(event); - if (rc) - return rc; - /* Allow only event NNPA_ALL for sampling. */ - if (a->sample_period && a->config != PAI_NNPA_BASE) - return -EINVAL; - /* Prohibit exclude_user event selection */ - if (a->exclude_user) - return -EINVAL; - /* Get a page to store last counter values for sampling */ - if (a->sample_period) { - PAI_SAVE_AREA(event) = get_zeroed_page(GFP_KERNEL); - if (!PAI_SAVE_AREA(event)) - return -ENOMEM; - } - - if (event->cpu >= 0) - rc = paiext_alloc_cpu(event, event->cpu); - else - rc = paiext_alloc(event); - if (rc) { - free_page(PAI_SAVE_AREA(event)); - return rc; - } - event->destroy = paiext_event_destroy; - - if (a->sample_period) { - a->sample_period = 1; - a->freq = 0; - /* Register for paicrypt_sched_task() to be called */ - event->attach_state |= PERF_ATTACH_SCHED_CB; - /* Add raw data which are the memory mapped counters */ - a->sample_type |= PERF_SAMPLE_RAW; - /* Turn off inheritance */ - a->inherit = 0; - } - - return 0; -} - -static u64 paiext_getctr(unsigned long *area, int nr) -{ - return area[nr]; -} - -/* Read the counter values. Return value from location in buffer. For event - * NNPA_ALL sum up all events. 
- */ -static u64 paiext_getdata(struct perf_event *event) -{ - struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr); - struct paiext_map *cpump = mp->mapptr; - u64 sum = 0; - int i; - - if (event->attr.config != PAI_NNPA_BASE) - return paiext_getctr(cpump->area, - event->attr.config - PAI_NNPA_BASE); - - for (i = 1; i <= paiext_cnt; i++) - sum += paiext_getctr(cpump->area, i); - - return sum; -} - -static u64 paiext_getall(struct perf_event *event) -{ - return paiext_getdata(event); -} - -static void paiext_read(struct perf_event *event) -{ - u64 prev, new, delta; - - prev = local64_read(&event->hw.prev_count); - new = paiext_getall(event); - local64_set(&event->hw.prev_count, new); - delta = new - prev; - local64_add(delta, &event->count); -} - -static void paiext_start(struct perf_event *event, int flags) -{ - struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr); - struct paiext_map *cpump = mp->mapptr; - u64 sum; - - if (!event->attr.sample_period) { /* Counting */ - sum = paiext_getall(event); /* Get current value */ - local64_set(&event->hw.prev_count, sum); - } else { /* Sampling */ - memcpy((void *)PAI_SAVE_AREA(event), cpump->area, - PAIE1_CTRBLOCK_SZ); - /* Enable context switch callback for system-wide sampling */ - if (!(event->attach_state & PERF_ATTACH_TASK)) { - list_add_tail(PAI_SWLIST(event), &cpump->syswide_list); - perf_sched_cb_inc(event->pmu); - } else { - cpump->event = event; - } - } -} - -static int paiext_add(struct perf_event *event, int flags) -{ - struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr); - struct paiext_map *cpump = mp->mapptr; - struct paiext_cb *pcb = cpump->paiext_cb; - - if (++cpump->active_events == 1) { - get_lowcore()->aicd = virt_to_phys(cpump->paiext_cb); - pcb->acc = virt_to_phys(cpump->area) | 0x1; - /* Enable CPU instruction lookup for PAIE1 control block */ - local_ctl_set_bit(0, CR0_PAI_EXTENSION_BIT); - } - if (flags & PERF_EF_START) - paiext_start(event, PERF_EF_RELOAD); - event->hw.state = 0; - return 0; -} - -static void paiext_have_sample(struct perf_event *, struct paiext_map *); -static void paiext_stop(struct perf_event *event, int flags) -{ - struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr); - struct paiext_map *cpump = mp->mapptr; - - if (!event->attr.sample_period) { /* Counting */ - paiext_read(event); - } else { /* Sampling */ - if (!(event->attach_state & PERF_ATTACH_TASK)) { - list_del(PAI_SWLIST(event)); - perf_sched_cb_dec(event->pmu); - } else { - paiext_have_sample(event, cpump); - cpump->event = NULL; - } - } - event->hw.state = PERF_HES_STOPPED; -} - -static void paiext_del(struct perf_event *event, int flags) -{ - struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr); - struct paiext_map *cpump = mp->mapptr; - struct paiext_cb *pcb = cpump->paiext_cb; - - paiext_stop(event, PERF_EF_UPDATE); - if (--cpump->active_events == 0) { - /* Disable CPU instruction lookup for PAIE1 control block */ - local_ctl_clear_bit(0, CR0_PAI_EXTENSION_BIT); - pcb->acc = 0; - get_lowcore()->aicd = 0; - } -} - -/* Create raw data and save it in buffer. Returns number of bytes copied. 
- * Saves only positive counter entries of the form - * 2 bytes: Number of counter - * 8 bytes: Value of counter - */ -static size_t paiext_copy(struct pai_userdata *userdata, unsigned long *area, - unsigned long *area_old) -{ - int i, outidx = 0; - - for (i = 1; i <= paiext_cnt; i++) { - u64 val = paiext_getctr(area, i); - u64 val_old = paiext_getctr(area_old, i); - - if (val >= val_old) - val -= val_old; - else - val = (~0ULL - val_old) + val + 1; - if (val) { - userdata[outidx].num = i; - userdata[outidx].value = val; - outidx++; - } - } - return outidx * sizeof(*userdata); -} - -/* Write sample when one or more counters values are nonzero. - * - * Note: The function paiext_sched_task() and paiext_push_sample() are not - * invoked after function paiext_del() has been called because of function - * perf_sched_cb_dec(). - * The function paiext_sched_task() and paiext_push_sample() are only - * called when sampling is active. Function perf_sched_cb_inc() - * has been invoked to install function paiext_sched_task() as call back - * to run at context switch time (see paiext_add()). - * - * This causes function perf_event_context_sched_out() and - * perf_event_context_sched_in() to check whether the PMU has installed an - * sched_task() callback. That callback is not active after paiext_del() - * returns and has deleted the event on that CPU. - */ -static int paiext_push_sample(size_t rawsize, struct paiext_map *cpump, - struct perf_event *event) -{ - struct perf_sample_data data; - struct perf_raw_record raw; - struct pt_regs regs; - int overflow; - - /* Setup perf sample */ - memset(&regs, 0, sizeof(regs)); - memset(&raw, 0, sizeof(raw)); - memset(&data, 0, sizeof(data)); - perf_sample_data_init(&data, 0, event->hw.last_period); - if (event->attr.sample_type & PERF_SAMPLE_TID) { - data.tid_entry.pid = task_tgid_nr(current); - data.tid_entry.tid = task_pid_nr(current); - } - if (event->attr.sample_type & PERF_SAMPLE_TIME) - data.time = event->clock(); - if (event->attr.sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER)) - data.id = event->id; - if (event->attr.sample_type & PERF_SAMPLE_CPU) - data.cpu_entry.cpu = smp_processor_id(); - if (event->attr.sample_type & PERF_SAMPLE_RAW) { - raw.frag.size = rawsize; - raw.frag.data = cpump->save; - perf_sample_save_raw_data(&data, event, &raw); - } - - overflow = perf_event_overflow(event, &data, &regs); - perf_event_update_userpage(event); - /* Save NNPA lowcore area after read in event */ - memcpy((void *)PAI_SAVE_AREA(event), cpump->area, - PAIE1_CTRBLOCK_SZ); - return overflow; -} - -/* Check if there is data to be saved on schedule out of a task. */ -static void paiext_have_sample(struct perf_event *event, - struct paiext_map *cpump) -{ - size_t rawsize; - - if (!event) - return; - rawsize = paiext_copy(cpump->save, cpump->area, - (unsigned long *)PAI_SAVE_AREA(event)); - if (rawsize) /* Incremented counters */ - paiext_push_sample(rawsize, cpump, event); -} - -/* Check if there is data to be saved on schedule out of a task. */ -static void paiext_have_samples(void) -{ - struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr); - struct paiext_map *cpump = mp->mapptr; - struct perf_event *event; - - list_for_each_entry(event, &cpump->syswide_list, hw.tp_list) - paiext_have_sample(event, cpump); -} - -/* Called on schedule-in and schedule-out. No access to event structure, - * but for sampling only event NNPA_ALL is allowed. 
- */ -static void paiext_sched_task(struct perf_event_pmu_context *pmu_ctx, - struct task_struct *task, bool sched_in) -{ - /* We started with a clean page on event installation. So read out - * results on schedule_out and if page was dirty, save old values. - */ - if (!sched_in) - paiext_have_samples(); -} - -/* Attribute definitions for pai extension1 interface. As with other CPU - * Measurement Facilities, there is one attribute per mapped counter. - * The number of mapped counters may vary per machine generation. Use - * the QUERY PROCESSOR ACTIVITY COUNTER INFORMATION (QPACI) instruction - * to determine the number of mapped counters. The instructions returns - * a positive number, which is the highest number of supported counters. - * All counters less than this number are also supported, there are no - * holes. A returned number of zero means no support for mapped counters. - * - * The identification of the counter is a unique number. The chosen range - * is 0x1800 + offset in mapped kernel page. - * All CPU Measurement Facility counters identifiers must be unique and - * the numbers from 0 to 496 are already used for the CPU Measurement - * Counter facility. Number 0x1000 to 0x103e are used for PAI cryptography - * counters. - * Numbers 0xb0000, 0xbc000 and 0xbd000 are already - * used for the CPU Measurement Sampling facility. - */ -PMU_FORMAT_ATTR(event, "config:0-63"); - -static struct attribute *paiext_format_attr[] = { - &format_attr_event.attr, - NULL, -}; - -static struct attribute_group paiext_events_group = { - .name = "events", - .attrs = NULL, /* Filled in attr_event_init() */ -}; - -static struct attribute_group paiext_format_group = { - .name = "format", - .attrs = paiext_format_attr, -}; - -static const struct attribute_group *paiext_attr_groups[] = { - &paiext_events_group, - &paiext_format_group, - NULL, -}; - -/* Performance monitoring unit for mapped counters */ -static struct pmu paiext = { - .task_ctx_nr = perf_hw_context, - .event_init = paiext_event_init, - .add = paiext_add, - .del = paiext_del, - .start = paiext_start, - .stop = paiext_stop, - .read = paiext_read, - .sched_task = paiext_sched_task, - .attr_groups = paiext_attr_groups, -}; - -/* List of symbolic PAI extension 1 NNPA counter names. 
*/ -static const char * const paiext_ctrnames[] = { - [0] = "NNPA_ALL", - [1] = "NNPA_ADD", - [2] = "NNPA_SUB", - [3] = "NNPA_MUL", - [4] = "NNPA_DIV", - [5] = "NNPA_MIN", - [6] = "NNPA_MAX", - [7] = "NNPA_LOG", - [8] = "NNPA_EXP", - [9] = "NNPA_IBM_RESERVED_9", - [10] = "NNPA_RELU", - [11] = "NNPA_TANH", - [12] = "NNPA_SIGMOID", - [13] = "NNPA_SOFTMAX", - [14] = "NNPA_BATCHNORM", - [15] = "NNPA_MAXPOOL2D", - [16] = "NNPA_AVGPOOL2D", - [17] = "NNPA_LSTMACT", - [18] = "NNPA_GRUACT", - [19] = "NNPA_CONVOLUTION", - [20] = "NNPA_MATMUL_OP", - [21] = "NNPA_MATMUL_OP_BCAST23", - [22] = "NNPA_SMALLBATCH", - [23] = "NNPA_LARGEDIM", - [24] = "NNPA_SMALLTENSOR", - [25] = "NNPA_1MFRAME", - [26] = "NNPA_2GFRAME", - [27] = "NNPA_ACCESSEXCEPT", - [28] = "NNPA_TRANSFORM", - [29] = "NNPA_GELU", - [30] = "NNPA_MOMENTS", - [31] = "NNPA_LAYERNORM", - [32] = "NNPA_MATMUL_OP_BCAST1", - [33] = "NNPA_SQRT", - [34] = "NNPA_INVSQRT", - [35] = "NNPA_NORM", - [36] = "NNPA_REDUCE", -}; - -static void __init attr_event_free(struct attribute **attrs, int num) -{ - struct perf_pmu_events_attr *pa; - struct device_attribute *dap; - int i; - - for (i = 0; i < num; i++) { - dap = container_of(attrs[i], struct device_attribute, attr); - pa = container_of(dap, struct perf_pmu_events_attr, attr); - kfree(pa); - } - kfree(attrs); -} - -static int __init attr_event_init_one(struct attribute **attrs, int num) -{ - struct perf_pmu_events_attr *pa; - - /* Index larger than array_size, no counter name available */ - if (num >= ARRAY_SIZE(paiext_ctrnames)) { - attrs[num] = NULL; - return 0; - } - - pa = kzalloc(sizeof(*pa), GFP_KERNEL); - if (!pa) - return -ENOMEM; - - sysfs_attr_init(&pa->attr.attr); - pa->id = PAI_NNPA_BASE + num; - pa->attr.attr.name = paiext_ctrnames[num]; - pa->attr.attr.mode = 0444; - pa->attr.show = cpumf_events_sysfs_show; - pa->attr.store = NULL; - attrs[num] = &pa->attr.attr; - return 0; -} - -/* Create PMU sysfs event attributes on the fly. 
*/ -static int __init attr_event_init(void) -{ - struct attribute **attrs; - int ret, i; - - attrs = kmalloc_array(paiext_cnt + 2, sizeof(*attrs), GFP_KERNEL); - if (!attrs) - return -ENOMEM; - for (i = 0; i <= paiext_cnt; i++) { - ret = attr_event_init_one(attrs, i); - if (ret) { - attr_event_free(attrs, i); - return ret; - } - } - attrs[i] = NULL; - paiext_events_group.attrs = attrs; - return 0; -} - -static int __init paiext_init(void) -{ - struct qpaci_info_block ib; - int rc = -ENOMEM; - - if (!test_facility(197)) - return 0; - - qpaci(&ib); - paiext_cnt = ib.num_nnpa; - if (paiext_cnt >= PAI_NNPA_MAXCTR) - paiext_cnt = PAI_NNPA_MAXCTR; - if (!paiext_cnt) - return 0; - - rc = attr_event_init(); - if (rc) { - pr_err("Creation of PMU " KMSG_COMPONENT " /sysfs failed\n"); - return rc; - } - - /* Setup s390dbf facility */ - paiext_dbg = debug_register(KMSG_COMPONENT, 2, 256, 128); - if (!paiext_dbg) { - pr_err("Registration of s390dbf " KMSG_COMPONENT " failed\n"); - rc = -ENOMEM; - goto out_init; - } - debug_register_view(paiext_dbg, &debug_sprintf_view); - - rc = perf_pmu_register(&paiext, KMSG_COMPONENT, -1); - if (rc) { - pr_err("Registration of " KMSG_COMPONENT " PMU failed with " - "rc=%i\n", rc); - goto out_pmu; - } - - return 0; - -out_pmu: - debug_unregister_view(paiext_dbg, &debug_sprintf_view); - debug_unregister(paiext_dbg); -out_init: - attr_event_free(paiext_events_group.attrs, - ARRAY_SIZE(paiext_ctrnames) + 1); - return rc; -} - -device_initcall(paiext_init); diff --git a/arch/s390/kernel/perf_regs.c b/arch/s390/kernel/perf_regs.c index a6b058ee4a36..7b305f1456f8 100644 --- a/arch/s390/kernel/perf_regs.c +++ b/arch/s390/kernel/perf_regs.c @@ -44,9 +44,6 @@ int perf_reg_validate(u64 mask) u64 perf_reg_abi(struct task_struct *task) { - if (test_tsk_thread_flag(task, TIF_31BIT)) - return PERF_SAMPLE_REGS_ABI_32; - return PERF_SAMPLE_REGS_ABI_64; } diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index b107dbca4ed7..0df95dcb2101 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -24,7 +24,6 @@ #include <linux/tick.h> #include <linux/personality.h> #include <linux/syscalls.h> -#include <linux/compat.h> #include <linux/kprobes.h> #include <linux/random.h> #include <linux/init_task.h> @@ -166,12 +165,8 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) /* Set a new TLS ? 
*/ if (clone_flags & CLONE_SETTLS) { - if (is_compat_task()) { - p->thread.acrs[0] = (unsigned int)tls; - } else { - p->thread.acrs[0] = (unsigned int)(tls >> 32); - p->thread.acrs[1] = (unsigned int)tls; - } + p->thread.acrs[0] = (unsigned int)(tls >> 32); + p->thread.acrs[1] = (unsigned int)tls; } /* * s390 stores the svc return address in arch_data when calling diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 11f70c1e2797..e33a3eccda56 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -4,8 +4,7 @@ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) */ -#define KMSG_COMPONENT "cpu" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "cpu: " fmt #include <linux/stop_machine.h> #include <linux/cpufeature.h> diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 494216c4b4f3..ceaa1726e328 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -22,7 +22,6 @@ #include <linux/elf.h> #include <linux/regset.h> #include <linux/seccomp.h> -#include <linux/compat.h> #include <trace/syscall.h> #include <asm/guarded_storage.h> #include <asm/access-regs.h> @@ -38,10 +37,6 @@ #include "entry.h" -#ifdef CONFIG_COMPAT -#include "compat_ptrace.h" -#endif - void update_cr_regs(struct task_struct *task) { struct pt_regs *regs = task_pt_regs(task); @@ -507,308 +502,6 @@ long arch_ptrace(struct task_struct *child, long request, } } -#ifdef CONFIG_COMPAT -/* - * Now the fun part starts... a 31 bit program running in the - * 31 bit emulation tracing another program. PTRACE_PEEKTEXT, - * PTRACE_PEEKDATA, PTRACE_POKETEXT and PTRACE_POKEDATA are easy - * to handle, the difference to the 64 bit versions of the requests - * is that the access is done in multiples of 4 byte instead of - * 8 bytes (sizeof(unsigned long) on 31/64 bit). - * The ugly part are PTRACE_PEEKUSR, PTRACE_PEEKUSR_AREA, - * PTRACE_POKEUSR and PTRACE_POKEUSR_AREA. If the traced program - * is a 31 bit program too, the content of struct user can be - * emulated. A 31 bit program peeking into the struct user of - * a 64 bit program is a no-no. - */ - -/* - * Same as peek_user_per but for a 31 bit program. - */ -static inline __u32 __peek_user_per_compat(struct task_struct *child, - addr_t addr) -{ - if (addr == offsetof(struct compat_per_struct_kernel, cr9)) - /* Control bits of the active per set. */ - return (__u32) test_thread_flag(TIF_SINGLE_STEP) ? - PER_EVENT_IFETCH : child->thread.per_user.control; - else if (addr == offsetof(struct compat_per_struct_kernel, cr10)) - /* Start address of the active per set. */ - return (__u32) test_thread_flag(TIF_SINGLE_STEP) ? - 0 : child->thread.per_user.start; - else if (addr == offsetof(struct compat_per_struct_kernel, cr11)) - /* End address of the active per set. */ - return test_thread_flag(TIF_SINGLE_STEP) ? - PSW32_ADDR_INSN : child->thread.per_user.end; - else if (addr == offsetof(struct compat_per_struct_kernel, bits)) - /* Single-step bit. */ - return (__u32) test_thread_flag(TIF_SINGLE_STEP) ? - 0x80000000 : 0; - else if (addr == offsetof(struct compat_per_struct_kernel, starting_addr)) - /* Start address of the user specified per set. */ - return (__u32) child->thread.per_user.start; - else if (addr == offsetof(struct compat_per_struct_kernel, ending_addr)) - /* End address of the user specified per set. 
*/ - return (__u32) child->thread.per_user.end; - else if (addr == offsetof(struct compat_per_struct_kernel, perc_atmid)) - /* PER code, ATMID and AI of the last PER trap */ - return (__u32) child->thread.per_event.cause << 16; - else if (addr == offsetof(struct compat_per_struct_kernel, address)) - /* Address of the last PER trap */ - return (__u32) child->thread.per_event.address; - else if (addr == offsetof(struct compat_per_struct_kernel, access_id)) - /* Access id of the last PER trap */ - return (__u32) child->thread.per_event.paid << 24; - return 0; -} - -/* - * Same as peek_user but for a 31 bit program. - */ -static u32 __peek_user_compat(struct task_struct *child, addr_t addr) -{ - addr_t offset; - __u32 tmp; - - if (addr < offsetof(struct compat_user, regs.acrs)) { - struct pt_regs *regs = task_pt_regs(child); - /* - * psw and gprs are stored on the stack - */ - if (addr == offsetof(struct compat_user, regs.psw.mask)) { - /* Fake a 31 bit psw mask. */ - tmp = (__u32)(regs->psw.mask >> 32); - tmp &= PSW32_MASK_USER | PSW32_MASK_RI; - tmp |= PSW32_USER_BITS; - } else if (addr == offsetof(struct compat_user, regs.psw.addr)) { - /* Fake a 31 bit psw address. */ - tmp = (__u32) regs->psw.addr | - (__u32)(regs->psw.mask & PSW_MASK_BA); - } else { - /* gpr 0-15 */ - tmp = *(__u32 *)((addr_t) &regs->psw + addr*2 + 4); - } - } else if (addr < offsetof(struct compat_user, regs.orig_gpr2)) { - /* - * access registers are stored in the thread structure - */ - offset = addr - offsetof(struct compat_user, regs.acrs); - tmp = *(__u32*)((addr_t) &child->thread.acrs + offset); - - } else if (addr == offsetof(struct compat_user, regs.orig_gpr2)) { - /* - * orig_gpr2 is stored on the kernel stack - */ - tmp = *(__u32*)((addr_t) &task_pt_regs(child)->orig_gpr2 + 4); - - } else if (addr < offsetof(struct compat_user, regs.fp_regs)) { - /* - * prevent reads of padding hole between - * orig_gpr2 and fp_regs on s390. - */ - tmp = 0; - - } else if (addr == offsetof(struct compat_user, regs.fp_regs.fpc)) { - /* - * floating point control reg. is in the thread structure - */ - tmp = child->thread.ufpu.fpc; - - } else if (addr < offsetof(struct compat_user, regs.fp_regs) + sizeof(s390_fp_regs)) { - /* - * floating point regs. are in the child->thread.ufpu.vxrs array - */ - offset = addr - offsetof(struct compat_user, regs.fp_regs.fprs); - tmp = *(__u32 *)((addr_t)child->thread.ufpu.vxrs + 2 * offset); - } else if (addr < offsetof(struct compat_user, regs.per_info) + sizeof(struct compat_per_struct_kernel)) { - /* - * Handle access to the per_info structure. - */ - addr -= offsetof(struct compat_user, regs.per_info); - tmp = __peek_user_per_compat(child, addr); - - } else - tmp = 0; - - return tmp; -} - -static int peek_user_compat(struct task_struct *child, - addr_t addr, addr_t data) -{ - __u32 tmp; - - if (!is_compat_task() || (addr & 3) || addr > sizeof(struct user) - 3) - return -EIO; - - tmp = __peek_user_compat(child, addr); - return put_user(tmp, (__u32 __user *) data); -} - -/* - * Same as poke_user_per but for a 31 bit program. - */ -static inline void __poke_user_per_compat(struct task_struct *child, - addr_t addr, __u32 data) -{ - if (addr == offsetof(struct compat_per_struct_kernel, cr9)) - /* PER event mask of the user specified per set. */ - child->thread.per_user.control = - data & (PER_EVENT_MASK | PER_CONTROL_MASK); - else if (addr == offsetof(struct compat_per_struct_kernel, starting_addr)) - /* Starting address of the user specified per set. 
*/ - child->thread.per_user.start = data; - else if (addr == offsetof(struct compat_per_struct_kernel, ending_addr)) - /* Ending address of the user specified per set. */ - child->thread.per_user.end = data; -} - -/* - * Same as poke_user but for a 31 bit program. - */ -static int __poke_user_compat(struct task_struct *child, - addr_t addr, addr_t data) -{ - __u32 tmp = (__u32) data; - addr_t offset; - - if (addr < offsetof(struct compat_user, regs.acrs)) { - struct pt_regs *regs = task_pt_regs(child); - /* - * psw, gprs, acrs and orig_gpr2 are stored on the stack - */ - if (addr == offsetof(struct compat_user, regs.psw.mask)) { - __u32 mask = PSW32_MASK_USER; - - mask |= is_ri_task(child) ? PSW32_MASK_RI : 0; - /* Build a 64 bit psw mask from 31 bit mask. */ - if ((tmp ^ PSW32_USER_BITS) & ~mask) - /* Invalid psw mask. */ - return -EINVAL; - if ((data & PSW32_MASK_ASC) == PSW32_ASC_HOME) - /* Invalid address-space-control bits */ - return -EINVAL; - regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) | - (regs->psw.mask & PSW_MASK_BA) | - (__u64)(tmp & mask) << 32; - } else if (addr == offsetof(struct compat_user, regs.psw.addr)) { - /* Build a 64 bit psw address from 31 bit address. */ - regs->psw.addr = (__u64) tmp & PSW32_ADDR_INSN; - /* Transfer 31 bit amode bit to psw mask. */ - regs->psw.mask = (regs->psw.mask & ~PSW_MASK_BA) | - (__u64)(tmp & PSW32_ADDR_AMODE); - } else { - if (test_pt_regs_flag(regs, PIF_SYSCALL) && - addr == offsetof(struct compat_user, regs.gprs[2])) { - struct pt_regs *regs = task_pt_regs(child); - - regs->int_code = 0x20000 | (data & 0xffff); - } - /* gpr 0-15 */ - *(__u32*)((addr_t) &regs->psw + addr*2 + 4) = tmp; - } - } else if (addr < offsetof(struct compat_user, regs.orig_gpr2)) { - /* - * access registers are stored in the thread structure - */ - offset = addr - offsetof(struct compat_user, regs.acrs); - *(__u32*)((addr_t) &child->thread.acrs + offset) = tmp; - - } else if (addr == offsetof(struct compat_user, regs.orig_gpr2)) { - /* - * orig_gpr2 is stored on the kernel stack - */ - *(__u32*)((addr_t) &task_pt_regs(child)->orig_gpr2 + 4) = tmp; - - } else if (addr < offsetof(struct compat_user, regs.fp_regs)) { - /* - * prevent writess of padding hole between - * orig_gpr2 and fp_regs on s390. - */ - return 0; - - } else if (addr == offsetof(struct compat_user, regs.fp_regs.fpc)) { - /* - * floating point control reg. is in the thread structure - */ - child->thread.ufpu.fpc = data; - - } else if (addr < offsetof(struct compat_user, regs.fp_regs) + sizeof(s390_fp_regs)) { - /* - * floating point regs. are in the child->thread.ufpu.vxrs array - */ - offset = addr - offsetof(struct compat_user, regs.fp_regs.fprs); - *(__u32 *)((addr_t)child->thread.ufpu.vxrs + 2 * offset) = tmp; - } else if (addr < offsetof(struct compat_user, regs.per_info) + sizeof(struct compat_per_struct_kernel)) { - /* - * Handle access to the per_info structure. 
- */ - addr -= offsetof(struct compat_user, regs.per_info); - __poke_user_per_compat(child, addr, data); - } - - return 0; -} - -static int poke_user_compat(struct task_struct *child, - addr_t addr, addr_t data) -{ - if (!is_compat_task() || (addr & 3) || - addr > sizeof(struct compat_user) - 3) - return -EIO; - - return __poke_user_compat(child, addr, data); -} - -long compat_arch_ptrace(struct task_struct *child, compat_long_t request, - compat_ulong_t caddr, compat_ulong_t cdata) -{ - unsigned long addr = caddr; - unsigned long data = cdata; - compat_ptrace_area parea; - int copied, ret; - - switch (request) { - case PTRACE_PEEKUSR: - /* read the word at location addr in the USER area. */ - return peek_user_compat(child, addr, data); - - case PTRACE_POKEUSR: - /* write the word at location addr in the USER area */ - return poke_user_compat(child, addr, data); - - case PTRACE_PEEKUSR_AREA: - case PTRACE_POKEUSR_AREA: - if (copy_from_user(&parea, (void __force __user *) addr, - sizeof(parea))) - return -EFAULT; - addr = parea.kernel_addr; - data = parea.process_addr; - copied = 0; - while (copied < parea.len) { - if (request == PTRACE_PEEKUSR_AREA) - ret = peek_user_compat(child, addr, data); - else { - __u32 utmp; - if (get_user(utmp, - (__u32 __force __user *) data)) - return -EFAULT; - ret = poke_user_compat(child, addr, utmp); - } - if (ret) - return ret; - addr += sizeof(unsigned int); - data += sizeof(unsigned int); - copied += sizeof(unsigned int); - } - return 0; - case PTRACE_GET_LAST_BREAK: - return put_user(child->thread.last_break, (unsigned int __user *)data); - } - return compat_ptrace_request(child, request, addr, data); -} -#endif - /* * user_regset definitions. */ @@ -1297,225 +990,8 @@ static const struct user_regset_view user_s390_view = { .n = ARRAY_SIZE(s390_regsets) }; -#ifdef CONFIG_COMPAT -static int s390_compat_regs_get(struct task_struct *target, - const struct user_regset *regset, - struct membuf to) -{ - unsigned n; - - if (target == current) - save_access_regs(target->thread.acrs); - - for (n = 0; n < sizeof(s390_compat_regs); n += sizeof(compat_ulong_t)) - membuf_store(&to, __peek_user_compat(target, n)); - return 0; -} - -static int s390_compat_regs_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - int rc = 0; - - if (target == current) - save_access_regs(target->thread.acrs); - - if (kbuf) { - const compat_ulong_t *k = kbuf; - while (count > 0 && !rc) { - rc = __poke_user_compat(target, pos, *k++); - count -= sizeof(*k); - pos += sizeof(*k); - } - } else { - const compat_ulong_t __user *u = ubuf; - while (count > 0 && !rc) { - compat_ulong_t word; - rc = __get_user(word, u++); - if (rc) - break; - rc = __poke_user_compat(target, pos, word); - count -= sizeof(*u); - pos += sizeof(*u); - } - } - - if (rc == 0 && target == current) - restore_access_regs(target->thread.acrs); - - return rc; -} - -static int s390_compat_regs_high_get(struct task_struct *target, - const struct user_regset *regset, - struct membuf to) -{ - compat_ulong_t *gprs_high; - int i; - - gprs_high = (compat_ulong_t *)task_pt_regs(target)->gprs; - for (i = 0; i < NUM_GPRS; i++, gprs_high += 2) - membuf_store(&to, *gprs_high); - return 0; -} - -static int s390_compat_regs_high_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - compat_ulong_t *gprs_high; - int rc = 0; - - gprs_high 
= (compat_ulong_t *) - &task_pt_regs(target)->gprs[pos / sizeof(compat_ulong_t)]; - if (kbuf) { - const compat_ulong_t *k = kbuf; - while (count > 0) { - *gprs_high = *k++; - *gprs_high += 2; - count -= sizeof(*k); - } - } else { - const compat_ulong_t __user *u = ubuf; - while (count > 0 && !rc) { - unsigned long word; - rc = __get_user(word, u++); - if (rc) - break; - *gprs_high = word; - *gprs_high += 2; - count -= sizeof(*u); - } - } - - return rc; -} - -static int s390_compat_last_break_get(struct task_struct *target, - const struct user_regset *regset, - struct membuf to) -{ - compat_ulong_t last_break = target->thread.last_break; - - return membuf_store(&to, (unsigned long)last_break); -} - -static int s390_compat_last_break_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - return 0; -} - -static const struct user_regset s390_compat_regsets[] = { - { - USER_REGSET_NOTE_TYPE(PRSTATUS), - .n = sizeof(s390_compat_regs) / sizeof(compat_long_t), - .size = sizeof(compat_long_t), - .align = sizeof(compat_long_t), - .regset_get = s390_compat_regs_get, - .set = s390_compat_regs_set, - }, - { - USER_REGSET_NOTE_TYPE(PRFPREG), - .n = sizeof(s390_fp_regs) / sizeof(compat_long_t), - .size = sizeof(compat_long_t), - .align = sizeof(compat_long_t), - .regset_get = s390_fpregs_get, - .set = s390_fpregs_set, - }, - { - USER_REGSET_NOTE_TYPE(S390_SYSTEM_CALL), - .n = 1, - .size = sizeof(compat_uint_t), - .align = sizeof(compat_uint_t), - .regset_get = s390_system_call_get, - .set = s390_system_call_set, - }, - { - USER_REGSET_NOTE_TYPE(S390_LAST_BREAK), - .n = 1, - .size = sizeof(long), - .align = sizeof(long), - .regset_get = s390_compat_last_break_get, - .set = s390_compat_last_break_set, - }, - { - USER_REGSET_NOTE_TYPE(S390_TDB), - .n = 1, - .size = 256, - .align = 1, - .regset_get = s390_tdb_get, - .set = s390_tdb_set, - }, - { - USER_REGSET_NOTE_TYPE(S390_VXRS_LOW), - .n = __NUM_VXRS_LOW, - .size = sizeof(__u64), - .align = sizeof(__u64), - .regset_get = s390_vxrs_low_get, - .set = s390_vxrs_low_set, - }, - { - USER_REGSET_NOTE_TYPE(S390_VXRS_HIGH), - .n = __NUM_VXRS_HIGH, - .size = sizeof(__vector128), - .align = sizeof(__vector128), - .regset_get = s390_vxrs_high_get, - .set = s390_vxrs_high_set, - }, - { - USER_REGSET_NOTE_TYPE(S390_HIGH_GPRS), - .n = sizeof(s390_compat_regs_high) / sizeof(compat_long_t), - .size = sizeof(compat_long_t), - .align = sizeof(compat_long_t), - .regset_get = s390_compat_regs_high_get, - .set = s390_compat_regs_high_set, - }, - { - USER_REGSET_NOTE_TYPE(S390_GS_CB), - .n = sizeof(struct gs_cb) / sizeof(__u64), - .size = sizeof(__u64), - .align = sizeof(__u64), - .regset_get = s390_gs_cb_get, - .set = s390_gs_cb_set, - }, - { - USER_REGSET_NOTE_TYPE(S390_GS_BC), - .n = sizeof(struct gs_cb) / sizeof(__u64), - .size = sizeof(__u64), - .align = sizeof(__u64), - .regset_get = s390_gs_bc_get, - .set = s390_gs_bc_set, - }, - { - USER_REGSET_NOTE_TYPE(S390_RI_CB), - .n = sizeof(struct runtime_instr_cb) / sizeof(__u64), - .size = sizeof(__u64), - .align = sizeof(__u64), - .regset_get = s390_runtime_instr_get, - .set = s390_runtime_instr_set, - }, -}; - -static const struct user_regset_view user_s390_compat_view = { - .name = "s390", - .e_machine = EM_S390, - .regsets = s390_compat_regsets, - .n = ARRAY_SIZE(s390_compat_regsets) -}; -#endif - const struct user_regset_view *task_user_regset_view(struct task_struct *task) { -#ifdef CONFIG_COMPAT - if 
(test_tsk_thread_flag(task, TIF_31BIT)) - return &user_s390_compat_view; -#endif return &user_s390_view; } diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 892fce2b7549..c1fe0b53c5ac 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -13,8 +13,7 @@ * This file handles the architecture-dependent parts of initialization */ -#define KMSG_COMPONENT "setup" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "setup: " fmt #include <linux/errno.h> #include <linux/export.h> @@ -47,7 +46,6 @@ #include <linux/kexec.h> #include <linux/crash_dump.h> #include <linux/memory.h> -#include <linux/compat.h> #include <linux/start_kernel.h> #include <linux/hugetlb.h> #include <linux/kmemleak.h> @@ -112,7 +110,7 @@ struct exception_table_entry __amode31_ref *__stop_amode31_ex_table = _stop_amod * Because the AMODE31 sections are relocated below 2G at startup, * the content of control registers CR2, CR5 and CR15 must be updated * with new addresses after the relocation. The initial initialization of - * control registers occurs in head64.S and then gets updated again after AMODE31 + * control registers occurs in head.S and then gets updated again after AMODE31 * relocation. We must access the relevant AMODE31 tables indirectly via * pointers placed in the .amode31.refs linker section. Those pointers get * updated automatically during AMODE31 relocation and always contain a valid diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index e48013cd832c..4874de5edea0 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -27,7 +27,6 @@ #include <linux/personality.h> #include <linux/binfmts.h> #include <linux/syscalls.h> -#include <linux/compat.h> #include <asm/ucontext.h> #include <linux/uaccess.h> #include <asm/vdso-symbols.h> @@ -290,12 +289,6 @@ static int setup_frame(int sig, struct k_sigaction *ka, unsigned long restorer; size_t frame_size; - /* - * gprs_high are only present for a 31-bit task running on - * a 64-bit kernel (see compat_signal.c) but the space for - * gprs_high need to be allocated if vector registers are - * included in the signal frame on a 31-bit system. - */ frame_size = sizeof(*frame) - sizeof(frame->sregs_ext); if (cpu_has_vx()) frame_size += sizeof(frame->sregs_ext); @@ -333,7 +326,7 @@ static int setup_frame(int sig, struct k_sigaction *ka, if (ka->sa.sa_flags & SA_RESTORER) restorer = (unsigned long) ka->sa.sa_restorer; else - restorer = VDSO64_SYMBOL(current, sigreturn); + restorer = VDSO_SYMBOL(current, sigreturn); /* Set up registers for signal handler */ regs->gprs[14] = restorer; @@ -367,12 +360,6 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, size_t frame_size; frame_size = sizeof(struct rt_sigframe) - sizeof(_sigregs_ext); - /* - * gprs_high are only present for a 31-bit task running on - * a 64-bit kernel (see compat_signal.c) but the space for - * gprs_high need to be allocated if vector registers are - * included in the signal frame on a 31-bit system. 
- */ uc_flags = 0; if (cpu_has_vx()) { frame_size += sizeof(_sigregs_ext); @@ -391,7 +378,7 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, if (ksig->ka.sa.sa_flags & SA_RESTORER) restorer = (unsigned long) ksig->ka.sa.sa_restorer; else - restorer = VDSO64_SYMBOL(current, rt_sigreturn); + restorer = VDSO_SYMBOL(current, rt_sigreturn); /* Create siginfo on the signal stack */ if (copy_siginfo_to_user(&frame->info, &ksig->info)) @@ -490,10 +477,7 @@ void arch_do_signal_or_restart(struct pt_regs *regs) clear_pt_regs_flag(regs, PIF_SYSCALL); rseq_signal_deliver(&ksig, regs); - if (is_compat_task()) - handle_signal32(&ksig, oldset, regs); - else - handle_signal(&ksig, oldset, regs); + handle_signal(&ksig, oldset, regs); return; } @@ -506,10 +490,7 @@ void arch_do_signal_or_restart(struct pt_regs *regs) /* Restart with sys_restart_syscall */ regs->gprs[2] = regs->orig_gpr2; current->restart_block.arch_data = regs->psw.addr; - if (is_compat_task()) - regs->psw.addr = VDSO32_SYMBOL(current, restart_syscall); - else - regs->psw.addr = VDSO64_SYMBOL(current, restart_syscall); + regs->psw.addr = VDSO_SYMBOL(current, restart_syscall); if (test_thread_flag(TIF_SINGLE_STEP)) clear_thread_flag(TIF_PER_TRAP); break; diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index da84c0dc6b7e..b7429f30afc1 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -15,8 +15,7 @@ * operates on physical cpu numbers needs to go into smp.c. */ -#define KMSG_COMPONENT "cpu" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "cpu: " fmt #include <linux/cpufeature.h> #include <linux/workqueue.h> @@ -281,6 +280,9 @@ static void pcpu_attach_task(int cpu, struct task_struct *tsk) lc->hardirq_timer = tsk->thread.hardirq_timer; lc->softirq_timer = tsk->thread.softirq_timer; lc->steal_timer = 0; +#ifdef CONFIG_STACKPROTECTOR + lc->stack_canary = tsk->stack_canary; +#endif } static void pcpu_start_fn(int cpu, void (*func)(void *), void *data) @@ -305,9 +307,9 @@ static void __pcpu_delegate(pcpu_delegate_fn *func, void *data) func(data); /* should not return */ } -static void pcpu_delegate(struct pcpu *pcpu, int cpu, - pcpu_delegate_fn *func, - void *data, unsigned long stack) +static void __noreturn pcpu_delegate(struct pcpu *pcpu, int cpu, + pcpu_delegate_fn *func, + void *data, unsigned long stack) { struct lowcore *lc, *abs_lc; unsigned int source_cpu; @@ -370,7 +372,7 @@ static int pcpu_set_smt(unsigned int mtid) /* * Call function on the ipl CPU. 
*/ -void smp_call_ipl_cpu(void (*func)(void *), void *data) +void __noreturn smp_call_ipl_cpu(void (*func)(void *), void *data) { struct lowcore *lc = lowcore_ptr[0]; @@ -697,6 +699,7 @@ static void __ref smp_get_core_info(struct sclp_core_info *info, int early) continue; info->core[info->configured].core_id = address >> smp_cpu_mt_shift; + info->core[info->configured].type = boot_core_type; info->configured++; } info->combined = info->configured; diff --git a/arch/s390/kernel/stackprotector.c b/arch/s390/kernel/stackprotector.c new file mode 100644 index 000000000000..d4e40483f008 --- /dev/null +++ b/arch/s390/kernel/stackprotector.c @@ -0,0 +1,156 @@ +// SPDX-License-Identifier: GPL-2.0 + +#ifndef pr_fmt +#define pr_fmt(fmt) "stackprot: " fmt +#endif + +#include <linux/export.h> +#include <linux/uaccess.h> +#include <linux/printk.h> +#include <asm/abs_lowcore.h> +#include <asm/sections.h> +#include <asm/machine.h> +#include <asm/asm-offsets.h> +#include <asm/arch-stackprotector.h> + +#ifdef __DECOMPRESSOR + +#define DEBUGP boot_debug +#define EMERGP boot_emerg +#define PANIC boot_panic + +#else /* __DECOMPRESSOR */ + +#define DEBUGP pr_debug +#define EMERGP pr_emerg +#define PANIC panic + +#endif /* __DECOMPRESSOR */ + +int __bootdata_preserved(stack_protector_debug); + +unsigned long __stack_chk_guard; +EXPORT_SYMBOL(__stack_chk_guard); + +struct insn_ril { + u8 opc1 : 8; + u8 r1 : 4; + u8 opc2 : 4; + u32 imm; +} __packed; + +/* + * Convert a virtual instruction address to a real instruction address. The + * decompressor needs to patch instructions within the kernel image based on + * their virtual addresses, while dynamic address translation is still + * disabled. Therefore a translation from virtual kernel image addresses to + * the corresponding physical addresses is required. + * + * After dynamic address translation is enabled and when the kernel needs to + * patch instructions such a translation is not required since the addresses + * are identical. 
+ */ +static struct insn_ril *vaddress_to_insn(unsigned long vaddress) +{ +#ifdef __DECOMPRESSOR + return (struct insn_ril *)__kernel_pa(vaddress); +#else + return (struct insn_ril *)vaddress; +#endif +} + +static unsigned long insn_to_vaddress(struct insn_ril *insn) +{ +#ifdef __DECOMPRESSOR + return (unsigned long)__kernel_va(insn); +#else + return (unsigned long)insn; +#endif +} + +#define INSN_RIL_STRING_SIZE (sizeof(struct insn_ril) * 2 + 1) + +static void insn_ril_to_string(char *str, struct insn_ril *insn) +{ + u8 *ptr = (u8 *)insn; + int i; + + for (i = 0; i < sizeof(*insn); i++) + hex_byte_pack(&str[2 * i], ptr[i]); + str[2 * i] = 0; +} + +static void stack_protector_dump(struct insn_ril *old, struct insn_ril *new) +{ + char ostr[INSN_RIL_STRING_SIZE]; + char nstr[INSN_RIL_STRING_SIZE]; + + insn_ril_to_string(ostr, old); + insn_ril_to_string(nstr, new); + DEBUGP("%016lx: %s -> %s\n", insn_to_vaddress(old), ostr, nstr); +} + +static int stack_protector_verify(struct insn_ril *insn, unsigned long kernel_start) +{ + char istr[INSN_RIL_STRING_SIZE]; + unsigned long vaddress, offset; + + /* larl */ + if (insn->opc1 == 0xc0 && insn->opc2 == 0x0) + return 0; + /* lgrl */ + if (insn->opc1 == 0xc4 && insn->opc2 == 0x8) + return 0; + insn_ril_to_string(istr, insn); + vaddress = insn_to_vaddress(insn); + if (__is_defined(__DECOMPRESSOR)) { + offset = (unsigned long)insn - kernel_start + TEXT_OFFSET; + EMERGP("Unexpected instruction at %016lx/%016lx: %s\n", vaddress, offset, istr); + PANIC("Stackprotector error\n"); + } else { + EMERGP("Unexpected instruction at %016lx: %s\n", vaddress, istr); + } + return -EINVAL; +} + +int __stack_protector_apply(unsigned long *start, unsigned long *end, unsigned long kernel_start) +{ + unsigned long canary, *loc; + struct insn_ril *insn, new; + int rc; + + /* + * Convert LARL/LGRL instructions to LLILF so register R1 contains the + * address of the per-cpu / per-process stack canary: + * + * LARL/LGRL R1,__stack_chk_guard => LLILF R1,__lc_stack_canary + */ + canary = __LC_STACK_CANARY; + if (machine_has_relocated_lowcore()) + canary += LOWCORE_ALT_ADDRESS; + for (loc = start; loc < end; loc++) { + insn = vaddress_to_insn(*loc); + rc = stack_protector_verify(insn, kernel_start); + if (rc) + return rc; + new = *insn; + new.opc1 = 0xc0; + new.opc2 = 0xf; + new.imm = canary; + if (stack_protector_debug) + stack_protector_dump(insn, &new); + s390_kernel_write(insn, &new, sizeof(*insn)); + } + return 0; +} + +#ifdef __DECOMPRESSOR +void __stack_protector_apply_early(unsigned long kernel_start) +{ + unsigned long *start, *end; + + start = (unsigned long *)vmlinux.stack_prot_start; + end = (unsigned long *)vmlinux.stack_prot_end; + __stack_protector_apply(start, end, kernel_start); +} +#endif diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c index b153a395f46d..3aae7f70e6ab 100644 --- a/arch/s390/kernel/stacktrace.c +++ b/arch/s390/kernel/stacktrace.c @@ -8,7 +8,6 @@ #include <linux/perf_event.h> #include <linux/stacktrace.h> #include <linux/uaccess.h> -#include <linux/compat.h> #include <asm/asm-offsets.h> #include <asm/stacktrace.h> #include <asm/unwind.h> @@ -107,8 +106,6 @@ void arch_stack_walk_user_common(stack_trace_consume_fn consume_entry, void *coo unsigned long ip, sp; bool first = true; - if (is_compat_task()) - return; if (!current->mm) return; ip = instruction_pointer(regs); diff --git a/arch/s390/kernel/sthyi.c b/arch/s390/kernel/sthyi.c index f4ccdbed4b89..5eae2e25997a 100644 --- a/arch/s390/kernel/sthyi.c +++ 
b/arch/s390/kernel/sthyi.c @@ -253,7 +253,7 @@ static void fill_diag_mac(struct sthyi_sctns *sctns, sctns->mac.infmval1 |= MAC_CNT_VLD; } -/* Returns a pointer to the the next partition block. */ +/* Returns a pointer to the next partition block. */ static struct diag204_x_part_block *lpar_cpu_inf(struct lpar_cpu_inf *part_inf, bool this_lpar, void *diag224_buf, diff --git a/arch/s390/kernel/syscall.c b/arch/s390/kernel/syscall.c index 4fee74553ca2..795b6cca74c9 100644 --- a/arch/s390/kernel/syscall.c +++ b/arch/s390/kernel/syscall.c @@ -39,6 +39,16 @@ #include "entry.h" +#define __SYSCALL(nr, sym) long __s390x_##sym(struct pt_regs *); +#include <asm/syscall_table.h> +#undef __SYSCALL + +#define __SYSCALL(nr, sym) [nr] = (__s390x_##sym), +const sys_call_ptr_t sys_call_table[__NR_syscalls] = { +#include <asm/syscall_table.h> +}; +#undef __SYSCALL + #ifdef CONFIG_SYSVIPC /* * sys_ipc() is the de-multiplexer for the SysV IPC calls. @@ -122,7 +132,7 @@ void noinstr __do_syscall(struct pt_regs *regs, int per_trap) goto out; regs->gprs[2] = -ENOSYS; if (likely(nr < NR_syscalls)) - regs->gprs[2] = current->thread.sys_call_table[nr](regs); + regs->gprs[2] = sys_call_table[nr](regs); out: syscall_exit_to_user_mode(regs); } diff --git a/arch/s390/kernel/syscalls/Makefile b/arch/s390/kernel/syscalls/Makefile index c5d958a09ff4..d5fca0ca0890 100644 --- a/arch/s390/kernel/syscalls/Makefile +++ b/arch/s390/kernel/syscalls/Makefile @@ -1,48 +1,32 @@ # SPDX-License-Identifier: GPL-2.0 +kapi := arch/$(SRCARCH)/include/generated/asm +uapi := arch/$(SRCARCH)/include/generated/uapi/asm -gen := arch/$(ARCH)/include/generated -kapi := $(gen)/asm -uapi := $(gen)/uapi/asm - -syscall := $(src)/syscall.tbl -systbl := $(src)/syscalltbl - -gen-y := $(kapi)/syscall_table.h -kapi-hdrs-y := $(kapi)/unistd_nr.h -uapi-hdrs-y := $(uapi)/unistd_32.h -uapi-hdrs-y += $(uapi)/unistd_64.h - -targets += $(addprefix ../../../../,$(gen-y) $(kapi-hdrs-y) $(uapi-hdrs-y)) - -PHONY += kapi uapi - -kapi: $(gen-y) $(kapi-hdrs-y) -uapi: $(uapi-hdrs-y) - - -# Create output directory if not already present $(shell mkdir -p $(uapi) $(kapi)) -quiet_cmd_syshdr = SYSHDR $@ - cmd_syshdr = $(CONFIG_SHELL) '$(systbl)' -H -a $(syshdr_abi_$(basetarget)) -f "$@" < $< > $@ - -quiet_cmd_sysnr = SYSNR $@ - cmd_sysnr = $(CONFIG_SHELL) '$(systbl)' -N -a $(sysnr_abi_$(basetarget)) < $< > $@ +syscall := $(src)/syscall.tbl +syshdr := $(srctree)/scripts/syscallhdr.sh +systbl := $(srctree)/scripts/syscalltbl.sh -quiet_cmd_syscalls = SYSTBL $@ - cmd_syscalls = $(CONFIG_SHELL) '$(systbl)' -S < $< > $@ +quiet_cmd_syshdr = SYSHDR $@ + cmd_syshdr = $(CONFIG_SHELL) $(syshdr) --emit-nr --abis common,$* $< $@ -syshdr_abi_unistd_32 := common,32 -$(uapi)/unistd_32.h: $(syscall) $(systbl) FORCE - $(call if_changed,syshdr) +quiet_cmd_systbl = SYSTBL $@ + cmd_systbl = $(CONFIG_SHELL) $(systbl) --abis common,$* $< $@ -syshdr_abi_unistd_64 := common,64 -$(uapi)/unistd_64.h: $(syscall) $(systbl) FORCE +$(uapi)/unistd_%.h: $(syscall) $(syshdr) FORCE $(call if_changed,syshdr) $(kapi)/syscall_table.h: $(syscall) $(systbl) FORCE - $(call if_changed,syscalls) + $(call if_changed,systbl) + +uapisyshdr-y += unistd_64.h +kapisyshdr-y += syscall_table.h + +uapisyshdr-y := $(addprefix $(uapi)/, $(uapisyshdr-y)) +kapisyshdr-y := $(addprefix $(kapi)/, $(kapisyshdr-y)) +targets += $(addprefix ../../../../, $(uapisyshdr-y) $(kapisyshdr-y)) -sysnr_abi_unistd_nr := common,32,64 -$(kapi)/unistd_nr.h: $(syscall) $(systbl) FORCE - $(call if_changed,sysnr) +PHONY += all +all: $(uapisyshdr-y) 
$(kapisyshdr-y) + @: diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index 8a6744d658db..417ed16b3c63 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -3,472 +3,397 @@ # System call table for s390 # # Format: +# <nr> <abi> <syscall> <entry> # -# <nr> <abi> <syscall> <entry-64bit> <compat-entry> -# -# where <abi> can be common, 64, or 32 +# <abi> is always common. -1 common exit sys_exit sys_exit -2 common fork sys_fork sys_fork -3 common read sys_read compat_sys_s390_read -4 common write sys_write compat_sys_s390_write -5 common open sys_open compat_sys_open -6 common close sys_close sys_close -7 common restart_syscall sys_restart_syscall sys_restart_syscall -8 common creat sys_creat sys_creat -9 common link sys_link sys_link -10 common unlink sys_unlink sys_unlink -11 common execve sys_execve compat_sys_execve -12 common chdir sys_chdir sys_chdir -13 32 time - sys_time32 -14 common mknod sys_mknod sys_mknod -15 common chmod sys_chmod sys_chmod -16 32 lchown - sys_lchown16 -19 common lseek sys_lseek compat_sys_lseek -20 common getpid sys_getpid sys_getpid -21 common mount sys_mount sys_mount -22 common umount sys_oldumount sys_oldumount -23 32 setuid - sys_setuid16 -24 32 getuid - sys_getuid16 -25 32 stime - sys_stime32 -26 common ptrace sys_ptrace compat_sys_ptrace -27 common alarm sys_alarm sys_alarm -29 common pause sys_pause sys_pause -30 common utime sys_utime sys_utime32 -33 common access sys_access sys_access -34 common nice sys_nice sys_nice -36 common sync sys_sync sys_sync -37 common kill sys_kill sys_kill -38 common rename sys_rename sys_rename -39 common mkdir sys_mkdir sys_mkdir -40 common rmdir sys_rmdir sys_rmdir -41 common dup sys_dup sys_dup -42 common pipe sys_pipe sys_pipe -43 common times sys_times compat_sys_times -45 common brk sys_brk sys_brk -46 32 setgid - sys_setgid16 -47 32 getgid - sys_getgid16 -48 common signal sys_signal sys_signal -49 32 geteuid - sys_geteuid16 -50 32 getegid - sys_getegid16 -51 common acct sys_acct sys_acct -52 common umount2 sys_umount sys_umount -54 common ioctl sys_ioctl compat_sys_ioctl -55 common fcntl sys_fcntl compat_sys_fcntl -57 common setpgid sys_setpgid sys_setpgid -60 common umask sys_umask sys_umask -61 common chroot sys_chroot sys_chroot -62 common ustat sys_ustat compat_sys_ustat -63 common dup2 sys_dup2 sys_dup2 -64 common getppid sys_getppid sys_getppid -65 common getpgrp sys_getpgrp sys_getpgrp -66 common setsid sys_setsid sys_setsid -67 common sigaction sys_sigaction compat_sys_sigaction -70 32 setreuid - sys_setreuid16 -71 32 setregid - sys_setregid16 -72 common sigsuspend sys_sigsuspend sys_sigsuspend -73 common sigpending sys_sigpending compat_sys_sigpending -74 common sethostname sys_sethostname sys_sethostname -75 common setrlimit sys_setrlimit compat_sys_setrlimit -76 32 getrlimit - compat_sys_old_getrlimit -77 common getrusage sys_getrusage compat_sys_getrusage -78 common gettimeofday sys_gettimeofday compat_sys_gettimeofday -79 common settimeofday sys_settimeofday compat_sys_settimeofday -80 32 getgroups - sys_getgroups16 -81 32 setgroups - sys_setgroups16 -83 common symlink sys_symlink sys_symlink -85 common readlink sys_readlink sys_readlink -86 common uselib sys_uselib sys_uselib -87 common swapon sys_swapon sys_swapon -88 common reboot sys_reboot sys_reboot -89 common readdir - compat_sys_old_readdir -90 common mmap sys_old_mmap compat_sys_s390_old_mmap -91 common munmap sys_munmap sys_munmap -92 common truncate 
sys_truncate compat_sys_truncate -93 common ftruncate sys_ftruncate compat_sys_ftruncate -94 common fchmod sys_fchmod sys_fchmod -95 32 fchown - sys_fchown16 -96 common getpriority sys_getpriority sys_getpriority -97 common setpriority sys_setpriority sys_setpriority -99 common statfs sys_statfs compat_sys_statfs -100 common fstatfs sys_fstatfs compat_sys_fstatfs -101 32 ioperm - - -102 common socketcall sys_socketcall compat_sys_socketcall -103 common syslog sys_syslog sys_syslog -104 common setitimer sys_setitimer compat_sys_setitimer -105 common getitimer sys_getitimer compat_sys_getitimer -106 common stat sys_newstat compat_sys_newstat -107 common lstat sys_newlstat compat_sys_newlstat -108 common fstat sys_newfstat compat_sys_newfstat -110 common lookup_dcookie - - -111 common vhangup sys_vhangup sys_vhangup -112 common idle - - -114 common wait4 sys_wait4 compat_sys_wait4 -115 common swapoff sys_swapoff sys_swapoff -116 common sysinfo sys_sysinfo compat_sys_sysinfo -117 common ipc sys_s390_ipc compat_sys_s390_ipc -118 common fsync sys_fsync sys_fsync -119 common sigreturn sys_sigreturn compat_sys_sigreturn -120 common clone sys_clone sys_clone -121 common setdomainname sys_setdomainname sys_setdomainname -122 common uname sys_newuname sys_newuname -124 common adjtimex sys_adjtimex sys_adjtimex_time32 -125 common mprotect sys_mprotect sys_mprotect -126 common sigprocmask sys_sigprocmask compat_sys_sigprocmask -127 common create_module - - -128 common init_module sys_init_module sys_init_module -129 common delete_module sys_delete_module sys_delete_module -130 common get_kernel_syms - - -131 common quotactl sys_quotactl sys_quotactl -132 common getpgid sys_getpgid sys_getpgid -133 common fchdir sys_fchdir sys_fchdir -134 common bdflush sys_ni_syscall sys_ni_syscall -135 common sysfs sys_sysfs sys_sysfs -136 common personality sys_s390_personality sys_s390_personality -137 common afs_syscall - - -138 32 setfsuid - sys_setfsuid16 -139 32 setfsgid - sys_setfsgid16 -140 32 _llseek - sys_llseek -141 common getdents sys_getdents compat_sys_getdents -142 32 _newselect - compat_sys_select -142 64 select sys_select - -143 common flock sys_flock sys_flock -144 common msync sys_msync sys_msync -145 common readv sys_readv sys_readv -146 common writev sys_writev sys_writev -147 common getsid sys_getsid sys_getsid -148 common fdatasync sys_fdatasync sys_fdatasync -149 common _sysctl - - -150 common mlock sys_mlock sys_mlock -151 common munlock sys_munlock sys_munlock -152 common mlockall sys_mlockall sys_mlockall -153 common munlockall sys_munlockall sys_munlockall -154 common sched_setparam sys_sched_setparam sys_sched_setparam -155 common sched_getparam sys_sched_getparam sys_sched_getparam -156 common sched_setscheduler sys_sched_setscheduler sys_sched_setscheduler -157 common sched_getscheduler sys_sched_getscheduler sys_sched_getscheduler -158 common sched_yield sys_sched_yield sys_sched_yield -159 common sched_get_priority_max sys_sched_get_priority_max sys_sched_get_priority_max -160 common sched_get_priority_min sys_sched_get_priority_min sys_sched_get_priority_min -161 common sched_rr_get_interval sys_sched_rr_get_interval sys_sched_rr_get_interval_time32 -162 common nanosleep sys_nanosleep sys_nanosleep_time32 -163 common mremap sys_mremap sys_mremap -164 32 setresuid - sys_setresuid16 -165 32 getresuid - sys_getresuid16 -167 common query_module - - -168 common poll sys_poll sys_poll -169 common nfsservctl - - -170 32 setresgid - sys_setresgid16 -171 32 getresgid - sys_getresgid16 -172 
common prctl sys_prctl sys_prctl -173 common rt_sigreturn sys_rt_sigreturn compat_sys_rt_sigreturn -174 common rt_sigaction sys_rt_sigaction compat_sys_rt_sigaction -175 common rt_sigprocmask sys_rt_sigprocmask compat_sys_rt_sigprocmask -176 common rt_sigpending sys_rt_sigpending compat_sys_rt_sigpending -177 common rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time32 -178 common rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo -179 common rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend -180 common pread64 sys_pread64 compat_sys_s390_pread64 -181 common pwrite64 sys_pwrite64 compat_sys_s390_pwrite64 -182 32 chown - sys_chown16 -183 common getcwd sys_getcwd sys_getcwd -184 common capget sys_capget sys_capget -185 common capset sys_capset sys_capset -186 common sigaltstack sys_sigaltstack compat_sys_sigaltstack -187 common sendfile sys_sendfile64 compat_sys_sendfile -188 common getpmsg - - -189 common putpmsg - - -190 common vfork sys_vfork sys_vfork -191 32 ugetrlimit - compat_sys_getrlimit -191 64 getrlimit sys_getrlimit - -192 32 mmap2 - compat_sys_s390_mmap2 -193 32 truncate64 - compat_sys_s390_truncate64 -194 32 ftruncate64 - compat_sys_s390_ftruncate64 -195 32 stat64 - compat_sys_s390_stat64 -196 32 lstat64 - compat_sys_s390_lstat64 -197 32 fstat64 - compat_sys_s390_fstat64 -198 32 lchown32 - sys_lchown -198 64 lchown sys_lchown - -199 32 getuid32 - sys_getuid -199 64 getuid sys_getuid - -200 32 getgid32 - sys_getgid -200 64 getgid sys_getgid - -201 32 geteuid32 - sys_geteuid -201 64 geteuid sys_geteuid - -202 32 getegid32 - sys_getegid -202 64 getegid sys_getegid - -203 32 setreuid32 - sys_setreuid -203 64 setreuid sys_setreuid - -204 32 setregid32 - sys_setregid -204 64 setregid sys_setregid - -205 32 getgroups32 - sys_getgroups -205 64 getgroups sys_getgroups - -206 32 setgroups32 - sys_setgroups -206 64 setgroups sys_setgroups - -207 32 fchown32 - sys_fchown -207 64 fchown sys_fchown - -208 32 setresuid32 - sys_setresuid -208 64 setresuid sys_setresuid - -209 32 getresuid32 - sys_getresuid -209 64 getresuid sys_getresuid - -210 32 setresgid32 - sys_setresgid -210 64 setresgid sys_setresgid - -211 32 getresgid32 - sys_getresgid -211 64 getresgid sys_getresgid - -212 32 chown32 - sys_chown -212 64 chown sys_chown - -213 32 setuid32 - sys_setuid -213 64 setuid sys_setuid - -214 32 setgid32 - sys_setgid -214 64 setgid sys_setgid - -215 32 setfsuid32 - sys_setfsuid -215 64 setfsuid sys_setfsuid - -216 32 setfsgid32 - sys_setfsgid -216 64 setfsgid sys_setfsgid - -217 common pivot_root sys_pivot_root sys_pivot_root -218 common mincore sys_mincore sys_mincore -219 common madvise sys_madvise sys_madvise -220 common getdents64 sys_getdents64 sys_getdents64 -221 32 fcntl64 - compat_sys_fcntl64 -222 common readahead sys_readahead compat_sys_s390_readahead -223 32 sendfile64 - compat_sys_sendfile64 -224 common setxattr sys_setxattr sys_setxattr -225 common lsetxattr sys_lsetxattr sys_lsetxattr -226 common fsetxattr sys_fsetxattr sys_fsetxattr -227 common getxattr sys_getxattr sys_getxattr -228 common lgetxattr sys_lgetxattr sys_lgetxattr -229 common fgetxattr sys_fgetxattr sys_fgetxattr -230 common listxattr sys_listxattr sys_listxattr -231 common llistxattr sys_llistxattr sys_llistxattr -232 common flistxattr sys_flistxattr sys_flistxattr -233 common removexattr sys_removexattr sys_removexattr -234 common lremovexattr sys_lremovexattr sys_lremovexattr -235 common fremovexattr sys_fremovexattr sys_fremovexattr -236 common gettid sys_gettid sys_gettid -237 
common tkill sys_tkill sys_tkill -238 common futex sys_futex sys_futex_time32 -239 common sched_setaffinity sys_sched_setaffinity compat_sys_sched_setaffinity -240 common sched_getaffinity sys_sched_getaffinity compat_sys_sched_getaffinity -241 common tgkill sys_tgkill sys_tgkill -243 common io_setup sys_io_setup compat_sys_io_setup -244 common io_destroy sys_io_destroy sys_io_destroy -245 common io_getevents sys_io_getevents sys_io_getevents_time32 -246 common io_submit sys_io_submit compat_sys_io_submit -247 common io_cancel sys_io_cancel sys_io_cancel -248 common exit_group sys_exit_group sys_exit_group -249 common epoll_create sys_epoll_create sys_epoll_create -250 common epoll_ctl sys_epoll_ctl sys_epoll_ctl -251 common epoll_wait sys_epoll_wait sys_epoll_wait -252 common set_tid_address sys_set_tid_address sys_set_tid_address -253 common fadvise64 sys_fadvise64_64 compat_sys_s390_fadvise64 -254 common timer_create sys_timer_create compat_sys_timer_create -255 common timer_settime sys_timer_settime sys_timer_settime32 -256 common timer_gettime sys_timer_gettime sys_timer_gettime32 -257 common timer_getoverrun sys_timer_getoverrun sys_timer_getoverrun -258 common timer_delete sys_timer_delete sys_timer_delete -259 common clock_settime sys_clock_settime sys_clock_settime32 -260 common clock_gettime sys_clock_gettime sys_clock_gettime32 -261 common clock_getres sys_clock_getres sys_clock_getres_time32 -262 common clock_nanosleep sys_clock_nanosleep sys_clock_nanosleep_time32 -264 32 fadvise64_64 - compat_sys_s390_fadvise64_64 -265 common statfs64 sys_statfs64 compat_sys_statfs64 -266 common fstatfs64 sys_fstatfs64 compat_sys_fstatfs64 -267 common remap_file_pages sys_remap_file_pages sys_remap_file_pages -268 common mbind sys_mbind sys_mbind -269 common get_mempolicy sys_get_mempolicy sys_get_mempolicy -270 common set_mempolicy sys_set_mempolicy sys_set_mempolicy -271 common mq_open sys_mq_open compat_sys_mq_open -272 common mq_unlink sys_mq_unlink sys_mq_unlink -273 common mq_timedsend sys_mq_timedsend sys_mq_timedsend_time32 -274 common mq_timedreceive sys_mq_timedreceive sys_mq_timedreceive_time32 -275 common mq_notify sys_mq_notify compat_sys_mq_notify -276 common mq_getsetattr sys_mq_getsetattr compat_sys_mq_getsetattr -277 common kexec_load sys_kexec_load compat_sys_kexec_load -278 common add_key sys_add_key sys_add_key -279 common request_key sys_request_key sys_request_key -280 common keyctl sys_keyctl compat_sys_keyctl -281 common waitid sys_waitid compat_sys_waitid -282 common ioprio_set sys_ioprio_set sys_ioprio_set -283 common ioprio_get sys_ioprio_get sys_ioprio_get -284 common inotify_init sys_inotify_init sys_inotify_init -285 common inotify_add_watch sys_inotify_add_watch sys_inotify_add_watch -286 common inotify_rm_watch sys_inotify_rm_watch sys_inotify_rm_watch -287 common migrate_pages sys_migrate_pages sys_migrate_pages -288 common openat sys_openat compat_sys_openat -289 common mkdirat sys_mkdirat sys_mkdirat -290 common mknodat sys_mknodat sys_mknodat -291 common fchownat sys_fchownat sys_fchownat -292 common futimesat sys_futimesat sys_futimesat_time32 -293 32 fstatat64 - compat_sys_s390_fstatat64 -293 64 newfstatat sys_newfstatat - -294 common unlinkat sys_unlinkat sys_unlinkat -295 common renameat sys_renameat sys_renameat -296 common linkat sys_linkat sys_linkat -297 common symlinkat sys_symlinkat sys_symlinkat -298 common readlinkat sys_readlinkat sys_readlinkat -299 common fchmodat sys_fchmodat sys_fchmodat -300 common faccessat sys_faccessat sys_faccessat 
-301 common pselect6 sys_pselect6 compat_sys_pselect6_time32 -302 common ppoll sys_ppoll compat_sys_ppoll_time32 -303 common unshare sys_unshare sys_unshare -304 common set_robust_list sys_set_robust_list compat_sys_set_robust_list -305 common get_robust_list sys_get_robust_list compat_sys_get_robust_list -306 common splice sys_splice sys_splice -307 common sync_file_range sys_sync_file_range compat_sys_s390_sync_file_range -308 common tee sys_tee sys_tee -309 common vmsplice sys_vmsplice sys_vmsplice -310 common move_pages sys_move_pages sys_move_pages -311 common getcpu sys_getcpu sys_getcpu -312 common epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait -313 common utimes sys_utimes sys_utimes_time32 -314 common fallocate sys_fallocate compat_sys_s390_fallocate -315 common utimensat sys_utimensat sys_utimensat_time32 -316 common signalfd sys_signalfd compat_sys_signalfd -317 common timerfd - - -318 common eventfd sys_eventfd sys_eventfd -319 common timerfd_create sys_timerfd_create sys_timerfd_create -320 common timerfd_settime sys_timerfd_settime sys_timerfd_settime32 -321 common timerfd_gettime sys_timerfd_gettime sys_timerfd_gettime32 -322 common signalfd4 sys_signalfd4 compat_sys_signalfd4 -323 common eventfd2 sys_eventfd2 sys_eventfd2 -324 common inotify_init1 sys_inotify_init1 sys_inotify_init1 -325 common pipe2 sys_pipe2 sys_pipe2 -326 common dup3 sys_dup3 sys_dup3 -327 common epoll_create1 sys_epoll_create1 sys_epoll_create1 -328 common preadv sys_preadv compat_sys_preadv -329 common pwritev sys_pwritev compat_sys_pwritev -330 common rt_tgsigqueueinfo sys_rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo -331 common perf_event_open sys_perf_event_open sys_perf_event_open -332 common fanotify_init sys_fanotify_init sys_fanotify_init -333 common fanotify_mark sys_fanotify_mark compat_sys_fanotify_mark -334 common prlimit64 sys_prlimit64 sys_prlimit64 -335 common name_to_handle_at sys_name_to_handle_at sys_name_to_handle_at -336 common open_by_handle_at sys_open_by_handle_at compat_sys_open_by_handle_at -337 common clock_adjtime sys_clock_adjtime sys_clock_adjtime32 -338 common syncfs sys_syncfs sys_syncfs -339 common setns sys_setns sys_setns -340 common process_vm_readv sys_process_vm_readv sys_process_vm_readv -341 common process_vm_writev sys_process_vm_writev sys_process_vm_writev -342 common s390_runtime_instr sys_s390_runtime_instr sys_s390_runtime_instr -343 common kcmp sys_kcmp sys_kcmp -344 common finit_module sys_finit_module sys_finit_module -345 common sched_setattr sys_sched_setattr sys_sched_setattr -346 common sched_getattr sys_sched_getattr sys_sched_getattr -347 common renameat2 sys_renameat2 sys_renameat2 -348 common seccomp sys_seccomp sys_seccomp -349 common getrandom sys_getrandom sys_getrandom -350 common memfd_create sys_memfd_create sys_memfd_create -351 common bpf sys_bpf sys_bpf -352 common s390_pci_mmio_write sys_s390_pci_mmio_write sys_s390_pci_mmio_write -353 common s390_pci_mmio_read sys_s390_pci_mmio_read sys_s390_pci_mmio_read -354 common execveat sys_execveat compat_sys_execveat -355 common userfaultfd sys_userfaultfd sys_userfaultfd -356 common membarrier sys_membarrier sys_membarrier -357 common recvmmsg sys_recvmmsg compat_sys_recvmmsg_time32 -358 common sendmmsg sys_sendmmsg compat_sys_sendmmsg -359 common socket sys_socket sys_socket -360 common socketpair sys_socketpair sys_socketpair -361 common bind sys_bind sys_bind -362 common connect sys_connect sys_connect -363 common listen sys_listen sys_listen -364 common accept4 sys_accept4 sys_accept4 
-365 common getsockopt sys_getsockopt sys_getsockopt -366 common setsockopt sys_setsockopt sys_setsockopt -367 common getsockname sys_getsockname sys_getsockname -368 common getpeername sys_getpeername sys_getpeername -369 common sendto sys_sendto sys_sendto -370 common sendmsg sys_sendmsg compat_sys_sendmsg -371 common recvfrom sys_recvfrom compat_sys_recvfrom -372 common recvmsg sys_recvmsg compat_sys_recvmsg -373 common shutdown sys_shutdown sys_shutdown -374 common mlock2 sys_mlock2 sys_mlock2 -375 common copy_file_range sys_copy_file_range sys_copy_file_range -376 common preadv2 sys_preadv2 compat_sys_preadv2 -377 common pwritev2 sys_pwritev2 compat_sys_pwritev2 -378 common s390_guarded_storage sys_s390_guarded_storage sys_s390_guarded_storage -379 common statx sys_statx sys_statx -380 common s390_sthyi sys_s390_sthyi sys_s390_sthyi -381 common kexec_file_load sys_kexec_file_load sys_kexec_file_load -382 common io_pgetevents sys_io_pgetevents compat_sys_io_pgetevents -383 common rseq sys_rseq sys_rseq -384 common pkey_mprotect sys_pkey_mprotect sys_pkey_mprotect -385 common pkey_alloc sys_pkey_alloc sys_pkey_alloc -386 common pkey_free sys_pkey_free sys_pkey_free +1 common exit sys_exit +2 common fork sys_fork +3 common read sys_read +4 common write sys_write +5 common open sys_open +6 common close sys_close +7 common restart_syscall sys_restart_syscall +8 common creat sys_creat +9 common link sys_link +10 common unlink sys_unlink +11 common execve sys_execve +12 common chdir sys_chdir +14 common mknod sys_mknod +15 common chmod sys_chmod +19 common lseek sys_lseek +20 common getpid sys_getpid +21 common mount sys_mount +22 common umount sys_oldumount +26 common ptrace sys_ptrace +27 common alarm sys_alarm +29 common pause sys_pause +30 common utime sys_utime +33 common access sys_access +34 common nice sys_nice +36 common sync sys_sync +37 common kill sys_kill +38 common rename sys_rename +39 common mkdir sys_mkdir +40 common rmdir sys_rmdir +41 common dup sys_dup +42 common pipe sys_pipe +43 common times sys_times +45 common brk sys_brk +48 common signal sys_signal +51 common acct sys_acct +52 common umount2 sys_umount +54 common ioctl sys_ioctl +55 common fcntl sys_fcntl +57 common setpgid sys_setpgid +60 common umask sys_umask +61 common chroot sys_chroot +62 common ustat sys_ustat +63 common dup2 sys_dup2 +64 common getppid sys_getppid +65 common getpgrp sys_getpgrp +66 common setsid sys_setsid +67 common sigaction sys_sigaction +72 common sigsuspend sys_sigsuspend +73 common sigpending sys_sigpending +74 common sethostname sys_sethostname +75 common setrlimit sys_setrlimit +77 common getrusage sys_getrusage +78 common gettimeofday sys_gettimeofday +79 common settimeofday sys_settimeofday +83 common symlink sys_symlink +85 common readlink sys_readlink +86 common uselib sys_uselib +87 common swapon sys_swapon +88 common reboot sys_reboot +89 common readdir sys_ni_syscall +90 common mmap sys_old_mmap +91 common munmap sys_munmap +92 common truncate sys_truncate +93 common ftruncate sys_ftruncate +94 common fchmod sys_fchmod +96 common getpriority sys_getpriority +97 common setpriority sys_setpriority +99 common statfs sys_statfs +100 common fstatfs sys_fstatfs +102 common socketcall sys_socketcall +103 common syslog sys_syslog +104 common setitimer sys_setitimer +105 common getitimer sys_getitimer +106 common stat sys_newstat +107 common lstat sys_newlstat +108 common fstat sys_newfstat +110 common lookup_dcookie sys_ni_syscall +111 common vhangup sys_vhangup +112 common idle 
sys_ni_syscall +114 common wait4 sys_wait4 +115 common swapoff sys_swapoff +116 common sysinfo sys_sysinfo +117 common ipc sys_s390_ipc +118 common fsync sys_fsync +119 common sigreturn sys_sigreturn +120 common clone sys_clone +121 common setdomainname sys_setdomainname +122 common uname sys_newuname +124 common adjtimex sys_adjtimex +125 common mprotect sys_mprotect +126 common sigprocmask sys_sigprocmask +127 common create_module sys_ni_syscall +128 common init_module sys_init_module +129 common delete_module sys_delete_module +130 common get_kernel_syms sys_ni_syscall +131 common quotactl sys_quotactl +132 common getpgid sys_getpgid +133 common fchdir sys_fchdir +134 common bdflush sys_ni_syscall +135 common sysfs sys_sysfs +136 common personality sys_s390_personality +137 common afs_syscall sys_ni_syscall +141 common getdents sys_getdents +142 common select sys_select +143 common flock sys_flock +144 common msync sys_msync +145 common readv sys_readv +146 common writev sys_writev +147 common getsid sys_getsid +148 common fdatasync sys_fdatasync +149 common _sysctl sys_ni_syscall +150 common mlock sys_mlock +151 common munlock sys_munlock +152 common mlockall sys_mlockall +153 common munlockall sys_munlockall +154 common sched_setparam sys_sched_setparam +155 common sched_getparam sys_sched_getparam +156 common sched_setscheduler sys_sched_setscheduler +157 common sched_getscheduler sys_sched_getscheduler +158 common sched_yield sys_sched_yield +159 common sched_get_priority_max sys_sched_get_priority_max +160 common sched_get_priority_min sys_sched_get_priority_min +161 common sched_rr_get_interval sys_sched_rr_get_interval +162 common nanosleep sys_nanosleep +163 common mremap sys_mremap +167 common query_module sys_ni_syscall +168 common poll sys_poll +169 common nfsservctl sys_ni_syscall +172 common prctl sys_prctl +173 common rt_sigreturn sys_rt_sigreturn +174 common rt_sigaction sys_rt_sigaction +175 common rt_sigprocmask sys_rt_sigprocmask +176 common rt_sigpending sys_rt_sigpending +177 common rt_sigtimedwait sys_rt_sigtimedwait +178 common rt_sigqueueinfo sys_rt_sigqueueinfo +179 common rt_sigsuspend sys_rt_sigsuspend +180 common pread64 sys_pread64 +181 common pwrite64 sys_pwrite64 +183 common getcwd sys_getcwd +184 common capget sys_capget +185 common capset sys_capset +186 common sigaltstack sys_sigaltstack +187 common sendfile sys_sendfile64 +188 common getpmsg sys_ni_syscall +189 common putpmsg sys_ni_syscall +190 common vfork sys_vfork +191 common getrlimit sys_getrlimit +198 common lchown sys_lchown +199 common getuid sys_getuid +200 common getgid sys_getgid +201 common geteuid sys_geteuid +202 common getegid sys_getegid +203 common setreuid sys_setreuid +204 common setregid sys_setregid +205 common getgroups sys_getgroups +206 common setgroups sys_setgroups +207 common fchown sys_fchown +208 common setresuid sys_setresuid +209 common getresuid sys_getresuid +210 common setresgid sys_setresgid +211 common getresgid sys_getresgid +212 common chown sys_chown +213 common setuid sys_setuid +214 common setgid sys_setgid +215 common setfsuid sys_setfsuid +216 common setfsgid sys_setfsgid +217 common pivot_root sys_pivot_root +218 common mincore sys_mincore +219 common madvise sys_madvise +220 common getdents64 sys_getdents64 +222 common readahead sys_readahead +224 common setxattr sys_setxattr +225 common lsetxattr sys_lsetxattr +226 common fsetxattr sys_fsetxattr +227 common getxattr sys_getxattr +228 common lgetxattr sys_lgetxattr +229 common fgetxattr sys_fgetxattr +230 
common listxattr sys_listxattr +231 common llistxattr sys_llistxattr +232 common flistxattr sys_flistxattr +233 common removexattr sys_removexattr +234 common lremovexattr sys_lremovexattr +235 common fremovexattr sys_fremovexattr +236 common gettid sys_gettid +237 common tkill sys_tkill +238 common futex sys_futex +239 common sched_setaffinity sys_sched_setaffinity +240 common sched_getaffinity sys_sched_getaffinity +241 common tgkill sys_tgkill +243 common io_setup sys_io_setup +244 common io_destroy sys_io_destroy +245 common io_getevents sys_io_getevents +246 common io_submit sys_io_submit +247 common io_cancel sys_io_cancel +248 common exit_group sys_exit_group +249 common epoll_create sys_epoll_create +250 common epoll_ctl sys_epoll_ctl +251 common epoll_wait sys_epoll_wait +252 common set_tid_address sys_set_tid_address +253 common fadvise64 sys_fadvise64_64 +254 common timer_create sys_timer_create +255 common timer_settime sys_timer_settime +256 common timer_gettime sys_timer_gettime +257 common timer_getoverrun sys_timer_getoverrun +258 common timer_delete sys_timer_delete +259 common clock_settime sys_clock_settime +260 common clock_gettime sys_clock_gettime +261 common clock_getres sys_clock_getres +262 common clock_nanosleep sys_clock_nanosleep +265 common statfs64 sys_statfs64 +266 common fstatfs64 sys_fstatfs64 +267 common remap_file_pages sys_remap_file_pages +268 common mbind sys_mbind +269 common get_mempolicy sys_get_mempolicy +270 common set_mempolicy sys_set_mempolicy +271 common mq_open sys_mq_open +272 common mq_unlink sys_mq_unlink +273 common mq_timedsend sys_mq_timedsend +274 common mq_timedreceive sys_mq_timedreceive +275 common mq_notify sys_mq_notify +276 common mq_getsetattr sys_mq_getsetattr +277 common kexec_load sys_kexec_load +278 common add_key sys_add_key +279 common request_key sys_request_key +280 common keyctl sys_keyctl +281 common waitid sys_waitid +282 common ioprio_set sys_ioprio_set +283 common ioprio_get sys_ioprio_get +284 common inotify_init sys_inotify_init +285 common inotify_add_watch sys_inotify_add_watch +286 common inotify_rm_watch sys_inotify_rm_watch +287 common migrate_pages sys_migrate_pages +288 common openat sys_openat +289 common mkdirat sys_mkdirat +290 common mknodat sys_mknodat +291 common fchownat sys_fchownat +292 common futimesat sys_futimesat +293 common newfstatat sys_newfstatat +294 common unlinkat sys_unlinkat +295 common renameat sys_renameat +296 common linkat sys_linkat +297 common symlinkat sys_symlinkat +298 common readlinkat sys_readlinkat +299 common fchmodat sys_fchmodat +300 common faccessat sys_faccessat +301 common pselect6 sys_pselect6 +302 common ppoll sys_ppoll +303 common unshare sys_unshare +304 common set_robust_list sys_set_robust_list +305 common get_robust_list sys_get_robust_list +306 common splice sys_splice +307 common sync_file_range sys_sync_file_range +308 common tee sys_tee +309 common vmsplice sys_vmsplice +310 common move_pages sys_move_pages +311 common getcpu sys_getcpu +312 common epoll_pwait sys_epoll_pwait +313 common utimes sys_utimes +314 common fallocate sys_fallocate +315 common utimensat sys_utimensat +316 common signalfd sys_signalfd +317 common timerfd sys_ni_syscall +318 common eventfd sys_eventfd +319 common timerfd_create sys_timerfd_create +320 common timerfd_settime sys_timerfd_settime +321 common timerfd_gettime sys_timerfd_gettime +322 common signalfd4 sys_signalfd4 +323 common eventfd2 sys_eventfd2 +324 common inotify_init1 sys_inotify_init1 +325 common pipe2 sys_pipe2 
+326 common dup3 sys_dup3 +327 common epoll_create1 sys_epoll_create1 +328 common preadv sys_preadv +329 common pwritev sys_pwritev +330 common rt_tgsigqueueinfo sys_rt_tgsigqueueinfo +331 common perf_event_open sys_perf_event_open +332 common fanotify_init sys_fanotify_init +333 common fanotify_mark sys_fanotify_mark +334 common prlimit64 sys_prlimit64 +335 common name_to_handle_at sys_name_to_handle_at +336 common open_by_handle_at sys_open_by_handle_at +337 common clock_adjtime sys_clock_adjtime +338 common syncfs sys_syncfs +339 common setns sys_setns +340 common process_vm_readv sys_process_vm_readv +341 common process_vm_writev sys_process_vm_writev +342 common s390_runtime_instr sys_s390_runtime_instr +343 common kcmp sys_kcmp +344 common finit_module sys_finit_module +345 common sched_setattr sys_sched_setattr +346 common sched_getattr sys_sched_getattr +347 common renameat2 sys_renameat2 +348 common seccomp sys_seccomp +349 common getrandom sys_getrandom +350 common memfd_create sys_memfd_create +351 common bpf sys_bpf +352 common s390_pci_mmio_write sys_s390_pci_mmio_write +353 common s390_pci_mmio_read sys_s390_pci_mmio_read +354 common execveat sys_execveat +355 common userfaultfd sys_userfaultfd +356 common membarrier sys_membarrier +357 common recvmmsg sys_recvmmsg +358 common sendmmsg sys_sendmmsg +359 common socket sys_socket +360 common socketpair sys_socketpair +361 common bind sys_bind +362 common connect sys_connect +363 common listen sys_listen +364 common accept4 sys_accept4 +365 common getsockopt sys_getsockopt +366 common setsockopt sys_setsockopt +367 common getsockname sys_getsockname +368 common getpeername sys_getpeername +369 common sendto sys_sendto +370 common sendmsg sys_sendmsg +371 common recvfrom sys_recvfrom +372 common recvmsg sys_recvmsg +373 common shutdown sys_shutdown +374 common mlock2 sys_mlock2 +375 common copy_file_range sys_copy_file_range +376 common preadv2 sys_preadv2 +377 common pwritev2 sys_pwritev2 +378 common s390_guarded_storage sys_s390_guarded_storage +379 common statx sys_statx +380 common s390_sthyi sys_s390_sthyi +381 common kexec_file_load sys_kexec_file_load +382 common io_pgetevents sys_io_pgetevents +383 common rseq sys_rseq +384 common pkey_mprotect sys_pkey_mprotect +385 common pkey_alloc sys_pkey_alloc +386 common pkey_free sys_pkey_free # room for arch specific syscalls -392 64 semtimedop sys_semtimedop - -393 common semget sys_semget sys_semget -394 common semctl sys_semctl compat_sys_semctl -395 common shmget sys_shmget sys_shmget -396 common shmctl sys_shmctl compat_sys_shmctl -397 common shmat sys_shmat compat_sys_shmat -398 common shmdt sys_shmdt sys_shmdt -399 common msgget sys_msgget sys_msgget -400 common msgsnd sys_msgsnd compat_sys_msgsnd -401 common msgrcv sys_msgrcv compat_sys_msgrcv -402 common msgctl sys_msgctl compat_sys_msgctl -403 32 clock_gettime64 - sys_clock_gettime -404 32 clock_settime64 - sys_clock_settime -405 32 clock_adjtime64 - sys_clock_adjtime -406 32 clock_getres_time64 - sys_clock_getres -407 32 clock_nanosleep_time64 - sys_clock_nanosleep -408 32 timer_gettime64 - sys_timer_gettime -409 32 timer_settime64 - sys_timer_settime -410 32 timerfd_gettime64 - sys_timerfd_gettime -411 32 timerfd_settime64 - sys_timerfd_settime -412 32 utimensat_time64 - sys_utimensat -413 32 pselect6_time64 - compat_sys_pselect6_time64 -414 32 ppoll_time64 - compat_sys_ppoll_time64 -416 32 io_pgetevents_time64 - compat_sys_io_pgetevents_time64 -417 32 recvmmsg_time64 - compat_sys_recvmmsg_time64 -418 32 
mq_timedsend_time64 - sys_mq_timedsend -419 32 mq_timedreceive_time64 - sys_mq_timedreceive -420 32 semtimedop_time64 - sys_semtimedop -421 32 rt_sigtimedwait_time64 - compat_sys_rt_sigtimedwait_time64 -422 32 futex_time64 - sys_futex -423 32 sched_rr_get_interval_time64 - sys_sched_rr_get_interval -424 common pidfd_send_signal sys_pidfd_send_signal sys_pidfd_send_signal -425 common io_uring_setup sys_io_uring_setup sys_io_uring_setup -426 common io_uring_enter sys_io_uring_enter sys_io_uring_enter -427 common io_uring_register sys_io_uring_register sys_io_uring_register -428 common open_tree sys_open_tree sys_open_tree -429 common move_mount sys_move_mount sys_move_mount -430 common fsopen sys_fsopen sys_fsopen -431 common fsconfig sys_fsconfig sys_fsconfig -432 common fsmount sys_fsmount sys_fsmount -433 common fspick sys_fspick sys_fspick -434 common pidfd_open sys_pidfd_open sys_pidfd_open -435 common clone3 sys_clone3 sys_clone3 -436 common close_range sys_close_range sys_close_range -437 common openat2 sys_openat2 sys_openat2 -438 common pidfd_getfd sys_pidfd_getfd sys_pidfd_getfd -439 common faccessat2 sys_faccessat2 sys_faccessat2 -440 common process_madvise sys_process_madvise sys_process_madvise -441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 -442 common mount_setattr sys_mount_setattr sys_mount_setattr -443 common quotactl_fd sys_quotactl_fd sys_quotactl_fd -444 common landlock_create_ruleset sys_landlock_create_ruleset sys_landlock_create_ruleset -445 common landlock_add_rule sys_landlock_add_rule sys_landlock_add_rule -446 common landlock_restrict_self sys_landlock_restrict_self sys_landlock_restrict_self -447 common memfd_secret sys_memfd_secret sys_memfd_secret -448 common process_mrelease sys_process_mrelease sys_process_mrelease -449 common futex_waitv sys_futex_waitv sys_futex_waitv -450 common set_mempolicy_home_node sys_set_mempolicy_home_node sys_set_mempolicy_home_node -451 common cachestat sys_cachestat sys_cachestat -452 common fchmodat2 sys_fchmodat2 sys_fchmodat2 -453 common map_shadow_stack sys_map_shadow_stack sys_map_shadow_stack -454 common futex_wake sys_futex_wake sys_futex_wake -455 common futex_wait sys_futex_wait sys_futex_wait -456 common futex_requeue sys_futex_requeue sys_futex_requeue -457 common statmount sys_statmount sys_statmount -458 common listmount sys_listmount sys_listmount -459 common lsm_get_self_attr sys_lsm_get_self_attr sys_lsm_get_self_attr -460 common lsm_set_self_attr sys_lsm_set_self_attr sys_lsm_set_self_attr -461 common lsm_list_modules sys_lsm_list_modules sys_lsm_list_modules -462 common mseal sys_mseal sys_mseal -463 common setxattrat sys_setxattrat sys_setxattrat -464 common getxattrat sys_getxattrat sys_getxattrat -465 common listxattrat sys_listxattrat sys_listxattrat -466 common removexattrat sys_removexattrat sys_removexattrat -467 common open_tree_attr sys_open_tree_attr sys_open_tree_attr -468 common file_getattr sys_file_getattr sys_file_getattr -469 common file_setattr sys_file_setattr sys_file_setattr +392 common semtimedop sys_semtimedop +393 common semget sys_semget +394 common semctl sys_semctl +395 common shmget sys_shmget +396 common shmctl sys_shmctl +397 common shmat sys_shmat +398 common shmdt sys_shmdt +399 common msgget sys_msgget +400 common msgsnd sys_msgsnd +401 common msgrcv sys_msgrcv +402 common msgctl sys_msgctl +424 common pidfd_send_signal sys_pidfd_send_signal +425 common io_uring_setup sys_io_uring_setup +426 common io_uring_enter sys_io_uring_enter +427 common io_uring_register 
sys_io_uring_register +428 common open_tree sys_open_tree +429 common move_mount sys_move_mount +430 common fsopen sys_fsopen +431 common fsconfig sys_fsconfig +432 common fsmount sys_fsmount +433 common fspick sys_fspick +434 common pidfd_open sys_pidfd_open +435 common clone3 sys_clone3 +436 common close_range sys_close_range +437 common openat2 sys_openat2 +438 common pidfd_getfd sys_pidfd_getfd +439 common faccessat2 sys_faccessat2 +440 common process_madvise sys_process_madvise +441 common epoll_pwait2 sys_epoll_pwait2 +442 common mount_setattr sys_mount_setattr +443 common quotactl_fd sys_quotactl_fd +444 common landlock_create_ruleset sys_landlock_create_ruleset +445 common landlock_add_rule sys_landlock_add_rule +446 common landlock_restrict_self sys_landlock_restrict_self +447 common memfd_secret sys_memfd_secret +448 common process_mrelease sys_process_mrelease +449 common futex_waitv sys_futex_waitv +450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common cachestat sys_cachestat +452 common fchmodat2 sys_fchmodat2 +453 common map_shadow_stack sys_map_shadow_stack +454 common futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue +457 common statmount sys_statmount +458 common listmount sys_listmount +459 common lsm_get_self_attr sys_lsm_get_self_attr +460 common lsm_set_self_attr sys_lsm_set_self_attr +461 common lsm_list_modules sys_lsm_list_modules +462 common mseal sys_mseal +463 common setxattrat sys_setxattrat +464 common getxattrat sys_getxattrat +465 common listxattrat sys_listxattrat +466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr +468 common file_getattr sys_file_getattr +469 common file_setattr sys_file_setattr +470 common listns sys_listns diff --git a/arch/s390/kernel/syscalls/syscalltbl b/arch/s390/kernel/syscalls/syscalltbl deleted file mode 100755 index fbac1732f874..000000000000 --- a/arch/s390/kernel/syscalls/syscalltbl +++ /dev/null @@ -1,232 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 -# -# Generate system call table and header files -# -# Copyright IBM Corp. 2018 -# Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> - -# -# File path to the system call table definition. -# You can set the path with the -i option. If omitted, -# system call table definitions are read from standard input. -# -SYSCALL_TBL="" - - -create_syscall_table_entries() -{ - local nr abi name entry64 entry32 _ignore - local temp=$(mktemp ${TMPDIR:-/tmp}/syscalltbl-common.XXXXXXXXX) - - ( - # - # Initialize with 0 to create an NI_SYSCALL for 0 - # - local prev_nr=0 prev_32=sys_ni_syscall prev_64=sys_ni_syscall - while read nr abi name entry64 entry32 _ignore; do - test x$entry32 = x- && entry32=sys_ni_syscall - test x$entry64 = x- && entry64=sys_ni_syscall - - if test $prev_nr -eq $nr; then - # - # Same syscall but different ABI, just update - # the respective entry point - # - case $abi in - 32) - prev_32=$entry32 - ;; - 64) - prev_64=$entry64 - ;; - esac - continue; - else - printf "%d\t%s\t%s\n" $prev_nr $prev_64 $prev_32 - fi - - prev_nr=$nr - prev_64=$entry64 - prev_32=$entry32 - done - printf "%d\t%s\t%s\n" $prev_nr $prev_64 $prev_32 - ) >> $temp - - # - # Check for duplicate syscall numbers - # - if ! 
cat $temp |cut -f1 |uniq -d 2>&1; then - echo "Error: generated system call table contains duplicate entries: $temp" >&2 - exit 1 - fi - - # - # Generate syscall table - # - prev_nr=0 - while read nr entry64 entry32; do - while test $prev_nr -lt $((nr - 1)); do - printf "NI_SYSCALL\n" - prev_nr=$((prev_nr + 1)) - done - if test x$entry64 = xsys_ni_syscall && - test x$entry32 = xsys_ni_syscall; then - printf "NI_SYSCALL\n" - else - printf "SYSCALL(%s,%s)\n" $entry64 $entry32 - fi - prev_nr=$nr - done < $temp - rm $temp -} - -generate_syscall_table() -{ - cat <<-EoHEADER - /* SPDX-License-Identifier: GPL-2.0 */ - /* - * Definitions for sys_call_table, each line represents an - * entry in the table in the form - * SYSCALL(64 bit syscall, 31 bit emulated syscall) - * - * This file is meant to be included from entry.S. - */ - - #define NI_SYSCALL SYSCALL(sys_ni_syscall,sys_ni_syscall) - -EoHEADER - grep -Ev '^(#|[[:blank:]]*$)' $SYSCALL_TBL \ - |sort -k1 -n \ - |create_syscall_table_entries -} - -create_header_defines() -{ - local nr abi name _ignore - - while read nr abi name _ignore; do - printf "#define __NR_%s %d\n" $name $nr - done -} - -normalize_fileguard() -{ - local fileguard="$1" - - echo "$1" |tr '[[:lower:]]' '[[:upper:]]' \ - |sed -e 's/[^A-Z0-9_]/_/g' -e 's/__/_/g' -} - -generate_syscall_header() -{ - local abis=$(echo "($1)" | tr ',' '|') - local filename="$2" - local fileguard suffix - - if test "$filename"; then - fileguard=$(normalize_fileguard "__UAPI_ASM_S390_$2") - else - case "$abis" in - *64*) suffix=64 ;; - *32*) suffix=32 ;; - esac - fileguard=$(normalize_fileguard "__UAPI_ASM_S390_SYSCALLS_$suffix") - fi - - cat <<-EoHEADER - /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ - #ifndef ${fileguard} - #define ${fileguard} - -EoHEADER - - grep -E "^[[:digit:]]+[[:space:]]+${abis}" $SYSCALL_TBL \ - |sort -k1 -n \ - |create_header_defines - - cat <<-EoFOOTER - - #endif /* ${fileguard} */ -EoFOOTER -} - -__max_syscall_nr() -{ - local abis=$(echo "($1)" | tr ',' '|') - - grep -E "^[[:digit:]]+[[:space:]]+${abis}" $SYSCALL_TBL \ - |sed -ne 's/^\([[:digit:]]*\)[[:space:]].*/\1/p' \ - |sort -n \ - |tail -1 -} - - -generate_syscall_nr() -{ - local abis="$1" - local max_syscall_nr num_syscalls - - max_syscall_nr=$(__max_syscall_nr "$abis") - num_syscalls=$((max_syscall_nr + 1)) - - cat <<-EoHEADER - /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ - #ifndef __ASM_S390_SYSCALLS_NR - #define __ASM_S390_SYSCALLS_NR - - #define NR_syscalls ${num_syscalls} - - #endif /* __ASM_S390_SYSCALLS_NR */ -EoHEADER -} - - -# -# Parse command line arguments -# -do_syscall_header="" -do_syscall_table="" -do_syscall_nr="" -output_file="" -abi_list="common,64" -filename="" -while getopts ":HNSXi:a:f:" arg; do - case $arg in - a) - abi_list="$OPTARG" - ;; - i) - SYSCALL_TBL="$OPTARG" - ;; - f) - filename=${OPTARG##*/} - ;; - H) - do_syscall_header=1 - ;; - N) - do_syscall_nr=1 - ;; - S) - do_syscall_table=1 - ;; - X) - set -x - ;; - :) - echo "Missing argument for -$OPTARG" >&2 - exit 1 - ;; - \?) 
- echo "Invalid option specified" >&2 - exit 1 - ;; - esac -done - -test "$do_syscall_header" && generate_syscall_header "$abi_list" "$filename" -test "$do_syscall_table" && generate_syscall_table -test "$do_syscall_nr" && generate_syscall_nr "$abi_list" - -exit 0 diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index 1ea84e942bd4..33ca3e47a0e6 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -526,7 +526,7 @@ static __init int stsi_init_debugfs(void) if (IS_ENABLED(CONFIG_SCHED_TOPOLOGY) && cpu_has_topology()) { char link_to[10]; - sprintf(link_to, "15_1_%d", topology_mnest_limit()); + snprintf(link_to, sizeof(link_to), "15_1_%d", topology_mnest_limit()); debugfs_create_symlink("topology", stsi_root, link_to); } return 0; diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 63517b85f4c9..bd0df61d1907 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -12,8 +12,7 @@ * Copyright (C) 1991, 1992, 1995 Linus Torvalds */ -#define KMSG_COMPONENT "time" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "time: " fmt #include <linux/kernel_stat.h> #include <linux/errno.h> diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 1594c80e9bc4..1913a5566ac2 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -3,8 +3,7 @@ * Copyright IBM Corp. 2007, 2011 */ -#define KMSG_COMPONENT "cpu" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "cpu: " fmt #include <linux/cpufeature.h> #include <linux/workqueue.h> diff --git a/arch/s390/kernel/uprobes.c b/arch/s390/kernel/uprobes.c index 5b0633ea8d93..c624f3361e43 100644 --- a/arch/s390/kernel/uprobes.c +++ b/arch/s390/kernel/uprobes.c @@ -8,7 +8,6 @@ #include <linux/uaccess.h> #include <linux/uprobes.h> -#include <linux/compat.h> #include <linux/kdebug.h> #include <linux/sched/task_stack.h> @@ -29,7 +28,7 @@ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { if (psw_bits(regs->psw).eaba == PSW_BITS_AMODE_24BIT) return -EINVAL; - if (!is_compat_task() && psw_bits(regs->psw).eaba == PSW_BITS_AMODE_31BIT) + if (psw_bits(regs->psw).eaba == PSW_BITS_AMODE_31BIT) return -EINVAL; clear_thread_flag(TIF_PER_TRAP); auprobe->saved_per = psw_bits(regs->psw).per; @@ -161,11 +160,6 @@ bool arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx, /* Instruction Emulation */ -static void adjust_psw_addr(psw_t *psw, unsigned long len) -{ - psw->addr = __rewind_psw(*psw, -len); -} - #define EMU_ILLEGAL_OP 1 #define EMU_SPECIFICATION 2 #define EMU_ADDRESSING 3 @@ -353,7 +347,7 @@ static void handle_insn_ril(struct arch_uprobe *auprobe, struct pt_regs *regs) } break; } - adjust_psw_addr(®s->psw, ilen); + regs->psw.addr = __forward_psw(regs->psw, ilen); switch (rc) { case EMU_ILLEGAL_OP: regs->int_code = ilen << 16 | 0x0001; @@ -373,8 +367,7 @@ static void handle_insn_ril(struct arch_uprobe *auprobe, struct pt_regs *regs) bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) { if ((psw_bits(regs->psw).eaba == PSW_BITS_AMODE_24BIT) || - ((psw_bits(regs->psw).eaba == PSW_BITS_AMODE_31BIT) && - !is_compat_task())) { + (psw_bits(regs->psw).eaba == PSW_BITS_AMODE_31BIT)) { regs->psw.addr = __rewind_psw(regs->psw, UPROBE_SWBP_INSN_SIZE); do_report_trap(regs, SIGILL, ILL_ILLADR, NULL); return true; diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index 5d17609bcfe1..ed46950be86f 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -4,8 +4,7 @@ 
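Note on the s390 syscall table rework above: with the separate compat entry-point column gone, each remaining row is just "number abi name entry point", and the arch-specific syscalltbl generator script is deleted (its job is presumably taken over by the generic syscall table machinery, which this diff does not show). As a rough illustration of what such a generator emits, here is a plain C sketch; only the numbers 392, 470 and the resulting slot count are taken from the table above, the macro names follow the usual __NR_ convention but are not copied from any generated file.

#include <stdio.h>

/*
 * Illustrative only: the kind of definitions a syscall table generator
 * produces from rows such as "470 common listns sys_listns" above.
 */
#define __NR_semtimedop	392	/* first row after "room for arch specific syscalls" */
#define __NR_listns	470	/* new entry added by this patch */
#define NR_syscalls	471	/* highest number + 1, as the deleted generate_syscall_nr() computed */

int main(void)
{
	printf("s390: listns is syscall %d, table has %d slots\n",
	       __NR_listns, NR_syscalls);
	printf("unused numbers (13, 16-18, ...) are wired to sys_ni_syscall and return -ENOSYS\n");
	return 0;
}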
* * Copyright IBM Corp. 2019, 2024 */ -#define KMSG_COMPONENT "prot_virt" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "prot_virt: " fmt #include <linux/export.h> #include <linux/kernel.h> diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 430feb1a5013..a27a90a199be 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -7,7 +7,6 @@ */ #include <linux/binfmts.h> -#include <linux/compat.h> #include <linux/elf.h> #include <linux/errno.h> #include <linux/init.h> @@ -23,8 +22,7 @@ #include <asm/alternative.h> #include <asm/vdso.h> -extern char vdso64_start[], vdso64_end[]; -extern char vdso32_start[], vdso32_end[]; +extern char vdso_start[], vdso_end[]; static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *vma) @@ -33,12 +31,7 @@ static int vdso_mremap(const struct vm_special_mapping *sm, return 0; } -static struct vm_special_mapping vdso64_mapping = { - .name = "[vdso]", - .mremap = vdso_mremap, -}; - -static struct vm_special_mapping vdso32_mapping = { +static struct vm_special_mapping vdso_mapping = { .name = "[vdso]", .mremap = vdso_mremap, }; @@ -53,7 +46,6 @@ early_initcall(vdso_getcpu_init); /* Must be called before SMP init */ static int map_vdso(unsigned long addr, unsigned long vdso_mapping_len) { unsigned long vvar_start, vdso_text_start, vdso_text_len; - struct vm_special_mapping *vdso_mapping; struct mm_struct *mm = current->mm; struct vm_area_struct *vma; int rc; @@ -62,13 +54,7 @@ static int map_vdso(unsigned long addr, unsigned long vdso_mapping_len) if (mmap_write_lock_killable(mm)) return -EINTR; - if (is_compat_task()) { - vdso_text_len = vdso32_end - vdso32_start; - vdso_mapping = &vdso32_mapping; - } else { - vdso_text_len = vdso64_end - vdso64_start; - vdso_mapping = &vdso64_mapping; - } + vdso_text_len = vdso_end - vdso_start; vvar_start = get_unmapped_area(NULL, addr, vdso_mapping_len, 0, 0); rc = vvar_start; if (IS_ERR_VALUE(vvar_start)) @@ -82,7 +68,7 @@ static int map_vdso(unsigned long addr, unsigned long vdso_mapping_len) vma = _install_special_mapping(mm, vdso_text_start, vdso_text_len, VM_READ|VM_EXEC|VM_SEALED_SYSMAP| VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, - vdso_mapping); + &vdso_mapping); if (IS_ERR(vma)) { do_munmap(mm, vvar_start, PAGE_SIZE, NULL); rc = PTR_ERR(vma); @@ -122,13 +108,7 @@ static unsigned long vdso_addr(unsigned long start, unsigned long len) unsigned long vdso_text_size(void) { - unsigned long size; - - if (is_compat_task()) - size = vdso32_end - vdso32_start; - else - size = vdso64_end - vdso64_start; - return PAGE_ALIGN(size); + return PAGE_ALIGN(vdso_end - vdso_start); } unsigned long vdso_size(void) @@ -166,7 +146,7 @@ static void vdso_apply_alternatives(void) struct alt_instr *start, *end; const struct elf64_hdr *hdr; - hdr = (struct elf64_hdr *)vdso64_start; + hdr = (struct elf64_hdr *)vdso_start; shdr = (void *)hdr + hdr->e_shoff; alt = find_section(hdr, shdr, ".altinstructions"); if (!alt) @@ -179,9 +159,7 @@ static void vdso_apply_alternatives(void) static int __init vdso_init(void) { vdso_apply_alternatives(); - vdso64_mapping.pages = vdso_setup_pages(vdso64_start, vdso64_end); - if (IS_ENABLED(CONFIG_COMPAT)) - vdso32_mapping.pages = vdso_setup_pages(vdso32_start, vdso32_end); + vdso_mapping.pages = vdso_setup_pages(vdso_start, vdso_end); return 0; } arch_initcall(vdso_init); diff --git a/arch/s390/kernel/vdso32/.gitignore b/arch/s390/kernel/vdso/.gitignore index 5167384843b9..652e31d82582 100644 --- a/arch/s390/kernel/vdso32/.gitignore +++ 
b/arch/s390/kernel/vdso/.gitignore
@@ -1,2 +1,2 @@
# SPDX-License-Identifier: GPL-2.0-only
-vdso32.lds
+vdso.lds
diff --git a/arch/s390/kernel/vdso/Makefile b/arch/s390/kernel/vdso/Makefile
new file mode 100644
index 000000000000..2fa12d4ac106
--- /dev/null
+++ b/arch/s390/kernel/vdso/Makefile
@@ -0,0 +1,76 @@
+# SPDX-License-Identifier: GPL-2.0
+# List of files in the vdso
+
+# Include the generic Makefile to check the built vdso.
+include $(srctree)/lib/vdso/Makefile.include
+obj-vdso = vdso_user_wrapper.o note.o vgetrandom-chacha.o
+obj-cvdso = vdso_generic.o getcpu.o vgetrandom.o
+VDSO_CFLAGS_REMOVE := -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE)
+CFLAGS_REMOVE_getcpu.o = $(VDSO_CFLAGS_REMOVE)
+CFLAGS_REMOVE_vgetrandom.o = $(VDSO_CFLAGS_REMOVE)
+CFLAGS_REMOVE_vdso_generic.o = $(VDSO_CFLAGS_REMOVE)
+
+ifneq ($(c-getrandom-y),)
+	CFLAGS_vgetrandom.o += -include $(c-getrandom-y)
+endif
+
+# Build rules
+
+targets := $(obj-vdso) $(obj-cvdso) vdso.so vdso.so.dbg
+obj-vdso := $(addprefix $(obj)/, $(obj-vdso))
+obj-cvdso := $(addprefix $(obj)/, $(obj-cvdso))
+
+KBUILD_AFLAGS_VDSO := $(KBUILD_AFLAGS) -DBUILD_VDSO
+
+KBUILD_CFLAGS_VDSO := $(KBUILD_CFLAGS) -DBUILD_VDSO -DDISABLE_BRANCH_PROFILING
+KBUILD_CFLAGS_VDSO := $(filter-out -mpacked-stack,$(KBUILD_CFLAGS_VDSO))
+KBUILD_CFLAGS_VDSO := $(filter-out -mno-pic-data-is-text-relative,$(KBUILD_CFLAGS_VDSO))
+KBUILD_CFLAGS_VDSO := $(filter-out -munaligned-symbols,$(KBUILD_CFLAGS_VDSO))
+KBUILD_CFLAGS_VDSO := $(filter-out -fno-asynchronous-unwind-tables,$(KBUILD_CFLAGS_VDSO))
+KBUILD_CFLAGS_VDSO += -fPIC -fno-common -fno-builtin -fasynchronous-unwind-tables
+KBUILD_CFLAGS_VDSO += -fno-stack-protector
+ldflags-y := -shared -soname=linux-vdso.so.1 \
+	--hash-style=both --build-id=sha1 -T
+
+$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_VDSO)
+$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_VDSO)
+
+obj-y += vdso_wrapper.o
+targets += vdso.lds
+CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
+
+# Force dependency (incbin is bad)
+$(obj)/vdso_wrapper.o : $(obj)/vdso.so
+
+quiet_cmd_vdso_and_check = VDSO $@
+      cmd_vdso_and_check = $(cmd_ld); $(cmd_vdso_check)
+
+# link rule for the .so file, .lds has to be first
+$(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) $(obj-cvdso) FORCE
+	$(call if_changed,vdso_and_check)
+
+# strip rule for the .so file
+$(obj)/%.so: OBJCOPYFLAGS := -S
+$(obj)/%.so: $(obj)/%.so.dbg FORCE
+	$(call if_changed,objcopy)
+
+# assembly rules for the .S files
+$(obj-vdso): %.o: %.S FORCE
+	$(call if_changed_dep,vdsoas)
+
+$(obj-cvdso): %.o: %.c FORCE
+	$(call if_changed_dep,vdsocc)
+
+# actual build commands
+quiet_cmd_vdsoas = VDSOA $@
+      cmd_vdsoas = $(CC) $(a_flags) -c -o $@ $<
+quiet_cmd_vdsocc = VDSOC $@
+      cmd_vdsocc = $(CC) $(c_flags) -c -o $@ $<
+
+# Generate VDSO offsets using helper script
+gen-vdsosym := $(src)/gen_vdso_offsets.sh
+quiet_cmd_vdsosym = VDSOSYM $@
+      cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@
+
+include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE
+	$(call if_changed,vdsosym)
diff --git a/arch/s390/kernel/vdso64/gen_vdso_offsets.sh b/arch/s390/kernel/vdso/gen_vdso_offsets.sh
index 37f05cb38dad..359982fb002d 100755
--- a/arch/s390/kernel/vdso64/gen_vdso_offsets.sh
+++ b/arch/s390/kernel/vdso/gen_vdso_offsets.sh
@@ -12,4 +12,4 @@
#

LC_ALL=C
-sed -n 's/\([0-9a-f]*\) . __kernel_\(.*\)/\#define vdso64_offset_\2\t0x\1/p'
+sed -n 's/\([0-9a-f]*\) . 
__kernel_\(.*\)/\#define vdso_offset_\2\t0x\1/p' diff --git a/arch/s390/kernel/vdso64/getcpu.c b/arch/s390/kernel/vdso/getcpu.c index 5c5d4a848b76..5c5d4a848b76 100644 --- a/arch/s390/kernel/vdso64/getcpu.c +++ b/arch/s390/kernel/vdso/getcpu.c diff --git a/arch/s390/kernel/vdso32/note.S b/arch/s390/kernel/vdso/note.S index db19d0680a0a..db19d0680a0a 100644 --- a/arch/s390/kernel/vdso32/note.S +++ b/arch/s390/kernel/vdso/note.S diff --git a/arch/s390/kernel/vdso64/vdso.h b/arch/s390/kernel/vdso/vdso.h index 9e5397e7b590..8cff033dd854 100644 --- a/arch/s390/kernel/vdso64/vdso.h +++ b/arch/s390/kernel/vdso/vdso.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ARCH_S390_KERNEL_VDSO64_VDSO_H -#define __ARCH_S390_KERNEL_VDSO64_VDSO_H +#ifndef __ARCH_S390_KERNEL_VDSO_VDSO_H +#define __ARCH_S390_KERNEL_VDSO_VDSO_H #include <vdso/datapage.h> @@ -12,4 +12,4 @@ int __s390_vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts); int __s390_vdso_clock_getres(clockid_t clock, struct __kernel_timespec *ts); ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len); -#endif /* __ARCH_S390_KERNEL_VDSO64_VDSO_H */ +#endif /* __ARCH_S390_KERNEL_VDSO_VDSO_H */ diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso/vdso.lds.S index e4f6551ae898..7bec4de0e8e0 100644 --- a/arch/s390/kernel/vdso64/vdso64.lds.S +++ b/arch/s390/kernel/vdso/vdso.lds.S @@ -7,6 +7,7 @@ #include <asm/vdso/vsyscall.h> #include <asm/page.h> #include <asm/vdso.h> +#include <asm-generic/vmlinux.lds.h> #include <vdso/datapage.h> OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390") @@ -59,47 +60,9 @@ SECTIONS _end = .; PROVIDE(end = .); - /* - * Stabs debugging sections are here too. - */ - .stab 0 : { *(.stab) } - .stabstr 0 : { *(.stabstr) } - .stab.excl 0 : { *(.stab.excl) } - .stab.exclstr 0 : { *(.stab.exclstr) } - .stab.index 0 : { *(.stab.index) } - .stab.indexstr 0 : { *(.stab.indexstr) } + STABS_DEBUG + DWARF_DEBUG .comment 0 : { *(.comment) } - - /* - * DWARF debug sections. - * Symbols in the DWARF debugging sections are relative to the - * beginning of the section so we begin them at 0. 
- */ - /* DWARF 1 */ - .debug 0 : { *(.debug) } - .line 0 : { *(.line) } - /* GNU DWARF 1 extensions */ - .debug_srcinfo 0 : { *(.debug_srcinfo) } - .debug_sfnames 0 : { *(.debug_sfnames) } - /* DWARF 1.1 and DWARF 2 */ - .debug_aranges 0 : { *(.debug_aranges) } - .debug_pubnames 0 : { *(.debug_pubnames) } - /* DWARF 2 */ - .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } - .debug_abbrev 0 : { *(.debug_abbrev) } - .debug_line 0 : { *(.debug_line) } - .debug_frame 0 : { *(.debug_frame) } - .debug_str 0 : { *(.debug_str) } - .debug_loc 0 : { *(.debug_loc) } - .debug_macinfo 0 : { *(.debug_macinfo) } - /* SGI/MIPS DWARF 2 extensions */ - .debug_weaknames 0 : { *(.debug_weaknames) } - .debug_funcnames 0 : { *(.debug_funcnames) } - .debug_typenames 0 : { *(.debug_typenames) } - .debug_varnames 0 : { *(.debug_varnames) } - /* DWARF 3 */ - .debug_pubtypes 0 : { *(.debug_pubtypes) } - .debug_ranges 0 : { *(.debug_ranges) } .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) } /DISCARD/ : { diff --git a/arch/s390/kernel/vdso64/vdso64_generic.c b/arch/s390/kernel/vdso/vdso_generic.c index a9aa75643c08..a9aa75643c08 100644 --- a/arch/s390/kernel/vdso64/vdso64_generic.c +++ b/arch/s390/kernel/vdso/vdso_generic.c diff --git a/arch/s390/kernel/vdso64/vdso_user_wrapper.S b/arch/s390/kernel/vdso/vdso_user_wrapper.S index aa06c85bcbd3..aa06c85bcbd3 100644 --- a/arch/s390/kernel/vdso64/vdso_user_wrapper.S +++ b/arch/s390/kernel/vdso/vdso_user_wrapper.S diff --git a/arch/s390/kernel/vdso32/vdso32_wrapper.S b/arch/s390/kernel/vdso/vdso_wrapper.S index de2fb930471a..f69e62a14978 100644 --- a/arch/s390/kernel/vdso32/vdso32_wrapper.S +++ b/arch/s390/kernel/vdso/vdso_wrapper.S @@ -5,11 +5,11 @@ __PAGE_ALIGNED_DATA - .globl vdso32_start, vdso32_end + .globl vdso_start, vdso_end .balign PAGE_SIZE -vdso32_start: - .incbin "arch/s390/kernel/vdso32/vdso32.so" +vdso_start: + .incbin "arch/s390/kernel/vdso/vdso.so" .balign PAGE_SIZE -vdso32_end: +vdso_end: .previous diff --git a/arch/s390/kernel/vdso64/vgetrandom-chacha.S b/arch/s390/kernel/vdso/vgetrandom-chacha.S index 09c034c2f853..09c034c2f853 100644 --- a/arch/s390/kernel/vdso64/vgetrandom-chacha.S +++ b/arch/s390/kernel/vdso/vgetrandom-chacha.S diff --git a/arch/s390/kernel/vdso64/vgetrandom.c b/arch/s390/kernel/vdso/vgetrandom.c index b5268b507fb5..b5268b507fb5 100644 --- a/arch/s390/kernel/vdso64/vgetrandom.c +++ b/arch/s390/kernel/vdso/vgetrandom.c diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile deleted file mode 100644 index 1e4ddd1a683f..000000000000 --- a/arch/s390/kernel/vdso32/Makefile +++ /dev/null @@ -1,64 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# List of files in the vdso - -# Include the generic Makefile to check the built vdso. 
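With the 31-bit compat images removed in the hunks above, there is a single vDSO blob per kernel, bracketed by the vdso_start/vdso_end labels in vdso_wrapper.S and installed by map_vdso() as a special mapping named "[vdso]" (see the vdso.c changes earlier in this series). A small userspace check, independent of this patch and safe to run on any Linux system, that simply prints that mapping from /proc/self/maps:

#include <stdio.h>
#include <string.h>

int main(void)
{
	FILE *f = fopen("/proc/self/maps", "r");
	char line[512];

	if (!f)
		return 1;
	while (fgets(line, sizeof(line), f)) {
		/* The special mapping installed by map_vdso() shows up as "[vdso]". */
		if (strstr(line, "[vdso]"))
			fputs(line, stdout);
	}
	fclose(f);
	return 0;
}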
-include $(srctree)/lib/vdso/Makefile.include -obj-vdso32 = vdso_user_wrapper-32.o note-32.o - -# Build rules - -targets := $(obj-vdso32) vdso32.so vdso32.so.dbg -obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32)) - -KBUILD_AFLAGS += -DBUILD_VDSO -KBUILD_CFLAGS += -DBUILD_VDSO -DDISABLE_BRANCH_PROFILING - -KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS)) -KBUILD_AFLAGS_32 += -m31 -s - -KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS)) -KBUILD_CFLAGS_32 := $(filter-out -mpacked-stack,$(KBUILD_CFLAGS)) -KBUILD_CFLAGS_32 := $(filter-out -mno-pic-data-is-text-relative,$(KBUILD_CFLAGS_32)) -KBUILD_CFLAGS_32 := $(filter-out -fno-asynchronous-unwind-tables,$(KBUILD_CFLAGS_32)) -KBUILD_CFLAGS_32 += -m31 -fPIC -shared -fno-common -fno-builtin -fasynchronous-unwind-tables - -LDFLAGS_vdso32.so.dbg += -shared -soname=linux-vdso32.so.1 \ - --hash-style=both --build-id=sha1 -melf_s390 -T - -$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32) -$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32) - -obj-y += vdso32_wrapper.o -targets += vdso32.lds -CPPFLAGS_vdso32.lds += -P -C -U$(ARCH) - -# Force dependency (incbin is bad) -$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so - -quiet_cmd_vdso_and_check = VDSO $@ - cmd_vdso_and_check = $(cmd_ld); $(cmd_vdso_check) - -$(obj)/vdso32.so.dbg: $(obj)/vdso32.lds $(obj-vdso32) FORCE - $(call if_changed,vdso_and_check) - -# strip rule for the .so file -$(obj)/%.so: OBJCOPYFLAGS := -S -$(obj)/%.so: $(obj)/%.so.dbg FORCE - $(call if_changed,objcopy) - -$(obj-vdso32): %-32.o: %.S FORCE - $(call if_changed_dep,vdso32as) - -# actual build commands -quiet_cmd_vdso32as = VDSO32A $@ - cmd_vdso32as = $(CC) $(a_flags) -c -o $@ $< -quiet_cmd_vdso32cc = VDSO32C $@ - cmd_vdso32cc = $(CC) $(c_flags) -c -o $@ $< - -# Generate VDSO offsets using helper script -gen-vdsosym := $(src)/gen_vdso_offsets.sh -quiet_cmd_vdsosym = VDSOSYM $@ - cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ - -include/generated/vdso32-offsets.h: $(obj)/vdso32.so.dbg FORCE - $(call if_changed,vdsosym) diff --git a/arch/s390/kernel/vdso32/gen_vdso_offsets.sh b/arch/s390/kernel/vdso32/gen_vdso_offsets.sh deleted file mode 100755 index 9c4f951e227d..000000000000 --- a/arch/s390/kernel/vdso32/gen_vdso_offsets.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 - -# -# Match symbols in the DSO that look like VDSO_*; produce a header file -# of constant offsets into the shared object. -# -# Doing this inside the Makefile will break the $(filter-out) function, -# causing Kbuild to rebuild the vdso-offsets header file every time. -# -# Inspired by arm64 version. -# - -LC_ALL=C -sed -n 's/\([0-9a-f]*\) . __kernel_compat_\(.*\)/\#define vdso32_offset_\2\t0x\1/p' diff --git a/arch/s390/kernel/vdso32/vdso32.lds.S b/arch/s390/kernel/vdso32/vdso32.lds.S deleted file mode 100644 index 9630d58c2080..000000000000 --- a/arch/s390/kernel/vdso32/vdso32.lds.S +++ /dev/null @@ -1,140 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * This is the infamous ld script for the 64 bits vdso - * library - */ - -#include <asm/page.h> -#include <asm/vdso.h> -#include <vdso/datapage.h> - -OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390") -OUTPUT_ARCH(s390:31-bit) - -SECTIONS -{ - VDSO_VVAR_SYMS - - . 
= SIZEOF_HEADERS; - - .hash : { *(.hash) } :text - .gnu.hash : { *(.gnu.hash) } - .dynsym : { *(.dynsym) } - .dynstr : { *(.dynstr) } - .gnu.version : { *(.gnu.version) } - .gnu.version_d : { *(.gnu.version_d) } - .gnu.version_r : { *(.gnu.version_r) } - - .note : { *(.note.*) } :text :note - - . = ALIGN(16); - .text : { - *(.text .stub .text.* .gnu.linkonce.t.*) - } :text - PROVIDE(__etext = .); - PROVIDE(_etext = .); - PROVIDE(etext = .); - - /* - * Other stuff is appended to the text segment: - */ - .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } - .rodata1 : { *(.rodata1) } - - .dynamic : { *(.dynamic) } :text :dynamic - - .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr - .eh_frame : { KEEP (*(.eh_frame)) } :text - .gcc_except_table : { *(.gcc_except_table .gcc_except_table.*) } - - .rela.dyn ALIGN(8) : { *(.rela.dyn) } - .got ALIGN(8) : { *(.got .toc) } - .got.plt ALIGN(8) : { *(.got.plt) } - - _end = .; - PROVIDE(end = .); - - /* - * Stabs debugging sections are here too. - */ - .stab 0 : { *(.stab) } - .stabstr 0 : { *(.stabstr) } - .stab.excl 0 : { *(.stab.excl) } - .stab.exclstr 0 : { *(.stab.exclstr) } - .stab.index 0 : { *(.stab.index) } - .stab.indexstr 0 : { *(.stab.indexstr) } - .comment 0 : { *(.comment) } - - /* - * DWARF debug sections. - * Symbols in the DWARF debugging sections are relative to the - * beginning of the section so we begin them at 0. - */ - /* DWARF 1 */ - .debug 0 : { *(.debug) } - .line 0 : { *(.line) } - /* GNU DWARF 1 extensions */ - .debug_srcinfo 0 : { *(.debug_srcinfo) } - .debug_sfnames 0 : { *(.debug_sfnames) } - /* DWARF 1.1 and DWARF 2 */ - .debug_aranges 0 : { *(.debug_aranges) } - .debug_pubnames 0 : { *(.debug_pubnames) } - /* DWARF 2 */ - .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } - .debug_abbrev 0 : { *(.debug_abbrev) } - .debug_line 0 : { *(.debug_line) } - .debug_frame 0 : { *(.debug_frame) } - .debug_str 0 : { *(.debug_str) } - .debug_loc 0 : { *(.debug_loc) } - .debug_macinfo 0 : { *(.debug_macinfo) } - /* SGI/MIPS DWARF 2 extensions */ - .debug_weaknames 0 : { *(.debug_weaknames) } - .debug_funcnames 0 : { *(.debug_funcnames) } - .debug_typenames 0 : { *(.debug_typenames) } - .debug_varnames 0 : { *(.debug_varnames) } - /* DWARF 3 */ - .debug_pubtypes 0 : { *(.debug_pubtypes) } - .debug_ranges 0 : { *(.debug_ranges) } - .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) } - - /DISCARD/ : { - *(.note.GNU-stack) - *(.branch_lt) - *(.data .data.* .gnu.linkonce.d.* .sdata*) - *(.bss .sbss .dynbss .dynsbss) - } -} - -/* - * Very old versions of ld do not recognize this name token; use the constant. - */ -#define PT_GNU_EH_FRAME 0x6474e550 - -/* - * We must supply the ELF program headers explicitly to get just one - * PT_LOAD segment, and set the flags explicitly to make segments read-only. - */ -PHDRS -{ - text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */ - dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ - note PT_NOTE FLAGS(4); /* PF_R */ - eh_frame_hdr PT_GNU_EH_FRAME; -} - -/* - * This controls what symbols we export from the DSO. 
- */ -VERSION -{ - VDSO_VERSION_STRING { - global: - /* - * Has to be there for the kernel to find - */ - __kernel_compat_restart_syscall; - __kernel_compat_rt_sigreturn; - __kernel_compat_sigreturn; - local: *; - }; -} diff --git a/arch/s390/kernel/vdso32/vdso_user_wrapper.S b/arch/s390/kernel/vdso32/vdso_user_wrapper.S deleted file mode 100644 index 2e645003fdaf..000000000000 --- a/arch/s390/kernel/vdso32/vdso_user_wrapper.S +++ /dev/null @@ -1,22 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -#include <linux/linkage.h> -#include <asm/unistd.h> -#include <asm/dwarf.h> - -.macro vdso_syscall func,syscall - .globl __kernel_compat_\func - .type __kernel_compat_\func,@function - __ALIGN -__kernel_compat_\func: - CFI_STARTPROC - svc \syscall - /* Make sure we notice when a syscall returns, which shouldn't happen */ - .word 0 - CFI_ENDPROC - .size __kernel_compat_\func,.-__kernel_compat_\func -.endm - -vdso_syscall restart_syscall,__NR_restart_syscall -vdso_syscall sigreturn,__NR_sigreturn -vdso_syscall rt_sigreturn,__NR_rt_sigreturn diff --git a/arch/s390/kernel/vdso64/.gitignore b/arch/s390/kernel/vdso64/.gitignore deleted file mode 100644 index 4ec80685fecc..000000000000 --- a/arch/s390/kernel/vdso64/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -vdso64.lds diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile deleted file mode 100644 index d8f0df742809..000000000000 --- a/arch/s390/kernel/vdso64/Makefile +++ /dev/null @@ -1,79 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# List of files in the vdso - -# Include the generic Makefile to check the built vdso. -include $(srctree)/lib/vdso/Makefile.include -obj-vdso64 = vdso_user_wrapper.o note.o vgetrandom-chacha.o -obj-cvdso64 = vdso64_generic.o getcpu.o vgetrandom.o -VDSO_CFLAGS_REMOVE := -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE) -CFLAGS_REMOVE_getcpu.o = $(VDSO_CFLAGS_REMOVE) -CFLAGS_REMOVE_vgetrandom.o = $(VDSO_CFLAGS_REMOVE) -CFLAGS_REMOVE_vdso64_generic.o = $(VDSO_CFLAGS_REMOVE) - -ifneq ($(c-getrandom-y),) - CFLAGS_vgetrandom.o += -include $(c-getrandom-y) -endif - -# Build rules - -targets := $(obj-vdso64) $(obj-cvdso64) vdso64.so vdso64.so.dbg -obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64)) -obj-cvdso64 := $(addprefix $(obj)/, $(obj-cvdso64)) - -KBUILD_AFLAGS += -DBUILD_VDSO -KBUILD_CFLAGS += -DBUILD_VDSO -DDISABLE_BRANCH_PROFILING - -KBUILD_AFLAGS_64 := $(filter-out -m64,$(KBUILD_AFLAGS)) -KBUILD_AFLAGS_64 += -m64 - -KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS)) -KBUILD_CFLAGS_64 := $(filter-out -mpacked-stack,$(KBUILD_CFLAGS_64)) -KBUILD_CFLAGS_64 := $(filter-out -mno-pic-data-is-text-relative,$(KBUILD_CFLAGS_64)) -KBUILD_CFLAGS_64 := $(filter-out -munaligned-symbols,$(KBUILD_CFLAGS_64)) -KBUILD_CFLAGS_64 := $(filter-out -fno-asynchronous-unwind-tables,$(KBUILD_CFLAGS_64)) -KBUILD_CFLAGS_64 += -m64 -fPIC -fno-common -fno-builtin -fasynchronous-unwind-tables -ldflags-y := -shared -soname=linux-vdso64.so.1 \ - --hash-style=both --build-id=sha1 -T - -$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64) -$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_64) - -obj-y += vdso64_wrapper.o -targets += vdso64.lds -CPPFLAGS_vdso64.lds += -P -C -U$(ARCH) - -# Force dependency (incbin is bad) -$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so - -quiet_cmd_vdso_and_check = VDSO $@ - cmd_vdso_and_check = $(cmd_ld); $(cmd_vdso_check) - -# link rule for the .so file, .lds has to be first -$(obj)/vdso64.so.dbg: $(obj)/vdso64.lds $(obj-vdso64) $(obj-cvdso64) 
FORCE - $(call if_changed,vdso_and_check) - -# strip rule for the .so file -$(obj)/%.so: OBJCOPYFLAGS := -S -$(obj)/%.so: $(obj)/%.so.dbg FORCE - $(call if_changed,objcopy) - -# assembly rules for the .S files -$(obj-vdso64): %.o: %.S FORCE - $(call if_changed_dep,vdso64as) - -$(obj-cvdso64): %.o: %.c FORCE - $(call if_changed_dep,vdso64cc) - -# actual build commands -quiet_cmd_vdso64as = VDSO64A $@ - cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $< -quiet_cmd_vdso64cc = VDSO64C $@ - cmd_vdso64cc = $(CC) $(c_flags) -c -o $@ $< - -# Generate VDSO offsets using helper script -gen-vdsosym := $(src)/gen_vdso_offsets.sh -quiet_cmd_vdsosym = VDSOSYM $@ - cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ - -include/generated/vdso64-offsets.h: $(obj)/vdso64.so.dbg FORCE - $(call if_changed,vdsosym) diff --git a/arch/s390/kernel/vdso64/note.S b/arch/s390/kernel/vdso64/note.S deleted file mode 100644 index db19d0680a0a..000000000000 --- a/arch/s390/kernel/vdso64/note.S +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. - * Here we can supply some information useful to userland. - */ - -#include <linux/uts.h> -#include <linux/version.h> -#include <linux/elfnote.h> - -ELFNOTE_START(Linux, 0, "a") - .long LINUX_VERSION_CODE -ELFNOTE_END diff --git a/arch/s390/kernel/vdso64/vdso64_wrapper.S b/arch/s390/kernel/vdso64/vdso64_wrapper.S deleted file mode 100644 index 672184998623..000000000000 --- a/arch/s390/kernel/vdso64/vdso64_wrapper.S +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include <linux/init.h> -#include <linux/linkage.h> -#include <asm/page.h> - - __PAGE_ALIGNED_DATA - - .globl vdso64_start, vdso64_end - .balign PAGE_SIZE -vdso64_start: - .incbin "arch/s390/kernel/vdso64/vdso64.so" - .balign PAGE_SIZE -vdso64_end: - - .previous diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 8609126961dc..53bcbb91bb9b 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -150,6 +150,15 @@ SECTIONS *(.altinstr_replacement) } +#ifdef CONFIG_STACKPROTECTOR + . = ALIGN(8); + .stack_prot_table : { + __stack_prot_start = .; + KEEP(*(__stack_protector_loc)) + __stack_prot_end = .; + } +#endif + /* * Table with the patch locations to undo expolines */ @@ -257,6 +266,10 @@ SECTIONS QUAD(invalid_pg_dir) QUAD(__alt_instructions) QUAD(__alt_instructions_end) +#ifdef CONFIG_STACKPROTECTOR + QUAD(__stack_prot_start) + QUAD(__stack_prot_end) +#endif #ifdef CONFIG_KASAN QUAD(kasan_early_shadow_page) QUAD(kasan_early_shadow_pte) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index c62a868cf2b6..f55574af98cc 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -7,8 +7,7 @@ * Author(s): Carsten Otte <cotte@de.ibm.com> */ -#define KMSG_COMPONENT "kvm-s390" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "kvm-s390: " fmt #include <linux/cpufeature.h> #include <linux/interrupt.h> diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 16ba04062854..ff3a185f156c 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -10,8 +10,7 @@ * Jason J. 
Herne <jjherne@us.ibm.com> */ -#define KMSG_COMPONENT "kvm-s390" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "kvm-s390: " fmt #include <linux/compiler.h> #include <linux/export.h> diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 9a71b6e00948..0b14d894f38a 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -754,7 +754,7 @@ int is_valid_psw(psw_t *psw) int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu) { psw_t *gpsw = &vcpu->arch.sie_block->gpsw; - psw_compat_t new_psw; + psw32_t new_psw; u64 addr; int rc; u8 ar; diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index e2a6eb92420f..eb7ef63fab1e 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -321,8 +321,8 @@ static int cmm_timeout_handler(const struct ctl_table *ctl, int write, cmm_set_timeout(nr, seconds); *ppos += *lenp; } else { - len = sprintf(buf, "%ld %ld\n", - cmm_timeout_pages, cmm_timeout_seconds); + len = scnprintf(buf, sizeof(buf), "%ld %ld\n", + cmm_timeout_pages, cmm_timeout_seconds); if (len > *lenp) len = *lenp; memcpy(buffer, buf, len); diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index 528d7c70979f..89badbe72ae7 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -51,7 +51,7 @@ struct pg_state { struct seq_file *__m = (m); \ \ if (__m) \ - seq_printf(__m, fmt); \ + seq_puts(__m, fmt); \ }) static void print_prot(struct seq_file *m, unsigned int pr, int level) diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index f7da53e212f5..6cc33c705de2 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -7,8 +7,7 @@ * Copyright IBM Corp. 2002, 2004 */ -#define KMSG_COMPONENT "extmem" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "extmem: " fmt #include <linux/kernel.h> #include <linux/string.h> @@ -598,14 +597,16 @@ segment_save(char *name) goto out; } - sprintf(cmd1, "DEFSEG %s", name); + snprintf(cmd1, sizeof(cmd1), "DEFSEG %s", name); for (i=0; i<seg->segcnt; i++) { - sprintf(cmd1+strlen(cmd1), " %lX-%lX %s", - seg->range[i].start >> PAGE_SHIFT, - seg->range[i].end >> PAGE_SHIFT, - segtype_string[seg->range[i].start & 0xff]); + size_t len = strlen(cmd1); + + snprintf(cmd1 + len, sizeof(cmd1) - len, " %lX-%lX %s", + seg->range[i].start >> PAGE_SHIFT, + seg->range[i].end >> PAGE_SHIFT, + segtype_string[seg->range[i].start & 0xff]); } - sprintf(cmd2, "SAVESEG %s", name); + snprintf(cmd2, sizeof(cmd2), "SAVESEG %s", name); response = 0; cpcmd(cmd1, NULL, 0, &response); if (response) { diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index e1ad05bfd28a..e2e13778c36a 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -23,7 +23,6 @@ #include <linux/ptrace.h> #include <linux/mman.h> #include <linux/mm.h> -#include <linux/compat.h> #include <linux/smp.h> #include <linux/kdebug.h> #include <linux/init.h> @@ -133,8 +132,17 @@ static void dump_fault_info(struct pt_regs *regs) union teid teid = { .val = regs->int_parm_long }; unsigned long asce; - pr_alert("Failing address: %016lx TEID: %016lx\n", + pr_alert("Failing address: %016lx TEID: %016lx", get_fault_address(regs), teid.val); + if (test_facility(131)) + pr_cont(" ESOP-2"); + else if (machine_has_esop()) + pr_cont(" ESOP-1"); + else + pr_cont(" SOP"); + if (test_facility(75)) + pr_cont(" FSI"); + pr_cont("\n"); pr_alert("Fault in "); switch (teid.as) { case PSW_BITS_AS_HOME: @@ -365,23 +373,20 @@ void do_protection_exception(struct pt_regs *regs) * The exception to this rule are aborted transactions, for 
these * the PSW already points to the correct location. */ - if (!(regs->int_code & 0x200)) + if (!(regs->int_code & 0x200)) { regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16); + set_pt_regs_flag(regs, PIF_PSW_ADDR_ADJUSTED); + } /* - * Check for low-address protection. This needs to be treated - * as a special case because the translation exception code - * field is not guaranteed to contain valid data in this case. + * If bit 61 if the TEID is not set, the remainder of the + * TEID is unpredictable. Special handling is required. */ if (unlikely(!teid.b61)) { if (user_mode(regs)) { - /* Low-address protection in user mode: cannot happen */ dump_fault_info(regs); - die(regs, "Low-address protection"); + die(regs, "Unexpected TEID"); } - /* - * Low-address protection in kernel mode means - * NULL pointer write access in kernel mode. - */ + /* Assume low-address protection in kernel mode. */ return handle_fault_error_nolock(regs, 0); } if (unlikely(cpu_has_nx() && teid.b56)) { diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 8ff6bba107e8..603d9e5febb5 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -138,10 +138,7 @@ EXPORT_SYMBOL_GPL(gmap_create); static void gmap_flush_tlb(struct gmap *gmap) { - if (cpu_has_idte()) - __tlb_flush_idte(gmap->asce); - else - __tlb_flush_global(); + __tlb_flush_idte(gmap->asce); } static void gmap_radix_tree_free(struct radix_tree_root *root) @@ -1988,10 +1985,8 @@ static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *pmdp, pmd_t new, if (machine_has_tlb_guest()) __pmdp_idte(gaddr, (pmd_t *)pmdp, IDTE_GUEST_ASCE, gmap->asce, IDTE_GLOBAL); - else if (cpu_has_idte()) - __pmdp_idte(gaddr, (pmd_t *)pmdp, 0, 0, IDTE_GLOBAL); else - __pmdp_csp(pmdp); + __pmdp_idte(gaddr, (pmd_t *)pmdp, 0, 0, IDTE_GLOBAL); set_pmd(pmdp, new); } @@ -2012,7 +2007,7 @@ static void gmap_pmdp_clear(struct mm_struct *mm, unsigned long vmaddr, _SEGMENT_ENTRY_GMAP_UC | _SEGMENT_ENTRY)); if (purge) - __pmdp_csp(pmdp); + __pmdp_cspg(pmdp); set_pmd(pmdp, __pmd(_SEGMENT_ENTRY_EMPTY)); } spin_unlock(&gmap->guest_table_lock); @@ -2033,17 +2028,6 @@ void gmap_pmdp_invalidate(struct mm_struct *mm, unsigned long vmaddr) EXPORT_SYMBOL_GPL(gmap_pmdp_invalidate); /** - * gmap_pmdp_csp - csp all affected guest pmd entries - * @mm: pointer to the process mm_struct - * @vmaddr: virtual address in the process address space - */ -void gmap_pmdp_csp(struct mm_struct *mm, unsigned long vmaddr) -{ - gmap_pmdp_clear(mm, vmaddr, 1); -} -EXPORT_SYMBOL_GPL(gmap_pmdp_csp); - -/** * gmap_pmdp_idte_local - invalidate and clear a guest pmd entry * @mm: pointer to the process mm_struct * @vmaddr: virtual address in the process address space @@ -2066,7 +2050,7 @@ void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr) if (machine_has_tlb_guest()) __pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE, gmap->asce, IDTE_LOCAL); - else if (cpu_has_idte()) + else __pmdp_idte(gaddr, pmdp, 0, 0, IDTE_LOCAL); *pmdp = __pmd(_SEGMENT_ENTRY_EMPTY); } @@ -2099,10 +2083,8 @@ void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr) if (machine_has_tlb_guest()) __pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE, gmap->asce, IDTE_GLOBAL); - else if (cpu_has_idte()) - __pmdp_idte(gaddr, pmdp, 0, 0, IDTE_GLOBAL); else - __pmdp_csp(pmdp); + __pmdp_idte(gaddr, pmdp, 0, 0, IDTE_GLOBAL); *pmdp = __pmd(_SEGMENT_ENTRY_EMPTY); } spin_unlock(&gmap->guest_table_lock); diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 72e8fa136af5..d42e61c7594e 100644 --- 
a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -6,8 +6,7 @@ * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com> */ -#define KMSG_COMPONENT "hugetlb" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "hugetlb: " fmt #include <linux/cpufeature.h> #include <linux/mm.h> diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index 197c1d9497a7..2a222a7e14f4 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -15,7 +15,6 @@ #include <linux/sched/signal.h> #include <linux/sched/mm.h> #include <linux/random.h> -#include <linux/compat.h> #include <linux/security.h> #include <linux/hugetlb.h> #include <asm/elf.h> diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index 348e759840e7..3042647c9dbf 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -78,10 +78,8 @@ static void pgt_set(unsigned long *old, unsigned long new, unsigned long addr, } table = (unsigned long *)((unsigned long)old & mask); crdte(*old, new, table, dtt, addr, get_lowcore()->kernel_asce.val); - } else if (cpu_has_idte()) { - cspg(old, *old, new); } else { - csp((unsigned int *)old + 1, *old, new); + cspg(old, *old, new); } } diff --git a/arch/s390/mm/pfault.c b/arch/s390/mm/pfault.c index e6175d75e4b0..2f829448c719 100644 --- a/arch/s390/mm/pfault.c +++ b/arch/s390/mm/pfault.c @@ -199,8 +199,7 @@ block: * return to userspace schedule() to block. */ __set_current_state(TASK_UNINTERRUPTIBLE); - set_tsk_need_resched(tsk); - set_preempt_need_resched(); + set_need_resched_current(); } } out: diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index 626fca116cd7..7df23528c01b 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -164,6 +164,8 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) { struct ptdesc *ptdesc = virt_to_ptdesc(table); + if (pagetable_is_reserved(ptdesc)) + return free_reserved_ptdesc(ptdesc); pagetable_dtor_free(ptdesc); } diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 0fde20bbc50b..7ae77df276b5 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -274,9 +274,9 @@ void ptep_reset_dat_prot(struct mm_struct *mm, unsigned long addr, pte_t *ptep, preempt_disable(); atomic_inc(&mm->context.flush_count); if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) - __ptep_rdp(addr, ptep, 0, 0, 1); + __ptep_rdp(addr, ptep, 1); else - __ptep_rdp(addr, ptep, 0, 0, 0); + __ptep_rdp(addr, ptep, 0); /* * PTE is not invalidated by RDP, only _PAGE_PROTECT is cleared. 
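The sysinfo.c, cmm.c and extmem.c hunks above replace open-ended sprintf() calls with snprintf() or scnprintf() (scnprintf is the kernel variant that returns the number of bytes actually written), so every write is bounded by the destination size. A self-contained userspace illustration of the append pattern now used in segment_save(); the segment name and the ranges below are made up for the demo:

#include <stdio.h>
#include <string.h>

int main(void)
{
	char cmd[32];	/* deliberately tiny so the bound visibly matters */
	size_t len;
	unsigned int i;

	snprintf(cmd, sizeof(cmd), "DEFSEG %s", "DEMOSEG");
	for (i = 0; i < 8; i++) {
		len = strlen(cmd);
		/* Never writes past cmd[31], no matter how many ranges are appended. */
		snprintf(cmd + len, sizeof(cmd) - len, " %X-%X", i << 4, (i << 4) + 0xf);
	}
	puts(cmd);
	return 0;
}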
That * means it is still valid and active, and must not be changed according @@ -360,14 +360,10 @@ static inline void pmdp_idte_global(struct mm_struct *mm, mm->context.asce, IDTE_GLOBAL); if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m) gmap_pmdp_idte_global(mm, addr); - } else if (cpu_has_idte()) { + } else { __pmdp_idte(addr, pmdp, 0, 0, IDTE_GLOBAL); if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m) gmap_pmdp_idte_global(mm, addr); - } else { - __pmdp_csp(pmdp); - if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m) - gmap_pmdp_csp(mm, addr); } } @@ -487,14 +483,8 @@ static inline void pudp_idte_global(struct mm_struct *mm, if (machine_has_tlb_guest()) __pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE, mm->context.asce, IDTE_GLOBAL); - else if (cpu_has_idte()) - __pudp_idte(addr, pudp, 0, 0, IDTE_GLOBAL); else - /* - * Invalid bit position is the same for pmd and pud, so we can - * reuse _pmd_csp() here - */ - __pmdp_csp((pmd_t *) pudp); + __pudp_idte(addr, pudp, 0, 0, IDTE_GLOBAL); } static inline pud_t pudp_flush_direct(struct mm_struct *mm, diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index f48ef361bc83..d96587b84e81 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -4,6 +4,7 @@ */ #include <linux/memory_hotplug.h> +#include <linux/bootmem_info.h> #include <linux/cpufeature.h> #include <linux/memblock.h> #include <linux/pfn.h> @@ -39,15 +40,21 @@ static void __ref *vmem_alloc_pages(unsigned int order) static void vmem_free_pages(unsigned long addr, int order, struct vmem_altmap *altmap) { + unsigned int nr_pages = 1 << order; + struct page *page; + if (altmap) { vmem_altmap_free(altmap, 1 << order); return; } - /* We don't expect boot memory to be removed ever. */ - if (!slab_is_available() || - WARN_ON_ONCE(PageReserved(virt_to_page((void *)addr)))) - return; - free_pages(addr, order); + page = virt_to_page((void *)addr); + if (PageReserved(page)) { + /* allocated from memblock */ + while (nr_pages--) + free_bootmem_page(page++); + } else { + free_pages(addr, order); + } } void *vmem_crst_alloc(unsigned long val) @@ -79,10 +86,6 @@ pte_t __ref *vmem_pte_alloc(void) static void vmem_pte_free(unsigned long *table) { - /* We don't expect boot memory to be removed ever. 
*/ - if (!slab_is_available() || - WARN_ON_ONCE(PageReserved(virt_to_page(table)))) - return; page_table_free(&init_mm, table); } diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index cf461d76e9da..3238c178bed8 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -15,8 +15,7 @@ * Michael Holzheu <holzheu@linux.vnet.ibm.com> */ -#define KMSG_COMPONENT "bpf_jit" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "bpf_jit: " fmt #include <linux/netdevice.h> #include <linux/filter.h> diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index c82c577db2bc..93d2c9c780fc 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -16,8 +16,7 @@ * Thomas Klein */ -#define KMSG_COMPONENT "zpci" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "zpci: " fmt #include <linux/kernel.h> #include <linux/slab.h> diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c index 45a1c36c5a54..72adc8f6e94f 100644 --- a/arch/s390/pci/pci_bus.c +++ b/arch/s390/pci/pci_bus.c @@ -7,8 +7,7 @@ * */ -#define KMSG_COMPONENT "zpci" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "zpci: " fmt #include <linux/kernel.h> #include <linux/slab.h> @@ -45,8 +44,10 @@ static int zpci_bus_prepare_device(struct zpci_dev *zdev) if (!zdev_enabled(zdev)) { rc = zpci_enable_device(zdev); - if (rc) + if (rc) { + pr_err("Enabling PCI function %08x failed\n", zdev->fid); return rc; + } } if (!zdev->has_resources) { diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index 241f7251c873..177aa0214547 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -6,10 +6,8 @@ * Jan Glauber <jang@linux.vnet.ibm.com> */ -#define KMSG_COMPONENT "zpci" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "zpci: " fmt -#include <linux/compat.h> #include <linux/kernel.h> #include <linux/miscdevice.h> #include <linux/slab.h> @@ -651,7 +649,7 @@ static long clp_misc_ioctl(struct file *filp, unsigned int cmd, if (cmd != CLP_SYNC) return -EINVAL; - argp = is_compat_task() ? 
compat_ptr(arg) : (void __user *) arg; + argp = (void __user *)arg; if (copy_from_user(&req, argp, sizeof(req))) return -EFAULT; if (req.r != 0) @@ -669,7 +667,6 @@ static const struct file_operations clp_misc_fops = { .open = nonseekable_open, .release = clp_misc_release, .unlocked_ioctl = clp_misc_ioctl, - .compat_ioctl = clp_misc_ioctl, }; static struct miscdevice clp_misc_device = { diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c index 38014206c16b..c7ed7bf254b5 100644 --- a/arch/s390/pci/pci_debug.c +++ b/arch/s390/pci/pci_debug.c @@ -6,8 +6,7 @@ * Jan Glauber <jang@linux.vnet.ibm.com> */ -#define KMSG_COMPONENT "zpci" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "zpci: " fmt #include <linux/kernel.h> #include <linux/seq_file.h> diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index 27db1e72c623..839bd91c056e 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -6,8 +6,7 @@ * Jan Glauber <jang@linux.vnet.ibm.com> */ -#define KMSG_COMPONENT "zpci" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "zpci: " fmt #include <linux/kernel.h> #include <linux/pci.h> diff --git a/arch/s390/pci/pci_iov.c b/arch/s390/pci/pci_iov.c index 191e56a623f6..13050ce5c3e9 100644 --- a/arch/s390/pci/pci_iov.c +++ b/arch/s390/pci/pci_iov.c @@ -7,8 +7,7 @@ * */ -#define KMSG_COMPONENT "zpci" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "zpci: " fmt #include <linux/kernel.h> #include <linux/pci.h> diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c index e73be96ce5fe..2a06df8c2498 100644 --- a/arch/s390/pci/pci_irq.c +++ b/arch/s390/pci/pci_irq.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -#define KMSG_COMPONENT "zpci" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "zpci: " fmt #include <linux/kernel.h> #include <linux/irq.h> diff --git a/arch/s390/pci/pci_report.c b/arch/s390/pci/pci_report.c index 1b494e5ecc4d..7030f7052926 100644 --- a/arch/s390/pci/pci_report.c +++ b/arch/s390/pci/pci_report.c @@ -7,8 +7,7 @@ * */ -#define KMSG_COMPONENT "zpci" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "zpci: " fmt #include <linux/kernel.h> #include <linux/sprintf.h> diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c index 12060870e2aa..c2444a23e26c 100644 --- a/arch/s390/pci/pci_sysfs.c +++ b/arch/s390/pci/pci_sysfs.c @@ -6,8 +6,7 @@ * Jan Glauber <jang@linux.vnet.ibm.com> */ -#define KMSG_COMPONENT "zpci" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "zpci: " fmt #include <linux/kernel.h> #include <linux/stat.h> diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile index bd39b36e7bd6..0c196a5b194a 100644 --- a/arch/s390/purgatory/Makefile +++ b/arch/s390/purgatory/Makefile @@ -13,7 +13,7 @@ CFLAGS_sha256.o := -D__NO_FORTIFY $(obj)/mem.o: $(srctree)/arch/s390/lib/mem.S FORCE $(call if_changed_rule,as_o_S) -KBUILD_CFLAGS := -std=gnu11 -fno-strict-aliasing -Wall -Wstrict-prototypes +KBUILD_CFLAGS := -std=gnu11 -fms-extensions -fno-strict-aliasing -Wall -Wstrict-prototypes KBUILD_CFLAGS += -Wno-pointer-sign -Wno-sign-compare KBUILD_CFLAGS += -fno-zero-initialized-in-bss -fno-builtin -ffreestanding KBUILD_CFLAGS += -Os -m64 -msoft-float -fno-common @@ -21,6 +21,7 @@ KBUILD_CFLAGS += -fno-stack-protector KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING KBUILD_CFLAGS += -D__DISABLE_EXPORTS KBUILD_CFLAGS += $(CLANG_FLAGS) +KBUILD_CFLAGS += $(if $(CONFIG_CC_IS_CLANG),-Wno-microsoft-anon-tag) KBUILD_CFLAGS 
+= $(call cc-option,-fno-PIE) KBUILD_AFLAGS := $(filter-out -DCC_USING_EXPOLINE,$(KBUILD_AFLAGS)) KBUILD_AFLAGS += -D__DISABLE_EXPORTS diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c index d5c68ade71ab..2d28a569f793 100644 --- a/arch/s390/tools/gen_facilities.c +++ b/arch/s390/tools/gen_facilities.c @@ -29,6 +29,7 @@ static struct facility_def facility_defs[] = { .bits = (int[]){ 0, /* N3 instructions */ 1, /* z/Arch mode installed */ + 3, /* dat-enhancement 1 */ 18, /* long displacement facility */ 21, /* extended-immediate facility */ 25, /* store clock fast */ diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index 5e9c9eff5539..969c11325ade 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -473,3 +473,4 @@ 467 common open_tree_attr sys_open_tree_attr 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr +470 common listns sys_listns diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index ebb7d06d1044..39aa26b6a50b 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -515,3 +515,4 @@ 467 common open_tree_attr sys_open_tree_attr 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr +470 common listns sys_listns diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 31c54d6480e0..34fb46d5341b 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -298,6 +298,7 @@ config X86 select HAVE_SYSCALL_TRACEPOINTS select HAVE_UACCESS_VALIDATION if HAVE_OBJTOOL select HAVE_UNSTABLE_SCHED_CLOCK + select HAVE_UNWIND_USER_FP if X86_64 select HAVE_USER_RETURN_NOTIFIER select HAVE_GENERIC_VDSO select VDSO_GETRANDOM if X86_64 diff --git a/arch/x86/Kconfig.cpufeatures b/arch/x86/Kconfig.cpufeatures index 250c10627ab3..733d5aff2456 100644 --- a/arch/x86/Kconfig.cpufeatures +++ b/arch/x86/Kconfig.cpufeatures @@ -124,6 +124,10 @@ config X86_DISABLED_FEATURE_PCID def_bool y depends on !X86_64 +config X86_DISABLED_FEATURE_LASS + def_bool y + depends on X86_32 + config X86_DISABLED_FEATURE_PKU def_bool y depends on !X86_INTEL_MEMORY_PROTECTION_KEYS diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 1a27efcf3c20..1d403a3612ea 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -48,7 +48,8 @@ endif # How to compile the 16-bit code. Note we always compile for -march=i386; # that way we can complain to the user if the CPU is insufficient. -REALMODE_CFLAGS := -std=gnu11 -m16 -g -Os -DDISABLE_BRANCH_PROFILING -D__DISABLE_EXPORTS \ +REALMODE_CFLAGS := -std=gnu11 -fms-extensions -m16 -g -Os \ + -DDISABLE_BRANCH_PROFILING -D__DISABLE_EXPORTS \ -Wall -Wstrict-prototypes -march=i386 -mregparm=3 \ -fno-strict-aliasing -fomit-frame-pointer -fno-pic \ -mno-mmx -mno-sse $(call cc-option,-fcf-protection=none) @@ -60,6 +61,7 @@ REALMODE_CFLAGS += $(cc_stack_align4) REALMODE_CFLAGS += $(CLANG_FLAGS) ifdef CONFIG_CC_IS_CLANG REALMODE_CFLAGS += -Wno-gnu +REALMODE_CFLAGS += -Wno-microsoft-anon-tag endif export REALMODE_CFLAGS diff --git a/arch/x86/boot/a20.c b/arch/x86/boot/a20.c index a2b6b428922a..bda042933a05 100644 --- a/arch/x86/boot/a20.c +++ b/arch/x86/boot/a20.c @@ -135,29 +135,29 @@ int enable_a20(void) (legacy free, etc.) 
*/ if (a20_test_short()) return 0; - + /* Next, try the BIOS (INT 0x15, AX=0x2401) */ enable_a20_bios(); if (a20_test_short()) return 0; - + /* Try enabling A20 through the keyboard controller */ kbc_err = empty_8042(); if (a20_test_short()) return 0; /* BIOS worked, but with delayed reaction */ - + if (!kbc_err) { enable_a20_kbc(); if (a20_test_long()) return 0; } - + /* Finally, try enabling the "fast A20 gate" */ enable_a20_fast(); if (a20_test_long()) return 0; } - + return -1; } diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h index a3c58ebe3662..8e3eab34dff4 100644 --- a/arch/x86/boot/boot.h +++ b/arch/x86/boot/boot.h @@ -193,8 +193,6 @@ static inline bool heap_free(size_t n) void copy_to_fs(addr_t dst, void *src, size_t len); void *copy_from_fs(void *dst, addr_t src, size_t len); -void copy_to_gs(addr_t dst, void *src, size_t len); -void *copy_from_gs(void *dst, addr_t src, size_t len); /* a20.c */ int enable_a20(void); diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 74657589264d..68f9d7a1683b 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -25,7 +25,7 @@ targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \ # avoid errors with '-march=i386', and future flags may depend on the target to # be valid. KBUILD_CFLAGS := -m$(BITS) -O2 $(CLANG_FLAGS) -KBUILD_CFLAGS += -std=gnu11 +KBUILD_CFLAGS += -std=gnu11 -fms-extensions KBUILD_CFLAGS += -fno-strict-aliasing -fPIE KBUILD_CFLAGS += -Wundef KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING @@ -36,7 +36,10 @@ KBUILD_CFLAGS += -mno-mmx -mno-sse KBUILD_CFLAGS += -ffreestanding -fshort-wchar KBUILD_CFLAGS += -fno-stack-protector KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member) -KBUILD_CFLAGS += $(call cc-disable-warning, gnu) +ifdef CONFIG_CC_IS_CLANG +KBUILD_CFLAGS += -Wno-gnu +KBUILD_CFLAGS += -Wno-microsoft-anon-tag +endif KBUILD_CFLAGS += -Wno-pointer-sign KBUILD_CFLAGS += -fno-asynchronous-unwind-tables KBUILD_CFLAGS += -D__DISABLE_EXPORTS diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index db1048621ea2..fd855e32c9b9 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -152,17 +152,6 @@ bool insn_has_rep_prefix(struct insn *insn); void sev_insn_decode_init(void); bool early_setup_ghcb(void); #else -static inline void sev_enable(struct boot_params *bp) -{ - /* - * bp->cc_blob_address should only be set by boot/compressed kernel. - * Initialize it to 0 unconditionally (thus here in this stub too) to - * ensure that uninitialized values from buggy bootloaders aren't - * propagated. - */ - if (bp) - bp->cc_blob_address = 0; -} static inline void snp_check_features(void) { } static inline void sev_es_shutdown_ghcb(void) { } static inline bool sev_es_check_ghcb_fault(unsigned long address) diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c index bdd26050dff7..0e89e197e112 100644 --- a/arch/x86/boot/compressed/pgtable_64.c +++ b/arch/x86/boot/compressed/pgtable_64.c @@ -3,6 +3,7 @@ #include <asm/bootparam.h> #include <asm/bootparam_utils.h> #include <asm/e820/types.h> +#include <asm/pgtable.h> #include <asm/processor.h> #include "../string.h" #include "efi.h" @@ -168,9 +169,10 @@ asmlinkage void configure_5level_paging(struct boot_params *bp, void *pgtable) * For 4- to 5-level paging transition, set up current CR3 as * the first and the only entry in a new top-level page table. 
*/ - *trampoline_32bit = __native_read_cr3() | _PAGE_TABLE_NOENC; + *trampoline_32bit = native_read_cr3_pa() | _PAGE_TABLE_NOENC; } else { - unsigned long src; + u64 *new_cr3; + pgd_t *pgdp; /* * For 5- to 4-level paging transition, copy page table pointed @@ -180,8 +182,9 @@ asmlinkage void configure_5level_paging(struct boot_params *bp, void *pgtable) * We cannot just point to the page table from trampoline as it * may be above 4G. */ - src = *(unsigned long *)__native_read_cr3() & PAGE_MASK; - memcpy(trampoline_32bit, (void *)src, PAGE_SIZE); + pgdp = (pgd_t *)native_read_cr3_pa(); + new_cr3 = (u64 *)(native_pgd_val(pgdp[0]) & PTE_PFN_MASK); + memcpy(trampoline_32bit, new_cr3, PAGE_SIZE); } toggle_la57(trampoline_32bit); diff --git a/arch/x86/boot/compressed/sev-handle-vc.c b/arch/x86/boot/compressed/sev-handle-vc.c index 7530ad8b768b..030001b46554 100644 --- a/arch/x86/boot/compressed/sev-handle-vc.c +++ b/arch/x86/boot/compressed/sev-handle-vc.c @@ -29,11 +29,10 @@ bool insn_has_rep_prefix(struct insn *insn) { insn_byte_t p; - int i; insn_get_prefixes(insn); - for_each_insn_prefix(insn, i, p) { + for_each_insn_prefix(insn, p) { if (p == 0xf2 || p == 0xf3) return true; } diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index 6e5c32a53d03..c8c1464b3a56 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -14,6 +14,7 @@ #include <asm/bootparam.h> #include <asm/pgtable_types.h> +#include <asm/shared/msr.h> #include <asm/sev.h> #include <asm/trapnr.h> #include <asm/trap_pf.h> @@ -397,7 +398,7 @@ void sev_enable(struct boot_params *bp) } /* Set the SME mask if this is an SEV guest. */ - boot_rdmsr(MSR_AMD64_SEV, &m); + raw_rdmsr(MSR_AMD64_SEV, &m); sev_status = m.q; if (!(sev_status & MSR_AMD64_SEV_ENABLED)) return; @@ -446,7 +447,7 @@ u64 sev_get_status(void) if (sev_check_cpu_support() < 0) return 0; - boot_rdmsr(MSR_AMD64_SEV, &m); + raw_rdmsr(MSR_AMD64_SEV, &m); return m.q; } @@ -496,7 +497,7 @@ bool early_is_sevsnp_guest(void) struct msr m; /* Obtain the address of the calling area to use */ - boot_rdmsr(MSR_SVSM_CAA, &m); + raw_rdmsr(MSR_SVSM_CAA, &m); boot_svsm_caa_pa = m.q; /* diff --git a/arch/x86/boot/compressed/sev.h b/arch/x86/boot/compressed/sev.h index 92f79c21939c..22637b416b46 100644 --- a/arch/x86/boot/compressed/sev.h +++ b/arch/x86/boot/compressed/sev.h @@ -10,7 +10,7 @@ #ifdef CONFIG_AMD_MEM_ENCRYPT -#include "../msr.h" +#include <asm/shared/msr.h> void snp_accept_memory(phys_addr_t start, phys_addr_t end); u64 sev_get_status(void); @@ -20,7 +20,7 @@ static inline u64 sev_es_rd_ghcb_msr(void) { struct msr m; - boot_rdmsr(MSR_AMD64_SEV_ES_GHCB, &m); + raw_rdmsr(MSR_AMD64_SEV_ES_GHCB, &m); return m.q; } @@ -30,7 +30,7 @@ static inline void sev_es_wr_ghcb_msr(u64 val) struct msr m; m.q = val; - boot_wrmsr(MSR_AMD64_SEV_ES_GHCB, &m); + raw_wrmsr(MSR_AMD64_SEV_ES_GHCB, &m); } #else diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c index f82de8de5dc6..2e1bb936cba2 100644 --- a/arch/x86/boot/cpucheck.c +++ b/arch/x86/boot/cpucheck.c @@ -26,9 +26,9 @@ #include <asm/intel-family.h> #include <asm/processor-flags.h> #include <asm/msr-index.h> +#include <asm/shared/msr.h> #include "string.h" -#include "msr.h" static u32 err_flags[NCAPINTS]; @@ -134,9 +134,9 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr) struct msr m; - boot_rdmsr(MSR_K7_HWCR, &m); + raw_rdmsr(MSR_K7_HWCR, &m); m.l &= ~(1 << 15); - boot_wrmsr(MSR_K7_HWCR, &m); + raw_wrmsr(MSR_K7_HWCR, &m); get_cpuflags(); /* Make 
sure it really did something */ err = check_cpuflags(); @@ -148,9 +148,9 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr) struct msr m; - boot_rdmsr(MSR_VIA_FCR, &m); + raw_rdmsr(MSR_VIA_FCR, &m); m.l |= (1 << 1) | (1 << 7); - boot_wrmsr(MSR_VIA_FCR, &m); + raw_wrmsr(MSR_VIA_FCR, &m); set_bit(X86_FEATURE_CX8, cpu.flags); err = check_cpuflags(); @@ -160,14 +160,14 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr) struct msr m, m_tmp; u32 level = 1; - boot_rdmsr(0x80860004, &m); + raw_rdmsr(0x80860004, &m); m_tmp = m; m_tmp.l = ~0; - boot_wrmsr(0x80860004, &m_tmp); + raw_wrmsr(0x80860004, &m_tmp); asm("cpuid" : "+a" (level), "=d" (cpu.flags[0]) : : "ecx", "ebx"); - boot_wrmsr(0x80860004, &m); + raw_wrmsr(0x80860004, &m); err = check_cpuflags(); } else if (err == 0x01 && diff --git a/arch/x86/boot/msr.h b/arch/x86/boot/msr.h deleted file mode 100644 index aed66f7ae199..000000000000 --- a/arch/x86/boot/msr.h +++ /dev/null @@ -1,26 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Helpers/definitions related to MSR access. - */ - -#ifndef BOOT_MSR_H -#define BOOT_MSR_H - -#include <asm/shared/msr.h> - -/* - * The kernel proper already defines rdmsr()/wrmsr(), but they are not for the - * boot kernel since they rely on tracepoint/exception handling infrastructure - * that's not available here. - */ -static inline void boot_rdmsr(unsigned int reg, struct msr *m) -{ - asm volatile("rdmsr" : "=a" (m->l), "=d" (m->h) : "c" (reg)); -} - -static inline void boot_wrmsr(unsigned int reg, const struct msr *m) -{ - asm volatile("wrmsr" : : "c" (reg), "a"(m->l), "d" (m->h) : "memory"); -} - -#endif /* BOOT_MSR_H */ diff --git a/arch/x86/boot/startup/sev-shared.c b/arch/x86/boot/startup/sev-shared.c index 4e22ffd73516..a0fa8bb2b945 100644 --- a/arch/x86/boot/startup/sev-shared.c +++ b/arch/x86/boot/startup/sev-shared.c @@ -12,7 +12,7 @@ #include <asm/setup_data.h> #ifndef __BOOT_COMPRESSED -#define has_cpuflag(f) boot_cpu_has(f) +#define has_cpuflag(f) cpu_feature_enabled(f) #else #undef WARN #define WARN(condition, format...) (!!(condition)) diff --git a/arch/x86/coco/sev/vc-handle.c b/arch/x86/coco/sev/vc-handle.c index 7fc136a35334..f08c7505ed82 100644 --- a/arch/x86/coco/sev/vc-handle.c +++ b/arch/x86/coco/sev/vc-handle.c @@ -352,7 +352,6 @@ fault: #define sev_printk(fmt, ...) 
printk(fmt, ##__VA_ARGS__) #define error(v) -#define has_cpuflag(f) boot_cpu_has(f) #include "vc-shared.c" diff --git a/arch/x86/coco/sev/vc-shared.c b/arch/x86/coco/sev/vc-shared.c index 9b01c9ad81be..58b2f985d546 100644 --- a/arch/x86/coco/sev/vc-shared.c +++ b/arch/x86/coco/sev/vc-shared.c @@ -1,5 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 +#ifndef __BOOT_COMPRESSED +#define has_cpuflag(f) cpu_feature_enabled(f) +#endif + static enum es_result vc_check_opcode_bytes(struct es_em_ctxt *ctxt, unsigned long exit_code) { @@ -546,6 +550,13 @@ static enum es_result vc_handle_cpuid(struct ghcb *ghcb, /* xgetbv will cause #GP - use reset value for xcr0 */ ghcb_set_xcr0(ghcb, 1); + if (has_cpuflag(X86_FEATURE_SHSTK) && regs->ax == 0xd && regs->cx == 1) { + struct msr m; + + raw_rdmsr(MSR_IA32_XSS, &m); + ghcb_set_xss(ghcb, m.q); + } + ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_CPUID, 0, 0); if (ret != ES_OK) return ret; diff --git a/arch/x86/crypto/Kconfig b/arch/x86/crypto/Kconfig index 48d3076b6053..3fd2423d3cf8 100644 --- a/arch/x86/crypto/Kconfig +++ b/arch/x86/crypto/Kconfig @@ -353,16 +353,6 @@ config CRYPTO_NHPOLY1305_AVX2 Architecture: x86_64 using: - AVX2 (Advanced Vector Extensions 2) -config CRYPTO_POLYVAL_CLMUL_NI - tristate "Hash functions: POLYVAL (CLMUL-NI)" - depends on 64BIT - select CRYPTO_POLYVAL - help - POLYVAL hash function for HCTR2 - - Architecture: x86_64 using: - - CLMUL-NI (carry-less multiplication new instructions) - config CRYPTO_SM3_AVX_X86_64 tristate "Hash functions: SM3 (AVX)" depends on 64BIT diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 2d30d5d36145..5f2fb4f148fe 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -46,15 +46,13 @@ obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o aesni-intel-$(CONFIG_64BIT) += aes-ctr-avx-x86_64.o \ aes-gcm-aesni-x86_64.o \ - aes-xts-avx-x86_64.o \ - aes-gcm-avx10-x86_64.o + aes-gcm-vaes-avx2.o \ + aes-gcm-vaes-avx512.o \ + aes-xts-avx-x86_64.o obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o -obj-$(CONFIG_CRYPTO_POLYVAL_CLMUL_NI) += polyval-clmulni.o -polyval-clmulni-y := polyval-clmulni_asm.o polyval-clmulni_glue.o - obj-$(CONFIG_CRYPTO_NHPOLY1305_SSE2) += nhpoly1305-sse2.o nhpoly1305-sse2-y := nh-sse2-x86_64.o nhpoly1305-sse2-glue.o obj-$(CONFIG_CRYPTO_NHPOLY1305_AVX2) += nhpoly1305-avx2.o diff --git a/arch/x86/crypto/aes-gcm-aesni-x86_64.S b/arch/x86/crypto/aes-gcm-aesni-x86_64.S index 45940e2883a0..7c8a8a32bd3c 100644 --- a/arch/x86/crypto/aes-gcm-aesni-x86_64.S +++ b/arch/x86/crypto/aes-gcm-aesni-x86_64.S @@ -61,15 +61,15 @@ // for the *_aesni functions or AVX for the *_aesni_avx ones. (But it seems // there are no CPUs that support AES-NI without also PCLMULQDQ and SSE4.1.) // -// The design generally follows that of aes-gcm-avx10-x86_64.S, and that file is +// The design generally follows that of aes-gcm-vaes-avx512.S, and that file is // more thoroughly commented. This file has the following notable changes: // // - The vector length is fixed at 128-bit, i.e. xmm registers. This means // there is only one AES block (and GHASH block) per register. // -// - Without AVX512 / AVX10, only 16 SIMD registers are available instead of -// 32. We work around this by being much more careful about using -// registers, relying heavily on loads to load values as they are needed. 
+// - Without AVX512, only 16 SIMD registers are available instead of 32. We +// work around this by being much more careful about using registers, +// relying heavily on loads to load values as they are needed. // // - Masking is not available either. We work around this by implementing // partial block loads and stores using overlapping scalar loads and stores @@ -90,8 +90,8 @@ // multiplication instead of schoolbook multiplication. This saves one // pclmulqdq instruction per block, at the cost of one 64-bit load, one // pshufd, and 0.25 pxors per block. (This is without the three-argument -// XOR support that would be provided by AVX512 / AVX10, which would be -// more beneficial to schoolbook than Karatsuba.) +// XOR support that would be provided by AVX512, which would be more +// beneficial to schoolbook than Karatsuba.) // // As a rough approximation, we can assume that Karatsuba multiplication is // faster than schoolbook multiplication in this context if one pshufd and diff --git a/arch/x86/crypto/aes-gcm-vaes-avx2.S b/arch/x86/crypto/aes-gcm-vaes-avx2.S new file mode 100644 index 000000000000..93c9504a488f --- /dev/null +++ b/arch/x86/crypto/aes-gcm-vaes-avx2.S @@ -0,0 +1,1146 @@ +/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */ +// +// AES-GCM implementation for x86_64 CPUs that support the following CPU +// features: VAES && VPCLMULQDQ && AVX2 +// +// Copyright 2025 Google LLC +// +// Author: Eric Biggers <ebiggers@google.com> +// +//------------------------------------------------------------------------------ +// +// This file is dual-licensed, meaning that you can use it under your choice of +// either of the following two licenses: +// +// Licensed under the Apache License 2.0 (the "License"). You may obtain a copy +// of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// or +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. 
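Both the AES-NI file touched above and the new AVX2 implementation below pick Karatsuba over schoolbook multiplication for the GHASH inner loop: three carry-less multiplications per 128-bit block instead of four, paid for with a few extra XORs (the key-side XOR is precomputed and cached in h_powers_xored). The identity behind that trade-off can be checked with a small standalone C sketch; this is illustrative only, not kernel code, and clmul64() is just a bit-by-bit stand-in for one pclmulqdq:

#include <assert.h>
#include <stdint.h>

/* Carry-less (GF(2) polynomial) multiply, 64x64 -> 128 bits; a plain-C
 * stand-in for a single pclmulqdq instruction. */
static unsigned __int128 clmul64(uint64_t a, uint64_t b)
{
        unsigned __int128 acc = 0;

        for (int i = 0; i < 64; i++)
                if ((b >> i) & 1)
                        acc ^= (unsigned __int128)a << i;
        return acc;
}

int main(void)
{
        uint64_t a_lo = 0x0123456789abcdef, a_hi = 0xfedcba9876543210;
        uint64_t b_lo = 0xdeadbeefcafef00d, b_hi = 0x0f1e2d3c4b5a6978;

        /* Schoolbook: four multiplications for the low, high, and middle terms. */
        unsigned __int128 lo = clmul64(a_lo, b_lo);
        unsigned __int128 hi = clmul64(a_hi, b_hi);
        unsigned __int128 mi = clmul64(a_lo, b_hi) ^ clmul64(a_hi, b_lo);

        /* Karatsuba: three multiplications; the cross terms fall out of
         * (a_lo ^ a_hi)(b_lo ^ b_hi) once lo and hi are XOR'd back in. */
        unsigned __int128 mi_k = clmul64(a_lo ^ a_hi, b_lo ^ b_hi) ^ lo ^ hi;

        assert(mi == mi_k);
        return 0;
}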
+// +// ----------------------------------------------------------------------------- +// +// This is similar to aes-gcm-vaes-avx512.S, but it uses AVX2 instead of AVX512. +// This means it can only use 16 vector registers instead of 32, the maximum +// vector length is 32 bytes, and some instructions such as vpternlogd and +// masked loads/stores are unavailable. However, it is able to run on CPUs that +// have VAES without AVX512, namely AMD Zen 3 (including "Milan" server CPUs), +// various Intel client CPUs such as Alder Lake, and Intel Sierra Forest. +// +// This implementation also uses Karatsuba multiplication instead of schoolbook +// multiplication for GHASH in its main loop. This does not help much on Intel, +// but it improves performance by ~5% on AMD Zen 3. Other factors weighing +// slightly in favor of Karatsuba multiplication in this implementation are the +// lower maximum vector length (which means there are fewer key powers, so we +// can cache the halves of each key power XOR'd together and still use less +// memory than the AVX512 implementation), and the unavailability of the +// vpternlogd instruction (which helped schoolbook a bit more than Karatsuba). + +#include <linux/linkage.h> + +.section .rodata +.p2align 4 + + // The below three 16-byte values must be in the order that they are, as + // they are really two 32-byte tables and a 16-byte value that overlap: + // + // - The first 32-byte table begins at .Lselect_high_bytes_table. + // For 0 <= len <= 16, the 16-byte value at + // '.Lselect_high_bytes_table + len' selects the high 'len' bytes of + // another 16-byte value when AND'ed with it. + // + // - The second 32-byte table begins at .Lrshift_and_bswap_table. + // For 0 <= len <= 16, the 16-byte value at + // '.Lrshift_and_bswap_table + len' is a vpshufb mask that does the + // following operation: right-shift by '16 - len' bytes (shifting in + // zeroes), then reflect all 16 bytes. + // + // - The 16-byte value at .Lbswap_mask is a vpshufb mask that reflects + // all 16 bytes. +.Lselect_high_bytes_table: + .octa 0 +.Lrshift_and_bswap_table: + .octa 0xffffffffffffffffffffffffffffffff +.Lbswap_mask: + .octa 0x000102030405060708090a0b0c0d0e0f + + // Sixteen 0x0f bytes. By XOR'ing an entry of .Lrshift_and_bswap_table + // with this, we get a mask that left-shifts by '16 - len' bytes. +.Lfifteens: + .octa 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f + + // This is the GHASH reducing polynomial without its constant term, i.e. + // x^128 + x^7 + x^2 + x, represented using the backwards mapping + // between bits and polynomial coefficients. + // + // Alternatively, it can be interpreted as the naturally-ordered + // representation of the polynomial x^127 + x^126 + x^121 + 1, i.e. the + // "reversed" GHASH reducing polynomial without its x^128 term. +.Lgfpoly: + .octa 0xc2000000000000000000000000000001 + + // Same as above, but with the (1 << 64) bit set. +.Lgfpoly_and_internal_carrybit: + .octa 0xc2000000000000010000000000000001 + + // Values needed to prepare the initial vector of counter blocks. +.Lctr_pattern: + .octa 0 + .octa 1 + + // The number of AES blocks per vector, as a 128-bit value. 
+.Linc_2blocks: + .octa 2 + +// Offsets in struct aes_gcm_key_vaes_avx2 +#define OFFSETOF_AESKEYLEN 480 +#define OFFSETOF_H_POWERS 512 +#define NUM_H_POWERS 8 +#define OFFSETOFEND_H_POWERS (OFFSETOF_H_POWERS + (NUM_H_POWERS * 16)) +#define OFFSETOF_H_POWERS_XORED OFFSETOFEND_H_POWERS + +.text + +// Do one step of GHASH-multiplying the 128-bit lanes of \a by the 128-bit lanes +// of \b and storing the reduced products in \dst. Uses schoolbook +// multiplication. +.macro _ghash_mul_step i, a, b, dst, gfpoly, t0, t1, t2 +.if \i == 0 + vpclmulqdq $0x00, \a, \b, \t0 // LO = a_L * b_L + vpclmulqdq $0x01, \a, \b, \t1 // MI_0 = a_L * b_H +.elseif \i == 1 + vpclmulqdq $0x10, \a, \b, \t2 // MI_1 = a_H * b_L +.elseif \i == 2 + vpxor \t2, \t1, \t1 // MI = MI_0 + MI_1 +.elseif \i == 3 + vpclmulqdq $0x01, \t0, \gfpoly, \t2 // LO_L*(x^63 + x^62 + x^57) +.elseif \i == 4 + vpshufd $0x4e, \t0, \t0 // Swap halves of LO +.elseif \i == 5 + vpxor \t0, \t1, \t1 // Fold LO into MI (part 1) + vpxor \t2, \t1, \t1 // Fold LO into MI (part 2) +.elseif \i == 6 + vpclmulqdq $0x11, \a, \b, \dst // HI = a_H * b_H +.elseif \i == 7 + vpclmulqdq $0x01, \t1, \gfpoly, \t0 // MI_L*(x^63 + x^62 + x^57) +.elseif \i == 8 + vpshufd $0x4e, \t1, \t1 // Swap halves of MI +.elseif \i == 9 + vpxor \t1, \dst, \dst // Fold MI into HI (part 1) + vpxor \t0, \dst, \dst // Fold MI into HI (part 2) +.endif +.endm + +// GHASH-multiply the 128-bit lanes of \a by the 128-bit lanes of \b and store +// the reduced products in \dst. See _ghash_mul_step for full explanation. +.macro _ghash_mul a, b, dst, gfpoly, t0, t1, t2 +.irp i, 0,1,2,3,4,5,6,7,8,9 + _ghash_mul_step \i, \a, \b, \dst, \gfpoly, \t0, \t1, \t2 +.endr +.endm + +// GHASH-multiply the 128-bit lanes of \a by the 128-bit lanes of \b and add the +// *unreduced* products to \lo, \mi, and \hi. +.macro _ghash_mul_noreduce a, b, lo, mi, hi, t0 + vpclmulqdq $0x00, \a, \b, \t0 // a_L * b_L + vpxor \t0, \lo, \lo + vpclmulqdq $0x01, \a, \b, \t0 // a_L * b_H + vpxor \t0, \mi, \mi + vpclmulqdq $0x10, \a, \b, \t0 // a_H * b_L + vpxor \t0, \mi, \mi + vpclmulqdq $0x11, \a, \b, \t0 // a_H * b_H + vpxor \t0, \hi, \hi +.endm + +// Reduce the unreduced products from \lo, \mi, and \hi and store the 128-bit +// reduced products in \hi. See _ghash_mul_step for explanation of reduction. +.macro _ghash_reduce lo, mi, hi, gfpoly, t0 + vpclmulqdq $0x01, \lo, \gfpoly, \t0 + vpshufd $0x4e, \lo, \lo + vpxor \lo, \mi, \mi + vpxor \t0, \mi, \mi + vpclmulqdq $0x01, \mi, \gfpoly, \t0 + vpshufd $0x4e, \mi, \mi + vpxor \mi, \hi, \hi + vpxor \t0, \hi, \hi +.endm + +// This is a specialized version of _ghash_mul that computes \a * \a, i.e. it +// squares \a. It skips computing MI = (a_L * a_H) + (a_H * a_L) = 0. +.macro _ghash_square a, dst, gfpoly, t0, t1 + vpclmulqdq $0x00, \a, \a, \t0 // LO = a_L * a_L + vpclmulqdq $0x11, \a, \a, \dst // HI = a_H * a_H + vpclmulqdq $0x01, \t0, \gfpoly, \t1 // LO_L*(x^63 + x^62 + x^57) + vpshufd $0x4e, \t0, \t0 // Swap halves of LO + vpxor \t0, \t1, \t1 // Fold LO into MI + vpclmulqdq $0x01, \t1, \gfpoly, \t0 // MI_L*(x^63 + x^62 + x^57) + vpshufd $0x4e, \t1, \t1 // Swap halves of MI + vpxor \t1, \dst, \dst // Fold MI into HI (part 1) + vpxor \t0, \dst, \dst // Fold MI into HI (part 2) +.endm + +// void aes_gcm_precompute_vaes_avx2(struct aes_gcm_key_vaes_avx2 *key); +// +// Given the expanded AES key |key->base.aes_key|, derive the GHASH subkey and +// initialize |key->h_powers| and |key->h_powers_xored|. 
+// +// We use h_powers[0..7] to store H^8 through H^1, and h_powers_xored[0..7] to +// store the 64-bit halves of the key powers XOR'd together (for Karatsuba +// multiplication) in the order 8,6,7,5,4,2,3,1. +SYM_FUNC_START(aes_gcm_precompute_vaes_avx2) + + // Function arguments + .set KEY, %rdi + + // Additional local variables + .set POWERS_PTR, %rsi + .set RNDKEYLAST_PTR, %rdx + .set TMP0, %ymm0 + .set TMP0_XMM, %xmm0 + .set TMP1, %ymm1 + .set TMP1_XMM, %xmm1 + .set TMP2, %ymm2 + .set TMP2_XMM, %xmm2 + .set H_CUR, %ymm3 + .set H_CUR_XMM, %xmm3 + .set H_CUR2, %ymm4 + .set H_INC, %ymm5 + .set H_INC_XMM, %xmm5 + .set GFPOLY, %ymm6 + .set GFPOLY_XMM, %xmm6 + + // Encrypt an all-zeroes block to get the raw hash subkey. + movl OFFSETOF_AESKEYLEN(KEY), %eax + lea 6*16(KEY,%rax,4), RNDKEYLAST_PTR + vmovdqu (KEY), H_CUR_XMM // Zero-th round key XOR all-zeroes block + lea 16(KEY), %rax +1: + vaesenc (%rax), H_CUR_XMM, H_CUR_XMM + add $16, %rax + cmp %rax, RNDKEYLAST_PTR + jne 1b + vaesenclast (RNDKEYLAST_PTR), H_CUR_XMM, H_CUR_XMM + + // Reflect the bytes of the raw hash subkey. + vpshufb .Lbswap_mask(%rip), H_CUR_XMM, H_CUR_XMM + + // Finish preprocessing the byte-reflected hash subkey by multiplying it + // by x^-1 ("standard" interpretation of polynomial coefficients) or + // equivalently x^1 (natural interpretation). This gets the key into a + // format that avoids having to bit-reflect the data blocks later. + vpshufd $0xd3, H_CUR_XMM, TMP0_XMM + vpsrad $31, TMP0_XMM, TMP0_XMM + vpaddq H_CUR_XMM, H_CUR_XMM, H_CUR_XMM + vpand .Lgfpoly_and_internal_carrybit(%rip), TMP0_XMM, TMP0_XMM + vpxor TMP0_XMM, H_CUR_XMM, H_CUR_XMM + + // Load the gfpoly constant. + vbroadcasti128 .Lgfpoly(%rip), GFPOLY + + // Square H^1 to get H^2. + _ghash_square H_CUR_XMM, H_INC_XMM, GFPOLY_XMM, TMP0_XMM, TMP1_XMM + + // Create H_CUR = [H^2, H^1] and H_INC = [H^2, H^2]. + vinserti128 $1, H_CUR_XMM, H_INC, H_CUR + vinserti128 $1, H_INC_XMM, H_INC, H_INC + + // Compute H_CUR2 = [H^4, H^3]. + _ghash_mul H_INC, H_CUR, H_CUR2, GFPOLY, TMP0, TMP1, TMP2 + + // Store [H^2, H^1] and [H^4, H^3]. + vmovdqu H_CUR, OFFSETOF_H_POWERS+3*32(KEY) + vmovdqu H_CUR2, OFFSETOF_H_POWERS+2*32(KEY) + + // For Karatsuba multiplication: compute and store the two 64-bit halves + // of each key power XOR'd together. Order is 4,2,3,1. + vpunpcklqdq H_CUR, H_CUR2, TMP0 + vpunpckhqdq H_CUR, H_CUR2, TMP1 + vpxor TMP1, TMP0, TMP0 + vmovdqu TMP0, OFFSETOF_H_POWERS_XORED+32(KEY) + + // Compute and store H_CUR = [H^6, H^5] and H_CUR2 = [H^8, H^7]. + _ghash_mul H_INC, H_CUR2, H_CUR, GFPOLY, TMP0, TMP1, TMP2 + _ghash_mul H_INC, H_CUR, H_CUR2, GFPOLY, TMP0, TMP1, TMP2 + vmovdqu H_CUR, OFFSETOF_H_POWERS+1*32(KEY) + vmovdqu H_CUR2, OFFSETOF_H_POWERS+0*32(KEY) + + // Again, compute and store the two 64-bit halves of each key power + // XOR'd together. Order is 8,6,7,5. + vpunpcklqdq H_CUR, H_CUR2, TMP0 + vpunpckhqdq H_CUR, H_CUR2, TMP1 + vpxor TMP1, TMP0, TMP0 + vmovdqu TMP0, OFFSETOF_H_POWERS_XORED(KEY) + + vzeroupper + RET +SYM_FUNC_END(aes_gcm_precompute_vaes_avx2) + +// Do one step of the GHASH update of four vectors of data blocks. +// \i: the step to do, 0 through 9 +// \ghashdata_ptr: pointer to the data blocks (ciphertext or AAD) +// KEY: pointer to struct aes_gcm_key_vaes_avx2 +// BSWAP_MASK: mask for reflecting the bytes of blocks +// H_POW[2-1]_XORED: cached values from KEY->h_powers_xored +// TMP[0-2]: temporary registers. TMP[1-2] must be preserved across steps. 
+// LO, MI: working state for this macro that must be preserved across steps +// GHASH_ACC: the GHASH accumulator (input/output) +.macro _ghash_step_4x i, ghashdata_ptr + .set HI, GHASH_ACC # alias + .set HI_XMM, GHASH_ACC_XMM +.if \i == 0 + // First vector + vmovdqu 0*32(\ghashdata_ptr), TMP1 + vpshufb BSWAP_MASK, TMP1, TMP1 + vmovdqu OFFSETOF_H_POWERS+0*32(KEY), TMP2 + vpxor GHASH_ACC, TMP1, TMP1 + vpclmulqdq $0x00, TMP2, TMP1, LO + vpclmulqdq $0x11, TMP2, TMP1, HI + vpunpckhqdq TMP1, TMP1, TMP0 + vpxor TMP1, TMP0, TMP0 + vpclmulqdq $0x00, H_POW2_XORED, TMP0, MI +.elseif \i == 1 +.elseif \i == 2 + // Second vector + vmovdqu 1*32(\ghashdata_ptr), TMP1 + vpshufb BSWAP_MASK, TMP1, TMP1 + vmovdqu OFFSETOF_H_POWERS+1*32(KEY), TMP2 + vpclmulqdq $0x00, TMP2, TMP1, TMP0 + vpxor TMP0, LO, LO + vpclmulqdq $0x11, TMP2, TMP1, TMP0 + vpxor TMP0, HI, HI + vpunpckhqdq TMP1, TMP1, TMP0 + vpxor TMP1, TMP0, TMP0 + vpclmulqdq $0x10, H_POW2_XORED, TMP0, TMP0 + vpxor TMP0, MI, MI +.elseif \i == 3 + // Third vector + vmovdqu 2*32(\ghashdata_ptr), TMP1 + vpshufb BSWAP_MASK, TMP1, TMP1 + vmovdqu OFFSETOF_H_POWERS+2*32(KEY), TMP2 +.elseif \i == 4 + vpclmulqdq $0x00, TMP2, TMP1, TMP0 + vpxor TMP0, LO, LO + vpclmulqdq $0x11, TMP2, TMP1, TMP0 + vpxor TMP0, HI, HI +.elseif \i == 5 + vpunpckhqdq TMP1, TMP1, TMP0 + vpxor TMP1, TMP0, TMP0 + vpclmulqdq $0x00, H_POW1_XORED, TMP0, TMP0 + vpxor TMP0, MI, MI + + // Fourth vector + vmovdqu 3*32(\ghashdata_ptr), TMP1 + vpshufb BSWAP_MASK, TMP1, TMP1 +.elseif \i == 6 + vmovdqu OFFSETOF_H_POWERS+3*32(KEY), TMP2 + vpclmulqdq $0x00, TMP2, TMP1, TMP0 + vpxor TMP0, LO, LO + vpclmulqdq $0x11, TMP2, TMP1, TMP0 + vpxor TMP0, HI, HI + vpunpckhqdq TMP1, TMP1, TMP0 + vpxor TMP1, TMP0, TMP0 + vpclmulqdq $0x10, H_POW1_XORED, TMP0, TMP0 + vpxor TMP0, MI, MI +.elseif \i == 7 + // Finalize 'mi' following Karatsuba multiplication. + vpxor LO, MI, MI + vpxor HI, MI, MI + + // Fold lo into mi. + vbroadcasti128 .Lgfpoly(%rip), TMP2 + vpclmulqdq $0x01, LO, TMP2, TMP0 + vpshufd $0x4e, LO, LO + vpxor LO, MI, MI + vpxor TMP0, MI, MI +.elseif \i == 8 + // Fold mi into hi. + vpclmulqdq $0x01, MI, TMP2, TMP0 + vpshufd $0x4e, MI, MI + vpxor MI, HI, HI + vpxor TMP0, HI, HI +.elseif \i == 9 + vextracti128 $1, HI, TMP0_XMM + vpxor TMP0_XMM, HI_XMM, GHASH_ACC_XMM +.endif +.endm + +// Update GHASH with four vectors of data blocks. See _ghash_step_4x for full +// explanation. +.macro _ghash_4x ghashdata_ptr +.irp i, 0,1,2,3,4,5,6,7,8,9 + _ghash_step_4x \i, \ghashdata_ptr +.endr +.endm + +// Load 1 <= %ecx <= 16 bytes from the pointer \src into the xmm register \dst +// and zeroize any remaining bytes. Clobbers %rax, %rcx, and \tmp{64,32}. +.macro _load_partial_block src, dst, tmp64, tmp32 + sub $8, %ecx // LEN - 8 + jle .Lle8\@ + + // Load 9 <= LEN <= 16 bytes. + vmovq (\src), \dst // Load first 8 bytes + mov (\src, %rcx), %rax // Load last 8 bytes + neg %ecx + shl $3, %ecx + shr %cl, %rax // Discard overlapping bytes + vpinsrq $1, %rax, \dst, \dst + jmp .Ldone\@ + +.Lle8\@: + add $4, %ecx // LEN - 4 + jl .Llt4\@ + + // Load 4 <= LEN <= 8 bytes. + mov (\src), %eax // Load first 4 bytes + mov (\src, %rcx), \tmp32 // Load last 4 bytes + jmp .Lcombine\@ + +.Llt4\@: + // Load 1 <= LEN <= 3 bytes. 
+ add $2, %ecx // LEN - 2 + movzbl (\src), %eax // Load first byte + jl .Lmovq\@ + movzwl (\src, %rcx), \tmp32 // Load last 2 bytes +.Lcombine\@: + shl $3, %ecx + shl %cl, \tmp64 + or \tmp64, %rax // Combine the two parts +.Lmovq\@: + vmovq %rax, \dst +.Ldone\@: +.endm + +// Store 1 <= %ecx <= 16 bytes from the xmm register \src to the pointer \dst. +// Clobbers %rax, %rcx, and \tmp{64,32}. +.macro _store_partial_block src, dst, tmp64, tmp32 + sub $8, %ecx // LEN - 8 + jl .Llt8\@ + + // Store 8 <= LEN <= 16 bytes. + vpextrq $1, \src, %rax + mov %ecx, \tmp32 + shl $3, %ecx + ror %cl, %rax + mov %rax, (\dst, \tmp64) // Store last LEN - 8 bytes + vmovq \src, (\dst) // Store first 8 bytes + jmp .Ldone\@ + +.Llt8\@: + add $4, %ecx // LEN - 4 + jl .Llt4\@ + + // Store 4 <= LEN <= 7 bytes. + vpextrd $1, \src, %eax + mov %ecx, \tmp32 + shl $3, %ecx + ror %cl, %eax + mov %eax, (\dst, \tmp64) // Store last LEN - 4 bytes + vmovd \src, (\dst) // Store first 4 bytes + jmp .Ldone\@ + +.Llt4\@: + // Store 1 <= LEN <= 3 bytes. + vpextrb $0, \src, 0(\dst) + cmp $-2, %ecx // LEN - 4 == -2, i.e. LEN == 2? + jl .Ldone\@ + vpextrb $1, \src, 1(\dst) + je .Ldone\@ + vpextrb $2, \src, 2(\dst) +.Ldone\@: +.endm + +// void aes_gcm_aad_update_vaes_avx2(const struct aes_gcm_key_vaes_avx2 *key, +// u8 ghash_acc[16], +// const u8 *aad, int aadlen); +// +// This function processes the AAD (Additional Authenticated Data) in GCM. +// Using the key |key|, it updates the GHASH accumulator |ghash_acc| with the +// data given by |aad| and |aadlen|. On the first call, |ghash_acc| must be all +// zeroes. |aadlen| must be a multiple of 16, except on the last call where it +// can be any length. The caller must do any buffering needed to ensure this. +// +// This handles large amounts of AAD efficiently, while also keeping overhead +// low for small amounts which is the common case. TLS and IPsec use less than +// one block of AAD, but (uncommonly) other use cases may use much more. +SYM_FUNC_START(aes_gcm_aad_update_vaes_avx2) + + // Function arguments + .set KEY, %rdi + .set GHASH_ACC_PTR, %rsi + .set AAD, %rdx + .set AADLEN, %ecx // Must be %ecx for _load_partial_block + .set AADLEN64, %rcx // Zero-extend AADLEN before using! + + // Additional local variables. + // %rax and %r8 are used as temporary registers. + .set TMP0, %ymm0 + .set TMP0_XMM, %xmm0 + .set TMP1, %ymm1 + .set TMP1_XMM, %xmm1 + .set TMP2, %ymm2 + .set TMP2_XMM, %xmm2 + .set LO, %ymm3 + .set LO_XMM, %xmm3 + .set MI, %ymm4 + .set MI_XMM, %xmm4 + .set GHASH_ACC, %ymm5 + .set GHASH_ACC_XMM, %xmm5 + .set BSWAP_MASK, %ymm6 + .set BSWAP_MASK_XMM, %xmm6 + .set GFPOLY, %ymm7 + .set GFPOLY_XMM, %xmm7 + .set H_POW2_XORED, %ymm8 + .set H_POW1_XORED, %ymm9 + + // Load the bswap_mask and gfpoly constants. Since AADLEN is usually + // small, usually only 128-bit vectors will be used. So as an + // optimization, don't broadcast these constants to both 128-bit lanes + // quite yet. + vmovdqu .Lbswap_mask(%rip), BSWAP_MASK_XMM + vmovdqu .Lgfpoly(%rip), GFPOLY_XMM + + // Load the GHASH accumulator. + vmovdqu (GHASH_ACC_PTR), GHASH_ACC_XMM + + // Check for the common case of AADLEN <= 16, as well as AADLEN == 0. + test AADLEN, AADLEN + jz .Laad_done + cmp $16, AADLEN + jle .Laad_lastblock + + // AADLEN > 16, so we'll operate on full vectors. Broadcast bswap_mask + // and gfpoly to both 128-bit lanes. + vinserti128 $1, BSWAP_MASK_XMM, BSWAP_MASK, BSWAP_MASK + vinserti128 $1, GFPOLY_XMM, GFPOLY, GFPOLY + + // If AADLEN >= 128, update GHASH with 128 bytes of AAD at a time. 
+ add $-128, AADLEN // 128 is 4 bytes, -128 is 1 byte + jl .Laad_loop_4x_done + vmovdqu OFFSETOF_H_POWERS_XORED(KEY), H_POW2_XORED + vmovdqu OFFSETOF_H_POWERS_XORED+32(KEY), H_POW1_XORED +.Laad_loop_4x: + _ghash_4x AAD + sub $-128, AAD + add $-128, AADLEN + jge .Laad_loop_4x +.Laad_loop_4x_done: + + // If AADLEN >= 32, update GHASH with 32 bytes of AAD at a time. + add $96, AADLEN + jl .Laad_loop_1x_done +.Laad_loop_1x: + vmovdqu (AAD), TMP0 + vpshufb BSWAP_MASK, TMP0, TMP0 + vpxor TMP0, GHASH_ACC, GHASH_ACC + vmovdqu OFFSETOFEND_H_POWERS-32(KEY), TMP0 + _ghash_mul TMP0, GHASH_ACC, GHASH_ACC, GFPOLY, TMP1, TMP2, LO + vextracti128 $1, GHASH_ACC, TMP0_XMM + vpxor TMP0_XMM, GHASH_ACC_XMM, GHASH_ACC_XMM + add $32, AAD + sub $32, AADLEN + jge .Laad_loop_1x +.Laad_loop_1x_done: + add $32, AADLEN + // Now 0 <= AADLEN < 32. + + jz .Laad_done + cmp $16, AADLEN + jle .Laad_lastblock + + // Update GHASH with the remaining 17 <= AADLEN <= 31 bytes of AAD. + mov AADLEN, AADLEN // Zero-extend AADLEN to AADLEN64. + vmovdqu (AAD), TMP0_XMM + vmovdqu -16(AAD, AADLEN64), TMP1_XMM + vpshufb BSWAP_MASK_XMM, TMP0_XMM, TMP0_XMM + vpxor TMP0_XMM, GHASH_ACC_XMM, GHASH_ACC_XMM + lea .Lrshift_and_bswap_table(%rip), %rax + vpshufb -16(%rax, AADLEN64), TMP1_XMM, TMP1_XMM + vinserti128 $1, TMP1_XMM, GHASH_ACC, GHASH_ACC + vmovdqu OFFSETOFEND_H_POWERS-32(KEY), TMP0 + _ghash_mul TMP0, GHASH_ACC, GHASH_ACC, GFPOLY, TMP1, TMP2, LO + vextracti128 $1, GHASH_ACC, TMP0_XMM + vpxor TMP0_XMM, GHASH_ACC_XMM, GHASH_ACC_XMM + jmp .Laad_done + +.Laad_lastblock: + // Update GHASH with the remaining 1 <= AADLEN <= 16 bytes of AAD. + _load_partial_block AAD, TMP0_XMM, %r8, %r8d + vpshufb BSWAP_MASK_XMM, TMP0_XMM, TMP0_XMM + vpxor TMP0_XMM, GHASH_ACC_XMM, GHASH_ACC_XMM + vmovdqu OFFSETOFEND_H_POWERS-16(KEY), TMP0_XMM + _ghash_mul TMP0_XMM, GHASH_ACC_XMM, GHASH_ACC_XMM, GFPOLY_XMM, \ + TMP1_XMM, TMP2_XMM, LO_XMM + +.Laad_done: + // Store the updated GHASH accumulator back to memory. + vmovdqu GHASH_ACC_XMM, (GHASH_ACC_PTR) + + vzeroupper + RET +SYM_FUNC_END(aes_gcm_aad_update_vaes_avx2) + +// Do one non-last round of AES encryption on the blocks in the given AESDATA +// vectors using the round key that has been broadcast to all 128-bit lanes of +// \round_key. +.macro _vaesenc round_key, vecs:vararg +.irp i, \vecs + vaesenc \round_key, AESDATA\i, AESDATA\i +.endr +.endm + +// Generate counter blocks in the given AESDATA vectors, then do the zero-th AES +// round on them. Clobbers TMP0. +.macro _ctr_begin vecs:vararg + vbroadcasti128 .Linc_2blocks(%rip), TMP0 +.irp i, \vecs + vpshufb BSWAP_MASK, LE_CTR, AESDATA\i + vpaddd TMP0, LE_CTR, LE_CTR +.endr +.irp i, \vecs + vpxor RNDKEY0, AESDATA\i, AESDATA\i +.endr +.endm + +// Generate and encrypt counter blocks in the given AESDATA vectors, excluding +// the last AES round. Clobbers %rax and TMP0. +.macro _aesenc_loop vecs:vararg + _ctr_begin \vecs + lea 16(KEY), %rax +.Laesenc_loop\@: + vbroadcasti128 (%rax), TMP0 + _vaesenc TMP0, \vecs + add $16, %rax + cmp %rax, RNDKEYLAST_PTR + jne .Laesenc_loop\@ +.endm + +// Finalize the keystream blocks in the given AESDATA vectors by doing the last +// AES round, then XOR those keystream blocks with the corresponding data. +// Reduce latency by doing the XOR before the vaesenclast, utilizing the +// property vaesenclast(key, a) ^ b == vaesenclast(key ^ b, a). Clobbers TMP0. 
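The property quoted in the comment above holds because AESENCLAST finishes with an XOR of the round key, so any value XOR'd into its output can equally be folded into the round key beforehand. It can be verified from userspace with the AES-NI intrinsics; a standalone sketch (built with something like gcc -maes), not kernel code:

#include <assert.h>
#include <immintrin.h>

int main(void)
{
        __m128i a   = _mm_set_epi64x(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
        __m128i key = _mm_set_epi64x(0x0011223344556677ULL, 0x8899aabbccddeeffULL);
        __m128i b   = _mm_set_epi64x(0xdeadbeefdeadbeefULL, 0xcafef00dcafef00dULL);

        /* XOR the data in after the last AES round... */
        __m128i x = _mm_xor_si128(_mm_aesenclast_si128(a, key), b);
        /* ...or fold it into the last round key first; the results match. */
        __m128i y = _mm_aesenclast_si128(a, _mm_xor_si128(key, b));

        assert(_mm_movemask_epi8(_mm_cmpeq_epi8(x, y)) == 0xffff);
        return 0;
}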
+.macro _aesenclast_and_xor vecs:vararg +.irp i, \vecs + vpxor \i*32(SRC), RNDKEYLAST, TMP0 + vaesenclast TMP0, AESDATA\i, AESDATA\i +.endr +.irp i, \vecs + vmovdqu AESDATA\i, \i*32(DST) +.endr +.endm + +// void aes_gcm_{enc,dec}_update_vaes_avx2(const struct aes_gcm_key_vaes_avx2 *key, +// const u32 le_ctr[4], u8 ghash_acc[16], +// const u8 *src, u8 *dst, int datalen); +// +// This macro generates a GCM encryption or decryption update function with the +// above prototype (with \enc selecting which one). The function computes the +// next portion of the CTR keystream, XOR's it with |datalen| bytes from |src|, +// and writes the resulting encrypted or decrypted data to |dst|. It also +// updates the GHASH accumulator |ghash_acc| using the next |datalen| ciphertext +// bytes. +// +// |datalen| must be a multiple of 16, except on the last call where it can be +// any length. The caller must do any buffering needed to ensure this. Both +// in-place and out-of-place en/decryption are supported. +// +// |le_ctr| must give the current counter in little-endian format. This +// function loads the counter from |le_ctr| and increments the loaded counter as +// needed, but it does *not* store the updated counter back to |le_ctr|. The +// caller must update |le_ctr| if any more data segments follow. Internally, +// only the low 32-bit word of the counter is incremented, following the GCM +// standard. +.macro _aes_gcm_update enc + + // Function arguments + .set KEY, %rdi + .set LE_CTR_PTR, %rsi + .set LE_CTR_PTR32, %esi + .set GHASH_ACC_PTR, %rdx + .set SRC, %rcx // Assumed to be %rcx. + // See .Ltail_xor_and_ghash_1to16bytes + .set DST, %r8 + .set DATALEN, %r9d + .set DATALEN64, %r9 // Zero-extend DATALEN before using! + + // Additional local variables + + // %rax is used as a temporary register. LE_CTR_PTR is also available + // as a temporary register after the counter is loaded. + + // AES key length in bytes + .set AESKEYLEN, %r10d + .set AESKEYLEN64, %r10 + + // Pointer to the last AES round key for the chosen AES variant + .set RNDKEYLAST_PTR, %r11 + + // BSWAP_MASK is the shuffle mask for byte-reflecting 128-bit values + // using vpshufb, copied to all 128-bit lanes. + .set BSWAP_MASK, %ymm0 + .set BSWAP_MASK_XMM, %xmm0 + + // GHASH_ACC is the accumulator variable for GHASH. When fully reduced, + // only the lowest 128-bit lane can be nonzero. When not fully reduced, + // more than one lane may be used, and they need to be XOR'd together. + .set GHASH_ACC, %ymm1 + .set GHASH_ACC_XMM, %xmm1 + + // TMP[0-2] are temporary registers. + .set TMP0, %ymm2 + .set TMP0_XMM, %xmm2 + .set TMP1, %ymm3 + .set TMP1_XMM, %xmm3 + .set TMP2, %ymm4 + .set TMP2_XMM, %xmm4 + + // LO and MI are used to accumulate unreduced GHASH products. + .set LO, %ymm5 + .set LO_XMM, %xmm5 + .set MI, %ymm6 + .set MI_XMM, %xmm6 + + // H_POW[2-1]_XORED contain cached values from KEY->h_powers_xored. The + // descending numbering reflects the order of the key powers. + .set H_POW2_XORED, %ymm7 + .set H_POW2_XORED_XMM, %xmm7 + .set H_POW1_XORED, %ymm8 + + // RNDKEY0 caches the zero-th round key, and RNDKEYLAST the last one. + .set RNDKEY0, %ymm9 + .set RNDKEYLAST, %ymm10 + + // LE_CTR contains the next set of little-endian counter blocks. + .set LE_CTR, %ymm11 + + // AESDATA[0-3] hold the counter blocks that are being encrypted by AES. 
+ .set AESDATA0, %ymm12 + .set AESDATA0_XMM, %xmm12 + .set AESDATA1, %ymm13 + .set AESDATA1_XMM, %xmm13 + .set AESDATA2, %ymm14 + .set AESDATA3, %ymm15 + +.if \enc + .set GHASHDATA_PTR, DST +.else + .set GHASHDATA_PTR, SRC +.endif + + vbroadcasti128 .Lbswap_mask(%rip), BSWAP_MASK + + // Load the GHASH accumulator and the starting counter. + vmovdqu (GHASH_ACC_PTR), GHASH_ACC_XMM + vbroadcasti128 (LE_CTR_PTR), LE_CTR + + // Load the AES key length in bytes. + movl OFFSETOF_AESKEYLEN(KEY), AESKEYLEN + + // Make RNDKEYLAST_PTR point to the last AES round key. This is the + // round key with index 10, 12, or 14 for AES-128, AES-192, or AES-256 + // respectively. Then load the zero-th and last round keys. + lea 6*16(KEY,AESKEYLEN64,4), RNDKEYLAST_PTR + vbroadcasti128 (KEY), RNDKEY0 + vbroadcasti128 (RNDKEYLAST_PTR), RNDKEYLAST + + // Finish initializing LE_CTR by adding 1 to the second block. + vpaddd .Lctr_pattern(%rip), LE_CTR, LE_CTR + + // If there are at least 128 bytes of data, then continue into the loop + // that processes 128 bytes of data at a time. Otherwise skip it. + add $-128, DATALEN // 128 is 4 bytes, -128 is 1 byte + jl .Lcrypt_loop_4x_done\@ + + vmovdqu OFFSETOF_H_POWERS_XORED(KEY), H_POW2_XORED + vmovdqu OFFSETOF_H_POWERS_XORED+32(KEY), H_POW1_XORED + + // Main loop: en/decrypt and hash 4 vectors (128 bytes) at a time. + +.if \enc + // Encrypt the first 4 vectors of plaintext blocks. + _aesenc_loop 0,1,2,3 + _aesenclast_and_xor 0,1,2,3 + sub $-128, SRC // 128 is 4 bytes, -128 is 1 byte + add $-128, DATALEN + jl .Lghash_last_ciphertext_4x\@ +.endif + +.align 16 +.Lcrypt_loop_4x\@: + + // Start the AES encryption of the counter blocks. + _ctr_begin 0,1,2,3 + cmp $24, AESKEYLEN + jl 128f // AES-128? + je 192f // AES-192? + // AES-256 + vbroadcasti128 -13*16(RNDKEYLAST_PTR), TMP0 + _vaesenc TMP0, 0,1,2,3 + vbroadcasti128 -12*16(RNDKEYLAST_PTR), TMP0 + _vaesenc TMP0, 0,1,2,3 +192: + vbroadcasti128 -11*16(RNDKEYLAST_PTR), TMP0 + _vaesenc TMP0, 0,1,2,3 + vbroadcasti128 -10*16(RNDKEYLAST_PTR), TMP0 + _vaesenc TMP0, 0,1,2,3 +128: + + // Finish the AES encryption of the counter blocks in AESDATA[0-3], + // interleaved with the GHASH update of the ciphertext blocks. +.irp i, 9,8,7,6,5,4,3,2,1 + _ghash_step_4x (9 - \i), GHASHDATA_PTR + vbroadcasti128 -\i*16(RNDKEYLAST_PTR), TMP0 + _vaesenc TMP0, 0,1,2,3 +.endr + _ghash_step_4x 9, GHASHDATA_PTR +.if \enc + sub $-128, DST // 128 is 4 bytes, -128 is 1 byte +.endif + _aesenclast_and_xor 0,1,2,3 + sub $-128, SRC +.if !\enc + sub $-128, DST +.endif + add $-128, DATALEN + jge .Lcrypt_loop_4x\@ + +.if \enc +.Lghash_last_ciphertext_4x\@: + // Update GHASH with the last set of ciphertext blocks. + _ghash_4x DST + sub $-128, DST +.endif + +.Lcrypt_loop_4x_done\@: + + // Undo the extra subtraction by 128 and check whether data remains. + sub $-128, DATALEN // 128 is 4 bytes, -128 is 1 byte + jz .Ldone\@ + + // The data length isn't a multiple of 128 bytes. Process the remaining + // data of length 1 <= DATALEN < 128. + // + // Since there are enough key powers available for all remaining data, + // there is no need to do a GHASH reduction after each iteration. + // Instead, multiply each remaining block by its own key power, and only + // do a GHASH reduction at the very end. + + // Make POWERS_PTR point to the key powers [H^N, H^(N-1), ...] where N + // is the number of blocks that remain. + .set POWERS_PTR, LE_CTR_PTR // LE_CTR_PTR is free to be reused. 
+ .set POWERS_PTR32, LE_CTR_PTR32 + mov DATALEN, %eax + neg %rax + and $~15, %rax // -round_up(DATALEN, 16) + lea OFFSETOFEND_H_POWERS(KEY,%rax), POWERS_PTR + + // Start collecting the unreduced GHASH intermediate value LO, MI, HI. + .set HI, H_POW2_XORED // H_POW2_XORED is free to be reused. + .set HI_XMM, H_POW2_XORED_XMM + vpxor LO_XMM, LO_XMM, LO_XMM + vpxor MI_XMM, MI_XMM, MI_XMM + vpxor HI_XMM, HI_XMM, HI_XMM + + // 1 <= DATALEN < 128. Generate 2 or 4 more vectors of keystream blocks + // excluding the last AES round, depending on the remaining DATALEN. + cmp $64, DATALEN + jg .Ltail_gen_4_keystream_vecs\@ + _aesenc_loop 0,1 + cmp $32, DATALEN + jge .Ltail_xor_and_ghash_full_vec_loop\@ + jmp .Ltail_xor_and_ghash_partial_vec\@ +.Ltail_gen_4_keystream_vecs\@: + _aesenc_loop 0,1,2,3 + + // XOR the remaining data and accumulate the unreduced GHASH products + // for DATALEN >= 32, starting with one full 32-byte vector at a time. +.Ltail_xor_and_ghash_full_vec_loop\@: +.if \enc + _aesenclast_and_xor 0 + vpshufb BSWAP_MASK, AESDATA0, AESDATA0 +.else + vmovdqu (SRC), TMP1 + vpxor TMP1, RNDKEYLAST, TMP0 + vaesenclast TMP0, AESDATA0, AESDATA0 + vmovdqu AESDATA0, (DST) + vpshufb BSWAP_MASK, TMP1, AESDATA0 +.endif + // The ciphertext blocks (i.e. GHASH input data) are now in AESDATA0. + vpxor GHASH_ACC, AESDATA0, AESDATA0 + vmovdqu (POWERS_PTR), TMP2 + _ghash_mul_noreduce TMP2, AESDATA0, LO, MI, HI, TMP0 + vmovdqa AESDATA1, AESDATA0 + vmovdqa AESDATA2, AESDATA1 + vmovdqa AESDATA3, AESDATA2 + vpxor GHASH_ACC_XMM, GHASH_ACC_XMM, GHASH_ACC_XMM + add $32, SRC + add $32, DST + add $32, POWERS_PTR + sub $32, DATALEN + cmp $32, DATALEN + jge .Ltail_xor_and_ghash_full_vec_loop\@ + test DATALEN, DATALEN + jz .Ltail_ghash_reduce\@ + +.Ltail_xor_and_ghash_partial_vec\@: + // XOR the remaining data and accumulate the unreduced GHASH products, + // for 1 <= DATALEN < 32. + vaesenclast RNDKEYLAST, AESDATA0, AESDATA0 + cmp $16, DATALEN + jle .Ltail_xor_and_ghash_1to16bytes\@ + + // Handle 17 <= DATALEN < 32. + + // Load a vpshufb mask that will right-shift by '32 - DATALEN' bytes + // (shifting in zeroes), then reflect all 16 bytes. + lea .Lrshift_and_bswap_table(%rip), %rax + vmovdqu -16(%rax, DATALEN64), TMP2_XMM + + // Move the second keystream block to its own register and left-align it + vextracti128 $1, AESDATA0, AESDATA1_XMM + vpxor .Lfifteens(%rip), TMP2_XMM, TMP0_XMM + vpshufb TMP0_XMM, AESDATA1_XMM, AESDATA1_XMM + + // Using overlapping loads and stores, XOR the source data with the + // keystream and write the destination data. Then prepare the GHASH + // input data: the full ciphertext block and the zero-padded partial + // ciphertext block, both byte-reflected, in AESDATA0. 
+.if \enc + vpxor -16(SRC, DATALEN64), AESDATA1_XMM, AESDATA1_XMM + vpxor (SRC), AESDATA0_XMM, AESDATA0_XMM + vmovdqu AESDATA1_XMM, -16(DST, DATALEN64) + vmovdqu AESDATA0_XMM, (DST) + vpshufb TMP2_XMM, AESDATA1_XMM, AESDATA1_XMM + vpshufb BSWAP_MASK_XMM, AESDATA0_XMM, AESDATA0_XMM +.else + vmovdqu -16(SRC, DATALEN64), TMP1_XMM + vmovdqu (SRC), TMP0_XMM + vpxor TMP1_XMM, AESDATA1_XMM, AESDATA1_XMM + vpxor TMP0_XMM, AESDATA0_XMM, AESDATA0_XMM + vmovdqu AESDATA1_XMM, -16(DST, DATALEN64) + vmovdqu AESDATA0_XMM, (DST) + vpshufb TMP2_XMM, TMP1_XMM, AESDATA1_XMM + vpshufb BSWAP_MASK_XMM, TMP0_XMM, AESDATA0_XMM +.endif + vpxor GHASH_ACC_XMM, AESDATA0_XMM, AESDATA0_XMM + vinserti128 $1, AESDATA1_XMM, AESDATA0, AESDATA0 + vmovdqu (POWERS_PTR), TMP2 + jmp .Ltail_ghash_last_vec\@ + +.Ltail_xor_and_ghash_1to16bytes\@: + // Handle 1 <= DATALEN <= 16. Carefully load and store the + // possibly-partial block, which we mustn't access out of bounds. + vmovdqu (POWERS_PTR), TMP2_XMM + mov SRC, KEY // Free up %rcx, assuming SRC == %rcx + mov DATALEN, %ecx + _load_partial_block KEY, TMP0_XMM, POWERS_PTR, POWERS_PTR32 + vpxor TMP0_XMM, AESDATA0_XMM, AESDATA0_XMM + mov DATALEN, %ecx + _store_partial_block AESDATA0_XMM, DST, POWERS_PTR, POWERS_PTR32 +.if \enc + lea .Lselect_high_bytes_table(%rip), %rax + vpshufb BSWAP_MASK_XMM, AESDATA0_XMM, AESDATA0_XMM + vpand (%rax, DATALEN64), AESDATA0_XMM, AESDATA0_XMM +.else + vpshufb BSWAP_MASK_XMM, TMP0_XMM, AESDATA0_XMM +.endif + vpxor GHASH_ACC_XMM, AESDATA0_XMM, AESDATA0_XMM + +.Ltail_ghash_last_vec\@: + // Accumulate the unreduced GHASH products for the last 1-2 blocks. The + // GHASH input data is in AESDATA0. If only one block remains, then the + // second block in AESDATA0 is zero and does not affect the result. + _ghash_mul_noreduce TMP2, AESDATA0, LO, MI, HI, TMP0 + +.Ltail_ghash_reduce\@: + // Finally, do the GHASH reduction. + vbroadcasti128 .Lgfpoly(%rip), TMP0 + _ghash_reduce LO, MI, HI, TMP0, TMP1 + vextracti128 $1, HI, GHASH_ACC_XMM + vpxor HI_XMM, GHASH_ACC_XMM, GHASH_ACC_XMM + +.Ldone\@: + // Store the updated GHASH accumulator back to memory. + vmovdqu GHASH_ACC_XMM, (GHASH_ACC_PTR) + + vzeroupper + RET +.endm + +// void aes_gcm_enc_final_vaes_avx2(const struct aes_gcm_key_vaes_avx2 *key, +// const u32 le_ctr[4], u8 ghash_acc[16], +// u64 total_aadlen, u64 total_datalen); +// bool aes_gcm_dec_final_vaes_avx2(const struct aes_gcm_key_vaes_avx2 *key, +// const u32 le_ctr[4], const u8 ghash_acc[16], +// u64 total_aadlen, u64 total_datalen, +// const u8 tag[16], int taglen); +// +// This macro generates one of the above two functions (with \enc selecting +// which one). Both functions finish computing the GCM authentication tag by +// updating GHASH with the lengths block and encrypting the GHASH accumulator. +// |total_aadlen| and |total_datalen| must be the total length of the additional +// authenticated data and the en/decrypted data in bytes, respectively. +// +// The encryption function then stores the full-length (16-byte) computed +// authentication tag to |ghash_acc|. The decryption function instead loads the +// expected authentication tag (the one that was transmitted) from the 16-byte +// buffer |tag|, compares the first 4 <= |taglen| <= 16 bytes of it to the +// computed tag in constant time, and returns true if and only if they match. 
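The constant-time tag check described above amounts to XOR'ing the computed and received tags, keeping only the first taglen bytes, and testing whether any difference remains, with no data-dependent branches. A scalar C sketch of the same idea (the assembly does it branchlessly with a byte-select mask and vptest; in-kernel C code would typically use crypto_memneq() for this kind of comparison):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* Accumulate the XOR of every compared byte instead of returning early,
 * so the running time does not depend on where the first mismatch is. */
static bool auth_tags_equal(const uint8_t *computed, const uint8_t *received,
                            size_t taglen)
{
        uint8_t diff = 0;
        size_t i;

        for (i = 0; i < taglen; i++)
                diff |= computed[i] ^ received[i];
        return diff == 0;
}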
+.macro _aes_gcm_final enc + + // Function arguments + .set KEY, %rdi + .set LE_CTR_PTR, %rsi + .set GHASH_ACC_PTR, %rdx + .set TOTAL_AADLEN, %rcx + .set TOTAL_DATALEN, %r8 + .set TAG, %r9 + .set TAGLEN, %r10d // Originally at 8(%rsp) + .set TAGLEN64, %r10 + + // Additional local variables. + // %rax and %xmm0-%xmm3 are used as temporary registers. + .set AESKEYLEN, %r11d + .set AESKEYLEN64, %r11 + .set GFPOLY, %xmm4 + .set BSWAP_MASK, %xmm5 + .set LE_CTR, %xmm6 + .set GHASH_ACC, %xmm7 + .set H_POW1, %xmm8 + + // Load some constants. + vmovdqa .Lgfpoly(%rip), GFPOLY + vmovdqa .Lbswap_mask(%rip), BSWAP_MASK + + // Load the AES key length in bytes. + movl OFFSETOF_AESKEYLEN(KEY), AESKEYLEN + + // Set up a counter block with 1 in the low 32-bit word. This is the + // counter that produces the ciphertext needed to encrypt the auth tag. + // GFPOLY has 1 in the low word, so grab the 1 from there using a blend. + vpblendd $0xe, (LE_CTR_PTR), GFPOLY, LE_CTR + + // Build the lengths block and XOR it with the GHASH accumulator. + // Although the lengths block is defined as the AAD length followed by + // the en/decrypted data length, both in big-endian byte order, a byte + // reflection of the full block is needed because of the way we compute + // GHASH (see _ghash_mul_step). By using little-endian values in the + // opposite order, we avoid having to reflect any bytes here. + vmovq TOTAL_DATALEN, %xmm0 + vpinsrq $1, TOTAL_AADLEN, %xmm0, %xmm0 + vpsllq $3, %xmm0, %xmm0 // Bytes to bits + vpxor (GHASH_ACC_PTR), %xmm0, GHASH_ACC + + // Load the first hash key power (H^1), which is stored last. + vmovdqu OFFSETOFEND_H_POWERS-16(KEY), H_POW1 + + // Load TAGLEN if decrypting. +.if !\enc + movl 8(%rsp), TAGLEN +.endif + + // Make %rax point to the last AES round key for the chosen AES variant. + lea 6*16(KEY,AESKEYLEN64,4), %rax + + // Start the AES encryption of the counter block by swapping the counter + // block to big-endian and XOR-ing it with the zero-th AES round key. + vpshufb BSWAP_MASK, LE_CTR, %xmm0 + vpxor (KEY), %xmm0, %xmm0 + + // Complete the AES encryption and multiply GHASH_ACC by H^1. + // Interleave the AES and GHASH instructions to improve performance. + cmp $24, AESKEYLEN + jl 128f // AES-128? + je 192f // AES-192? + // AES-256 + vaesenc -13*16(%rax), %xmm0, %xmm0 + vaesenc -12*16(%rax), %xmm0, %xmm0 +192: + vaesenc -11*16(%rax), %xmm0, %xmm0 + vaesenc -10*16(%rax), %xmm0, %xmm0 +128: +.irp i, 0,1,2,3,4,5,6,7,8 + _ghash_mul_step \i, H_POW1, GHASH_ACC, GHASH_ACC, GFPOLY, \ + %xmm1, %xmm2, %xmm3 + vaesenc (\i-9)*16(%rax), %xmm0, %xmm0 +.endr + _ghash_mul_step 9, H_POW1, GHASH_ACC, GHASH_ACC, GFPOLY, \ + %xmm1, %xmm2, %xmm3 + + // Undo the byte reflection of the GHASH accumulator. + vpshufb BSWAP_MASK, GHASH_ACC, GHASH_ACC + + // Do the last AES round and XOR the resulting keystream block with the + // GHASH accumulator to produce the full computed authentication tag. + // + // Reduce latency by taking advantage of the property vaesenclast(key, + // a) ^ b == vaesenclast(key ^ b, a). I.e., XOR GHASH_ACC into the last + // round key, instead of XOR'ing the final AES output with GHASH_ACC. + // + // enc_final then returns the computed auth tag, while dec_final + // compares it with the transmitted one and returns a bool. To compare + // the tags, dec_final XORs them together and uses vptest to check + // whether the result is all-zeroes. This should be constant-time. + // dec_final applies the vaesenclast optimization to this additional + // value XOR'd too. 
+.if \enc + vpxor (%rax), GHASH_ACC, %xmm1 + vaesenclast %xmm1, %xmm0, GHASH_ACC + vmovdqu GHASH_ACC, (GHASH_ACC_PTR) +.else + vpxor (TAG), GHASH_ACC, GHASH_ACC + vpxor (%rax), GHASH_ACC, GHASH_ACC + vaesenclast GHASH_ACC, %xmm0, %xmm0 + lea .Lselect_high_bytes_table(%rip), %rax + vmovdqu (%rax, TAGLEN64), %xmm1 + vpshufb BSWAP_MASK, %xmm1, %xmm1 // select low bytes, not high + xor %eax, %eax + vptest %xmm1, %xmm0 + sete %al +.endif + // No need for vzeroupper here, since only used xmm registers were used. + RET +.endm + +SYM_FUNC_START(aes_gcm_enc_update_vaes_avx2) + _aes_gcm_update 1 +SYM_FUNC_END(aes_gcm_enc_update_vaes_avx2) +SYM_FUNC_START(aes_gcm_dec_update_vaes_avx2) + _aes_gcm_update 0 +SYM_FUNC_END(aes_gcm_dec_update_vaes_avx2) + +SYM_FUNC_START(aes_gcm_enc_final_vaes_avx2) + _aes_gcm_final 1 +SYM_FUNC_END(aes_gcm_enc_final_vaes_avx2) +SYM_FUNC_START(aes_gcm_dec_final_vaes_avx2) + _aes_gcm_final 0 +SYM_FUNC_END(aes_gcm_dec_final_vaes_avx2) diff --git a/arch/x86/crypto/aes-gcm-avx10-x86_64.S b/arch/x86/crypto/aes-gcm-vaes-avx512.S index 02ee11083d4f..06b71314d65c 100644 --- a/arch/x86/crypto/aes-gcm-avx10-x86_64.S +++ b/arch/x86/crypto/aes-gcm-vaes-avx512.S @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */ // -// VAES and VPCLMULQDQ optimized AES-GCM for x86_64 +// AES-GCM implementation for x86_64 CPUs that support the following CPU +// features: VAES && VPCLMULQDQ && AVX512BW && AVX512VL && BMI2 // // Copyright 2024 Google LLC // @@ -45,41 +46,6 @@ // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE // POSSIBILITY OF SUCH DAMAGE. -// -//------------------------------------------------------------------------------ -// -// This file implements AES-GCM (Galois/Counter Mode) for x86_64 CPUs that -// support VAES (vector AES), VPCLMULQDQ (vector carryless multiplication), and -// either AVX512 or AVX10. Some of the functions, notably the encryption and -// decryption update functions which are the most performance-critical, are -// provided in two variants generated from a macro: one using 256-bit vectors -// (suffix: vaes_avx10_256) and one using 512-bit vectors (vaes_avx10_512). The -// other, "shared" functions (vaes_avx10) use at most 256-bit vectors. -// -// The functions that use 512-bit vectors are intended for CPUs that support -// 512-bit vectors *and* where using them doesn't cause significant -// downclocking. They require the following CPU features: -// -// VAES && VPCLMULQDQ && BMI2 && ((AVX512BW && AVX512VL) || AVX10/512) -// -// The other functions require the following CPU features: -// -// VAES && VPCLMULQDQ && BMI2 && ((AVX512BW && AVX512VL) || AVX10/256) -// -// All functions use the "System V" ABI. The Windows ABI is not supported. -// -// Note that we use "avx10" in the names of the functions as a shorthand to -// really mean "AVX10 or a certain set of AVX512 features". Due to Intel's -// introduction of AVX512 and then its replacement by AVX10, there doesn't seem -// to be a simple way to name things that makes sense on all CPUs. -// -// Note that the macros that support both 256-bit and 512-bit vectors could -// fairly easily be changed to support 128-bit too. 
However, this would *not* -// be sufficient to allow the code to run on CPUs without AVX512 or AVX10, -// because the code heavily uses several features of these extensions other than -// the vector length: the increase in the number of SIMD registers from 16 to -// 32, masking support, and new instructions such as vpternlogd (which can do a -// three-argument XOR). These features are very useful for AES-GCM. #include <linux/linkage.h> @@ -104,16 +70,14 @@ .Lgfpoly_and_internal_carrybit: .octa 0xc2000000000000010000000000000001 - // The below constants are used for incrementing the counter blocks. - // ctr_pattern points to the four 128-bit values [0, 1, 2, 3]. - // inc_2blocks and inc_4blocks point to the single 128-bit values 2 and - // 4. Note that the same '2' is reused in ctr_pattern and inc_2blocks. + // Values needed to prepare the initial vector of counter blocks. .Lctr_pattern: .octa 0 .octa 1 -.Linc_2blocks: .octa 2 .octa 3 + + // The number of AES blocks per vector, as a 128-bit value. .Linc_4blocks: .octa 4 @@ -130,29 +94,13 @@ // Offset to end of hash key powers array in the key struct. // // This is immediately followed by three zeroized padding blocks, which are -// included so that partial vectors can be handled more easily. E.g. if VL=64 -// and two blocks remain, we load the 4 values [H^2, H^1, 0, 0]. The most -// padding blocks needed is 3, which occurs if [H^1, 0, 0, 0] is loaded. +// included so that partial vectors can be handled more easily. E.g. if two +// blocks remain, we load the 4 values [H^2, H^1, 0, 0]. The most padding +// blocks needed is 3, which occurs if [H^1, 0, 0, 0] is loaded. #define OFFSETOFEND_H_POWERS (OFFSETOF_H_POWERS + (NUM_H_POWERS * 16)) .text -// Set the vector length in bytes. This sets the VL variable and defines -// register aliases V0-V31 that map to the ymm or zmm registers. -.macro _set_veclen vl - .set VL, \vl -.irp i, 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \ - 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 -.if VL == 32 - .set V\i, %ymm\i -.elseif VL == 64 - .set V\i, %zmm\i -.else - .error "Unsupported vector length" -.endif -.endr -.endm - // The _ghash_mul_step macro does one step of GHASH multiplication of the // 128-bit lanes of \a by the corresponding 128-bit lanes of \b and storing the // reduced products in \dst. \t0, \t1, and \t2 are temporary registers of the @@ -312,39 +260,44 @@ vpternlogd $0x96, \t0, \mi, \hi .endm -// void aes_gcm_precompute_##suffix(struct aes_gcm_key_avx10 *key); -// -// Given the expanded AES key |key->aes_key|, this function derives the GHASH -// subkey and initializes |key->ghash_key_powers| with powers of it. -// -// The number of key powers initialized is NUM_H_POWERS, and they are stored in -// the order H^NUM_H_POWERS to H^1. The zeroized padding blocks after the key -// powers themselves are also initialized. +// This is a specialized version of _ghash_mul that computes \a * \a, i.e. it +// squares \a. It skips computing MI = (a_L * a_H) + (a_H * a_L) = 0. 
+.macro _ghash_square a, dst, gfpoly, t0, t1 + vpclmulqdq $0x00, \a, \a, \t0 // LO = a_L * a_L + vpclmulqdq $0x11, \a, \a, \dst // HI = a_H * a_H + vpclmulqdq $0x01, \t0, \gfpoly, \t1 // LO_L*(x^63 + x^62 + x^57) + vpshufd $0x4e, \t0, \t0 // Swap halves of LO + vpxord \t0, \t1, \t1 // Fold LO into MI + vpclmulqdq $0x01, \t1, \gfpoly, \t0 // MI_L*(x^63 + x^62 + x^57) + vpshufd $0x4e, \t1, \t1 // Swap halves of MI + vpternlogd $0x96, \t0, \t1, \dst // Fold MI into HI +.endm + +// void aes_gcm_precompute_vaes_avx512(struct aes_gcm_key_vaes_avx512 *key); // -// This macro supports both VL=32 and VL=64. _set_veclen must have been invoked -// with the desired length. In the VL=32 case, the function computes twice as -// many key powers than are actually used by the VL=32 GCM update functions. -// This is done to keep the key format the same regardless of vector length. -.macro _aes_gcm_precompute +// Given the expanded AES key |key->base.aes_key|, derive the GHASH subkey and +// initialize |key->h_powers| and |key->padding|. +SYM_FUNC_START(aes_gcm_precompute_vaes_avx512) // Function arguments .set KEY, %rdi - // Additional local variables. V0-V2 and %rax are used as temporaries. + // Additional local variables. + // %zmm[0-2] and %rax are used as temporaries. .set POWERS_PTR, %rsi .set RNDKEYLAST_PTR, %rdx - .set H_CUR, V3 + .set H_CUR, %zmm3 .set H_CUR_YMM, %ymm3 .set H_CUR_XMM, %xmm3 - .set H_INC, V4 + .set H_INC, %zmm4 .set H_INC_YMM, %ymm4 .set H_INC_XMM, %xmm4 - .set GFPOLY, V5 + .set GFPOLY, %zmm5 .set GFPOLY_YMM, %ymm5 .set GFPOLY_XMM, %xmm5 // Get pointer to lowest set of key powers (located at end of array). - lea OFFSETOFEND_H_POWERS-VL(KEY), POWERS_PTR + lea OFFSETOFEND_H_POWERS-64(KEY), POWERS_PTR // Encrypt an all-zeroes block to get the raw hash subkey. movl OFFSETOF_AESKEYLEN(KEY), %eax @@ -363,8 +316,8 @@ // Zeroize the padding blocks. vpxor %xmm0, %xmm0, %xmm0 - vmovdqu %ymm0, VL(POWERS_PTR) - vmovdqu %xmm0, VL+2*16(POWERS_PTR) + vmovdqu %ymm0, 64(POWERS_PTR) + vmovdqu %xmm0, 64+2*16(POWERS_PTR) // Finish preprocessing the first key power, H^1. Since this GHASH // implementation operates directly on values with the backwards bit @@ -397,54 +350,44 @@ // special needs to be done to make this happen, though: H^1 * H^1 would // end up with two factors of x^-1, but the multiplication consumes one. // So the product H^2 ends up with the desired one factor of x^-1. - _ghash_mul H_CUR_XMM, H_CUR_XMM, H_INC_XMM, GFPOLY_XMM, \ - %xmm0, %xmm1, %xmm2 + _ghash_square H_CUR_XMM, H_INC_XMM, GFPOLY_XMM, %xmm0, %xmm1 // Create H_CUR_YMM = [H^2, H^1] and H_INC_YMM = [H^2, H^2]. vinserti128 $1, H_CUR_XMM, H_INC_YMM, H_CUR_YMM vinserti128 $1, H_INC_XMM, H_INC_YMM, H_INC_YMM -.if VL == 64 // Create H_CUR = [H^4, H^3, H^2, H^1] and H_INC = [H^4, H^4, H^4, H^4]. _ghash_mul H_INC_YMM, H_CUR_YMM, H_INC_YMM, GFPOLY_YMM, \ %ymm0, %ymm1, %ymm2 vinserti64x4 $1, H_CUR_YMM, H_INC, H_CUR vshufi64x2 $0, H_INC, H_INC, H_INC -.endif // Store the lowest set of key powers. vmovdqu8 H_CUR, (POWERS_PTR) - // Compute and store the remaining key powers. With VL=32, repeatedly - // multiply [H^(i+1), H^i] by [H^2, H^2] to get [H^(i+3), H^(i+2)]. - // With VL=64, repeatedly multiply [H^(i+3), H^(i+2), H^(i+1), H^i] by + // Compute and store the remaining key powers. + // Repeatedly multiply [H^(i+3), H^(i+2), H^(i+1), H^i] by // [H^4, H^4, H^4, H^4] to get [H^(i+7), H^(i+6), H^(i+5), H^(i+4)]. 
- mov $(NUM_H_POWERS*16/VL) - 1, %eax -.Lprecompute_next\@: - sub $VL, POWERS_PTR - _ghash_mul H_INC, H_CUR, H_CUR, GFPOLY, V0, V1, V2 + mov $3, %eax +.Lprecompute_next: + sub $64, POWERS_PTR + _ghash_mul H_INC, H_CUR, H_CUR, GFPOLY, %zmm0, %zmm1, %zmm2 vmovdqu8 H_CUR, (POWERS_PTR) dec %eax - jnz .Lprecompute_next\@ + jnz .Lprecompute_next vzeroupper // This is needed after using ymm or zmm registers. RET -.endm +SYM_FUNC_END(aes_gcm_precompute_vaes_avx512) // XOR together the 128-bit lanes of \src (whose low lane is \src_xmm) and store // the result in \dst_xmm. This implicitly zeroizes the other lanes of dst. .macro _horizontal_xor src, src_xmm, dst_xmm, t0_xmm, t1_xmm, t2_xmm vextracti32x4 $1, \src, \t0_xmm -.if VL == 32 - vpxord \t0_xmm, \src_xmm, \dst_xmm -.elseif VL == 64 vextracti32x4 $2, \src, \t1_xmm vextracti32x4 $3, \src, \t2_xmm vpxord \t0_xmm, \src_xmm, \dst_xmm vpternlogd $0x96, \t1_xmm, \t2_xmm, \dst_xmm -.else - .error "Unsupported vector length" -.endif .endm // Do one step of the GHASH update of the data blocks given in the vector @@ -458,25 +401,21 @@ // // The GHASH update does: GHASH_ACC = H_POW4*(GHASHDATA0 + GHASH_ACC) + // H_POW3*GHASHDATA1 + H_POW2*GHASHDATA2 + H_POW1*GHASHDATA3, where the -// operations are vectorized operations on vectors of 16-byte blocks. E.g., -// with VL=32 there are 2 blocks per vector and the vectorized terms correspond -// to the following non-vectorized terms: -// -// H_POW4*(GHASHDATA0 + GHASH_ACC) => H^8*(blk0 + GHASH_ACC_XMM) and H^7*(blk1 + 0) -// H_POW3*GHASHDATA1 => H^6*blk2 and H^5*blk3 -// H_POW2*GHASHDATA2 => H^4*blk4 and H^3*blk5 -// H_POW1*GHASHDATA3 => H^2*blk6 and H^1*blk7 +// operations are vectorized operations on 512-bit vectors of 128-bit blocks. +// The vectorized terms correspond to the following non-vectorized terms: // -// With VL=64, we use 4 blocks/vector, H^16 through H^1, and blk0 through blk15. +// H_POW4*(GHASHDATA0 + GHASH_ACC) => H^16*(blk0 + GHASH_ACC_XMM), +// H^15*(blk1 + 0), H^14*(blk2 + 0), and H^13*(blk3 + 0) +// H_POW3*GHASHDATA1 => H^12*blk4, H^11*blk5, H^10*blk6, and H^9*blk7 +// H_POW2*GHASHDATA2 => H^8*blk8, H^7*blk9, H^6*blk10, and H^5*blk11 +// H_POW1*GHASHDATA3 => H^4*blk12, H^3*blk13, H^2*blk14, and H^1*blk15 // // More concretely, this code does: // - Do vectorized "schoolbook" multiplications to compute the intermediate // 256-bit product of each block and its corresponding hash key power. -// There are 4*VL/16 of these intermediate products. -// - Sum (XOR) the intermediate 256-bit products across vectors. This leaves -// VL/16 256-bit intermediate values. +// - Sum (XOR) the intermediate 256-bit products across vectors. // - Do a vectorized reduction of these 256-bit intermediate values to -// 128-bits each. This leaves VL/16 128-bit intermediate values. +// 128-bits each. // - Sum (XOR) these values and store the 128-bit result in GHASH_ACC_XMM. // // See _ghash_mul_step for the full explanation of the operations performed for @@ -532,85 +471,224 @@ .endif .endm -// Do one non-last round of AES encryption on the counter blocks in V0-V3 using -// the round key that has been broadcast to all 128-bit lanes of \round_key. +// Update GHASH with four vectors of data blocks. See _ghash_step_4x for full +// explanation. 
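For readers cross-checking the term mapping described above, the vectorized update is the standard Horner-form GHASH recurrence expanded over 16 blocks (a restatement of the existing comment, not new patch content), with '+' denoting XOR and '*' the GF(2^128) multiplication:

    GHASH_ACC = (...((GHASH_ACC + blk0)*H + blk1)*H + ... + blk15)*H
              = (GHASH_ACC + blk0)*H^16 + blk1*H^15 + ... + blk14*H^2 + blk15*H^1

Because the reduction modulo the GHASH polynomial is linear over XOR, the sixteen unreduced products can be summed first and reduced once, which is what the steps listed above do.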
+.macro _ghash_4x +.irp i, 0,1,2,3,4,5,6,7,8,9 + _ghash_step_4x \i +.endr +.endm + +// void aes_gcm_aad_update_vaes_avx512(const struct aes_gcm_key_vaes_avx512 *key, +// u8 ghash_acc[16], +// const u8 *aad, int aadlen); +// +// This function processes the AAD (Additional Authenticated Data) in GCM. +// Using the key |key|, it updates the GHASH accumulator |ghash_acc| with the +// data given by |aad| and |aadlen|. On the first call, |ghash_acc| must be all +// zeroes. |aadlen| must be a multiple of 16, except on the last call where it +// can be any length. The caller must do any buffering needed to ensure this. +// +// This handles large amounts of AAD efficiently, while also keeping overhead +// low for small amounts which is the common case. TLS and IPsec use less than +// one block of AAD, but (uncommonly) other use cases may use much more. +SYM_FUNC_START(aes_gcm_aad_update_vaes_avx512) + + // Function arguments + .set KEY, %rdi + .set GHASH_ACC_PTR, %rsi + .set AAD, %rdx + .set AADLEN, %ecx + .set AADLEN64, %rcx // Zero-extend AADLEN before using! + + // Additional local variables. + // %rax and %k1 are used as temporary registers. + .set GHASHDATA0, %zmm0 + .set GHASHDATA0_XMM, %xmm0 + .set GHASHDATA1, %zmm1 + .set GHASHDATA1_XMM, %xmm1 + .set GHASHDATA2, %zmm2 + .set GHASHDATA2_XMM, %xmm2 + .set GHASHDATA3, %zmm3 + .set BSWAP_MASK, %zmm4 + .set BSWAP_MASK_XMM, %xmm4 + .set GHASH_ACC, %zmm5 + .set GHASH_ACC_XMM, %xmm5 + .set H_POW4, %zmm6 + .set H_POW3, %zmm7 + .set H_POW2, %zmm8 + .set H_POW1, %zmm9 + .set H_POW1_XMM, %xmm9 + .set GFPOLY, %zmm10 + .set GFPOLY_XMM, %xmm10 + .set GHASHTMP0, %zmm11 + .set GHASHTMP1, %zmm12 + .set GHASHTMP2, %zmm13 + + // Load the GHASH accumulator. + vmovdqu (GHASH_ACC_PTR), GHASH_ACC_XMM + + // Check for the common case of AADLEN <= 16, as well as AADLEN == 0. + cmp $16, AADLEN + jg .Laad_more_than_16bytes + test AADLEN, AADLEN + jz .Laad_done + + // Fast path: update GHASH with 1 <= AADLEN <= 16 bytes of AAD. + vmovdqu .Lbswap_mask(%rip), BSWAP_MASK_XMM + vmovdqu .Lgfpoly(%rip), GFPOLY_XMM + mov $-1, %eax + bzhi AADLEN, %eax, %eax + kmovd %eax, %k1 + vmovdqu8 (AAD), GHASHDATA0_XMM{%k1}{z} + vmovdqu OFFSETOFEND_H_POWERS-16(KEY), H_POW1_XMM + vpshufb BSWAP_MASK_XMM, GHASHDATA0_XMM, GHASHDATA0_XMM + vpxor GHASHDATA0_XMM, GHASH_ACC_XMM, GHASH_ACC_XMM + _ghash_mul H_POW1_XMM, GHASH_ACC_XMM, GHASH_ACC_XMM, GFPOLY_XMM, \ + GHASHDATA0_XMM, GHASHDATA1_XMM, GHASHDATA2_XMM + jmp .Laad_done + +.Laad_more_than_16bytes: + vbroadcasti32x4 .Lbswap_mask(%rip), BSWAP_MASK + vbroadcasti32x4 .Lgfpoly(%rip), GFPOLY + + // If AADLEN >= 256, update GHASH with 256 bytes of AAD at a time. + sub $256, AADLEN + jl .Laad_loop_4x_done + vmovdqu8 OFFSETOFEND_H_POWERS-4*64(KEY), H_POW4 + vmovdqu8 OFFSETOFEND_H_POWERS-3*64(KEY), H_POW3 + vmovdqu8 OFFSETOFEND_H_POWERS-2*64(KEY), H_POW2 + vmovdqu8 OFFSETOFEND_H_POWERS-1*64(KEY), H_POW1 +.Laad_loop_4x: + vmovdqu8 0*64(AAD), GHASHDATA0 + vmovdqu8 1*64(AAD), GHASHDATA1 + vmovdqu8 2*64(AAD), GHASHDATA2 + vmovdqu8 3*64(AAD), GHASHDATA3 + _ghash_4x + add $256, AAD + sub $256, AADLEN + jge .Laad_loop_4x +.Laad_loop_4x_done: + + // If AADLEN >= 64, update GHASH with 64 bytes of AAD at a time. 
+ add $192, AADLEN + jl .Laad_loop_1x_done + vmovdqu8 OFFSETOFEND_H_POWERS-1*64(KEY), H_POW1 +.Laad_loop_1x: + vmovdqu8 (AAD), GHASHDATA0 + vpshufb BSWAP_MASK, GHASHDATA0, GHASHDATA0 + vpxord GHASHDATA0, GHASH_ACC, GHASH_ACC + _ghash_mul H_POW1, GHASH_ACC, GHASH_ACC, GFPOLY, \ + GHASHDATA0, GHASHDATA1, GHASHDATA2 + _horizontal_xor GHASH_ACC, GHASH_ACC_XMM, GHASH_ACC_XMM, \ + GHASHDATA0_XMM, GHASHDATA1_XMM, GHASHDATA2_XMM + add $64, AAD + sub $64, AADLEN + jge .Laad_loop_1x +.Laad_loop_1x_done: + + // Update GHASH with the remaining 0 <= AADLEN < 64 bytes of AAD. + add $64, AADLEN + jz .Laad_done + mov $-1, %rax + bzhi AADLEN64, %rax, %rax + kmovq %rax, %k1 + vmovdqu8 (AAD), GHASHDATA0{%k1}{z} + neg AADLEN64 + and $~15, AADLEN64 // -round_up(AADLEN, 16) + vmovdqu8 OFFSETOFEND_H_POWERS(KEY,AADLEN64), H_POW1 + vpshufb BSWAP_MASK, GHASHDATA0, GHASHDATA0 + vpxord GHASHDATA0, GHASH_ACC, GHASH_ACC + _ghash_mul H_POW1, GHASH_ACC, GHASH_ACC, GFPOLY, \ + GHASHDATA0, GHASHDATA1, GHASHDATA2 + _horizontal_xor GHASH_ACC, GHASH_ACC_XMM, GHASH_ACC_XMM, \ + GHASHDATA0_XMM, GHASHDATA1_XMM, GHASHDATA2_XMM + +.Laad_done: + // Store the updated GHASH accumulator back to memory. + vmovdqu GHASH_ACC_XMM, (GHASH_ACC_PTR) + + vzeroupper // This is needed after using ymm or zmm registers. + RET +SYM_FUNC_END(aes_gcm_aad_update_vaes_avx512) + +// Do one non-last round of AES encryption on the blocks in %zmm[0-3] using the +// round key that has been broadcast to all 128-bit lanes of \round_key. .macro _vaesenc_4x round_key - vaesenc \round_key, V0, V0 - vaesenc \round_key, V1, V1 - vaesenc \round_key, V2, V2 - vaesenc \round_key, V3, V3 + vaesenc \round_key, %zmm0, %zmm0 + vaesenc \round_key, %zmm1, %zmm1 + vaesenc \round_key, %zmm2, %zmm2 + vaesenc \round_key, %zmm3, %zmm3 .endm // Start the AES encryption of four vectors of counter blocks. .macro _ctr_begin_4x // Increment LE_CTR four times to generate four vectors of little-endian - // counter blocks, swap each to big-endian, and store them in V0-V3. - vpshufb BSWAP_MASK, LE_CTR, V0 + // counter blocks, swap each to big-endian, and store them in %zmm[0-3]. + vpshufb BSWAP_MASK, LE_CTR, %zmm0 vpaddd LE_CTR_INC, LE_CTR, LE_CTR - vpshufb BSWAP_MASK, LE_CTR, V1 + vpshufb BSWAP_MASK, LE_CTR, %zmm1 vpaddd LE_CTR_INC, LE_CTR, LE_CTR - vpshufb BSWAP_MASK, LE_CTR, V2 + vpshufb BSWAP_MASK, LE_CTR, %zmm2 vpaddd LE_CTR_INC, LE_CTR, LE_CTR - vpshufb BSWAP_MASK, LE_CTR, V3 + vpshufb BSWAP_MASK, LE_CTR, %zmm3 vpaddd LE_CTR_INC, LE_CTR, LE_CTR // AES "round zero": XOR in the zero-th round key. - vpxord RNDKEY0, V0, V0 - vpxord RNDKEY0, V1, V1 - vpxord RNDKEY0, V2, V2 - vpxord RNDKEY0, V3, V3 + vpxord RNDKEY0, %zmm0, %zmm0 + vpxord RNDKEY0, %zmm1, %zmm1 + vpxord RNDKEY0, %zmm2, %zmm2 + vpxord RNDKEY0, %zmm3, %zmm3 .endm -// Do the last AES round for four vectors of counter blocks V0-V3, XOR source -// data with the resulting keystream, and write the result to DST and +// Do the last AES round for four vectors of counter blocks %zmm[0-3], XOR +// source data with the resulting keystream, and write the result to DST and // GHASHDATA[0-3]. (Implementation differs slightly, but has the same effect.) .macro _aesenclast_and_xor_4x // XOR the source data with the last round key, saving the result in // GHASHDATA[0-3]. This reduces latency by taking advantage of the // property vaesenclast(key, a) ^ b == vaesenclast(key ^ b, a). 
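The identity quoted in the comment above follows from the structure of the AES last round, which applies SubBytes and ShiftRows but no MixColumns before the round-key XOR; in the comment's notation:

    vaesenclast(key, a)     = ShiftRows(SubBytes(a)) ^ key
    vaesenclast(key, a) ^ b = ShiftRows(SubBytes(a)) ^ (key ^ b)
                            = vaesenclast(key ^ b, a)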
- vpxord 0*VL(SRC), RNDKEYLAST, GHASHDATA0 - vpxord 1*VL(SRC), RNDKEYLAST, GHASHDATA1 - vpxord 2*VL(SRC), RNDKEYLAST, GHASHDATA2 - vpxord 3*VL(SRC), RNDKEYLAST, GHASHDATA3 + vpxord 0*64(SRC), RNDKEYLAST, GHASHDATA0 + vpxord 1*64(SRC), RNDKEYLAST, GHASHDATA1 + vpxord 2*64(SRC), RNDKEYLAST, GHASHDATA2 + vpxord 3*64(SRC), RNDKEYLAST, GHASHDATA3 // Do the last AES round. This handles the XOR with the source data // too, as per the optimization described above. - vaesenclast GHASHDATA0, V0, GHASHDATA0 - vaesenclast GHASHDATA1, V1, GHASHDATA1 - vaesenclast GHASHDATA2, V2, GHASHDATA2 - vaesenclast GHASHDATA3, V3, GHASHDATA3 + vaesenclast GHASHDATA0, %zmm0, GHASHDATA0 + vaesenclast GHASHDATA1, %zmm1, GHASHDATA1 + vaesenclast GHASHDATA2, %zmm2, GHASHDATA2 + vaesenclast GHASHDATA3, %zmm3, GHASHDATA3 // Store the en/decrypted data to DST. - vmovdqu8 GHASHDATA0, 0*VL(DST) - vmovdqu8 GHASHDATA1, 1*VL(DST) - vmovdqu8 GHASHDATA2, 2*VL(DST) - vmovdqu8 GHASHDATA3, 3*VL(DST) + vmovdqu8 GHASHDATA0, 0*64(DST) + vmovdqu8 GHASHDATA1, 1*64(DST) + vmovdqu8 GHASHDATA2, 2*64(DST) + vmovdqu8 GHASHDATA3, 3*64(DST) .endm -// void aes_gcm_{enc,dec}_update_##suffix(const struct aes_gcm_key_avx10 *key, -// const u32 le_ctr[4], u8 ghash_acc[16], -// const u8 *src, u8 *dst, int datalen); +// void aes_gcm_{enc,dec}_update_vaes_avx512(const struct aes_gcm_key_vaes_avx512 *key, +// const u32 le_ctr[4], u8 ghash_acc[16], +// const u8 *src, u8 *dst, int datalen); // // This macro generates a GCM encryption or decryption update function with the -// above prototype (with \enc selecting which one). This macro supports both -// VL=32 and VL=64. _set_veclen must have been invoked with the desired length. -// -// This function computes the next portion of the CTR keystream, XOR's it with -// |datalen| bytes from |src|, and writes the resulting encrypted or decrypted -// data to |dst|. It also updates the GHASH accumulator |ghash_acc| using the -// next |datalen| ciphertext bytes. +// above prototype (with \enc selecting which one). The function computes the +// next portion of the CTR keystream, XOR's it with |datalen| bytes from |src|, +// and writes the resulting encrypted or decrypted data to |dst|. It also +// updates the GHASH accumulator |ghash_acc| using the next |datalen| ciphertext +// bytes. // // |datalen| must be a multiple of 16, except on the last call where it can be // any length. The caller must do any buffering needed to ensure this. Both // in-place and out-of-place en/decryption are supported. // -// |le_ctr| must give the current counter in little-endian format. For a new -// message, the low word of the counter must be 2. This function loads the -// counter from |le_ctr| and increments the loaded counter as needed, but it -// does *not* store the updated counter back to |le_ctr|. The caller must -// update |le_ctr| if any more data segments follow. Internally, only the low -// 32-bit word of the counter is incremented, following the GCM standard. +// |le_ctr| must give the current counter in little-endian format. This +// function loads the counter from |le_ctr| and increments the loaded counter as +// needed, but it does *not* store the updated counter back to |le_ctr|. The +// caller must update |le_ctr| if any more data segments follow. Internally, +// only the low 32-bit word of the counter is incremented, following the GCM +// standard. 
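To illustrate the caller-side half of that counter contract, here is a minimal portable-C sketch of how a caller might advance the counter between segments; the helper name is hypothetical and not part of the patch.

#include <stdint.h>

/* After passing a segment of nbytes (a multiple of 16 for every segment
 * except the last), advance the counter for the next *_update call.
 * Only the low 32-bit word counts blocks, and it wraps modulo 2^32 as
 * the GCM standard specifies. */
static void gcm_le_ctr_advance(uint32_t le_ctr[4], unsigned int nbytes)
{
	le_ctr[0] += nbytes / 16;
}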
.macro _aes_gcm_update enc // Function arguments @@ -634,69 +712,69 @@ // Pointer to the last AES round key for the chosen AES variant .set RNDKEYLAST_PTR, %r11 - // In the main loop, V0-V3 are used as AES input and output. Elsewhere - // they are used as temporary registers. + // In the main loop, %zmm[0-3] are used as AES input and output. + // Elsewhere they are used as temporary registers. // GHASHDATA[0-3] hold the ciphertext blocks and GHASH input data. - .set GHASHDATA0, V4 + .set GHASHDATA0, %zmm4 .set GHASHDATA0_XMM, %xmm4 - .set GHASHDATA1, V5 + .set GHASHDATA1, %zmm5 .set GHASHDATA1_XMM, %xmm5 - .set GHASHDATA2, V6 + .set GHASHDATA2, %zmm6 .set GHASHDATA2_XMM, %xmm6 - .set GHASHDATA3, V7 + .set GHASHDATA3, %zmm7 // BSWAP_MASK is the shuffle mask for byte-reflecting 128-bit values // using vpshufb, copied to all 128-bit lanes. - .set BSWAP_MASK, V8 + .set BSWAP_MASK, %zmm8 // RNDKEY temporarily holds the next AES round key. - .set RNDKEY, V9 + .set RNDKEY, %zmm9 // GHASH_ACC is the accumulator variable for GHASH. When fully reduced, // only the lowest 128-bit lane can be nonzero. When not fully reduced, // more than one lane may be used, and they need to be XOR'd together. - .set GHASH_ACC, V10 + .set GHASH_ACC, %zmm10 .set GHASH_ACC_XMM, %xmm10 // LE_CTR_INC is the vector of 32-bit words that need to be added to a // vector of little-endian counter blocks to advance it forwards. - .set LE_CTR_INC, V11 + .set LE_CTR_INC, %zmm11 // LE_CTR contains the next set of little-endian counter blocks. - .set LE_CTR, V12 + .set LE_CTR, %zmm12 // RNDKEY0, RNDKEYLAST, and RNDKEY_M[9-1] contain cached AES round keys, // copied to all 128-bit lanes. RNDKEY0 is the zero-th round key, // RNDKEYLAST the last, and RNDKEY_M\i the one \i-th from the last. - .set RNDKEY0, V13 - .set RNDKEYLAST, V14 - .set RNDKEY_M9, V15 - .set RNDKEY_M8, V16 - .set RNDKEY_M7, V17 - .set RNDKEY_M6, V18 - .set RNDKEY_M5, V19 - .set RNDKEY_M4, V20 - .set RNDKEY_M3, V21 - .set RNDKEY_M2, V22 - .set RNDKEY_M1, V23 + .set RNDKEY0, %zmm13 + .set RNDKEYLAST, %zmm14 + .set RNDKEY_M9, %zmm15 + .set RNDKEY_M8, %zmm16 + .set RNDKEY_M7, %zmm17 + .set RNDKEY_M6, %zmm18 + .set RNDKEY_M5, %zmm19 + .set RNDKEY_M4, %zmm20 + .set RNDKEY_M3, %zmm21 + .set RNDKEY_M2, %zmm22 + .set RNDKEY_M1, %zmm23 // GHASHTMP[0-2] are temporary variables used by _ghash_step_4x. These // cannot coincide with anything used for AES encryption, since for // performance reasons GHASH and AES encryption are interleaved. - .set GHASHTMP0, V24 - .set GHASHTMP1, V25 - .set GHASHTMP2, V26 + .set GHASHTMP0, %zmm24 + .set GHASHTMP1, %zmm25 + .set GHASHTMP2, %zmm26 - // H_POW[4-1] contain the powers of the hash key H^(4*VL/16)...H^1. The + // H_POW[4-1] contain the powers of the hash key H^16...H^1. The // descending numbering reflects the order of the key powers. - .set H_POW4, V27 - .set H_POW3, V28 - .set H_POW2, V29 - .set H_POW1, V30 + .set H_POW4, %zmm27 + .set H_POW3, %zmm28 + .set H_POW2, %zmm29 + .set H_POW1, %zmm30 // GFPOLY contains the .Lgfpoly constant, copied to all 128-bit lanes. - .set GFPOLY, V31 + .set GFPOLY, %zmm31 // Load some constants. vbroadcasti32x4 .Lbswap_mask(%rip), BSWAP_MASK @@ -719,29 +797,23 @@ // Finish initializing LE_CTR by adding [0, 1, ...] to its low words. vpaddd .Lctr_pattern(%rip), LE_CTR, LE_CTR - // Initialize LE_CTR_INC to contain VL/16 in all 128-bit lanes. -.if VL == 32 - vbroadcasti32x4 .Linc_2blocks(%rip), LE_CTR_INC -.elseif VL == 64 + // Load 4 into all 128-bit lanes of LE_CTR_INC. 
vbroadcasti32x4 .Linc_4blocks(%rip), LE_CTR_INC -.else - .error "Unsupported vector length" -.endif - // If there are at least 4*VL bytes of data, then continue into the loop - // that processes 4*VL bytes of data at a time. Otherwise skip it. + // If there are at least 256 bytes of data, then continue into the loop + // that processes 256 bytes of data at a time. Otherwise skip it. // - // Pre-subtracting 4*VL from DATALEN saves an instruction from the main + // Pre-subtracting 256 from DATALEN saves an instruction from the main // loop and also ensures that at least one write always occurs to // DATALEN, zero-extending it and allowing DATALEN64 to be used later. - add $-4*VL, DATALEN // shorter than 'sub 4*VL' when VL=32 + sub $256, DATALEN jl .Lcrypt_loop_4x_done\@ // Load powers of the hash key. - vmovdqu8 OFFSETOFEND_H_POWERS-4*VL(KEY), H_POW4 - vmovdqu8 OFFSETOFEND_H_POWERS-3*VL(KEY), H_POW3 - vmovdqu8 OFFSETOFEND_H_POWERS-2*VL(KEY), H_POW2 - vmovdqu8 OFFSETOFEND_H_POWERS-1*VL(KEY), H_POW1 + vmovdqu8 OFFSETOFEND_H_POWERS-4*64(KEY), H_POW4 + vmovdqu8 OFFSETOFEND_H_POWERS-3*64(KEY), H_POW3 + vmovdqu8 OFFSETOFEND_H_POWERS-2*64(KEY), H_POW2 + vmovdqu8 OFFSETOFEND_H_POWERS-1*64(KEY), H_POW1 // Main loop: en/decrypt and hash 4 vectors at a time. // @@ -770,9 +842,9 @@ cmp %rax, RNDKEYLAST_PTR jne 1b _aesenclast_and_xor_4x - sub $-4*VL, SRC // shorter than 'add 4*VL' when VL=32 - sub $-4*VL, DST - add $-4*VL, DATALEN + add $256, SRC + add $256, DST + sub $256, DATALEN jl .Lghash_last_ciphertext_4x\@ .endif @@ -786,10 +858,10 @@ // If decrypting, load more ciphertext blocks into GHASHDATA[0-3]. If // encrypting, GHASHDATA[0-3] already contain the previous ciphertext. .if !\enc - vmovdqu8 0*VL(SRC), GHASHDATA0 - vmovdqu8 1*VL(SRC), GHASHDATA1 - vmovdqu8 2*VL(SRC), GHASHDATA2 - vmovdqu8 3*VL(SRC), GHASHDATA3 + vmovdqu8 0*64(SRC), GHASHDATA0 + vmovdqu8 1*64(SRC), GHASHDATA1 + vmovdqu8 2*64(SRC), GHASHDATA2 + vmovdqu8 3*64(SRC), GHASHDATA3 .endif // Start the AES encryption of the counter blocks. @@ -809,44 +881,44 @@ _vaesenc_4x RNDKEY 128: - // Finish the AES encryption of the counter blocks in V0-V3, interleaved - // with the GHASH update of the ciphertext blocks in GHASHDATA[0-3]. + // Finish the AES encryption of the counter blocks in %zmm[0-3], + // interleaved with the GHASH update of the ciphertext blocks in + // GHASHDATA[0-3]. .irp i, 9,8,7,6,5,4,3,2,1 _ghash_step_4x (9 - \i) _vaesenc_4x RNDKEY_M\i .endr _ghash_step_4x 9 _aesenclast_and_xor_4x - sub $-4*VL, SRC // shorter than 'add 4*VL' when VL=32 - sub $-4*VL, DST - add $-4*VL, DATALEN + add $256, SRC + add $256, DST + sub $256, DATALEN jge .Lcrypt_loop_4x\@ .if \enc .Lghash_last_ciphertext_4x\@: // Update GHASH with the last set of ciphertext blocks. -.irp i, 0,1,2,3,4,5,6,7,8,9 - _ghash_step_4x \i -.endr + _ghash_4x .endif .Lcrypt_loop_4x_done\@: - // Undo the extra subtraction by 4*VL and check whether data remains. - sub $-4*VL, DATALEN // shorter than 'add 4*VL' when VL=32 + // Undo the extra subtraction by 256 and check whether data remains. + add $256, DATALEN jz .Ldone\@ - // The data length isn't a multiple of 4*VL. Process the remaining data - // of length 1 <= DATALEN < 4*VL, up to one vector (VL bytes) at a time. - // Going one vector at a time may seem inefficient compared to having - // separate code paths for each possible number of vectors remaining. 
- // However, using a loop keeps the code size down, and it performs - // surprising well; modern CPUs will start executing the next iteration - // before the previous one finishes and also predict the number of loop - // iterations. For a similar reason, we roll up the AES rounds. + // The data length isn't a multiple of 256 bytes. Process the remaining + // data of length 1 <= DATALEN < 256, up to one 64-byte vector at a + // time. Going one vector at a time may seem inefficient compared to + // having separate code paths for each possible number of vectors + // remaining. However, using a loop keeps the code size down, and it + // performs surprising well; modern CPUs will start executing the next + // iteration before the previous one finishes and also predict the + // number of loop iterations. For a similar reason, we roll up the AES + // rounds. // - // On the last iteration, the remaining length may be less than VL. - // Handle this using masking. + // On the last iteration, the remaining length may be less than 64 + // bytes. Handle this using masking. // // Since there are enough key powers available for all remaining data, // there is no need to do a GHASH reduction after each iteration. @@ -875,65 +947,60 @@ .Lcrypt_loop_1x\@: // Select the appropriate mask for this iteration: all 1's if - // DATALEN >= VL, otherwise DATALEN 1's. Do this branchlessly using the + // DATALEN >= 64, otherwise DATALEN 1's. Do this branchlessly using the // bzhi instruction from BMI2. (This relies on DATALEN <= 255.) -.if VL < 64 - mov $-1, %eax - bzhi DATALEN, %eax, %eax - kmovd %eax, %k1 -.else mov $-1, %rax bzhi DATALEN64, %rax, %rax kmovq %rax, %k1 -.endif // Encrypt a vector of counter blocks. This does not need to be masked. - vpshufb BSWAP_MASK, LE_CTR, V0 + vpshufb BSWAP_MASK, LE_CTR, %zmm0 vpaddd LE_CTR_INC, LE_CTR, LE_CTR - vpxord RNDKEY0, V0, V0 + vpxord RNDKEY0, %zmm0, %zmm0 lea 16(KEY), %rax 1: vbroadcasti32x4 (%rax), RNDKEY - vaesenc RNDKEY, V0, V0 + vaesenc RNDKEY, %zmm0, %zmm0 add $16, %rax cmp %rax, RNDKEYLAST_PTR jne 1b - vaesenclast RNDKEYLAST, V0, V0 + vaesenclast RNDKEYLAST, %zmm0, %zmm0 // XOR the data with the appropriate number of keystream bytes. - vmovdqu8 (SRC), V1{%k1}{z} - vpxord V1, V0, V0 - vmovdqu8 V0, (DST){%k1} + vmovdqu8 (SRC), %zmm1{%k1}{z} + vpxord %zmm1, %zmm0, %zmm0 + vmovdqu8 %zmm0, (DST){%k1} // Update GHASH with the ciphertext block(s), without reducing. // - // In the case of DATALEN < VL, the ciphertext is zero-padded to VL. - // (If decrypting, it's done by the above masked load. If encrypting, - // it's done by the below masked register-to-register move.) Note that - // if DATALEN <= VL - 16, there will be additional padding beyond the - // padding of the last block specified by GHASH itself; i.e., there may - // be whole block(s) that get processed by the GHASH multiplication and - // reduction instructions but should not actually be included in the + // In the case of DATALEN < 64, the ciphertext is zero-padded to 64 + // bytes. (If decrypting, it's done by the above masked load. If + // encrypting, it's done by the below masked register-to-register move.) + // Note that if DATALEN <= 48, there will be additional padding beyond + // the padding of the last block specified by GHASH itself; i.e., there + // may be whole block(s) that get processed by the GHASH multiplication + // and reduction instructions but should not actually be included in the // GHASH. 
However, any such blocks are all-zeroes, and the values that // they're multiplied with are also all-zeroes. Therefore they just add // 0 * 0 = 0 to the final GHASH result, which makes no difference. vmovdqu8 (POWERS_PTR), H_POW1 .if \enc - vmovdqu8 V0, V1{%k1}{z} + vmovdqu8 %zmm0, %zmm1{%k1}{z} .endif - vpshufb BSWAP_MASK, V1, V0 - vpxord GHASH_ACC, V0, V0 - _ghash_mul_noreduce H_POW1, V0, LO, MI, HI, GHASHDATA3, V1, V2, V3 + vpshufb BSWAP_MASK, %zmm1, %zmm0 + vpxord GHASH_ACC, %zmm0, %zmm0 + _ghash_mul_noreduce H_POW1, %zmm0, LO, MI, HI, \ + GHASHDATA3, %zmm1, %zmm2, %zmm3 vpxor GHASH_ACC_XMM, GHASH_ACC_XMM, GHASH_ACC_XMM - add $VL, POWERS_PTR - add $VL, SRC - add $VL, DST - sub $VL, DATALEN + add $64, POWERS_PTR + add $64, SRC + add $64, DST + sub $64, DATALEN jg .Lcrypt_loop_1x\@ // Finally, do the GHASH reduction. - _ghash_reduce LO, MI, HI, GFPOLY, V0 + _ghash_reduce LO, MI, HI, GFPOLY, %zmm0 _horizontal_xor HI, HI_XMM, GHASH_ACC_XMM, %xmm0, %xmm1, %xmm2 .Ldone\@: @@ -944,14 +1011,14 @@ RET .endm -// void aes_gcm_enc_final_vaes_avx10(const struct aes_gcm_key_avx10 *key, -// const u32 le_ctr[4], u8 ghash_acc[16], -// u64 total_aadlen, u64 total_datalen); -// bool aes_gcm_dec_final_vaes_avx10(const struct aes_gcm_key_avx10 *key, -// const u32 le_ctr[4], -// const u8 ghash_acc[16], -// u64 total_aadlen, u64 total_datalen, -// const u8 tag[16], int taglen); +// void aes_gcm_enc_final_vaes_avx512(const struct aes_gcm_key_vaes_avx512 *key, +// const u32 le_ctr[4], u8 ghash_acc[16], +// u64 total_aadlen, u64 total_datalen); +// bool aes_gcm_dec_final_vaes_avx512(const struct aes_gcm_key_vaes_avx512 *key, +// const u32 le_ctr[4], +// const u8 ghash_acc[16], +// u64 total_aadlen, u64 total_datalen, +// const u8 tag[16], int taglen); // // This macro generates one of the above two functions (with \enc selecting // which one). Both functions finish computing the GCM authentication tag by @@ -1081,119 +1148,16 @@ RET .endm -_set_veclen 32 -SYM_FUNC_START(aes_gcm_precompute_vaes_avx10_256) - _aes_gcm_precompute -SYM_FUNC_END(aes_gcm_precompute_vaes_avx10_256) -SYM_FUNC_START(aes_gcm_enc_update_vaes_avx10_256) - _aes_gcm_update 1 -SYM_FUNC_END(aes_gcm_enc_update_vaes_avx10_256) -SYM_FUNC_START(aes_gcm_dec_update_vaes_avx10_256) - _aes_gcm_update 0 -SYM_FUNC_END(aes_gcm_dec_update_vaes_avx10_256) - -_set_veclen 64 -SYM_FUNC_START(aes_gcm_precompute_vaes_avx10_512) - _aes_gcm_precompute -SYM_FUNC_END(aes_gcm_precompute_vaes_avx10_512) -SYM_FUNC_START(aes_gcm_enc_update_vaes_avx10_512) +SYM_FUNC_START(aes_gcm_enc_update_vaes_avx512) _aes_gcm_update 1 -SYM_FUNC_END(aes_gcm_enc_update_vaes_avx10_512) -SYM_FUNC_START(aes_gcm_dec_update_vaes_avx10_512) +SYM_FUNC_END(aes_gcm_enc_update_vaes_avx512) +SYM_FUNC_START(aes_gcm_dec_update_vaes_avx512) _aes_gcm_update 0 -SYM_FUNC_END(aes_gcm_dec_update_vaes_avx10_512) - -// void aes_gcm_aad_update_vaes_avx10(const struct aes_gcm_key_avx10 *key, -// u8 ghash_acc[16], -// const u8 *aad, int aadlen); -// -// This function processes the AAD (Additional Authenticated Data) in GCM. -// Using the key |key|, it updates the GHASH accumulator |ghash_acc| with the -// data given by |aad| and |aadlen|. |key->ghash_key_powers| must have been -// initialized. On the first call, |ghash_acc| must be all zeroes. |aadlen| -// must be a multiple of 16, except on the last call where it can be any length. -// The caller must do any buffering needed to ensure this. -// -// AES-GCM is almost always used with small amounts of AAD, less than 32 bytes. 
-// Therefore, for AAD processing we currently only provide this implementation -// which uses 256-bit vectors (ymm registers) and only has a 1x-wide loop. This -// keeps the code size down, and it enables some micro-optimizations, e.g. using -// VEX-coded instructions instead of EVEX-coded to save some instruction bytes. -// To optimize for large amounts of AAD, we could implement a 4x-wide loop and -// provide a version using 512-bit vectors, but that doesn't seem to be useful. -SYM_FUNC_START(aes_gcm_aad_update_vaes_avx10) - - // Function arguments - .set KEY, %rdi - .set GHASH_ACC_PTR, %rsi - .set AAD, %rdx - .set AADLEN, %ecx - .set AADLEN64, %rcx // Zero-extend AADLEN before using! - - // Additional local variables. - // %rax, %ymm0-%ymm3, and %k1 are used as temporary registers. - .set BSWAP_MASK, %ymm4 - .set GFPOLY, %ymm5 - .set GHASH_ACC, %ymm6 - .set GHASH_ACC_XMM, %xmm6 - .set H_POW1, %ymm7 - - // Load some constants. - vbroadcasti128 .Lbswap_mask(%rip), BSWAP_MASK - vbroadcasti128 .Lgfpoly(%rip), GFPOLY - - // Load the GHASH accumulator. - vmovdqu (GHASH_ACC_PTR), GHASH_ACC_XMM - - // Update GHASH with 32 bytes of AAD at a time. - // - // Pre-subtracting 32 from AADLEN saves an instruction from the loop and - // also ensures that at least one write always occurs to AADLEN, - // zero-extending it and allowing AADLEN64 to be used later. - sub $32, AADLEN - jl .Laad_loop_1x_done - vmovdqu8 OFFSETOFEND_H_POWERS-32(KEY), H_POW1 // [H^2, H^1] -.Laad_loop_1x: - vmovdqu (AAD), %ymm0 - vpshufb BSWAP_MASK, %ymm0, %ymm0 - vpxor %ymm0, GHASH_ACC, GHASH_ACC - _ghash_mul H_POW1, GHASH_ACC, GHASH_ACC, GFPOLY, \ - %ymm0, %ymm1, %ymm2 - vextracti128 $1, GHASH_ACC, %xmm0 - vpxor %xmm0, GHASH_ACC_XMM, GHASH_ACC_XMM - add $32, AAD - sub $32, AADLEN - jge .Laad_loop_1x -.Laad_loop_1x_done: - add $32, AADLEN - jz .Laad_done - - // Update GHASH with the remaining 1 <= AADLEN < 32 bytes of AAD. - mov $-1, %eax - bzhi AADLEN, %eax, %eax - kmovd %eax, %k1 - vmovdqu8 (AAD), %ymm0{%k1}{z} - neg AADLEN64 - and $~15, AADLEN64 // -round_up(AADLEN, 16) - vmovdqu8 OFFSETOFEND_H_POWERS(KEY,AADLEN64), H_POW1 - vpshufb BSWAP_MASK, %ymm0, %ymm0 - vpxor %ymm0, GHASH_ACC, GHASH_ACC - _ghash_mul H_POW1, GHASH_ACC, GHASH_ACC, GFPOLY, \ - %ymm0, %ymm1, %ymm2 - vextracti128 $1, GHASH_ACC, %xmm0 - vpxor %xmm0, GHASH_ACC_XMM, GHASH_ACC_XMM - -.Laad_done: - // Store the updated GHASH accumulator back to memory. - vmovdqu GHASH_ACC_XMM, (GHASH_ACC_PTR) - - vzeroupper // This is needed after using ymm or zmm registers. 
- RET -SYM_FUNC_END(aes_gcm_aad_update_vaes_avx10) +SYM_FUNC_END(aes_gcm_dec_update_vaes_avx512) -SYM_FUNC_START(aes_gcm_enc_final_vaes_avx10) +SYM_FUNC_START(aes_gcm_enc_final_vaes_avx512) _aes_gcm_final 1 -SYM_FUNC_END(aes_gcm_enc_final_vaes_avx10) -SYM_FUNC_START(aes_gcm_dec_final_vaes_avx10) +SYM_FUNC_END(aes_gcm_enc_final_vaes_avx512) +SYM_FUNC_START(aes_gcm_dec_final_vaes_avx512) _aes_gcm_final 0 -SYM_FUNC_END(aes_gcm_dec_final_vaes_avx10) +SYM_FUNC_END(aes_gcm_dec_final_vaes_avx512) diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index d953ac470aae..bb6e2c47ffc6 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -874,8 +874,38 @@ struct aes_gcm_key_aesni { #define AES_GCM_KEY_AESNI_SIZE \ (sizeof(struct aes_gcm_key_aesni) + (15 & ~(CRYPTO_MINALIGN - 1))) -/* Key struct used by the VAES + AVX10 implementations of AES-GCM */ -struct aes_gcm_key_avx10 { +/* Key struct used by the VAES + AVX2 implementation of AES-GCM */ +struct aes_gcm_key_vaes_avx2 { + /* + * Common part of the key. The assembly code prefers 16-byte alignment + * for the round keys; we get this by them being located at the start of + * the struct and the whole struct being 32-byte aligned. + */ + struct aes_gcm_key base; + + /* + * Powers of the hash key H^8 through H^1. These are 128-bit values. + * They all have an extra factor of x^-1 and are byte-reversed. + * The assembly code prefers 32-byte alignment for this. + */ + u64 h_powers[8][2] __aligned(32); + + /* + * Each entry in this array contains the two halves of an entry of + * h_powers XOR'd together, in the following order: + * H^8,H^6,H^7,H^5,H^4,H^2,H^3,H^1 i.e. indices 0,2,1,3,4,6,5,7. + * This is used for Karatsuba multiplication. + */ + u64 h_powers_xored[8]; +}; + +#define AES_GCM_KEY_VAES_AVX2(key) \ + container_of((key), struct aes_gcm_key_vaes_avx2, base) +#define AES_GCM_KEY_VAES_AVX2_SIZE \ + (sizeof(struct aes_gcm_key_vaes_avx2) + (31 & ~(CRYPTO_MINALIGN - 1))) + +/* Key struct used by the VAES + AVX512 implementation of AES-GCM */ +struct aes_gcm_key_vaes_avx512 { /* * Common part of the key. 
The assembly code prefers 16-byte alignment * for the round keys; we get this by them being located at the start of @@ -895,10 +925,10 @@ struct aes_gcm_key_avx10 { /* Three padding blocks required by the assembly code */ u64 padding[3][2]; }; -#define AES_GCM_KEY_AVX10(key) \ - container_of((key), struct aes_gcm_key_avx10, base) -#define AES_GCM_KEY_AVX10_SIZE \ - (sizeof(struct aes_gcm_key_avx10) + (63 & ~(CRYPTO_MINALIGN - 1))) +#define AES_GCM_KEY_VAES_AVX512(key) \ + container_of((key), struct aes_gcm_key_vaes_avx512, base) +#define AES_GCM_KEY_VAES_AVX512_SIZE \ + (sizeof(struct aes_gcm_key_vaes_avx512) + (63 & ~(CRYPTO_MINALIGN - 1))) /* * These flags are passed to the AES-GCM helper functions to specify the @@ -910,14 +940,16 @@ struct aes_gcm_key_avx10 { #define FLAG_RFC4106 BIT(0) #define FLAG_ENC BIT(1) #define FLAG_AVX BIT(2) -#define FLAG_AVX10_256 BIT(3) -#define FLAG_AVX10_512 BIT(4) +#define FLAG_VAES_AVX2 BIT(3) +#define FLAG_VAES_AVX512 BIT(4) static inline struct aes_gcm_key * aes_gcm_key_get(struct crypto_aead *tfm, int flags) { - if (flags & (FLAG_AVX10_256 | FLAG_AVX10_512)) + if (flags & FLAG_VAES_AVX512) return PTR_ALIGN(crypto_aead_ctx(tfm), 64); + else if (flags & FLAG_VAES_AVX2) + return PTR_ALIGN(crypto_aead_ctx(tfm), 32); else return PTR_ALIGN(crypto_aead_ctx(tfm), 16); } @@ -927,26 +959,16 @@ aes_gcm_precompute_aesni(struct aes_gcm_key_aesni *key); asmlinkage void aes_gcm_precompute_aesni_avx(struct aes_gcm_key_aesni *key); asmlinkage void -aes_gcm_precompute_vaes_avx10_256(struct aes_gcm_key_avx10 *key); +aes_gcm_precompute_vaes_avx2(struct aes_gcm_key_vaes_avx2 *key); asmlinkage void -aes_gcm_precompute_vaes_avx10_512(struct aes_gcm_key_avx10 *key); +aes_gcm_precompute_vaes_avx512(struct aes_gcm_key_vaes_avx512 *key); static void aes_gcm_precompute(struct aes_gcm_key *key, int flags) { - /* - * To make things a bit easier on the assembly side, the AVX10 - * implementations use the same key format. Therefore, a single - * function using 256-bit vectors would suffice here. However, it's - * straightforward to provide a 512-bit one because of how the assembly - * code is structured, and it works nicely because the total size of the - * key powers is a multiple of 512 bits. So we take advantage of that. - * - * A similar situation applies to the AES-NI implementations. 
- */ - if (flags & FLAG_AVX10_512) - aes_gcm_precompute_vaes_avx10_512(AES_GCM_KEY_AVX10(key)); - else if (flags & FLAG_AVX10_256) - aes_gcm_precompute_vaes_avx10_256(AES_GCM_KEY_AVX10(key)); + if (flags & FLAG_VAES_AVX512) + aes_gcm_precompute_vaes_avx512(AES_GCM_KEY_VAES_AVX512(key)); + else if (flags & FLAG_VAES_AVX2) + aes_gcm_precompute_vaes_avx2(AES_GCM_KEY_VAES_AVX2(key)); else if (flags & FLAG_AVX) aes_gcm_precompute_aesni_avx(AES_GCM_KEY_AESNI(key)); else @@ -960,15 +982,21 @@ asmlinkage void aes_gcm_aad_update_aesni_avx(const struct aes_gcm_key_aesni *key, u8 ghash_acc[16], const u8 *aad, int aadlen); asmlinkage void -aes_gcm_aad_update_vaes_avx10(const struct aes_gcm_key_avx10 *key, - u8 ghash_acc[16], const u8 *aad, int aadlen); +aes_gcm_aad_update_vaes_avx2(const struct aes_gcm_key_vaes_avx2 *key, + u8 ghash_acc[16], const u8 *aad, int aadlen); +asmlinkage void +aes_gcm_aad_update_vaes_avx512(const struct aes_gcm_key_vaes_avx512 *key, + u8 ghash_acc[16], const u8 *aad, int aadlen); static void aes_gcm_aad_update(const struct aes_gcm_key *key, u8 ghash_acc[16], const u8 *aad, int aadlen, int flags) { - if (flags & (FLAG_AVX10_256 | FLAG_AVX10_512)) - aes_gcm_aad_update_vaes_avx10(AES_GCM_KEY_AVX10(key), ghash_acc, - aad, aadlen); + if (flags & FLAG_VAES_AVX512) + aes_gcm_aad_update_vaes_avx512(AES_GCM_KEY_VAES_AVX512(key), + ghash_acc, aad, aadlen); + else if (flags & FLAG_VAES_AVX2) + aes_gcm_aad_update_vaes_avx2(AES_GCM_KEY_VAES_AVX2(key), + ghash_acc, aad, aadlen); else if (flags & FLAG_AVX) aes_gcm_aad_update_aesni_avx(AES_GCM_KEY_AESNI(key), ghash_acc, aad, aadlen); @@ -986,13 +1014,13 @@ aes_gcm_enc_update_aesni_avx(const struct aes_gcm_key_aesni *key, const u32 le_ctr[4], u8 ghash_acc[16], const u8 *src, u8 *dst, int datalen); asmlinkage void -aes_gcm_enc_update_vaes_avx10_256(const struct aes_gcm_key_avx10 *key, - const u32 le_ctr[4], u8 ghash_acc[16], - const u8 *src, u8 *dst, int datalen); +aes_gcm_enc_update_vaes_avx2(const struct aes_gcm_key_vaes_avx2 *key, + const u32 le_ctr[4], u8 ghash_acc[16], + const u8 *src, u8 *dst, int datalen); asmlinkage void -aes_gcm_enc_update_vaes_avx10_512(const struct aes_gcm_key_avx10 *key, - const u32 le_ctr[4], u8 ghash_acc[16], - const u8 *src, u8 *dst, int datalen); +aes_gcm_enc_update_vaes_avx512(const struct aes_gcm_key_vaes_avx512 *key, + const u32 le_ctr[4], u8 ghash_acc[16], + const u8 *src, u8 *dst, int datalen); asmlinkage void aes_gcm_dec_update_aesni(const struct aes_gcm_key_aesni *key, @@ -1003,13 +1031,13 @@ aes_gcm_dec_update_aesni_avx(const struct aes_gcm_key_aesni *key, const u32 le_ctr[4], u8 ghash_acc[16], const u8 *src, u8 *dst, int datalen); asmlinkage void -aes_gcm_dec_update_vaes_avx10_256(const struct aes_gcm_key_avx10 *key, - const u32 le_ctr[4], u8 ghash_acc[16], - const u8 *src, u8 *dst, int datalen); +aes_gcm_dec_update_vaes_avx2(const struct aes_gcm_key_vaes_avx2 *key, + const u32 le_ctr[4], u8 ghash_acc[16], + const u8 *src, u8 *dst, int datalen); asmlinkage void -aes_gcm_dec_update_vaes_avx10_512(const struct aes_gcm_key_avx10 *key, - const u32 le_ctr[4], u8 ghash_acc[16], - const u8 *src, u8 *dst, int datalen); +aes_gcm_dec_update_vaes_avx512(const struct aes_gcm_key_vaes_avx512 *key, + const u32 le_ctr[4], u8 ghash_acc[16], + const u8 *src, u8 *dst, int datalen); /* __always_inline to optimize out the branches based on @flags */ static __always_inline void @@ -1018,14 +1046,14 @@ aes_gcm_update(const struct aes_gcm_key *key, const u8 *src, u8 *dst, int datalen, int flags) { if (flags & FLAG_ENC) { - 
if (flags & FLAG_AVX10_512) - aes_gcm_enc_update_vaes_avx10_512(AES_GCM_KEY_AVX10(key), - le_ctr, ghash_acc, - src, dst, datalen); - else if (flags & FLAG_AVX10_256) - aes_gcm_enc_update_vaes_avx10_256(AES_GCM_KEY_AVX10(key), - le_ctr, ghash_acc, - src, dst, datalen); + if (flags & FLAG_VAES_AVX512) + aes_gcm_enc_update_vaes_avx512(AES_GCM_KEY_VAES_AVX512(key), + le_ctr, ghash_acc, + src, dst, datalen); + else if (flags & FLAG_VAES_AVX2) + aes_gcm_enc_update_vaes_avx2(AES_GCM_KEY_VAES_AVX2(key), + le_ctr, ghash_acc, + src, dst, datalen); else if (flags & FLAG_AVX) aes_gcm_enc_update_aesni_avx(AES_GCM_KEY_AESNI(key), le_ctr, ghash_acc, @@ -1034,14 +1062,14 @@ aes_gcm_update(const struct aes_gcm_key *key, aes_gcm_enc_update_aesni(AES_GCM_KEY_AESNI(key), le_ctr, ghash_acc, src, dst, datalen); } else { - if (flags & FLAG_AVX10_512) - aes_gcm_dec_update_vaes_avx10_512(AES_GCM_KEY_AVX10(key), - le_ctr, ghash_acc, - src, dst, datalen); - else if (flags & FLAG_AVX10_256) - aes_gcm_dec_update_vaes_avx10_256(AES_GCM_KEY_AVX10(key), - le_ctr, ghash_acc, - src, dst, datalen); + if (flags & FLAG_VAES_AVX512) + aes_gcm_dec_update_vaes_avx512(AES_GCM_KEY_VAES_AVX512(key), + le_ctr, ghash_acc, + src, dst, datalen); + else if (flags & FLAG_VAES_AVX2) + aes_gcm_dec_update_vaes_avx2(AES_GCM_KEY_VAES_AVX2(key), + le_ctr, ghash_acc, + src, dst, datalen); else if (flags & FLAG_AVX) aes_gcm_dec_update_aesni_avx(AES_GCM_KEY_AESNI(key), le_ctr, ghash_acc, @@ -1062,9 +1090,13 @@ aes_gcm_enc_final_aesni_avx(const struct aes_gcm_key_aesni *key, const u32 le_ctr[4], u8 ghash_acc[16], u64 total_aadlen, u64 total_datalen); asmlinkage void -aes_gcm_enc_final_vaes_avx10(const struct aes_gcm_key_avx10 *key, - const u32 le_ctr[4], u8 ghash_acc[16], - u64 total_aadlen, u64 total_datalen); +aes_gcm_enc_final_vaes_avx2(const struct aes_gcm_key_vaes_avx2 *key, + const u32 le_ctr[4], u8 ghash_acc[16], + u64 total_aadlen, u64 total_datalen); +asmlinkage void +aes_gcm_enc_final_vaes_avx512(const struct aes_gcm_key_vaes_avx512 *key, + const u32 le_ctr[4], u8 ghash_acc[16], + u64 total_aadlen, u64 total_datalen); /* __always_inline to optimize out the branches based on @flags */ static __always_inline void @@ -1072,10 +1104,14 @@ aes_gcm_enc_final(const struct aes_gcm_key *key, const u32 le_ctr[4], u8 ghash_acc[16], u64 total_aadlen, u64 total_datalen, int flags) { - if (flags & (FLAG_AVX10_256 | FLAG_AVX10_512)) - aes_gcm_enc_final_vaes_avx10(AES_GCM_KEY_AVX10(key), - le_ctr, ghash_acc, - total_aadlen, total_datalen); + if (flags & FLAG_VAES_AVX512) + aes_gcm_enc_final_vaes_avx512(AES_GCM_KEY_VAES_AVX512(key), + le_ctr, ghash_acc, + total_aadlen, total_datalen); + else if (flags & FLAG_VAES_AVX2) + aes_gcm_enc_final_vaes_avx2(AES_GCM_KEY_VAES_AVX2(key), + le_ctr, ghash_acc, + total_aadlen, total_datalen); else if (flags & FLAG_AVX) aes_gcm_enc_final_aesni_avx(AES_GCM_KEY_AESNI(key), le_ctr, ghash_acc, @@ -1097,10 +1133,15 @@ aes_gcm_dec_final_aesni_avx(const struct aes_gcm_key_aesni *key, u64 total_aadlen, u64 total_datalen, const u8 tag[16], int taglen); asmlinkage bool __must_check -aes_gcm_dec_final_vaes_avx10(const struct aes_gcm_key_avx10 *key, - const u32 le_ctr[4], const u8 ghash_acc[16], - u64 total_aadlen, u64 total_datalen, - const u8 tag[16], int taglen); +aes_gcm_dec_final_vaes_avx2(const struct aes_gcm_key_vaes_avx2 *key, + const u32 le_ctr[4], const u8 ghash_acc[16], + u64 total_aadlen, u64 total_datalen, + const u8 tag[16], int taglen); +asmlinkage bool __must_check +aes_gcm_dec_final_vaes_avx512(const struct 
aes_gcm_key_vaes_avx512 *key, + const u32 le_ctr[4], const u8 ghash_acc[16], + u64 total_aadlen, u64 total_datalen, + const u8 tag[16], int taglen); /* __always_inline to optimize out the branches based on @flags */ static __always_inline bool __must_check @@ -1108,11 +1149,16 @@ aes_gcm_dec_final(const struct aes_gcm_key *key, const u32 le_ctr[4], u8 ghash_acc[16], u64 total_aadlen, u64 total_datalen, u8 tag[16], int taglen, int flags) { - if (flags & (FLAG_AVX10_256 | FLAG_AVX10_512)) - return aes_gcm_dec_final_vaes_avx10(AES_GCM_KEY_AVX10(key), - le_ctr, ghash_acc, - total_aadlen, total_datalen, - tag, taglen); + if (flags & FLAG_VAES_AVX512) + return aes_gcm_dec_final_vaes_avx512(AES_GCM_KEY_VAES_AVX512(key), + le_ctr, ghash_acc, + total_aadlen, total_datalen, + tag, taglen); + else if (flags & FLAG_VAES_AVX2) + return aes_gcm_dec_final_vaes_avx2(AES_GCM_KEY_VAES_AVX2(key), + le_ctr, ghash_acc, + total_aadlen, total_datalen, + tag, taglen); else if (flags & FLAG_AVX) return aes_gcm_dec_final_aesni_avx(AES_GCM_KEY_AESNI(key), le_ctr, ghash_acc, @@ -1195,10 +1241,14 @@ static int gcm_setkey(struct crypto_aead *tfm, const u8 *raw_key, BUILD_BUG_ON(offsetof(struct aes_gcm_key_aesni, h_powers) != 496); BUILD_BUG_ON(offsetof(struct aes_gcm_key_aesni, h_powers_xored) != 624); BUILD_BUG_ON(offsetof(struct aes_gcm_key_aesni, h_times_x64) != 688); - BUILD_BUG_ON(offsetof(struct aes_gcm_key_avx10, base.aes_key.key_enc) != 0); - BUILD_BUG_ON(offsetof(struct aes_gcm_key_avx10, base.aes_key.key_length) != 480); - BUILD_BUG_ON(offsetof(struct aes_gcm_key_avx10, h_powers) != 512); - BUILD_BUG_ON(offsetof(struct aes_gcm_key_avx10, padding) != 768); + BUILD_BUG_ON(offsetof(struct aes_gcm_key_vaes_avx2, base.aes_key.key_enc) != 0); + BUILD_BUG_ON(offsetof(struct aes_gcm_key_vaes_avx2, base.aes_key.key_length) != 480); + BUILD_BUG_ON(offsetof(struct aes_gcm_key_vaes_avx2, h_powers) != 512); + BUILD_BUG_ON(offsetof(struct aes_gcm_key_vaes_avx2, h_powers_xored) != 640); + BUILD_BUG_ON(offsetof(struct aes_gcm_key_vaes_avx512, base.aes_key.key_enc) != 0); + BUILD_BUG_ON(offsetof(struct aes_gcm_key_vaes_avx512, base.aes_key.key_length) != 480); + BUILD_BUG_ON(offsetof(struct aes_gcm_key_vaes_avx512, h_powers) != 512); + BUILD_BUG_ON(offsetof(struct aes_gcm_key_vaes_avx512, padding) != 768); if (likely(crypto_simd_usable())) { err = aes_check_keylen(keylen); @@ -1231,8 +1281,9 @@ static int gcm_setkey(struct crypto_aead *tfm, const u8 *raw_key, gf128mul_lle(&h, (const be128 *)x_to_the_minus1); /* Compute the needed key powers */ - if (flags & (FLAG_AVX10_256 | FLAG_AVX10_512)) { - struct aes_gcm_key_avx10 *k = AES_GCM_KEY_AVX10(key); + if (flags & FLAG_VAES_AVX512) { + struct aes_gcm_key_vaes_avx512 *k = + AES_GCM_KEY_VAES_AVX512(key); for (i = ARRAY_SIZE(k->h_powers) - 1; i >= 0; i--) { k->h_powers[i][0] = be64_to_cpu(h.b); @@ -1240,6 +1291,22 @@ static int gcm_setkey(struct crypto_aead *tfm, const u8 *raw_key, gf128mul_lle(&h, &h1); } memset(k->padding, 0, sizeof(k->padding)); + } else if (flags & FLAG_VAES_AVX2) { + struct aes_gcm_key_vaes_avx2 *k = + AES_GCM_KEY_VAES_AVX2(key); + static const u8 indices[8] = { 0, 2, 1, 3, 4, 6, 5, 7 }; + + for (i = ARRAY_SIZE(k->h_powers) - 1; i >= 0; i--) { + k->h_powers[i][0] = be64_to_cpu(h.b); + k->h_powers[i][1] = be64_to_cpu(h.a); + gf128mul_lle(&h, &h1); + } + for (i = 0; i < ARRAY_SIZE(k->h_powers_xored); i++) { + int j = indices[i]; + + k->h_powers_xored[i] = k->h_powers[j][0] ^ + k->h_powers[j][1]; + } } else { struct aes_gcm_key_aesni *k = AES_GCM_KEY_AESNI(key); 
@@ -1508,15 +1575,15 @@ DEFINE_GCM_ALGS(aesni_avx, FLAG_AVX, "generic-gcm-aesni-avx", "rfc4106-gcm-aesni-avx", AES_GCM_KEY_AESNI_SIZE, 500); -/* aes_gcm_algs_vaes_avx10_256 */ -DEFINE_GCM_ALGS(vaes_avx10_256, FLAG_AVX10_256, - "generic-gcm-vaes-avx10_256", "rfc4106-gcm-vaes-avx10_256", - AES_GCM_KEY_AVX10_SIZE, 700); +/* aes_gcm_algs_vaes_avx2 */ +DEFINE_GCM_ALGS(vaes_avx2, FLAG_VAES_AVX2, + "generic-gcm-vaes-avx2", "rfc4106-gcm-vaes-avx2", + AES_GCM_KEY_VAES_AVX2_SIZE, 600); -/* aes_gcm_algs_vaes_avx10_512 */ -DEFINE_GCM_ALGS(vaes_avx10_512, FLAG_AVX10_512, - "generic-gcm-vaes-avx10_512", "rfc4106-gcm-vaes-avx10_512", - AES_GCM_KEY_AVX10_SIZE, 800); +/* aes_gcm_algs_vaes_avx512 */ +DEFINE_GCM_ALGS(vaes_avx512, FLAG_VAES_AVX512, + "generic-gcm-vaes-avx512", "rfc4106-gcm-vaes-avx512", + AES_GCM_KEY_VAES_AVX512_SIZE, 800); static int __init register_avx_algs(void) { @@ -1548,6 +1615,10 @@ static int __init register_avx_algs(void) ARRAY_SIZE(skcipher_algs_vaes_avx2)); if (err) return err; + err = crypto_register_aeads(aes_gcm_algs_vaes_avx2, + ARRAY_SIZE(aes_gcm_algs_vaes_avx2)); + if (err) + return err; if (!boot_cpu_has(X86_FEATURE_AVX512BW) || !boot_cpu_has(X86_FEATURE_AVX512VL) || @@ -1556,26 +1627,21 @@ static int __init register_avx_algs(void) XFEATURE_MASK_AVX512, NULL)) return 0; - err = crypto_register_aeads(aes_gcm_algs_vaes_avx10_256, - ARRAY_SIZE(aes_gcm_algs_vaes_avx10_256)); - if (err) - return err; - if (boot_cpu_has(X86_FEATURE_PREFER_YMM)) { int i; for (i = 0; i < ARRAY_SIZE(skcipher_algs_vaes_avx512); i++) skcipher_algs_vaes_avx512[i].base.cra_priority = 1; - for (i = 0; i < ARRAY_SIZE(aes_gcm_algs_vaes_avx10_512); i++) - aes_gcm_algs_vaes_avx10_512[i].base.cra_priority = 1; + for (i = 0; i < ARRAY_SIZE(aes_gcm_algs_vaes_avx512); i++) + aes_gcm_algs_vaes_avx512[i].base.cra_priority = 1; } err = crypto_register_skciphers(skcipher_algs_vaes_avx512, ARRAY_SIZE(skcipher_algs_vaes_avx512)); if (err) return err; - err = crypto_register_aeads(aes_gcm_algs_vaes_avx10_512, - ARRAY_SIZE(aes_gcm_algs_vaes_avx10_512)); + err = crypto_register_aeads(aes_gcm_algs_vaes_avx512, + ARRAY_SIZE(aes_gcm_algs_vaes_avx512)); if (err) return err; @@ -1595,8 +1661,8 @@ static void unregister_avx_algs(void) unregister_aeads(aes_gcm_algs_aesni_avx); unregister_skciphers(skcipher_algs_vaes_avx2); unregister_skciphers(skcipher_algs_vaes_avx512); - unregister_aeads(aes_gcm_algs_vaes_avx10_256); - unregister_aeads(aes_gcm_algs_vaes_avx10_512); + unregister_aeads(aes_gcm_algs_vaes_avx2); + unregister_aeads(aes_gcm_algs_vaes_avx512); } #else /* CONFIG_X86_64 */ static struct aead_alg aes_gcm_algs_aesni[0]; diff --git a/arch/x86/crypto/polyval-clmulni_asm.S b/arch/x86/crypto/polyval-clmulni_asm.S deleted file mode 100644 index a6ebe4e7dd2b..000000000000 --- a/arch/x86/crypto/polyval-clmulni_asm.S +++ /dev/null @@ -1,321 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright 2021 Google LLC - */ -/* - * This is an efficient implementation of POLYVAL using intel PCLMULQDQ-NI - * instructions. It works on 8 blocks at a time, by precomputing the first 8 - * keys powers h^8, ..., h^1 in the POLYVAL finite field. This precomputation - * allows us to split finite field multiplication into two steps. - * - * In the first step, we consider h^i, m_i as normal polynomials of degree less - * than 128. We then compute p(x) = h^8m_0 + ... + h^1m_7 where multiplication - * is simply polynomial multiplication. 
- * - * In the second step, we compute the reduction of p(x) modulo the finite field - * modulus g(x) = x^128 + x^127 + x^126 + x^121 + 1. - * - * This two step process is equivalent to computing h^8m_0 + ... + h^1m_7 where - * multiplication is finite field multiplication. The advantage is that the - * two-step process only requires 1 finite field reduction for every 8 - * polynomial multiplications. Further parallelism is gained by interleaving the - * multiplications and polynomial reductions. - */ - -#include <linux/linkage.h> -#include <asm/frame.h> - -#define STRIDE_BLOCKS 8 - -#define GSTAR %xmm7 -#define PL %xmm8 -#define PH %xmm9 -#define TMP_XMM %xmm11 -#define LO %xmm12 -#define HI %xmm13 -#define MI %xmm14 -#define SUM %xmm15 - -#define KEY_POWERS %rdi -#define MSG %rsi -#define BLOCKS_LEFT %rdx -#define ACCUMULATOR %rcx -#define TMP %rax - -.section .rodata.cst16.gstar, "aM", @progbits, 16 -.align 16 - -.Lgstar: - .quad 0xc200000000000000, 0xc200000000000000 - -.text - -/* - * Performs schoolbook1_iteration on two lists of 128-bit polynomials of length - * count pointed to by MSG and KEY_POWERS. - */ -.macro schoolbook1 count - .set i, 0 - .rept (\count) - schoolbook1_iteration i 0 - .set i, (i +1) - .endr -.endm - -/* - * Computes the product of two 128-bit polynomials at the memory locations - * specified by (MSG + 16*i) and (KEY_POWERS + 16*i) and XORs the components of - * the 256-bit product into LO, MI, HI. - * - * Given: - * X = [X_1 : X_0] - * Y = [Y_1 : Y_0] - * - * We compute: - * LO += X_0 * Y_0 - * MI += X_0 * Y_1 + X_1 * Y_0 - * HI += X_1 * Y_1 - * - * Later, the 256-bit result can be extracted as: - * [HI_1 : HI_0 + MI_1 : LO_1 + MI_0 : LO_0] - * This step is done when computing the polynomial reduction for efficiency - * reasons. - * - * If xor_sum == 1, then also XOR the value of SUM into m_0. This avoids an - * extra multiplication of SUM and h^8. - */ -.macro schoolbook1_iteration i xor_sum - movups (16*\i)(MSG), %xmm0 - .if (\i == 0 && \xor_sum == 1) - pxor SUM, %xmm0 - .endif - vpclmulqdq $0x01, (16*\i)(KEY_POWERS), %xmm0, %xmm2 - vpclmulqdq $0x00, (16*\i)(KEY_POWERS), %xmm0, %xmm1 - vpclmulqdq $0x10, (16*\i)(KEY_POWERS), %xmm0, %xmm3 - vpclmulqdq $0x11, (16*\i)(KEY_POWERS), %xmm0, %xmm4 - vpxor %xmm2, MI, MI - vpxor %xmm1, LO, LO - vpxor %xmm4, HI, HI - vpxor %xmm3, MI, MI -.endm - -/* - * Performs the same computation as schoolbook1_iteration, except we expect the - * arguments to already be loaded into xmm0 and xmm1 and we set the result - * registers LO, MI, and HI directly rather than XOR'ing into them. - */ -.macro schoolbook1_noload - vpclmulqdq $0x01, %xmm0, %xmm1, MI - vpclmulqdq $0x10, %xmm0, %xmm1, %xmm2 - vpclmulqdq $0x00, %xmm0, %xmm1, LO - vpclmulqdq $0x11, %xmm0, %xmm1, HI - vpxor %xmm2, MI, MI -.endm - -/* - * Computes the 256-bit polynomial represented by LO, HI, MI. Stores - * the result in PL, PH. - * [PH : PL] = [HI_1 : HI_0 + MI_1 : LO_1 + MI_0 : LO_0] - */ -.macro schoolbook2 - vpslldq $8, MI, PL - vpsrldq $8, MI, PH - pxor LO, PL - pxor HI, PH -.endm - -/* - * Computes the 128-bit reduction of PH : PL. Stores the result in dest. - * - * This macro computes p(x) mod g(x) where p(x) is in montgomery form and g(x) = - * x^128 + x^127 + x^126 + x^121 + 1. - * - * We have a 256-bit polynomial PH : PL = P_3 : P_2 : P_1 : P_0 that is the - * product of two 128-bit polynomials in Montgomery form. We need to reduce it - * mod g(x). 
Also, since polynomials in Montgomery form have an "extra" factor - * of x^128, this product has two extra factors of x^128. To get it back into - * Montgomery form, we need to remove one of these factors by dividing by x^128. - * - * To accomplish both of these goals, we add multiples of g(x) that cancel out - * the low 128 bits P_1 : P_0, leaving just the high 128 bits. Since the low - * bits are zero, the polynomial division by x^128 can be done by right shifting. - * - * Since the only nonzero term in the low 64 bits of g(x) is the constant term, - * the multiple of g(x) needed to cancel out P_0 is P_0 * g(x). The CPU can - * only do 64x64 bit multiplications, so split P_0 * g(x) into x^128 * P_0 + - * x^64 * g*(x) * P_0 + P_0, where g*(x) is bits 64-127 of g(x). Adding this to - * the original polynomial gives P_3 : P_2 + P_0 + T_1 : P_1 + T_0 : 0, where T - * = T_1 : T_0 = g*(x) * P_0. Thus, bits 0-63 got "folded" into bits 64-191. - * - * Repeating this same process on the next 64 bits "folds" bits 64-127 into bits - * 128-255, giving the answer in bits 128-255. This time, we need to cancel P_1 - * + T_0 in bits 64-127. The multiple of g(x) required is (P_1 + T_0) * g(x) * - * x^64. Adding this to our previous computation gives P_3 + P_1 + T_0 + V_1 : - * P_2 + P_0 + T_1 + V_0 : 0 : 0, where V = V_1 : V_0 = g*(x) * (P_1 + T_0). - * - * So our final computation is: - * T = T_1 : T_0 = g*(x) * P_0 - * V = V_1 : V_0 = g*(x) * (P_1 + T_0) - * p(x) / x^{128} mod g(x) = P_3 + P_1 + T_0 + V_1 : P_2 + P_0 + T_1 + V_0 - * - * The implementation below saves a XOR instruction by computing P_1 + T_0 : P_0 - * + T_1 and XORing into dest, rather than separately XORing P_1 : P_0 and T_0 : - * T_1 into dest. This allows us to reuse P_1 + T_0 when computing V. - */ -.macro montgomery_reduction dest - vpclmulqdq $0x00, PL, GSTAR, TMP_XMM # TMP_XMM = T_1 : T_0 = P_0 * g*(x) - pshufd $0b01001110, TMP_XMM, TMP_XMM # TMP_XMM = T_0 : T_1 - pxor PL, TMP_XMM # TMP_XMM = P_1 + T_0 : P_0 + T_1 - pxor TMP_XMM, PH # PH = P_3 + P_1 + T_0 : P_2 + P_0 + T_1 - pclmulqdq $0x11, GSTAR, TMP_XMM # TMP_XMM = V_1 : V_0 = V = [(P_1 + T_0) * g*(x)] - vpxor TMP_XMM, PH, \dest -.endm - -/* - * Compute schoolbook multiplication for 8 blocks - * m_0h^8 + ... + m_7h^1 - * - * If reduce is set, also computes the montgomery reduction of the - * previous full_stride call and XORs with the first message block. - * (m_0 + REDUCE(PL, PH))h^8 + ... + m_7h^1. - * I.e., the first multiplication uses m_0 + REDUCE(PL, PH) instead of m_0. 
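[Editor's sketch -- illustration only, not part of this patch.] The schoolbook/montgomery_reduction/full_stride comments above fully specify the deleted implementation's two-step scheme; the following plain-C rendering may make it easier to follow. It assumes a hypothetical clmul64() helper returning the 128-bit carry-less (GF(2)) product of two 64-bit values (i.e. what a single PCLMULQDQ does); every constant and formula is taken from the comments in this file.

typedef unsigned long long u64;
struct u128 { u64 lo, hi; };

struct u128 clmul64(u64 a, u64 b);	/* assumed helper, not shown */

/* g*(x): bits 64..127 of g(x) = x^128 + x^127 + x^126 + x^121 + 1 */
#define GSTAR_QWORD 0xc200000000000000ULL

/*
 * Step 1: schoolbook accumulation over 8 blocks, treating h^i and m_i as
 * ordinary polynomials: p(x) = h^8*m_0 + ... + h^1*m_7.
 */
static void schoolbook8(const struct u128 h_pow[8],	/* h^8 ... h^1 */
			const struct u128 msg[8],	/* m_0 ... m_7 */
			struct u128 *pl, struct u128 *ph)
{
	struct u128 lo = {0}, mi = {0}, hi = {0};
	int i;

	for (i = 0; i < 8; i++) {
		struct u128 x = msg[i], y = h_pow[i];
		struct u128 a = clmul64(x.lo, y.lo);	/* LO ^= X_0 * Y_0 */
		struct u128 b = clmul64(x.lo, y.hi);	/* MI ^= X_0 * Y_1 */
		struct u128 c = clmul64(x.hi, y.lo);	/*       + X_1 * Y_0 */
		struct u128 d = clmul64(x.hi, y.hi);	/* HI ^= X_1 * Y_1 */

		lo.lo ^= a.lo; lo.hi ^= a.hi;
		mi.lo ^= b.lo ^ c.lo; mi.hi ^= b.hi ^ c.hi;
		hi.lo ^= d.lo; hi.hi ^= d.hi;
	}
	/* schoolbook2: [PH : PL] = [HI_1 : HI_0 + MI_1 : LO_1 + MI_0 : LO_0] */
	pl->lo = lo.lo;
	pl->hi = lo.hi ^ mi.lo;
	ph->lo = hi.lo ^ mi.hi;
	ph->hi = hi.hi;
}

/*
 * Step 2: one Montgomery reduction of the 256-bit result
 * PH : PL = P_3 : P_2 : P_1 : P_0, using exactly the formula derived above.
 */
static struct u128 montgomery_reduce(struct u128 pl, struct u128 ph)
{
	struct u128 t = clmul64(pl.lo, GSTAR_QWORD);	/* T = g*(x) * P_0 */
	u64 fold_hi = pl.hi ^ t.lo;			/* P_1 + T_0 */
	u64 fold_lo = pl.lo ^ t.hi;			/* P_0 + T_1 */
	struct u128 v = clmul64(fold_hi, GSTAR_QWORD);	/* V = g*(x) * (P_1 + T_0) */
	struct u128 r;

	r.lo = ph.lo ^ fold_lo ^ v.lo;			/* P_2 + P_0 + T_1 + V_0 */
	r.hi = ph.hi ^ fold_hi ^ v.hi;			/* P_3 + P_1 + T_0 + V_1 */
	return r;
}

As in the assembly, only one reduction is paid for every eight block multiplications, which is the point of the two-step split; the real code additionally interleaves the reduction of the previous stride with the multiplications of the next one (the "reduce" argument of full_stride below).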
- */ -.macro full_stride reduce - pxor LO, LO - pxor HI, HI - pxor MI, MI - - schoolbook1_iteration 7 0 - .if \reduce - vpclmulqdq $0x00, PL, GSTAR, TMP_XMM - .endif - - schoolbook1_iteration 6 0 - .if \reduce - pshufd $0b01001110, TMP_XMM, TMP_XMM - .endif - - schoolbook1_iteration 5 0 - .if \reduce - pxor PL, TMP_XMM - .endif - - schoolbook1_iteration 4 0 - .if \reduce - pxor TMP_XMM, PH - .endif - - schoolbook1_iteration 3 0 - .if \reduce - pclmulqdq $0x11, GSTAR, TMP_XMM - .endif - - schoolbook1_iteration 2 0 - .if \reduce - vpxor TMP_XMM, PH, SUM - .endif - - schoolbook1_iteration 1 0 - - schoolbook1_iteration 0 1 - - addq $(8*16), MSG - schoolbook2 -.endm - -/* - * Process BLOCKS_LEFT blocks, where 0 < BLOCKS_LEFT < STRIDE_BLOCKS - */ -.macro partial_stride - mov BLOCKS_LEFT, TMP - shlq $4, TMP - addq $(16*STRIDE_BLOCKS), KEY_POWERS - subq TMP, KEY_POWERS - - movups (MSG), %xmm0 - pxor SUM, %xmm0 - movaps (KEY_POWERS), %xmm1 - schoolbook1_noload - dec BLOCKS_LEFT - addq $16, MSG - addq $16, KEY_POWERS - - test $4, BLOCKS_LEFT - jz .Lpartial4BlocksDone - schoolbook1 4 - addq $(4*16), MSG - addq $(4*16), KEY_POWERS -.Lpartial4BlocksDone: - test $2, BLOCKS_LEFT - jz .Lpartial2BlocksDone - schoolbook1 2 - addq $(2*16), MSG - addq $(2*16), KEY_POWERS -.Lpartial2BlocksDone: - test $1, BLOCKS_LEFT - jz .LpartialDone - schoolbook1 1 -.LpartialDone: - schoolbook2 - montgomery_reduction SUM -.endm - -/* - * Perform montgomery multiplication in GF(2^128) and store result in op1. - * - * Computes op1*op2*x^{-128} mod x^128 + x^127 + x^126 + x^121 + 1 - * If op1, op2 are in montgomery form, this computes the montgomery - * form of op1*op2. - * - * void clmul_polyval_mul(u8 *op1, const u8 *op2); - */ -SYM_FUNC_START(clmul_polyval_mul) - FRAME_BEGIN - vmovdqa .Lgstar(%rip), GSTAR - movups (%rdi), %xmm0 - movups (%rsi), %xmm1 - schoolbook1_noload - schoolbook2 - montgomery_reduction SUM - movups SUM, (%rdi) - FRAME_END - RET -SYM_FUNC_END(clmul_polyval_mul) - -/* - * Perform polynomial evaluation as specified by POLYVAL. This computes: - * h^n * accumulator + h^n * m_0 + ... + h^1 * m_{n-1} - * where n=nblocks, h is the hash key, and m_i are the message blocks. - * - * rdi - pointer to precomputed key powers h^8 ... h^1 - * rsi - pointer to message blocks - * rdx - number of blocks to hash - * rcx - pointer to the accumulator - * - * void clmul_polyval_update(const struct polyval_tfm_ctx *keys, - * const u8 *in, size_t nblocks, u8 *accumulator); - */ -SYM_FUNC_START(clmul_polyval_update) - FRAME_BEGIN - vmovdqa .Lgstar(%rip), GSTAR - movups (ACCUMULATOR), SUM - subq $STRIDE_BLOCKS, BLOCKS_LEFT - js .LstrideLoopExit - full_stride 0 - subq $STRIDE_BLOCKS, BLOCKS_LEFT - js .LstrideLoopExitReduce -.LstrideLoop: - full_stride 1 - subq $STRIDE_BLOCKS, BLOCKS_LEFT - jns .LstrideLoop -.LstrideLoopExitReduce: - montgomery_reduction SUM -.LstrideLoopExit: - add $STRIDE_BLOCKS, BLOCKS_LEFT - jz .LskipPartial - partial_stride -.LskipPartial: - movups SUM, (ACCUMULATOR) - FRAME_END - RET -SYM_FUNC_END(clmul_polyval_update) diff --git a/arch/x86/crypto/polyval-clmulni_glue.c b/arch/x86/crypto/polyval-clmulni_glue.c deleted file mode 100644 index 6b466867f91a..000000000000 --- a/arch/x86/crypto/polyval-clmulni_glue.c +++ /dev/null @@ -1,180 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Glue code for POLYVAL using PCMULQDQ-NI - * - * Copyright (c) 2007 Nokia Siemens Networks - Mikko Herranen <mh1@iki.fi> - * Copyright (c) 2009 Intel Corp. 
- * Author: Huang Ying <ying.huang@intel.com> - * Copyright 2021 Google LLC - */ - -/* - * Glue code based on ghash-clmulni-intel_glue.c. - * - * This implementation of POLYVAL uses montgomery multiplication - * accelerated by PCLMULQDQ-NI to implement the finite field - * operations. - */ - -#include <asm/cpu_device_id.h> -#include <asm/fpu/api.h> -#include <crypto/internal/hash.h> -#include <crypto/polyval.h> -#include <crypto/utils.h> -#include <linux/errno.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/string.h> - -#define POLYVAL_ALIGN 16 -#define POLYVAL_ALIGN_ATTR __aligned(POLYVAL_ALIGN) -#define POLYVAL_ALIGN_EXTRA ((POLYVAL_ALIGN - 1) & ~(CRYPTO_MINALIGN - 1)) -#define POLYVAL_CTX_SIZE (sizeof(struct polyval_tfm_ctx) + POLYVAL_ALIGN_EXTRA) -#define NUM_KEY_POWERS 8 - -struct polyval_tfm_ctx { - /* - * These powers must be in the order h^8, ..., h^1. - */ - u8 key_powers[NUM_KEY_POWERS][POLYVAL_BLOCK_SIZE] POLYVAL_ALIGN_ATTR; -}; - -struct polyval_desc_ctx { - u8 buffer[POLYVAL_BLOCK_SIZE]; -}; - -asmlinkage void clmul_polyval_update(const struct polyval_tfm_ctx *keys, - const u8 *in, size_t nblocks, u8 *accumulator); -asmlinkage void clmul_polyval_mul(u8 *op1, const u8 *op2); - -static inline struct polyval_tfm_ctx *polyval_tfm_ctx(struct crypto_shash *tfm) -{ - return PTR_ALIGN(crypto_shash_ctx(tfm), POLYVAL_ALIGN); -} - -static void internal_polyval_update(const struct polyval_tfm_ctx *keys, - const u8 *in, size_t nblocks, u8 *accumulator) -{ - kernel_fpu_begin(); - clmul_polyval_update(keys, in, nblocks, accumulator); - kernel_fpu_end(); -} - -static void internal_polyval_mul(u8 *op1, const u8 *op2) -{ - kernel_fpu_begin(); - clmul_polyval_mul(op1, op2); - kernel_fpu_end(); -} - -static int polyval_x86_setkey(struct crypto_shash *tfm, - const u8 *key, unsigned int keylen) -{ - struct polyval_tfm_ctx *tctx = polyval_tfm_ctx(tfm); - int i; - - if (keylen != POLYVAL_BLOCK_SIZE) - return -EINVAL; - - memcpy(tctx->key_powers[NUM_KEY_POWERS-1], key, POLYVAL_BLOCK_SIZE); - - for (i = NUM_KEY_POWERS-2; i >= 0; i--) { - memcpy(tctx->key_powers[i], key, POLYVAL_BLOCK_SIZE); - internal_polyval_mul(tctx->key_powers[i], - tctx->key_powers[i+1]); - } - - return 0; -} - -static int polyval_x86_init(struct shash_desc *desc) -{ - struct polyval_desc_ctx *dctx = shash_desc_ctx(desc); - - memset(dctx, 0, sizeof(*dctx)); - - return 0; -} - -static int polyval_x86_update(struct shash_desc *desc, - const u8 *src, unsigned int srclen) -{ - struct polyval_desc_ctx *dctx = shash_desc_ctx(desc); - const struct polyval_tfm_ctx *tctx = polyval_tfm_ctx(desc->tfm); - unsigned int nblocks; - - do { - /* Allow rescheduling every 4K bytes. 
*/ - nblocks = min(srclen, 4096U) / POLYVAL_BLOCK_SIZE; - internal_polyval_update(tctx, src, nblocks, dctx->buffer); - srclen -= nblocks * POLYVAL_BLOCK_SIZE; - src += nblocks * POLYVAL_BLOCK_SIZE; - } while (srclen >= POLYVAL_BLOCK_SIZE); - - return srclen; -} - -static int polyval_x86_finup(struct shash_desc *desc, const u8 *src, - unsigned int len, u8 *dst) -{ - struct polyval_desc_ctx *dctx = shash_desc_ctx(desc); - const struct polyval_tfm_ctx *tctx = polyval_tfm_ctx(desc->tfm); - - if (len) { - crypto_xor(dctx->buffer, src, len); - internal_polyval_mul(dctx->buffer, - tctx->key_powers[NUM_KEY_POWERS-1]); - } - - memcpy(dst, dctx->buffer, POLYVAL_BLOCK_SIZE); - - return 0; -} - -static struct shash_alg polyval_alg = { - .digestsize = POLYVAL_DIGEST_SIZE, - .init = polyval_x86_init, - .update = polyval_x86_update, - .finup = polyval_x86_finup, - .setkey = polyval_x86_setkey, - .descsize = sizeof(struct polyval_desc_ctx), - .base = { - .cra_name = "polyval", - .cra_driver_name = "polyval-clmulni", - .cra_priority = 200, - .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, - .cra_blocksize = POLYVAL_BLOCK_SIZE, - .cra_ctxsize = POLYVAL_CTX_SIZE, - .cra_module = THIS_MODULE, - }, -}; - -__maybe_unused static const struct x86_cpu_id pcmul_cpu_id[] = { - X86_MATCH_FEATURE(X86_FEATURE_PCLMULQDQ, NULL), - {} -}; -MODULE_DEVICE_TABLE(x86cpu, pcmul_cpu_id); - -static int __init polyval_clmulni_mod_init(void) -{ - if (!x86_match_cpu(pcmul_cpu_id)) - return -ENODEV; - - if (!boot_cpu_has(X86_FEATURE_AVX)) - return -ENODEV; - - return crypto_register_shash(&polyval_alg); -} - -static void __exit polyval_clmulni_mod_exit(void) -{ - crypto_unregister_shash(&polyval_alg); -} - -module_init(polyval_clmulni_mod_init); -module_exit(polyval_clmulni_mod_exit); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("POLYVAL hash function accelerated by PCLMULQDQ-NI"); -MODULE_ALIAS_CRYPTO("polyval"); -MODULE_ALIAS_CRYPTO("polyval-clmulni"); diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S index 772c64ed4523..6ba2b3adcef0 100644 --- a/arch/x86/entry/entry.S +++ b/arch/x86/entry/entry.S @@ -4,6 +4,7 @@ */ #include <linux/export.h> +#include <linux/kvm_types.h> #include <linux/linkage.h> #include <linux/objtool.h> #include <asm/msr-index.h> @@ -29,8 +30,7 @@ SYM_FUNC_START(write_ibpb) FILL_RETURN_BUFFER %rax, RSB_CLEAR_LOOPS, X86_BUG_IBPB_NO_RET RET SYM_FUNC_END(write_ibpb) -/* For KVM */ -EXPORT_SYMBOL_GPL(write_ibpb); +EXPORT_SYMBOL_FOR_KVM(write_ibpb); SYM_FUNC_START(__WARN_trap) ANNOTATE_NOENDBR @@ -56,8 +56,7 @@ SYM_CODE_START_NOALIGN(x86_verw_sel) .word __KERNEL_DS .align L1_CACHE_BYTES, 0xcc SYM_CODE_END(x86_verw_sel); -/* For KVM */ -EXPORT_SYMBOL_GPL(x86_verw_sel); +EXPORT_SYMBOL_FOR_KVM(x86_verw_sel); .popsection diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index ed04a968cc7d..f9983a1907bf 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -19,6 +19,7 @@ * - idtentry: Define exception entry points. 
*/ #include <linux/export.h> +#include <linux/kvm_types.h> #include <linux/linkage.h> #include <asm/segment.h> #include <asm/cache.h> @@ -1566,5 +1567,5 @@ SYM_FUNC_START(clear_bhb_loop) pop %rbp RET SYM_FUNC_END(clear_bhb_loop) -EXPORT_SYMBOL_GPL(clear_bhb_loop) +EXPORT_SYMBOL_FOR_KVM(clear_bhb_loop) STACK_FRAME_NON_STANDARD(clear_bhb_loop) diff --git a/arch/x86/entry/entry_64_fred.S b/arch/x86/entry/entry_64_fred.S index fafbd3e68cb8..894f7f16eb80 100644 --- a/arch/x86/entry/entry_64_fred.S +++ b/arch/x86/entry/entry_64_fred.S @@ -4,6 +4,7 @@ */ #include <linux/export.h> +#include <linux/kvm_types.h> #include <asm/asm.h> #include <asm/fred.h> @@ -146,5 +147,5 @@ SYM_FUNC_START(asm_fred_entry_from_kvm) RET SYM_FUNC_END(asm_fred_entry_from_kvm) -EXPORT_SYMBOL_GPL(asm_fred_entry_from_kvm); +EXPORT_SYMBOL_FOR_KVM(asm_fred_entry_from_kvm); #endif diff --git a/arch/x86/entry/entry_fred.c b/arch/x86/entry/entry_fred.c index f004a4dc74c2..94e626cc6a07 100644 --- a/arch/x86/entry/entry_fred.c +++ b/arch/x86/entry/entry_fred.c @@ -78,13 +78,13 @@ static noinstr void fred_intx(struct pt_regs *regs) static __always_inline void fred_other(struct pt_regs *regs) { /* The compiler can fold these conditions into a single test */ - if (likely(regs->fred_ss.vector == FRED_SYSCALL && regs->fred_ss.lm)) { + if (likely(regs->fred_ss.vector == FRED_SYSCALL && regs->fred_ss.l)) { regs->orig_ax = regs->ax; regs->ax = -ENOSYS; do_syscall_64(regs, regs->orig_ax); return; } else if (ia32_enabled() && - likely(regs->fred_ss.vector == FRED_SYSENTER && !regs->fred_ss.lm)) { + likely(regs->fred_ss.vector == FRED_SYSENTER && !regs->fred_ss.l)) { regs->orig_ax = regs->ax; regs->ax = -ENOSYS; do_fast_syscall_32(regs); diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c index 2b15ea17bb7c..a67a644d0cfe 100644 --- a/arch/x86/entry/syscall_32.c +++ b/arch/x86/entry/syscall_32.c @@ -274,9 +274,10 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs) * fetch EBP before invoking any of the syscall entry work * functions. */ - syscall_enter_from_user_mode_prepare(regs); + enter_from_user_mode(regs); instrumentation_begin(); + local_irq_enable(); /* Fetch EBP from where the vDSO stashed it. 
*/ if (IS_ENABLED(CONFIG_X86_64)) { /* diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 4877e16da69a..e979a3eac7a3 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -475,3 +475,4 @@ 467 i386 open_tree_attr sys_open_tree_attr 468 i386 file_getattr sys_file_getattr 469 i386 file_setattr sys_file_setattr +470 i386 listns sys_listns diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index ced2a1deecd7..8a4ac4841be6 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -394,6 +394,7 @@ 467 common open_tree_attr sys_open_tree_attr 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr +470 common listns sys_listns # # Due to a historical design error, certain syscalls are numbered differently diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index b20661b8621d..44656d2fb555 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -2,6 +2,7 @@ #include <linux/perf_event.h> #include <linux/jump_label.h> #include <linux/export.h> +#include <linux/kvm_types.h> #include <linux/types.h> #include <linux/init.h> #include <linux/slab.h> @@ -763,7 +764,12 @@ static void amd_pmu_enable_all(int added) if (!test_bit(idx, cpuc->active_mask)) continue; - amd_pmu_enable_event(cpuc->events[idx]); + /* + * FIXME: cpuc->events[idx] can become NULL in a subtle race + * condition with NMI->throttle->x86_pmu_stop(). + */ + if (cpuc->events[idx]) + amd_pmu_enable_event(cpuc->events[idx]); } } @@ -1569,7 +1575,7 @@ void amd_pmu_enable_virt(void) /* Reload all events */ amd_pmu_reload_virt(); } -EXPORT_SYMBOL_GPL(amd_pmu_enable_virt); +EXPORT_SYMBOL_FOR_KVM(amd_pmu_enable_virt); void amd_pmu_disable_virt(void) { @@ -1586,4 +1592,4 @@ void amd_pmu_disable_virt(void) /* Reload all events */ amd_pmu_reload_virt(); } -EXPORT_SYMBOL_GPL(amd_pmu_disable_virt); +EXPORT_SYMBOL_FOR_KVM(amd_pmu_disable_virt); diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 745caa6c15a3..0c38a31d5fc7 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -20,6 +20,7 @@ #include <linux/export.h> #include <linux/init.h> #include <linux/kdebug.h> +#include <linux/kvm_types.h> #include <linux/sched/mm.h> #include <linux/sched/clock.h> #include <linux/uaccess.h> @@ -554,14 +555,22 @@ static inline int precise_br_compat(struct perf_event *event) return m == b; } -int x86_pmu_max_precise(void) +int x86_pmu_max_precise(struct pmu *pmu) { int precise = 0; - /* Support for constant skid */ if (x86_pmu.pebs_active && !x86_pmu.pebs_broken) { - precise++; + /* arch PEBS */ + if (x86_pmu.arch_pebs) { + precise = 2; + if (hybrid(pmu, arch_pebs_cap).pdists) + precise++; + + return precise; + } + /* legacy PEBS - support for constant skid */ + precise++; /* Support for IP fixup */ if (x86_pmu.lbr_nr || x86_pmu.intel_cap.pebs_format >= 2) precise++; @@ -569,13 +578,14 @@ int x86_pmu_max_precise(void) if (x86_pmu.pebs_prec_dist) precise++; } + return precise; } int x86_pmu_hw_config(struct perf_event *event) { if (event->attr.precise_ip) { - int precise = x86_pmu_max_precise(); + int precise = x86_pmu_max_precise(event->pmu); if (event->attr.precise_ip > precise) return -EOPNOTSUPP; @@ -714,7 +724,7 @@ struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr, void *data) { return static_call(x86_pmu_guest_get_msrs)(nr, data); } -EXPORT_SYMBOL_GPL(perf_guest_get_msrs); 
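[Editor's note -- illustration only, not part of this patch.] The x86_pmu_max_precise()/x86_pmu_hw_config() hunk above makes the precise-sampling limit per-PMU: with arch-PEBS it is 2, or 3 when PDIST-capable counters are present, while legacy PEBS keeps the old skid-based counting, and any larger request is rejected with -EOPNOTSUPP. A hypothetical user-space sketch of the attribute field that limit gates:

/*
 * Hypothetical user-space sketch: the precise_ip field checked by
 * x86_pmu_hw_config() above. Values follow the perf_event_open() ABI
 * (0 = arbitrary skid ... 3 = must have zero skid).
 */
#include <linux/perf_event.h>
#include <string.h>

static void request_precise_cycles(struct perf_event_attr *attr)
{
	memset(attr, 0, sizeof(*attr));
	attr->type = PERF_TYPE_HARDWARE;
	attr->config = PERF_COUNT_HW_CPU_CYCLES;
	attr->sample_period = 100000;
	/*
	 * Equivalent to the perf tool's "cycles:ppp"; perf_event_open()
	 * fails with EOPNOTSUPP when this exceeds the PMU's maximum as
	 * reported by max_precise_show() above.
	 */
	attr->precise_ip = 3;
}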
+EXPORT_SYMBOL_FOR_KVM(perf_guest_get_msrs); /* * There may be PMI landing after enabled=0. The PMI hitting could be before or @@ -1344,6 +1354,7 @@ static void x86_pmu_enable(struct pmu *pmu) hwc->state |= PERF_HES_ARCH; x86_pmu_stop(event, PERF_EF_UPDATE); + cpuc->events[hwc->idx] = NULL; } /* @@ -1365,6 +1376,7 @@ static void x86_pmu_enable(struct pmu *pmu) * if cpuc->enabled = 0, then no wrmsr as * per x86_pmu_enable_event() */ + cpuc->events[hwc->idx] = event; x86_pmu_start(event, PERF_EF_RELOAD); } cpuc->n_added = 0; @@ -1531,7 +1543,6 @@ static void x86_pmu_start(struct perf_event *event, int flags) event->hw.state = 0; - cpuc->events[idx] = event; __set_bit(idx, cpuc->active_mask); static_call(x86_pmu_enable)(event); perf_event_update_userpage(event); @@ -1610,7 +1621,6 @@ void x86_pmu_stop(struct perf_event *event, int flags) if (test_bit(hwc->idx, cpuc->active_mask)) { static_call(x86_pmu_disable)(event); __clear_bit(hwc->idx, cpuc->active_mask); - cpuc->events[hwc->idx] = NULL; WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); hwc->state |= PERF_HES_STOPPED; } @@ -1648,6 +1658,7 @@ static void x86_pmu_del(struct perf_event *event, int flags) * Not a TXN, therefore cleanup properly. */ x86_pmu_stop(event, PERF_EF_UPDATE); + cpuc->events[event->hw.idx] = NULL; for (i = 0; i < cpuc->n_events; i++) { if (event == cpuc->event_list[i]) @@ -2629,7 +2640,9 @@ static ssize_t max_precise_show(struct device *cdev, struct device_attribute *attr, char *buf) { - return snprintf(buf, PAGE_SIZE, "%d\n", x86_pmu_max_precise()); + struct pmu *pmu = dev_get_drvdata(cdev); + + return snprintf(buf, PAGE_SIZE, "%d\n", x86_pmu_max_precise(pmu)); } static DEVICE_ATTR_RO(max_precise); @@ -2789,13 +2802,13 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re return; } - if (perf_callchain_store(entry, regs->ip)) - return; - - if (perf_hw_regs(regs)) + if (perf_hw_regs(regs)) { + if (perf_callchain_store(entry, regs->ip)) + return; unwind_start(&state, current, regs, NULL); - else + } else { unwind_start(&state, current, NULL, (void *)regs->sp); + } for (; !unwind_done(&state); unwind_next_frame(&state)) { addr = unwind_get_return_address(&state); @@ -2845,46 +2858,6 @@ static unsigned long get_segment_base(unsigned int segment) return get_desc_base(desc); } -#ifdef CONFIG_UPROBES -/* - * Heuristic-based check if uprobe is installed at the function entry. - * - * Under assumption of user code being compiled with frame pointers, - * `push %rbp/%ebp` is a good indicator that we indeed are. - * - * Similarly, `endbr64` (assuming 64-bit mode) is also a common pattern. - * If we get this wrong, captured stack trace might have one extra bogus - * entry, but the rest of stack trace will still be meaningful. 
- */ -static bool is_uprobe_at_func_entry(struct pt_regs *regs) -{ - struct arch_uprobe *auprobe; - - if (!current->utask) - return false; - - auprobe = current->utask->auprobe; - if (!auprobe) - return false; - - /* push %rbp/%ebp */ - if (auprobe->insn[0] == 0x55) - return true; - - /* endbr64 (64-bit only) */ - if (user_64bit_mode(regs) && is_endbr((u32 *)auprobe->insn)) - return true; - - return false; -} - -#else -static bool is_uprobe_at_func_entry(struct pt_regs *regs) -{ - return false; -} -#endif /* CONFIG_UPROBES */ - #ifdef CONFIG_IA32_EMULATION #include <linux/compat.h> @@ -3106,7 +3079,7 @@ void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap) cap->events_mask_len = x86_pmu.events_mask_len; cap->pebs_ept = x86_pmu.pebs_ept; } -EXPORT_SYMBOL_GPL(perf_get_x86_pmu_capability); +EXPORT_SYMBOL_FOR_KVM(perf_get_x86_pmu_capability); u64 perf_get_hw_event_config(int hw_event) { @@ -3117,4 +3090,4 @@ u64 perf_get_hw_event_config(int hw_event) return 0; } -EXPORT_SYMBOL_GPL(perf_get_hw_event_config); +EXPORT_SYMBOL_FOR_KVM(perf_get_hw_event_config); diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index fe65be0b9d9c..853fe073bab3 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2563,6 +2563,44 @@ static void intel_pmu_disable_fixed(struct perf_event *event) cpuc->fixed_ctrl_val &= ~mask; } +static inline void __intel_pmu_update_event_ext(int idx, u64 ext) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + u32 msr; + + if (idx < INTEL_PMC_IDX_FIXED) { + msr = MSR_IA32_PMC_V6_GP0_CFG_C + + x86_pmu.addr_offset(idx, false); + } else { + msr = MSR_IA32_PMC_V6_FX0_CFG_C + + x86_pmu.addr_offset(idx - INTEL_PMC_IDX_FIXED, false); + } + + cpuc->cfg_c_val[idx] = ext; + wrmsrq(msr, ext); +} + +static void intel_pmu_disable_event_ext(struct perf_event *event) +{ + /* + * Only clear CFG_C MSR for PEBS counter group events, + * it avoids the HW counter's value to be added into + * other PEBS records incorrectly after PEBS counter + * group events are disabled. + * + * For other events, it's unnecessary to clear CFG_C MSRs + * since CFG_C doesn't take effect if counter is in + * disabled state. That helps to reduce the WRMSR overhead + * in context switches. + */ + if (!is_pebs_counter_event_group(event)) + return; + + __intel_pmu_update_event_ext(event->hw.idx, 0); +} + +DEFINE_STATIC_CALL_NULL(intel_pmu_disable_event_ext, intel_pmu_disable_event_ext); + static void intel_pmu_disable_event(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; @@ -2571,9 +2609,12 @@ static void intel_pmu_disable_event(struct perf_event *event) switch (idx) { case 0 ... INTEL_PMC_IDX_FIXED - 1: intel_clear_masks(event, idx); + static_call_cond(intel_pmu_disable_event_ext)(event); x86_pmu_disable_event(event); break; case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1: + static_call_cond(intel_pmu_disable_event_ext)(event); + fallthrough; case INTEL_PMC_IDX_METRIC_BASE ... 
INTEL_PMC_IDX_METRIC_END: intel_pmu_disable_fixed(event); break; @@ -2940,6 +2981,79 @@ static void intel_pmu_enable_acr(struct perf_event *event) DEFINE_STATIC_CALL_NULL(intel_pmu_enable_acr_event, intel_pmu_enable_acr); +static void intel_pmu_enable_event_ext(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + union arch_pebs_index old, new; + struct arch_pebs_cap cap; + u64 ext = 0; + + cap = hybrid(cpuc->pmu, arch_pebs_cap); + + if (event->attr.precise_ip) { + u64 pebs_data_cfg = intel_get_arch_pebs_data_config(event); + + ext |= ARCH_PEBS_EN; + if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) + ext |= (-hwc->sample_period) & ARCH_PEBS_RELOAD; + + if (pebs_data_cfg && cap.caps) { + if (pebs_data_cfg & PEBS_DATACFG_MEMINFO) + ext |= ARCH_PEBS_AUX & cap.caps; + + if (pebs_data_cfg & PEBS_DATACFG_GP) + ext |= ARCH_PEBS_GPR & cap.caps; + + if (pebs_data_cfg & PEBS_DATACFG_XMMS) + ext |= ARCH_PEBS_VECR_XMM & cap.caps; + + if (pebs_data_cfg & PEBS_DATACFG_LBRS) + ext |= ARCH_PEBS_LBR & cap.caps; + + if (pebs_data_cfg & + (PEBS_DATACFG_CNTR_MASK << PEBS_DATACFG_CNTR_SHIFT)) + ext |= ARCH_PEBS_CNTR_GP & cap.caps; + + if (pebs_data_cfg & + (PEBS_DATACFG_FIX_MASK << PEBS_DATACFG_FIX_SHIFT)) + ext |= ARCH_PEBS_CNTR_FIXED & cap.caps; + + if (pebs_data_cfg & PEBS_DATACFG_METRICS) + ext |= ARCH_PEBS_CNTR_METRICS & cap.caps; + } + + if (cpuc->n_pebs == cpuc->n_large_pebs) + new.thresh = ARCH_PEBS_THRESH_MULTI; + else + new.thresh = ARCH_PEBS_THRESH_SINGLE; + + rdmsrq(MSR_IA32_PEBS_INDEX, old.whole); + if (new.thresh != old.thresh || !old.en) { + if (old.thresh == ARCH_PEBS_THRESH_MULTI && old.wr > 0) { + /* + * Large PEBS was enabled. + * Drain PEBS buffer before applying the single PEBS. + */ + intel_pmu_drain_pebs_buffer(); + } else { + new.wr = 0; + new.full = 0; + new.en = 1; + wrmsrq(MSR_IA32_PEBS_INDEX, new.whole); + } + } + } + + if (is_pebs_counter_event_group(event)) + ext |= ARCH_PEBS_CNTR_ALLOW; + + if (cpuc->cfg_c_val[hwc->idx] != ext) + __intel_pmu_update_event_ext(hwc->idx, ext); +} + +DEFINE_STATIC_CALL_NULL(intel_pmu_enable_event_ext, intel_pmu_enable_event_ext); + static void intel_pmu_enable_event(struct perf_event *event) { u64 enable_mask = ARCH_PERFMON_EVENTSEL_ENABLE; @@ -2955,10 +3069,12 @@ static void intel_pmu_enable_event(struct perf_event *event) enable_mask |= ARCH_PERFMON_EVENTSEL_BR_CNTR; intel_set_masks(event, idx); static_call_cond(intel_pmu_enable_acr_event)(event); + static_call_cond(intel_pmu_enable_event_ext)(event); __x86_pmu_enable_event(hwc, enable_mask); break; case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1: static_call_cond(intel_pmu_enable_acr_event)(event); + static_call_cond(intel_pmu_enable_event_ext)(event); fallthrough; case INTEL_PMC_IDX_METRIC_BASE ... 
INTEL_PMC_IDX_METRIC_END: intel_pmu_enable_fixed(event); @@ -3216,6 +3332,19 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) } /* + * Arch PEBS sets bit 54 in the global status register + */ + if (__test_and_clear_bit(GLOBAL_STATUS_ARCH_PEBS_THRESHOLD_BIT, + (unsigned long *)&status)) { + handled++; + static_call(x86_pmu_drain_pebs)(regs, &data); + + if (cpuc->events[INTEL_PMC_IDX_FIXED_SLOTS] && + is_pebs_counter_event_group(cpuc->events[INTEL_PMC_IDX_FIXED_SLOTS])) + status &= ~GLOBAL_STATUS_PERF_METRICS_OVF_BIT; + } + + /* * Intel PT */ if (__test_and_clear_bit(GLOBAL_STATUS_TRACE_TOPAPMI_BIT, (unsigned long *)&status)) { @@ -3269,7 +3398,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) * The PEBS buffer has to be drained before handling the A-PMI */ if (is_pebs_counter_event_group(event)) - x86_pmu.drain_pebs(regs, &data); + static_call(x86_pmu_drain_pebs)(regs, &data); last_period = event->hw.last_period; @@ -4029,7 +4158,9 @@ static unsigned long intel_pmu_large_pebs_flags(struct perf_event *event) if (!event->attr.exclude_kernel) flags &= ~PERF_SAMPLE_REGS_USER; if (event->attr.sample_regs_user & ~PEBS_GP_REGS) - flags &= ~(PERF_SAMPLE_REGS_USER | PERF_SAMPLE_REGS_INTR); + flags &= ~PERF_SAMPLE_REGS_USER; + if (event->attr.sample_regs_intr & ~PEBS_GP_REGS) + flags &= ~PERF_SAMPLE_REGS_INTR; return flags; } @@ -4204,6 +4335,20 @@ static bool intel_pmu_is_acr_group(struct perf_event *event) return false; } +static inline bool intel_pmu_has_pebs_counter_group(struct pmu *pmu) +{ + u64 caps; + + if (x86_pmu.intel_cap.pebs_format >= 6 && x86_pmu.intel_cap.pebs_baseline) + return true; + + caps = hybrid(pmu, arch_pebs_cap).caps; + if (x86_pmu.arch_pebs && (caps & ARCH_PEBS_CNTR_MASK)) + return true; + + return false; +} + static inline void intel_pmu_set_acr_cntr_constr(struct perf_event *event, u64 *cause_mask, int *num) { @@ -4237,6 +4382,8 @@ static int intel_pmu_hw_config(struct perf_event *event) } if (event->attr.precise_ip) { + struct arch_pebs_cap pebs_cap = hybrid(event->pmu, arch_pebs_cap); + if ((event->attr.config & INTEL_ARCH_EVENT_MASK) == INTEL_FIXED_VLBR_EVENT) return -EINVAL; @@ -4250,6 +4397,15 @@ static int intel_pmu_hw_config(struct perf_event *event) } if (x86_pmu.pebs_aliases) x86_pmu.pebs_aliases(event); + + if (x86_pmu.arch_pebs) { + u64 cntr_mask = hybrid(event->pmu, intel_ctrl) & + ~GLOBAL_CTRL_EN_PERF_METRICS; + u64 pebs_mask = event->attr.precise_ip >= 3 ? 
+ pebs_cap.pdists : pebs_cap.counters; + if (cntr_mask != pebs_mask) + event->hw.dyn_constraint &= pebs_mask; + } } if (needs_branch_stack(event)) { @@ -4341,8 +4497,7 @@ static int intel_pmu_hw_config(struct perf_event *event) } if ((event->attr.sample_type & PERF_SAMPLE_READ) && - (x86_pmu.intel_cap.pebs_format >= 6) && - x86_pmu.intel_cap.pebs_baseline && + intel_pmu_has_pebs_counter_group(event->pmu) && is_sampling_event(event) && event->attr.precise_ip) event->group_leader->hw.flags |= PERF_X86_EVENT_PEBS_CNTR; @@ -5212,7 +5367,13 @@ err: static int intel_pmu_cpu_prepare(int cpu) { - return intel_cpuc_prepare(&per_cpu(cpu_hw_events, cpu), cpu); + int ret; + + ret = intel_cpuc_prepare(&per_cpu(cpu_hw_events, cpu), cpu); + if (ret) + return ret; + + return alloc_arch_pebs_buf_on_cpu(cpu); } static void flip_smm_bit(void *data) @@ -5257,6 +5418,163 @@ static void intel_pmu_check_event_constraints(struct event_constraint *event_con u64 fixed_cntr_mask, u64 intel_ctrl); +enum dyn_constr_type { + DYN_CONSTR_NONE, + DYN_CONSTR_BR_CNTR, + DYN_CONSTR_ACR_CNTR, + DYN_CONSTR_ACR_CAUSE, + DYN_CONSTR_PEBS, + DYN_CONSTR_PDIST, + + DYN_CONSTR_MAX, +}; + +static const char * const dyn_constr_type_name[] = { + [DYN_CONSTR_NONE] = "a normal event", + [DYN_CONSTR_BR_CNTR] = "a branch counter logging event", + [DYN_CONSTR_ACR_CNTR] = "an auto-counter reload event", + [DYN_CONSTR_ACR_CAUSE] = "an auto-counter reload cause event", + [DYN_CONSTR_PEBS] = "a PEBS event", + [DYN_CONSTR_PDIST] = "a PEBS PDIST event", +}; + +static void __intel_pmu_check_dyn_constr(struct event_constraint *constr, + enum dyn_constr_type type, u64 mask) +{ + struct event_constraint *c1, *c2; + int new_weight, check_weight; + u64 new_mask, check_mask; + + for_each_event_constraint(c1, constr) { + new_mask = c1->idxmsk64 & mask; + new_weight = hweight64(new_mask); + + /* ignore topdown perf metrics event */ + if (c1->idxmsk64 & INTEL_PMC_MSK_TOPDOWN) + continue; + + if (!new_weight && fls64(c1->idxmsk64) < INTEL_PMC_IDX_FIXED) { + pr_info("The event 0x%llx is not supported as %s.\n", + c1->code, dyn_constr_type_name[type]); + } + + if (new_weight <= 1) + continue; + + for_each_event_constraint(c2, c1 + 1) { + bool check_fail = false; + + check_mask = c2->idxmsk64 & mask; + check_weight = hweight64(check_mask); + + if (c2->idxmsk64 & INTEL_PMC_MSK_TOPDOWN || + !check_weight) + continue; + + /* The same constraints or no overlap */ + if (new_mask == check_mask || + (new_mask ^ check_mask) == (new_mask | check_mask)) + continue; + + /* + * A scheduler issue may be triggered in the following cases. + * - Two overlap constraints have the same weight. + * E.g., A constraints: 0x3, B constraints: 0x6 + * event counter failure case + * B PMC[2:1] 1 + * A PMC[1:0] 0 + * A PMC[1:0] FAIL + * - Two overlap constraints have different weight. + * The constraint has a low weight, but has high last bit. 
+ * E.g., A constraints: 0x7, B constraints: 0xC + * event counter failure case + * B PMC[3:2] 2 + * A PMC[2:0] 0 + * A PMC[2:0] 1 + * A PMC[2:0] FAIL + */ + if (new_weight == check_weight) { + check_fail = true; + } else if (new_weight < check_weight) { + if ((new_mask | check_mask) != check_mask && + fls64(new_mask) > fls64(check_mask)) + check_fail = true; + } else { + if ((new_mask | check_mask) != new_mask && + fls64(new_mask) < fls64(check_mask)) + check_fail = true; + } + + if (check_fail) { + pr_info("The two events 0x%llx and 0x%llx may not be " + "fully scheduled under some circumstances as " + "%s.\n", + c1->code, c2->code, dyn_constr_type_name[type]); + } + } + } +} + +static void intel_pmu_check_dyn_constr(struct pmu *pmu, + struct event_constraint *constr, + u64 cntr_mask) +{ + enum dyn_constr_type i; + u64 mask; + + for (i = DYN_CONSTR_NONE; i < DYN_CONSTR_MAX; i++) { + mask = 0; + switch (i) { + case DYN_CONSTR_NONE: + mask = cntr_mask; + break; + case DYN_CONSTR_BR_CNTR: + if (x86_pmu.flags & PMU_FL_BR_CNTR) + mask = x86_pmu.lbr_counters; + break; + case DYN_CONSTR_ACR_CNTR: + mask = hybrid(pmu, acr_cntr_mask64) & GENMASK_ULL(INTEL_PMC_MAX_GENERIC - 1, 0); + break; + case DYN_CONSTR_ACR_CAUSE: + if (hybrid(pmu, acr_cntr_mask64) == hybrid(pmu, acr_cause_mask64)) + continue; + mask = hybrid(pmu, acr_cause_mask64) & GENMASK_ULL(INTEL_PMC_MAX_GENERIC - 1, 0); + break; + case DYN_CONSTR_PEBS: + if (x86_pmu.arch_pebs) + mask = hybrid(pmu, arch_pebs_cap).counters; + break; + case DYN_CONSTR_PDIST: + if (x86_pmu.arch_pebs) + mask = hybrid(pmu, arch_pebs_cap).pdists; + break; + default: + pr_warn("Unsupported dynamic constraint type %d\n", i); + } + + if (mask) + __intel_pmu_check_dyn_constr(constr, i, mask); + } +} + +static void intel_pmu_check_event_constraints_all(struct pmu *pmu) +{ + struct event_constraint *event_constraints = hybrid(pmu, event_constraints); + struct event_constraint *pebs_constraints = hybrid(pmu, pebs_constraints); + u64 cntr_mask = hybrid(pmu, cntr_mask64); + u64 fixed_cntr_mask = hybrid(pmu, fixed_cntr_mask64); + u64 intel_ctrl = hybrid(pmu, intel_ctrl); + + intel_pmu_check_event_constraints(event_constraints, cntr_mask, + fixed_cntr_mask, intel_ctrl); + + if (event_constraints) + intel_pmu_check_dyn_constr(pmu, event_constraints, cntr_mask); + + if (pebs_constraints) + intel_pmu_check_dyn_constr(pmu, pebs_constraints, cntr_mask); +} + static void intel_pmu_check_extra_regs(struct extra_reg *extra_regs); static inline bool intel_pmu_broken_perf_cap(void) @@ -5269,34 +5587,89 @@ static inline bool intel_pmu_broken_perf_cap(void) return false; } +static inline void __intel_update_pmu_caps(struct pmu *pmu) +{ + struct pmu *dest_pmu = pmu ? 
pmu : x86_get_pmu(smp_processor_id()); + + if (hybrid(pmu, arch_pebs_cap).caps & ARCH_PEBS_VECR_XMM) + dest_pmu->capabilities |= PERF_PMU_CAP_EXTENDED_REGS; +} + +static inline void __intel_update_large_pebs_flags(struct pmu *pmu) +{ + u64 caps = hybrid(pmu, arch_pebs_cap).caps; + + x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME; + if (caps & ARCH_PEBS_LBR) + x86_pmu.large_pebs_flags |= PERF_SAMPLE_BRANCH_STACK; + if (caps & ARCH_PEBS_CNTR_MASK) + x86_pmu.large_pebs_flags |= PERF_SAMPLE_READ; + + if (!(caps & ARCH_PEBS_AUX)) + x86_pmu.large_pebs_flags &= ~PERF_SAMPLE_DATA_SRC; + if (!(caps & ARCH_PEBS_GPR)) { + x86_pmu.large_pebs_flags &= + ~(PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER); + } +} + +#define counter_mask(_gp, _fixed) ((_gp) | ((u64)(_fixed) << INTEL_PMC_IDX_FIXED)) + static void update_pmu_cap(struct pmu *pmu) { - unsigned int cntr, fixed_cntr, ecx, edx; - union cpuid35_eax eax; - union cpuid35_ebx ebx; + unsigned int eax, ebx, ecx, edx; + union cpuid35_eax eax_0; + union cpuid35_ebx ebx_0; + u64 cntrs_mask = 0; + u64 pebs_mask = 0; + u64 pdists_mask = 0; - cpuid(ARCH_PERFMON_EXT_LEAF, &eax.full, &ebx.full, &ecx, &edx); + cpuid(ARCH_PERFMON_EXT_LEAF, &eax_0.full, &ebx_0.full, &ecx, &edx); - if (ebx.split.umask2) + if (ebx_0.split.umask2) hybrid(pmu, config_mask) |= ARCH_PERFMON_EVENTSEL_UMASK2; - if (ebx.split.eq) + if (ebx_0.split.eq) hybrid(pmu, config_mask) |= ARCH_PERFMON_EVENTSEL_EQ; - if (eax.split.cntr_subleaf) { + if (eax_0.split.cntr_subleaf) { cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_NUM_COUNTER_LEAF, - &cntr, &fixed_cntr, &ecx, &edx); - hybrid(pmu, cntr_mask64) = cntr; - hybrid(pmu, fixed_cntr_mask64) = fixed_cntr; + &eax, &ebx, &ecx, &edx); + hybrid(pmu, cntr_mask64) = eax; + hybrid(pmu, fixed_cntr_mask64) = ebx; + cntrs_mask = counter_mask(eax, ebx); } - if (eax.split.acr_subleaf) { + if (eax_0.split.acr_subleaf) { cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_ACR_LEAF, - &cntr, &fixed_cntr, &ecx, &edx); + &eax, &ebx, &ecx, &edx); /* The mask of the counters which can be reloaded */ - hybrid(pmu, acr_cntr_mask64) = cntr | ((u64)fixed_cntr << INTEL_PMC_IDX_FIXED); - + hybrid(pmu, acr_cntr_mask64) = counter_mask(eax, ebx); /* The mask of the counters which can cause a reload of reloadable counters */ - hybrid(pmu, acr_cause_mask64) = ecx | ((u64)edx << INTEL_PMC_IDX_FIXED); + hybrid(pmu, acr_cause_mask64) = counter_mask(ecx, edx); + } + + /* Bits[5:4] should be set simultaneously if arch-PEBS is supported */ + if (eax_0.split.pebs_caps_subleaf && eax_0.split.pebs_cnts_subleaf) { + cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_PEBS_CAP_LEAF, + &eax, &ebx, &ecx, &edx); + hybrid(pmu, arch_pebs_cap).caps = (u64)ebx << 32; + + cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_PEBS_COUNTER_LEAF, + &eax, &ebx, &ecx, &edx); + pebs_mask = counter_mask(eax, ecx); + pdists_mask = counter_mask(ebx, edx); + hybrid(pmu, arch_pebs_cap).counters = pebs_mask; + hybrid(pmu, arch_pebs_cap).pdists = pdists_mask; + + if (WARN_ON((pebs_mask | pdists_mask) & ~cntrs_mask)) { + x86_pmu.arch_pebs = 0; + } else { + __intel_update_pmu_caps(pmu); + __intel_update_large_pebs_flags(pmu); + } + } else { + WARN_ON(x86_pmu.arch_pebs == 1); + x86_pmu.arch_pebs = 0; } if (!intel_pmu_broken_perf_cap()) { @@ -5319,10 +5692,7 @@ static void intel_pmu_check_hybrid_pmus(struct x86_hybrid_pmu *pmu) else pmu->intel_ctrl &= ~GLOBAL_CTRL_EN_PERF_METRICS; - intel_pmu_check_event_constraints(pmu->event_constraints, - pmu->cntr_mask64, - pmu->fixed_cntr_mask64, - pmu->intel_ctrl); + 
intel_pmu_check_event_constraints_all(&pmu->pmu); intel_pmu_check_extra_regs(pmu->extra_regs); } @@ -5418,6 +5788,7 @@ static void intel_pmu_cpu_starting(int cpu) return; init_debug_store_on_cpu(cpu); + init_arch_pebs_on_cpu(cpu); /* * Deal with CPUs that don't clear their LBRs on power-up, and that may * even boot with LBRs enabled. @@ -5456,6 +5827,8 @@ static void intel_pmu_cpu_starting(int cpu) } } + __intel_update_pmu_caps(cpuc->pmu); + if (!cpuc->shared_regs) return; @@ -5515,6 +5888,7 @@ static void free_excl_cntrs(struct cpu_hw_events *cpuc) static void intel_pmu_cpu_dying(int cpu) { fini_debug_store_on_cpu(cpu); + fini_arch_pebs_on_cpu(cpu); } void intel_cpuc_finish(struct cpu_hw_events *cpuc) @@ -5535,6 +5909,7 @@ static void intel_pmu_cpu_dead(int cpu) { struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); + release_arch_pebs_buf_on_cpu(cpu); intel_cpuc_finish(cpuc); if (is_hybrid() && cpuc->pmu) @@ -6250,7 +6625,7 @@ tsx_is_visible(struct kobject *kobj, struct attribute *attr, int i) static umode_t pebs_is_visible(struct kobject *kobj, struct attribute *attr, int i) { - return x86_pmu.ds_pebs ? attr->mode : 0; + return intel_pmu_has_pebs() ? attr->mode : 0; } static umode_t @@ -6940,8 +7315,11 @@ __init int intel_pmu_init(void) * Many features on and after V6 require dynamic constraint, * e.g., Arch PEBS, ACR. */ - if (version >= 6) + if (version >= 6) { x86_pmu.flags |= PMU_FL_DYN_CONSTRAINT; + x86_pmu.late_setup = intel_pmu_late_setup; + } + /* * Install the hw-cache-events table: */ @@ -7727,6 +8105,14 @@ __init int intel_pmu_init(void) if (!is_hybrid() && boot_cpu_has(X86_FEATURE_ARCH_PERFMON_EXT)) update_pmu_cap(NULL); + if (x86_pmu.arch_pebs) { + static_call_update(intel_pmu_disable_event_ext, + intel_pmu_disable_event_ext); + static_call_update(intel_pmu_enable_event_ext, + intel_pmu_enable_event_ext); + pr_cont("Architectural PEBS, "); + } + intel_pmu_check_counters_mask(&x86_pmu.cntr_mask64, &x86_pmu.fixed_cntr_mask64, &x86_pmu.intel_ctrl); @@ -7735,10 +8121,8 @@ __init int intel_pmu_init(void) if (x86_pmu.intel_cap.anythread_deprecated) x86_pmu.format_attrs = intel_arch_formats_attr; - intel_pmu_check_event_constraints(x86_pmu.event_constraints, - x86_pmu.cntr_mask64, - x86_pmu.fixed_cntr_mask64, - x86_pmu.intel_ctrl); + intel_pmu_check_event_constraints_all(NULL); + /* * Access LBR MSR may cause #GP under certain circumstances. * Check all LBR MSR here. diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index ec753e39b007..fa67fda6e45b 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -41,7 +41,7 @@ * MSR_CORE_C1_RES: CORE C1 Residency Counter * perf code: 0x00 * Available model: SLM,AMT,GLM,CNL,ICX,TNT,ADL,RPL - * MTL,SRF,GRR,ARL,LNL + * MTL,SRF,GRR,ARL,LNL,PTL * Scope: Core (each processor core has a MSR) * MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter * perf code: 0x01 @@ -53,31 +53,32 @@ * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW, * SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX, * TGL,TNT,RKL,ADL,RPL,SPR,MTL,SRF, - * GRR,ARL,LNL + * GRR,ARL,LNL,PTL * Scope: Core * MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter * perf code: 0x03 * Available model: SNB,IVB,HSW,BDW,SKL,CNL,KBL,CML, - * ICL,TGL,RKL,ADL,RPL,MTL,ARL,LNL + * ICL,TGL,RKL,ADL,RPL,MTL,ARL,LNL, + * PTL * Scope: Core * MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter. 
* perf code: 0x00 * Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL, * KBL,CML,ICL,ICX,TGL,TNT,RKL,ADL, - * RPL,SPR,MTL,ARL,LNL,SRF + * RPL,SPR,MTL,ARL,LNL,SRF,PTL * Scope: Package (physical package) * MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter. * perf code: 0x01 * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL, * GLM,CNL,KBL,CML,ICL,TGL,TNT,RKL, - * ADL,RPL,MTL,ARL,LNL + * ADL,RPL,MTL,ARL * Scope: Package (physical package) * MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter. * perf code: 0x02 * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW, * SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX, * TGL,TNT,RKL,ADL,RPL,SPR,MTL,SRF, - * ARL,LNL + * ARL,LNL,PTL * Scope: Package (physical package) * MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter. * perf code: 0x03 @@ -96,7 +97,7 @@ * MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter. * perf code: 0x06 * Available model: HSW ULT,KBL,GLM,CNL,CML,ICL,TGL, - * TNT,RKL,ADL,RPL,MTL,ARL,LNL + * TNT,RKL,ADL,RPL,MTL,ARL,LNL,PTL * Scope: Package (physical package) * MSR_MODULE_C6_RES_MS: Module C6 Residency Counter. * perf code: 0x00 @@ -522,7 +523,6 @@ static const struct cstate_model lnl_cstates __initconst = { BIT(PERF_CSTATE_CORE_C7_RES), .pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) | - BIT(PERF_CSTATE_PKG_C3_RES) | BIT(PERF_CSTATE_PKG_C6_RES) | BIT(PERF_CSTATE_PKG_C10_RES), }; @@ -628,6 +628,7 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { X86_MATCH_VFM(INTEL_ATOM_GRACEMONT, &adl_cstates), X86_MATCH_VFM(INTEL_ATOM_CRESTMONT_X, &srf_cstates), X86_MATCH_VFM(INTEL_ATOM_CRESTMONT, &grr_cstates), + X86_MATCH_VFM(INTEL_ATOM_DARKMONT_X, &srf_cstates), X86_MATCH_VFM(INTEL_ICELAKE_L, &icl_cstates), X86_MATCH_VFM(INTEL_ICELAKE, &icl_cstates), @@ -652,6 +653,7 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { X86_MATCH_VFM(INTEL_ARROWLAKE_H, &adl_cstates), X86_MATCH_VFM(INTEL_ARROWLAKE_U, &adl_cstates), X86_MATCH_VFM(INTEL_LUNARLAKE_M, &lnl_cstates), + X86_MATCH_VFM(INTEL_PANTHERLAKE_L, &lnl_cstates), { }, }; MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match); diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 01bc59e9286c..feb1c3cf63e4 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -626,13 +626,18 @@ static int alloc_pebs_buffer(int cpu) int max, node = cpu_to_node(cpu); void *buffer, *insn_buff, *cea; - if (!x86_pmu.ds_pebs) + if (!intel_pmu_has_pebs()) return 0; buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu); if (unlikely(!buffer)) return -ENOMEM; + if (x86_pmu.arch_pebs) { + hwev->pebs_vaddr = buffer; + return 0; + } + /* * HSW+ already provides us the eventing ip; no need to allocate this * buffer then. 
@@ -645,7 +650,7 @@ static int alloc_pebs_buffer(int cpu) } per_cpu(insn_buffer, cpu) = insn_buff; } - hwev->ds_pebs_vaddr = buffer; + hwev->pebs_vaddr = buffer; /* Update the cpu entry area mapping */ cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer; ds->pebs_buffer_base = (unsigned long) cea; @@ -661,17 +666,20 @@ static void release_pebs_buffer(int cpu) struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu); void *cea; - if (!x86_pmu.ds_pebs) + if (!intel_pmu_has_pebs()) return; - kfree(per_cpu(insn_buffer, cpu)); - per_cpu(insn_buffer, cpu) = NULL; + if (x86_pmu.ds_pebs) { + kfree(per_cpu(insn_buffer, cpu)); + per_cpu(insn_buffer, cpu) = NULL; - /* Clear the fixmap */ - cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer; - ds_clear_cea(cea, x86_pmu.pebs_buffer_size); - dsfree_pages(hwev->ds_pebs_vaddr, x86_pmu.pebs_buffer_size); - hwev->ds_pebs_vaddr = NULL; + /* Clear the fixmap */ + cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer; + ds_clear_cea(cea, x86_pmu.pebs_buffer_size); + } + + dsfree_pages(hwev->pebs_vaddr, x86_pmu.pebs_buffer_size); + hwev->pebs_vaddr = NULL; } static int alloc_bts_buffer(int cpu) @@ -824,6 +832,56 @@ void reserve_ds_buffers(void) } } +inline int alloc_arch_pebs_buf_on_cpu(int cpu) +{ + if (!x86_pmu.arch_pebs) + return 0; + + return alloc_pebs_buffer(cpu); +} + +inline void release_arch_pebs_buf_on_cpu(int cpu) +{ + if (!x86_pmu.arch_pebs) + return; + + release_pebs_buffer(cpu); +} + +void init_arch_pebs_on_cpu(int cpu) +{ + struct cpu_hw_events *cpuc = per_cpu_ptr(&cpu_hw_events, cpu); + u64 arch_pebs_base; + + if (!x86_pmu.arch_pebs) + return; + + if (!cpuc->pebs_vaddr) { + WARN(1, "Fail to allocate PEBS buffer on CPU %d\n", cpu); + x86_pmu.pebs_active = 0; + return; + } + + /* + * 4KB-aligned pointer of the output buffer + * (__alloc_pages_node() return page aligned address) + * Buffer Size = 4KB * 2^SIZE + * contiguous physical buffer (__alloc_pages_node() with order) + */ + arch_pebs_base = virt_to_phys(cpuc->pebs_vaddr) | PEBS_BUFFER_SHIFT; + wrmsr_on_cpu(cpu, MSR_IA32_PEBS_BASE, (u32)arch_pebs_base, + (u32)(arch_pebs_base >> 32)); + x86_pmu.pebs_active = 1; +} + +inline void fini_arch_pebs_on_cpu(int cpu) +{ + if (!x86_pmu.arch_pebs) + return; + + wrmsr_on_cpu(cpu, MSR_IA32_PEBS_BASE, 0, 0); +} + /* * BTS */ @@ -1471,6 +1529,25 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc, } } +u64 intel_get_arch_pebs_data_config(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + u64 pebs_data_cfg = 0; + u64 cntr_mask; + + if (WARN_ON(event->hw.idx < 0 || event->hw.idx >= X86_PMC_IDX_MAX)) + return 0; + + pebs_data_cfg |= pebs_update_adaptive_cfg(event); + + cntr_mask = (PEBS_DATACFG_CNTR_MASK << PEBS_DATACFG_CNTR_SHIFT) | + (PEBS_DATACFG_FIX_MASK << PEBS_DATACFG_FIX_SHIFT) | + PEBS_DATACFG_CNTR | PEBS_DATACFG_METRICS; + pebs_data_cfg |= cpuc->pebs_data_cfg & cntr_mask; + + return pebs_data_cfg; +} + void intel_pmu_pebs_add(struct perf_event *event) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); @@ -1532,6 +1609,15 @@ static inline void intel_pmu_drain_large_pebs(struct cpu_hw_events *cpuc) intel_pmu_drain_pebs_buffer(); } +static void __intel_pmu_pebs_enable(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + + hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; + cpuc->pebs_enabled |= 1ULL << hwc->idx; +} + void intel_pmu_pebs_enable(struct perf_event *event) { struct 
cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); @@ -1540,9 +1626,7 @@ void intel_pmu_pebs_enable(struct perf_event *event) struct debug_store *ds = cpuc->ds; unsigned int idx = hwc->idx; - hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; - - cpuc->pebs_enabled |= 1ULL << hwc->idx; + __intel_pmu_pebs_enable(event); if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) && (x86_pmu.version < 5)) cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32); @@ -1604,14 +1688,22 @@ void intel_pmu_pebs_del(struct perf_event *event) pebs_update_state(needed_cb, cpuc, event, false); } -void intel_pmu_pebs_disable(struct perf_event *event) +static void __intel_pmu_pebs_disable(struct perf_event *event) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; intel_pmu_drain_large_pebs(cpuc); - cpuc->pebs_enabled &= ~(1ULL << hwc->idx); + hwc->config |= ARCH_PERFMON_EVENTSEL_INT; +} + +void intel_pmu_pebs_disable(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + + __intel_pmu_pebs_disable(event); if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) && (x86_pmu.version < 5)) @@ -1623,8 +1715,6 @@ void intel_pmu_pebs_disable(struct perf_event *event) if (cpuc->enabled) wrmsrq(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); - - hwc->config |= ARCH_PERFMON_EVENTSEL_INT; } void intel_pmu_pebs_enable_all(void) @@ -2060,6 +2150,90 @@ static inline void __setup_pebs_counter_group(struct cpu_hw_events *cpuc, #define PEBS_LATENCY_MASK 0xffff +static inline void __setup_perf_sample_data(struct perf_event *event, + struct pt_regs *iregs, + struct perf_sample_data *data) +{ + perf_sample_data_init(data, 0, event->hw.last_period); + + /* + * We must however always use iregs for the unwinder to stay sane; the + * record BP,SP,IP can point into thin air when the record is from a + * previous PMI context or an (I)RET happened between the record and + * PMI. + */ + perf_sample_save_callchain(data, event, iregs); +} + +static inline void __setup_pebs_basic_group(struct perf_event *event, + struct pt_regs *regs, + struct perf_sample_data *data, + u64 sample_type, u64 ip, + u64 tsc, u16 retire) +{ + /* The ip in basic is EventingIP */ + set_linear_ip(regs, ip); + regs->flags = PERF_EFLAGS_EXACT; + setup_pebs_time(event, data, tsc); + + if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) + data->weight.var3_w = retire; +} + +static inline void __setup_pebs_gpr_group(struct perf_event *event, + struct pt_regs *regs, + struct pebs_gprs *gprs, + u64 sample_type) +{ + if (event->attr.precise_ip < 2) { + set_linear_ip(regs, gprs->ip); + regs->flags &= ~PERF_EFLAGS_EXACT; + } + + if (sample_type & (PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER)) + adaptive_pebs_save_regs(regs, gprs); +} + +static inline void __setup_pebs_meminfo_group(struct perf_event *event, + struct perf_sample_data *data, + u64 sample_type, u64 latency, + u16 instr_latency, u64 address, + u64 aux, u64 tsx_tuning, u64 ax) +{ + if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) { + u64 tsx_latency = intel_get_tsx_weight(tsx_tuning); + + data->weight.var2_w = instr_latency; + + /* + * Although meminfo::latency is defined as a u64, + * only the lower 32 bits include the valid data + * in practice on Ice Lake and earlier platforms. 
+ */ + if (sample_type & PERF_SAMPLE_WEIGHT) + data->weight.full = latency ?: tsx_latency; + else + data->weight.var1_dw = (u32)latency ?: tsx_latency; + + data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE; + } + + if (sample_type & PERF_SAMPLE_DATA_SRC) { + data->data_src.val = get_data_src(event, aux); + data->sample_flags |= PERF_SAMPLE_DATA_SRC; + } + + if (sample_type & PERF_SAMPLE_ADDR_TYPE) { + data->addr = address; + data->sample_flags |= PERF_SAMPLE_ADDR; + } + + if (sample_type & PERF_SAMPLE_TRANSACTION) { + data->txn = intel_get_tsx_transaction(tsx_tuning, ax); + data->sample_flags |= PERF_SAMPLE_TRANSACTION; + } +} + /* * With adaptive PEBS the layout depends on what fields are configured. */ @@ -2069,12 +2243,14 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, struct pt_regs *regs) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + u64 sample_type = event->attr.sample_type; struct pebs_basic *basic = __pebs; void *next_record = basic + 1; - u64 sample_type, format_group; struct pebs_meminfo *meminfo = NULL; struct pebs_gprs *gprs = NULL; struct x86_perf_regs *perf_regs; + u64 format_group; + u16 retire; if (basic == NULL) return; @@ -2082,31 +2258,17 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, perf_regs = container_of(regs, struct x86_perf_regs, regs); perf_regs->xmm_regs = NULL; - sample_type = event->attr.sample_type; format_group = basic->format_group; - perf_sample_data_init(data, 0, event->hw.last_period); - setup_pebs_time(event, data, basic->tsc); - - /* - * We must however always use iregs for the unwinder to stay sane; the - * record BP,SP,IP can point into thin air when the record is from a - * previous PMI context or an (I)RET happened between the record and - * PMI. - */ - perf_sample_save_callchain(data, event, iregs); + __setup_perf_sample_data(event, iregs, data); *regs = *iregs; - /* The ip in basic is EventingIP */ - set_linear_ip(regs, basic->ip); - regs->flags = PERF_EFLAGS_EXACT; - if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) { - if (x86_pmu.flags & PMU_FL_RETIRE_LATENCY) - data->weight.var3_w = basic->retire_latency; - else - data->weight.var3_w = 0; - } + /* basic group */ + retire = x86_pmu.flags & PMU_FL_RETIRE_LATENCY ? + basic->retire_latency : 0; + __setup_pebs_basic_group(event, regs, data, sample_type, + basic->ip, basic->tsc, retire); /* * The record for MEMINFO is in front of GP @@ -2122,54 +2284,20 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, gprs = next_record; next_record = gprs + 1; - if (event->attr.precise_ip < 2) { - set_linear_ip(regs, gprs->ip); - regs->flags &= ~PERF_EFLAGS_EXACT; - } - - if (sample_type & (PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER)) - adaptive_pebs_save_regs(regs, gprs); + __setup_pebs_gpr_group(event, regs, gprs, sample_type); } if (format_group & PEBS_DATACFG_MEMINFO) { - if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) { - u64 latency = x86_pmu.flags & PMU_FL_INSTR_LATENCY ? - meminfo->cache_latency : meminfo->mem_latency; - - if (x86_pmu.flags & PMU_FL_INSTR_LATENCY) - data->weight.var2_w = meminfo->instr_latency; - - /* - * Although meminfo::latency is defined as a u64, - * only the lower 32 bits include the valid data - * in practice on Ice Lake and earlier platforms. 
- */ - if (sample_type & PERF_SAMPLE_WEIGHT) { - data->weight.full = latency ?: - intel_get_tsx_weight(meminfo->tsx_tuning); - } else { - data->weight.var1_dw = (u32)latency ?: - intel_get_tsx_weight(meminfo->tsx_tuning); - } - - data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE; - } - - if (sample_type & PERF_SAMPLE_DATA_SRC) { - data->data_src.val = get_data_src(event, meminfo->aux); - data->sample_flags |= PERF_SAMPLE_DATA_SRC; - } - - if (sample_type & PERF_SAMPLE_ADDR_TYPE) { - data->addr = meminfo->address; - data->sample_flags |= PERF_SAMPLE_ADDR; - } - - if (sample_type & PERF_SAMPLE_TRANSACTION) { - data->txn = intel_get_tsx_transaction(meminfo->tsx_tuning, - gprs ? gprs->ax : 0); - data->sample_flags |= PERF_SAMPLE_TRANSACTION; - } + u64 latency = x86_pmu.flags & PMU_FL_INSTR_LATENCY ? + meminfo->cache_latency : meminfo->mem_latency; + u64 instr_latency = x86_pmu.flags & PMU_FL_INSTR_LATENCY ? + meminfo->instr_latency : 0; + u64 ax = gprs ? gprs->ax : 0; + + __setup_pebs_meminfo_group(event, data, sample_type, latency, + instr_latency, meminfo->address, + meminfo->aux, meminfo->tsx_tuning, + ax); } if (format_group & PEBS_DATACFG_XMMS) { @@ -2220,6 +2348,135 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, format_group); } +static inline bool arch_pebs_record_continued(struct arch_pebs_header *header) +{ + /* Continue bit or null PEBS record indicates fragment follows. */ + return header->cont || !(header->format & GENMASK_ULL(63, 16)); +} + +static void setup_arch_pebs_sample_data(struct perf_event *event, + struct pt_regs *iregs, + void *__pebs, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + u64 sample_type = event->attr.sample_type; + struct arch_pebs_header *header = NULL; + struct arch_pebs_aux *meminfo = NULL; + struct arch_pebs_gprs *gprs = NULL; + struct x86_perf_regs *perf_regs; + void *next_record; + void *at = __pebs; + + if (at == NULL) + return; + + perf_regs = container_of(regs, struct x86_perf_regs, regs); + perf_regs->xmm_regs = NULL; + + __setup_perf_sample_data(event, iregs, data); + + *regs = *iregs; + +again: + header = at; + next_record = at + sizeof(struct arch_pebs_header); + if (header->basic) { + struct arch_pebs_basic *basic = next_record; + u16 retire = 0; + + next_record = basic + 1; + + if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) + retire = basic->valid ? basic->retire : 0; + __setup_pebs_basic_group(event, regs, data, sample_type, + basic->ip, basic->tsc, retire); + } + + /* + * The record for MEMINFO is in front of GP + * But PERF_SAMPLE_TRANSACTION needs gprs->ax. + * Save the pointer here but process later. + */ + if (header->aux) { + meminfo = next_record; + next_record = meminfo + 1; + } + + if (header->gpr) { + gprs = next_record; + next_record = gprs + 1; + + __setup_pebs_gpr_group(event, regs, + (struct pebs_gprs *)gprs, + sample_type); + } + + if (header->aux) { + u64 ax = gprs ? 
gprs->ax : 0; + + __setup_pebs_meminfo_group(event, data, sample_type, + meminfo->cache_latency, + meminfo->instr_latency, + meminfo->address, meminfo->aux, + meminfo->tsx_tuning, ax); + } + + if (header->xmm) { + struct pebs_xmm *xmm; + + next_record += sizeof(struct arch_pebs_xer_header); + + xmm = next_record; + perf_regs->xmm_regs = xmm->xmm; + next_record = xmm + 1; + } + + if (header->lbr) { + struct arch_pebs_lbr_header *lbr_header = next_record; + struct lbr_entry *lbr; + int num_lbr; + + next_record = lbr_header + 1; + lbr = next_record; + + num_lbr = header->lbr == ARCH_PEBS_LBR_NUM_VAR ? + lbr_header->depth : + header->lbr * ARCH_PEBS_BASE_LBR_ENTRIES; + next_record += num_lbr * sizeof(struct lbr_entry); + + if (has_branch_stack(event)) { + intel_pmu_store_pebs_lbrs(lbr); + intel_pmu_lbr_save_brstack(data, cpuc, event); + } + } + + if (header->cntr) { + struct arch_pebs_cntr_header *cntr = next_record; + unsigned int nr; + + next_record += sizeof(struct arch_pebs_cntr_header); + + if (is_pebs_counter_event_group(event)) { + __setup_pebs_counter_group(cpuc, event, + (struct pebs_cntr_header *)cntr, next_record); + data->sample_flags |= PERF_SAMPLE_READ; + } + + nr = hweight32(cntr->cntr) + hweight32(cntr->fixed); + if (cntr->metrics == INTEL_CNTR_METRICS) + nr += 2; + next_record += nr * sizeof(u64); + } + + /* Parse subsequent fragments, if any. */ + if (arch_pebs_record_continued(header)) { + at = at + header->size; + goto again; + } +} + static inline void * get_next_pebs_record_by_bit(void *base, void *top, int bit) { @@ -2602,6 +2859,57 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d } } +static __always_inline void +__intel_pmu_handle_pebs_record(struct pt_regs *iregs, + struct pt_regs *regs, + struct perf_sample_data *data, + void *at, u64 pebs_status, + short *counts, void **last, + setup_fn setup_sample) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct perf_event *event; + int bit; + + for_each_set_bit(bit, (unsigned long *)&pebs_status, X86_PMC_IDX_MAX) { + event = cpuc->events[bit]; + + if (WARN_ON_ONCE(!event) || + WARN_ON_ONCE(!event->attr.precise_ip)) + continue; + + if (counts[bit]++) { + __intel_pmu_pebs_event(event, iregs, regs, data, + last[bit], setup_sample); + } + + last[bit] = at; + } +} + +static __always_inline void +__intel_pmu_handle_last_pebs_record(struct pt_regs *iregs, + struct pt_regs *regs, + struct perf_sample_data *data, + u64 mask, short *counts, void **last, + setup_fn setup_sample) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct perf_event *event; + int bit; + + for_each_set_bit(bit, (unsigned long *)&mask, X86_PMC_IDX_MAX) { + if (!counts[bit]) + continue; + + event = cpuc->events[bit]; + + __intel_pmu_pebs_last_event(event, iregs, regs, data, last[bit], + counts[bit], setup_sample); + } + +} + static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_data *data) { short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {}; @@ -2611,9 +2919,7 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d struct x86_perf_regs perf_regs; struct pt_regs *regs = &perf_regs.regs; struct pebs_basic *basic; - struct perf_event *event; void *base, *at, *top; - int bit; u64 mask; if (!x86_pmu.pebs_active) @@ -2626,6 +2932,7 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d mask = hybrid(cpuc->pmu, pebs_events_mask) | (hybrid(cpuc->pmu, fixed_cntr_mask64) << INTEL_PMC_IDX_FIXED); + mask 
&= cpuc->pebs_enabled; if (unlikely(base >= top)) { intel_pmu_pebs_event_update_no_drain(cpuc, mask); return; } @@ -2643,38 +2950,114 @@ if (basic->format_size != cpuc->pebs_record_size) continue; - pebs_status = basic->applicable_counters & cpuc->pebs_enabled & mask; - for_each_set_bit(bit, (unsigned long *)&pebs_status, X86_PMC_IDX_MAX) { - event = cpuc->events[bit]; + pebs_status = mask & basic->applicable_counters; + __intel_pmu_handle_pebs_record(iregs, regs, data, at, + pebs_status, counts, last, + setup_pebs_adaptive_sample_data); } - if (WARN_ON_ONCE(!event) || - WARN_ON_ONCE(!event->attr.precise_ip)) - continue; + __intel_pmu_handle_last_pebs_record(iregs, regs, data, mask, counts, last, + setup_pebs_adaptive_sample_data); +} - if (counts[bit]++) { - __intel_pmu_pebs_event(event, iregs, regs, data, last[bit], - setup_pebs_adaptive_sample_data); - } - last[bit] = at; - } +static void intel_pmu_drain_arch_pebs(struct pt_regs *iregs, + struct perf_sample_data *data) +{ + short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {}; + void *last[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS]; + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + union arch_pebs_index index; + struct x86_perf_regs perf_regs; + struct pt_regs *regs = &perf_regs.regs; + void *base, *at, *top; + u64 mask; + + rdmsrq(MSR_IA32_PEBS_INDEX, index.whole); + + if (unlikely(!index.wr)) { + intel_pmu_pebs_event_update_no_drain(cpuc, X86_PMC_IDX_MAX); + return; } - for_each_set_bit(bit, (unsigned long *)&mask, X86_PMC_IDX_MAX) { - if (!counts[bit]) + base = cpuc->pebs_vaddr; + top = cpuc->pebs_vaddr + (index.wr << ARCH_PEBS_INDEX_WR_SHIFT); + + index.wr = 0; + index.full = 0; + index.en = 1; + if (cpuc->n_pebs == cpuc->n_large_pebs) + index.thresh = ARCH_PEBS_THRESH_MULTI; + else + index.thresh = ARCH_PEBS_THRESH_SINGLE; + wrmsrq(MSR_IA32_PEBS_INDEX, index.whole); + + mask = hybrid(cpuc->pmu, arch_pebs_cap).counters & cpuc->pebs_enabled; + + if (!iregs) + iregs = &dummy_iregs; + + /* Process all but the last event for each counter. */ + for (at = base; at < top;) { + struct arch_pebs_header *header; + struct arch_pebs_basic *basic; + u64 pebs_status; + + header = at; + + if (WARN_ON_ONCE(!header->size)) + break; + + /* 1st fragment or single record must have basic group */ + if (!header->basic) { + at += header->size; continue; + } - event = cpuc->events[bit]; + basic = at + sizeof(struct arch_pebs_header); + pebs_status = mask & basic->applicable_counters; + __intel_pmu_handle_pebs_record(iregs, regs, data, at, + pebs_status, counts, last, + setup_arch_pebs_sample_data); + + /* Skip non-last fragments */ + while (arch_pebs_record_continued(header)) { + if (!header->size) + break; + at += header->size; + header = at; + } - __intel_pmu_pebs_last_event(event, iregs, regs, data, last[bit], - counts[bit], setup_pebs_adaptive_sample_data); + /* Skip last fragment or the single record */ + at += header->size; } + + __intel_pmu_handle_last_pebs_record(iregs, regs, data, mask, + counts, last, + setup_arch_pebs_sample_data); +} + +static void __init intel_arch_pebs_init(void) +{ + /* + * Current hybrid platforms either support arch-PEBS on all core + * types or on none of them. So directly set the x86_pmu.arch_pebs + * flag if the boot CPU supports arch-PEBS. 
+ */ + x86_pmu.arch_pebs = 1; + x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE; + x86_pmu.drain_pebs = intel_pmu_drain_arch_pebs; + x86_pmu.pebs_capable = ~0ULL; + x86_pmu.flags |= PMU_FL_PEBS_ALL; + + x86_pmu.pebs_enable = __intel_pmu_pebs_enable; + x86_pmu.pebs_disable = __intel_pmu_pebs_disable; } /* * PEBS probe and setup */ -void __init intel_pebs_init(void) +static void __init intel_ds_pebs_init(void) { /* * No support for 32bit formats @@ -2736,10 +3119,8 @@ void __init intel_pebs_init(void) break; case 6: - if (x86_pmu.intel_cap.pebs_baseline) { + if (x86_pmu.intel_cap.pebs_baseline) x86_pmu.large_pebs_flags |= PERF_SAMPLE_READ; - x86_pmu.late_setup = intel_pmu_late_setup; - } fallthrough; case 5: x86_pmu.pebs_ept = 1; @@ -2789,6 +3170,14 @@ void __init intel_pebs_init(void) } } +void __init intel_pebs_init(void) +{ + if (x86_pmu.intel_cap.pebs_format == 0xf) + intel_arch_pebs_init(); + else + intel_ds_pebs_init(); +} + void perf_restore_debug_store(void) { struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds); diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 7aa59966e7c3..72f2adcda7c6 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include <linux/kvm_types.h> #include <linux/perf_event.h> #include <linux/types.h> @@ -1705,7 +1706,7 @@ void x86_perf_get_lbr(struct x86_pmu_lbr *lbr) lbr->info = x86_pmu.lbr_info; lbr->has_callstack = x86_pmu_has_lbr_callstack(); } -EXPORT_SYMBOL_GPL(x86_perf_get_lbr); +EXPORT_SYMBOL_FOR_KVM(x86_perf_get_lbr); struct event_constraint vlbr_constraint = __EVENT_CONSTRAINT(INTEL_FIXED_VLBR_EVENT, (1ULL << INTEL_PMC_IDX_FIXED_VLBR), diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index e8cf29d2b10c..44524a387c58 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -17,6 +17,7 @@ #include <linux/limits.h> #include <linux/slab.h> #include <linux/device.h> +#include <linux/kvm_types.h> #include <asm/cpuid/api.h> #include <asm/perf_event.h> @@ -82,13 +83,13 @@ u32 intel_pt_validate_cap(u32 *caps, enum pt_capabilities capability) return (c & cd->mask) >> shift; } -EXPORT_SYMBOL_GPL(intel_pt_validate_cap); +EXPORT_SYMBOL_FOR_KVM(intel_pt_validate_cap); u32 intel_pt_validate_hw_cap(enum pt_capabilities cap) { return intel_pt_validate_cap(pt_pmu.caps, cap); } -EXPORT_SYMBOL_GPL(intel_pt_validate_hw_cap); +EXPORT_SYMBOL_FOR_KVM(intel_pt_validate_hw_cap); static ssize_t pt_cap_show(struct device *cdev, struct device_attribute *attr, @@ -1590,7 +1591,7 @@ void intel_pt_handle_vmx(int on) local_irq_restore(flags); } -EXPORT_SYMBOL_GPL(intel_pt_handle_vmx); +EXPORT_SYMBOL_FOR_KVM(intel_pt_handle_vmx); /* * PMU callbacks diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index d6c945cc5d07..e228e564b15e 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1325,8 +1325,6 @@ static void uncore_pci_sub_driver_init(void) continue; pmu = &type->pmus[UNCORE_PCI_DEV_IDX(ids->driver_data)]; - if (!pmu) - continue; if (uncore_pci_get_dev_die_info(pci_sub_dev, &die)) continue; diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 2b969386dcdd..3161ec0a3416 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -283,8 +283,9 @@ struct cpu_hw_events { * Intel DebugStore bits */ struct debug_store *ds; - void *ds_pebs_vaddr; void *ds_bts_vaddr; + /* DS based PEBS or arch-PEBS buffer address */ + void *pebs_vaddr; u64 
pebs_enabled; int n_pebs; int n_large_pebs; @@ -303,6 +304,8 @@ struct cpu_hw_events { /* Intel ACR configuration */ u64 acr_cfg_b[X86_PMC_IDX_MAX]; u64 acr_cfg_c[X86_PMC_IDX_MAX]; + /* Cached CFG_C values */ + u64 cfg_c_val[X86_PMC_IDX_MAX]; /* * Intel LBR bits @@ -708,6 +711,12 @@ enum hybrid_pmu_type { hybrid_big_small_tiny = hybrid_big | hybrid_small_tiny, }; +struct arch_pebs_cap { + u64 caps; + u64 counters; + u64 pdists; +}; + struct x86_hybrid_pmu { struct pmu pmu; const char *name; @@ -752,6 +761,8 @@ struct x86_hybrid_pmu { mid_ack :1, enabled_ack :1; + struct arch_pebs_cap arch_pebs_cap; + u64 pebs_data_source[PERF_PEBS_DATA_SOURCE_MAX]; }; @@ -906,7 +917,7 @@ struct x86_pmu { union perf_capabilities intel_cap; /* - * Intel DebugStore bits + * Intel DebugStore and PEBS bits */ unsigned int bts :1, bts_active :1, @@ -917,7 +928,8 @@ struct x86_pmu { pebs_no_tlb :1, pebs_no_isolation :1, pebs_block :1, - pebs_ept :1; + pebs_ept :1, + arch_pebs :1; int pebs_record_size; int pebs_buffer_size; u64 pebs_events_mask; @@ -930,6 +942,11 @@ struct x86_pmu { u64 pebs_capable; /* + * Intel Architectural PEBS + */ + struct arch_pebs_cap arch_pebs_cap; + + /* * Intel LBR */ unsigned int lbr_tos, lbr_from, lbr_to, @@ -1124,7 +1141,6 @@ static struct perf_pmu_format_hybrid_attr format_attr_hybrid_##_name = {\ .pmu_type = _pmu, \ } -int is_x86_event(struct perf_event *event); struct pmu *x86_get_pmu(unsigned int cpu); extern struct x86_pmu x86_pmu __read_mostly; @@ -1217,7 +1233,7 @@ int x86_reserve_hardware(void); void x86_release_hardware(void); -int x86_pmu_max_precise(void); +int x86_pmu_max_precise(struct pmu *pmu); void hw_perf_lbr_event_destroy(struct perf_event *event); @@ -1604,6 +1620,14 @@ extern void intel_cpuc_finish(struct cpu_hw_events *cpuc); int intel_pmu_init(void); +int alloc_arch_pebs_buf_on_cpu(int cpu); + +void release_arch_pebs_buf_on_cpu(int cpu); + +void init_arch_pebs_on_cpu(int cpu); + +void fini_arch_pebs_on_cpu(int cpu); + void init_debug_store_on_cpu(int cpu); void fini_debug_store_on_cpu(int cpu); @@ -1760,6 +1784,8 @@ void intel_pmu_pebs_data_source_cmt(void); void intel_pmu_pebs_data_source_lnl(void); +u64 intel_get_arch_pebs_data_config(struct perf_event *event); + int intel_pmu_setup_lbr_filter(struct perf_event *event); void intel_pt_interrupt(void); @@ -1792,6 +1818,11 @@ static inline int intel_pmu_max_num_pebs(struct pmu *pmu) return fls((u32)hybrid(pmu, pebs_events_mask)); } +static inline bool intel_pmu_has_pebs(void) +{ + return x86_pmu.ds_pebs || x86_pmu.arch_pebs; +} + #else /* CONFIG_CPU_SUP_INTEL */ static inline void reserve_ds_buffers(void) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index fc5f32d4da6e..4b1a6ade1700 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -30,7 +30,7 @@ enum cpuid_leafs CPUID_6_EAX, CPUID_8000_000A_EDX, CPUID_7_ECX, - CPUID_8000_0007_EBX, + CPUID_LNX_6, CPUID_7_EDX, CPUID_8000_001F_EAX, CPUID_8000_0021_EAX, diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 4091a776e37a..d90ce601917c 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -314,13 +314,14 @@ #define X86_FEATURE_SM4 (12*32+ 2) /* SM4 instructions */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* "avx_vnni" AVX VNNI instructions */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* "avx512_bf16" AVX512 BFLOAT16 instructions */ +#define X86_FEATURE_LASS (12*32+ 6) /* "lass" Linear Address Space Separation */ #define 
X86_FEATURE_CMPCCXADD (12*32+ 7) /* CMPccXADD instructions */ #define X86_FEATURE_ARCH_PERFMON_EXT (12*32+ 8) /* Intel Architectural PerfMon Extension */ #define X86_FEATURE_FZRM (12*32+10) /* Fast zero-length REP MOVSB */ #define X86_FEATURE_FSRS (12*32+11) /* Fast short REP STOSB */ #define X86_FEATURE_FSRC (12*32+12) /* Fast short REP {CMPSB,SCASB} */ #define X86_FEATURE_FRED (12*32+17) /* "fred" Flexible Return and Event Delivery */ -#define X86_FEATURE_LKGS (12*32+18) /* Load "kernel" (userspace) GS */ +#define X86_FEATURE_LKGS (12*32+18) /* Like MOV_GS except MSR_KERNEL_GS_BASE = GS.base */ #define X86_FEATURE_WRMSRNS (12*32+19) /* Non-serializing WRMSR */ #define X86_FEATURE_AMX_FP16 (12*32+21) /* AMX fp16 Support */ #define X86_FEATURE_AVX_IFMA (12*32+23) /* Support for VPMADD52[H,L]UQ */ @@ -407,9 +408,12 @@ #define X86_FEATURE_ENQCMD (16*32+29) /* "enqcmd" ENQCMD and ENQCMDS instructions */ #define X86_FEATURE_SGX_LC (16*32+30) /* "sgx_lc" Software Guard Extensions Launch Control */ -/* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */ +/* + * Linux-defined word for use with scattered/synthetic bits. + */ #define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* "overflow_recov" MCA overflow recovery support */ #define X86_FEATURE_SUCCOR (17*32+ 1) /* "succor" Uncorrectable error containment and recovery */ + #define X86_FEATURE_SMCA (17*32+ 3) /* "smca" Scalable MCA */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ @@ -499,6 +503,9 @@ #define X86_FEATURE_IBPB_EXIT_TO_USER (21*32+14) /* Use IBPB on exit-to-userspace, see VMSCAPE bug */ #define X86_FEATURE_ABMC (21*32+15) /* Assignable Bandwidth Monitoring Counters */ #define X86_FEATURE_MSR_IMM (21*32+16) /* MSR immediate form instructions */ +#define X86_FEATURE_SGX_EUPDATESVN (21*32+17) /* Support for ENCLS[EUPDATESVN] instruction */ + +#define X86_FEATURE_SDCIAE (21*32+18) /* L3 Smart Data Cache Injection Allocation Enforcement */ /* * BUG word(s) diff --git a/arch/x86/include/asm/fred.h b/arch/x86/include/asm/fred.h index 12b34d5b2953..2bb65677c079 100644 --- a/arch/x86/include/asm/fred.h +++ b/arch/x86/include/asm/fred.h @@ -79,7 +79,7 @@ static __always_inline void fred_entry_from_kvm(unsigned int type, unsigned int .type = type, .vector = vector, .nmi = type == EVENT_TYPE_NMI, - .lm = 1, + .l = 1, }; asm_fred_entry_from_kvm(ss); diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 93156ac4ffe0..b08c95872eed 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -56,6 +56,11 @@ arch_ftrace_get_regs(struct ftrace_regs *fregs) return &arch_ftrace_regs(fregs)->regs; } +#define arch_ftrace_partial_regs(regs) do { \ + regs->flags &= ~X86_EFLAGS_FIXED; \ + regs->cs = __KERNEL_CS; \ +} while (0) + #define arch_ftrace_fill_perf_regs(fregs, _regs) do { \ (_regs)->ip = arch_ftrace_regs(fregs)->regs.ip; \ (_regs)->sp = arch_ftrace_regs(fregs)->regs.sp; \ diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h index 6e2458088800..fe5d9a10d900 100644 --- a/arch/x86/include/asm/futex.h +++ b/arch/x86/include/asm/futex.h @@ -46,38 +46,31 @@ do { \ } while(0) static __always_inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, - u32 __user *uaddr) + u32 __user *uaddr) { - if (can_do_masked_user_access()) - uaddr = masked_user_access_begin(uaddr); - else if (!user_access_begin(uaddr, sizeof(u32))) - return -EFAULT; - - switch (op) { - case FUTEX_OP_SET: - unsafe_atomic_op1("xchgl %0, %2", oval, uaddr, oparg, Efault); - 
break; - case FUTEX_OP_ADD: - unsafe_atomic_op1(LOCK_PREFIX "xaddl %0, %2", oval, - uaddr, oparg, Efault); - break; - case FUTEX_OP_OR: - unsafe_atomic_op2("orl %4, %3", oval, uaddr, oparg, Efault); - break; - case FUTEX_OP_ANDN: - unsafe_atomic_op2("andl %4, %3", oval, uaddr, ~oparg, Efault); - break; - case FUTEX_OP_XOR: - unsafe_atomic_op2("xorl %4, %3", oval, uaddr, oparg, Efault); - break; - default: - user_access_end(); - return -ENOSYS; + scoped_user_rw_access(uaddr, Efault) { + switch (op) { + case FUTEX_OP_SET: + unsafe_atomic_op1("xchgl %0, %2", oval, uaddr, oparg, Efault); + break; + case FUTEX_OP_ADD: + unsafe_atomic_op1(LOCK_PREFIX "xaddl %0, %2", oval, uaddr, oparg, Efault); + break; + case FUTEX_OP_OR: + unsafe_atomic_op2("orl %4, %3", oval, uaddr, oparg, Efault); + break; + case FUTEX_OP_ANDN: + unsafe_atomic_op2("andl %4, %3", oval, uaddr, ~oparg, Efault); + break; + case FUTEX_OP_XOR: + unsafe_atomic_op2("xorl %4, %3", oval, uaddr, oparg, Efault); + break; + default: + return -ENOSYS; + } } - user_access_end(); return 0; Efault: - user_access_end(); return -EFAULT; } @@ -86,21 +79,19 @@ static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, { int ret = 0; - if (can_do_masked_user_access()) - uaddr = masked_user_access_begin(uaddr); - else if (!user_access_begin(uaddr, sizeof(u32))) - return -EFAULT; - asm volatile("\n" - "1:\t" LOCK_PREFIX "cmpxchgl %3, %2\n" - "2:\n" - _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %0) \ - : "+r" (ret), "=a" (oldval), "+m" (*uaddr) - : "r" (newval), "1" (oldval) - : "memory" - ); - user_access_end(); - *uval = oldval; + scoped_user_rw_access(uaddr, Efault) { + asm_inline volatile("\n" + "1:\t" LOCK_PREFIX "cmpxchgl %3, %2\n" + "2:\n" + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %0) + : "+r" (ret), "=a" (oldval), "+m" (*uaddr) + : "r" (newval), "1" (oldval) + : "memory"); + *uval = oldval; + } return ret; +Efault: + return -EFAULT; } #endif diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index abd637e54e94..3218770670d3 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -393,7 +393,7 @@ static __always_inline void __##func(struct pt_regs *regs) /** * DEFINE_IDTENTRY_VC_KERNEL - Emit code for VMM communication handler - when raised from kernel mode + * when raised from kernel mode * @func: Function name of the entry point * * Maps to DEFINE_IDTENTRY_RAW_ERRORCODE @@ -403,7 +403,7 @@ static __always_inline void __##func(struct pt_regs *regs) /** * DEFINE_IDTENTRY_VC_USER - Emit code for VMM communication handler - when raised from user mode + * when raised from user mode * @func: Function name of the entry point * * Maps to DEFINE_IDTENTRY_RAW_ERRORCODE diff --git a/arch/x86/include/asm/insn-eval.h b/arch/x86/include/asm/insn-eval.h index 54368a43abf6..4733e9064ee5 100644 --- a/arch/x86/include/asm/insn-eval.h +++ b/arch/x86/include/asm/insn-eval.h @@ -44,4 +44,6 @@ enum insn_mmio_type { enum insn_mmio_type insn_decode_mmio(struct insn *insn, int *bytes); +bool insn_is_nop(struct insn *insn); + #endif /* _ASM_X86_INSN_EVAL_H */ diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h index 091f88c8254d..846d21c1a7f8 100644 --- a/arch/x86/include/asm/insn.h +++ b/arch/x86/include/asm/insn.h @@ -312,7 +312,6 @@ static inline int insn_offset_immediate(struct insn *insn) /** * for_each_insn_prefix() -- Iterate prefixes in the instruction * @insn: Pointer to struct insn. - * @idx: Index storage. * @prefix: Prefix byte. 
* * Iterate prefix bytes of given @insn. Each prefix byte is stored in @prefix @@ -321,8 +320,8 @@ static inline int insn_offset_immediate(struct insn *insn) * Since prefixes.nbytes can be bigger than 4 if some prefixes * are repeated, it cannot be used for looping over the prefixes. */ -#define for_each_insn_prefix(insn, idx, prefix) \ - for (idx = 0; idx < ARRAY_SIZE(insn->prefixes.bytes) && (prefix = insn->prefixes.bytes[idx]) != 0; idx++) +#define for_each_insn_prefix(insn, prefix) \ + for (int idx = 0; idx < ARRAY_SIZE(insn->prefixes.bytes) && (prefix = insn->prefixes.bytes[idx]) != 0; idx++) #define POP_SS_OPCODE 0x1f #define MOV_SREG_OPCODE 0x8e diff --git a/arch/x86/include/asm/intel_ds.h b/arch/x86/include/asm/intel_ds.h index 5dbeac48a5b9..695f87efbeb8 100644 --- a/arch/x86/include/asm/intel_ds.h +++ b/arch/x86/include/asm/intel_ds.h @@ -4,7 +4,15 @@ #include <linux/percpu-defs.h> #define BTS_BUFFER_SIZE (PAGE_SIZE << 4) -#define PEBS_BUFFER_SIZE (PAGE_SIZE << 4) +#define PEBS_BUFFER_SHIFT 4 +#define PEBS_BUFFER_SIZE (PAGE_SIZE << PEBS_BUFFER_SHIFT) + +/* + * The largest PEBS record could consume a page, ensure + * at least one record can still be written after a PMI is triggered. + */ +#define ARCH_PEBS_THRESH_MULTI ((PEBS_BUFFER_SIZE - PAGE_SIZE) >> PEBS_BUFFER_SHIFT) +#define ARCH_PEBS_THRESH_SINGLE 1 /* The maximal number of PEBS events: */ #define MAX_PEBS_EVENTS_FMT4 8 diff --git a/arch/x86/include/asm/kvm_types.h b/arch/x86/include/asm/kvm_types.h index 23268a188e70..d7c704ed1be9 100644 --- a/arch/x86/include/asm/kvm_types.h +++ b/arch/x86/include/asm/kvm_types.h @@ -10,6 +10,11 @@ #define KVM_SUB_MODULES kvm-intel #else #undef KVM_SUB_MODULES +/* + * Don't export symbols for KVM without vendor modules, as kvm.ko is built iff + * at least one vendor module is enabled. + */ +#define EXPORT_SYMBOL_FOR_KVM(symbol) #endif #define KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE 40 diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 31e3cb550fb3..2d98886de09a 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -48,6 +48,7 @@ /* AMD-specific bits */ #define MCI_STATUS_TCC BIT_ULL(55) /* Task context corrupt */ +#define MCI_STATUS_PADDRV BIT_ULL(54) /* Valid System Physical Address */ #define MCI_STATUS_SYNDV BIT_ULL(53) /* synd reg. valid */ #define MCI_STATUS_DEFERRED BIT_ULL(44) /* uncorrected error, deferred exception */ #define MCI_STATUS_POISON BIT_ULL(43) /* access poisonous data */ @@ -62,6 +63,7 @@ */ #define MCI_CONFIG_MCAX 0x1 #define MCI_CONFIG_FRUTEXT BIT_ULL(9) +#define MCI_CONFIG_PADDRV BIT_ULL(11) #define MCI_IPID_MCATYPE 0xFFFF0000 #define MCI_IPID_HWID 0xFFF @@ -166,6 +168,12 @@ #define MCE_IN_KERNEL_COPYIN BIT_ULL(7) /* + * Indicates that the handler should check and clear Deferred error registers + * rather than common ones. + */ +#define MCE_CHECK_DFR_REGS BIT_ULL(8) + +/* * This structure contains all data related to the MCE log. Also * carries a signature to make it easier to find from external * debugging tools. 
Each entry is only valid when its finished flag @@ -302,6 +310,12 @@ DECLARE_PER_CPU(struct mce, injectm); /* Disable CMCI/polling for MCA bank claimed by firmware */ extern void mce_disable_bank(int bank); +#ifdef CONFIG_X86_MCE_THRESHOLD +void mce_save_apei_thr_limit(u32 thr_limit); +#else +static inline void mce_save_apei_thr_limit(u32 thr_limit) { } +#endif /* CONFIG_X86_MCE_THRESHOLD */ + /* * Exception handler */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 9e1720d73244..3d0a0950d20a 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -166,6 +166,10 @@ * Processor MMIO stale data * vulnerabilities. */ +#define ARCH_CAP_MCU_ENUM BIT(16) /* + * Indicates the presence of microcode update + * feature enumeration and status information. + */ #define ARCH_CAP_FB_CLEAR BIT(17) /* * VERW clears CPU fill buffer * even on MDS_NO CPUs. @@ -327,6 +331,26 @@ PERF_CAP_PEBS_FORMAT | PERF_CAP_PEBS_BASELINE | \ PERF_CAP_PEBS_TIMING_INFO) +/* Arch PEBS */ +#define MSR_IA32_PEBS_BASE 0x000003f4 +#define MSR_IA32_PEBS_INDEX 0x000003f5 +#define ARCH_PEBS_OFFSET_MASK 0x7fffff +#define ARCH_PEBS_INDEX_WR_SHIFT 4 + +#define ARCH_PEBS_RELOAD 0xffffffff +#define ARCH_PEBS_CNTR_ALLOW BIT_ULL(35) +#define ARCH_PEBS_CNTR_GP BIT_ULL(36) +#define ARCH_PEBS_CNTR_FIXED BIT_ULL(37) +#define ARCH_PEBS_CNTR_METRICS BIT_ULL(38) +#define ARCH_PEBS_LBR_SHIFT 40 +#define ARCH_PEBS_LBR (0x3ull << ARCH_PEBS_LBR_SHIFT) +#define ARCH_PEBS_VECR_XMM BIT_ULL(49) +#define ARCH_PEBS_GPR BIT_ULL(61) +#define ARCH_PEBS_AUX BIT_ULL(62) +#define ARCH_PEBS_EN BIT_ULL(63) +#define ARCH_PEBS_CNTR_MASK (ARCH_PEBS_CNTR_GP | ARCH_PEBS_CNTR_FIXED | \ + ARCH_PEBS_CNTR_METRICS) + #define MSR_IA32_RTIT_CTL 0x00000570 #define RTIT_CTL_TRACEEN BIT(0) #define RTIT_CTL_CYCLEACC BIT(1) @@ -929,6 +953,10 @@ #define MSR_IA32_APICBASE_BASE (0xfffff<<12) #define MSR_IA32_UCODE_WRITE 0x00000079 + +#define MSR_IA32_MCU_ENUMERATION 0x0000007b +#define MCU_STAGING BIT(4) + #define MSR_IA32_UCODE_REV 0x0000008b /* Intel SGX Launch Enclave Public Key Hash MSRs */ @@ -1226,6 +1254,8 @@ #define MSR_IA32_VMX_VMFUNC 0x00000491 #define MSR_IA32_VMX_PROCBASED_CTLS3 0x00000492 +#define MSR_IA32_MCU_STAGING_MBOX_ADDR 0x000007a5 + /* Resctrl MSRs: */ /* - Intel: */ #define MSR_IA32_L3_QOS_CFG 0xc81 diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h index 53f4089333f2..2f0e47be79a4 100644 --- a/arch/x86/include/asm/page_64.h +++ b/arch/x86/include/asm/page_64.h @@ -9,6 +9,7 @@ #include <asm/alternative.h> #include <linux/kmsan-checks.h> +#include <linux/mmdebug.h> /* duplicated to the one in bootmem.h */ extern unsigned long max_pfn; @@ -31,13 +32,20 @@ static __always_inline unsigned long __phys_addr_nodebug(unsigned long x) #ifdef CONFIG_DEBUG_VIRTUAL extern unsigned long __phys_addr(unsigned long); -extern unsigned long __phys_addr_symbol(unsigned long); #else #define __phys_addr(x) __phys_addr_nodebug(x) -#define __phys_addr_symbol(x) \ - ((unsigned long)(x) - __START_KERNEL_map + phys_base) #endif +static inline unsigned long __phys_addr_symbol(unsigned long x) +{ + unsigned long y = x - __START_KERNEL_map; + + /* only check upper bounds since lower bounds will trigger carry */ + VIRTUAL_BUG_ON(y >= KERNEL_IMAGE_SIZE); + + return y + phys_base; +} + #define __phys_reloc_hide(x) (x) void clear_page_orig(void *page); diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 332428caaed2..725d0eff7acd 100644 --- a/arch/x86/include/asm/percpu.h 
+++ b/arch/x86/include/asm/percpu.h @@ -23,6 +23,7 @@ #else /* !__ASSEMBLY__: */ #include <linux/args.h> +#include <linux/bits.h> #include <linux/build_bug.h> #include <linux/stringify.h> #include <asm/asm.h> @@ -572,9 +573,9 @@ do { \ #define x86_this_cpu_constant_test_bit(_nr, _var) \ ({ \ unsigned long __percpu *addr__ = \ - (unsigned long __percpu *)&(_var) + ((_nr) / BITS_PER_LONG); \ + (unsigned long __percpu *)&(_var) + BIT_WORD(_nr); \ \ - !!((1UL << ((_nr) % BITS_PER_LONG)) & raw_cpu_read(*addr__)); \ + !!(BIT_MASK(_nr) & raw_cpu_read(*addr__)); \ }) #define x86_this_cpu_variable_test_bit(_nr, _var) \ diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 49a4d442f3fc..7276ba70c88a 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -141,16 +141,16 @@ #define ARCH_PERFMON_EVENTS_COUNT 7 #define PEBS_DATACFG_MEMINFO BIT_ULL(0) -#define PEBS_DATACFG_GP BIT_ULL(1) +#define PEBS_DATACFG_GP BIT_ULL(1) #define PEBS_DATACFG_XMMS BIT_ULL(2) #define PEBS_DATACFG_LBRS BIT_ULL(3) -#define PEBS_DATACFG_LBR_SHIFT 24 #define PEBS_DATACFG_CNTR BIT_ULL(4) +#define PEBS_DATACFG_METRICS BIT_ULL(5) +#define PEBS_DATACFG_LBR_SHIFT 24 #define PEBS_DATACFG_CNTR_SHIFT 32 #define PEBS_DATACFG_CNTR_MASK GENMASK_ULL(15, 0) #define PEBS_DATACFG_FIX_SHIFT 48 #define PEBS_DATACFG_FIX_MASK GENMASK_ULL(7, 0) -#define PEBS_DATACFG_METRICS BIT_ULL(5) /* Steal the highest bit of pebs_data_cfg for SW usage */ #define PEBS_UPDATE_DS_SW BIT_ULL(63) @@ -200,6 +200,8 @@ union cpuid10_edx { #define ARCH_PERFMON_EXT_LEAF 0x00000023 #define ARCH_PERFMON_NUM_COUNTER_LEAF 0x1 #define ARCH_PERFMON_ACR_LEAF 0x2 +#define ARCH_PERFMON_PEBS_CAP_LEAF 0x4 +#define ARCH_PERFMON_PEBS_COUNTER_LEAF 0x5 union cpuid35_eax { struct { @@ -210,7 +212,10 @@ union cpuid35_eax { unsigned int acr_subleaf:1; /* Events Sub-Leaf */ unsigned int events_subleaf:1; - unsigned int reserved:28; + /* arch-PEBS Sub-Leaves */ + unsigned int pebs_caps_subleaf:1; + unsigned int pebs_cnts_subleaf:1; + unsigned int reserved:26; } split; unsigned int full; }; @@ -432,6 +437,8 @@ static inline bool is_topdown_idx(int idx) #define GLOBAL_STATUS_LBRS_FROZEN BIT_ULL(GLOBAL_STATUS_LBRS_FROZEN_BIT) #define GLOBAL_STATUS_TRACE_TOPAPMI_BIT 55 #define GLOBAL_STATUS_TRACE_TOPAPMI BIT_ULL(GLOBAL_STATUS_TRACE_TOPAPMI_BIT) +#define GLOBAL_STATUS_ARCH_PEBS_THRESHOLD_BIT 54 +#define GLOBAL_STATUS_ARCH_PEBS_THRESHOLD BIT_ULL(GLOBAL_STATUS_ARCH_PEBS_THRESHOLD_BIT) #define GLOBAL_STATUS_PERF_METRICS_OVF_BIT 48 #define GLOBAL_CTRL_EN_PERF_METRICS BIT_ULL(48) @@ -503,6 +510,107 @@ struct pebs_cntr_header { #define INTEL_CNTR_METRICS 0x3 /* + * Arch PEBS + */ +union arch_pebs_index { + struct { + u64 rsvd:4, + wr:23, + rsvd2:4, + full:1, + en:1, + rsvd3:3, + thresh:23, + rsvd4:5; + }; + u64 whole; +}; + +struct arch_pebs_header { + union { + u64 format; + struct { + u64 size:16, /* Record size */ + rsvd:14, + mode:1, /* 64BIT_MODE */ + cont:1, + rsvd2:3, + cntr:5, + lbr:2, + rsvd3:7, + xmm:1, + ymmh:1, + rsvd4:2, + opmask:1, + zmmh:1, + h16zmm:1, + rsvd5:5, + gpr:1, + aux:1, + basic:1; + }; + }; + u64 rsvd6; +}; + +struct arch_pebs_basic { + u64 ip; + u64 applicable_counters; + u64 tsc; + u64 retire :16, /* Retire Latency */ + valid :1, + rsvd :47; + u64 rsvd2; + u64 rsvd3; +}; + +struct arch_pebs_aux { + u64 address; + u64 rsvd; + u64 rsvd2; + u64 rsvd3; + u64 rsvd4; + u64 aux; + u64 instr_latency :16, + pad2 :16, + cache_latency :16, + pad3 :16; + u64 tsx_tuning; +}; + +struct arch_pebs_gprs { + u64 flags, 
ip, ax, cx, dx, bx, sp, bp, si, di; + u64 r8, r9, r10, r11, r12, r13, r14, r15, ssp; + u64 rsvd; +}; + +struct arch_pebs_xer_header { + u64 xstate; + u64 rsvd; +}; + +#define ARCH_PEBS_LBR_NAN 0x0 +#define ARCH_PEBS_LBR_NUM_8 0x1 +#define ARCH_PEBS_LBR_NUM_16 0x2 +#define ARCH_PEBS_LBR_NUM_VAR 0x3 +#define ARCH_PEBS_BASE_LBR_ENTRIES 8 +struct arch_pebs_lbr_header { + u64 rsvd; + u64 ctl; + u64 depth; + u64 ler_from; + u64 ler_to; + u64 ler_info; +}; + +struct arch_pebs_cntr_header { + u32 cntr; + u32 fixed; + u32 metrics; + u32 reserved; +}; + +/* * AMD Extended Performance Monitoring and Debug cpuid feature detection */ #define EXT_PERFMON_DEBUG_FEATURES 0x80000022 diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 50f75467f73d..35d062a2e304 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -84,8 +84,8 @@ struct fred_ss { : 4, /* Event was incident to enclave execution */ enclave : 1, - /* CPU was in long mode */ - lm : 1, + /* CPU was in 64-bit mode */ + l : 1, /* * Nested exception during FRED delivery, not set * for #DF. @@ -187,12 +187,12 @@ convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs); extern void send_sigtrap(struct pt_regs *regs, int error_code, int si_code); -static inline unsigned long regs_return_value(struct pt_regs *regs) +static __always_inline unsigned long regs_return_value(struct pt_regs *regs) { return regs->ax; } -static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) +static __always_inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) { regs->ax = rc; } @@ -277,34 +277,34 @@ static __always_inline bool ip_within_syscall_gap(struct pt_regs *regs) } #endif -static inline unsigned long kernel_stack_pointer(struct pt_regs *regs) +static __always_inline unsigned long kernel_stack_pointer(struct pt_regs *regs) { return regs->sp; } -static inline unsigned long instruction_pointer(struct pt_regs *regs) +static __always_inline unsigned long instruction_pointer(struct pt_regs *regs) { return regs->ip; } -static inline void instruction_pointer_set(struct pt_regs *regs, - unsigned long val) +static __always_inline +void instruction_pointer_set(struct pt_regs *regs, unsigned long val) { regs->ip = val; } -static inline unsigned long frame_pointer(struct pt_regs *regs) +static __always_inline unsigned long frame_pointer(struct pt_regs *regs) { return regs->bp; } -static inline unsigned long user_stack_pointer(struct pt_regs *regs) +static __always_inline unsigned long user_stack_pointer(struct pt_regs *regs) { return regs->sp; } -static inline void user_stack_pointer_set(struct pt_regs *regs, - unsigned long val) +static __always_inline +void user_stack_pointer_set(struct pt_regs *regs, unsigned long val) { regs->sp = val; } diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h index 6a0069761508..04958459a7ca 100644 --- a/arch/x86/include/asm/sgx.h +++ b/arch/x86/include/asm/sgx.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/** +/* * Copyright(c) 2016-20 Intel Corporation. * * Intel Software Guard Extensions (SGX) support. 
@@ -28,21 +28,22 @@ #define SGX_CPUID_EPC_MASK GENMASK(3, 0) enum sgx_encls_function { - ECREATE = 0x00, - EADD = 0x01, - EINIT = 0x02, - EREMOVE = 0x03, - EDGBRD = 0x04, - EDGBWR = 0x05, - EEXTEND = 0x06, - ELDU = 0x08, - EBLOCK = 0x09, - EPA = 0x0A, - EWB = 0x0B, - ETRACK = 0x0C, - EAUG = 0x0D, - EMODPR = 0x0E, - EMODT = 0x0F, + ECREATE = 0x00, + EADD = 0x01, + EINIT = 0x02, + EREMOVE = 0x03, + EDGBRD = 0x04, + EDGBWR = 0x05, + EEXTEND = 0x06, + ELDU = 0x08, + EBLOCK = 0x09, + EPA = 0x0A, + EWB = 0x0B, + ETRACK = 0x0C, + EAUG = 0x0D, + EMODPR = 0x0E, + EMODT = 0x0F, + EUPDATESVN = 0x18, }; /** @@ -65,15 +66,19 @@ enum sgx_encls_function { /** * enum sgx_return_code - The return code type for ENCLS, ENCLU and ENCLV - * %SGX_EPC_PAGE_CONFLICT: Page is being written by other ENCLS function. - * %SGX_NOT_TRACKED: Previous ETRACK's shootdown sequence has not + * @SGX_EPC_PAGE_CONFLICT: Page is being written by other ENCLS function. + * @SGX_NOT_TRACKED: Previous ETRACK's shootdown sequence has not * been completed yet. - * %SGX_CHILD_PRESENT SECS has child pages present in the EPC. - * %SGX_INVALID_EINITTOKEN: EINITTOKEN is invalid and enclave signer's + * @SGX_CHILD_PRESENT: SECS has child pages present in the EPC. + * @SGX_INVALID_EINITTOKEN: EINITTOKEN is invalid and enclave signer's * public key does not match IA32_SGXLEPUBKEYHASH. - * %SGX_PAGE_NOT_MODIFIABLE: The EPC page cannot be modified because it + * @SGX_PAGE_NOT_MODIFIABLE: The EPC page cannot be modified because it * is in the PENDING or MODIFIED state. - * %SGX_UNMASKED_EVENT: An unmasked event, e.g. INTR, was received + * @SGX_INSUFFICIENT_ENTROPY: Insufficient entropy in RNG. + * @SGX_NO_UPDATE: EUPDATESVN could not update the CPUSVN because the + * current SVN was not newer than CPUSVN. This is the most + * common error code returned by EUPDATESVN. + * @SGX_UNMASKED_EVENT: An unmasked event, e.g. INTR, was received */ enum sgx_return_code { SGX_EPC_PAGE_CONFLICT = 7, @@ -81,6 +86,8 @@ enum sgx_return_code { SGX_CHILD_PRESENT = 13, SGX_INVALID_EINITTOKEN = 16, SGX_PAGE_NOT_MODIFIABLE = 20, + SGX_INSUFFICIENT_ENTROPY = 29, + SGX_NO_UPDATE = 31, SGX_UNMASKED_EVENT = 128, }; @@ -89,7 +96,7 @@ enum sgx_return_code { /** * enum sgx_miscselect - additional information to an SSA frame - * %SGX_MISC_EXINFO: Report #PF or #GP to the SSA frame. + * @SGX_MISC_EXINFO: Report #PF or #GP to the SSA frame. * * Save State Area (SSA) is a stack inside the enclave used to store processor * state when an exception or interrupt occurs. This enum defines additional @@ -105,17 +112,17 @@ enum sgx_miscselect { #define SGX_SSA_MISC_EXINFO_SIZE 16 /** - * enum sgx_attributes - the attributes field in &struct sgx_secs - * %SGX_ATTR_INIT: Enclave can be entered (is initialized). - * %SGX_ATTR_DEBUG: Allow ENCLS(EDBGRD) and ENCLS(EDBGWR). - * %SGX_ATTR_MODE64BIT: Tell that this a 64-bit enclave. - * %SGX_ATTR_PROVISIONKEY: Allow to use provisioning keys for remote + * enum sgx_attribute - the attributes field in &struct sgx_secs + * @SGX_ATTR_INIT: Enclave can be entered (is initialized). + * @SGX_ATTR_DEBUG: Allow ENCLS(EDBGRD) and ENCLS(EDBGWR). + * @SGX_ATTR_MODE64BIT: Tell that this a 64-bit enclave. + * @SGX_ATTR_PROVISIONKEY: Allow to use provisioning keys for remote * attestation. - * %SGX_ATTR_KSS: Allow to use key separation and sharing (KSS). - * %SGX_ATTR_EINITTOKENKEY: Allow to use token signing key that is used to + * @SGX_ATTR_KSS: Allow to use key separation and sharing (KSS). 
+ * @SGX_ATTR_EINITTOKENKEY: Allow to use token signing key that is used to * sign cryptographic tokens that can be passed to * EINIT as an authorization to run an enclave. - * %SGX_ATTR_ASYNC_EXIT_NOTIFY: Allow enclaves to be notified after an + * @SGX_ATTR_ASYNC_EXIT_NOTIFY: Allow enclaves to be notified after an * asynchronous exit has occurred. */ enum sgx_attribute { @@ -188,7 +195,7 @@ struct sgx_secs { /** * enum sgx_tcs_flags - execution flags for TCS - * %SGX_TCS_DBGOPTIN: If enabled allows single-stepping and breakpoints + * @SGX_TCS_DBGOPTIN: If enabled allows single-stepping and breakpoints * inside an enclave. It is cleared by EADD but can * be set later with EDBGWR. */ @@ -253,11 +260,11 @@ struct sgx_pageinfo { /** * enum sgx_page_type - bits in the SECINFO flags defining the page type - * %SGX_PAGE_TYPE_SECS: a SECS page - * %SGX_PAGE_TYPE_TCS: a TCS page - * %SGX_PAGE_TYPE_REG: a regular page - * %SGX_PAGE_TYPE_VA: a VA page - * %SGX_PAGE_TYPE_TRIM: a page in trimmed state + * @SGX_PAGE_TYPE_SECS: a SECS page + * @SGX_PAGE_TYPE_TCS: a TCS page + * @SGX_PAGE_TYPE_REG: a regular page + * @SGX_PAGE_TYPE_VA: a VA page + * @SGX_PAGE_TYPE_TRIM: a page in trimmed state * * Make sure when making changes to this enum that its values can still fit * in the bitfield within &struct sgx_encl_page @@ -275,14 +282,14 @@ enum sgx_page_type { /** * enum sgx_secinfo_flags - the flags field in &struct sgx_secinfo - * %SGX_SECINFO_R: allow read - * %SGX_SECINFO_W: allow write - * %SGX_SECINFO_X: allow execution - * %SGX_SECINFO_SECS: a SECS page - * %SGX_SECINFO_TCS: a TCS page - * %SGX_SECINFO_REG: a regular page - * %SGX_SECINFO_VA: a VA page - * %SGX_SECINFO_TRIM: a page in trimmed state + * @SGX_SECINFO_R: allow read + * @SGX_SECINFO_W: allow write + * @SGX_SECINFO_X: allow execution + * @SGX_SECINFO_SECS: a SECS page + * @SGX_SECINFO_TCS: a TCS page + * @SGX_SECINFO_REG: a regular page + * @SGX_SECINFO_VA: a VA page + * @SGX_SECINFO_TRIM: a page in trimmed state */ enum sgx_secinfo_flags { SGX_SECINFO_R = BIT(0), diff --git a/arch/x86/include/asm/shared/msr.h b/arch/x86/include/asm/shared/msr.h index 1e6ec10b3a15..a20b1c08c99f 100644 --- a/arch/x86/include/asm/shared/msr.h +++ b/arch/x86/include/asm/shared/msr.h @@ -12,4 +12,19 @@ struct msr { }; }; +/* + * The kernel proper already defines rdmsr()/wrmsr(), but they are not for the + * boot kernel since they rely on tracepoint/exception handling infrastructure + * that's not available here. + */ +static inline void raw_rdmsr(unsigned int reg, struct msr *m) +{ + asm volatile("rdmsr" : "=a" (m->l), "=d" (m->h) : "c" (reg)); +} + +static inline void raw_wrmsr(unsigned int reg, const struct msr *m) +{ + asm volatile("wrmsr" : : "c" (reg), "a"(m->l), "d" (m->h) : "memory"); +} + #endif /* _ASM_X86_SHARED_MSR_H */ diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h index 4f84d421d1cf..20a3baae9568 100644 --- a/arch/x86/include/asm/smap.h +++ b/arch/x86/include/asm/smap.h @@ -23,18 +23,55 @@ #else /* __ASSEMBLER__ */ +/* + * The CLAC/STAC instructions toggle the enforcement of + * X86_FEATURE_SMAP along with X86_FEATURE_LASS. + * + * SMAP enforcement is based on the _PAGE_BIT_USER bit in the page + * tables. The kernel is not allowed to touch pages with that bit set + * unless the AC bit is set. + * + * Use stac()/clac() when accessing userspace (_PAGE_USER) mappings, + * regardless of location. + * + * Note: a barrier is implicit in alternative(). 
+ */ + static __always_inline void clac(void) { - /* Note: a barrier is implicit in alternative() */ alternative("", "clac", X86_FEATURE_SMAP); } static __always_inline void stac(void) { - /* Note: a barrier is implicit in alternative() */ alternative("", "stac", X86_FEATURE_SMAP); } +/* + * LASS enforcement is based on bit 63 of the virtual address. The + * kernel is not allowed to touch memory in the lower half of the + * virtual address space. + * + * Use lass_stac()/lass_clac() to toggle the AC bit for kernel data + * accesses (!_PAGE_USER) that are blocked by LASS, but not by SMAP. + * + * Even with the AC bit set, LASS will continue to block instruction + * fetches from the user half of the address space. To allow those, + * clear CR4.LASS to disable the LASS mechanism entirely. + * + * Note: a barrier is implicit in alternative(). + */ + +static __always_inline void lass_clac(void) +{ + alternative("", "clac", X86_FEATURE_LASS); +} + +static __always_inline void lass_stac(void) +{ + alternative("", "stac", X86_FEATURE_LASS); +} + static __always_inline unsigned long smap_save(void) { unsigned long flags; diff --git a/arch/x86/include/asm/string.h b/arch/x86/include/asm/string.h index c3c2c1914d65..9cb5aae7fba9 100644 --- a/arch/x86/include/asm/string.h +++ b/arch/x86/include/asm/string.h @@ -1,6 +1,32 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_STRING_H +#define _ASM_X86_STRING_H + #ifdef CONFIG_X86_32 # include <asm/string_32.h> #else # include <asm/string_64.h> #endif + +static __always_inline void *__inline_memcpy(void *to, const void *from, size_t len) +{ + void *ret = to; + + asm volatile("rep movsb" + : "+D" (to), "+S" (from), "+c" (len) + : : "memory"); + return ret; +} + +static __always_inline void *__inline_memset(void *s, int v, size_t n) +{ + void *ret = s; + + asm volatile("rep stosb" + : "+D" (s), "+c" (n) + : "a" ((uint8_t)v) + : "memory"); + return ret; +} + +#endif /* _ASM_X86_STRING_H */ diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 17f6c3fedeee..0581c477d466 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -701,5 +701,6 @@ DEFINE_GHCB_ACCESSORS(sw_exit_info_1) DEFINE_GHCB_ACCESSORS(sw_exit_info_2) DEFINE_GHCB_ACCESSORS(sw_scratch) DEFINE_GHCB_ACCESSORS(xcr0) +DEFINE_GHCB_ACCESSORS(xss) #endif diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 21041898157a..1fadf0cf520c 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -218,6 +218,12 @@ static inline unsigned int topology_amd_nodes_per_pkg(void) return __amd_nodes_per_pkg; } +#else /* CONFIG_SMP */ +static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; } +static inline int topology_max_smt_threads(void) { return 1; } +static inline unsigned int topology_amd_nodes_per_pkg(void) { return 1; } +#endif /* !CONFIG_SMP */ + extern struct cpumask __cpu_primary_thread_mask; #define cpu_primary_thread_mask ((const struct cpumask *)&__cpu_primary_thread_mask) @@ -241,12 +247,6 @@ static inline bool topology_is_core_online(unsigned int cpu) } #define topology_is_core_online topology_is_core_online -#else /* CONFIG_SMP */ -static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; } -static inline int topology_max_smt_threads(void) { return 1; } -static inline unsigned int topology_amd_nodes_per_pkg(void) { return 1; } -#endif /* !CONFIG_SMP */ - static inline void arch_fix_phys_package_id(int num, u32 slot) { } @@ -325,4 +325,6 @@ static inline 
void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled extern void arch_scale_freq_tick(void); #define arch_scale_freq_tick arch_scale_freq_tick +extern int arch_sched_node_distance(int from, int to); + #endif /* _ASM_X86_TOPOLOGY_H */ diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 91a3fb8ae7ff..367297b188c3 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -528,18 +528,18 @@ static __must_check __always_inline bool user_access_begin(const void __user *pt #define user_access_save() smap_save() #define user_access_restore(x) smap_restore(x) -#define unsafe_put_user(x, ptr, label) \ +#define arch_unsafe_put_user(x, ptr, label) \ __put_user_size((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)), label) #ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT -#define unsafe_get_user(x, ptr, err_label) \ +#define arch_unsafe_get_user(x, ptr, err_label) \ do { \ __inttype(*(ptr)) __gu_val; \ __get_user_size(__gu_val, (ptr), sizeof(*(ptr)), err_label); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ } while (0) #else // !CONFIG_CC_HAS_ASM_GOTO_OUTPUT -#define unsafe_get_user(x, ptr, err_label) \ +#define arch_unsafe_get_user(x, ptr, err_label) \ do { \ int __gu_err; \ __inttype(*(ptr)) __gu_val; \ @@ -618,11 +618,11 @@ do { \ } while (0) #ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT -#define __get_kernel_nofault(dst, src, type, err_label) \ +#define arch_get_kernel_nofault(dst, src, type, err_label) \ __get_user_size(*((type *)(dst)), (__force type __user *)(src), \ sizeof(type), err_label) #else // !CONFIG_CC_HAS_ASM_GOTO_OUTPUT -#define __get_kernel_nofault(dst, src, type, err_label) \ +#define arch_get_kernel_nofault(dst, src, type, err_label) \ do { \ int __kr_err; \ \ @@ -633,7 +633,7 @@ do { \ } while (0) #endif // CONFIG_CC_HAS_ASM_GOTO_OUTPUT -#define __put_kernel_nofault(dst, src, type, err_label) \ +#define arch_put_kernel_nofault(dst, src, type, err_label) \ __put_user_size(*((type *)(src)), (__force type __user *)(dst), \ sizeof(type), err_label) diff --git a/arch/x86/include/asm/unwind_user.h b/arch/x86/include/asm/unwind_user.h new file mode 100644 index 000000000000..12064284bc4e --- /dev/null +++ b/arch/x86/include/asm/unwind_user.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_UNWIND_USER_H +#define _ASM_X86_UNWIND_USER_H + +#ifdef CONFIG_HAVE_UNWIND_USER_FP + +#include <asm/ptrace.h> +#include <asm/uprobes.h> + +#define ARCH_INIT_USER_FP_FRAME(ws) \ + .cfa_off = 2*(ws), \ + .ra_off = -1*(ws), \ + .fp_off = -2*(ws), \ + .use_fp = true, + +#define ARCH_INIT_USER_FP_ENTRY_FRAME(ws) \ + .cfa_off = 1*(ws), \ + .ra_off = -1*(ws), \ + .fp_off = 0, \ + .use_fp = false, + +static inline int unwind_user_word_size(struct pt_regs *regs) +{ + /* We can't unwind VM86 stacks */ + if (regs->flags & X86_VM_MASK) + return 0; +#ifdef CONFIG_X86_64 + if (!user_64bit_mode(regs)) + return sizeof(int); +#endif + return sizeof(long); +} + +static inline bool unwind_user_at_function_start(struct pt_regs *regs) +{ + return is_uprobe_at_func_entry(regs); +} + +#endif /* CONFIG_HAVE_UNWIND_USER_FP */ + +#endif /* _ASM_X86_UNWIND_USER_H */ diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h index 1ee2e5115955..362210c79998 100644 --- a/arch/x86/include/asm/uprobes.h +++ b/arch/x86/include/asm/uprobes.h @@ -62,4 +62,13 @@ struct arch_uprobe_task { unsigned int saved_tf; }; +#ifdef CONFIG_UPROBES +extern bool is_uprobe_at_func_entry(struct pt_regs *regs); +#else +static bool is_uprobe_at_func_entry(struct 
pt_regs *regs) +{ + return false; +} +#endif /* CONFIG_UPROBES */ + #endif /* _ASM_UPROBES_H */ diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h index f1a4adc78272..81d0c8bf1137 100644 --- a/arch/x86/include/uapi/asm/processor-flags.h +++ b/arch/x86/include/uapi/asm/processor-flags.h @@ -136,6 +136,8 @@ #define X86_CR4_PKE _BITUL(X86_CR4_PKE_BIT) #define X86_CR4_CET_BIT 23 /* enable Control-flow Enforcement Technology */ #define X86_CR4_CET _BITUL(X86_CR4_CET_BIT) +#define X86_CR4_LASS_BIT 27 /* enable Linear Address Space Separation support */ +#define X86_CR4_LASS _BITUL(X86_CR4_LASS_BIT) #define X86_CR4_LAM_SUP_BIT 28 /* LAM for supervisor pointers */ #define X86_CR4_LAM_SUP _BITUL(X86_CR4_LAM_SUP_BIT) diff --git a/arch/x86/include/uapi/asm/sgx.h b/arch/x86/include/uapi/asm/sgx.h index 2dd35bbdc822..3c4d52072189 100644 --- a/arch/x86/include/uapi/asm/sgx.h +++ b/arch/x86/include/uapi/asm/sgx.h @@ -10,7 +10,7 @@ /** * enum sgx_page_flags - page control flags - * %SGX_PAGE_MEASURE: Measure the page contents with a sequence of + * @SGX_PAGE_MEASURE: Measure the page contents with a sequence of * ENCLS[EEXTEND] operations. */ enum sgx_page_flags { @@ -143,6 +143,12 @@ struct sgx_enclave_run; /** * typedef sgx_enclave_user_handler_t - Exit handler function accepted by * __vdso_sgx_enter_enclave() + * @rdi: RDI at the time of EEXIT, undefined on AEX + * @rsi: RSI at the time of EEXIT, undefined on AEX + * @rdx: RDX at the time of EEXIT, undefined on AEX + * @rsp: RSP (untrusted) at the time of EEXIT or AEX + * @r8: R8 at the time of EEXIT, undefined on AEX + * @r9: R9 at the time of EEXIT, undefined on AEX * @run: The run instance given by the caller * * The register parameters contain the snapshot of their values at enclave @@ -166,7 +172,7 @@ typedef int (*sgx_enclave_user_handler_t)(long rdi, long rsi, long rdx, * @exception_addr: The address that triggered the exception * @user_handler: User provided callback run on exception * @user_data: Data passed to the user handler - * @reserved Reserved for future extensions + * @reserved: Reserved for future extensions * * If @user_handler is provided, the handler will be invoked on all return paths * of the normal flow. The user handler may transfer control, e.g. via a diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index 9792e329343e..1baa86dfe029 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -93,6 +93,7 @@ #define EXIT_REASON_TPAUSE 68 #define EXIT_REASON_BUS_LOCK 74 #define EXIT_REASON_NOTIFY 75 +#define EXIT_REASON_SEAMCALL 76 #define EXIT_REASON_TDCALL 77 #define EXIT_REASON_MSR_READ_IMM 84 #define EXIT_REASON_MSR_WRITE_IMM 85 diff --git a/arch/x86/kernel/acpi/apei.c b/arch/x86/kernel/acpi/apei.c index 0916f00a992e..e21419e686eb 100644 --- a/arch/x86/kernel/acpi/apei.c +++ b/arch/x86/kernel/acpi/apei.c @@ -19,6 +19,8 @@ int arch_apei_enable_cmcff(struct acpi_hest_header *hest_hdr, void *data) if (!cmc->enabled) return 0; + mce_save_apei_thr_limit(cmc->notify.error_threshold_value); + /* * We expect HEST to provide a list of MC banks that report errors * in firmware first mode. 
Otherwise, return non-zero value to diff --git a/arch/x86/kernel/acpi/cppc.c b/arch/x86/kernel/acpi/cppc.c index 7047124490f6..d7c8ef1e354d 100644 --- a/arch/x86/kernel/acpi/cppc.c +++ b/arch/x86/kernel/acpi/cppc.c @@ -196,7 +196,7 @@ int amd_detect_prefcore(bool *detected) break; } - for_each_present_cpu(cpu) { + for_each_online_cpu(cpu) { u32 tmp; int ret; diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index d19a3fd7cf04..74f4c659f9c9 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -9,6 +9,7 @@ #include <asm/text-patching.h> #include <asm/insn.h> +#include <asm/insn-eval.h> #include <asm/ibt.h> #include <asm/set_memory.h> #include <asm/nmi.h> @@ -346,25 +347,6 @@ static void add_nop(u8 *buf, unsigned int len) } /* - * Matches NOP and NOPL, not any of the other possible NOPs. - */ -static bool insn_is_nop(struct insn *insn) -{ - /* Anything NOP, but no REP NOP */ - if (insn->opcode.bytes[0] == 0x90 && - (!insn->prefixes.nbytes || insn->prefixes.bytes[0] != 0xF3)) - return true; - - /* NOPL */ - if (insn->opcode.bytes[0] == 0x0F && insn->opcode.bytes[1] == 0x1F) - return true; - - /* TODO: more nops */ - - return false; -} - -/* * Find the offset of the first non-NOP instruction starting at @offset * but no further than @len. */ @@ -559,7 +541,7 @@ EXPORT_SYMBOL(BUG_func); * Rewrite the "call BUG_func" replacement to point to the target of the * indirect pv_ops call "call *disp(%ip)". */ -static int alt_replace_call(u8 *instr, u8 *insn_buff, struct alt_instr *a) +static unsigned int alt_replace_call(u8 *instr, u8 *insn_buff, struct alt_instr *a) { void *target, *bug = &BUG_func; s32 disp; @@ -643,7 +625,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, * order. */ for (a = start; a < end; a++) { - int insn_buff_sz = 0; + unsigned int insn_buff_sz = 0; /* * In case of nested ALTERNATIVE()s the outer alternative might @@ -683,11 +665,8 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, memcpy(insn_buff, replacement, a->replacementlen); insn_buff_sz = a->replacementlen; - if (a->flags & ALT_FLAG_DIRECT_CALL) { + if (a->flags & ALT_FLAG_DIRECT_CALL) insn_buff_sz = alt_replace_call(instr, insn_buff, a); - if (insn_buff_sz < 0) - continue; - } for (; insn_buff_sz < a->instrlen; insn_buff_sz++) insn_buff[insn_buff_sz] = 0x90; @@ -2474,16 +2453,30 @@ void __init_or_module text_poke_early(void *addr, const void *opcode, __ro_after_init struct mm_struct *text_poke_mm; __ro_after_init unsigned long text_poke_mm_addr; +/* + * Text poking creates and uses a mapping in the lower half of the + * address space. Relax LASS enforcement when accessing the poking + * address. + * + * objtool enforces a strict policy of "no function calls within AC=1 + * regions". Adhere to the policy by using inline versions of + * memcpy()/memset() that will never result in a function call. 
+ */ + static void text_poke_memcpy(void *dst, const void *src, size_t len) { - memcpy(dst, src, len); + lass_stac(); + __inline_memcpy(dst, src, len); + lass_clac(); } static void text_poke_memset(void *dst, const void *src, size_t len) { int c = *(const int *)src; - memset(dst, c, len); + lass_stac(); + __inline_memset(dst, c, len); + lass_clac(); } typedef void text_poke_f(void *dst, const void *src, size_t len); diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 680d305589a3..9c29e12b84e5 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -36,6 +36,7 @@ #include <linux/dmi.h> #include <linux/smp.h> #include <linux/mm.h> +#include <linux/kvm_types.h> #include <xen/xen.h> @@ -173,6 +174,7 @@ static struct resource lapic_resource = { .flags = IORESOURCE_MEM | IORESOURCE_BUSY, }; +/* Measured in ticks per HZ. */ unsigned int lapic_timer_period = 0; static void apic_pm_activate(void); @@ -792,6 +794,7 @@ static int __init calibrate_APIC_clock(void) { struct clock_event_device *levt = this_cpu_ptr(&lapic_events); u64 tsc_perj = 0, tsc_start = 0; + long delta_tsc_khz, bus_khz; unsigned long jif_start; unsigned long deltaj; long delta, deltatsc; @@ -894,14 +897,15 @@ static int __init calibrate_APIC_clock(void) apic_pr_verbose("..... calibration result: %u\n", lapic_timer_period); if (boot_cpu_has(X86_FEATURE_TSC)) { - apic_pr_verbose("..... CPU clock speed is %ld.%04ld MHz.\n", - (deltatsc / LAPIC_CAL_LOOPS) / (1000000 / HZ), - (deltatsc / LAPIC_CAL_LOOPS) % (1000000 / HZ)); + delta_tsc_khz = (deltatsc * HZ) / (1000 * LAPIC_CAL_LOOPS); + + apic_pr_verbose("..... CPU clock speed is %ld.%03ld MHz.\n", + delta_tsc_khz / 1000, delta_tsc_khz % 1000); } - apic_pr_verbose("..... host bus clock speed is %u.%04u MHz.\n", - lapic_timer_period / (1000000 / HZ), - lapic_timer_period % (1000000 / HZ)); + bus_khz = (long)lapic_timer_period * HZ / 1000; + apic_pr_verbose("..... 
host bus clock speed is %ld.%03ld MHz.\n", + bus_khz / 1000, bus_khz % 1000); /* * Do a sanity check on the APIC calibration result @@ -2316,7 +2320,7 @@ u32 x86_msi_msg_get_destid(struct msi_msg *msg, bool extid) dest |= msg->arch_addr_hi.destid_8_31 << 8; return dest; } -EXPORT_SYMBOL_GPL(x86_msi_msg_get_destid); +EXPORT_SYMBOL_FOR_KVM(x86_msi_msg_get_destid); static void __init apic_bsp_up_setup(void) { diff --git a/arch/x86/kernel/apic/apic_common.c b/arch/x86/kernel/apic/apic_common.c index 9ef3be866832..2ed3b5c88c7f 100644 --- a/arch/x86/kernel/apic/apic_common.c +++ b/arch/x86/kernel/apic/apic_common.c @@ -4,6 +4,7 @@ * SPDX-License-Identifier: GPL-2.0 */ #include <linux/irq.h> +#include <linux/kvm_types.h> #include <asm/apic.h> #include "local.h" @@ -25,7 +26,7 @@ u32 default_cpu_present_to_apicid(int mps_cpu) else return BAD_APICID; } -EXPORT_SYMBOL_GPL(default_cpu_present_to_apicid); +EXPORT_SYMBOL_FOR_KVM(default_cpu_present_to_apicid); /* * Set up the logical destination ID when the APIC operates in logical diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 5ba2feb2c04c..1e0442e867b1 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2864,7 +2864,7 @@ int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq, ioapic = mp_irqdomain_ioapic_idx(domain); pin = info->ioapic.pin; - if (irq_find_mapping(domain, (irq_hw_number_t)pin) > 0) + if (irq_resolve_mapping(domain, (irq_hw_number_t)pin)) return -EEXIST; data = kzalloc(sizeof(*data), GFP_KERNEL); diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 2ba9f2d42d8c..bc94ff1e250a 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -3,7 +3,7 @@ #include <linux/bitops.h> #include <linux/elf.h> #include <linux/mm.h> - +#include <linux/kvm_types.h> #include <linux/io.h> #include <linux/sched.h> #include <linux/sched/clock.h> @@ -1037,7 +1037,14 @@ static void init_amd_zen4(struct cpuinfo_x86 *c) static const struct x86_cpu_id zen5_rdseed_microcode[] = { ZEN_MODEL_STEP_UCODE(0x1a, 0x02, 0x1, 0x0b00215a), + ZEN_MODEL_STEP_UCODE(0x1a, 0x08, 0x1, 0x0b008121), ZEN_MODEL_STEP_UCODE(0x1a, 0x11, 0x0, 0x0b101054), + ZEN_MODEL_STEP_UCODE(0x1a, 0x24, 0x0, 0x0b204037), + ZEN_MODEL_STEP_UCODE(0x1a, 0x44, 0x0, 0x0b404035), + ZEN_MODEL_STEP_UCODE(0x1a, 0x44, 0x1, 0x0b404108), + ZEN_MODEL_STEP_UCODE(0x1a, 0x60, 0x0, 0x0b600037), + ZEN_MODEL_STEP_UCODE(0x1a, 0x68, 0x0, 0x0b608038), + ZEN_MODEL_STEP_UCODE(0x1a, 0x70, 0x0, 0x0b700037), {}, }; @@ -1311,7 +1318,7 @@ unsigned long amd_get_dr_addr_mask(unsigned int dr) return per_cpu(amd_dr_addr_mask[dr], smp_processor_id()); } -EXPORT_SYMBOL_GPL(amd_get_dr_addr_mask); +EXPORT_SYMBOL_FOR_KVM(amd_get_dr_addr_mask); static void zenbleed_check_cpu(void *unused) { diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index d7fa03bf51b4..d8660770dc6a 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -16,6 +16,7 @@ #include <linux/sched/smt.h> #include <linux/pgtable.h> #include <linux/bpf.h> +#include <linux/kvm_types.h> #include <asm/spec-ctrl.h> #include <asm/cmdline.h> @@ -53,56 +54,8 @@ * mitigation option. 
*/ -static void __init spectre_v1_select_mitigation(void); -static void __init spectre_v1_apply_mitigation(void); -static void __init spectre_v2_select_mitigation(void); -static void __init spectre_v2_update_mitigation(void); -static void __init spectre_v2_apply_mitigation(void); -static void __init retbleed_select_mitigation(void); -static void __init retbleed_update_mitigation(void); -static void __init retbleed_apply_mitigation(void); -static void __init spectre_v2_user_select_mitigation(void); -static void __init spectre_v2_user_update_mitigation(void); -static void __init spectre_v2_user_apply_mitigation(void); -static void __init ssb_select_mitigation(void); -static void __init ssb_apply_mitigation(void); -static void __init l1tf_select_mitigation(void); -static void __init l1tf_apply_mitigation(void); -static void __init mds_select_mitigation(void); -static void __init mds_update_mitigation(void); -static void __init mds_apply_mitigation(void); -static void __init taa_select_mitigation(void); -static void __init taa_update_mitigation(void); -static void __init taa_apply_mitigation(void); -static void __init mmio_select_mitigation(void); -static void __init mmio_update_mitigation(void); -static void __init mmio_apply_mitigation(void); -static void __init rfds_select_mitigation(void); -static void __init rfds_update_mitigation(void); -static void __init rfds_apply_mitigation(void); -static void __init srbds_select_mitigation(void); -static void __init srbds_apply_mitigation(void); -static void __init l1d_flush_select_mitigation(void); -static void __init srso_select_mitigation(void); -static void __init srso_update_mitigation(void); -static void __init srso_apply_mitigation(void); -static void __init gds_select_mitigation(void); -static void __init gds_apply_mitigation(void); -static void __init bhi_select_mitigation(void); -static void __init bhi_update_mitigation(void); -static void __init bhi_apply_mitigation(void); -static void __init its_select_mitigation(void); -static void __init its_update_mitigation(void); -static void __init its_apply_mitigation(void); -static void __init tsa_select_mitigation(void); -static void __init tsa_apply_mitigation(void); -static void __init vmscape_select_mitigation(void); -static void __init vmscape_update_mitigation(void); -static void __init vmscape_apply_mitigation(void); - /* The base value of the SPEC_CTRL MSR without task-specific bits set */ u64 x86_spec_ctrl_base; -EXPORT_SYMBOL_GPL(x86_spec_ctrl_base); /* The current value of the SPEC_CTRL MSR with task-specific bits set */ DEFINE_PER_CPU(u64, x86_spec_ctrl_current); @@ -179,7 +132,7 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb); /* Control IBPB on vCPU load */ DEFINE_STATIC_KEY_FALSE(switch_vcpu_ibpb); -EXPORT_SYMBOL_GPL(switch_vcpu_ibpb); +EXPORT_SYMBOL_FOR_KVM(switch_vcpu_ibpb); /* Control CPU buffer clear before idling (halt, mwait) */ DEFINE_STATIC_KEY_FALSE(cpu_buf_idle_clear); @@ -198,7 +151,7 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush); * mitigation is required. */ DEFINE_STATIC_KEY_FALSE(cpu_buf_vm_clear); -EXPORT_SYMBOL_GPL(cpu_buf_vm_clear); +EXPORT_SYMBOL_FOR_KVM(cpu_buf_vm_clear); #undef pr_fmt #define pr_fmt(fmt) "mitigations: " fmt @@ -233,99 +186,6 @@ static void __init cpu_print_attack_vectors(void) } } -void __init cpu_select_mitigations(void) -{ - /* - * Read the SPEC_CTRL MSR to account for reserved bits which may - * have unknown values. AMD64_LS_CFG MSR is cached in the early AMD - * init code as it is not enumerated and depends on the family. 
- */ - if (cpu_feature_enabled(X86_FEATURE_MSR_SPEC_CTRL)) { - rdmsrq(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); - - /* - * Previously running kernel (kexec), may have some controls - * turned ON. Clear them and let the mitigations setup below - * rediscover them based on configuration. - */ - x86_spec_ctrl_base &= ~SPEC_CTRL_MITIGATIONS_MASK; - } - - x86_arch_cap_msr = x86_read_arch_cap_msr(); - - cpu_print_attack_vectors(); - - /* Select the proper CPU mitigations before patching alternatives: */ - spectre_v1_select_mitigation(); - spectre_v2_select_mitigation(); - retbleed_select_mitigation(); - spectre_v2_user_select_mitigation(); - ssb_select_mitigation(); - l1tf_select_mitigation(); - mds_select_mitigation(); - taa_select_mitigation(); - mmio_select_mitigation(); - rfds_select_mitigation(); - srbds_select_mitigation(); - l1d_flush_select_mitigation(); - srso_select_mitigation(); - gds_select_mitigation(); - its_select_mitigation(); - bhi_select_mitigation(); - tsa_select_mitigation(); - vmscape_select_mitigation(); - - /* - * After mitigations are selected, some may need to update their - * choices. - */ - spectre_v2_update_mitigation(); - /* - * retbleed_update_mitigation() relies on the state set by - * spectre_v2_update_mitigation(); specifically it wants to know about - * spectre_v2=ibrs. - */ - retbleed_update_mitigation(); - /* - * its_update_mitigation() depends on spectre_v2_update_mitigation() - * and retbleed_update_mitigation(). - */ - its_update_mitigation(); - - /* - * spectre_v2_user_update_mitigation() depends on - * retbleed_update_mitigation(), specifically the STIBP - * selection is forced for UNRET or IBPB. - */ - spectre_v2_user_update_mitigation(); - mds_update_mitigation(); - taa_update_mitigation(); - mmio_update_mitigation(); - rfds_update_mitigation(); - bhi_update_mitigation(); - /* srso_update_mitigation() depends on retbleed_update_mitigation(). */ - srso_update_mitigation(); - vmscape_update_mitigation(); - - spectre_v1_apply_mitigation(); - spectre_v2_apply_mitigation(); - retbleed_apply_mitigation(); - spectre_v2_user_apply_mitigation(); - ssb_apply_mitigation(); - l1tf_apply_mitigation(); - mds_apply_mitigation(); - taa_apply_mitigation(); - mmio_apply_mitigation(); - rfds_apply_mitigation(); - srbds_apply_mitigation(); - srso_apply_mitigation(); - gds_apply_mitigation(); - its_apply_mitigation(); - bhi_apply_mitigation(); - tsa_apply_mitigation(); - vmscape_apply_mitigation(); -} - /* * NOTE: This function is *only* called for SVM, since Intel uses * MSR_IA32_SPEC_CTRL for SSBD. @@ -366,7 +226,7 @@ x86_virt_spec_ctrl(u64 guest_virt_spec_ctrl, bool setguest) speculation_ctrl_update(tif); } } -EXPORT_SYMBOL_GPL(x86_virt_spec_ctrl); +EXPORT_SYMBOL_FOR_KVM(x86_virt_spec_ctrl); static void x86_amd_ssb_disable(void) { @@ -1032,7 +892,7 @@ bool gds_ucode_mitigated(void) return (gds_mitigation == GDS_MITIGATION_FULL || gds_mitigation == GDS_MITIGATION_FULL_LOCKED); } -EXPORT_SYMBOL_GPL(gds_ucode_mitigated); +EXPORT_SYMBOL_FOR_KVM(gds_ucode_mitigated); void update_gds_msr(void) { @@ -2859,7 +2719,7 @@ void x86_spec_ctrl_setup_ap(void) } bool itlb_multihit_kvm_mitigation; -EXPORT_SYMBOL_GPL(itlb_multihit_kvm_mitigation); +EXPORT_SYMBOL_FOR_KVM(itlb_multihit_kvm_mitigation); #undef pr_fmt #define pr_fmt(fmt) "L1TF: " fmt @@ -2867,11 +2727,9 @@ EXPORT_SYMBOL_GPL(itlb_multihit_kvm_mitigation); /* Default mitigation for L1TF-affected CPUs */ enum l1tf_mitigations l1tf_mitigation __ro_after_init = IS_ENABLED(CONFIG_MITIGATION_L1TF) ? 
L1TF_MITIGATION_AUTO : L1TF_MITIGATION_OFF; -#if IS_ENABLED(CONFIG_KVM_INTEL) -EXPORT_SYMBOL_GPL(l1tf_mitigation); -#endif +EXPORT_SYMBOL_FOR_KVM(l1tf_mitigation); enum vmx_l1d_flush_state l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO; -EXPORT_SYMBOL_GPL(l1tf_vmx_mitigation); +EXPORT_SYMBOL_FOR_KVM(l1tf_vmx_mitigation); /* * These CPUs all support 44bits physical address space internally in the @@ -3371,6 +3229,99 @@ void cpu_bugs_smt_update(void) mutex_unlock(&spec_ctrl_mutex); } +void __init cpu_select_mitigations(void) +{ + /* + * Read the SPEC_CTRL MSR to account for reserved bits which may + * have unknown values. AMD64_LS_CFG MSR is cached in the early AMD + * init code as it is not enumerated and depends on the family. + */ + if (cpu_feature_enabled(X86_FEATURE_MSR_SPEC_CTRL)) { + rdmsrq(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + + /* + * Previously running kernel (kexec), may have some controls + * turned ON. Clear them and let the mitigations setup below + * rediscover them based on configuration. + */ + x86_spec_ctrl_base &= ~SPEC_CTRL_MITIGATIONS_MASK; + } + + x86_arch_cap_msr = x86_read_arch_cap_msr(); + + cpu_print_attack_vectors(); + + /* Select the proper CPU mitigations before patching alternatives: */ + spectre_v1_select_mitigation(); + spectre_v2_select_mitigation(); + retbleed_select_mitigation(); + spectre_v2_user_select_mitigation(); + ssb_select_mitigation(); + l1tf_select_mitigation(); + mds_select_mitigation(); + taa_select_mitigation(); + mmio_select_mitigation(); + rfds_select_mitigation(); + srbds_select_mitigation(); + l1d_flush_select_mitigation(); + srso_select_mitigation(); + gds_select_mitigation(); + its_select_mitigation(); + bhi_select_mitigation(); + tsa_select_mitigation(); + vmscape_select_mitigation(); + + /* + * After mitigations are selected, some may need to update their + * choices. + */ + spectre_v2_update_mitigation(); + /* + * retbleed_update_mitigation() relies on the state set by + * spectre_v2_update_mitigation(); specifically it wants to know about + * spectre_v2=ibrs. + */ + retbleed_update_mitigation(); + /* + * its_update_mitigation() depends on spectre_v2_update_mitigation() + * and retbleed_update_mitigation(). + */ + its_update_mitigation(); + + /* + * spectre_v2_user_update_mitigation() depends on + * retbleed_update_mitigation(), specifically the STIBP + * selection is forced for UNRET or IBPB. + */ + spectre_v2_user_update_mitigation(); + mds_update_mitigation(); + taa_update_mitigation(); + mmio_update_mitigation(); + rfds_update_mitigation(); + bhi_update_mitigation(); + /* srso_update_mitigation() depends on retbleed_update_mitigation(). 
*/ + srso_update_mitigation(); + vmscape_update_mitigation(); + + spectre_v1_apply_mitigation(); + spectre_v2_apply_mitigation(); + retbleed_apply_mitigation(); + spectre_v2_user_apply_mitigation(); + ssb_apply_mitigation(); + l1tf_apply_mitigation(); + mds_apply_mitigation(); + taa_apply_mitigation(); + mmio_apply_mitigation(); + rfds_apply_mitigation(); + srbds_apply_mitigation(); + srso_apply_mitigation(); + gds_apply_mitigation(); + its_apply_mitigation(); + bhi_apply_mitigation(); + tsa_apply_mitigation(); + vmscape_apply_mitigation(); +} + #ifdef CONFIG_SYSFS #define L1TF_DEFAULT_MSG "Mitigation: PTE Inversion" diff --git a/arch/x86/kernel/cpu/bus_lock.c b/arch/x86/kernel/cpu/bus_lock.c index 981f8b1f0792..dbc99a47be45 100644 --- a/arch/x86/kernel/cpu/bus_lock.c +++ b/arch/x86/kernel/cpu/bus_lock.c @@ -6,6 +6,7 @@ #include <linux/workqueue.h> #include <linux/delay.h> #include <linux/cpuhotplug.h> +#include <linux/kvm_types.h> #include <asm/cpu_device_id.h> #include <asm/cmdline.h> #include <asm/traps.h> @@ -289,7 +290,7 @@ bool handle_guest_split_lock(unsigned long ip) force_sig_fault(SIGBUS, BUS_ADRALN, NULL); return false; } -EXPORT_SYMBOL_GPL(handle_guest_split_lock); +EXPORT_SYMBOL_FOR_KVM(handle_guest_split_lock); void bus_lock_init(void) { diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 02d97834a1d4..e7ab22fce3b5 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -7,6 +7,7 @@ #include <linux/bitops.h> #include <linux/kernel.h> #include <linux/export.h> +#include <linux/kvm_types.h> #include <linux/percpu.h> #include <linux/string.h> #include <linux/ctype.h> @@ -405,6 +406,28 @@ out: cr4_clear_bits(X86_CR4_UMIP); } +static __always_inline void setup_lass(struct cpuinfo_x86 *c) +{ + if (!cpu_feature_enabled(X86_FEATURE_LASS)) + return; + + /* + * Legacy vsyscall page access causes a #GP when LASS is active. + * Disable LASS because the #GP handler doesn't support vsyscall + * emulation. + * + * Also disable LASS when running under EFI, as some runtime and + * boot services rely on 1:1 mappings in the lower half. + */ + if (IS_ENABLED(CONFIG_X86_VSYSCALL_EMULATION) || + IS_ENABLED(CONFIG_EFI)) { + setup_clear_cpu_cap(X86_FEATURE_LASS); + return; + } + + cr4_set_bits(X86_CR4_LASS); +} + /* These bits should not change their value after CPU init is finished. */ static const unsigned long cr4_pinned_mask = X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP | X86_CR4_FSGSBASE | X86_CR4_CET | X86_CR4_FRED; @@ -464,14 +487,14 @@ void cr4_update_irqsoff(unsigned long set, unsigned long clear) __write_cr4(newval); } } -EXPORT_SYMBOL(cr4_update_irqsoff); +EXPORT_SYMBOL_FOR_KVM(cr4_update_irqsoff); /* Read the CR4 shadow. 
*/ unsigned long cr4_read_shadow(void) { return this_cpu_read(cpu_tlbstate.cr4); } -EXPORT_SYMBOL_GPL(cr4_read_shadow); +EXPORT_SYMBOL_FOR_KVM(cr4_read_shadow); void cr4_init(void) { @@ -726,7 +749,7 @@ void load_direct_gdt(int cpu) gdt_descr.size = GDT_SIZE - 1; load_gdt(&gdt_descr); } -EXPORT_SYMBOL_GPL(load_direct_gdt); +EXPORT_SYMBOL_FOR_KVM(load_direct_gdt); /* Load a fixmap remapping of the per-cpu GDT */ void load_fixmap_gdt(int cpu) @@ -1025,12 +1048,8 @@ void get_cpu_cap(struct cpuinfo_x86 *c) c->x86_capability[CPUID_8000_0001_EDX] = edx; } - if (c->extended_cpuid_level >= 0x80000007) { - cpuid(0x80000007, &eax, &ebx, &ecx, &edx); - - c->x86_capability[CPUID_8000_0007_EBX] = ebx; - c->x86_power = edx; - } + if (c->extended_cpuid_level >= 0x80000007) + c->x86_power = cpuid_edx(0x80000007); if (c->extended_cpuid_level >= 0x80000008) { cpuid(0x80000008, &eax, &ebx, &ecx, &edx); @@ -2015,10 +2034,10 @@ static void identify_cpu(struct cpuinfo_x86 *c) /* Disable the PN if appropriate */ squash_the_stupid_serial_number(c); - /* Set up SMEP/SMAP/UMIP */ setup_smep(c); setup_smap(c); setup_umip(c); + setup_lass(c); /* Enable FSGSBASE instructions if available. */ if (cpu_has(c, X86_FEATURE_FSGSBASE)) { diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index bc38b2d56f26..5c7a3a71191a 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -42,15 +42,6 @@ extern const struct cpu_dev *const __x86_cpu_dev_start[], *const __x86_cpu_dev_end[]; #ifdef CONFIG_CPU_SUP_INTEL -enum tsx_ctrl_states { - TSX_CTRL_ENABLE, - TSX_CTRL_DISABLE, - TSX_CTRL_RTM_ALWAYS_ABORT, - TSX_CTRL_NOT_SUPPORTED, -}; - -extern __ro_after_init enum tsx_ctrl_states tsx_ctrl_state; - extern void __init tsx_init(void); void tsx_ap_init(void); void intel_unlock_cpuid_leafs(struct cpuinfo_x86 *c); diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c index 46efcbd6afa4..146f6f8b0650 100644 --- a/arch/x86/kernel/cpu/cpuid-deps.c +++ b/arch/x86/kernel/cpu/cpuid-deps.c @@ -72,6 +72,7 @@ static const struct cpuid_dep cpuid_deps[] = { { X86_FEATURE_CQM_MBM_LOCAL, X86_FEATURE_CQM_LLC }, { X86_FEATURE_BMEC, X86_FEATURE_CQM_MBM_TOTAL }, { X86_FEATURE_BMEC, X86_FEATURE_CQM_MBM_LOCAL }, + { X86_FEATURE_SDCIAE, X86_FEATURE_CAT_L3 }, { X86_FEATURE_AVX512_BF16, X86_FEATURE_AVX512VL }, { X86_FEATURE_AVX512_FP16, X86_FEATURE_AVX512BW }, { X86_FEATURE_ENQCMD, X86_FEATURE_XSAVES }, @@ -79,6 +80,7 @@ static const struct cpuid_dep cpuid_deps[] = { { X86_FEATURE_SGX_LC, X86_FEATURE_SGX }, { X86_FEATURE_SGX1, X86_FEATURE_SGX }, { X86_FEATURE_SGX2, X86_FEATURE_SGX1 }, + { X86_FEATURE_SGX_EUPDATESVN, X86_FEATURE_SGX1 }, { X86_FEATURE_SGX_EDECCSSA, X86_FEATURE_SGX1 }, { X86_FEATURE_XFD, X86_FEATURE_XSAVES }, { X86_FEATURE_XFD, X86_FEATURE_XGETBV1 }, @@ -89,6 +91,7 @@ static const struct cpuid_dep cpuid_deps[] = { { X86_FEATURE_SHSTK, X86_FEATURE_XSAVES }, { X86_FEATURE_FRED, X86_FEATURE_LKGS }, { X86_FEATURE_SPEC_CTRL_SSBD, X86_FEATURE_SPEC_CTRL }, + { X86_FEATURE_LASS, X86_FEATURE_SMAP }, {} }; diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index d6906442f49b..3f1dda355307 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -43,9 +43,6 @@ /* Deferred error settings */ #define MSR_CU_DEF_ERR 0xC0000410 #define MASK_DEF_LVTOFF 0x000000F0 -#define MASK_DEF_INT_TYPE 0x00000006 -#define DEF_LVT_OFF 0x2 -#define DEF_INT_TYPE_APIC 0x2 /* Scalable MCA: */ @@ -54,6 +51,17 @@ static bool thresholding_irq_en; +struct mce_amd_cpu_data { + 
mce_banks_t thr_intr_banks; + mce_banks_t dfr_intr_banks; + + u32 thr_intr_en: 1, + dfr_intr_en: 1, + __resv: 30; +}; + +static DEFINE_PER_CPU_READ_MOSTLY(struct mce_amd_cpu_data, mce_amd_data); + static const char * const th_names[] = { "load_store", "insn_fetch", @@ -79,6 +87,8 @@ struct smca_bank { const struct smca_hwid *hwid; u32 id; /* Value of MCA_IPID[InstanceId]. */ u8 sysfs_id; /* Value used for sysfs name. */ + u64 paddrv :1, /* Physical Address Valid bit in MCA_CONFIG */ + __reserved :63; }; static DEFINE_PER_CPU_READ_MOSTLY(struct smca_bank[MAX_NR_BANKS], smca_banks); @@ -264,6 +274,7 @@ void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt; static void smca_configure(unsigned int bank, unsigned int cpu) { + struct mce_amd_cpu_data *data = this_cpu_ptr(&mce_amd_data); u8 *bank_counts = this_cpu_ptr(smca_bank_counts); const struct smca_hwid *s_hwid; unsigned int i, hwid_mcatype; @@ -294,11 +305,33 @@ static void smca_configure(unsigned int bank, unsigned int cpu) * APIC based interrupt. First, check that no interrupt has been * set. */ - if ((low & BIT(5)) && !((high >> 5) & 0x3)) + if ((low & BIT(5)) && !((high >> 5) & 0x3) && data->dfr_intr_en) { + __set_bit(bank, data->dfr_intr_banks); high |= BIT(5); + } + + /* + * SMCA Corrected Error Interrupt + * + * MCA_CONFIG[IntPresent] is bit 10, and tells us if the bank can + * send an MCA Thresholding interrupt without the OS initializing + * this feature. This can be used if the threshold limit is managed + * by the platform. + * + * MCA_CONFIG[IntEn] is bit 40 (8 in the high portion of the MSR). + * The OS should set this to inform the platform that the OS is ready + * to handle the MCA Thresholding interrupt. + */ + if ((low & BIT(10)) && data->thr_intr_en) { + __set_bit(bank, data->thr_intr_banks); + high |= BIT(8); + } this_cpu_ptr(mce_banks_array)[bank].lsb_in_status = !!(low & BIT(8)); + if (low & MCI_CONFIG_PADDRV) + this_cpu_ptr(smca_banks)[bank].paddrv = 1; + wrmsr(smca_config, low, high); } @@ -368,6 +401,14 @@ static bool lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi) { int msr = (hi & MASK_LVTOFF_HI) >> 20; + /* + * On SMCA CPUs, LVT offset is programmed at a different MSR, and + * the BIOS provides the value. The original field where LVT offset + * was set is reserved. Return early here: + */ + if (mce_flags.smca) + return false; + if (apic < 0) { pr_err(FW_BUG "cpu %d, failed to setup threshold interrupt " "for bank %d, block %d (MSR%08X=0x%x%08x)\n", b->cpu, @@ -376,14 +417,6 @@ static bool lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi) } if (apic != msr) { - /* - * On SMCA CPUs, LVT offset is programmed at a different MSR, and - * the BIOS provides the value. The original field where LVT offset - * was set is reserved. 
Return early here: - */ - if (mce_flags.smca) - return false; - pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d " "for bank %d, block %d (MSR%08X=0x%x%08x)\n", b->cpu, apic, b->bank, b->block, b->address, hi, lo); @@ -443,6 +476,36 @@ static void threshold_restart_block(void *_tr) wrmsr(tr->b->address, lo, hi); } +static void threshold_restart_bank(unsigned int bank, bool intr_en) +{ + struct threshold_bank **thr_banks = this_cpu_read(threshold_banks); + struct threshold_block *block, *tmp; + struct thresh_restart tr; + + if (!thr_banks || !thr_banks[bank]) + return; + + memset(&tr, 0, sizeof(tr)); + + list_for_each_entry_safe(block, tmp, &thr_banks[bank]->miscj, miscj) { + tr.b = block; + tr.b->interrupt_enable = intr_en; + threshold_restart_block(&tr); + } +} + +/* Try to use the threshold limit reported through APEI. */ +static u16 get_thr_limit(void) +{ + u32 thr_limit = mce_get_apei_thr_limit(); + + /* Fallback to old default if APEI limit is not available. */ + if (!thr_limit) + return THRESHOLD_MAX; + + return min(thr_limit, THRESHOLD_MAX); +} + static void mce_threshold_block_init(struct threshold_block *b, int offset) { struct thresh_restart tr = { @@ -451,7 +514,7 @@ static void mce_threshold_block_init(struct threshold_block *b, int offset) .lvt_off = offset, }; - b->threshold_limit = THRESHOLD_MAX; + b->threshold_limit = get_thr_limit(); threshold_restart_block(&tr); }; @@ -464,41 +527,6 @@ static int setup_APIC_mce_threshold(int reserved, int new) return reserved; } -static int setup_APIC_deferred_error(int reserved, int new) -{ - if (reserved < 0 && !setup_APIC_eilvt(new, DEFERRED_ERROR_VECTOR, - APIC_EILVT_MSG_FIX, 0)) - return new; - - return reserved; -} - -static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c) -{ - u32 low = 0, high = 0; - int def_offset = -1, def_new; - - if (rdmsr_safe(MSR_CU_DEF_ERR, &low, &high)) - return; - - def_new = (low & MASK_DEF_LVTOFF) >> 4; - if (!(low & MASK_DEF_LVTOFF)) { - pr_err(FW_BUG "Your BIOS is not setting up LVT offset 0x2 for deferred error IRQs correctly.\n"); - def_new = DEF_LVT_OFF; - low = (low & ~MASK_DEF_LVTOFF) | (DEF_LVT_OFF << 4); - } - - def_offset = setup_APIC_deferred_error(def_offset, def_new); - if ((def_offset == def_new) && - (deferred_error_int_vector != amd_deferred_error_interrupt)) - deferred_error_int_vector = amd_deferred_error_interrupt; - - if (!mce_flags.smca) - low = (low & ~MASK_DEF_INT_TYPE) | DEF_INT_TYPE_APIC; - - wrmsr(MSR_CU_DEF_ERR, low, high); -} - static u32 get_block_address(u32 current_addr, u32 low, u32 high, unsigned int bank, unsigned int block, unsigned int cpu) @@ -534,12 +562,10 @@ static u32 get_block_address(u32 current_addr, u32 low, u32 high, return addr; } -static int -prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr, - int offset, u32 misc_high) +static int prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr, + int offset, u32 misc_high) { unsigned int cpu = smp_processor_id(); - u32 smca_low, smca_high; struct threshold_block b; int new; @@ -556,20 +582,13 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr, if (!b.interrupt_capable) goto done; + __set_bit(bank, this_cpu_ptr(&mce_amd_data)->thr_intr_banks); b.interrupt_enable = 1; - if (!mce_flags.smca) { - new = (misc_high & MASK_LVTOFF_HI) >> 20; - goto set_offset; - } - - /* Gather LVT offset for thresholding: */ - if (rdmsr_safe(MSR_CU_DEF_ERR, &smca_low, &smca_high)) - goto out; - - new = (smca_low & SMCA_THR_LVT_OFF) >> 12; + if 
(mce_flags.smca) + goto done; -set_offset: + new = (misc_high & MASK_LVTOFF_HI) >> 20; offset = setup_APIC_mce_threshold(offset, new); if (offset == new) thresholding_irq_en = true; @@ -577,7 +596,6 @@ set_offset: done: mce_threshold_block_init(&b, offset); -out: return offset; } @@ -668,6 +686,32 @@ static void amd_apply_cpu_quirks(struct cpuinfo_x86 *c) mce_banks[0].ctl = 0; } +/* + * Enable the APIC LVT interrupt vectors once per-CPU. This should be done before hardware is + * ready to send interrupts. + * + * Individual error sources are enabled later during per-bank init. + */ +static void smca_enable_interrupt_vectors(void) +{ + struct mce_amd_cpu_data *data = this_cpu_ptr(&mce_amd_data); + u64 mca_intr_cfg, offset; + + if (!mce_flags.smca || !mce_flags.succor) + return; + + if (rdmsrq_safe(MSR_CU_DEF_ERR, &mca_intr_cfg)) + return; + + offset = (mca_intr_cfg & SMCA_THR_LVT_OFF) >> 12; + if (!setup_APIC_eilvt(offset, THRESHOLD_APIC_VECTOR, APIC_EILVT_MSG_FIX, 0)) + data->thr_intr_en = 1; + + offset = (mca_intr_cfg & MASK_DEF_LVTOFF) >> 4; + if (!setup_APIC_eilvt(offset, DEFERRED_ERROR_VECTOR, APIC_EILVT_MSG_FIX, 0)) + data->dfr_intr_en = 1; +} + /* cpu init entry point, called from mce.c with preempt off */ void mce_amd_feature_init(struct cpuinfo_x86 *c) { @@ -679,10 +723,16 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) mce_flags.amd_threshold = 1; + smca_enable_interrupt_vectors(); + for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) { - if (mce_flags.smca) + if (mce_flags.smca) { smca_configure(bank, cpu); + if (!this_cpu_ptr(&mce_amd_data)->thr_intr_en) + continue; + } + disable_err_thresholding(c, bank); for (block = 0; block < NR_BLOCKS; ++block) { @@ -703,9 +753,6 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) offset = prepare_threshold_block(bank, block, address, offset, high); } } - - if (mce_flags.succor) - deferred_error_interrupt_enable(c); } void smca_bsp_init(void) @@ -748,9 +795,9 @@ bool amd_mce_is_memory_error(struct mce *m) } /* - * AMD systems do not have an explicit indicator that the value in MCA_ADDR is - * a system physical address. Therefore, individual cases need to be detected. - * Future cases and checks will be added as needed. + * Some AMD systems have an explicit indicator that the value in MCA_ADDR is a + * system physical address. Individual cases though, need to be detected for + * other systems. Future cases will be added as needed. * * 1) General case * a) Assume address is not usable. @@ -764,6 +811,8 @@ bool amd_mce_is_memory_error(struct mce *m) * a) Reported in legacy bank 4 with extended error code (XEC) 8. * b) MCA_STATUS[43] is *not* defined as poison in legacy bank 4. Therefore, * this bit should not be checked. + * 4) MCI_STATUS_PADDRVAL is set + * a) Will provide a valid system physical address. * * NOTE: SMCA UMC memory errors fall into case #1. */ @@ -777,6 +826,9 @@ bool amd_mce_usable_address(struct mce *m) return false; } + if (this_cpu_ptr(smca_banks)[m->bank].paddrv) + return m->status & MCI_STATUS_PADDRV; + /* Check poison bit for all other bank types. 
*/ if (m->status & MCI_STATUS_POISON) return true; @@ -785,37 +837,6 @@ bool amd_mce_usable_address(struct mce *m) return false; } -static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc) -{ - struct mce_hw_err err; - struct mce *m = &err.m; - - mce_prep_record(&err); - - m->status = status; - m->misc = misc; - m->bank = bank; - m->tsc = rdtsc(); - - if (m->status & MCI_STATUS_ADDRV) { - m->addr = addr; - - smca_extract_err_addr(m); - } - - if (mce_flags.smca) { - rdmsrq(MSR_AMD64_SMCA_MCx_IPID(bank), m->ipid); - - if (m->status & MCI_STATUS_SYNDV) { - rdmsrq(MSR_AMD64_SMCA_MCx_SYND(bank), m->synd); - rdmsrq(MSR_AMD64_SMCA_MCx_SYND1(bank), err.vendor.amd.synd1); - rdmsrq(MSR_AMD64_SMCA_MCx_SYND2(bank), err.vendor.amd.synd2); - } - } - - mce_log(&err); -} - DEFINE_IDTENTRY_SYSVEC(sysvec_deferred_error) { trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR); @@ -825,103 +846,20 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_deferred_error) apic_eoi(); } -/* - * Returns true if the logged error is deferred. False, otherwise. - */ -static inline bool -_log_error_bank(unsigned int bank, u32 msr_stat, u32 msr_addr, u64 misc) -{ - u64 status, addr = 0; - - rdmsrq(msr_stat, status); - if (!(status & MCI_STATUS_VAL)) - return false; - - if (status & MCI_STATUS_ADDRV) - rdmsrq(msr_addr, addr); - - __log_error(bank, status, addr, misc); - - wrmsrq(msr_stat, 0); - - return status & MCI_STATUS_DEFERRED; -} - -static bool _log_error_deferred(unsigned int bank, u32 misc) -{ - if (!_log_error_bank(bank, mca_msr_reg(bank, MCA_STATUS), - mca_msr_reg(bank, MCA_ADDR), misc)) - return false; - - /* - * Non-SMCA systems don't have MCA_DESTAT/MCA_DEADDR registers. - * Return true here to avoid accessing these registers. - */ - if (!mce_flags.smca) - return true; - - /* Clear MCA_DESTAT if the deferred error was logged from MCA_STATUS. */ - wrmsrq(MSR_AMD64_SMCA_MCx_DESTAT(bank), 0); - return true; -} - -/* - * We have three scenarios for checking for Deferred errors: - * - * 1) Non-SMCA systems check MCA_STATUS and log error if found. - * 2) SMCA systems check MCA_STATUS. If error is found then log it and also - * clear MCA_DESTAT. - * 3) SMCA systems check MCA_DESTAT, if error was not found in MCA_STATUS, and - * log it. - */ -static void log_error_deferred(unsigned int bank) -{ - if (_log_error_deferred(bank, 0)) - return; - - /* - * Only deferred errors are logged in MCA_DE{STAT,ADDR} so just check - * for a valid error. - */ - _log_error_bank(bank, MSR_AMD64_SMCA_MCx_DESTAT(bank), - MSR_AMD64_SMCA_MCx_DEADDR(bank), 0); -} - /* APIC interrupt handler for deferred errors */ static void amd_deferred_error_interrupt(void) { - unsigned int bank; - - for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) - log_error_deferred(bank); + machine_check_poll(MCP_TIMESTAMP, &this_cpu_ptr(&mce_amd_data)->dfr_intr_banks); } -static void log_error_thresholding(unsigned int bank, u64 misc) +void mce_amd_handle_storm(unsigned int bank, bool on) { - _log_error_deferred(bank, misc); + threshold_restart_bank(bank, on); } -static void log_and_reset_block(struct threshold_block *block) +static void amd_reset_thr_limit(unsigned int bank) { - struct thresh_restart tr; - u32 low = 0, high = 0; - - if (!block) - return; - - if (rdmsr_safe(block->address, &low, &high)) - return; - - if (!(high & MASK_OVERFLOW_HI)) - return; - - /* Log the MCE which caused the threshold event. */ - log_error_thresholding(block->bank, ((u64)high << 32) | low); - - /* Reset threshold block after logging error. 
*/ - memset(&tr, 0, sizeof(tr)); - tr.b = block; - threshold_restart_block(&tr); + threshold_restart_bank(bank, true); } /* @@ -930,33 +868,21 @@ static void log_and_reset_block(struct threshold_block *block) */ static void amd_threshold_interrupt(void) { - struct threshold_bank **bp = this_cpu_read(threshold_banks), *thr_bank; - unsigned int bank, cpu = smp_processor_id(); - struct threshold_block *block, *tmp; - - /* - * Validate that the threshold bank has been initialized already. The - * handler is installed at boot time, but on a hotplug event the - * interrupt might fire before the data has been initialized. - */ - if (!bp) - return; - - for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) { - if (!(per_cpu(bank_map, cpu) & BIT_ULL(bank))) - continue; - - thr_bank = bp[bank]; - if (!thr_bank) - continue; - - list_for_each_entry_safe(block, tmp, &thr_bank->miscj, miscj) - log_and_reset_block(block); - } + machine_check_poll(MCP_TIMESTAMP, &this_cpu_ptr(&mce_amd_data)->thr_intr_banks); } void amd_clear_bank(struct mce *m) { + amd_reset_thr_limit(m->bank); + + /* Clear MCA_DESTAT for all deferred errors even those logged in MCA_STATUS. */ + if (m->status & MCI_STATUS_DEFERRED) + mce_wrmsrq(MSR_AMD64_SMCA_MCx_DESTAT(m->bank), 0); + + /* Don't clear MCA_STATUS if MCA_DESTAT was used exclusively. */ + if (m->kflags & MCE_CHECK_DFR_REGS) + return; + mce_wrmsrq(mca_msr_reg(m->bank, MCA_STATUS), 0); } @@ -1172,7 +1098,7 @@ static int allocate_threshold_blocks(unsigned int cpu, struct threshold_bank *tb b->address = address; b->interrupt_enable = 0; b->interrupt_capable = lvt_interrupt_supported(bank, high); - b->threshold_limit = THRESHOLD_MAX; + b->threshold_limit = get_thr_limit(); if (b->interrupt_capable) { default_attrs[2] = &interrupt_enable.attr; @@ -1183,6 +1109,8 @@ static int allocate_threshold_blocks(unsigned int cpu, struct threshold_bank *tb list_add(&b->miscj, &tb->miscj); + mce_threshold_block_init(b, (high & MASK_LVTOFF_HI) >> 20); + err = kobject_init_and_add(&b->kobj, &threshold_ktype, tb->kobj, get_name(cpu, bank, b)); if (err) goto out_free; diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 460e90a1a0b1..4aff14e04287 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -687,7 +687,10 @@ static noinstr void mce_read_aux(struct mce_hw_err *err, int i) m->misc = mce_rdmsrq(mca_msr_reg(i, MCA_MISC)); if (m->status & MCI_STATUS_ADDRV) { - m->addr = mce_rdmsrq(mca_msr_reg(i, MCA_ADDR)); + if (m->kflags & MCE_CHECK_DFR_REGS) + m->addr = mce_rdmsrq(MSR_AMD64_SMCA_MCx_DEADDR(i)); + else + m->addr = mce_rdmsrq(mca_msr_reg(i, MCA_ADDR)); /* * Mask the reported address by the reported granularity. @@ -715,6 +718,29 @@ static noinstr void mce_read_aux(struct mce_hw_err *err, int i) DEFINE_PER_CPU(unsigned, mce_poll_count); /* + * We have three scenarios for checking for Deferred errors: + * + * 1) Non-SMCA systems check MCA_STATUS and log error if found. + * 2) SMCA systems check MCA_STATUS. If error is found then log it and also + * clear MCA_DESTAT. + * 3) SMCA systems check MCA_DESTAT, if error was not found in MCA_STATUS, and + * log it. 
+ */ +static bool smca_should_log_poll_error(struct mce *m) +{ + if (m->status & MCI_STATUS_VAL) + return true; + + m->status = mce_rdmsrq(MSR_AMD64_SMCA_MCx_DESTAT(m->bank)); + if ((m->status & MCI_STATUS_VAL) && (m->status & MCI_STATUS_DEFERRED)) { + m->kflags |= MCE_CHECK_DFR_REGS; + return true; + } + + return false; +} + +/* * Newer Intel systems that support software error * recovery need to make additional checks. Other * CPUs should skip over uncorrected errors, but log @@ -740,6 +766,9 @@ static bool should_log_poll_error(enum mcp_flags flags, struct mce_hw_err *err) { struct mce *m = &err->m; + if (mce_flags.smca) + return smca_should_log_poll_error(m); + /* If this entry is not valid, ignore it. */ if (!(m->status & MCI_STATUS_VAL)) return false; diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h index b0e00ec5cc8c..a31cf984619c 100644 --- a/arch/x86/kernel/cpu/mce/internal.h +++ b/arch/x86/kernel/cpu/mce/internal.h @@ -67,6 +67,7 @@ void mce_track_storm(struct mce *mce); void mce_inherit_storm(unsigned int bank); bool mce_get_storm_mode(void); void mce_set_storm_mode(bool storm); +u32 mce_get_apei_thr_limit(void); #else static inline void cmci_storm_begin(unsigned int bank) {} static inline void cmci_storm_end(unsigned int bank) {} @@ -74,6 +75,7 @@ static inline void mce_track_storm(struct mce *mce) {} static inline void mce_inherit_storm(unsigned int bank) {} static inline bool mce_get_storm_mode(void) { return false; } static inline void mce_set_storm_mode(bool storm) {} +static inline u32 mce_get_apei_thr_limit(void) { return 0; } #endif /* @@ -267,6 +269,7 @@ void mce_prep_record_per_cpu(unsigned int cpu, struct mce *m); #ifdef CONFIG_X86_MCE_AMD void mce_threshold_create_device(unsigned int cpu); void mce_threshold_remove_device(unsigned int cpu); +void mce_amd_handle_storm(unsigned int bank, bool on); extern bool amd_filter_mce(struct mce *m); bool amd_mce_usable_address(struct mce *m); void amd_clear_bank(struct mce *m); @@ -299,6 +302,7 @@ void smca_bsp_init(void); #else static inline void mce_threshold_create_device(unsigned int cpu) { } static inline void mce_threshold_remove_device(unsigned int cpu) { } +static inline void mce_amd_handle_storm(unsigned int bank, bool on) { } static inline bool amd_filter_mce(struct mce *m) { return false; } static inline bool amd_mce_usable_address(struct mce *m) { return false; } static inline void amd_clear_bank(struct mce *m) { } diff --git a/arch/x86/kernel/cpu/mce/threshold.c b/arch/x86/kernel/cpu/mce/threshold.c index f4a007616468..0d13c9ffcba0 100644 --- a/arch/x86/kernel/cpu/mce/threshold.c +++ b/arch/x86/kernel/cpu/mce/threshold.c @@ -13,6 +13,19 @@ #include "internal.h" +static u32 mce_apei_thr_limit; + +void mce_save_apei_thr_limit(u32 thr_limit) +{ + mce_apei_thr_limit = thr_limit; + pr_info("HEST corrected error threshold limit: %u\n", thr_limit); +} + +u32 mce_get_apei_thr_limit(void) +{ + return mce_apei_thr_limit; +} + static void default_threshold_interrupt(void) { pr_err("Unexpected threshold interrupt at vector %x\n", @@ -63,6 +76,9 @@ static void mce_handle_storm(unsigned int bank, bool on) case X86_VENDOR_INTEL: mce_intel_handle_storm(bank, on); break; + case X86_VENDOR_AMD: + mce_amd_handle_storm(bank, on); + break; } } @@ -85,7 +101,8 @@ void cmci_storm_end(unsigned int bank) { struct mca_storm_desc *storm = this_cpu_ptr(&storm_desc); - __clear_bit(bank, this_cpu_ptr(mce_poll_banks)); + if (!mce_flags.amd_threshold) + __clear_bit(bank, this_cpu_ptr(mce_poll_banks)); 
storm->banks[bank].history = 0; storm->banks[bank].in_storm_mode = false; diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index dc82153009da..3821a985f4ff 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -186,49 +186,61 @@ static u32 cpuid_to_ucode_rev(unsigned int val) return p.ucode_rev; } +static u32 get_cutoff_revision(u32 rev) +{ + switch (rev >> 8) { + case 0x80012: return 0x8001277; break; + case 0x80082: return 0x800820f; break; + case 0x83010: return 0x830107c; break; + case 0x86001: return 0x860010e; break; + case 0x86081: return 0x8608108; break; + case 0x87010: return 0x8701034; break; + case 0x8a000: return 0x8a0000a; break; + case 0xa0010: return 0xa00107a; break; + case 0xa0011: return 0xa0011da; break; + case 0xa0012: return 0xa001243; break; + case 0xa0082: return 0xa00820e; break; + case 0xa1011: return 0xa101153; break; + case 0xa1012: return 0xa10124e; break; + case 0xa1081: return 0xa108109; break; + case 0xa2010: return 0xa20102f; break; + case 0xa2012: return 0xa201212; break; + case 0xa4041: return 0xa404109; break; + case 0xa5000: return 0xa500013; break; + case 0xa6012: return 0xa60120a; break; + case 0xa7041: return 0xa704109; break; + case 0xa7052: return 0xa705208; break; + case 0xa7080: return 0xa708009; break; + case 0xa70c0: return 0xa70C009; break; + case 0xaa001: return 0xaa00116; break; + case 0xaa002: return 0xaa00218; break; + case 0xb0021: return 0xb002146; break; + case 0xb0081: return 0xb008111; break; + case 0xb1010: return 0xb101046; break; + case 0xb2040: return 0xb204031; break; + case 0xb4040: return 0xb404031; break; + case 0xb4041: return 0xb404101; break; + case 0xb6000: return 0xb600031; break; + case 0xb6080: return 0xb608031; break; + case 0xb7000: return 0xb700031; break; + default: break; + + } + return 0; +} + static bool need_sha_check(u32 cur_rev) { + u32 cutoff; + if (!cur_rev) { cur_rev = cpuid_to_ucode_rev(bsp_cpuid_1_eax); pr_info_once("No current revision, generating the lowest one: 0x%x\n", cur_rev); } - switch (cur_rev >> 8) { - case 0x80012: return cur_rev <= 0x8001277; break; - case 0x80082: return cur_rev <= 0x800820f; break; - case 0x83010: return cur_rev <= 0x830107c; break; - case 0x86001: return cur_rev <= 0x860010e; break; - case 0x86081: return cur_rev <= 0x8608108; break; - case 0x87010: return cur_rev <= 0x8701034; break; - case 0x8a000: return cur_rev <= 0x8a0000a; break; - case 0xa0010: return cur_rev <= 0xa00107a; break; - case 0xa0011: return cur_rev <= 0xa0011da; break; - case 0xa0012: return cur_rev <= 0xa001243; break; - case 0xa0082: return cur_rev <= 0xa00820e; break; - case 0xa1011: return cur_rev <= 0xa101153; break; - case 0xa1012: return cur_rev <= 0xa10124e; break; - case 0xa1081: return cur_rev <= 0xa108109; break; - case 0xa2010: return cur_rev <= 0xa20102f; break; - case 0xa2012: return cur_rev <= 0xa201212; break; - case 0xa4041: return cur_rev <= 0xa404109; break; - case 0xa5000: return cur_rev <= 0xa500013; break; - case 0xa6012: return cur_rev <= 0xa60120a; break; - case 0xa7041: return cur_rev <= 0xa704109; break; - case 0xa7052: return cur_rev <= 0xa705208; break; - case 0xa7080: return cur_rev <= 0xa708009; break; - case 0xa70c0: return cur_rev <= 0xa70C009; break; - case 0xaa001: return cur_rev <= 0xaa00116; break; - case 0xaa002: return cur_rev <= 0xaa00218; break; - case 0xb0021: return cur_rev <= 0xb002146; break; - case 0xb0081: return cur_rev <= 0xb008111; break; - case 0xb1010: return cur_rev <= 
0xb101046; break; - case 0xb2040: return cur_rev <= 0xb204031; break; - case 0xb4040: return cur_rev <= 0xb404031; break; - case 0xb6000: return cur_rev <= 0xb600031; break; - case 0xb6080: return cur_rev <= 0xb608031; break; - case 0xb7000: return cur_rev <= 0xb700031; break; - default: break; - } + cutoff = get_cutoff_revision(cur_rev); + if (cutoff) + return cur_rev <= cutoff; pr_info("You should not be seeing this. Please send the following couple of lines to x86-<at>-kernel.org\n"); pr_info("CPUID(1).EAX: 0x%x, current revision: 0x%x\n", bsp_cpuid_1_eax, cur_rev); @@ -493,6 +505,7 @@ static int verify_patch(const u8 *buf, size_t buf_size, u32 *patch_size) { u8 family = x86_family(bsp_cpuid_1_eax); struct microcode_header_amd *mc_hdr; + u32 cur_rev, cutoff, patch_rev; u32 sh_psize; u16 proc_id; u8 patch_fam; @@ -532,11 +545,32 @@ static int verify_patch(const u8 *buf, size_t buf_size, u32 *patch_size) proc_id = mc_hdr->processor_rev_id; patch_fam = 0xf + (proc_id >> 12); - ucode_dbg("Patch-ID 0x%08x: family: 0x%x\n", mc_hdr->patch_id, patch_fam); - if (patch_fam != family) return 1; + cur_rev = get_patch_level(); + + /* No cutoff revision means old/unaffected by signing algorithm weakness => matches */ + cutoff = get_cutoff_revision(cur_rev); + if (!cutoff) + goto ok; + + patch_rev = mc_hdr->patch_id; + + ucode_dbg("cur_rev: 0x%x, cutoff: 0x%x, patch_rev: 0x%x\n", + cur_rev, cutoff, patch_rev); + + if (cur_rev <= cutoff && patch_rev <= cutoff) + goto ok; + + if (cur_rev > cutoff && patch_rev > cutoff) + goto ok; + + return 1; + +ok: + ucode_dbg("Patch-ID 0x%08x: family: 0x%x\n", mc_hdr->patch_id, patch_fam); + return 0; } @@ -605,8 +639,6 @@ static size_t parse_container(u8 *ucode, size_t size, struct cont_desc *desc) mc = (struct microcode_amd *)(buf + SECTION_HDR_SIZE); - ucode_dbg("patch_id: 0x%x\n", mc->hdr.patch_id); - if (mc_patch_matches(mc, eq_id)) { desc->psize = patch_size; desc->mc = mc; diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index f75c140906d0..ccc83b0bf63c 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -136,7 +136,7 @@ bool __init microcode_loader_disabled(void) return dis_ucode_ldr; } -static void early_parse_cmdline(void) +static void __init early_parse_cmdline(void) { char cmd_buf[64] = {}; char *s, *p = cmd_buf; @@ -589,6 +589,17 @@ static int load_late_stop_cpus(bool is_safe) pr_err("You should switch to early loading, if possible.\n"); } + /* + * Pre-load the microcode image into a staging device. This + * process is preemptible and does not require stopping CPUs. + * Successful staging simplifies the subsequent late-loading + * process, reducing rendezvous time. + * + * Even if the transfer fails, the update will proceed as usual. 
+ */ + if (microcode_ops->use_staging) + microcode_ops->stage_microcode(); + atomic_set(&late_cpus_in, num_online_cpus()); atomic_set(&offline_in_nmi, 0); loops_per_usec = loops_per_jiffy / (TICK_NSEC / 1000); diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 371ca6eac00e..8744f3adc2a0 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -13,12 +13,15 @@ #define pr_fmt(fmt) "microcode: " fmt #include <linux/earlycpio.h> #include <linux/firmware.h> +#include <linux/pci_ids.h> #include <linux/uaccess.h> #include <linux/initrd.h> #include <linux/kernel.h> +#include <linux/delay.h> #include <linux/slab.h> #include <linux/cpu.h> #include <linux/uio.h> +#include <linux/io.h> #include <linux/mm.h> #include <asm/cpu_device_id.h> @@ -33,6 +36,38 @@ static const char ucode_path[] = "kernel/x86/microcode/GenuineIntel.bin"; #define UCODE_BSP_LOADED ((struct microcode_intel *)0x1UL) +/* Defines for the microcode staging mailbox interface */ +#define MBOX_REG_NUM 4 +#define MBOX_REG_SIZE sizeof(u32) + +#define MBOX_CONTROL_OFFSET 0x0 +#define MBOX_STATUS_OFFSET 0x4 +#define MBOX_WRDATA_OFFSET 0x8 +#define MBOX_RDDATA_OFFSET 0xc + +#define MASK_MBOX_CTRL_ABORT BIT(0) +#define MASK_MBOX_CTRL_GO BIT(31) + +#define MASK_MBOX_STATUS_ERROR BIT(2) +#define MASK_MBOX_STATUS_READY BIT(31) + +#define MASK_MBOX_RESP_SUCCESS BIT(0) +#define MASK_MBOX_RESP_PROGRESS BIT(1) +#define MASK_MBOX_RESP_ERROR BIT(2) + +#define MBOX_CMD_LOAD 0x3 +#define MBOX_OBJ_STAGING 0xb +#define MBOX_HEADER(size) ((PCI_VENDOR_ID_INTEL) | \ + (MBOX_OBJ_STAGING << 16) | \ + ((u64)((size) / sizeof(u32)) << 32)) + +/* The size of each mailbox header */ +#define MBOX_HEADER_SIZE sizeof(u64) +/* The size of staging hardware response */ +#define MBOX_RESPONSE_SIZE sizeof(u64) + +#define MBOX_XACTION_TIMEOUT_MS (10 * MSEC_PER_SEC) + /* Current microcode patch used in early patching on the APs. */ static struct microcode_intel *ucode_patch_va __read_mostly; static struct microcode_intel *ucode_patch_late __read_mostly; @@ -54,6 +89,23 @@ struct extended_sigtable { struct extended_signature sigs[]; }; +/** + * struct staging_state - Track the current staging process state + * + * @mmio_base: MMIO base address for staging + * @ucode_len: Total size of the microcode image + * @chunk_size: Size of each data piece + * @bytes_sent: Total bytes transmitted so far + * @offset: Current offset in the microcode image + */ +struct staging_state { + void __iomem *mmio_base; + unsigned int ucode_len; + unsigned int chunk_size; + unsigned int bytes_sent; + unsigned int offset; +}; + #define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) #define EXT_HEADER_SIZE (sizeof(struct extended_sigtable)) #define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature)) @@ -299,6 +351,298 @@ static __init struct microcode_intel *scan_microcode(void *data, size_t size, return size ? 
NULL : patch; } +static inline u32 read_mbox_dword(void __iomem *mmio_base) +{ + u32 dword = readl(mmio_base + MBOX_RDDATA_OFFSET); + + /* Acknowledge read completion to the staging hardware */ + writel(0, mmio_base + MBOX_RDDATA_OFFSET); + return dword; +} + +static inline void write_mbox_dword(void __iomem *mmio_base, u32 dword) +{ + writel(dword, mmio_base + MBOX_WRDATA_OFFSET); +} + +static inline u64 read_mbox_header(void __iomem *mmio_base) +{ + u32 high, low; + + low = read_mbox_dword(mmio_base); + high = read_mbox_dword(mmio_base); + + return ((u64)high << 32) | low; +} + +static inline void write_mbox_header(void __iomem *mmio_base, u64 value) +{ + write_mbox_dword(mmio_base, value); + write_mbox_dword(mmio_base, value >> 32); +} + +static void write_mbox_data(void __iomem *mmio_base, u32 *chunk, unsigned int chunk_bytes) +{ + int i; + + /* + * The MMIO space is mapped as Uncached (UC). Each write arrives + * at the device as an individual transaction in program order. + * The device can then reassemble the sequence accordingly. + */ + for (i = 0; i < chunk_bytes / sizeof(u32); i++) + write_mbox_dword(mmio_base, chunk[i]); +} + +/* + * Prepare for a new microcode transfer: reset hardware and record the + * image size. + */ +static void init_stage(struct staging_state *ss) +{ + ss->ucode_len = get_totalsize(&ucode_patch_late->hdr); + + /* + * Abort any ongoing process, effectively resetting the device. + * Unlike regular mailbox data processing requests, this + * operation does not require a status check. + */ + writel(MASK_MBOX_CTRL_ABORT, ss->mmio_base + MBOX_CONTROL_OFFSET); +} + +/* + * Update the chunk size and decide whether another chunk can be sent. + * This accounts for remaining data and retry limits. + */ +static bool can_send_next_chunk(struct staging_state *ss, int *err) +{ + /* A page size or remaining bytes if this is the final chunk */ + ss->chunk_size = min(PAGE_SIZE, ss->ucode_len - ss->offset); + + /* + * Each microcode image is divided into chunks, each at most + * one page size. A 10-chunk image would typically require 10 + * transactions. + * + * However, the hardware managing the mailbox has limited + * resources and may not cache the entire image, potentially + * requesting the same chunk multiple times. + * + * To tolerate this behavior, allow up to twice the expected + * number of transactions (i.e., a 10-chunk image can take up to + * 20 attempts). + * + * If the number of attempts exceeds this limit, treat it as + * exceeding the maximum allowed transfer size. + */ + if (ss->bytes_sent + ss->chunk_size > ss->ucode_len * 2) { + *err = -EMSGSIZE; + return false; + } + + *err = 0; + return true; +} + +/* + * The hardware indicates completion by returning a sentinel end offset. + */ +static inline bool is_end_offset(u32 offset) +{ + return offset == UINT_MAX; +} + +/* + * Determine whether staging is complete: either the hardware signaled + * the end offset, or no more transactions are permitted (retry limit + * reached). + */ +static inline bool staging_is_complete(struct staging_state *ss, int *err) +{ + return is_end_offset(ss->offset) || !can_send_next_chunk(ss, err); +} + +/* + * Wait for the hardware to complete a transaction. + * Return 0 on success, or an error code on failure. 
+ */ +static int wait_for_transaction(struct staging_state *ss) +{ + u32 timeout, status; + + /* Allow time for hardware to complete the operation: */ + for (timeout = 0; timeout < MBOX_XACTION_TIMEOUT_MS; timeout++) { + msleep(1); + + status = readl(ss->mmio_base + MBOX_STATUS_OFFSET); + /* Break out early if the hardware is ready: */ + if (status & MASK_MBOX_STATUS_READY) + break; + } + + /* Check for explicit error response */ + if (status & MASK_MBOX_STATUS_ERROR) + return -EIO; + + /* + * Hardware has neither responded to the action nor signaled any + * error. Treat this as a timeout. + */ + if (!(status & MASK_MBOX_STATUS_READY)) + return -ETIMEDOUT; + + return 0; +} + +/* + * Transmit a chunk of the microcode image to the hardware. + * Return 0 on success, or an error code on failure. + */ +static int send_data_chunk(struct staging_state *ss, void *ucode_ptr) +{ + u32 *src_chunk = ucode_ptr + ss->offset; + u16 mbox_size; + + /* + * Write a 'request' mailbox object in this order: + * 1. Mailbox header includes total size + * 2. Command header specifies the load operation + * 3. Data section contains a microcode chunk + * + * Thus, the mailbox size is two headers plus the chunk size. + */ + mbox_size = MBOX_HEADER_SIZE * 2 + ss->chunk_size; + write_mbox_header(ss->mmio_base, MBOX_HEADER(mbox_size)); + write_mbox_header(ss->mmio_base, MBOX_CMD_LOAD); + write_mbox_data(ss->mmio_base, src_chunk, ss->chunk_size); + ss->bytes_sent += ss->chunk_size; + + /* Notify the hardware that the mailbox is ready for processing. */ + writel(MASK_MBOX_CTRL_GO, ss->mmio_base + MBOX_CONTROL_OFFSET); + + return wait_for_transaction(ss); +} + +/* + * Retrieve the next offset from the hardware response. + * Return 0 on success, or an error code on failure. + */ +static int fetch_next_offset(struct staging_state *ss) +{ + const u64 expected_header = MBOX_HEADER(MBOX_HEADER_SIZE + MBOX_RESPONSE_SIZE); + u32 offset, status; + u64 header; + + /* + * The 'response' mailbox returns three fields, in order: + * 1. Header + * 2. Next offset in the microcode image + * 3. Status flags + */ + header = read_mbox_header(ss->mmio_base); + offset = read_mbox_dword(ss->mmio_base); + status = read_mbox_dword(ss->mmio_base); + + /* All valid responses must start with the expected header. */ + if (header != expected_header) { + pr_err_once("staging: invalid response header (0x%llx)\n", header); + return -EBADR; + } + + /* + * Verify the offset: If not at the end marker, it must not + * exceed the microcode image length. + */ + if (!is_end_offset(offset) && offset > ss->ucode_len) { + pr_err_once("staging: invalid offset (%u) past the image end (%u)\n", + offset, ss->ucode_len); + return -EINVAL; + } + + /* Hardware may report errors explicitly in the status field */ + if (status & MASK_MBOX_RESP_ERROR) + return -EPROTO; + + ss->offset = offset; + return 0; +} + +/* + * Handle the staging process using the mailbox MMIO interface. The + * microcode image is transferred in chunks until completion. + * Return 0 on success or an error code on failure. 
+ */ +static int do_stage(u64 mmio_pa) +{ + struct staging_state ss = {}; + int err; + + ss.mmio_base = ioremap(mmio_pa, MBOX_REG_NUM * MBOX_REG_SIZE); + if (WARN_ON_ONCE(!ss.mmio_base)) + return -EADDRNOTAVAIL; + + init_stage(&ss); + + /* Perform the staging process while within the retry limit */ + while (!staging_is_complete(&ss, &err)) { + /* Send a chunk of microcode each time: */ + err = send_data_chunk(&ss, ucode_patch_late); + if (err) + break; + /* + * Then, ask the hardware which piece of the image it + * needs next. The same piece may be sent more than once. + */ + err = fetch_next_offset(&ss); + if (err) + break; + } + + iounmap(ss.mmio_base); + + return err; +} + +static void stage_microcode(void) +{ + unsigned int pkg_id = UINT_MAX; + int cpu, err; + u64 mmio_pa; + + if (!IS_ALIGNED(get_totalsize(&ucode_patch_late->hdr), sizeof(u32))) { + pr_err("Microcode image 32-bit misaligned (0x%x), staging failed.\n", + get_totalsize(&ucode_patch_late->hdr)); + return; + } + + lockdep_assert_cpus_held(); + + /* + * The MMIO address is unique per package, and all the SMT + * primary threads are online here. Find each MMIO space by + * their package IDs to avoid duplicate staging. + */ + for_each_cpu(cpu, cpu_primary_thread_mask) { + if (topology_logical_package_id(cpu) == pkg_id) + continue; + + pkg_id = topology_logical_package_id(cpu); + + err = rdmsrq_on_cpu(cpu, MSR_IA32_MCU_STAGING_MBOX_ADDR, &mmio_pa); + if (WARN_ON_ONCE(err)) + return; + + err = do_stage(mmio_pa); + if (err) { + pr_err("Error: staging failed (%d) for CPU%d at package %u.\n", + err, cpu, pkg_id); + return; + } + } + + pr_info("Staging of patch revision 0x%x succeeded.\n", ucode_patch_late->hdr.rev); +} + static enum ucode_state __apply_microcode(struct ucode_cpu_info *uci, struct microcode_intel *mc, u32 *cur_rev) @@ -627,6 +971,7 @@ static struct microcode_ops microcode_intel_ops = { .collect_cpu_info = collect_cpu_info, .apply_microcode = apply_microcode_late, .finalize_late_load = finalize_late_load, + .stage_microcode = stage_microcode, .use_nmi = IS_ENABLED(CONFIG_X86_64), }; @@ -638,6 +983,18 @@ static __init void calc_llc_size_per_core(struct cpuinfo_x86 *c) llc_size_per_core = (unsigned int)llc_size; } +static __init bool staging_available(void) +{ + u64 val; + + val = x86_read_arch_cap_msr(); + if (!(val & ARCH_CAP_MCU_ENUM)) + return false; + + rdmsrq(MSR_IA32_MCU_ENUMERATION, val); + return !!(val & MCU_STAGING); +} + struct microcode_ops * __init init_intel_microcode(void) { struct cpuinfo_x86 *c = &boot_cpu_data; @@ -648,6 +1005,11 @@ struct microcode_ops * __init init_intel_microcode(void) return NULL; } + if (staging_available()) { + microcode_intel_ops.use_staging = true; + pr_info("Enabled staging feature.\n"); + } + calc_llc_size_per_core(c); return &microcode_intel_ops; diff --git a/arch/x86/kernel/cpu/microcode/internal.h b/arch/x86/kernel/cpu/microcode/internal.h index ae8dbc2b908d..a10b547eda1e 100644 --- a/arch/x86/kernel/cpu/microcode/internal.h +++ b/arch/x86/kernel/cpu/microcode/internal.h @@ -31,10 +31,12 @@ struct microcode_ops { * See also the "Synchronization" section in microcode_core.c.
*/ enum ucode_state (*apply_microcode)(int cpu); + void (*stage_microcode)(void); int (*collect_cpu_info)(int cpu, struct cpu_signature *csig); void (*finalize_late_load)(int result); unsigned int nmi_safe : 1, - use_nmi : 1; + use_nmi : 1, + use_staging : 1; }; struct early_load_data { diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 8c18327eb10b..0863733858dc 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -89,7 +89,6 @@ static int mtrr_state_set; u64 mtrr_tom2; struct mtrr_state_type mtrr_state; -EXPORT_SYMBOL_GPL(mtrr_state); /* Reserved bits in the high portion of the MTRRphysBaseN MSR. */ u32 phys_hi_rsvd; diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h index 5655f253d929..2de3bd2f95d1 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.h +++ b/arch/x86/kernel/cpu/mtrr/mtrr.h @@ -46,10 +46,6 @@ struct set_mtrr_context { u32 ccr3; }; -void set_mtrr_done(struct set_mtrr_context *ctxt); -void set_mtrr_cache_disable(struct set_mtrr_context *ctxt); -void set_mtrr_prepare_save(struct set_mtrr_context *ctxt); - void fill_mtrr_var_range(unsigned int index, u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi); bool get_mtrr_state(void); diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 06ca5a30140c..3792ab4819dc 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -274,6 +274,11 @@ static void rdt_get_cdp_config(int level) rdt_resources_all[level].r_resctrl.cdp_capable = true; } +static void rdt_set_io_alloc_capable(struct rdt_resource *r) +{ + r->cache.io_alloc_capable = true; +} + static void rdt_get_cdp_l3_config(void) { rdt_get_cdp_config(RDT_RESOURCE_L3); @@ -719,6 +724,7 @@ enum { RDT_FLAG_SMBA, RDT_FLAG_BMEC, RDT_FLAG_ABMC, + RDT_FLAG_SDCIAE, }; #define RDT_OPT(idx, n, f) \ @@ -745,6 +751,7 @@ static struct rdt_options rdt_options[] __ro_after_init = { RDT_OPT(RDT_FLAG_SMBA, "smba", X86_FEATURE_SMBA), RDT_OPT(RDT_FLAG_BMEC, "bmec", X86_FEATURE_BMEC), RDT_OPT(RDT_FLAG_ABMC, "abmc", X86_FEATURE_ABMC), + RDT_OPT(RDT_FLAG_SDCIAE, "sdciae", X86_FEATURE_SDCIAE), }; #define NUM_RDT_OPTIONS ARRAY_SIZE(rdt_options) @@ -853,6 +860,8 @@ static __init bool get_rdt_alloc_resources(void) rdt_get_cache_alloc_cfg(1, r); if (rdt_cpu_has(X86_FEATURE_CDP_L3)) rdt_get_cdp_l3_config(); + if (rdt_cpu_has(X86_FEATURE_SDCIAE)) + rdt_set_io_alloc_capable(r); ret = true; } if (rdt_cpu_has(X86_FEATURE_CAT_L2)) { diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index 1189c0df4ad7..b20e705606b8 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -91,3 +91,43 @@ u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d, return hw_dom->ctrl_val[idx]; } + +bool resctrl_arch_get_io_alloc_enabled(struct rdt_resource *r) +{ + return resctrl_to_arch_res(r)->sdciae_enabled; +} + +static void resctrl_sdciae_set_one_amd(void *arg) +{ + bool *enable = arg; + + if (*enable) + msr_set_bit(MSR_IA32_L3_QOS_EXT_CFG, SDCIAE_ENABLE_BIT); + else + msr_clear_bit(MSR_IA32_L3_QOS_EXT_CFG, SDCIAE_ENABLE_BIT); +} + +static void _resctrl_sdciae_enable(struct rdt_resource *r, bool enable) +{ + struct rdt_ctrl_domain *d; + + /* Walking r->ctrl_domains, ensure it can't race with cpuhp */ + lockdep_assert_cpus_held(); + + /* Update MSR_IA32_L3_QOS_EXT_CFG MSR on all the CPUs in all domains */ + list_for_each_entry(d, &r->ctrl_domains, 
hdr.list) + on_each_cpu_mask(&d->hdr.cpu_mask, resctrl_sdciae_set_one_amd, &enable, 1); +} + +int resctrl_arch_io_alloc_enable(struct rdt_resource *r, bool enable) +{ + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); + + if (hw_res->r_resctrl.cache.io_alloc_capable && + hw_res->sdciae_enabled != enable) { + _resctrl_sdciae_enable(r, enable); + hw_res->sdciae_enabled = enable; + } + + return 0; +} diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 9f4c2f0aaf5c..4a916c84a322 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -46,6 +46,9 @@ struct arch_mbm_state { #define ABMC_EXTENDED_EVT_ID BIT(31) #define ABMC_EVT_ID BIT(0) +/* Setting bit 1 in MSR_IA32_L3_QOS_EXT_CFG enables the SDCIAE feature. */ +#define SDCIAE_ENABLE_BIT 1 + /** * struct rdt_hw_ctrl_domain - Arch private attributes of a set of CPUs that share * a resource for a control function @@ -112,6 +115,7 @@ struct msr_param { * @mbm_width: Monitor width, to detect and correct for overflow. * @cdp_enabled: CDP state of this resource * @mbm_cntr_assign_enabled: ABMC feature is enabled + * @sdciae_enabled: SDCIAE feature (backing "io_alloc") is enabled. * * Members of this structure are either private to the architecture * e.g. mbm_width, or accessed via helpers that provide abstraction. e.g. @@ -126,6 +130,7 @@ struct rdt_hw_resource { unsigned int mbm_width; bool cdp_enabled; bool mbm_cntr_assign_enabled; + bool sdciae_enabled; }; static inline struct rdt_hw_resource *resctrl_to_arch_res(struct rdt_resource *r) diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index fe1a2aa53c16..dffcc8307500 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -361,6 +361,7 @@ static const struct x86_cpu_id snc_cpu_ids[] __initconst = { X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, 0), X86_MATCH_VFM(INTEL_GRANITERAPIDS_X, 0), X86_MATCH_VFM(INTEL_ATOM_CRESTMONT_X, 0), + X86_MATCH_VFM(INTEL_ATOM_DARKMONT_X, 0), {} }; diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index caa4dc885c21..cde4b6cd3471 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -43,7 +43,11 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_PER_THREAD_MBA, CPUID_ECX, 0, 0x00000010, 3 }, { X86_FEATURE_SGX1, CPUID_EAX, 0, 0x00000012, 0 }, { X86_FEATURE_SGX2, CPUID_EAX, 1, 0x00000012, 0 }, + { X86_FEATURE_SGX_EUPDATESVN, CPUID_EAX, 10, 0x00000012, 0 }, { X86_FEATURE_SGX_EDECCSSA, CPUID_EAX, 11, 0x00000012, 0 }, + { X86_FEATURE_OVERFLOW_RECOV, CPUID_EBX, 0, 0x80000007, 0 }, + { X86_FEATURE_SUCCOR, CPUID_EBX, 1, 0x80000007, 0 }, + { X86_FEATURE_SMCA, CPUID_EBX, 3, 0x80000007, 0 }, { X86_FEATURE_HW_PSTATE, CPUID_EDX, 7, 0x80000007, 0 }, { X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 }, { X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 }, @@ -53,6 +57,7 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_SMBA, CPUID_EBX, 2, 0x80000020, 0 }, { X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 }, { X86_FEATURE_ABMC, CPUID_EBX, 5, 0x80000020, 0 }, + { X86_FEATURE_SDCIAE, CPUID_EBX, 6, 0x80000020, 0 }, { X86_FEATURE_TSA_SQ_NO, CPUID_ECX, 1, 0x80000021, 0 }, { X86_FEATURE_TSA_L1_NO, CPUID_ECX, 2, 0x80000021, 0 }, { X86_FEATURE_AMD_WORKLOAD_CLASS, CPUID_EAX, 22, 0x80000021, 0 }, diff --git a/arch/x86/kernel/cpu/sgx/driver.c b/arch/x86/kernel/cpu/sgx/driver.c index 7f8d1e11dbee..79d6020dfe9c 100644 --- 
a/arch/x86/kernel/cpu/sgx/driver.c +++ b/arch/x86/kernel/cpu/sgx/driver.c @@ -14,7 +14,7 @@ u64 sgx_attributes_reserved_mask; u64 sgx_xfrm_reserved_mask = ~0x3; u32 sgx_misc_reserved_mask; -static int sgx_open(struct inode *inode, struct file *file) +static int __sgx_open(struct inode *inode, struct file *file) { struct sgx_encl *encl; int ret; @@ -41,6 +41,23 @@ static int sgx_open(struct inode *inode, struct file *file) return 0; } +static int sgx_open(struct inode *inode, struct file *file) +{ + int ret; + + ret = sgx_inc_usage_count(); + if (ret) + return ret; + + ret = __sgx_open(inode, file); + if (ret) { + sgx_dec_usage_count(); + return ret; + } + + return 0; +} + static int sgx_release(struct inode *inode, struct file *file) { struct sgx_encl *encl = file->private_data; diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c index 308dbbae6c6e..cf149b9f4916 100644 --- a/arch/x86/kernel/cpu/sgx/encl.c +++ b/arch/x86/kernel/cpu/sgx/encl.c @@ -765,6 +765,7 @@ void sgx_encl_release(struct kref *ref) WARN_ON_ONCE(encl->secs.epc_page); kfree(encl); + sgx_dec_usage_count(); } /* diff --git a/arch/x86/kernel/cpu/sgx/encls.h b/arch/x86/kernel/cpu/sgx/encls.h index 42a088a337c5..74be751199a4 100644 --- a/arch/x86/kernel/cpu/sgx/encls.h +++ b/arch/x86/kernel/cpu/sgx/encls.h @@ -233,4 +233,9 @@ static inline int __eaug(struct sgx_pageinfo *pginfo, void *addr) return __encls_2(EAUG, pginfo, addr); } +/* Attempt to update CPUSVN at runtime. */ +static inline int __eupdatesvn(void) +{ + return __encls_ret_1(EUPDATESVN, ""); +} #endif /* _X86_ENCLS_H */ diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c index 2de01b379aa3..dc73194416ac 100644 --- a/arch/x86/kernel/cpu/sgx/main.c +++ b/arch/x86/kernel/cpu/sgx/main.c @@ -5,6 +5,7 @@ #include <linux/freezer.h> #include <linux/highmem.h> #include <linux/kthread.h> +#include <linux/kvm_types.h> #include <linux/miscdevice.h> #include <linux/node.h> #include <linux/pagemap.h> @@ -16,6 +17,7 @@ #include <linux/vmalloc.h> #include <asm/msr.h> #include <asm/sgx.h> +#include <asm/archrandom.h> #include "driver.h" #include "encl.h" #include "encls.h" @@ -915,7 +917,107 @@ int sgx_set_attribute(unsigned long *allowed_attributes, *allowed_attributes |= SGX_ATTR_PROVISIONKEY; return 0; } -EXPORT_SYMBOL_GPL(sgx_set_attribute); +EXPORT_SYMBOL_FOR_KVM(sgx_set_attribute); + +/* Counter to count the active SGX users */ +static int sgx_usage_count; + +/** + * sgx_update_svn() - Attempt to call ENCLS[EUPDATESVN]. + * + * This instruction attempts to update CPUSVN to the + * currently loaded microcode update SVN and generate new + * cryptographic assets. + * + * Return: + * * %0: - Success or not supported + * * %-EAGAIN: - Can be safely retried, failure is due to lack of + * * entropy in RNG + * * %-EIO: - Unexpected error, retries are not advisable + */ +static int sgx_update_svn(void) +{ + int ret; + + /* + * If EUPDATESVN is not available, it is ok to + * silently skip it to comply with legacy behavior. + */ + if (!cpu_feature_enabled(X86_FEATURE_SGX_EUPDATESVN)) + return 0; + + /* + * EPC is guaranteed to be empty when there are no users. + * Ensure we are on our first user before proceeding further. + */ + WARN(sgx_usage_count, "Elevated usage count when calling EUPDATESVN\n"); + + for (int i = 0; i < RDRAND_RETRY_LOOPS; i++) { + ret = __eupdatesvn(); + + /* Stop on success or unexpected errors: */ + if (ret != SGX_INSUFFICIENT_ENTROPY) + break; + } + + switch (ret) { + case 0: + /* + * SVN successfully updated. 
+ * Let users know when the update was successful. + */ + pr_info("SVN updated successfully\n"); + return 0; + case SGX_NO_UPDATE: + /* + * SVN update failed since the current SVN is + * not newer than CPUSVN. This is the most + * common case and indicates no harm. + */ + return 0; + case SGX_INSUFFICIENT_ENTROPY: + /* + * SVN update failed due to lack of entropy in DRNG. + * Indicate to userspace that it should retry. + */ + return -EAGAIN; + default: + break; + } + + /* + * EUPDATESVN was called when EPC is empty, all other error + * codes are unexpected. + */ + ENCLS_WARN(ret, "EUPDATESVN"); + return -EIO; +} + +/* Mutex to ensure no concurrent EPC accesses during EUPDATESVN */ +static DEFINE_MUTEX(sgx_svn_lock); + +int sgx_inc_usage_count(void) +{ + int ret; + + guard(mutex)(&sgx_svn_lock); + + if (!sgx_usage_count) { + ret = sgx_update_svn(); + if (ret) + return ret; + } + + sgx_usage_count++; + + return 0; +} + +void sgx_dec_usage_count(void) +{ + guard(mutex)(&sgx_svn_lock); + sgx_usage_count--; +} static int __init sgx_init(void) { diff --git a/arch/x86/kernel/cpu/sgx/sgx.h b/arch/x86/kernel/cpu/sgx/sgx.h index d2dad21259a8..f5940393d9bd 100644 --- a/arch/x86/kernel/cpu/sgx/sgx.h +++ b/arch/x86/kernel/cpu/sgx/sgx.h @@ -102,6 +102,9 @@ static inline int __init sgx_vepc_init(void) } #endif +int sgx_inc_usage_count(void); +void sgx_dec_usage_count(void); + void sgx_update_lepubkeyhash(u64 *lepubkeyhash); #endif /* _X86_SGX_H */ diff --git a/arch/x86/kernel/cpu/sgx/virt.c b/arch/x86/kernel/cpu/sgx/virt.c index 7aaa3652e31d..8de1f1a755f2 100644 --- a/arch/x86/kernel/cpu/sgx/virt.c +++ b/arch/x86/kernel/cpu/sgx/virt.c @@ -5,6 +5,7 @@ * Copyright(c) 2021 Intel Corporation. */ +#include <linux/kvm_types.h> #include <linux/miscdevice.h> #include <linux/mm.h> #include <linux/mman.h> @@ -255,10 +256,11 @@ static int sgx_vepc_release(struct inode *inode, struct file *file) xa_destroy(&vepc->page_array); kfree(vepc); + sgx_dec_usage_count(); return 0; } -static int sgx_vepc_open(struct inode *inode, struct file *file) +static int __sgx_vepc_open(struct inode *inode, struct file *file) { struct sgx_vepc *vepc; @@ -273,6 +275,23 @@ static int sgx_vepc_open(struct inode *inode, struct file *file) return 0; } +static int sgx_vepc_open(struct inode *inode, struct file *file) +{ + int ret; + + ret = sgx_inc_usage_count(); + if (ret) + return ret; + + ret = __sgx_vepc_open(inode, file); + if (ret) { + sgx_dec_usage_count(); + return ret; + } + + return 0; +} + static long sgx_vepc_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -363,7 +382,7 @@ int sgx_virt_ecreate(struct sgx_pageinfo *pageinfo, void __user *secs, WARN_ON_ONCE(ret); return 0; } -EXPORT_SYMBOL_GPL(sgx_virt_ecreate); +EXPORT_SYMBOL_FOR_KVM(sgx_virt_ecreate); static int __sgx_virt_einit(void __user *sigstruct, void __user *token, void __user *secs) @@ -432,4 +451,4 @@ int sgx_virt_einit(void __user *sigstruct, void __user *token, return ret; } -EXPORT_SYMBOL_GPL(sgx_virt_einit); +EXPORT_SYMBOL_FOR_KVM(sgx_virt_einit); diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 6073a16628f9..f55ea3cdbf88 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -75,15 +75,11 @@ bool arch_match_cpu_phys_id(int cpu, u64 phys_id) return phys_id == (u64)cpuid_to_apicid[cpu]; } -#ifdef CONFIG_SMP static void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { if (!(apicid & (__max_threads_per_core - 1))) cpumask_set_cpu(cpu, &__cpu_primary_thread_mask); } -#else 
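The sgx_open()/sgx_vepc_open() and matching release paths above follow a common first-user/last-user pattern: a mutex-protected count where the transition from zero runs a one-time action (EUPDATESVN here, while the EPC is guaranteed empty) before any user exists. A generic userspace-flavoured sketch of that pattern, with illustrative names and a stand-in for the one-time hook:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t usage_lock = PTHREAD_MUTEX_INITIALIZER;
static int usage_count;

static int one_time_init(void)	/* stands in for the EUPDATESVN attempt */
{
	puts("runs only while there are no other users");
	return 0;
}

static int get_usage(void)
{
	int ret = 0;

	pthread_mutex_lock(&usage_lock);
	if (!usage_count)
		ret = one_time_init();
	if (!ret)
		usage_count++;
	pthread_mutex_unlock(&usage_lock);
	return ret;
}

static void put_usage(void)
{
	pthread_mutex_lock(&usage_lock);
	usage_count--;
	pthread_mutex_unlock(&usage_lock);
}

int main(void)
{
	if (!get_usage()) {
		/* ... open and use the device ... */
		put_usage();
	}
	return 0;
}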
-static inline void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { } -#endif /* * Convert the APIC ID to a domain level ID by masking out the low bits diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c index b5a5e1411469..71625795d711 100644 --- a/arch/x86/kernel/cpu/topology_common.c +++ b/arch/x86/kernel/cpu/topology_common.c @@ -16,6 +16,9 @@ EXPORT_SYMBOL_GPL(x86_topo_system); unsigned int __amd_nodes_per_pkg __ro_after_init; EXPORT_SYMBOL_GPL(__amd_nodes_per_pkg); +/* CPUs which are the primary SMT threads */ +struct cpumask __cpu_primary_thread_mask __read_mostly; + void topology_set_dom(struct topo_scan *tscan, enum x86_topology_domains dom, unsigned int shift, unsigned int ncpus) { diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c index 49782724a943..209b5a22d880 100644 --- a/arch/x86/kernel/cpu/tsx.c +++ b/arch/x86/kernel/cpu/tsx.c @@ -19,7 +19,17 @@ #undef pr_fmt #define pr_fmt(fmt) "tsx: " fmt -enum tsx_ctrl_states tsx_ctrl_state __ro_after_init = TSX_CTRL_NOT_SUPPORTED; +enum tsx_ctrl_states { + TSX_CTRL_AUTO, + TSX_CTRL_ENABLE, + TSX_CTRL_DISABLE, + TSX_CTRL_RTM_ALWAYS_ABORT, + TSX_CTRL_NOT_SUPPORTED, +}; + +static enum tsx_ctrl_states tsx_ctrl_state __ro_after_init = + IS_ENABLED(CONFIG_X86_INTEL_TSX_MODE_AUTO) ? TSX_CTRL_AUTO : + IS_ENABLED(CONFIG_X86_INTEL_TSX_MODE_OFF) ? TSX_CTRL_DISABLE : TSX_CTRL_ENABLE; static void tsx_disable(void) { @@ -156,11 +166,28 @@ static void tsx_dev_mode_disable(void) } } -void __init tsx_init(void) +static int __init tsx_parse_cmdline(char *str) { - char arg[5] = {}; - int ret; + if (!str) + return -EINVAL; + + if (!strcmp(str, "on")) { + tsx_ctrl_state = TSX_CTRL_ENABLE; + } else if (!strcmp(str, "off")) { + tsx_ctrl_state = TSX_CTRL_DISABLE; + } else if (!strcmp(str, "auto")) { + tsx_ctrl_state = TSX_CTRL_AUTO; + } else { + tsx_ctrl_state = TSX_CTRL_DISABLE; + pr_err("invalid option, defaulting to off\n"); + } + + return 0; +} +early_param("tsx", tsx_parse_cmdline); +void __init tsx_init(void) +{ tsx_dev_mode_disable(); /* @@ -194,27 +221,8 @@ void __init tsx_init(void) return; } - ret = cmdline_find_option(boot_command_line, "tsx", arg, sizeof(arg)); - if (ret >= 0) { - if (!strcmp(arg, "on")) { - tsx_ctrl_state = TSX_CTRL_ENABLE; - } else if (!strcmp(arg, "off")) { - tsx_ctrl_state = TSX_CTRL_DISABLE; - } else if (!strcmp(arg, "auto")) { - tsx_ctrl_state = x86_get_tsx_auto_mode(); - } else { - tsx_ctrl_state = TSX_CTRL_DISABLE; - pr_err("invalid option, defaulting to off\n"); - } - } else { - /* tsx= not provided */ - if (IS_ENABLED(CONFIG_X86_INTEL_TSX_MODE_AUTO)) - tsx_ctrl_state = x86_get_tsx_auto_mode(); - else if (IS_ENABLED(CONFIG_X86_INTEL_TSX_MODE_OFF)) - tsx_ctrl_state = TSX_CTRL_DISABLE; - else - tsx_ctrl_state = TSX_CTRL_ENABLE; - } + if (tsx_ctrl_state == TSX_CTRL_AUTO) + tsx_ctrl_state = x86_get_tsx_auto_mode(); if (tsx_ctrl_state == TSX_CTRL_DISABLE) { tsx_disable(); diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 71ee20102a8a..b10684dedc58 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -181,8 +181,8 @@ static void show_regs_if_on_stack(struct stack_info *info, struct pt_regs *regs, * in false positive reports. Disable instrumentation to avoid those. 
*/ __no_kmsan_checks -static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, const char *log_lvl) +static void __show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, + unsigned long *stack, const char *log_lvl) { struct unwind_state state; struct stack_info stack_info = {0}; @@ -303,6 +303,25 @@ next: } } +static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, + unsigned long *stack, const char *log_lvl) +{ + /* + * Disable KASAN to avoid false positives during walking another + * task's stacks, as values on these stacks may change concurrently + * with task execution. + */ + bool disable_kasan = task && task != current; + + if (disable_kasan) + kasan_disable_current(); + + __show_trace_log_lvl(task, regs, stack, log_lvl); + + if (disable_kasan) + kasan_enable_current(); +} + void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl) { diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index c3acbd26408b..b15b97d3cb52 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -16,6 +16,7 @@ #include <linux/firmware-map.h> #include <linux/sort.h> #include <linux/memory_hotplug.h> +#include <linux/kvm_types.h> #include <asm/e820/api.h> #include <asm/setup.h> @@ -95,7 +96,7 @@ bool e820__mapped_raw_any(u64 start, u64 end, enum e820_type type) { return _e820__mapped_any(e820_table_firmware, start, end, type); } -EXPORT_SYMBOL_GPL(e820__mapped_raw_any); +EXPORT_SYMBOL_FOR_KVM(e820__mapped_raw_any); bool e820__mapped_any(u64 start, u64 end, enum e820_type type) { diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index e88eacb1b5bb..da233f20ae6f 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -18,6 +18,7 @@ #include <uapi/asm/kvm.h> #include <linux/hardirq.h> +#include <linux/kvm_types.h> #include <linux/pkeys.h> #include <linux/vmalloc.h> @@ -276,7 +277,7 @@ bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu) return true; } -EXPORT_SYMBOL_GPL(fpu_alloc_guest_fpstate); +EXPORT_SYMBOL_FOR_KVM(fpu_alloc_guest_fpstate); void fpu_free_guest_fpstate(struct fpu_guest *gfpu) { @@ -291,7 +292,7 @@ void fpu_free_guest_fpstate(struct fpu_guest *gfpu) gfpu->fpstate = NULL; vfree(fpstate); } -EXPORT_SYMBOL_GPL(fpu_free_guest_fpstate); +EXPORT_SYMBOL_FOR_KVM(fpu_free_guest_fpstate); /* * fpu_enable_guest_xfd_features - Check xfeatures against guest perm and enable @@ -313,7 +314,7 @@ int fpu_enable_guest_xfd_features(struct fpu_guest *guest_fpu, u64 xfeatures) return __xfd_enable_feature(xfeatures, guest_fpu); } -EXPORT_SYMBOL_GPL(fpu_enable_guest_xfd_features); +EXPORT_SYMBOL_FOR_KVM(fpu_enable_guest_xfd_features); #ifdef CONFIG_X86_64 void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd) @@ -324,7 +325,7 @@ void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd) xfd_update_state(guest_fpu->fpstate); fpregs_unlock(); } -EXPORT_SYMBOL_GPL(fpu_update_guest_xfd); +EXPORT_SYMBOL_FOR_KVM(fpu_update_guest_xfd); /** * fpu_sync_guest_vmexit_xfd_state - Synchronize XFD MSR and software state @@ -348,7 +349,7 @@ void fpu_sync_guest_vmexit_xfd_state(void) __this_cpu_write(xfd_state, fpstate->xfd); } } -EXPORT_SYMBOL_GPL(fpu_sync_guest_vmexit_xfd_state); +EXPORT_SYMBOL_FOR_KVM(fpu_sync_guest_vmexit_xfd_state); #endif /* CONFIG_X86_64 */ int fpu_swap_kvm_fpstate(struct fpu_guest *guest_fpu, bool enter_guest) @@ -390,7 +391,7 @@ int fpu_swap_kvm_fpstate(struct fpu_guest *guest_fpu, bool enter_guest) fpregs_unlock(); return 0; } 
-EXPORT_SYMBOL_GPL(fpu_swap_kvm_fpstate); +EXPORT_SYMBOL_FOR_KVM(fpu_swap_kvm_fpstate); void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, unsigned int size, u64 xfeatures, u32 pkru) @@ -409,7 +410,7 @@ void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, ustate->xsave.header.xfeatures = XFEATURE_MASK_FPSSE; } } -EXPORT_SYMBOL_GPL(fpu_copy_guest_fpstate_to_uabi); +EXPORT_SYMBOL_FOR_KVM(fpu_copy_guest_fpstate_to_uabi); int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf, u64 xcr0, u32 *vpkru) @@ -439,7 +440,7 @@ int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf, return copy_uabi_from_kernel_to_xstate(kstate, ustate, vpkru); } -EXPORT_SYMBOL_GPL(fpu_copy_uabi_to_guest_fpstate); +EXPORT_SYMBOL_FOR_KVM(fpu_copy_uabi_to_guest_fpstate); #endif /* CONFIG_KVM */ void kernel_fpu_begin_mask(unsigned int kfpu_mask) @@ -857,7 +858,7 @@ void switch_fpu_return(void) fpregs_restore_userregs(); } -EXPORT_SYMBOL_GPL(switch_fpu_return); +EXPORT_SYMBOL_FOR_KVM(switch_fpu_return); void fpregs_lock_and_load(void) { @@ -892,7 +893,7 @@ void fpregs_assert_state_consistent(void) WARN_ON_FPU(!fpregs_state_valid(fpu, smp_processor_id())); } -EXPORT_SYMBOL_GPL(fpregs_assert_state_consistent); +EXPORT_SYMBOL_FOR_KVM(fpregs_assert_state_consistent); #endif void fpregs_mark_activate(void) diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 28e4fd65c9da..48113c5193aa 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -8,6 +8,7 @@ #include <linux/compat.h> #include <linux/cpu.h> #include <linux/mman.h> +#include <linux/kvm_types.h> #include <linux/nospec.h> #include <linux/pkeys.h> #include <linux/seq_file.h> @@ -1058,7 +1059,7 @@ void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr) return __raw_xsave_addr(xsave, xfeature_nr); } -EXPORT_SYMBOL_GPL(get_xsave_addr); +EXPORT_SYMBOL_FOR_KVM(get_xsave_addr); /* * Given an xstate feature nr, calculate where in the xsave buffer the state is. @@ -1482,7 +1483,7 @@ void fpstate_clear_xstate_component(struct fpstate *fpstate, unsigned int xfeatu if (addr) memset(addr, 0, xstate_sizes[xfeature]); } -EXPORT_SYMBOL_GPL(fpstate_clear_xstate_component); +EXPORT_SYMBOL_FOR_KVM(fpstate_clear_xstate_component); #endif #ifdef CONFIG_X86_64 @@ -1818,7 +1819,7 @@ u64 xstate_get_guest_group_perm(void) { return xstate_get_group_perm(true); } -EXPORT_SYMBOL_GPL(xstate_get_guest_group_perm); +EXPORT_SYMBOL_FOR_KVM(xstate_get_guest_group_perm); /** * fpu_xstate_prctl - xstate permission operations diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S index 367da3638167..823dbdd0eb41 100644 --- a/arch/x86/kernel/ftrace_64.S +++ b/arch/x86/kernel/ftrace_64.S @@ -354,12 +354,17 @@ SYM_CODE_START(return_to_handler) UNWIND_HINT_UNDEFINED ANNOTATE_NOENDBR + /* Restore return_to_handler value that got eaten by previous ret instruction. */ + subq $8, %rsp + UNWIND_HINT_FUNC + /* Save ftrace_regs for function exit context */ subq $(FRAME_SIZE), %rsp movq %rax, RAX(%rsp) movq %rdx, RDX(%rsp) movq %rbp, RBP(%rsp) + movq %rsp, RSP(%rsp) movq %rsp, %rdi call ftrace_return_to_handler @@ -368,7 +373,8 @@ SYM_CODE_START(return_to_handler) movq RDX(%rsp), %rdx movq RAX(%rsp), %rax - addq $(FRAME_SIZE), %rsp + addq $(FRAME_SIZE) + 8, %rsp + /* * Jump back to the old return address. 
This cannot be JMP_NOSPEC rdi * since IBT would demand that contain ENDBR, which simply isn't so for diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index b01644c949b2..f846c15f21ca 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -24,6 +24,7 @@ #include <linux/percpu.h> #include <linux/kdebug.h> #include <linux/kernel.h> +#include <linux/kvm_types.h> #include <linux/export.h> #include <linux/sched.h> #include <linux/smp.h> @@ -489,7 +490,7 @@ void hw_breakpoint_restore(void) set_debugreg(DR6_RESERVED, 6); set_debugreg(__this_cpu_read(cpu_dr7), 7); } -EXPORT_SYMBOL_GPL(hw_breakpoint_restore); +EXPORT_SYMBOL_FOR_KVM(hw_breakpoint_restore); /* * Handle debug exception notifications. diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 10721a125226..86f4e574de02 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -12,6 +12,7 @@ #include <linux/delay.h> #include <linux/export.h> #include <linux/irq.h> +#include <linux/kvm_types.h> #include <asm/irq_stack.h> #include <asm/apic.h> @@ -361,7 +362,7 @@ void kvm_set_posted_intr_wakeup_handler(void (*handler)(void)) synchronize_rcu(); } } -EXPORT_SYMBOL_GPL(kvm_set_posted_intr_wakeup_handler); +EXPORT_SYMBOL_FOR_KVM(kvm_set_posted_intr_wakeup_handler); /* * Handler for POSTED_INTERRUPT_VECTOR. diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 3863d7709386..c1fac3a9fecc 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -141,7 +141,6 @@ bool can_boost(struct insn *insn, void *addr) { kprobe_opcode_t opcode; insn_byte_t prefix; - int i; if (search_exception_tables((unsigned long)addr)) return false; /* Page fault may occur on this address. */ @@ -154,7 +153,7 @@ bool can_boost(struct insn *insn, void *addr) if (insn->opcode.nbytes != 1) return false; - for_each_insn_prefix(insn, i, prefix) { + for_each_insn_prefix(insn, prefix) { insn_attr_t attr; attr = inat_get_opcode_attribute(prefix); diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index b67d7c59dca0..204765004c72 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -29,6 +29,7 @@ #include <linux/syscore_ops.h> #include <linux/cc_platform.h> #include <linux/efi.h> +#include <linux/kvm_types.h> #include <asm/timer.h> #include <asm/cpu.h> #include <asm/traps.h> @@ -162,7 +163,7 @@ void kvm_async_pf_task_wait_schedule(u32 token) } finish_swait(&n.wq, &wait); } -EXPORT_SYMBOL_GPL(kvm_async_pf_task_wait_schedule); +EXPORT_SYMBOL_FOR_KVM(kvm_async_pf_task_wait_schedule); static void apf_task_wake_one(struct kvm_task_sleep_node *n) { @@ -253,7 +254,7 @@ noinstr u32 kvm_read_and_reset_apf_flags(void) return flags; } -EXPORT_SYMBOL_GPL(kvm_read_and_reset_apf_flags); +EXPORT_SYMBOL_FOR_KVM(kvm_read_and_reset_apf_flags); noinstr bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token) { diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index e17c16c54a37..4469c784eaa0 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -98,7 +98,7 @@ static int filter_write(u32 reg) if (!__ratelimit(&fw_rs)) return 0; - pr_warn("Write to unrecognized MSR 0x%x by %s (pid: %d).\n", + pr_warn("Write to unrecognized MSR 0x%x by %s (pid: %d), tainting CPU_OUT_OF_SPEC.\n", reg, current->comm, current->pid); pr_warn("See https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git/about for details.\n"); diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index be93ec7255bf..3d239ed12744 100644 --- 
a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -24,6 +24,7 @@ #include <linux/export.h> #include <linux/atomic.h> #include <linux/sched/clock.h> +#include <linux/kvm_types.h> #include <asm/cpu_entry_area.h> #include <asm/traps.h> @@ -613,9 +614,7 @@ DEFINE_IDTENTRY_RAW(exc_nmi_kvm_vmx) { exc_nmi(regs); } -#if IS_MODULE(CONFIG_KVM_INTEL) -EXPORT_SYMBOL_GPL(asm_exc_nmi_kvm_vmx); -#endif +EXPORT_SYMBOL_FOR_KVM(asm_exc_nmi_kvm_vmx); #endif #ifdef CONFIG_NMI_CHECK_CPU diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 52a5c03c353c..432c0a004c60 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -30,6 +30,7 @@ #include <linux/interrupt.h> #include <linux/delay.h> #include <linux/export.h> +#include <linux/kvm_types.h> #include <linux/ptrace.h> #include <linux/notifier.h> #include <linux/kprobes.h> @@ -303,9 +304,7 @@ void current_save_fsgs(void) save_fsgs(current); local_irq_restore(flags); } -#if IS_ENABLED(CONFIG_KVM) -EXPORT_SYMBOL_GPL(current_save_fsgs); -#endif +EXPORT_SYMBOL_FOR_KVM(current_save_fsgs); static __always_inline void loadseg(enum which_selector which, unsigned short sel) diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 964f6b0a3d68..6032fa9ec753 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -13,6 +13,7 @@ #include <linux/objtool.h> #include <linux/pgtable.h> #include <linux/kexec.h> +#include <linux/kvm_types.h> #include <acpi/reboot.h> #include <asm/io.h> #include <asm/apic.h> @@ -541,7 +542,7 @@ void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback) rcu_assign_pointer(cpu_emergency_virt_callback, callback); } -EXPORT_SYMBOL_GPL(cpu_emergency_register_virt_callback); +EXPORT_SYMBOL_FOR_KVM(cpu_emergency_register_virt_callback); void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback) { @@ -551,7 +552,7 @@ void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback) rcu_assign_pointer(cpu_emergency_virt_callback, NULL); synchronize_rcu(); } -EXPORT_SYMBOL_GPL(cpu_emergency_unregister_virt_callback); +EXPORT_SYMBOL_FOR_KVM(cpu_emergency_unregister_virt_callback); /* * Disable virtualization, i.e. VMX or SVM, to ensure INIT is recognized during diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index 11e20bb13aca..4ffba68dc57b 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S @@ -95,9 +95,12 @@ SYM_CODE_START_NOALIGN(relocate_kernel) /* Leave CR4 in %r13 to enable the right paging mode later. */ movq %cr4, %r13 - /* Disable global pages immediately to ensure this mapping is RWX */ + /* + * Disable global pages immediately to ensure this mapping is RWX. + * Disable LASS before jumping to the identity mapped page. + */ movq %r13, %r12 - andq $~(X86_CR4_PGE), %r12 + andq $~(X86_CR4_PGE | X86_CR4_LASS), %r12 movq %r12, %cr4 /* Save %rsp and CRs. 
*/ diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index a4ba735842a8..5cd6950ab672 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -103,9 +103,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_core_map); DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map); EXPORT_PER_CPU_SYMBOL(cpu_die_map); -/* CPUs which are the primary SMT threads */ -struct cpumask __cpu_primary_thread_mask __read_mostly; - /* Representing CPUs for which sibling maps can be computed */ static cpumask_var_t cpu_sibling_setup_mask; @@ -515,6 +512,76 @@ static void __init build_sched_topology(void) set_sched_topology(topology); } +#ifdef CONFIG_NUMA +static int sched_avg_remote_distance; +static int avg_remote_numa_distance(void) +{ + int i, j; + int distance, nr_remote, total_distance; + + if (sched_avg_remote_distance > 0) + return sched_avg_remote_distance; + + nr_remote = 0; + total_distance = 0; + for_each_node_state(i, N_CPU) { + for_each_node_state(j, N_CPU) { + distance = node_distance(i, j); + + if (distance >= REMOTE_DISTANCE) { + nr_remote++; + total_distance += distance; + } + } + } + if (nr_remote) + sched_avg_remote_distance = total_distance / nr_remote; + else + sched_avg_remote_distance = REMOTE_DISTANCE; + + return sched_avg_remote_distance; +} + +int arch_sched_node_distance(int from, int to) +{ + int d = node_distance(from, to); + + switch (boot_cpu_data.x86_vfm) { + case INTEL_GRANITERAPIDS_X: + case INTEL_ATOM_DARKMONT_X: + + if (!x86_has_numa_in_package || topology_max_packages() == 1 || + d < REMOTE_DISTANCE) + return d; + + /* + * With SNC enabled, there could be too many levels of remote + * NUMA node distances, creating NUMA domain levels + * including local nodes and partial remote nodes. + * + * Trim finer distance tuning for NUMA nodes in remote package + * for the purpose of building sched domains. Group NUMA nodes + * in the remote package in the same sched group. + * Simplify NUMA domains and avoid extra NUMA levels including + * different remote NUMA nodes and local nodes. + * + * GNR and CWF don't expect systems with more than 2 packages + * and more than 2 hops between packages. Single average remote + * distance won't be appropriate if there are more than 2 + * packages as average distance to different remote packages + * could be different. + */ + WARN_ONCE(topology_max_packages() > 2, + "sched: Expect only up to 2 packages for GNR or CWF, " + "but saw %d packages when building sched domains.", + topology_max_packages()); + + d = avg_remote_numa_distance(); + } + return d; +} +#endif /* CONFIG_NUMA */ + void set_cpu_sibling_map(int cpu) { bool has_smt = __max_threads_per_core > 1; diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index cb324cc1fd99..bcf1dedc1d00 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -732,13 +732,23 @@ DEFINE_IDTENTRY(exc_bounds) enum kernel_gp_hint { GP_NO_HINT, GP_NON_CANONICAL, - GP_CANONICAL + GP_CANONICAL, + GP_LASS_VIOLATION, + GP_NULL_POINTER, +}; + +static const char * const kernel_gp_hint_help[] = { + [GP_NON_CANONICAL] = "probably for non-canonical address", + [GP_CANONICAL] = "maybe for address", + [GP_LASS_VIOLATION] = "probably LASS violation for address", + [GP_NULL_POINTER] = "kernel NULL pointer dereference", }; /* * When an uncaught #GP occurs, try to determine the memory address accessed by * the instruction and return that address to the caller. Also, try to figure - * out whether any part of the access to that address was non-canonical. 
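A worked example of the remote-distance averaging in avg_remote_numa_distance() above, using a hypothetical 2-package system with sub-NUMA clustering (two nodes per package) and an assumed REMOTE_DISTANCE threshold of 20; the distance values are made up for illustration:

#include <stdio.h>

#define REMOTE_DISTANCE 20	/* assumption: mirrors the LOCAL=10/REMOTE=20 convention */

/* Hypothetical distance table: nodes 0-1 and 2-3 each share a package. */
static const int dist[4][4] = {
	{ 10, 12, 21, 24 },
	{ 12, 10, 24, 21 },
	{ 21, 24, 10, 12 },
	{ 24, 21, 12, 10 },
};

int main(void)
{
	int total = 0, nr_remote = 0;

	for (int i = 0; i < 4; i++)
		for (int j = 0; j < 4; j++)
			if (dist[i][j] >= REMOTE_DISTANCE) {
				nr_remote++;
				total += dist[i][j];
			}

	/* 8 remote entries, integer average 22 */
	printf("avg remote distance = %d\n", total / nr_remote);
	return 0;
}

Every cross-package node pair then reports the same distance (22 here), so the scheduler builds a single remote NUMA level instead of one level per distinct remote distance.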
+ * out whether any part of the access to that address was non-canonical or + * across privilege levels. */ static enum kernel_gp_hint get_kernel_gp_address(struct pt_regs *regs, unsigned long *addr) @@ -760,14 +770,28 @@ static enum kernel_gp_hint get_kernel_gp_address(struct pt_regs *regs, return GP_NO_HINT; #ifdef CONFIG_X86_64 + /* Operand is in the kernel half */ + if (*addr >= ~__VIRTUAL_MASK) + return GP_CANONICAL; + + /* The last byte of the operand is not in the user canonical half */ + if (*addr + insn.opnd_bytes - 1 > __VIRTUAL_MASK) + return GP_NON_CANONICAL; + /* - * Check that: - * - the operand is not in the kernel half - * - the last byte of the operand is not in the user canonical half + * A NULL pointer dereference usually causes a #PF. However, it + * can result in a #GP when LASS is active. Provide the same + * hint in the rare case that the condition is hit without LASS. */ - if (*addr < ~__VIRTUAL_MASK && - *addr + insn.opnd_bytes - 1 > __VIRTUAL_MASK) - return GP_NON_CANONICAL; + if (*addr < PAGE_SIZE) + return GP_NULL_POINTER; + + /* + * Assume that LASS caused the exception, because the address is + * canonical and in the user half. + */ + if (cpu_feature_enabled(X86_FEATURE_LASS)) + return GP_LASS_VIOLATION; #endif return GP_CANONICAL; @@ -930,9 +954,7 @@ DEFINE_IDTENTRY_ERRORCODE(exc_general_protection) if (hint != GP_NO_HINT) snprintf(desc, sizeof(desc), GPFSTR ", %s 0x%lx", - (hint == GP_NON_CANONICAL) ? "probably for non-canonical address" - : "maybe for address", - gp_addr); + kernel_gp_hint_help[hint], gp_addr); /* * KASAN is interested only in the non-canonical case, clear it diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 87e749106dda..7d3e13e14eab 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -11,6 +11,7 @@ #include <linux/cpufreq.h> #include <linux/delay.h> #include <linux/clocksource.h> +#include <linux/kvm_types.h> #include <linux/percpu.h> #include <linux/timex.h> #include <linux/static_key.h> diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 845aeaf36b8d..7be8e361ca55 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -17,6 +17,7 @@ #include <linux/kdebug.h> #include <asm/processor.h> #include <asm/insn.h> +#include <asm/insn-eval.h> #include <asm/mmu_context.h> #include <asm/nops.h> @@ -258,9 +259,8 @@ static volatile u32 good_2byte_insns[256 / 32] = { static bool is_prefix_bad(struct insn *insn) { insn_byte_t p; - int i; - for_each_insn_prefix(insn, i, p) { + for_each_insn_prefix(insn, p) { insn_attr_t attr; attr = inat_get_opcode_attribute(p); @@ -1158,35 +1158,12 @@ unlock: mmap_write_unlock(mm); } -static bool insn_is_nop(struct insn *insn) -{ - return insn->opcode.nbytes == 1 && insn->opcode.bytes[0] == 0x90; -} - -static bool insn_is_nopl(struct insn *insn) -{ - if (insn->opcode.nbytes != 2) - return false; - - if (insn->opcode.bytes[0] != 0x0f || insn->opcode.bytes[1] != 0x1f) - return false; - - if (!insn->modrm.nbytes) - return false; - - if (X86_MODRM_REG(insn->modrm.bytes[0]) != 0) - return false; - - /* 0f 1f /0 - NOPL */ - return true; -} - static bool can_optimize(struct insn *insn, unsigned long vaddr) { if (!insn->x86_64 || insn->length != 5) return false; - if (!insn_is_nop(insn) && !insn_is_nopl(insn)) + if (!insn_is_nop(insn)) return false; /* We can't do cross page atomic writes yet. 
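The reworked #GP hint selection above is an ordered classification of the decoded operand address. A stand-alone sketch of that order, assuming 4-level paging (48-bit virtual addresses), 4 KiB pages and LASS enabled; the constants and names are illustrative, not the kernel's:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define VIRTUAL_MASK 0x00007fffffffffffULL	/* 4-level paging assumption */
#define PAGE_SZ      4096ULL

static const char *classify(uint64_t addr, unsigned int opnd_bytes, bool lass)
{
	if (addr >= ~VIRTUAL_MASK)
		return "canonical (kernel half)";
	if (addr + opnd_bytes - 1 > VIRTUAL_MASK)
		return "non-canonical";
	if (addr < PAGE_SZ)
		return "NULL pointer dereference";
	if (lass)
		return "LASS violation";
	return "canonical";
}

int main(void)
{
	printf("%s\n", classify(0x10, 8, true));
	printf("%s\n", classify(0x00007f0000000000ULL, 8, true));
	printf("%s\n", classify(0x0000800000000000ULL, 8, true));
	printf("%s\n", classify(0xffff888000000000ULL, 8, true));
	return 0;
}

Run in that order, the four sample addresses produce the NULL-pointer, LASS-violation, non-canonical and kernel-half hints respectively.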
*/ @@ -1426,19 +1403,14 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) { u8 opc1 = OPCODE1(insn); insn_byte_t p; - int i; - /* x86_nops[insn->length]; same as jmp with .offs = 0 */ - if (insn->length <= ASM_NOP_MAX && - !memcmp(insn->kaddr, x86_nops[insn->length], insn->length)) + if (insn_is_nop(insn)) goto setup; switch (opc1) { case 0xeb: /* jmp 8 */ case 0xe9: /* jmp 32 */ break; - case 0x90: /* prefix* + nop; same as jmp with .offs = 0 */ - goto setup; case 0xe8: /* call relative */ branch_clear_offset(auprobe, insn); @@ -1463,7 +1435,7 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) * Intel and AMD behavior differ in 64-bit mode: Intel ignores 66 prefix. * No one uses these insns, reject any branch insns with such prefix. */ - for_each_insn_prefix(insn, i, p) { + for_each_insn_prefix(insn, p) { if (p == 0x66) return -ENOTSUPP; } @@ -1819,3 +1791,35 @@ bool arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx, else return regs->sp <= ret->stack; } + +/* + * Heuristic-based check if uprobe is installed at the function entry. + * + * Under assumption of user code being compiled with frame pointers, + * `push %rbp/%ebp` is a good indicator that we indeed are. + * + * Similarly, `endbr64` (assuming 64-bit mode) is also a common pattern. + * If we get this wrong, captured stack trace might have one extra bogus + * entry, but the rest of stack trace will still be meaningful. + */ +bool is_uprobe_at_func_entry(struct pt_regs *regs) +{ + struct arch_uprobe *auprobe; + + if (!current->utask) + return false; + + auprobe = current->utask->auprobe; + if (!auprobe) + return false; + + /* push %rbp/%ebp */ + if (auprobe->insn[0] == 0x55) + return true; + + /* endbr64 (64-bit only) */ + if (user_64bit_mode(regs) && is_endbr((u32 *)auprobe->insn)) + return true; + + return false; +} diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h index 743ab25ba787..81b4a7acf72e 100644 --- a/arch/x86/kvm/reverse_cpuid.h +++ b/arch/x86/kvm/reverse_cpuid.h @@ -78,7 +78,6 @@ static const struct cpuid_reg reverse_cpuid[] = { [CPUID_6_EAX] = { 6, 0, CPUID_EAX}, [CPUID_8000_000A_EDX] = {0x8000000a, 0, CPUID_EDX}, [CPUID_7_ECX] = { 7, 0, CPUID_ECX}, - [CPUID_8000_0007_EBX] = {0x80000007, 0, CPUID_EBX}, [CPUID_7_EDX] = { 7, 0, CPUID_EDX}, [CPUID_7_1_EAX] = { 7, 1, CPUID_EAX}, [CPUID_12_EAX] = {0x00000012, 0, CPUID_EAX}, diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index f286b5706d7c..fef00546c885 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -216,7 +216,7 @@ static void avic_deactivate_vmcb(struct vcpu_svm *svm) * This function is called from IOMMU driver to notify * SVM to schedule in a particular vCPU of a particular VM. 
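For reference, the two byte patterns used by the is_uprobe_at_func_entry() heuristic above have fixed encodings (taken from the x86 instruction set, not from this patch): push %rbp/%ebp is the single byte 0x55, and endbr64 is the 4-byte sequence f3 0f 1e fa. A trivial matcher:

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

static bool is_push_rbp(const uint8_t *insn)
{
	return insn[0] == 0x55;	/* push %rbp / push %ebp */
}

static bool is_endbr64_bytes(const uint8_t *insn)
{
	static const uint8_t endbr64[] = { 0xf3, 0x0f, 0x1e, 0xfa };

	return memcmp(insn, endbr64, sizeof(endbr64)) == 0;
}

int main(void)
{
	const uint8_t prologue[] = { 0xf3, 0x0f, 0x1e, 0xfa, 0x55 };

	return is_endbr64_bytes(prologue) && is_push_rbp(prologue + 4) ? 0 : 1;
}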
*/ -int avic_ga_log_notifier(u32 ga_tag) +static int avic_ga_log_notifier(u32 ga_tag) { unsigned long flags; struct kvm_svm *kvm_svm; @@ -788,7 +788,7 @@ int avic_init_vcpu(struct vcpu_svm *svm) struct kvm_vcpu *vcpu = &svm->vcpu; INIT_LIST_HEAD(&svm->ir_list); - spin_lock_init(&svm->ir_list_lock); + raw_spin_lock_init(&svm->ir_list_lock); if (!enable_apicv || !irqchip_in_kernel(vcpu->kvm)) return 0; @@ -816,9 +816,9 @@ static void svm_ir_list_del(struct kvm_kernel_irqfd *irqfd) if (!vcpu) return; - spin_lock_irqsave(&to_svm(vcpu)->ir_list_lock, flags); + raw_spin_lock_irqsave(&to_svm(vcpu)->ir_list_lock, flags); list_del(&irqfd->vcpu_list); - spin_unlock_irqrestore(&to_svm(vcpu)->ir_list_lock, flags); + raw_spin_unlock_irqrestore(&to_svm(vcpu)->ir_list_lock, flags); } int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm, @@ -855,7 +855,7 @@ int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm, * list of IRQs being posted to the vCPU, to ensure the IRTE * isn't programmed with stale pCPU/IsRunning information. */ - guard(spinlock_irqsave)(&svm->ir_list_lock); + guard(raw_spinlock_irqsave)(&svm->ir_list_lock); /* * Update the target pCPU for IOMMU doorbells if the vCPU is @@ -972,7 +972,7 @@ static void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu, * up-to-date entry information, or that this task will wait until * svm_ir_list_add() completes to set the new target pCPU. */ - spin_lock_irqsave(&svm->ir_list_lock, flags); + raw_spin_lock_irqsave(&svm->ir_list_lock, flags); entry = svm->avic_physical_id_entry; WARN_ON_ONCE(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK); @@ -997,7 +997,7 @@ static void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu, avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, action); - spin_unlock_irqrestore(&svm->ir_list_lock, flags); + raw_spin_unlock_irqrestore(&svm->ir_list_lock, flags); } void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu) @@ -1035,7 +1035,7 @@ static void __avic_vcpu_put(struct kvm_vcpu *vcpu, enum avic_vcpu_action action) * or that this task will wait until svm_ir_list_add() completes to * mark the vCPU as not running. 
*/ - spin_lock_irqsave(&svm->ir_list_lock, flags); + raw_spin_lock_irqsave(&svm->ir_list_lock, flags); avic_update_iommu_vcpu_affinity(vcpu, -1, action); @@ -1059,7 +1059,7 @@ static void __avic_vcpu_put(struct kvm_vcpu *vcpu, enum avic_vcpu_action action) svm->avic_physical_id_entry = entry; - spin_unlock_irqrestore(&svm->ir_list_lock, flags); + raw_spin_unlock_irqrestore(&svm->ir_list_lock, flags); } void avic_vcpu_put(struct kvm_vcpu *vcpu) @@ -1243,3 +1243,9 @@ bool __init avic_hardware_setup(void) return true; } + +void avic_hardware_unsetup(void) +{ + if (avic) + amd_iommu_register_ga_log_notifier(NULL); +} diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index a6443feab252..da6e80b3ac35 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -677,11 +677,10 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12 */ svm_copy_lbrs(vmcb02, vmcb12); vmcb02->save.dbgctl &= ~DEBUGCTL_RESERVED_BITS; - svm_update_lbrv(&svm->vcpu); - - } else if (unlikely(vmcb01->control.virt_ext & LBR_CTL_ENABLE_MASK)) { + } else { svm_copy_lbrs(vmcb02, vmcb01); } + svm_update_lbrv(&svm->vcpu); } static inline bool is_evtinj_soft(u32 evtinj) @@ -833,11 +832,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm, svm->soft_int_next_rip = vmcb12_rip; } - vmcb02->control.virt_ext = vmcb01->control.virt_ext & - LBR_CTL_ENABLE_MASK; - if (guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV)) - vmcb02->control.virt_ext |= - (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK); + /* LBR_CTL_ENABLE_MASK is controlled by svm_update_lbrv() */ if (!nested_vmcb_needs_vls_intercept(svm)) vmcb02->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK; @@ -1189,13 +1184,12 @@ int nested_svm_vmexit(struct vcpu_svm *svm) kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); if (unlikely(guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) && - (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) { + (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) svm_copy_lbrs(vmcb12, vmcb02); - svm_update_lbrv(vcpu); - } else if (unlikely(vmcb01->control.virt_ext & LBR_CTL_ENABLE_MASK)) { + else svm_copy_lbrs(vmcb01, vmcb02); - svm_update_lbrv(vcpu); - } + + svm_update_lbrv(vcpu); if (vnmi) { if (vmcb02->control.int_ctl & V_NMI_BLOCKING_MASK) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 153c12dbf3eb..9d29b2e7e855 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -705,7 +705,11 @@ void *svm_alloc_permissions_map(unsigned long size, gfp_t gfp_mask) static void svm_recalc_lbr_msr_intercepts(struct kvm_vcpu *vcpu) { - bool intercept = !(to_svm(vcpu)->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK); + struct vcpu_svm *svm = to_svm(vcpu); + bool intercept = !(svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK); + + if (intercept == svm->lbr_msrs_intercepted) + return; svm_set_intercept_for_msr(vcpu, MSR_IA32_LASTBRANCHFROMIP, MSR_TYPE_RW, intercept); svm_set_intercept_for_msr(vcpu, MSR_IA32_LASTBRANCHTOIP, MSR_TYPE_RW, intercept); @@ -714,6 +718,8 @@ static void svm_recalc_lbr_msr_intercepts(struct kvm_vcpu *vcpu) if (sev_es_guest(vcpu->kvm)) svm_set_intercept_for_msr(vcpu, MSR_IA32_DEBUGCTLMSR, MSR_TYPE_RW, intercept); + + svm->lbr_msrs_intercepted = intercept; } void svm_vcpu_free_msrpm(void *msrpm) @@ -806,60 +812,43 @@ void svm_copy_lbrs(struct vmcb *to_vmcb, struct vmcb *from_vmcb) vmcb_mark_dirty(to_vmcb, VMCB_LBR); } -void svm_enable_lbrv(struct kvm_vcpu *vcpu) +static void __svm_enable_lbrv(struct kvm_vcpu *vcpu) { - struct vcpu_svm *svm = to_svm(vcpu); - - 
svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK; - svm_recalc_lbr_msr_intercepts(vcpu); - - /* Move the LBR msrs to the vmcb02 so that the guest can see them. */ - if (is_guest_mode(vcpu)) - svm_copy_lbrs(svm->vmcb, svm->vmcb01.ptr); + to_svm(vcpu)->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK; } -static void svm_disable_lbrv(struct kvm_vcpu *vcpu) +void svm_enable_lbrv(struct kvm_vcpu *vcpu) { - struct vcpu_svm *svm = to_svm(vcpu); - - KVM_BUG_ON(sev_es_guest(vcpu->kvm), vcpu->kvm); - svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK; + __svm_enable_lbrv(vcpu); svm_recalc_lbr_msr_intercepts(vcpu); - - /* - * Move the LBR msrs back to the vmcb01 to avoid copying them - * on nested guest entries. - */ - if (is_guest_mode(vcpu)) - svm_copy_lbrs(svm->vmcb01.ptr, svm->vmcb); } -static struct vmcb *svm_get_lbr_vmcb(struct vcpu_svm *svm) +static void __svm_disable_lbrv(struct kvm_vcpu *vcpu) { - /* - * If LBR virtualization is disabled, the LBR MSRs are always kept in - * vmcb01. If LBR virtualization is enabled and L1 is running VMs of - * its own, the MSRs are moved between vmcb01 and vmcb02 as needed. - */ - return svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK ? svm->vmcb : - svm->vmcb01.ptr; + KVM_BUG_ON(sev_es_guest(vcpu->kvm), vcpu->kvm); + to_svm(vcpu)->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK; } void svm_update_lbrv(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); bool current_enable_lbrv = svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK; - bool enable_lbrv = (svm_get_lbr_vmcb(svm)->save.dbgctl & DEBUGCTLMSR_LBR) || + bool enable_lbrv = (svm->vmcb->save.dbgctl & DEBUGCTLMSR_LBR) || (is_guest_mode(vcpu) && guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) && (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK)); - if (enable_lbrv == current_enable_lbrv) - return; + if (enable_lbrv && !current_enable_lbrv) + __svm_enable_lbrv(vcpu); + else if (!enable_lbrv && current_enable_lbrv) + __svm_disable_lbrv(vcpu); - if (enable_lbrv) - svm_enable_lbrv(vcpu); - else - svm_disable_lbrv(vcpu); + /* + * During nested transitions, it is possible that the current VMCB has + * LBR_CTL set, but the previous LBR_CTL had it cleared (or vice versa). + * In this case, even though LBR_CTL does not need an update, intercepts + * do, so always recalculate the intercepts here. 
+ */ + svm_recalc_lbr_msr_intercepts(vcpu); } void disable_nmi_singlestep(struct vcpu_svm *svm) @@ -921,6 +910,8 @@ static void svm_hardware_unsetup(void) { int cpu; + avic_hardware_unsetup(); + sev_hardware_unsetup(); for_each_possible_cpu(cpu) @@ -1236,6 +1227,7 @@ static int svm_vcpu_create(struct kvm_vcpu *vcpu) } svm->x2avic_msrs_intercepted = true; + svm->lbr_msrs_intercepted = true; svm->vmcb01.ptr = page_address(vmcb01_page); svm->vmcb01.pa = __sme_set(page_to_pfn(vmcb01_page) << PAGE_SHIFT); @@ -2722,19 +2714,19 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = svm->tsc_aux; break; case MSR_IA32_DEBUGCTLMSR: - msr_info->data = svm_get_lbr_vmcb(svm)->save.dbgctl; + msr_info->data = svm->vmcb->save.dbgctl; break; case MSR_IA32_LASTBRANCHFROMIP: - msr_info->data = svm_get_lbr_vmcb(svm)->save.br_from; + msr_info->data = svm->vmcb->save.br_from; break; case MSR_IA32_LASTBRANCHTOIP: - msr_info->data = svm_get_lbr_vmcb(svm)->save.br_to; + msr_info->data = svm->vmcb->save.br_to; break; case MSR_IA32_LASTINTFROMIP: - msr_info->data = svm_get_lbr_vmcb(svm)->save.last_excp_from; + msr_info->data = svm->vmcb->save.last_excp_from; break; case MSR_IA32_LASTINTTOIP: - msr_info->data = svm_get_lbr_vmcb(svm)->save.last_excp_to; + msr_info->data = svm->vmcb->save.last_excp_to; break; case MSR_VM_HSAVE_PA: msr_info->data = svm->nested.hsave_msr; @@ -3002,7 +2994,11 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) if (data & DEBUGCTL_RESERVED_BITS) return 1; - svm_get_lbr_vmcb(svm)->save.dbgctl = data; + if (svm->vmcb->save.dbgctl == data) + break; + + svm->vmcb->save.dbgctl = data; + vmcb_mark_dirty(svm->vmcb, VMCB_LBR); svm_update_lbrv(vcpu); break; case MSR_VM_HSAVE_PA: @@ -5386,12 +5382,6 @@ static __init int svm_hardware_setup(void) svm_hv_hardware_setup(); - for_each_possible_cpu(cpu) { - r = svm_cpu_init(cpu); - if (r) - goto err; - } - enable_apicv = avic_hardware_setup(); if (!enable_apicv) { enable_ipiv = false; @@ -5435,6 +5425,13 @@ static __init int svm_hardware_setup(void) svm_set_cpu_caps(); kvm_caps.inapplicable_quirks &= ~KVM_X86_QUIRK_CD_NW_CLEARED; + + for_each_possible_cpu(cpu) { + r = svm_cpu_init(cpu); + if (r) + goto err; + } + return 0; err: diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index e4b04f435b3d..dd78e6402345 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -329,13 +329,14 @@ struct vcpu_svm { * back into remapped mode). */ struct list_head ir_list; - spinlock_t ir_list_lock; + raw_spinlock_t ir_list_lock; struct vcpu_sev_es_state sev_es; bool guest_state_loaded; bool x2avic_msrs_intercepted; + bool lbr_msrs_intercepted; /* Guest GIF value, used when vGIF is not enabled */ bool guest_gif; @@ -805,7 +806,7 @@ extern struct kvm_x86_nested_ops svm_nested_ops; ) bool __init avic_hardware_setup(void); -int avic_ga_log_notifier(u32 ga_tag); +void avic_hardware_unsetup(void); void avic_vm_destroy(struct kvm *kvm); int avic_vm_init(struct kvm *kvm); void avic_init_vmcb(struct vcpu_svm *svm, struct vmcb *vmcb); diff --git a/arch/x86/kvm/vmx/common.h b/arch/x86/kvm/vmx/common.h index bc5ece76533a..412d0829d7a2 100644 --- a/arch/x86/kvm/vmx/common.h +++ b/arch/x86/kvm/vmx/common.h @@ -98,7 +98,7 @@ static inline int __vmx_handle_ept_violation(struct kvm_vcpu *vcpu, gpa_t gpa, error_code |= (exit_qualification & EPT_VIOLATION_PROT_MASK) ? 
PFERR_PRESENT_MASK : 0; - if (error_code & EPT_VIOLATION_GVA_IS_VALID) + if (exit_qualification & EPT_VIOLATION_GVA_IS_VALID) error_code |= (exit_qualification & EPT_VIOLATION_GVA_TRANSLATED) ? PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK; diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 76271962cb70..bcea087b642f 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -6728,6 +6728,14 @@ static bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu, case EXIT_REASON_NOTIFY: /* Notify VM exit is not exposed to L1 */ return false; + case EXIT_REASON_SEAMCALL: + case EXIT_REASON_TDCALL: + /* + * SEAMCALL and TDCALL unconditionally VM-Exit, but aren't + * virtualized by KVM for L1 hypervisors, i.e. L1 should + * never want or expect such an exit. + */ + return false; default: return true; } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index f87c216d976d..91b6f2f3edc2 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6032,6 +6032,12 @@ static int handle_vmx_instruction(struct kvm_vcpu *vcpu) return 1; } +static int handle_tdx_instruction(struct kvm_vcpu *vcpu) +{ + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; +} + #ifndef CONFIG_X86_SGX_KVM static int handle_encls(struct kvm_vcpu *vcpu) { @@ -6157,6 +6163,8 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { [EXIT_REASON_ENCLS] = handle_encls, [EXIT_REASON_BUS_LOCK] = handle_bus_lock_vmexit, [EXIT_REASON_NOTIFY] = handle_notify, + [EXIT_REASON_SEAMCALL] = handle_tdx_instruction, + [EXIT_REASON_TDCALL] = handle_tdx_instruction, [EXIT_REASON_MSR_READ_IMM] = handle_rdmsr_imm, [EXIT_REASON_MSR_WRITE_IMM] = handle_wrmsr_imm, }; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b4b5d2d09634..c9c2aa6f4705 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3874,15 +3874,9 @@ static void record_steal_time(struct kvm_vcpu *vcpu) /* * Returns true if the MSR in question is managed via XSTATE, i.e. is context - * switched with the rest of guest FPU state. Note! S_CET is _not_ context - * switched via XSTATE even though it _is_ saved/restored via XSAVES/XRSTORS. - * Because S_CET is loaded on VM-Enter and VM-Exit via dedicated VMCS fields, - * the value saved/restored via XSTATE is always the host's value. That detail - * is _extremely_ important, as the guest's S_CET must _never_ be resident in - * hardware while executing in the host. Loading guest values for U_CET and - * PL[0-3]_SSP while executing in the kernel is safe, as U_CET is specific to - * userspace, and PL[0-3]_SSP are only consumed when transitioning to lower - * privilege levels, i.e. are effectively only consumed by userspace as well. + * switched with the rest of guest FPU state. + * + * Note, S_CET is _not_ saved/restored via XSAVES/XRSTORS. */ static bool is_xstate_managed_msr(struct kvm_vcpu *vcpu, u32 msr) { @@ -3905,6 +3899,11 @@ static bool is_xstate_managed_msr(struct kvm_vcpu *vcpu, u32 msr) * MSR that is managed via XSTATE. Note, the caller is responsible for doing * the initial FPU load, this helper only ensures that guest state is resident * in hardware (the kernel can load its FPU state in IRQ context). + * + * Note, loading guest values for U_CET and PL[0-3]_SSP while executing in the + * kernel is safe, as U_CET is specific to userspace, and PL[0-3]_SSP are only + * consumed when transitioning to lower privilege levels, i.e. are effectively + * only consumed by userspace as well. 
*/ static __always_inline void kvm_access_xstate_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info, @@ -11807,6 +11806,9 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu) /* Swap (qemu) user FPU context for the guest FPU context. */ static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) { + if (KVM_BUG_ON(vcpu->arch.guest_fpu.fpstate->in_use, vcpu->kvm)) + return; + /* Exclude PKRU, it's restored separately immediately after VM-Exit. */ fpu_swap_kvm_fpstate(&vcpu->arch.guest_fpu, true); trace_kvm_fpu(1); @@ -11815,6 +11817,9 @@ static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) /* When vcpu_run ends, restore user space FPU context. */ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) { + if (KVM_BUG_ON(!vcpu->arch.guest_fpu.fpstate->in_use, vcpu->kvm)) + return; + fpu_swap_kvm_fpstate(&vcpu->arch.guest_fpu, false); ++vcpu->stat.fpu_reload; trace_kvm_fpu(0); @@ -12137,9 +12142,6 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, int r; vcpu_load(vcpu); - if (kvm_mpx_supported()) - kvm_load_guest_fpu(vcpu); - kvm_vcpu_srcu_read_lock(vcpu); r = kvm_apic_accept_events(vcpu); @@ -12156,9 +12158,6 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, out: kvm_vcpu_srcu_read_unlock(vcpu); - - if (kvm_mpx_supported()) - kvm_put_guest_fpu(vcpu); vcpu_put(vcpu); return r; } @@ -12788,6 +12787,7 @@ static void kvm_xstate_reset(struct kvm_vcpu *vcpu, bool init_event) { struct fpstate *fpstate = vcpu->arch.guest_fpu.fpstate; u64 xfeatures_mask; + bool fpu_in_use; int i; /* @@ -12811,13 +12811,23 @@ static void kvm_xstate_reset(struct kvm_vcpu *vcpu, bool init_event) BUILD_BUG_ON(sizeof(xfeatures_mask) * BITS_PER_BYTE <= XFEATURE_MAX); /* - * All paths that lead to INIT are required to load the guest's FPU - * state (because most paths are buried in KVM_RUN). - */ - kvm_put_guest_fpu(vcpu); + * Unload guest FPU state (if necessary) before zeroing XSTATE fields + * as the kernel can only modify the state when its resident in memory, + * i.e. when it's not loaded into hardware. + * + * WARN if the vCPU's desire to run, i.e. whether or not its in KVM_RUN, + * doesn't match the loaded/in-use state of the FPU, as KVM_RUN is the + * only path that can trigger INIT emulation _and_ loads FPU state, and + * KVM_RUN should _always_ load FPU state. 
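
Annotation (not part of the diff): condensing the kvm_xstate_reset() hunk around this comment, the intended flow is roughly the sketch below -- a simplified, hypothetical restatement rather than the patch itself; names mirror the diff:

static void example_xstate_reset(struct kvm_vcpu *vcpu, u64 xfeatures_mask)
{
	struct fpstate *fpstate = vcpu->arch.guest_fpu.fpstate;
	bool fpu_in_use = fpstate->in_use;
	int i;

	/* KVM_RUN is the only INIT path that loads FPU state, hence the WARN. */
	WARN_ON_ONCE(vcpu->wants_to_run != fpu_in_use);

	if (fpu_in_use)
		kvm_put_guest_fpu(vcpu);	/* state must be in memory to edit it */

	for_each_set_bit(i, (unsigned long *)&xfeatures_mask, XFEATURE_MAX)
		fpstate_clear_xstate_component(fpstate, i);

	if (fpu_in_use)
		kvm_load_guest_fpu(vcpu);	/* reload what KVM_RUN expects */
}
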
+ */ + WARN_ON_ONCE(vcpu->wants_to_run != fpstate->in_use); + fpu_in_use = fpstate->in_use; + if (fpu_in_use) + kvm_put_guest_fpu(vcpu); for_each_set_bit(i, (unsigned long *)&xfeatures_mask, XFEATURE_MAX) fpstate_clear_xstate_component(fpstate, i); - kvm_load_guest_fpu(vcpu); + if (fpu_in_use) + kvm_load_guest_fpu(vcpu); } void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) diff --git a/arch/x86/lib/cache-smp.c b/arch/x86/lib/cache-smp.c index c5c60d07308c..824664c0ecbd 100644 --- a/arch/x86/lib/cache-smp.c +++ b/arch/x86/lib/cache-smp.c @@ -2,6 +2,7 @@ #include <asm/paravirt.h> #include <linux/smp.h> #include <linux/export.h> +#include <linux/kvm_types.h> static void __wbinvd(void *dummy) { @@ -12,7 +13,7 @@ void wbinvd_on_cpu(int cpu) { smp_call_function_single(cpu, __wbinvd, NULL, 1); } -EXPORT_SYMBOL(wbinvd_on_cpu); +EXPORT_SYMBOL_FOR_KVM(wbinvd_on_cpu); void wbinvd_on_all_cpus(void) { @@ -24,7 +25,7 @@ void wbinvd_on_cpus_mask(struct cpumask *cpus) { on_each_cpu_mask(cpus, __wbinvd, NULL, 1); } -EXPORT_SYMBOL_GPL(wbinvd_on_cpus_mask); +EXPORT_SYMBOL_FOR_KVM(wbinvd_on_cpus_mask); static void __wbnoinvd(void *dummy) { @@ -35,10 +36,10 @@ void wbnoinvd_on_all_cpus(void) { on_each_cpu(__wbnoinvd, NULL, 1); } -EXPORT_SYMBOL_GPL(wbnoinvd_on_all_cpus); +EXPORT_SYMBOL_FOR_KVM(wbnoinvd_on_all_cpus); void wbnoinvd_on_cpus_mask(struct cpumask *cpus) { on_each_cpu_mask(cpus, __wbnoinvd, NULL, 1); } -EXPORT_SYMBOL_GPL(wbnoinvd_on_cpus_mask); +EXPORT_SYMBOL_FOR_KVM(wbnoinvd_on_cpus_mask); diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c index 4e385cbfd444..e03eeec55cfe 100644 --- a/arch/x86/lib/insn-eval.c +++ b/arch/x86/lib/insn-eval.c @@ -63,11 +63,10 @@ static bool is_string_insn(struct insn *insn) bool insn_has_rep_prefix(struct insn *insn) { insn_byte_t p; - int i; insn_get_prefixes(insn); - for_each_insn_prefix(insn, i, p) { + for_each_insn_prefix(insn, p) { if (p == 0xf2 || p == 0xf3) return true; } @@ -92,13 +91,13 @@ bool insn_has_rep_prefix(struct insn *insn) static int get_seg_reg_override_idx(struct insn *insn) { int idx = INAT_SEG_REG_DEFAULT; - int num_overrides = 0, i; + int num_overrides = 0; insn_byte_t p; insn_get_prefixes(insn); /* Look for any segment override prefixes. */ - for_each_insn_prefix(insn, i, p) { + for_each_insn_prefix(insn, p) { insn_attr_t attr; attr = inat_get_opcode_attribute(p); @@ -1676,3 +1675,147 @@ enum insn_mmio_type insn_decode_mmio(struct insn *insn, int *bytes) return type; } + +/* + * Recognise typical NOP patterns for both 32bit and 64bit. + * + * Notably: + * - NOP, but not: REP NOP aka PAUSE + * - NOPL + * - MOV %reg, %reg + * - LEA 0(%reg),%reg + * - JMP +0 + * + * Must not have false-positives; instructions identified as a NOP might be + * emulated as a NOP (uprobe) or Run Length Encoded in a larger NOP + * (alternatives). + * + * False-negatives are fine; need not be exhaustive. 
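
Annotation (not part of the diff): a hypothetical caller of the new helper, to illustrate the "no false positives" contract stated in the comment above. insn_decode_kernel() and struct insn are the existing <asm/insn.h> facilities; the wrapper itself is illustrative only:

#include <asm/insn.h>

static bool example_text_is_single_nop(const u8 *kaddr)
{
	struct insn insn;

	if (insn_decode_kernel(&insn, kaddr) < 0)
		return false;

	/*
	 * A "true" result may be acted upon (e.g. the instruction emulated
	 * as a NOP), so insn_is_nop() must never misidentify a non-NOP;
	 * missing an exotic NOP encoding is merely a lost optimisation.
	 */
	return insn_is_nop(&insn);
}
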
+ */ +bool insn_is_nop(struct insn *insn) +{ + u8 b3 = 0, x3 = 0, r3 = 0; + u8 b4 = 0, x4 = 0, r4 = 0, m = 0; + u8 modrm, modrm_mod, modrm_reg, modrm_rm; + u8 sib = 0, sib_scale, sib_index, sib_base; + u8 nrex, rex; + u8 p, rep = 0; + + if ((nrex = insn->rex_prefix.nbytes)) { + rex = insn->rex_prefix.bytes[nrex-1]; + + r3 = !!X86_REX_R(rex); + x3 = !!X86_REX_X(rex); + b3 = !!X86_REX_B(rex); + if (nrex > 1) { + r4 = !!X86_REX2_R(rex); + x4 = !!X86_REX2_X(rex); + b4 = !!X86_REX2_B(rex); + m = !!X86_REX2_M(rex); + } + + } else if (insn->vex_prefix.nbytes) { + /* + * Ignore VEX encoded NOPs + */ + return false; + } + + if (insn->modrm.nbytes) { + modrm = insn->modrm.bytes[0]; + modrm_mod = X86_MODRM_MOD(modrm); + modrm_reg = X86_MODRM_REG(modrm) + 8*r3 + 16*r4; + modrm_rm = X86_MODRM_RM(modrm) + 8*b3 + 16*b4; + modrm = 1; + } + + if (insn->sib.nbytes) { + sib = insn->sib.bytes[0]; + sib_scale = X86_SIB_SCALE(sib); + sib_index = X86_SIB_INDEX(sib) + 8*x3 + 16*x4; + sib_base = X86_SIB_BASE(sib) + 8*b3 + 16*b4; + sib = 1; + + modrm_rm = sib_base; + } + + for_each_insn_prefix(insn, p) { + if (p == 0xf3) /* REPE */ + rep = 1; + } + + /* + * Opcode map munging: + * + * REX2: 0 - single byte opcode + * 1 - 0f second byte opcode + */ + switch (m) { + case 0: break; + case 1: insn->opcode.value <<= 8; + insn->opcode.value |= 0x0f; + break; + default: + return false; + } + + switch (insn->opcode.bytes[0]) { + case 0x0f: /* 2nd byte */ + break; + + case 0x89: /* MOV */ + if (modrm_mod != 3) /* register-direct */ + return false; + + /* native size */ + if (insn->opnd_bytes != 4 * (1 + insn->x86_64)) + return false; + + return modrm_reg == modrm_rm; /* MOV %reg, %reg */ + + case 0x8d: /* LEA */ + if (modrm_mod == 0 || modrm_mod == 3) /* register-indirect with disp */ + return false; + + /* native size */ + if (insn->opnd_bytes != 4 * (1 + insn->x86_64)) + return false; + + if (insn->displacement.value != 0) + return false; + + if (sib && (sib_scale != 0 || sib_index != 4)) /* (%reg, %eiz, 1) */ + return false; + + for_each_insn_prefix(insn, p) { + if (p != 0x3e) /* DS */ + return false; + } + + return modrm_reg == modrm_rm; /* LEA 0(%reg), %reg */ + + case 0x90: /* NOP */ + if (b3 || b4) /* XCHG %r{8,16,24},%rax */ + return false; + + if (rep) /* REP NOP := PAUSE */ + return false; + + return true; + + case 0xe9: /* JMP.d32 */ + case 0xeb: /* JMP.d8 */ + return insn->immediate.value == 0; /* JMP +0 */ + + default: + return false; + } + + switch (insn->opcode.bytes[1]) { + case 0x1f: + return modrm_reg == 0; /* 0f 1f /0 -- NOPL */ + + default: + return false; + } +} diff --git a/arch/x86/lib/kaslr.c b/arch/x86/lib/kaslr.c index b5893928d55c..8c7cd115b484 100644 --- a/arch/x86/lib/kaslr.c +++ b/arch/x86/lib/kaslr.c @@ -22,7 +22,7 @@ #include <asm/setup.h> #define debug_putstr(v) early_printk("%s", v) -#define has_cpuflag(f) boot_cpu_has(f) +#define has_cpuflag(f) cpu_feature_enabled(f) #define get_boot_seed() kaslr_offset() #endif diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c index 4ef7c6dcbea6..dfdd1da89f36 100644 --- a/arch/x86/lib/msr.c +++ b/arch/x86/lib/msr.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/export.h> +#include <linux/kvm_types.h> #include <linux/percpu.h> #include <linux/preempt.h> #include <asm/msr.h> @@ -103,7 +104,7 @@ int msr_set_bit(u32 msr, u8 bit) { return __flip_bit(msr, bit, true); } -EXPORT_SYMBOL_GPL(msr_set_bit); +EXPORT_SYMBOL_FOR_KVM(msr_set_bit); /** * msr_clear_bit - Clear @bit in a MSR @msr. 
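
Annotation (not part of the diff): the recurring EXPORT_SYMBOL_GPL() -> EXPORT_SYMBOL_FOR_KVM() conversions in this and the following hunks, together with the <linux/kvm_types.h> includes being added, suggest a macro that restricts these exports to the KVM modules. Its definition is outside this diff; a plausible shape, assuming the EXPORT_SYMBOL_GPL_FOR_MODULES() machinery and with the module list being a guess, would be:

/* Hypothetical sketch -- the real definition presumably sits in <linux/kvm_types.h>. */
#define EXPORT_SYMBOL_FOR_KVM(symbol) \
	EXPORT_SYMBOL_GPL_FOR_MODULES(symbol, "kvm,kvm-intel,kvm-amd")
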
@@ -119,7 +120,7 @@ int msr_clear_bit(u32 msr, u8 bit) { return __flip_bit(msr, bit, false); } -EXPORT_SYMBOL_GPL(msr_clear_bit); +EXPORT_SYMBOL_FOR_KVM(msr_clear_bit); #ifdef CONFIG_TRACEPOINTS void do_trace_write_msr(u32 msr, u64 val, int failed) diff --git a/arch/x86/math-emu/poly.h b/arch/x86/math-emu/poly.h index fc1c887ca073..654bfe4e29a0 100644 --- a/arch/x86/math-emu/poly.h +++ b/arch/x86/math-emu/poly.h @@ -39,7 +39,7 @@ asmlinkage void mul_Xsig_Xsig(Xsig *dest, const Xsig *mult); asmlinkage void shr_Xsig(Xsig *, const int n); asmlinkage int round_Xsig(Xsig *); asmlinkage int norm_Xsig(Xsig *); -asmlinkage void div_Xsig(Xsig *x1, const Xsig *x2, const Xsig *dest); +asmlinkage void div_Xsig(Xsig *x1, const Xsig *x2, Xsig *dest); /* Macro to extract the most significant 32 bits from a long long */ #define LL_MSW(x) (((unsigned long *)&x)[1]) diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index a4700ef6eb64..2afa7a23340e 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -486,7 +486,6 @@ void ptdump_walk_pgd_level_debugfs(struct seq_file *m, struct mm_struct *mm, #endif ptdump_walk_pgd_level_core(m, mm, pgd, false, false); } -EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level_debugfs); void ptdump_walk_user_pgd_level_checkwx(void) { diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 0e4270e20fad..1044aafd5d94 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -504,9 +504,6 @@ phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end, continue; } - if (0) - pr_info(" pte=%p addr=%lx pte=%016lx\n", pte, paddr, - pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL).pte); pages++; set_pte_init(pte, pfn_pte(paddr >> PAGE_SHIFT, prot), init); paddr_last = (paddr & PAGE_MASK) + PAGE_SIZE; diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c index b68200a0e0c6..8a3d9722f602 100644 --- a/arch/x86/mm/pat/memtype.c +++ b/arch/x86/mm/pat/memtype.c @@ -42,6 +42,7 @@ #include <linux/highmem.h> #include <linux/fs.h> #include <linux/rbtree.h> +#include <linux/kvm_types.h> #include <asm/cpu_device_id.h> #include <asm/cacheflush.h> @@ -697,7 +698,7 @@ bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn) cm == _PAGE_CACHE_MODE_UC_MINUS || cm == _PAGE_CACHE_MODE_WC; } -EXPORT_SYMBOL_GPL(pat_pfn_immune_to_uc_mtrr); +EXPORT_SYMBOL_FOR_KVM(pat_pfn_immune_to_uc_mtrr); /** * memtype_reserve_io - Request a memory type mapping for a region of memory diff --git a/arch/x86/mm/physaddr.c b/arch/x86/mm/physaddr.c index fc3f3d3e2ef2..8d31c6b9e184 100644 --- a/arch/x86/mm/physaddr.c +++ b/arch/x86/mm/physaddr.c @@ -31,17 +31,6 @@ unsigned long __phys_addr(unsigned long x) return x; } EXPORT_SYMBOL(__phys_addr); - -unsigned long __phys_addr_symbol(unsigned long x) -{ - unsigned long y = x - __START_KERNEL_map; - - /* only check upper bounds since lower bounds will trigger carry */ - VIRTUAL_BUG_ON(y >= KERNEL_IMAGE_SIZE); - - return y + phys_base; -} -EXPORT_SYMBOL(__phys_addr_symbol); #endif bool __virt_addr_valid(unsigned long x) diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 5d221709353e..f5b93e01e347 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -12,6 +12,7 @@ #include <linux/task_work.h> #include <linux/mmu_notifier.h> #include <linux/mmu_context.h> +#include <linux/kvm_types.h> #include <asm/tlbflush.h> #include <asm/mmu_context.h> @@ -1582,7 +1583,7 @@ unsigned long __get_current_cr3_fast(void) VM_BUG_ON(cr3 != __read_cr3()); return cr3; } -EXPORT_SYMBOL_GPL(__get_current_cr3_fast); 
+EXPORT_SYMBOL_FOR_KVM(__get_current_cr3_fast); /* * Flush one page in the kernel mapping @@ -1723,7 +1724,7 @@ void __flush_tlb_all(void) flush_tlb_local(); } } -EXPORT_SYMBOL_GPL(__flush_tlb_all); +EXPORT_SYMBOL_FOR_KVM(__flush_tlb_all); void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) { diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c index eac403248462..5ce4ebe99774 100644 --- a/arch/x86/virt/vmx/tdx/tdx.c +++ b/arch/x86/virt/vmx/tdx/tdx.c @@ -29,6 +29,7 @@ #include <linux/acpi.h> #include <linux/suspend.h> #include <linux/idr.h> +#include <linux/kvm_types.h> #include <asm/page.h> #include <asm/special_insns.h> #include <asm/msr-index.h> @@ -181,7 +182,7 @@ int tdx_cpu_enable(void) return 0; } -EXPORT_SYMBOL_GPL(tdx_cpu_enable); +EXPORT_SYMBOL_FOR_KVM(tdx_cpu_enable); /* * Add a memory region as a TDX memory block. The caller must make sure @@ -662,7 +663,7 @@ void tdx_quirk_reset_page(struct page *page) { tdx_quirk_reset_paddr(page_to_phys(page), PAGE_SIZE); } -EXPORT_SYMBOL_GPL(tdx_quirk_reset_page); +EXPORT_SYMBOL_FOR_KVM(tdx_quirk_reset_page); static void tdmr_quirk_reset_pamt(struct tdmr_info *tdmr) { @@ -1216,7 +1217,7 @@ int tdx_enable(void) return ret; } -EXPORT_SYMBOL_GPL(tdx_enable); +EXPORT_SYMBOL_FOR_KVM(tdx_enable); static bool is_pamt_page(unsigned long phys) { @@ -1477,13 +1478,13 @@ const struct tdx_sys_info *tdx_get_sysinfo(void) return p; } -EXPORT_SYMBOL_GPL(tdx_get_sysinfo); +EXPORT_SYMBOL_FOR_KVM(tdx_get_sysinfo); u32 tdx_get_nr_guest_keyids(void) { return tdx_nr_guest_keyids; } -EXPORT_SYMBOL_GPL(tdx_get_nr_guest_keyids); +EXPORT_SYMBOL_FOR_KVM(tdx_get_nr_guest_keyids); int tdx_guest_keyid_alloc(void) { @@ -1491,13 +1492,13 @@ int tdx_guest_keyid_alloc(void) tdx_guest_keyid_start + tdx_nr_guest_keyids - 1, GFP_KERNEL); } -EXPORT_SYMBOL_GPL(tdx_guest_keyid_alloc); +EXPORT_SYMBOL_FOR_KVM(tdx_guest_keyid_alloc); void tdx_guest_keyid_free(unsigned int keyid) { ida_free(&tdx_guest_keyid_pool, keyid); } -EXPORT_SYMBOL_GPL(tdx_guest_keyid_free); +EXPORT_SYMBOL_FOR_KVM(tdx_guest_keyid_free); static inline u64 tdx_tdr_pa(struct tdx_td *td) { @@ -1521,7 +1522,7 @@ noinstr u64 tdh_vp_enter(struct tdx_vp *td, struct tdx_module_args *args) return __seamcall_dirty_cache(__seamcall_saved_ret, TDH_VP_ENTER, args); } -EXPORT_SYMBOL_GPL(tdh_vp_enter); +EXPORT_SYMBOL_FOR_KVM(tdh_vp_enter); u64 tdh_mng_addcx(struct tdx_td *td, struct page *tdcs_page) { @@ -1533,7 +1534,7 @@ u64 tdh_mng_addcx(struct tdx_td *td, struct page *tdcs_page) tdx_clflush_page(tdcs_page); return seamcall(TDH_MNG_ADDCX, &args); } -EXPORT_SYMBOL_GPL(tdh_mng_addcx); +EXPORT_SYMBOL_FOR_KVM(tdh_mng_addcx); u64 tdh_mem_page_add(struct tdx_td *td, u64 gpa, struct page *page, struct page *source, u64 *ext_err1, u64 *ext_err2) { @@ -1553,7 +1554,7 @@ u64 tdh_mem_page_add(struct tdx_td *td, u64 gpa, struct page *page, struct page return ret; } -EXPORT_SYMBOL_GPL(tdh_mem_page_add); +EXPORT_SYMBOL_FOR_KVM(tdh_mem_page_add); u64 tdh_mem_sept_add(struct tdx_td *td, u64 gpa, int level, struct page *page, u64 *ext_err1, u64 *ext_err2) { @@ -1572,7 +1573,7 @@ u64 tdh_mem_sept_add(struct tdx_td *td, u64 gpa, int level, struct page *page, u return ret; } -EXPORT_SYMBOL_GPL(tdh_mem_sept_add); +EXPORT_SYMBOL_FOR_KVM(tdh_mem_sept_add); u64 tdh_vp_addcx(struct tdx_vp *vp, struct page *tdcx_page) { @@ -1584,7 +1585,7 @@ u64 tdh_vp_addcx(struct tdx_vp *vp, struct page *tdcx_page) tdx_clflush_page(tdcx_page); return seamcall(TDH_VP_ADDCX, &args); } -EXPORT_SYMBOL_GPL(tdh_vp_addcx); 
+EXPORT_SYMBOL_FOR_KVM(tdh_vp_addcx); u64 tdh_mem_page_aug(struct tdx_td *td, u64 gpa, int level, struct page *page, u64 *ext_err1, u64 *ext_err2) { @@ -1603,7 +1604,7 @@ u64 tdh_mem_page_aug(struct tdx_td *td, u64 gpa, int level, struct page *page, u return ret; } -EXPORT_SYMBOL_GPL(tdh_mem_page_aug); +EXPORT_SYMBOL_FOR_KVM(tdh_mem_page_aug); u64 tdh_mem_range_block(struct tdx_td *td, u64 gpa, int level, u64 *ext_err1, u64 *ext_err2) { @@ -1620,7 +1621,7 @@ u64 tdh_mem_range_block(struct tdx_td *td, u64 gpa, int level, u64 *ext_err1, u6 return ret; } -EXPORT_SYMBOL_GPL(tdh_mem_range_block); +EXPORT_SYMBOL_FOR_KVM(tdh_mem_range_block); u64 tdh_mng_key_config(struct tdx_td *td) { @@ -1630,7 +1631,7 @@ u64 tdh_mng_key_config(struct tdx_td *td) return seamcall(TDH_MNG_KEY_CONFIG, &args); } -EXPORT_SYMBOL_GPL(tdh_mng_key_config); +EXPORT_SYMBOL_FOR_KVM(tdh_mng_key_config); u64 tdh_mng_create(struct tdx_td *td, u16 hkid) { @@ -1642,7 +1643,7 @@ u64 tdh_mng_create(struct tdx_td *td, u16 hkid) tdx_clflush_page(td->tdr_page); return seamcall(TDH_MNG_CREATE, &args); } -EXPORT_SYMBOL_GPL(tdh_mng_create); +EXPORT_SYMBOL_FOR_KVM(tdh_mng_create); u64 tdh_vp_create(struct tdx_td *td, struct tdx_vp *vp) { @@ -1654,7 +1655,7 @@ u64 tdh_vp_create(struct tdx_td *td, struct tdx_vp *vp) tdx_clflush_page(vp->tdvpr_page); return seamcall(TDH_VP_CREATE, &args); } -EXPORT_SYMBOL_GPL(tdh_vp_create); +EXPORT_SYMBOL_FOR_KVM(tdh_vp_create); u64 tdh_mng_rd(struct tdx_td *td, u64 field, u64 *data) { @@ -1671,7 +1672,7 @@ u64 tdh_mng_rd(struct tdx_td *td, u64 field, u64 *data) return ret; } -EXPORT_SYMBOL_GPL(tdh_mng_rd); +EXPORT_SYMBOL_FOR_KVM(tdh_mng_rd); u64 tdh_mr_extend(struct tdx_td *td, u64 gpa, u64 *ext_err1, u64 *ext_err2) { @@ -1688,7 +1689,7 @@ u64 tdh_mr_extend(struct tdx_td *td, u64 gpa, u64 *ext_err1, u64 *ext_err2) return ret; } -EXPORT_SYMBOL_GPL(tdh_mr_extend); +EXPORT_SYMBOL_FOR_KVM(tdh_mr_extend); u64 tdh_mr_finalize(struct tdx_td *td) { @@ -1698,7 +1699,7 @@ u64 tdh_mr_finalize(struct tdx_td *td) return seamcall(TDH_MR_FINALIZE, &args); } -EXPORT_SYMBOL_GPL(tdh_mr_finalize); +EXPORT_SYMBOL_FOR_KVM(tdh_mr_finalize); u64 tdh_vp_flush(struct tdx_vp *vp) { @@ -1708,7 +1709,7 @@ u64 tdh_vp_flush(struct tdx_vp *vp) return seamcall(TDH_VP_FLUSH, &args); } -EXPORT_SYMBOL_GPL(tdh_vp_flush); +EXPORT_SYMBOL_FOR_KVM(tdh_vp_flush); u64 tdh_mng_vpflushdone(struct tdx_td *td) { @@ -1718,7 +1719,7 @@ u64 tdh_mng_vpflushdone(struct tdx_td *td) return seamcall(TDH_MNG_VPFLUSHDONE, &args); } -EXPORT_SYMBOL_GPL(tdh_mng_vpflushdone); +EXPORT_SYMBOL_FOR_KVM(tdh_mng_vpflushdone); u64 tdh_mng_key_freeid(struct tdx_td *td) { @@ -1728,7 +1729,7 @@ u64 tdh_mng_key_freeid(struct tdx_td *td) return seamcall(TDH_MNG_KEY_FREEID, &args); } -EXPORT_SYMBOL_GPL(tdh_mng_key_freeid); +EXPORT_SYMBOL_FOR_KVM(tdh_mng_key_freeid); u64 tdh_mng_init(struct tdx_td *td, u64 td_params, u64 *extended_err) { @@ -1744,7 +1745,7 @@ u64 tdh_mng_init(struct tdx_td *td, u64 td_params, u64 *extended_err) return ret; } -EXPORT_SYMBOL_GPL(tdh_mng_init); +EXPORT_SYMBOL_FOR_KVM(tdh_mng_init); u64 tdh_vp_rd(struct tdx_vp *vp, u64 field, u64 *data) { @@ -1761,7 +1762,7 @@ u64 tdh_vp_rd(struct tdx_vp *vp, u64 field, u64 *data) return ret; } -EXPORT_SYMBOL_GPL(tdh_vp_rd); +EXPORT_SYMBOL_FOR_KVM(tdh_vp_rd); u64 tdh_vp_wr(struct tdx_vp *vp, u64 field, u64 data, u64 mask) { @@ -1774,7 +1775,7 @@ u64 tdh_vp_wr(struct tdx_vp *vp, u64 field, u64 data, u64 mask) return seamcall(TDH_VP_WR, &args); } -EXPORT_SYMBOL_GPL(tdh_vp_wr); +EXPORT_SYMBOL_FOR_KVM(tdh_vp_wr); u64 
tdh_vp_init(struct tdx_vp *vp, u64 initial_rcx, u32 x2apicid) { @@ -1787,7 +1788,7 @@ u64 tdh_vp_init(struct tdx_vp *vp, u64 initial_rcx, u32 x2apicid) /* apicid requires version == 1. */ return seamcall(TDH_VP_INIT | (1ULL << TDX_VERSION_SHIFT), &args); } -EXPORT_SYMBOL_GPL(tdh_vp_init); +EXPORT_SYMBOL_FOR_KVM(tdh_vp_init); /* * TDX ABI defines output operands as PT, OWNER and SIZE. These are TDX defined fomats. @@ -1809,7 +1810,7 @@ u64 tdh_phymem_page_reclaim(struct page *page, u64 *tdx_pt, u64 *tdx_owner, u64 return ret; } -EXPORT_SYMBOL_GPL(tdh_phymem_page_reclaim); +EXPORT_SYMBOL_FOR_KVM(tdh_phymem_page_reclaim); u64 tdh_mem_track(struct tdx_td *td) { @@ -1819,7 +1820,7 @@ u64 tdh_mem_track(struct tdx_td *td) return seamcall(TDH_MEM_TRACK, &args); } -EXPORT_SYMBOL_GPL(tdh_mem_track); +EXPORT_SYMBOL_FOR_KVM(tdh_mem_track); u64 tdh_mem_page_remove(struct tdx_td *td, u64 gpa, u64 level, u64 *ext_err1, u64 *ext_err2) { @@ -1836,7 +1837,7 @@ u64 tdh_mem_page_remove(struct tdx_td *td, u64 gpa, u64 level, u64 *ext_err1, u6 return ret; } -EXPORT_SYMBOL_GPL(tdh_mem_page_remove); +EXPORT_SYMBOL_FOR_KVM(tdh_mem_page_remove); u64 tdh_phymem_cache_wb(bool resume) { @@ -1846,7 +1847,7 @@ u64 tdh_phymem_cache_wb(bool resume) return seamcall(TDH_PHYMEM_CACHE_WB, &args); } -EXPORT_SYMBOL_GPL(tdh_phymem_cache_wb); +EXPORT_SYMBOL_FOR_KVM(tdh_phymem_cache_wb); u64 tdh_phymem_page_wbinvd_tdr(struct tdx_td *td) { @@ -1856,7 +1857,7 @@ u64 tdh_phymem_page_wbinvd_tdr(struct tdx_td *td) return seamcall(TDH_PHYMEM_PAGE_WBINVD, &args); } -EXPORT_SYMBOL_GPL(tdh_phymem_page_wbinvd_tdr); +EXPORT_SYMBOL_FOR_KVM(tdh_phymem_page_wbinvd_tdr); u64 tdh_phymem_page_wbinvd_hkid(u64 hkid, struct page *page) { @@ -1866,7 +1867,7 @@ u64 tdh_phymem_page_wbinvd_hkid(u64 hkid, struct page *page) return seamcall(TDH_PHYMEM_PAGE_WBINVD, &args); } -EXPORT_SYMBOL_GPL(tdh_phymem_page_wbinvd_hkid); +EXPORT_SYMBOL_FOR_KVM(tdh_phymem_page_wbinvd_hkid); #ifdef CONFIG_KEXEC_CORE void tdx_cpu_flush_cache_for_kexec(void) @@ -1884,5 +1885,5 @@ void tdx_cpu_flush_cache_for_kexec(void) wbinvd(); this_cpu_write(cache_state_incoherent, false); } -EXPORT_SYMBOL_GPL(tdx_cpu_flush_cache_for_kexec); +EXPORT_SYMBOL_FOR_KVM(tdx_cpu_flush_cache_for_kexec); #endif diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl index 374e4cb788d8..438a3b170402 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -440,3 +440,4 @@ 467 common open_tree_attr sys_open_tree_attr 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr +470 common listns sys_listns |
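
Annotation (not part of the diff): the xtensa syscall table hunk above wires up listns() as syscall 470. Until a libc wrapper exists, userspace can only probe it via syscall(2); the sketch below is hypothetical, and the argument layout in particular is an assumption -- the real prototype comes from the uapi header that defines the request structure:

#include <errno.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

int main(void)
{
#ifdef __NR_listns
	/*
	 * All-zero arguments only check that the syscall is wired up: a
	 * kernel with listns() typically fails this call with EINVAL or
	 * EFAULT, while an older kernel returns ENOSYS.
	 */
	long ret = syscall(__NR_listns, NULL, NULL, 0UL, 0U);

	printf("listns: ret=%ld errno=%d\n", ret, ret < 0 ? errno : 0);
#else
	puts("__NR_listns is not defined by the installed kernel headers");
#endif
	return 0;
}
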
