| author | Linus Torvalds <torvalds@linux-foundation.org> | 2025-03-25 18:33:04 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2025-03-25 18:33:04 -0700 |
| commit | ee6740fd34eb53c5c76be01201c15310f461b69f (patch) | |
| tree | 09a51108245a2e8f644d1956a9e5b9f974bc23b8 | |
| parent | a86c6d0b2ad12f6ce6560f735f4799cf1f631ab2 (diff) | |
| parent | acf9f8da5e19fc1cbf26f2ecb749369e13e7cd85 (diff) | |
Merge tag 'crc-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux
Pull CRC updates from Eric Biggers:
"Another set of improvements to the kernel's CRC (cyclic redundancy
check) code:
- Rework the CRC64 library functions to be directly optimized, like
what I did last cycle for the CRC32 and CRC-T10DIF library
functions
- Rewrite the x86 PCLMULQDQ-optimized CRC code, and add VPCLMULQDQ
support and acceleration for crc64_be and crc64_nvme
- Rewrite the riscv Zbc-optimized CRC code, and add acceleration for
crc_t10dif, crc64_be, and crc64_nvme
- Remove crc_t10dif and crc64_rocksoft from the crypto API, since
they are no longer needed there
- Rename crc64_rocksoft to crc64_nvme, as the old name was incorrect
- Add kunit test cases for crc64_nvme and crc7
- Eliminate redundant functions for calculating the Castagnoli CRC32,
settling on just crc32c()
- Remove unnecessary prompts from some of the CRC kconfig options
- Further optimize the x86 crc32c code"
* tag 'crc-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux: (36 commits)
x86/crc: drop the avx10_256 functions and rename avx10_512 to avx512
lib/crc: remove unnecessary prompt for CONFIG_CRC64
lib/crc: remove unnecessary prompt for CONFIG_LIBCRC32C
lib/crc: remove unnecessary prompt for CONFIG_CRC8
lib/crc: remove unnecessary prompt for CONFIG_CRC7
lib/crc: remove unnecessary prompt for CONFIG_CRC4
lib/crc7: unexport crc7_be_syndrome_table
lib/crc_kunit.c: update comment in crc_benchmark()
lib/crc_kunit.c: add test and benchmark for crc7_be()
x86/crc32: optimize tail handling for crc32c short inputs
riscv/crc64: add Zbc optimized CRC64 functions
riscv/crc-t10dif: add Zbc optimized CRC-T10DIF function
riscv/crc32: reimplement the CRC32 functions using new template
riscv/crc: add "template" for Zbc optimized CRC functions
x86/crc: add ANNOTATE_NOENDBR to suppress objtool warnings
x86/crc32: improve crc32c_arch() code generation with clang
x86/crc64: implement crc64_be and crc64_nvme using new template
x86/crc-t10dif: implement crc_t10dif using new template
x86/crc32: implement crc32_le using new template
x86/crc: add "template" for [V]PCLMULQDQ based CRC functions
...
115 files changed, 2121 insertions, 1969 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 5fa62f61851e..eda1b54e69d7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6147,6 +6147,7 @@ F: Documentation/staging/crc* F: arch/*/lib/crc* F: include/linux/crc* F: lib/crc* +F: scripts/gen-crc-consts.py CREATIVE SB0540 M: Bastien Nocera <hadess@hadess.net> diff --git a/arch/arm/configs/dove_defconfig b/arch/arm/configs/dove_defconfig index 2849d17f5856..b382a2e175fb 100644 --- a/arch/arm/configs/dove_defconfig +++ b/arch/arm/configs/dove_defconfig @@ -129,7 +129,6 @@ CONFIG_CRYPTO_LZO=y # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_DEV_MARVELL_CESA=y CONFIG_CRC_CCITT=y -CONFIG_LIBCRC32C=y CONFIG_PRINTK_TIME=y # CONFIG_DEBUG_BUGVERBOSE is not set CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y diff --git a/arch/arm/configs/ep93xx_defconfig b/arch/arm/configs/ep93xx_defconfig index 7dece9d98828..2248afaf35b5 100644 --- a/arch/arm/configs/ep93xx_defconfig +++ b/arch/arm/configs/ep93xx_defconfig @@ -113,7 +113,6 @@ CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y -CONFIG_LIBCRC32C=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_SLAB=y CONFIG_DEBUG_SPINLOCK=y diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig index 87841e5cafe2..297c6a7b978a 100644 --- a/arch/arm/configs/imx_v6_v7_defconfig +++ b/arch/arm/configs/imx_v6_v7_defconfig @@ -483,8 +483,6 @@ CONFIG_CRYPTO_DEV_SAHARA=y CONFIG_CRYPTO_DEV_MXS_DCP=y CONFIG_CRC_CCITT=m CONFIG_CRC_T10DIF=y -CONFIG_CRC7=m -CONFIG_LIBCRC32C=m CONFIG_CMA_SIZE_MBYTES=64 CONFIG_FONTS=y CONFIG_FONT_8x8=y diff --git a/arch/arm/configs/lpc18xx_defconfig b/arch/arm/configs/lpc18xx_defconfig index f55c231e0870..2aa2ac8c6507 100644 --- a/arch/arm/configs/lpc18xx_defconfig +++ b/arch/arm/configs/lpc18xx_defconfig @@ -148,7 +148,6 @@ CONFIG_EXT2_FS=y CONFIG_JFFS2_FS=y # CONFIG_NETWORK_FILESYSTEMS is not set CONFIG_CRC_ITU_T=y -CONFIG_CRC7=y CONFIG_PRINTK_TIME=y # CONFIG_ENABLE_MUST_CHECK is not set # CONFIG_DEBUG_BUGVERBOSE is not set diff --git a/arch/arm/configs/moxart_defconfig b/arch/arm/configs/moxart_defconfig index 34d079e03b3c..fa06d98e43fc 100644 --- a/arch/arm/configs/moxart_defconfig +++ b/arch/arm/configs/moxart_defconfig @@ -118,7 +118,6 @@ CONFIG_TMPFS=y CONFIG_CONFIGFS_FS=y CONFIG_JFFS2_FS=y CONFIG_KEYS=y -CONFIG_CRC32_BIT=y CONFIG_DMA_API_DEBUG=y CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y diff --git a/arch/arm/configs/multi_v5_defconfig b/arch/arm/configs/multi_v5_defconfig index 3f4ddcf49ec7..db81862bdb93 100644 --- a/arch/arm/configs/multi_v5_defconfig +++ b/arch/arm/configs/multi_v5_defconfig @@ -290,7 +290,6 @@ CONFIG_CRYPTO_CBC=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_DEV_MARVELL_CESA=y CONFIG_CRC_CCITT=y -CONFIG_LIBCRC32C=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y diff --git a/arch/arm/configs/mvebu_v5_defconfig b/arch/arm/configs/mvebu_v5_defconfig index 2467afd32146..a518d4a2581e 100644 --- a/arch/arm/configs/mvebu_v5_defconfig +++ b/arch/arm/configs/mvebu_v5_defconfig @@ -188,7 +188,6 @@ CONFIG_CRYPTO_CBC=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_DEV_MARVELL_CESA=y CONFIG_CRC_CCITT=y -CONFIG_LIBCRC32C=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y diff --git a/arch/arm/configs/mxs_defconfig b/arch/arm/configs/mxs_defconfig index 43bc1255a5db..d8a6e43c401e 100644 --- a/arch/arm/configs/mxs_defconfig +++ b/arch/arm/configs/mxs_defconfig @@ -161,7 +161,6 @@ CONFIG_NLS_ISO8859_1=y CONFIG_NLS_ISO8859_15=y CONFIG_CRYPTO_DEV_MXS_DCP=y 
CONFIG_CRC_ITU_T=m -CONFIG_CRC7=m CONFIG_FONTS=y CONFIG_PRINTK_TIME=y CONFIG_DEBUG_KERNEL=y diff --git a/arch/arm/configs/omap1_defconfig b/arch/arm/configs/omap1_defconfig index 025b595dd837..661e5d6894bd 100644 --- a/arch/arm/configs/omap1_defconfig +++ b/arch/arm/configs/omap1_defconfig @@ -221,7 +221,6 @@ CONFIG_CRYPTO_PCBC=y CONFIG_CRYPTO_DEFLATE=y CONFIG_CRYPTO_LZO=y # CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_LIBCRC32C=y CONFIG_FONTS=y CONFIG_FONT_8x8=y CONFIG_FONT_8x16=y diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index 6de45d7f6078..d62b17f14d54 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -710,8 +710,6 @@ CONFIG_CRYPTO_DEV_OMAP_DES=m CONFIG_CRC_CCITT=y CONFIG_CRC_T10DIF=y CONFIG_CRC_ITU_T=y -CONFIG_CRC7=y -CONFIG_LIBCRC32C=y CONFIG_DMA_CMA=y CONFIG_FONTS=y CONFIG_FONT_8x8=y diff --git a/arch/arm/configs/spitz_defconfig b/arch/arm/configs/spitz_defconfig index 294d16ddeb18..ac5b7a5aaff6 100644 --- a/arch/arm/configs/spitz_defconfig +++ b/arch/arm/configs/spitz_defconfig @@ -235,7 +235,6 @@ CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_WP512=m CONFIG_CRC_CCITT=y -CONFIG_LIBCRC32C=m CONFIG_FONTS=y CONFIG_FONT_8x8=y CONFIG_FONT_8x16=y diff --git a/arch/arm/configs/stm32_defconfig b/arch/arm/configs/stm32_defconfig index 77048b5e1ec4..423bb41c4225 100644 --- a/arch/arm/configs/stm32_defconfig +++ b/arch/arm/configs/stm32_defconfig @@ -75,7 +75,6 @@ CONFIG_EXT3_FS=y # CONFIG_INOTIFY_USER is not set CONFIG_NLS=y CONFIG_CRC_ITU_T=y -CONFIG_CRC7=y CONFIG_PRINTK_TIME=y # CONFIG_DEBUG_BUGVERBOSE is not set CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y diff --git a/arch/arm/configs/wpcm450_defconfig b/arch/arm/configs/wpcm450_defconfig index 45483deab034..5e4397f7f828 100644 --- a/arch/arm/configs/wpcm450_defconfig +++ b/arch/arm/configs/wpcm450_defconfig @@ -193,7 +193,6 @@ CONFIG_PKCS7_MESSAGE_PARSER=y CONFIG_SYSTEM_TRUSTED_KEYRING=y CONFIG_CRC_CCITT=y CONFIG_CRC_ITU_T=m -CONFIG_LIBCRC32C=y CONFIG_PRINTK_TIME=y CONFIG_DEBUG_KERNEL=y CONFIG_MAGIC_SYSRQ=y diff --git a/arch/arm/lib/crc-t10dif-glue.c b/arch/arm/lib/crc-t10dif-glue.c index d24dee62670e..f3584ba70e57 100644 --- a/arch/arm/lib/crc-t10dif-glue.c +++ b/arch/arm/lib/crc-t10dif-glue.c @@ -69,12 +69,6 @@ static void __exit crc_t10dif_arm_exit(void) } module_exit(crc_t10dif_arm_exit); -bool crc_t10dif_is_optimized(void) -{ - return static_key_enabled(&have_neon); -} -EXPORT_SYMBOL(crc_t10dif_is_optimized); - MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); MODULE_DESCRIPTION("Accelerated CRC-T10DIF using ARM NEON and Crypto Extensions"); MODULE_LICENSE("GPL v2"); diff --git a/arch/arm/lib/crc32-glue.c b/arch/arm/lib/crc32-glue.c index 2c30ba3d80e6..4340351dbde8 100644 --- a/arch/arm/lib/crc32-glue.c +++ b/arch/arm/lib/crc32-glue.c @@ -59,14 +59,14 @@ u32 crc32_le_arch(u32 crc, const u8 *p, size_t len) } EXPORT_SYMBOL(crc32_le_arch); -static u32 crc32c_le_scalar(u32 crc, const u8 *p, size_t len) +static u32 crc32c_scalar(u32 crc, const u8 *p, size_t len) { if (static_branch_likely(&have_crc32)) return crc32c_armv8_le(crc, p, len); - return crc32c_le_base(crc, p, len); + return crc32c_base(crc, p, len); } -u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len) +u32 crc32c_arch(u32 crc, const u8 *p, size_t len) { if (len >= PMULL_MIN_LEN + 15 && static_branch_likely(&have_pmull) && crypto_simd_usable()) { @@ -74,7 +74,7 @@ u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len) /* align p to 16-byte boundary */ if (n) 
{ - crc = crc32c_le_scalar(crc, p, n); + crc = crc32c_scalar(crc, p, n); p += n; len -= n; } @@ -85,9 +85,9 @@ u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len) p += n; len -= n; } - return crc32c_le_scalar(crc, p, len); + return crc32c_scalar(crc, p, len); } -EXPORT_SYMBOL(crc32c_le_arch); +EXPORT_SYMBOL(crc32c_arch); u32 crc32_be_arch(u32 crc, const u8 *p, size_t len) { diff --git a/arch/arm64/lib/crc-t10dif-glue.c b/arch/arm64/lib/crc-t10dif-glue.c index dab7e3796232..a007d0c5f3fe 100644 --- a/arch/arm64/lib/crc-t10dif-glue.c +++ b/arch/arm64/lib/crc-t10dif-glue.c @@ -70,12 +70,6 @@ static void __exit crc_t10dif_arm64_exit(void) } module_exit(crc_t10dif_arm64_exit); -bool crc_t10dif_is_optimized(void) -{ - return static_key_enabled(&have_asimd); -} -EXPORT_SYMBOL(crc_t10dif_is_optimized); - MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); MODULE_DESCRIPTION("CRC-T10DIF using arm64 NEON and Crypto Extensions"); MODULE_LICENSE("GPL v2"); diff --git a/arch/arm64/lib/crc32-glue.c b/arch/arm64/lib/crc32-glue.c index 15c4c9db573e..ed3acd71178f 100644 --- a/arch/arm64/lib/crc32-glue.c +++ b/arch/arm64/lib/crc32-glue.c @@ -22,7 +22,7 @@ asmlinkage u32 crc32_le_arm64_4way(u32 crc, unsigned char const *p, size_t len); asmlinkage u32 crc32c_le_arm64_4way(u32 crc, unsigned char const *p, size_t len); asmlinkage u32 crc32_be_arm64_4way(u32 crc, unsigned char const *p, size_t len); -u32 __pure crc32_le_arch(u32 crc, const u8 *p, size_t len) +u32 crc32_le_arch(u32 crc, const u8 *p, size_t len) { if (!alternative_has_cap_likely(ARM64_HAS_CRC32)) return crc32_le_base(crc, p, len); @@ -43,10 +43,10 @@ u32 __pure crc32_le_arch(u32 crc, const u8 *p, size_t len) } EXPORT_SYMBOL(crc32_le_arch); -u32 __pure crc32c_le_arch(u32 crc, const u8 *p, size_t len) +u32 crc32c_arch(u32 crc, const u8 *p, size_t len) { if (!alternative_has_cap_likely(ARM64_HAS_CRC32)) - return crc32c_le_base(crc, p, len); + return crc32c_base(crc, p, len); if (len >= min_len && cpu_have_named_feature(PMULL) && crypto_simd_usable()) { kernel_neon_begin(); @@ -62,9 +62,9 @@ u32 __pure crc32c_le_arch(u32 crc, const u8 *p, size_t len) return crc32c_le_arm64(crc, p, len); } -EXPORT_SYMBOL(crc32c_le_arch); +EXPORT_SYMBOL(crc32c_arch); -u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len) +u32 crc32_be_arch(u32 crc, const u8 *p, size_t len) { if (!alternative_has_cap_likely(ARM64_HAS_CRC32)) return crc32_be_base(crc, p, len); diff --git a/arch/hexagon/configs/comet_defconfig b/arch/hexagon/configs/comet_defconfig index 6cb764947596..469c025297c6 100644 --- a/arch/hexagon/configs/comet_defconfig +++ b/arch/hexagon/configs/comet_defconfig @@ -75,7 +75,6 @@ CONFIG_CRYPTO_MD5=y CONFIG_CRC_CCITT=y CONFIG_CRC16=y CONFIG_CRC_T10DIF=y -CONFIG_LIBCRC32C=y CONFIG_FRAME_WARN=0 CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y diff --git a/arch/loongarch/lib/crc32-loongarch.c b/arch/loongarch/lib/crc32-loongarch.c index 8af8113ecd9d..c44ee4f32557 100644 --- a/arch/loongarch/lib/crc32-loongarch.c +++ b/arch/loongarch/lib/crc32-loongarch.c @@ -65,10 +65,10 @@ u32 crc32_le_arch(u32 crc, const u8 *p, size_t len) } EXPORT_SYMBOL(crc32_le_arch); -u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len) +u32 crc32c_arch(u32 crc, const u8 *p, size_t len) { if (!static_branch_likely(&have_crc32)) - return crc32c_le_base(crc, p, len); + return crc32c_base(crc, p, len); while (len >= sizeof(u64)) { u64 value = get_unaligned_le64(p); @@ -100,7 +100,7 @@ u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len) return crc; } -EXPORT_SYMBOL(crc32c_le_arch); 
+EXPORT_SYMBOL(crc32c_arch); u32 crc32_be_arch(u32 crc, const u8 *p, size_t len) { diff --git a/arch/mips/configs/bcm47xx_defconfig b/arch/mips/configs/bcm47xx_defconfig index 6a68a96d13f8..f56e8db5da95 100644 --- a/arch/mips/configs/bcm47xx_defconfig +++ b/arch/mips/configs/bcm47xx_defconfig @@ -69,7 +69,6 @@ CONFIG_USB_HCD_BCMA=y CONFIG_USB_HCD_SSB=y CONFIG_LEDS_TRIGGER_TIMER=y CONFIG_LEDS_TRIGGER_DEFAULT_ON=y -CONFIG_CRC32_SARWATE=y CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DEBUG_INFO_REDUCED=y diff --git a/arch/mips/configs/bigsur_defconfig b/arch/mips/configs/bigsur_defconfig index f7c4b3529a2c..fe282630b51c 100644 --- a/arch/mips/configs/bigsur_defconfig +++ b/arch/mips/configs/bigsur_defconfig @@ -239,7 +239,6 @@ CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_LZO=m CONFIG_CRC_T10DIF=m -CONFIG_CRC7=m CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_MEMORY_INIT=y CONFIG_DETECT_HUNG_TASK=y diff --git a/arch/mips/configs/cobalt_defconfig b/arch/mips/configs/cobalt_defconfig index e835730ea7fa..b0b551efac7c 100644 --- a/arch/mips/configs/cobalt_defconfig +++ b/arch/mips/configs/cobalt_defconfig @@ -70,4 +70,3 @@ CONFIG_NFS_FS=y CONFIG_NFS_V3_ACL=y CONFIG_NFSD=y CONFIG_NFSD_V3_ACL=y -CONFIG_LIBCRC32C=y diff --git a/arch/mips/configs/db1xxx_defconfig b/arch/mips/configs/db1xxx_defconfig index 6eff21ff15d5..281dd7d0f805 100644 --- a/arch/mips/configs/db1xxx_defconfig +++ b/arch/mips/configs/db1xxx_defconfig @@ -216,7 +216,6 @@ CONFIG_CRYPTO_USER=y CONFIG_CRYPTO_CRYPTD=y CONFIG_CRYPTO_USER_API_HASH=y CONFIG_CRYPTO_USER_API_SKCIPHER=y -CONFIG_CRC32_SLICEBY4=y CONFIG_FONTS=y CONFIG_FONT_8x8=y CONFIG_MAGIC_SYSRQ=y diff --git a/arch/mips/configs/decstation_64_defconfig b/arch/mips/configs/decstation_64_defconfig index da51b9731db0..9655567614aa 100644 --- a/arch/mips/configs/decstation_64_defconfig +++ b/arch/mips/configs/decstation_64_defconfig @@ -180,7 +180,6 @@ CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_CMAC=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_CRC32=m -CONFIG_CRYPTO_CRCT10DIF=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m diff --git a/arch/mips/configs/decstation_defconfig b/arch/mips/configs/decstation_defconfig index 424e3f011fc2..1539fe8eb34d 100644 --- a/arch/mips/configs/decstation_defconfig +++ b/arch/mips/configs/decstation_defconfig @@ -175,7 +175,6 @@ CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_CMAC=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_CRC32=m -CONFIG_CRYPTO_CRCT10DIF=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m diff --git a/arch/mips/configs/decstation_r4k_defconfig b/arch/mips/configs/decstation_r4k_defconfig index cfc8bf791792..58c36720c94a 100644 --- a/arch/mips/configs/decstation_r4k_defconfig +++ b/arch/mips/configs/decstation_r4k_defconfig @@ -175,7 +175,6 @@ CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_CMAC=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_CRC32=m -CONFIG_CRYPTO_CRCT10DIF=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m diff --git a/arch/mips/configs/fuloong2e_defconfig b/arch/mips/configs/fuloong2e_defconfig index 00329bb5de5a..5ab149cd3178 100644 --- a/arch/mips/configs/fuloong2e_defconfig +++ b/arch/mips/configs/fuloong2e_defconfig @@ -219,4 +219,3 @@ CONFIG_CRYPTO_DEFLATE=m CONFIG_CRYPTO_LZO=m # CONFIG_CRYPTO_HW is not set CONFIG_CRC_CCITT=y -CONFIG_CRC7=m diff --git a/arch/mips/configs/ip32_defconfig b/arch/mips/configs/ip32_defconfig index 930c5f6ed182..121e7e48fa77 100644 --- a/arch/mips/configs/ip32_defconfig +++ b/arch/mips/configs/ip32_defconfig @@ -178,7 +178,6 @@ CONFIG_CRYPTO_TEA=y 
CONFIG_CRYPTO_TWOFISH=y CONFIG_CRYPTO_DEFLATE=y CONFIG_CRC_T10DIF=y -CONFIG_LIBCRC32C=y CONFIG_FONTS=y CONFIG_FONT_8x8=y CONFIG_FONT_8x16=y diff --git a/arch/mips/configs/rt305x_defconfig b/arch/mips/configs/rt305x_defconfig index 332f9094e847..8404e0a9d8b2 100644 --- a/arch/mips/configs/rt305x_defconfig +++ b/arch/mips/configs/rt305x_defconfig @@ -129,7 +129,6 @@ CONFIG_SQUASHFS=y CONFIG_SQUASHFS_XZ=y CONFIG_CRYPTO_ARC4=m CONFIG_CRC_ITU_T=m -CONFIG_CRC32_SARWATE=y # CONFIG_XZ_DEC_X86 is not set # CONFIG_XZ_DEC_POWERPC is not set # CONFIG_XZ_DEC_IA64 is not set diff --git a/arch/mips/configs/xway_defconfig b/arch/mips/configs/xway_defconfig index 08c0aa03fd56..7b91edfe3e07 100644 --- a/arch/mips/configs/xway_defconfig +++ b/arch/mips/configs/xway_defconfig @@ -141,7 +141,6 @@ CONFIG_SQUASHFS=y CONFIG_SQUASHFS_XZ=y CONFIG_CRYPTO_ARC4=m CONFIG_CRC_ITU_T=m -CONFIG_CRC32_SARWATE=y CONFIG_PRINTK_TIME=y CONFIG_STRIP_ASM_SYMS=y CONFIG_DEBUG_FS=y diff --git a/arch/mips/lib/crc32-mips.c b/arch/mips/lib/crc32-mips.c index 083e5d693a16..676a4b3e290b 100644 --- a/arch/mips/lib/crc32-mips.c +++ b/arch/mips/lib/crc32-mips.c @@ -16,15 +16,6 @@ #include <asm/mipsregs.h> #include <linux/unaligned.h> -enum crc_op_size { - b, h, w, d, -}; - -enum crc_type { - crc32, - crc32c, -}; - #ifndef TOOLCHAIN_SUPPORTS_CRC #define _ASM_SET_CRC(OP, SZ, TYPE) \ _ASM_MACRO_3R(OP, rt, rs, rt2, \ @@ -117,10 +108,10 @@ u32 crc32_le_arch(u32 crc, const u8 *p, size_t len) } EXPORT_SYMBOL(crc32_le_arch); -u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len) +u32 crc32c_arch(u32 crc, const u8 *p, size_t len) { if (!static_branch_likely(&have_crc32)) - return crc32c_le_base(crc, p, len); + return crc32c_base(crc, p, len); if (IS_ENABLED(CONFIG_64BIT)) { for (; len >= sizeof(u64); p += sizeof(u64), len -= sizeof(u64)) { @@ -158,7 +149,7 @@ u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len) } return crc; } -EXPORT_SYMBOL(crc32c_le_arch); +EXPORT_SYMBOL(crc32c_arch); u32 crc32_be_arch(u32 crc, const u8 *p, size_t len) { diff --git a/arch/parisc/configs/generic-64bit_defconfig b/arch/parisc/configs/generic-64bit_defconfig index ea623f910748..44197d30b8af 100644 --- a/arch/parisc/configs/generic-64bit_defconfig +++ b/arch/parisc/configs/generic-64bit_defconfig @@ -293,7 +293,6 @@ CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_DEFLATE=m # CONFIG_CRYPTO_HW is not set CONFIG_CRC_CCITT=m -CONFIG_LIBCRC32C=y CONFIG_PRINTK_TIME=y CONFIG_DEBUG_KERNEL=y CONFIG_STRIP_ASM_SYMS=y diff --git a/arch/powerpc/configs/85xx/ge_imp3a_defconfig b/arch/powerpc/configs/85xx/ge_imp3a_defconfig index da6fc203e2dc..6f58ee1edf1f 100644 --- a/arch/powerpc/configs/85xx/ge_imp3a_defconfig +++ b/arch/powerpc/configs/85xx/ge_imp3a_defconfig @@ -223,7 +223,6 @@ CONFIG_NLS_KOI8_U=m CONFIG_NLS_UTF8=y CONFIG_CRC_CCITT=y CONFIG_CRC_T10DIF=y -CONFIG_LIBCRC32C=y CONFIG_MAGIC_SYSRQ=y CONFIG_CRYPTO_CBC=y CONFIG_CRYPTO_MD5=y diff --git a/arch/powerpc/configs/adder875_defconfig b/arch/powerpc/configs/adder875_defconfig index 97f4d4851735..3c6445c98a85 100644 --- a/arch/powerpc/configs/adder875_defconfig +++ b/arch/powerpc/configs/adder875_defconfig @@ -44,7 +44,6 @@ CONFIG_TMPFS=y CONFIG_CRAMFS=y CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y -CONFIG_CRC32_SLICEBY4=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DEBUG_FS=y CONFIG_MAGIC_SYSRQ=y diff --git a/arch/powerpc/configs/ep88xc_defconfig b/arch/powerpc/configs/ep88xc_defconfig index 50cc59eb36cf..354180ab94bc 100644 --- a/arch/powerpc/configs/ep88xc_defconfig +++ b/arch/powerpc/configs/ep88xc_defconfig @@ -47,7 +47,6 @@ 
CONFIG_TMPFS=y CONFIG_CRAMFS=y CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y -CONFIG_CRC32_SLICEBY4=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y diff --git a/arch/powerpc/configs/mpc866_ads_defconfig b/arch/powerpc/configs/mpc866_ads_defconfig index 6f449411abf7..a0d27c59ea78 100644 --- a/arch/powerpc/configs/mpc866_ads_defconfig +++ b/arch/powerpc/configs/mpc866_ads_defconfig @@ -39,4 +39,3 @@ CONFIG_CRAMFS=y CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y CONFIG_CRC_CCITT=y -CONFIG_CRC32_SLICEBY4=y diff --git a/arch/powerpc/configs/mpc885_ads_defconfig b/arch/powerpc/configs/mpc885_ads_defconfig index 77306be62e9e..89da51d724fb 100644 --- a/arch/powerpc/configs/mpc885_ads_defconfig +++ b/arch/powerpc/configs/mpc885_ads_defconfig @@ -70,7 +70,6 @@ CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y CONFIG_CRYPTO=y CONFIG_CRYPTO_DEV_TALITOS=y -CONFIG_CRC32_SLICEBY4=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig index 9d44e6630908..1eb446452fc0 100644 --- a/arch/powerpc/configs/skiroot_defconfig +++ b/arch/powerpc/configs/skiroot_defconfig @@ -281,7 +281,6 @@ CONFIG_LSM="yama,loadpin,safesetid,integrity" # CONFIG_CRYPTO_HW is not set CONFIG_CRC16=y CONFIG_CRC_ITU_T=y -CONFIG_LIBCRC32C=y # CONFIG_XZ_DEC_X86 is not set # CONFIG_XZ_DEC_IA64 is not set # CONFIG_XZ_DEC_ARM is not set diff --git a/arch/powerpc/configs/tqm8xx_defconfig b/arch/powerpc/configs/tqm8xx_defconfig index 383c0966e92f..425f10837a18 100644 --- a/arch/powerpc/configs/tqm8xx_defconfig +++ b/arch/powerpc/configs/tqm8xx_defconfig @@ -54,7 +54,6 @@ CONFIG_TMPFS=y CONFIG_CRAMFS=y CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y -CONFIG_CRC32_SLICEBY4=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y diff --git a/arch/powerpc/lib/crc-t10dif-glue.c b/arch/powerpc/lib/crc-t10dif-glue.c index 730850dbc51d..f411b0120cc5 100644 --- a/arch/powerpc/lib/crc-t10dif-glue.c +++ b/arch/powerpc/lib/crc-t10dif-glue.c @@ -78,12 +78,6 @@ static void __exit crc_t10dif_powerpc_exit(void) } module_exit(crc_t10dif_powerpc_exit); -bool crc_t10dif_is_optimized(void) -{ - return static_key_enabled(&have_vec_crypto); -} -EXPORT_SYMBOL(crc_t10dif_is_optimized); - MODULE_AUTHOR("Daniel Axtens <dja@axtens.net>"); MODULE_DESCRIPTION("CRCT10DIF using vector polynomial multiply-sum instructions"); MODULE_LICENSE("GPL"); diff --git a/arch/powerpc/lib/crc32-glue.c b/arch/powerpc/lib/crc32-glue.c index 79cc954f499f..dbd10f339183 100644 --- a/arch/powerpc/lib/crc32-glue.c +++ b/arch/powerpc/lib/crc32-glue.c @@ -23,18 +23,18 @@ u32 crc32_le_arch(u32 crc, const u8 *p, size_t len) } EXPORT_SYMBOL(crc32_le_arch); -u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len) +u32 crc32c_arch(u32 crc, const u8 *p, size_t len) { unsigned int prealign; unsigned int tail; if (len < (VECTOR_BREAKPOINT + VMX_ALIGN) || !static_branch_likely(&have_vec_crypto) || !crypto_simd_usable()) - return crc32c_le_base(crc, p, len); + return crc32c_base(crc, p, len); if ((unsigned long)p & VMX_ALIGN_MASK) { prealign = VMX_ALIGN - ((unsigned long)p & VMX_ALIGN_MASK); - crc = crc32c_le_base(crc, p, prealign); + crc = crc32c_base(crc, p, prealign); len -= prealign; p += prealign; } @@ -52,12 +52,12 @@ u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len) tail = len & VMX_ALIGN_MASK; if (tail) { p += len & ~VMX_ALIGN_MASK; - crc = crc32c_le_base(crc, p, tail); + crc = crc32c_base(crc, p, tail); } return crc; } 
-EXPORT_SYMBOL(crc32c_le_arch); +EXPORT_SYMBOL(crc32c_arch); u32 crc32_be_arch(u32 crc, const u8 *p, size_t len) { diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 0409be768666..10116f68569d 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -25,6 +25,8 @@ config RISCV select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE select ARCH_HAS_BINFMT_FLAT select ARCH_HAS_CRC32 if RISCV_ISA_ZBC + select ARCH_HAS_CRC64 if 64BIT && RISCV_ISA_ZBC + select ARCH_HAS_CRC_T10DIF if RISCV_ISA_ZBC select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VIRTUAL if MMU select ARCH_HAS_DEBUG_VM_PGTABLE diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile index 79368a895fee..b1c46153606a 100644 --- a/arch/riscv/lib/Makefile +++ b/arch/riscv/lib/Makefile @@ -16,6 +16,11 @@ lib-$(CONFIG_MMU) += uaccess.o lib-$(CONFIG_64BIT) += tishift.o lib-$(CONFIG_RISCV_ISA_ZICBOZ) += clear_page.o obj-$(CONFIG_CRC32_ARCH) += crc32-riscv.o +crc32-riscv-y := crc32.o crc32_msb.o crc32_lsb.o +obj-$(CONFIG_CRC64_ARCH) += crc64-riscv.o +crc64-riscv-y := crc64.o crc64_msb.o crc64_lsb.o +obj-$(CONFIG_CRC_T10DIF_ARCH) += crc-t10dif-riscv.o +crc-t10dif-riscv-y := crc-t10dif.o crc16_msb.o obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o lib-$(CONFIG_RISCV_ISA_V) += xor.o lib-$(CONFIG_RISCV_ISA_V) += riscv_v_helpers.o diff --git a/arch/riscv/lib/crc-clmul-consts.h b/arch/riscv/lib/crc-clmul-consts.h new file mode 100644 index 000000000000..8d73449235ef --- /dev/null +++ b/arch/riscv/lib/crc-clmul-consts.h @@ -0,0 +1,122 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * CRC constants generated by: + * + * ./scripts/gen-crc-consts.py riscv_clmul crc16_msb_0x8bb7,crc32_msb_0x04c11db7,crc32_lsb_0xedb88320,crc32_lsb_0x82f63b78,crc64_msb_0x42f0e1eba9ea3693,crc64_lsb_0x9a6c9329ac4bc9b5 + * + * Do not edit manually. 
+ */ + +struct crc_clmul_consts { + unsigned long fold_across_2_longs_const_hi; + unsigned long fold_across_2_longs_const_lo; + unsigned long barrett_reduction_const_1; + unsigned long barrett_reduction_const_2; +}; + +/* + * Constants generated for most-significant-bit-first CRC-16 using + * G(x) = x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x^1 + x^0 + */ +static const struct crc_clmul_consts crc16_msb_0x8bb7_consts __maybe_unused = { +#ifdef CONFIG_64BIT + .fold_across_2_longs_const_hi = 0x0000000000001faa, /* x^192 mod G */ + .fold_across_2_longs_const_lo = 0x000000000000a010, /* x^128 mod G */ + .barrett_reduction_const_1 = 0xfb2d2bfc0e99d245, /* floor(x^79 / G) */ + .barrett_reduction_const_2 = 0x0000000000008bb7, /* G - x^16 */ +#else + .fold_across_2_longs_const_hi = 0x00005890, /* x^96 mod G */ + .fold_across_2_longs_const_lo = 0x0000f249, /* x^64 mod G */ + .barrett_reduction_const_1 = 0xfb2d2bfc, /* floor(x^47 / G) */ + .barrett_reduction_const_2 = 0x00008bb7, /* G - x^16 */ +#endif +}; + +/* + * Constants generated for most-significant-bit-first CRC-32 using + * G(x) = x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^11 + x^10 + x^8 + x^7 + + * x^5 + x^4 + x^2 + x^1 + x^0 + */ +static const struct crc_clmul_consts crc32_msb_0x04c11db7_consts __maybe_unused = { +#ifdef CONFIG_64BIT + .fold_across_2_longs_const_hi = 0x00000000c5b9cd4c, /* x^192 mod G */ + .fold_across_2_longs_const_lo = 0x00000000e8a45605, /* x^128 mod G */ + .barrett_reduction_const_1 = 0x826880efa40da72d, /* floor(x^95 / G) */ + .barrett_reduction_const_2 = 0x0000000004c11db7, /* G - x^32 */ +#else + .fold_across_2_longs_const_hi = 0xf200aa66, /* x^96 mod G */ + .fold_across_2_longs_const_lo = 0x490d678d, /* x^64 mod G */ + .barrett_reduction_const_1 = 0x826880ef, /* floor(x^63 / G) */ + .barrett_reduction_const_2 = 0x04c11db7, /* G - x^32 */ +#endif +}; + +/* + * Constants generated for least-significant-bit-first CRC-32 using + * G(x) = x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^11 + x^10 + x^8 + x^7 + + * x^5 + x^4 + x^2 + x^1 + x^0 + */ +static const struct crc_clmul_consts crc32_lsb_0xedb88320_consts __maybe_unused = { +#ifdef CONFIG_64BIT + .fold_across_2_longs_const_hi = 0x65673b4600000000, /* x^191 mod G */ + .fold_across_2_longs_const_lo = 0x9ba54c6f00000000, /* x^127 mod G */ + .barrett_reduction_const_1 = 0xb4e5b025f7011641, /* floor(x^95 / G) */ + .barrett_reduction_const_2 = 0x00000000edb88320, /* (G - x^32) * x^32 */ +#else + .fold_across_2_longs_const_hi = 0xccaa009e, /* x^95 mod G */ + .fold_across_2_longs_const_lo = 0xb8bc6765, /* x^63 mod G */ + .barrett_reduction_const_1 = 0xf7011641, /* floor(x^63 / G) */ + .barrett_reduction_const_2 = 0xedb88320, /* (G - x^32) * x^0 */ +#endif +}; + +/* + * Constants generated for least-significant-bit-first CRC-32 using + * G(x) = x^32 + x^28 + x^27 + x^26 + x^25 + x^23 + x^22 + x^20 + x^19 + x^18 + + * x^14 + x^13 + x^11 + x^10 + x^9 + x^8 + x^6 + x^0 + */ +static const struct crc_clmul_consts crc32_lsb_0x82f63b78_consts __maybe_unused = { +#ifdef CONFIG_64BIT + .fold_across_2_longs_const_hi = 0x3743f7bd00000000, /* x^191 mod G */ + .fold_across_2_longs_const_lo = 0x3171d43000000000, /* x^127 mod G */ + .barrett_reduction_const_1 = 0x4869ec38dea713f1, /* floor(x^95 / G) */ + .barrett_reduction_const_2 = 0x0000000082f63b78, /* (G - x^32) * x^32 */ +#else + .fold_across_2_longs_const_hi = 0x493c7d27, /* x^95 mod G */ + .fold_across_2_longs_const_lo = 0xdd45aab8, /* x^63 mod G */ + .barrett_reduction_const_1 = 0xdea713f1, /* floor(x^63 / G) */ + 
.barrett_reduction_const_2 = 0x82f63b78, /* (G - x^32) * x^0 */ +#endif +}; + +/* + * Constants generated for most-significant-bit-first CRC-64 using + * G(x) = x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 + + * x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 + + * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 + + * x^7 + x^4 + x^1 + x^0 + */ +#ifdef CONFIG_64BIT +static const struct crc_clmul_consts crc64_msb_0x42f0e1eba9ea3693_consts __maybe_unused = { + .fold_across_2_longs_const_hi = 0x4eb938a7d257740e, /* x^192 mod G */ + .fold_across_2_longs_const_lo = 0x05f5c3c7eb52fab6, /* x^128 mod G */ + .barrett_reduction_const_1 = 0xabc694e836627c39, /* floor(x^127 / G) */ + .barrett_reduction_const_2 = 0x42f0e1eba9ea3693, /* G - x^64 */ +}; +#endif + +/* + * Constants generated for least-significant-bit-first CRC-64 using + * G(x) = x^64 + x^63 + x^61 + x^59 + x^58 + x^56 + x^55 + x^52 + x^49 + x^48 + + * x^47 + x^46 + x^44 + x^41 + x^37 + x^36 + x^34 + x^32 + x^31 + x^28 + + * x^26 + x^23 + x^22 + x^19 + x^16 + x^13 + x^12 + x^10 + x^9 + x^6 + + * x^4 + x^3 + x^0 + */ +#ifdef CONFIG_64BIT +static const struct crc_clmul_consts crc64_lsb_0x9a6c9329ac4bc9b5_consts __maybe_unused = { + .fold_across_2_longs_const_hi = 0xeadc41fd2ba3d420, /* x^191 mod G */ + .fold_across_2_longs_const_lo = 0x21e9761e252621ac, /* x^127 mod G */ + .barrett_reduction_const_1 = 0x27ecfa329aef9f77, /* floor(x^127 / G) */ + .barrett_reduction_const_2 = 0x9a6c9329ac4bc9b5, /* (G - x^64) * x^0 */ +}; +#endif diff --git a/arch/riscv/lib/crc-clmul-template.h b/arch/riscv/lib/crc-clmul-template.h new file mode 100644 index 000000000000..77187e7f1762 --- /dev/null +++ b/arch/riscv/lib/crc-clmul-template.h @@ -0,0 +1,265 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Copyright 2025 Google LLC */ + +/* + * This file is a "template" that generates a CRC function optimized using the + * RISC-V Zbc (scalar carryless multiplication) extension. The includer of this + * file must define the following parameters to specify the type of CRC: + * + * crc_t: the data type of the CRC, e.g. u32 for a 32-bit CRC + * LSB_CRC: 0 for a msb (most-significant-bit) first CRC, i.e. natural + * mapping between bits and polynomial coefficients + * 1 for a lsb (least-significant-bit) first CRC, i.e. reflected + * mapping between bits and polynomial coefficients + */ + +#include <asm/byteorder.h> +#include <linux/minmax.h> + +#define CRC_BITS (8 * sizeof(crc_t)) /* a.k.a. 'n' */ + +static inline unsigned long clmul(unsigned long a, unsigned long b) +{ + unsigned long res; + + asm(".option push\n" + ".option arch,+zbc\n" + "clmul %0, %1, %2\n" + ".option pop\n" + : "=r" (res) : "r" (a), "r" (b)); + return res; +} + +static inline unsigned long clmulh(unsigned long a, unsigned long b) +{ + unsigned long res; + + asm(".option push\n" + ".option arch,+zbc\n" + "clmulh %0, %1, %2\n" + ".option pop\n" + : "=r" (res) : "r" (a), "r" (b)); + return res; +} + +static inline unsigned long clmulr(unsigned long a, unsigned long b) +{ + unsigned long res; + + asm(".option push\n" + ".option arch,+zbc\n" + "clmulr %0, %1, %2\n" + ".option pop\n" + : "=r" (res) : "r" (a), "r" (b)); + return res; +} + +/* + * crc_load_long() loads one "unsigned long" of aligned data bytes, producing a + * polynomial whose bit order matches the CRC's bit order. 
+ */ +#ifdef CONFIG_64BIT +# if LSB_CRC +# define crc_load_long(x) le64_to_cpup(x) +# else +# define crc_load_long(x) be64_to_cpup(x) +# endif +#else +# if LSB_CRC +# define crc_load_long(x) le32_to_cpup(x) +# else +# define crc_load_long(x) be32_to_cpup(x) +# endif +#endif + +/* XOR @crc into the end of @msgpoly that represents the high-order terms. */ +static inline unsigned long +crc_clmul_prep(crc_t crc, unsigned long msgpoly) +{ +#if LSB_CRC + return msgpoly ^ crc; +#else + return msgpoly ^ ((unsigned long)crc << (BITS_PER_LONG - CRC_BITS)); +#endif +} + +/* + * Multiply the long-sized @msgpoly by x^n (a.k.a. x^CRC_BITS) and reduce it + * modulo the generator polynomial G. This gives the CRC of @msgpoly. + */ +static inline crc_t +crc_clmul_long(unsigned long msgpoly, const struct crc_clmul_consts *consts) +{ + unsigned long tmp; + + /* + * First step of Barrett reduction with integrated multiplication by + * x^n: calculate floor((msgpoly * x^n) / G). This is the value by + * which G needs to be multiplied to cancel out the x^n and higher terms + * of msgpoly * x^n. Do it using the following formula: + * + * msb-first: + * floor((msgpoly * floor(x^(BITS_PER_LONG-1+n) / G)) / x^(BITS_PER_LONG-1)) + * lsb-first: + * floor((msgpoly * floor(x^(BITS_PER_LONG-1+n) / G) * x) / x^BITS_PER_LONG) + * + * barrett_reduction_const_1 contains floor(x^(BITS_PER_LONG-1+n) / G), + * which fits a long exactly. Using any lower power of x there would + * not carry enough precision through the calculation, while using any + * higher power of x would require extra instructions to handle a wider + * multiplication. In the msb-first case, using this power of x results + * in needing a floored division by x^(BITS_PER_LONG-1), which matches + * what clmulr produces. In the lsb-first case, a factor of x gets + * implicitly introduced by each carryless multiplication (shown as + * '* x' above), and the floored division instead needs to be by + * x^BITS_PER_LONG which matches what clmul produces. + */ +#if LSB_CRC + tmp = clmul(msgpoly, consts->barrett_reduction_const_1); +#else + tmp = clmulr(msgpoly, consts->barrett_reduction_const_1); +#endif + + /* + * Second step of Barrett reduction: + * + * crc := (msgpoly * x^n) + (G * floor((msgpoly * x^n) / G)) + * + * This reduces (msgpoly * x^n) modulo G by adding the appropriate + * multiple of G to it. The result uses only the x^0..x^(n-1) terms. + * HOWEVER, since the unreduced value (msgpoly * x^n) is zero in those + * terms in the first place, it is more efficient to do the equivalent: + * + * crc := ((G - x^n) * floor((msgpoly * x^n) / G)) mod x^n + * + * In the lsb-first case further modify it to the following which avoids + * a shift, as the crc ends up in the physically low n bits from clmulr: + * + * product := ((G - x^n) * x^(BITS_PER_LONG - n)) * floor((msgpoly * x^n) / G) * x + * crc := floor(product / x^(BITS_PER_LONG + 1 - n)) mod x^n + * + * barrett_reduction_const_2 contains the constant multiplier (G - x^n) + * or (G - x^n) * x^(BITS_PER_LONG - n) from the formulas above. The + * cast of the result to crc_t is essential, as it applies the mod x^n! + */ +#if LSB_CRC + return clmulr(tmp, consts->barrett_reduction_const_2); +#else + return clmul(tmp, consts->barrett_reduction_const_2); +#endif +} + +/* Update @crc with the data from @msgpoly. 
*/ +static inline crc_t +crc_clmul_update_long(crc_t crc, unsigned long msgpoly, + const struct crc_clmul_consts *consts) +{ + return crc_clmul_long(crc_clmul_prep(crc, msgpoly), consts); +} + +/* Update @crc with 1 <= @len < sizeof(unsigned long) bytes of data. */ +static inline crc_t +crc_clmul_update_partial(crc_t crc, const u8 *p, size_t len, + const struct crc_clmul_consts *consts) +{ + unsigned long msgpoly; + size_t i; + +#if LSB_CRC + msgpoly = (unsigned long)p[0] << (BITS_PER_LONG - 8); + for (i = 1; i < len; i++) + msgpoly = (msgpoly >> 8) ^ ((unsigned long)p[i] << (BITS_PER_LONG - 8)); +#else + msgpoly = p[0]; + for (i = 1; i < len; i++) + msgpoly = (msgpoly << 8) ^ p[i]; +#endif + + if (len >= sizeof(crc_t)) { + #if LSB_CRC + msgpoly ^= (unsigned long)crc << (BITS_PER_LONG - 8*len); + #else + msgpoly ^= (unsigned long)crc << (8*len - CRC_BITS); + #endif + return crc_clmul_long(msgpoly, consts); + } +#if LSB_CRC + msgpoly ^= (unsigned long)crc << (BITS_PER_LONG - 8*len); + return crc_clmul_long(msgpoly, consts) ^ (crc >> (8*len)); +#else + msgpoly ^= crc >> (CRC_BITS - 8*len); + return crc_clmul_long(msgpoly, consts) ^ (crc << (8*len)); +#endif +} + +static inline crc_t +crc_clmul(crc_t crc, const void *p, size_t len, + const struct crc_clmul_consts *consts) +{ + size_t align; + + /* This implementation assumes that the CRC fits in an unsigned long. */ + BUILD_BUG_ON(sizeof(crc_t) > sizeof(unsigned long)); + + /* If the buffer is not long-aligned, align it. */ + align = (unsigned long)p % sizeof(unsigned long); + if (align && len) { + align = min(sizeof(unsigned long) - align, len); + crc = crc_clmul_update_partial(crc, p, align, consts); + p += align; + len -= align; + } + + if (len >= 4 * sizeof(unsigned long)) { + unsigned long m0, m1; + + m0 = crc_clmul_prep(crc, crc_load_long(p)); + m1 = crc_load_long(p + sizeof(unsigned long)); + p += 2 * sizeof(unsigned long); + len -= 2 * sizeof(unsigned long); + /* + * Main loop. Each iteration starts with a message polynomial + * (x^BITS_PER_LONG)*m0 + m1, then logically extends it by two + * more longs of data to form x^(3*BITS_PER_LONG)*m0 + + * x^(2*BITS_PER_LONG)*m1 + x^BITS_PER_LONG*m2 + m3, then + * "folds" that back into a congruent (modulo G) value that uses + * just m0 and m1 again. This is done by multiplying m0 by the + * precomputed constant (x^(3*BITS_PER_LONG) mod G) and m1 by + * the precomputed constant (x^(2*BITS_PER_LONG) mod G), then + * adding the results to m2 and m3 as appropriate. Each such + * multiplication produces a result twice the length of a long, + * which in RISC-V is two instructions clmul and clmulh. + * + * This could be changed to fold across more than 2 longs at a + * time if there is a CPU that can take advantage of it. + */ + do { + unsigned long p0, p1, p2, p3; + + p0 = clmulh(m0, consts->fold_across_2_longs_const_hi); + p1 = clmul(m0, consts->fold_across_2_longs_const_hi); + p2 = clmulh(m1, consts->fold_across_2_longs_const_lo); + p3 = clmul(m1, consts->fold_across_2_longs_const_lo); + m0 = (LSB_CRC ? p1 ^ p3 : p0 ^ p2) ^ crc_load_long(p); + m1 = (LSB_CRC ? 
p0 ^ p2 : p1 ^ p3) ^ + crc_load_long(p + sizeof(unsigned long)); + + p += 2 * sizeof(unsigned long); + len -= 2 * sizeof(unsigned long); + } while (len >= 2 * sizeof(unsigned long)); + + crc = crc_clmul_long(m0, consts); + crc = crc_clmul_update_long(crc, m1, consts); + } + + while (len >= sizeof(unsigned long)) { + crc = crc_clmul_update_long(crc, crc_load_long(p), consts); + p += sizeof(unsigned long); + len -= sizeof(unsigned long); + } + + if (len) + crc = crc_clmul_update_partial(crc, p, len, consts); + + return crc; +} diff --git a/arch/riscv/lib/crc-clmul.h b/arch/riscv/lib/crc-clmul.h new file mode 100644 index 000000000000..dd1736245815 --- /dev/null +++ b/arch/riscv/lib/crc-clmul.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Copyright 2025 Google LLC */ + +#ifndef _RISCV_CRC_CLMUL_H +#define _RISCV_CRC_CLMUL_H + +#include <linux/types.h> +#include "crc-clmul-consts.h" + +u16 crc16_msb_clmul(u16 crc, const void *p, size_t len, + const struct crc_clmul_consts *consts); +u32 crc32_msb_clmul(u32 crc, const void *p, size_t len, + const struct crc_clmul_consts *consts); +u32 crc32_lsb_clmul(u32 crc, const void *p, size_t len, + const struct crc_clmul_consts *consts); +#ifdef CONFIG_64BIT +u64 crc64_msb_clmul(u64 crc, const void *p, size_t len, + const struct crc_clmul_consts *consts); +u64 crc64_lsb_clmul(u64 crc, const void *p, size_t len, + const struct crc_clmul_consts *consts); +#endif + +#endif /* _RISCV_CRC_CLMUL_H */ diff --git a/arch/riscv/lib/crc-t10dif.c b/arch/riscv/lib/crc-t10dif.c new file mode 100644 index 000000000000..e6b0051ccd86 --- /dev/null +++ b/arch/riscv/lib/crc-t10dif.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * RISC-V optimized CRC-T10DIF function + * + * Copyright 2025 Google LLC + */ + +#include <asm/hwcap.h> +#include <asm/alternative-macros.h> +#include <linux/crc-t10dif.h> +#include <linux/module.h> + +#include "crc-clmul.h" + +u16 crc_t10dif_arch(u16 crc, const u8 *p, size_t len) +{ + if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC)) + return crc16_msb_clmul(crc, p, len, &crc16_msb_0x8bb7_consts); + return crc_t10dif_generic(crc, p, len); +} +EXPORT_SYMBOL(crc_t10dif_arch); + +MODULE_DESCRIPTION("RISC-V optimized CRC-T10DIF function"); +MODULE_LICENSE("GPL"); diff --git a/arch/riscv/lib/crc16_msb.c b/arch/riscv/lib/crc16_msb.c new file mode 100644 index 000000000000..554d295e95f5 --- /dev/null +++ b/arch/riscv/lib/crc16_msb.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * RISC-V optimized most-significant-bit-first CRC16 + * + * Copyright 2025 Google LLC + */ + +#include "crc-clmul.h" + +typedef u16 crc_t; +#define LSB_CRC 0 +#include "crc-clmul-template.h" + +u16 crc16_msb_clmul(u16 crc, const void *p, size_t len, + const struct crc_clmul_consts *consts) +{ + return crc_clmul(crc, p, len, consts); +} diff --git a/arch/riscv/lib/crc32-riscv.c b/arch/riscv/lib/crc32-riscv.c deleted file mode 100644 index 53d56ab422c7..000000000000 --- a/arch/riscv/lib/crc32-riscv.c +++ /dev/null @@ -1,311 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Accelerated CRC32 implementation with Zbc extension. 
- * - * Copyright (C) 2024 Intel Corporation - */ - -#include <asm/hwcap.h> -#include <asm/alternative-macros.h> -#include <asm/byteorder.h> - -#include <linux/types.h> -#include <linux/minmax.h> -#include <linux/crc32poly.h> -#include <linux/crc32.h> -#include <linux/byteorder/generic.h> -#include <linux/module.h> - -/* - * Refer to https://www.corsix.org/content/barrett-reduction-polynomials for - * better understanding of how this math works. - * - * let "+" denotes polynomial add (XOR) - * let "-" denotes polynomial sub (XOR) - * let "*" denotes polynomial multiplication - * let "/" denotes polynomial floor division - * let "S" denotes source data, XLEN bit wide - * let "P" denotes CRC32 polynomial - * let "T" denotes 2^(XLEN+32) - * let "QT" denotes quotient of T/P, with the bit for 2^XLEN being implicit - * - * crc32(S, P) - * => S * (2^32) - S * (2^32) / P * P - * => lowest 32 bits of: S * (2^32) / P * P - * => lowest 32 bits of: S * (2^32) * (T / P) / T * P - * => lowest 32 bits of: S * (2^32) * quotient / T * P - * => lowest 32 bits of: S * quotient / 2^XLEN * P - * => lowest 32 bits of: (clmul_high_part(S, QT) + S) * P - * => clmul_low_part(clmul_high_part(S, QT) + S, P) - * - * In terms of below implementations, the BE case is more intuitive, since the - * higher order bit sits at more significant position. - */ - -#if __riscv_xlen == 64 -/* Slide by XLEN bits per iteration */ -# define STEP_ORDER 3 - -/* Each below polynomial quotient has an implicit bit for 2^XLEN */ - -/* Polynomial quotient of (2^(XLEN+32))/CRC32_POLY, in LE format */ -# define CRC32_POLY_QT_LE 0x5a72d812fb808b20 - -/* Polynomial quotient of (2^(XLEN+32))/CRC32C_POLY, in LE format */ -# define CRC32C_POLY_QT_LE 0xa434f61c6f5389f8 - -/* Polynomial quotient of (2^(XLEN+32))/CRC32_POLY, in BE format, it should be - * the same as the bit-reversed version of CRC32_POLY_QT_LE - */ -# define CRC32_POLY_QT_BE 0x04d101df481b4e5a - -static inline u64 crc32_le_prep(u32 crc, unsigned long const *ptr) -{ - return (u64)crc ^ (__force u64)__cpu_to_le64(*ptr); -} - -static inline u32 crc32_le_zbc(unsigned long s, u32 poly, unsigned long poly_qt) -{ - u32 crc; - - /* We don't have a "clmulrh" insn, so use clmul + slli instead. */ - asm volatile (".option push\n" - ".option arch,+zbc\n" - "clmul %0, %1, %2\n" - "slli %0, %0, 1\n" - "xor %0, %0, %1\n" - "clmulr %0, %0, %3\n" - "srli %0, %0, 32\n" - ".option pop\n" - : "=&r" (crc) - : "r" (s), - "r" (poly_qt), - "r" ((u64)poly << 32) - :); - return crc; -} - -static inline u64 crc32_be_prep(u32 crc, unsigned long const *ptr) -{ - return ((u64)crc << 32) ^ (__force u64)__cpu_to_be64(*ptr); -} - -#elif __riscv_xlen == 32 -# define STEP_ORDER 2 -/* Each quotient should match the upper half of its analog in RV64 */ -# define CRC32_POLY_QT_LE 0xfb808b20 -# define CRC32C_POLY_QT_LE 0x6f5389f8 -# define CRC32_POLY_QT_BE 0x04d101df - -static inline u32 crc32_le_prep(u32 crc, unsigned long const *ptr) -{ - return crc ^ (__force u32)__cpu_to_le32(*ptr); -} - -static inline u32 crc32_le_zbc(unsigned long s, u32 poly, unsigned long poly_qt) -{ - u32 crc; - - /* We don't have a "clmulrh" insn, so use clmul + slli instead. 
*/ - asm volatile (".option push\n" - ".option arch,+zbc\n" - "clmul %0, %1, %2\n" - "slli %0, %0, 1\n" - "xor %0, %0, %1\n" - "clmulr %0, %0, %3\n" - ".option pop\n" - : "=&r" (crc) - : "r" (s), - "r" (poly_qt), - "r" (poly) - :); - return crc; -} - -static inline u32 crc32_be_prep(u32 crc, unsigned long const *ptr) -{ - return crc ^ (__force u32)__cpu_to_be32(*ptr); -} - -#else -# error "Unexpected __riscv_xlen" -#endif - -static inline u32 crc32_be_zbc(unsigned long s) -{ - u32 crc; - - asm volatile (".option push\n" - ".option arch,+zbc\n" - "clmulh %0, %1, %2\n" - "xor %0, %0, %1\n" - "clmul %0, %0, %3\n" - ".option pop\n" - : "=&r" (crc) - : "r" (s), - "r" (CRC32_POLY_QT_BE), - "r" (CRC32_POLY_BE) - :); - return crc; -} - -#define STEP (1 << STEP_ORDER) -#define OFFSET_MASK (STEP - 1) - -typedef u32 (*fallback)(u32 crc, unsigned char const *p, size_t len); - -static inline u32 crc32_le_unaligned(u32 crc, unsigned char const *p, - size_t len, u32 poly, - unsigned long poly_qt) -{ - size_t bits = len * 8; - unsigned long s = 0; - u32 crc_low = 0; - - for (int i = 0; i < len; i++) - s = ((unsigned long)*p++ << (__riscv_xlen - 8)) | (s >> 8); - - s ^= (unsigned long)crc << (__riscv_xlen - bits); - if (__riscv_xlen == 32 || len < sizeof(u32)) - crc_low = crc >> bits; - - crc = crc32_le_zbc(s, poly, poly_qt); - crc ^= crc_low; - - return crc; -} - -static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p, - size_t len, u32 poly, - unsigned long poly_qt, - fallback crc_fb) -{ - size_t offset, head_len, tail_len; - unsigned long const *p_ul; - unsigned long s; - - asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0, - RISCV_ISA_EXT_ZBC, 1) - : : : : legacy); - - /* Handle the unaligned head. */ - offset = (unsigned long)p & OFFSET_MASK; - if (offset && len) { - head_len = min(STEP - offset, len); - crc = crc32_le_unaligned(crc, p, head_len, poly, poly_qt); - p += head_len; - len -= head_len; - } - - tail_len = len & OFFSET_MASK; - len = len >> STEP_ORDER; - p_ul = (unsigned long const *)p; - - for (int i = 0; i < len; i++) { - s = crc32_le_prep(crc, p_ul); - crc = crc32_le_zbc(s, poly, poly_qt); - p_ul++; - } - - /* Handle the tail bytes. */ - p = (unsigned char const *)p_ul; - if (tail_len) - crc = crc32_le_unaligned(crc, p, tail_len, poly, poly_qt); - - return crc; - -legacy: - return crc_fb(crc, p, len); -} - -u32 __pure crc32_le_arch(u32 crc, const u8 *p, size_t len) -{ - return crc32_le_generic(crc, p, len, CRC32_POLY_LE, CRC32_POLY_QT_LE, - crc32_le_base); -} -EXPORT_SYMBOL(crc32_le_arch); - -u32 __pure crc32c_le_arch(u32 crc, const u8 *p, size_t len) -{ - return crc32_le_generic(crc, p, len, CRC32C_POLY_LE, - CRC32C_POLY_QT_LE, crc32c_le_base); -} -EXPORT_SYMBOL(crc32c_le_arch); - -static inline u32 crc32_be_unaligned(u32 crc, unsigned char const *p, - size_t len) -{ - size_t bits = len * 8; - unsigned long s = 0; - u32 crc_low = 0; - - s = 0; - for (int i = 0; i < len; i++) - s = *p++ | (s << 8); - - if (__riscv_xlen == 32 || len < sizeof(u32)) { - s ^= crc >> (32 - bits); - crc_low = crc << bits; - } else { - s ^= (unsigned long)crc << (bits - 32); - } - - crc = crc32_be_zbc(s); - crc ^= crc_low; - - return crc; -} - -u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len) -{ - size_t offset, head_len, tail_len; - unsigned long const *p_ul; - unsigned long s; - - asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0, - RISCV_ISA_EXT_ZBC, 1) - : : : : legacy); - - /* Handle the unaligned head. 
*/ - offset = (unsigned long)p & OFFSET_MASK; - if (offset && len) { - head_len = min(STEP - offset, len); - crc = crc32_be_unaligned(crc, p, head_len); - p += head_len; - len -= head_len; - } - - tail_len = len & OFFSET_MASK; - len = len >> STEP_ORDER; - p_ul = (unsigned long const *)p; - - for (int i = 0; i < len; i++) { - s = crc32_be_prep(crc, p_ul); - crc = crc32_be_zbc(s); - p_ul++; - } - - /* Handle the tail bytes. */ - p = (unsigned char const *)p_ul; - if (tail_len) - crc = crc32_be_unaligned(crc, p, tail_len); - - return crc; - -legacy: - return crc32_be_base(crc, p, len); -} -EXPORT_SYMBOL(crc32_be_arch); - -u32 crc32_optimizations(void) -{ - if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC)) - return CRC32_LE_OPTIMIZATION | - CRC32_BE_OPTIMIZATION | - CRC32C_OPTIMIZATION; - return 0; -} -EXPORT_SYMBOL(crc32_optimizations); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Accelerated CRC32 implementation with Zbc extension"); diff --git a/arch/riscv/lib/crc32.c b/arch/riscv/lib/crc32.c new file mode 100644 index 000000000000..a3188b7d9c40 --- /dev/null +++ b/arch/riscv/lib/crc32.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * RISC-V optimized CRC32 functions + * + * Copyright 2025 Google LLC + */ + +#include <asm/hwcap.h> +#include <asm/alternative-macros.h> +#include <linux/crc32.h> +#include <linux/module.h> + +#include "crc-clmul.h" + +u32 crc32_le_arch(u32 crc, const u8 *p, size_t len) +{ + if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC)) + return crc32_lsb_clmul(crc, p, len, + &crc32_lsb_0xedb88320_consts); + return crc32_le_base(crc, p, len); +} +EXPORT_SYMBOL(crc32_le_arch); + +u32 crc32_be_arch(u32 crc, const u8 *p, size_t len) +{ + if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC)) + return crc32_msb_clmul(crc, p, len, + &crc32_msb_0x04c11db7_consts); + return crc32_be_base(crc, p, len); +} +EXPORT_SYMBOL(crc32_be_arch); + +u32 crc32c_arch(u32 crc, const u8 *p, size_t len) +{ + if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC)) + return crc32_lsb_clmul(crc, p, len, + &crc32_lsb_0x82f63b78_consts); + return crc32c_base(crc, p, len); +} +EXPORT_SYMBOL(crc32c_arch); + +u32 crc32_optimizations(void) +{ + if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC)) + return CRC32_LE_OPTIMIZATION | + CRC32_BE_OPTIMIZATION | + CRC32C_OPTIMIZATION; + return 0; +} +EXPORT_SYMBOL(crc32_optimizations); + +MODULE_DESCRIPTION("RISC-V optimized CRC32 functions"); +MODULE_LICENSE("GPL"); diff --git a/arch/riscv/lib/crc32_lsb.c b/arch/riscv/lib/crc32_lsb.c new file mode 100644 index 000000000000..72fd67e7470c --- /dev/null +++ b/arch/riscv/lib/crc32_lsb.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * RISC-V optimized least-significant-bit-first CRC32 + * + * Copyright 2025 Google LLC + */ + +#include "crc-clmul.h" + +typedef u32 crc_t; +#define LSB_CRC 1 +#include "crc-clmul-template.h" + +u32 crc32_lsb_clmul(u32 crc, const void *p, size_t len, + const struct crc_clmul_consts *consts) +{ + return crc_clmul(crc, p, len, consts); +} diff --git a/arch/riscv/lib/crc32_msb.c b/arch/riscv/lib/crc32_msb.c new file mode 100644 index 000000000000..fdbeaccc369f --- /dev/null +++ b/arch/riscv/lib/crc32_msb.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * RISC-V optimized most-significant-bit-first CRC32 + * + * Copyright 2025 Google LLC + */ + +#include "crc-clmul.h" + +typedef u32 crc_t; +#define LSB_CRC 0 +#include "crc-clmul-template.h" + +u32 crc32_msb_clmul(u32 crc, const void *p, size_t len, + const struct crc_clmul_consts 
*consts) +{ + return crc_clmul(crc, p, len, consts); +} diff --git a/arch/riscv/lib/crc64.c b/arch/riscv/lib/crc64.c new file mode 100644 index 000000000000..f0015a27836a --- /dev/null +++ b/arch/riscv/lib/crc64.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * RISC-V optimized CRC64 functions + * + * Copyright 2025 Google LLC + */ + +#include <asm/hwcap.h> +#include <asm/alternative-macros.h> +#include <linux/crc64.h> +#include <linux/module.h> + +#include "crc-clmul.h" + +u64 crc64_be_arch(u64 crc, const u8 *p, size_t len) +{ + if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC)) + return crc64_msb_clmul(crc, p, len, + &crc64_msb_0x42f0e1eba9ea3693_consts); + return crc64_be_generic(crc, p, len); +} +EXPORT_SYMBOL(crc64_be_arch); + +u64 crc64_nvme_arch(u64 crc, const u8 *p, size_t len) +{ + if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC)) + return crc64_lsb_clmul(crc, p, len, + &crc64_lsb_0x9a6c9329ac4bc9b5_consts); + return crc64_nvme_generic(crc, p, len); +} +EXPORT_SYMBOL(crc64_nvme_arch); + +MODULE_DESCRIPTION("RISC-V optimized CRC64 functions"); +MODULE_LICENSE("GPL"); diff --git a/arch/riscv/lib/crc64_lsb.c b/arch/riscv/lib/crc64_lsb.c new file mode 100644 index 000000000000..c5371bb85d90 --- /dev/null +++ b/arch/riscv/lib/crc64_lsb.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * RISC-V optimized least-significant-bit-first CRC64 + * + * Copyright 2025 Google LLC + */ + +#include "crc-clmul.h" + +typedef u64 crc_t; +#define LSB_CRC 1 +#include "crc-clmul-template.h" + +u64 crc64_lsb_clmul(u64 crc, const void *p, size_t len, + const struct crc_clmul_consts *consts) +{ + return crc_clmul(crc, p, len, consts); +} diff --git a/arch/riscv/lib/crc64_msb.c b/arch/riscv/lib/crc64_msb.c new file mode 100644 index 000000000000..1925d1dbe225 --- /dev/null +++ b/arch/riscv/lib/crc64_msb.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * RISC-V optimized most-significant-bit-first CRC64 + * + * Copyright 2025 Google LLC + */ + +#include "crc-clmul.h" + +typedef u64 crc_t; +#define LSB_CRC 0 +#include "crc-clmul-template.h" + +u64 crc64_msb_clmul(u64 crc, const void *p, size_t len, + const struct crc_clmul_consts *consts) +{ + return crc_clmul(crc, p, len, consts); +} diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 80bdfbae6e5b..26aa455d4068 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -815,9 +815,6 @@ CONFIG_SYSTEM_BLACKLIST_KEYRING=y CONFIG_CORDIC=m CONFIG_CRYPTO_LIB_CURVE25519=m CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m -CONFIG_CRC4=m -CONFIG_CRC7=m -CONFIG_CRC8=m CONFIG_RANDOM32_SELFTEST=y CONFIG_XZ_DEC_MICROLZMA=y CONFIG_DMA_CMA=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 449a0e996b96..8392f8a5ad6d 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -803,9 +803,6 @@ CONFIG_CORDIC=m CONFIG_PRIME_NUMBERS=m CONFIG_CRYPTO_LIB_CURVE25519=m CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m -CONFIG_CRC4=m -CONFIG_CRC7=m -CONFIG_CRC8=m CONFIG_XZ_DEC_MICROLZMA=y CONFIG_DMA_CMA=y CONFIG_CMA_SIZE_MBYTES=0 diff --git a/arch/s390/lib/crc32-glue.c b/arch/s390/lib/crc32-glue.c index 137080e61f90..124214a27340 100644 --- a/arch/s390/lib/crc32-glue.c +++ b/arch/s390/lib/crc32-glue.c @@ -62,7 +62,7 @@ static DEFINE_STATIC_KEY_FALSE(have_vxrs); DEFINE_CRC32_VX(crc32_le_arch, crc32_le_vgfm_16, crc32_le_base) DEFINE_CRC32_VX(crc32_be_arch, crc32_be_vgfm_16, crc32_be_base) -DEFINE_CRC32_VX(crc32c_le_arch, 
crc32c_le_vgfm_16, crc32c_le_base) +DEFINE_CRC32_VX(crc32c_arch, crc32c_le_vgfm_16, crc32c_base) static int __init crc32_s390_init(void) { diff --git a/arch/sh/configs/se7206_defconfig b/arch/sh/configs/se7206_defconfig index 78e0e7be57ee..472fdf365cad 100644 --- a/arch/sh/configs/se7206_defconfig +++ b/arch/sh/configs/se7206_defconfig @@ -104,5 +104,3 @@ CONFIG_CRYPTO_LZO=y CONFIG_CRC_CCITT=y CONFIG_CRC16=y CONFIG_CRC_ITU_T=y -CONFIG_CRC7=y -CONFIG_LIBCRC32C=y diff --git a/arch/sh/configs/sh2007_defconfig b/arch/sh/configs/sh2007_defconfig index 977ae405a75e..1b1174a07e36 100644 --- a/arch/sh/configs/sh2007_defconfig +++ b/arch/sh/configs/sh2007_defconfig @@ -195,4 +195,3 @@ CONFIG_CRYPTO_LZO=y # CONFIG_CRYPTO_HW is not set CONFIG_CRC_CCITT=y CONFIG_CRC16=y -CONFIG_LIBCRC32C=y diff --git a/arch/sh/configs/titan_defconfig b/arch/sh/configs/titan_defconfig index 99bc0e889287..11ff5fd510de 100644 --- a/arch/sh/configs/titan_defconfig +++ b/arch/sh/configs/titan_defconfig @@ -266,4 +266,3 @@ CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRC16=m -CONFIG_LIBCRC32C=m diff --git a/arch/sparc/configs/sparc32_defconfig b/arch/sparc/configs/sparc32_defconfig index 5010164de3e4..f6341b063b01 100644 --- a/arch/sparc/configs/sparc32_defconfig +++ b/arch/sparc/configs/sparc32_defconfig @@ -94,4 +94,3 @@ CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TWOFISH=m # CONFIG_CRYPTO_ANSI_CPRNG is not set # CONFIG_CRYPTO_HW is not set -CONFIG_LIBCRC32C=m diff --git a/arch/sparc/configs/sparc64_defconfig b/arch/sparc/configs/sparc64_defconfig index 0bb0d4da5227..01b2bdfbf9a8 100644 --- a/arch/sparc/configs/sparc64_defconfig +++ b/arch/sparc/configs/sparc64_defconfig @@ -230,7 +230,6 @@ CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRC16=m -CONFIG_LIBCRC32C=m CONFIG_VCC=m CONFIG_PATA_CMD64X=y CONFIG_IP_PNP=y diff --git a/arch/sparc/lib/crc32_glue.c b/arch/sparc/lib/crc32_glue.c index 41076d2b1fd2..a70752c729cf 100644 --- a/arch/sparc/lib/crc32_glue.c +++ b/arch/sparc/lib/crc32_glue.c @@ -27,17 +27,17 @@ EXPORT_SYMBOL(crc32_le_arch); void crc32c_sparc64(u32 *crcp, const u64 *data, size_t len); -u32 crc32c_le_arch(u32 crc, const u8 *data, size_t len) +u32 crc32c_arch(u32 crc, const u8 *data, size_t len) { size_t n = -(uintptr_t)data & 7; if (!static_branch_likely(&have_crc32c_opcode)) - return crc32c_le_base(crc, data, len); + return crc32c_base(crc, data, len); if (n) { /* Data isn't 8-byte aligned. Align it. 
*/ n = min(n, len); - crc = crc32c_le_base(crc, data, n); + crc = crc32c_base(crc, data, n); data += n; len -= n; } @@ -48,10 +48,10 @@ u32 crc32c_le_arch(u32 crc, const u8 *data, size_t len) len -= n; } if (len) - crc = crc32c_le_base(crc, data, len); + crc = crc32c_base(crc, data, len); return crc; } -EXPORT_SYMBOL(crc32c_le_arch); +EXPORT_SYMBOL(crc32c_arch); u32 crc32_be_arch(u32 crc, const u8 *data, size_t len) { diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 05b4eca156cf..9427b5292ca2 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -77,7 +77,8 @@ config X86 select ARCH_HAS_CPU_FINALIZE_INIT select ARCH_HAS_CPU_PASID if IOMMU_SVA select ARCH_HAS_CRC32 - select ARCH_HAS_CRC_T10DIF if X86_64 + select ARCH_HAS_CRC64 if X86_64 + select ARCH_HAS_CRC_T10DIF select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEBUG_VM_PGTABLE if !X86_PAE diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 11e95fc62636..3e9ab5cdade4 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -1536,26 +1536,6 @@ DEFINE_GCM_ALGS(vaes_avx10_512, FLAG_AVX10_512, AES_GCM_KEY_AVX10_SIZE, 800); #endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */ -/* - * This is a list of CPU models that are known to suffer from downclocking when - * zmm registers (512-bit vectors) are used. On these CPUs, the AES mode - * implementations with zmm registers won't be used by default. Implementations - * with ymm registers (256-bit vectors) will be used by default instead. - */ -static const struct x86_cpu_id zmm_exclusion_list[] = { - X86_MATCH_VFM(INTEL_SKYLAKE_X, 0), - X86_MATCH_VFM(INTEL_ICELAKE_X, 0), - X86_MATCH_VFM(INTEL_ICELAKE_D, 0), - X86_MATCH_VFM(INTEL_ICELAKE, 0), - X86_MATCH_VFM(INTEL_ICELAKE_L, 0), - X86_MATCH_VFM(INTEL_ICELAKE_NNPI, 0), - X86_MATCH_VFM(INTEL_TIGERLAKE_L, 0), - X86_MATCH_VFM(INTEL_TIGERLAKE, 0), - /* Allow Rocket Lake and later, and Sapphire Rapids and later. */ - /* Also allow AMD CPUs (starting with Zen 4, the first with AVX-512). */ - {}, -}; - static int __init register_avx_algs(void) { int err; @@ -1600,7 +1580,7 @@ static int __init register_avx_algs(void) if (err) return err; - if (x86_match_cpu(zmm_exclusion_list)) { + if (boot_cpu_has(X86_FEATURE_PREFER_YMM)) { int i; aes_xts_alg_vaes_avx10_512.base.cra_priority = 1; diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 70e1516b15da..6c2c152d8a67 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -480,6 +480,7 @@ #define X86_FEATURE_AMD_FAST_CPPC (21*32 + 5) /* Fast CPPC */ #define X86_FEATURE_AMD_HETEROGENEOUS_CORES (21*32 + 6) /* Heterogeneous Core Topology */ #define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32 + 7) /* Workload Classification */ +#define X86_FEATURE_PREFER_YMM (21*32 + 8) /* Avoid ZMM registers due to downclocking */ /* * BUG word(s) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 4cbb2e69bea1..cdc9813871ef 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -512,6 +512,25 @@ static void init_intel_misc_features(struct cpuinfo_x86 *c) wrmsrl(MSR_MISC_FEATURES_ENABLES, msr); } +/* + * This is a list of Intel CPUs that are known to suffer from downclocking when + * ZMM registers (512-bit vectors) are used. On these CPUs, when the kernel + * executes SIMD-optimized code such as cryptography functions or CRCs, it + * should prefer 256-bit (YMM) code to 512-bit (ZMM) code. 
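For reference, the consumers of this new flag elsewhere in the series reduce to one cpufeature test. The sketch below is illustrative only and not part of the patch; prefer_zmm_impl() is a hypothetical helper, while the individual checks mirror those in the aesni-intel_glue.c hunk above and in the INIT_CRC_PCLMUL macro added later in this diff.

/*
 * Illustrative sketch, not part of the patch: use 512-bit (ZMM) code only
 * when the CPU supports it and is not on the downclocking exclusion list.
 */
static inline bool prefer_zmm_impl(void)
{
	return boot_cpu_has(X86_FEATURE_AVX512BW) &&
	       boot_cpu_has(X86_FEATURE_AVX512VL) &&
	       !boot_cpu_has(X86_FEATURE_PREFER_YMM) &&
	       cpu_has_xfeatures(XFEATURE_MASK_AVX512, NULL);
}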
+ */ +static const struct x86_cpu_id zmm_exclusion_list[] = { + X86_MATCH_VFM(INTEL_SKYLAKE_X, 0), + X86_MATCH_VFM(INTEL_ICELAKE_X, 0), + X86_MATCH_VFM(INTEL_ICELAKE_D, 0), + X86_MATCH_VFM(INTEL_ICELAKE, 0), + X86_MATCH_VFM(INTEL_ICELAKE_L, 0), + X86_MATCH_VFM(INTEL_ICELAKE_NNPI, 0), + X86_MATCH_VFM(INTEL_TIGERLAKE_L, 0), + X86_MATCH_VFM(INTEL_TIGERLAKE, 0), + /* Allow Rocket Lake and later, and Sapphire Rapids and later. */ + {}, +}; + static void init_intel(struct cpuinfo_x86 *c) { early_init_intel(c); @@ -590,6 +609,9 @@ static void init_intel(struct cpuinfo_x86 *c) } #endif + if (x86_match_cpu(zmm_exclusion_list)) + set_cpu_cap(c, X86_FEATURE_PREFER_YMM); + /* Work around errata */ srat_detect_node(c); diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 64ccecedc9f8..1c50352eb49f 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -42,8 +42,11 @@ obj-$(CONFIG_CRC32_ARCH) += crc32-x86.o crc32-x86-y := crc32-glue.o crc32-pclmul.o crc32-x86-$(CONFIG_64BIT) += crc32c-3way.o +obj-$(CONFIG_CRC64_ARCH) += crc64-x86.o +crc64-x86-y := crc64-glue.o crc64-pclmul.o + obj-$(CONFIG_CRC_T10DIF_ARCH) += crc-t10dif-x86.o -crc-t10dif-x86-y := crc-t10dif-glue.o crct10dif-pcl-asm_64.o +crc-t10dif-x86-y := crc-t10dif-glue.o crc16-msb-pclmul.o obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o obj-y += iomem.o diff --git a/arch/x86/lib/crc-pclmul-consts.h b/arch/x86/lib/crc-pclmul-consts.h new file mode 100644 index 000000000000..fcc63c064333 --- /dev/null +++ b/arch/x86/lib/crc-pclmul-consts.h @@ -0,0 +1,195 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * CRC constants generated by: + * + * ./scripts/gen-crc-consts.py x86_pclmul crc16_msb_0x8bb7,crc32_lsb_0xedb88320,crc64_msb_0x42f0e1eba9ea3693,crc64_lsb_0x9a6c9329ac4bc9b5 + * + * Do not edit manually. 
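Each entry below is, per its comment, some (x^k mod G) scaled by a fixed power of x. As a point of reference only, and not part of the patch or of scripts/gen-crc-consts.py, the remainder x^k mod G for a degree-64 generator can be computed bit by bit as in the following user-space sketch; the byte reflection and the extra scaling factors noted in the per-entry comments are intentionally left out, and xpow_mod() is an illustrative name.

/*
 * Sketch, not part of the patch: compute x^k mod G over GF(2) for a
 * degree-64 generator whose x^64 term is implicit, so 'g' holds only the
 * low 64 coefficients.  Each iteration multiplies the running remainder
 * by x and reduces by G whenever the degree reaches 64.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t xpow_mod(unsigned int k, uint64_t g)
{
	uint64_t r = 1;			/* x^0 */

	while (k--) {
		int carry = r >> 63;	/* coefficient of x^63 */

		r <<= 1;		/* multiply by x */
		if (carry)
			r ^= g;		/* x^64 == g (mod G) */
	}
	return r;
}

int main(void)
{
	/*
	 * Generator of crc64_be (CRC-64/XZ).  The result can be compared
	 * against the crc64_msb fold_across_128_bits LO64_TERMS entry below,
	 * which is documented as (x^128 mod G) * x^0.
	 */
	printf("x^128 mod G = %016llx\n",
	       (unsigned long long)xpow_mod(128, 0x42f0e1eba9ea3693ULL));
	return 0;
}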
+ */ + +/* + * CRC folding constants generated for most-significant-bit-first CRC-16 using + * G(x) = x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x^1 + x^0 + */ +static const struct { + u8 bswap_mask[16]; + u64 fold_across_2048_bits_consts[2]; + u64 fold_across_1024_bits_consts[2]; + u64 fold_across_512_bits_consts[2]; + u64 fold_across_256_bits_consts[2]; + u64 fold_across_128_bits_consts[2]; + u8 shuf_table[48]; + u64 barrett_reduction_consts[2]; +} crc16_msb_0x8bb7_consts ____cacheline_aligned __maybe_unused = { + .bswap_mask = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}, + .fold_across_2048_bits_consts = { + 0xdccf000000000000, /* LO64_TERMS: (x^2000 mod G) * x^48 */ + 0x4b0b000000000000, /* HI64_TERMS: (x^2064 mod G) * x^48 */ + }, + .fold_across_1024_bits_consts = { + 0x9d9d000000000000, /* LO64_TERMS: (x^976 mod G) * x^48 */ + 0x7cf5000000000000, /* HI64_TERMS: (x^1040 mod G) * x^48 */ + }, + .fold_across_512_bits_consts = { + 0x044c000000000000, /* LO64_TERMS: (x^464 mod G) * x^48 */ + 0xe658000000000000, /* HI64_TERMS: (x^528 mod G) * x^48 */ + }, + .fold_across_256_bits_consts = { + 0x6ee3000000000000, /* LO64_TERMS: (x^208 mod G) * x^48 */ + 0xe7b5000000000000, /* HI64_TERMS: (x^272 mod G) * x^48 */ + }, + .fold_across_128_bits_consts = { + 0x2d56000000000000, /* LO64_TERMS: (x^80 mod G) * x^48 */ + 0x06df000000000000, /* HI64_TERMS: (x^144 mod G) * x^48 */ + }, + .shuf_table = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + }, + .barrett_reduction_consts = { + 0x8bb7000000000000, /* LO64_TERMS: (G - x^16) * x^48 */ + 0xf65a57f81d33a48a, /* HI64_TERMS: (floor(x^79 / G) * x) - x^64 */ + }, +}; + +/* + * CRC folding constants generated for least-significant-bit-first CRC-32 using + * G(x) = x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^11 + x^10 + x^8 + x^7 + + * x^5 + x^4 + x^2 + x^1 + x^0 + */ +static const struct { + u64 fold_across_2048_bits_consts[2]; + u64 fold_across_1024_bits_consts[2]; + u64 fold_across_512_bits_consts[2]; + u64 fold_across_256_bits_consts[2]; + u64 fold_across_128_bits_consts[2]; + u8 shuf_table[48]; + u64 barrett_reduction_consts[2]; +} crc32_lsb_0xedb88320_consts ____cacheline_aligned __maybe_unused = { + .fold_across_2048_bits_consts = { + 0x00000000ce3371cb, /* HI64_TERMS: (x^2079 mod G) * x^32 */ + 0x00000000e95c1271, /* LO64_TERMS: (x^2015 mod G) * x^32 */ + }, + .fold_across_1024_bits_consts = { + 0x0000000033fff533, /* HI64_TERMS: (x^1055 mod G) * x^32 */ + 0x00000000910eeec1, /* LO64_TERMS: (x^991 mod G) * x^32 */ + }, + .fold_across_512_bits_consts = { + 0x000000008f352d95, /* HI64_TERMS: (x^543 mod G) * x^32 */ + 0x000000001d9513d7, /* LO64_TERMS: (x^479 mod G) * x^32 */ + }, + .fold_across_256_bits_consts = { + 0x00000000f1da05aa, /* HI64_TERMS: (x^287 mod G) * x^32 */ + 0x0000000081256527, /* LO64_TERMS: (x^223 mod G) * x^32 */ + }, + .fold_across_128_bits_consts = { + 0x00000000ae689191, /* HI64_TERMS: (x^159 mod G) * x^32 */ + 0x00000000ccaa009e, /* LO64_TERMS: (x^95 mod G) * x^32 */ + }, + .shuf_table = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + }, + .barrett_reduction_consts = { + 0xb4e5b025f7011641, /* HI64_TERMS: floor(x^95 / G) */ + 0x00000001db710640, /* LO64_TERMS: (G - x^32) * x^31 */ + }, +}; + +/* + * CRC folding 
constants generated for most-significant-bit-first CRC-64 using + * G(x) = x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 + + * x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 + + * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 + + * x^7 + x^4 + x^1 + x^0 + */ +static const struct { + u8 bswap_mask[16]; + u64 fold_across_2048_bits_consts[2]; + u64 fold_across_1024_bits_consts[2]; + u64 fold_across_512_bits_consts[2]; + u64 fold_across_256_bits_consts[2]; + u64 fold_across_128_bits_consts[2]; + u8 shuf_table[48]; + u64 barrett_reduction_consts[2]; +} crc64_msb_0x42f0e1eba9ea3693_consts ____cacheline_aligned __maybe_unused = { + .bswap_mask = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}, + .fold_across_2048_bits_consts = { + 0x7f52691a60ddc70d, /* LO64_TERMS: (x^2048 mod G) * x^0 */ + 0x7036b0389f6a0c82, /* HI64_TERMS: (x^2112 mod G) * x^0 */ + }, + .fold_across_1024_bits_consts = { + 0x05cf79dea9ac37d6, /* LO64_TERMS: (x^1024 mod G) * x^0 */ + 0x001067e571d7d5c2, /* HI64_TERMS: (x^1088 mod G) * x^0 */ + }, + .fold_across_512_bits_consts = { + 0x5f6843ca540df020, /* LO64_TERMS: (x^512 mod G) * x^0 */ + 0xddf4b6981205b83f, /* HI64_TERMS: (x^576 mod G) * x^0 */ + }, + .fold_across_256_bits_consts = { + 0x571bee0a227ef92b, /* LO64_TERMS: (x^256 mod G) * x^0 */ + 0x44bef2a201b5200c, /* HI64_TERMS: (x^320 mod G) * x^0 */ + }, + .fold_across_128_bits_consts = { + 0x05f5c3c7eb52fab6, /* LO64_TERMS: (x^128 mod G) * x^0 */ + 0x4eb938a7d257740e, /* HI64_TERMS: (x^192 mod G) * x^0 */ + }, + .shuf_table = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + }, + .barrett_reduction_consts = { + 0x42f0e1eba9ea3693, /* LO64_TERMS: (G - x^64) * x^0 */ + 0x578d29d06cc4f872, /* HI64_TERMS: (floor(x^127 / G) * x) - x^64 */ + }, +}; + +/* + * CRC folding constants generated for least-significant-bit-first CRC-64 using + * G(x) = x^64 + x^63 + x^61 + x^59 + x^58 + x^56 + x^55 + x^52 + x^49 + x^48 + + * x^47 + x^46 + x^44 + x^41 + x^37 + x^36 + x^34 + x^32 + x^31 + x^28 + + * x^26 + x^23 + x^22 + x^19 + x^16 + x^13 + x^12 + x^10 + x^9 + x^6 + + * x^4 + x^3 + x^0 + */ +static const struct { + u64 fold_across_2048_bits_consts[2]; + u64 fold_across_1024_bits_consts[2]; + u64 fold_across_512_bits_consts[2]; + u64 fold_across_256_bits_consts[2]; + u64 fold_across_128_bits_consts[2]; + u8 shuf_table[48]; + u64 barrett_reduction_consts[2]; +} crc64_lsb_0x9a6c9329ac4bc9b5_consts ____cacheline_aligned __maybe_unused = { + .fold_across_2048_bits_consts = { + 0x37ccd3e14069cabc, /* HI64_TERMS: (x^2111 mod G) * x^0 */ + 0xa043808c0f782663, /* LO64_TERMS: (x^2047 mod G) * x^0 */ + }, + .fold_across_1024_bits_consts = { + 0xa1ca681e733f9c40, /* HI64_TERMS: (x^1087 mod G) * x^0 */ + 0x5f852fb61e8d92dc, /* LO64_TERMS: (x^1023 mod G) * x^0 */ + }, + .fold_across_512_bits_consts = { + 0x0c32cdb31e18a84a, /* HI64_TERMS: (x^575 mod G) * x^0 */ + 0x62242240ace5045a, /* LO64_TERMS: (x^511 mod G) * x^0 */ + }, + .fold_across_256_bits_consts = { + 0xb0bc2e589204f500, /* HI64_TERMS: (x^319 mod G) * x^0 */ + 0xe1e0bb9d45d7a44c, /* LO64_TERMS: (x^255 mod G) * x^0 */ + }, + .fold_across_128_bits_consts = { + 0xeadc41fd2ba3d420, /* HI64_TERMS: (x^191 mod G) * x^0 */ + 0x21e9761e252621ac, /* LO64_TERMS: (x^127 mod G) * x^0 */ + }, + .shuf_table = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 0, 1, 2, 3, 4, 5, 6, 
7, 8, 9, 10, 11, 12, 13, 14, 15, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + }, + .barrett_reduction_consts = { + 0x27ecfa329aef9f77, /* HI64_TERMS: floor(x^127 / G) */ + 0x34d926535897936a, /* LO64_TERMS: (G - x^64 - x^0) / x */ + }, +}; diff --git a/arch/x86/lib/crc-pclmul-template.S b/arch/x86/lib/crc-pclmul-template.S new file mode 100644 index 000000000000..ae0b6144c503 --- /dev/null +++ b/arch/x86/lib/crc-pclmul-template.S @@ -0,0 +1,582 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +// +// Template to generate [V]PCLMULQDQ-based CRC functions for x86 +// +// Copyright 2025 Google LLC +// +// Author: Eric Biggers <ebiggers@google.com> + +#include <linux/linkage.h> +#include <linux/objtool.h> + +// Offsets within the generated constants table +.set OFFSETOF_BSWAP_MASK, -5*16 // msb-first CRCs only +.set OFFSETOF_FOLD_ACROSS_2048_BITS_CONSTS, -4*16 // must precede next +.set OFFSETOF_FOLD_ACROSS_1024_BITS_CONSTS, -3*16 // must precede next +.set OFFSETOF_FOLD_ACROSS_512_BITS_CONSTS, -2*16 // must precede next +.set OFFSETOF_FOLD_ACROSS_256_BITS_CONSTS, -1*16 // must precede next +.set OFFSETOF_FOLD_ACROSS_128_BITS_CONSTS, 0*16 // must be 0 +.set OFFSETOF_SHUF_TABLE, 1*16 +.set OFFSETOF_BARRETT_REDUCTION_CONSTS, 4*16 + +// Emit a VEX (or EVEX) coded instruction if allowed, or emulate it using the +// corresponding non-VEX instruction plus any needed moves. The supported +// instruction formats are: +// +// - Two-arg [src, dst], where the non-VEX format is the same. +// - Three-arg [src1, src2, dst] where the non-VEX format is +// [src1, src2_and_dst]. If src2 != dst, then src1 must != dst too. +// +// \insn gives the instruction without a "v" prefix and including any immediate +// argument if needed to make the instruction follow one of the above formats. +// If \unaligned_mem_tmp is given, then the emitted non-VEX code moves \arg1 to +// it first; this is needed when \arg1 is an unaligned mem operand. +.macro _cond_vex insn:req, arg1:req, arg2:req, arg3, unaligned_mem_tmp +.if AVX_LEVEL == 0 + // VEX not allowed. Emulate it. + .ifnb \arg3 // Three-arg [src1, src2, dst] + .ifc "\arg2", "\arg3" // src2 == dst? + .ifnb \unaligned_mem_tmp + movdqu \arg1, \unaligned_mem_tmp + \insn \unaligned_mem_tmp, \arg3 + .else + \insn \arg1, \arg3 + .endif + .else // src2 != dst + .ifc "\arg1", "\arg3" + .error "Can't have src1 == dst when src2 != dst" + .endif + .ifnb \unaligned_mem_tmp + movdqu \arg1, \unaligned_mem_tmp + movdqa \arg2, \arg3 + \insn \unaligned_mem_tmp, \arg3 + .else + movdqa \arg2, \arg3 + \insn \arg1, \arg3 + .endif + .endif + .else // Two-arg [src, dst] + .ifnb \unaligned_mem_tmp + movdqu \arg1, \unaligned_mem_tmp + \insn \unaligned_mem_tmp, \arg2 + .else + \insn \arg1, \arg2 + .endif + .endif +.else + // VEX is allowed. Emit the desired instruction directly. + .ifnb \arg3 + v\insn \arg1, \arg2, \arg3 + .else + v\insn \arg1, \arg2 + .endif +.endif +.endm + +// Broadcast an aligned 128-bit mem operand to all 128-bit lanes of a vector +// register of length VL. +.macro _vbroadcast src, dst +.if VL == 16 + _cond_vex movdqa, \src, \dst +.elseif VL == 32 + vbroadcasti128 \src, \dst +.else + vbroadcasti32x4 \src, \dst +.endif +.endm + +// Load \vl bytes from the unaligned mem operand \src into \dst, and if the CRC +// is msb-first use \bswap_mask to reflect the bytes within each 128-bit lane. 
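This reflection is an ordinary byte reversal of each 16-byte lane, done with pshufb and the bswap_mask {15, 14, ..., 1, 0} from the constants tables above, so that the first message byte ends up supplying the highest-degree coefficients. A plain-C sketch of the per-lane effect, not part of the patch (reflect_lane() is an illustrative name):

/*
 * Sketch, not part of the patch: the effect on one 16-byte lane of pshufb
 * with bswap_mask = {15, 14, ..., 1, 0}, i.e. what _load_data does for
 * msb-first CRCs before any carry-less multiplication.
 */
static void reflect_lane(unsigned char lane[16])
{
	for (int i = 0; i < 8; i++) {
		unsigned char tmp = lane[i];

		lane[i] = lane[15 - i];
		lane[15 - i] = tmp;
	}
}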
+.macro _load_data vl, src, bswap_mask, dst +.if \vl < 64 + _cond_vex movdqu, "\src", \dst +.else + vmovdqu8 \src, \dst +.endif +.if !LSB_CRC + _cond_vex pshufb, \bswap_mask, \dst, \dst +.endif +.endm + +.macro _prepare_v0 vl, v0, v1, bswap_mask +.if LSB_CRC + .if \vl < 64 + _cond_vex pxor, (BUF), \v0, \v0, unaligned_mem_tmp=\v1 + .else + vpxorq (BUF), \v0, \v0 + .endif +.else + _load_data \vl, (BUF), \bswap_mask, \v1 + .if \vl < 64 + _cond_vex pxor, \v1, \v0, \v0 + .else + vpxorq \v1, \v0, \v0 + .endif +.endif +.endm + +// The x^0..x^63 terms, i.e. poly128 mod x^64, i.e. the physically low qword for +// msb-first order or the physically high qword for lsb-first order +#define LO64_TERMS 0 + +// The x^64..x^127 terms, i.e. floor(poly128 / x^64), i.e. the physically high +// qword for msb-first order or the physically low qword for lsb-first order +#define HI64_TERMS 1 + +// Multiply the given \src1_terms of each 128-bit lane of \src1 by the given +// \src2_terms of each 128-bit lane of \src2, and write the result(s) to \dst. +.macro _pclmulqdq src1, src1_terms, src2, src2_terms, dst + _cond_vex "pclmulqdq $((\src1_terms ^ LSB_CRC) << 4) ^ (\src2_terms ^ LSB_CRC),", \ + \src1, \src2, \dst +.endm + +// Fold \acc into \data and store the result back into \acc. \data can be an +// unaligned mem operand if using VEX is allowed and the CRC is lsb-first so no +// byte-reflection is needed; otherwise it must be a vector register. \consts +// is a vector register containing the needed fold constants, and \tmp is a +// temporary vector register. All arguments must be the same length. +.macro _fold_vec acc, data, consts, tmp + _pclmulqdq \consts, HI64_TERMS, \acc, HI64_TERMS, \tmp + _pclmulqdq \consts, LO64_TERMS, \acc, LO64_TERMS, \acc +.if AVX_LEVEL <= 2 + _cond_vex pxor, \data, \tmp, \tmp + _cond_vex pxor, \tmp, \acc, \acc +.else + vpternlogq $0x96, \data, \tmp, \acc +.endif +.endm + +// Fold \acc into \data and store the result back into \acc. \data is an +// unaligned mem operand, \consts is a vector register containing the needed +// fold constants, \bswap_mask is a vector register containing the +// byte-reflection table if the CRC is msb-first, and \tmp1 and \tmp2 are +// temporary vector registers. All arguments must have length \vl. +.macro _fold_vec_mem vl, acc, data, consts, bswap_mask, tmp1, tmp2 +.if AVX_LEVEL == 0 || !LSB_CRC + _load_data \vl, \data, \bswap_mask, \tmp1 + _fold_vec \acc, \tmp1, \consts, \tmp2 +.else + _fold_vec \acc, \data, \consts, \tmp1 +.endif +.endm + +// Load the constants for folding across 2**i vectors of length VL at a time +// into all 128-bit lanes of the vector register CONSTS. +.macro _load_vec_folding_consts i + _vbroadcast OFFSETOF_FOLD_ACROSS_128_BITS_CONSTS+(4-LOG2_VL-\i)*16(CONSTS_PTR), \ + CONSTS +.endm + +// Given vector registers \v0 and \v1 of length \vl, fold \v0 into \v1 and store +// the result back into \v0. If the remaining length mod \vl is nonzero, also +// fold \vl data bytes from BUF. For both operations the fold distance is \vl. +// \consts must be a register of length \vl containing the fold constants. 
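All of these folds come down to the same per-128-bit-lane operation that _fold_vec implements: multiply the accumulator by x^(8 * fold distance) using two precomputed remainders of G, then XOR in the next block of data. The following self-contained user-space sketch, not part of the patch, models one msb-first fold across 128 bits for the crc64_be generator, with a bit-at-a-time carry-less multiply standing in for pclmulqdq; the function names are illustrative and compiler __int128 support is assumed.

/*
 * Sketch, not part of the patch: one "fold across 128 bits" step for an
 * msb-first CRC-64, modelling what _fold_vec does in each 128-bit lane.
 * Writing acc = hi*x^64 + lo, the new accumulator is
 *
 *	clmul(hi, x^192 mod G) ^ clmul(lo, x^128 mod G) ^ data
 *
 * which is congruent to acc*x^128 + data (mod G); each product has degree
 * at most 126, so it still fits in 128 bits.
 */
#include <stdint.h>
#include <stdio.h>

typedef unsigned __int128 u128;

/* Carry-less 64x64 -> 128-bit multiply, i.e. what one pclmulqdq computes. */
static u128 clmul64(uint64_t a, uint64_t b)
{
	u128 r = 0;

	for (int i = 0; i < 64; i++)
		if ((b >> i) & 1)
			r ^= (u128)a << i;
	return r;
}

/*
 * k_hi and k_lo correspond to the HI64_TERMS and LO64_TERMS entries of
 * fold_across_128_bits_consts, i.e. x^192 mod G and x^128 mod G.
 */
static u128 fold128(u128 acc, u128 data, uint64_t k_hi, uint64_t k_lo)
{
	return clmul64((uint64_t)(acc >> 64), k_hi) ^
	       clmul64((uint64_t)acc, k_lo) ^ data;
}

/* x^k mod G, with G's x^64 term implicit in 'g'. */
static uint64_t xpow_mod(unsigned int k, uint64_t g)
{
	uint64_t r = 1;

	while (k--)
		r = (r >> 63) ? (r << 1) ^ g : r << 1;
	return r;
}

/*
 * Naive bit-at-a-time reduction of an nwords*64-bit polynomial mod G
 * (least significant word first; w[] is clobbered).  The template instead
 * reduces the final 128 bits with Barrett reduction, i.e. two more
 * carry-less multiplies, as described further down.
 */
static uint64_t poly_mod(uint64_t w[], int nwords, uint64_t g)
{
	for (int i = nwords * 64 - 1; i >= 64; i--)
		if ((w[i / 64] >> (i % 64)) & 1) {
			int j = i - 64;

			/* XOR in G * x^j = x^i + g * x^j */
			w[i / 64] ^= (uint64_t)1 << (i % 64);
			w[j / 64] ^= g << (j % 64);
			if (j % 64)
				w[j / 64 + 1] ^= g >> (64 - j % 64);
		}
	return w[0];
}

int main(void)
{
	uint64_t g = 0x42f0e1eba9ea3693ULL;	/* crc64_be generator */
	uint64_t k_hi = xpow_mod(192, g), k_lo = xpow_mod(128, g);
	u128 acc  = (u128)0x0123456789abcdefULL << 64 | 0xfedcba9876543210ULL;
	u128 data = (u128)0x0f1e2d3c4b5a6978ULL << 64 | 0x1122334455667788ULL;
	u128 f = fold128(acc, data, k_hi, k_lo);

	/* acc*x^128 + data, as four 64-bit words, least significant first */
	uint64_t w[4] = { (uint64_t)data, (uint64_t)(data >> 64),
			  (uint64_t)acc, (uint64_t)(acc >> 64) };
	uint64_t wf[2] = { (uint64_t)f, (uint64_t)(f >> 64) };

	/* The fold must preserve the value mod G. */
	printf("%s\n", poly_mod(w, 4, g) == poly_mod(wf, 2, g) ?
	       "fold ok" : "fold mismatch");
	return 0;
}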
+.macro _fold_vec_final vl, v0, v1, consts, bswap_mask, tmp1, tmp2 + _fold_vec \v0, \v1, \consts, \tmp1 + test $\vl, LEN8 + jz .Lfold_vec_final_done\@ + _fold_vec_mem \vl, \v0, (BUF), \consts, \bswap_mask, \tmp1, \tmp2 + add $\vl, BUF +.Lfold_vec_final_done\@: +.endm + +// This macro generates the body of a CRC function with the following prototype: +// +// crc_t crc_func(crc_t crc, const u8 *buf, size_t len, const void *consts); +// +// |crc| is the initial CRC, and crc_t is a data type wide enough to hold it. +// |buf| is the data to checksum. |len| is the data length in bytes, which must +// be at least 16. |consts| is a pointer to the fold_across_128_bits_consts +// field of the constants struct that was generated for the chosen CRC variant. +// +// Moving onto the macro parameters, \n is the number of bits in the CRC, e.g. +// 32 for a CRC-32. Currently the supported values are 8, 16, 32, and 64. If +// the file is compiled in i386 mode, then the maximum supported value is 32. +// +// \lsb_crc is 1 if the CRC processes the least significant bit of each byte +// first, i.e. maps bit0 to x^7, bit1 to x^6, ..., bit7 to x^0. \lsb_crc is 0 +// if the CRC processes the most significant bit of each byte first, i.e. maps +// bit0 to x^0, bit1 to x^1, bit7 to x^7. +// +// \vl is the maximum length of vector register to use in bytes: 16, 32, or 64. +// +// \avx_level is the level of AVX support to use: 0 for SSE only, 2 for AVX2, or +// 512 for AVX512. +// +// If \vl == 16 && \avx_level == 0, the generated code requires: +// PCLMULQDQ && SSE4.1. (Note: all known CPUs with PCLMULQDQ also have SSE4.1.) +// +// If \vl == 32 && \avx_level == 2, the generated code requires: +// VPCLMULQDQ && AVX2. +// +// If \vl == 64 && \avx_level == 512, the generated code requires: +// VPCLMULQDQ && AVX512BW && AVX512VL. +// +// Other \vl and \avx_level combinations are either not supported or not useful. +.macro _crc_pclmul n, lsb_crc, vl, avx_level + .set LSB_CRC, \lsb_crc + .set VL, \vl + .set AVX_LEVEL, \avx_level + + // Define aliases for the xmm, ymm, or zmm registers according to VL. +.irp i, 0,1,2,3,4,5,6,7 + .if VL == 16 + .set V\i, %xmm\i + .set LOG2_VL, 4 + .elseif VL == 32 + .set V\i, %ymm\i + .set LOG2_VL, 5 + .elseif VL == 64 + .set V\i, %zmm\i + .set LOG2_VL, 6 + .else + .error "Unsupported vector length" + .endif +.endr + // Define aliases for the function parameters. + // Note: when crc_t is shorter than u32, zero-extension to 32 bits is + // guaranteed by the ABI. Zero-extension to 64 bits is *not* guaranteed + // when crc_t is shorter than u64. +#ifdef __x86_64__ +.if \n <= 32 + .set CRC, %edi +.else + .set CRC, %rdi +.endif + .set BUF, %rsi + .set LEN, %rdx + .set LEN32, %edx + .set LEN8, %dl + .set CONSTS_PTR, %rcx +#else + // 32-bit support, assuming -mregparm=3 and not including support for + // CRC-64 (which would use both eax and edx to pass the crc parameter). + .set CRC, %eax + .set BUF, %edx + .set LEN, %ecx + .set LEN32, %ecx + .set LEN8, %cl + .set CONSTS_PTR, %ebx // Passed on stack +#endif + + // Define aliases for some local variables. V0-V5 are used without + // aliases (for accumulators, data, temporary values, etc). Staying + // within the first 8 vector registers keeps the code 32-bit SSE + // compatible and reduces the size of 64-bit SSE code slightly. 
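From C, the contract documented above (crc_t crc_func(crc, buf, len, consts), with len >= 16 and consts pointing at the fold_across_128_bits_consts field) is exercised roughly as in the sketch below. It is not part of the patch; it simply open-codes what crc_t10dif_arch() becomes later in this diff once the CRC_PCLMUL() macro is expanded, minus the static_call and static_key dispatch. crc16_msb_pclmul_sse is the SSE instantiation generated by DEFINE_CRC_PCLMUL_FUNCS(crc16_msb, 16, 0); the wrapper name is illustrative.

/*
 * Sketch, not part of the patch: a call site for one generated function,
 * mirroring the CRC_PCLMUL() macro added later in this series.
 */
static u16 crc_t10dif_pclmul_sketch(u16 crc, const u8 *p, size_t len)
{
	if (len >= 16 && crypto_simd_usable()) {
		kernel_fpu_begin();
		crc = crc16_msb_pclmul_sse(crc, p, len,
				crc16_msb_0x8bb7_consts.fold_across_128_bits_consts);
		kernel_fpu_end();
		return crc;
	}
	return crc_t10dif_generic(crc, p, len);
}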
+ .set BSWAP_MASK, V6 + .set BSWAP_MASK_YMM, %ymm6 + .set BSWAP_MASK_XMM, %xmm6 + .set CONSTS, V7 + .set CONSTS_YMM, %ymm7 + .set CONSTS_XMM, %xmm7 + + // Use ANNOTATE_NOENDBR to suppress an objtool warning, since the + // functions generated by this macro are called only by static_call. + ANNOTATE_NOENDBR + +#ifdef __i386__ + push CONSTS_PTR + mov 8(%esp), CONSTS_PTR +#endif + + // Create a 128-bit vector that contains the initial CRC in the end + // representing the high-order polynomial coefficients, and the rest 0. + // If the CRC is msb-first, also load the byte-reflection table. +.if \n <= 32 + _cond_vex movd, CRC, %xmm0 +.else + _cond_vex movq, CRC, %xmm0 +.endif +.if !LSB_CRC + _cond_vex pslldq, $(128-\n)/8, %xmm0, %xmm0 + _vbroadcast OFFSETOF_BSWAP_MASK(CONSTS_PTR), BSWAP_MASK +.endif + + // Load the first vector of data and XOR the initial CRC into the + // appropriate end of the first 128-bit lane of data. If LEN < VL, then + // use a short vector and jump ahead to the final reduction. (LEN >= 16 + // is guaranteed here but not necessarily LEN >= VL.) +.if VL >= 32 + cmp $VL, LEN + jae .Lat_least_1vec\@ + .if VL == 64 + cmp $32, LEN32 + jb .Lless_than_32bytes\@ + _prepare_v0 32, %ymm0, %ymm1, BSWAP_MASK_YMM + add $32, BUF + jmp .Lreduce_256bits_to_128bits\@ +.Lless_than_32bytes\@: + .endif + _prepare_v0 16, %xmm0, %xmm1, BSWAP_MASK_XMM + add $16, BUF + vmovdqa OFFSETOF_FOLD_ACROSS_128_BITS_CONSTS(CONSTS_PTR), CONSTS_XMM + jmp .Lcheck_for_partial_block\@ +.Lat_least_1vec\@: +.endif + _prepare_v0 VL, V0, V1, BSWAP_MASK + + // Handle VL <= LEN < 4*VL. + cmp $4*VL-1, LEN + ja .Lat_least_4vecs\@ + add $VL, BUF + // If VL <= LEN < 2*VL, then jump ahead to the reduction from 1 vector. + // If VL==16 then load fold_across_128_bits_consts first, as the final + // reduction depends on it and it won't be loaded anywhere else. + cmp $2*VL-1, LEN32 +.if VL == 16 + _cond_vex movdqa, OFFSETOF_FOLD_ACROSS_128_BITS_CONSTS(CONSTS_PTR), CONSTS_XMM +.endif + jbe .Lreduce_1vec_to_128bits\@ + // Otherwise 2*VL <= LEN < 4*VL. Load one more vector and jump ahead to + // the reduction from 2 vectors. + _load_data VL, (BUF), BSWAP_MASK, V1 + add $VL, BUF + jmp .Lreduce_2vecs_to_1\@ + +.Lat_least_4vecs\@: + // Load 3 more vectors of data. + _load_data VL, 1*VL(BUF), BSWAP_MASK, V1 + _load_data VL, 2*VL(BUF), BSWAP_MASK, V2 + _load_data VL, 3*VL(BUF), BSWAP_MASK, V3 + sub $-4*VL, BUF // Shorter than 'add 4*VL' when VL=32 + add $-4*VL, LEN // Shorter than 'sub 4*VL' when VL=32 + + // Main loop: while LEN >= 4*VL, fold the 4 vectors V0-V3 into the next + // 4 vectors of data and write the result back to V0-V3. + cmp $4*VL-1, LEN // Shorter than 'cmp 4*VL' when VL=32 + jbe .Lreduce_4vecs_to_2\@ + _load_vec_folding_consts 2 +.Lfold_4vecs_loop\@: + _fold_vec_mem VL, V0, 0*VL(BUF), CONSTS, BSWAP_MASK, V4, V5 + _fold_vec_mem VL, V1, 1*VL(BUF), CONSTS, BSWAP_MASK, V4, V5 + _fold_vec_mem VL, V2, 2*VL(BUF), CONSTS, BSWAP_MASK, V4, V5 + _fold_vec_mem VL, V3, 3*VL(BUF), CONSTS, BSWAP_MASK, V4, V5 + sub $-4*VL, BUF + add $-4*VL, LEN + cmp $4*VL-1, LEN + ja .Lfold_4vecs_loop\@ + + // Fold V0,V1 into V2,V3 and write the result back to V0,V1. Then fold + // two more vectors of data from BUF, if at least that much remains. 
+.Lreduce_4vecs_to_2\@: + _load_vec_folding_consts 1 + _fold_vec V0, V2, CONSTS, V4 + _fold_vec V1, V3, CONSTS, V4 + test $2*VL, LEN8 + jz .Lreduce_2vecs_to_1\@ + _fold_vec_mem VL, V0, 0*VL(BUF), CONSTS, BSWAP_MASK, V4, V5 + _fold_vec_mem VL, V1, 1*VL(BUF), CONSTS, BSWAP_MASK, V4, V5 + sub $-2*VL, BUF + + // Fold V0 into V1 and write the result back to V0. Then fold one more + // vector of data from BUF, if at least that much remains. +.Lreduce_2vecs_to_1\@: + _load_vec_folding_consts 0 + _fold_vec_final VL, V0, V1, CONSTS, BSWAP_MASK, V4, V5 + +.Lreduce_1vec_to_128bits\@: +.if VL == 64 + // Reduce 512-bit %zmm0 to 256-bit %ymm0. Then fold 256 more bits of + // data from BUF, if at least that much remains. + vbroadcasti128 OFFSETOF_FOLD_ACROSS_256_BITS_CONSTS(CONSTS_PTR), CONSTS_YMM + vextracti64x4 $1, %zmm0, %ymm1 + _fold_vec_final 32, %ymm0, %ymm1, CONSTS_YMM, BSWAP_MASK_YMM, %ymm4, %ymm5 +.Lreduce_256bits_to_128bits\@: +.endif +.if VL >= 32 + // Reduce 256-bit %ymm0 to 128-bit %xmm0. Then fold 128 more bits of + // data from BUF, if at least that much remains. + vmovdqa OFFSETOF_FOLD_ACROSS_128_BITS_CONSTS(CONSTS_PTR), CONSTS_XMM + vextracti128 $1, %ymm0, %xmm1 + _fold_vec_final 16, %xmm0, %xmm1, CONSTS_XMM, BSWAP_MASK_XMM, %xmm4, %xmm5 +.Lcheck_for_partial_block\@: +.endif + and $15, LEN32 + jz .Lreduce_128bits_to_crc\@ + + // 1 <= LEN <= 15 data bytes remain in BUF. The polynomial is now + // A*(x^(8*LEN)) + B, where A is the 128-bit polynomial stored in %xmm0 + // and B is the polynomial of the remaining LEN data bytes. To reduce + // this to 128 bits without needing fold constants for each possible + // LEN, rearrange this expression into C1*(x^128) + C2, where + // C1 = floor(A / x^(128 - 8*LEN)) and C2 = A*x^(8*LEN) + B mod x^128. + // Then fold C1 into C2, which is just another fold across 128 bits. + +.if !LSB_CRC || AVX_LEVEL == 0 + // Load the last 16 data bytes. Note that originally LEN was >= 16. + _load_data 16, "-16(BUF,LEN)", BSWAP_MASK_XMM, %xmm2 +.endif // Else will use vpblendvb mem operand later. +.if !LSB_CRC + neg LEN // Needed for indexing shuf_table +.endif + + // tmp = A*x^(8*LEN) mod x^128 + // lsb: pshufb by [LEN, LEN+1, ..., 15, -1, -1, ..., -1] + // i.e. right-shift by LEN bytes. + // msb: pshufb by [-1, -1, ..., -1, 0, 1, ..., 15-LEN] + // i.e. left-shift by LEN bytes. + _cond_vex movdqu, "OFFSETOF_SHUF_TABLE+16(CONSTS_PTR,LEN)", %xmm3 + _cond_vex pshufb, %xmm3, %xmm0, %xmm1 + + // C1 = floor(A / x^(128 - 8*LEN)) + // lsb: pshufb by [-1, -1, ..., -1, 0, 1, ..., LEN-1] + // i.e. left-shift by 16-LEN bytes. + // msb: pshufb by [16-LEN, 16-LEN+1, ..., 15, -1, -1, ..., -1] + // i.e. right-shift by 16-LEN bytes. + _cond_vex pshufb, "OFFSETOF_SHUF_TABLE+32*!LSB_CRC(CONSTS_PTR,LEN)", \ + %xmm0, %xmm0, unaligned_mem_tmp=%xmm4 + + // C2 = tmp + B. This is just a blend of tmp with the last 16 data + // bytes (reflected if msb-first). The blend mask is the shuffle table + // that was used to create tmp. 0 selects tmp, and 1 last16databytes. +.if AVX_LEVEL == 0 + movdqa %xmm0, %xmm4 + movdqa %xmm3, %xmm0 + pblendvb %xmm2, %xmm1 // uses %xmm0 as implicit operand + movdqa %xmm4, %xmm0 +.elseif LSB_CRC + vpblendvb %xmm3, -16(BUF,LEN), %xmm1, %xmm1 +.else + vpblendvb %xmm3, %xmm2, %xmm1, %xmm1 +.endif + + // Fold C1 into C2 and store the 128-bit result in %xmm0. + _fold_vec %xmm0, %xmm1, CONSTS_XMM, %xmm4 + +.Lreduce_128bits_to_crc\@: + // Compute the CRC as %xmm0 * x^n mod G. 
Here %xmm0 means the 128-bit + // polynomial stored in %xmm0 (using either lsb-first or msb-first bit + // order according to LSB_CRC), and G is the CRC's generator polynomial. + + // First, multiply %xmm0 by x^n and reduce the result to 64+n bits: + // + // t0 := (x^(64+n) mod G) * floor(%xmm0 / x^64) + + // x^n * (%xmm0 mod x^64) + // + // Store t0 * x^(64-n) in %xmm0. I.e., actually do: + // + // %xmm0 := ((x^(64+n) mod G) * x^(64-n)) * floor(%xmm0 / x^64) + + // x^64 * (%xmm0 mod x^64) + // + // The extra unreduced factor of x^(64-n) makes floor(t0 / x^n) aligned + // to the HI64_TERMS of %xmm0 so that the next pclmulqdq can easily + // select it. The 64-bit constant (x^(64+n) mod G) * x^(64-n) in the + // msb-first case, or (x^(63+n) mod G) * x^(64-n) in the lsb-first case + // (considering the extra factor of x that gets implicitly introduced by + // each pclmulqdq when using lsb-first order), is identical to the + // constant that was used earlier for folding the LO64_TERMS across 128 + // bits. Thus it's already available in LO64_TERMS of CONSTS_XMM. + _pclmulqdq CONSTS_XMM, LO64_TERMS, %xmm0, HI64_TERMS, %xmm1 +.if LSB_CRC + _cond_vex psrldq, $8, %xmm0, %xmm0 // x^64 * (%xmm0 mod x^64) +.else + _cond_vex pslldq, $8, %xmm0, %xmm0 // x^64 * (%xmm0 mod x^64) +.endif + _cond_vex pxor, %xmm1, %xmm0, %xmm0 + // The HI64_TERMS of %xmm0 now contain floor(t0 / x^n). + // The LO64_TERMS of %xmm0 now contain (t0 mod x^n) * x^(64-n). + + // First step of Barrett reduction: Compute floor(t0 / G). This is the + // polynomial by which G needs to be multiplied to cancel out the x^n + // and higher terms of t0, i.e. to reduce t0 mod G. First do: + // + // t1 := floor(x^(63+n) / G) * x * floor(t0 / x^n) + // + // Then the desired value floor(t0 / G) is floor(t1 / x^64). The 63 in + // x^(63+n) is the maximum degree of floor(t0 / x^n) and thus the lowest + // value that makes enough precision be carried through the calculation. + // + // The '* x' makes it so the result is floor(t1 / x^64) rather than + // floor(t1 / x^63), making it qword-aligned in HI64_TERMS so that it + // can be extracted much more easily in the next step. In the lsb-first + // case the '* x' happens implicitly. In the msb-first case it must be + // done explicitly; floor(x^(63+n) / G) * x is a 65-bit constant, so the + // constant passed to pclmulqdq is (floor(x^(63+n) / G) * x) - x^64, and + // the multiplication by the x^64 term is handled using a pxor. The + // pxor causes the low 64 terms of t1 to be wrong, but they are unused. + _cond_vex movdqa, OFFSETOF_BARRETT_REDUCTION_CONSTS(CONSTS_PTR), CONSTS_XMM + _pclmulqdq CONSTS_XMM, HI64_TERMS, %xmm0, HI64_TERMS, %xmm1 +.if !LSB_CRC + _cond_vex pxor, %xmm0, %xmm1, %xmm1 // += x^64 * floor(t0 / x^n) +.endif + // The HI64_TERMS of %xmm1 now contain floor(t1 / x^64) = floor(t0 / G). + + // Second step of Barrett reduction: Cancel out the x^n and higher terms + // of t0 by subtracting the needed multiple of G. This gives the CRC: + // + // crc := t0 - (G * floor(t0 / G)) + // + // But %xmm0 contains t0 * x^(64-n), so it's more convenient to do: + // + // crc := ((t0 * x^(64-n)) - ((G * x^(64-n)) * floor(t0 / G))) / x^(64-n) + // + // Furthermore, since the resulting CRC is n-bit, if mod x^n is + // explicitly applied to it then the x^n term of G makes no difference + // in the result and can be omitted. This helps keep the constant + // multiplier in 64 bits in most cases. 
This gives the following: + // + // %xmm0 := %xmm0 - (((G - x^n) * x^(64-n)) * floor(t0 / G)) + // crc := (%xmm0 / x^(64-n)) mod x^n + // + // In the lsb-first case, each pclmulqdq implicitly introduces + // an extra factor of x, so in that case the constant that needs to be + // passed to pclmulqdq is actually '(G - x^n) * x^(63-n)' when n <= 63. + // For lsb-first CRCs where n=64, the extra factor of x cannot be as + // easily avoided. In that case, instead pass '(G - x^n - x^0) / x' to + // pclmulqdq and handle the x^0 term (i.e. 1) separately. (All CRC + // polynomials have nonzero x^n and x^0 terms.) It works out as: the + // CRC has be XORed with the physically low qword of %xmm1, representing + // floor(t0 / G). The most efficient way to do that is to move it to + // the physically high qword and use a ternlog to combine the two XORs. +.if LSB_CRC && \n == 64 + _cond_vex punpcklqdq, %xmm1, %xmm2, %xmm2 + _pclmulqdq CONSTS_XMM, LO64_TERMS, %xmm1, HI64_TERMS, %xmm1 + .if AVX_LEVEL <= 2 + _cond_vex pxor, %xmm2, %xmm0, %xmm0 + _cond_vex pxor, %xmm1, %xmm0, %xmm0 + .else + vpternlogq $0x96, %xmm2, %xmm1, %xmm0 + .endif + _cond_vex "pextrq $1,", %xmm0, %rax // (%xmm0 / x^0) mod x^64 +.else + _pclmulqdq CONSTS_XMM, LO64_TERMS, %xmm1, HI64_TERMS, %xmm1 + _cond_vex pxor, %xmm1, %xmm0, %xmm0 + .if \n == 8 + _cond_vex "pextrb $7 + LSB_CRC,", %xmm0, %eax // (%xmm0 / x^56) mod x^8 + .elseif \n == 16 + _cond_vex "pextrw $3 + LSB_CRC,", %xmm0, %eax // (%xmm0 / x^48) mod x^16 + .elseif \n == 32 + _cond_vex "pextrd $1 + LSB_CRC,", %xmm0, %eax // (%xmm0 / x^32) mod x^32 + .else // \n == 64 && !LSB_CRC + _cond_vex movq, %xmm0, %rax // (%xmm0 / x^0) mod x^64 + .endif +.endif + +.if VL > 16 + vzeroupper // Needed when ymm or zmm registers may have been used. +.endif +#ifdef __i386__ + pop CONSTS_PTR +#endif + RET +.endm + +#ifdef CONFIG_AS_VPCLMULQDQ +#define DEFINE_CRC_PCLMUL_FUNCS(prefix, bits, lsb) \ +SYM_FUNC_START(prefix##_pclmul_sse); \ + _crc_pclmul n=bits, lsb_crc=lsb, vl=16, avx_level=0; \ +SYM_FUNC_END(prefix##_pclmul_sse); \ + \ +SYM_FUNC_START(prefix##_vpclmul_avx2); \ + _crc_pclmul n=bits, lsb_crc=lsb, vl=32, avx_level=2; \ +SYM_FUNC_END(prefix##_vpclmul_avx2); \ + \ +SYM_FUNC_START(prefix##_vpclmul_avx512); \ + _crc_pclmul n=bits, lsb_crc=lsb, vl=64, avx_level=512; \ +SYM_FUNC_END(prefix##_vpclmul_avx512); +#else +#define DEFINE_CRC_PCLMUL_FUNCS(prefix, bits, lsb) \ +SYM_FUNC_START(prefix##_pclmul_sse); \ + _crc_pclmul n=bits, lsb_crc=lsb, vl=16, avx_level=0; \ +SYM_FUNC_END(prefix##_pclmul_sse); +#endif // !CONFIG_AS_VPCLMULQDQ diff --git a/arch/x86/lib/crc-pclmul-template.h b/arch/x86/lib/crc-pclmul-template.h new file mode 100644 index 000000000000..c5b3bfe11d8d --- /dev/null +++ b/arch/x86/lib/crc-pclmul-template.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Macros for accessing the [V]PCLMULQDQ-based CRC functions that are + * instantiated by crc-pclmul-template.S + * + * Copyright 2025 Google LLC + * + * Author: Eric Biggers <ebiggers@google.com> + */ +#ifndef _CRC_PCLMUL_TEMPLATE_H +#define _CRC_PCLMUL_TEMPLATE_H + +#include <asm/cpufeatures.h> +#include <asm/simd.h> +#include <crypto/internal/simd.h> +#include <linux/static_call.h> +#include "crc-pclmul-consts.h" + +#define DECLARE_CRC_PCLMUL_FUNCS(prefix, crc_t) \ +crc_t prefix##_pclmul_sse(crc_t crc, const u8 *p, size_t len, \ + const void *consts_ptr); \ +crc_t prefix##_vpclmul_avx2(crc_t crc, const u8 *p, size_t len, \ + const void *consts_ptr); \ +crc_t prefix##_vpclmul_avx512(crc_t crc, const u8 
*p, size_t len, \ + const void *consts_ptr); \ +DEFINE_STATIC_CALL(prefix##_pclmul, prefix##_pclmul_sse) + +#define INIT_CRC_PCLMUL(prefix) \ +do { \ + if (IS_ENABLED(CONFIG_AS_VPCLMULQDQ) && \ + boot_cpu_has(X86_FEATURE_VPCLMULQDQ) && \ + boot_cpu_has(X86_FEATURE_AVX2) && \ + cpu_has_xfeatures(XFEATURE_MASK_YMM, NULL)) { \ + if (boot_cpu_has(X86_FEATURE_AVX512BW) && \ + boot_cpu_has(X86_FEATURE_AVX512VL) && \ + !boot_cpu_has(X86_FEATURE_PREFER_YMM) && \ + cpu_has_xfeatures(XFEATURE_MASK_AVX512, NULL)) { \ + static_call_update(prefix##_pclmul, \ + prefix##_vpclmul_avx512); \ + } else { \ + static_call_update(prefix##_pclmul, \ + prefix##_vpclmul_avx2); \ + } \ + } \ +} while (0) + +/* + * Call a [V]PCLMULQDQ optimized CRC function if the data length is at least 16 + * bytes, the CPU has PCLMULQDQ support, and the current context may use SIMD. + * + * 16 bytes is the minimum length supported by the [V]PCLMULQDQ functions. + * There is overhead associated with kernel_fpu_begin() and kernel_fpu_end(), + * varying by CPU and factors such as which parts of the "FPU" state userspace + * has touched, which could result in a larger cutoff being better. Indeed, a + * larger cutoff is usually better for a *single* message. However, the + * overhead of the FPU section gets amortized if multiple FPU sections get + * executed before returning to userspace, since the XSAVE and XRSTOR occur only + * once. Considering that and the fact that the [V]PCLMULQDQ code is lighter on + * the dcache than the table-based code is, a 16-byte cutoff seems to work well. + */ +#define CRC_PCLMUL(crc, p, len, prefix, consts, have_pclmulqdq) \ +do { \ + if ((len) >= 16 && static_branch_likely(&(have_pclmulqdq)) && \ + crypto_simd_usable()) { \ + const void *consts_ptr; \ + \ + consts_ptr = (consts).fold_across_128_bits_consts; \ + kernel_fpu_begin(); \ + crc = static_call(prefix##_pclmul)((crc), (p), (len), \ + consts_ptr); \ + kernel_fpu_end(); \ + return crc; \ + } \ +} while (0) + +#endif /* _CRC_PCLMUL_TEMPLATE_H */ diff --git a/arch/x86/lib/crc-t10dif-glue.c b/arch/x86/lib/crc-t10dif-glue.c index 13f07ddc9122..f89c335cde3c 100644 --- a/arch/x86/lib/crc-t10dif-glue.c +++ b/arch/x86/lib/crc-t10dif-glue.c @@ -1,37 +1,32 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* - * CRC-T10DIF using PCLMULQDQ instructions + * CRC-T10DIF using [V]PCLMULQDQ instructions * * Copyright 2024 Google LLC */ -#include <asm/cpufeatures.h> -#include <asm/simd.h> -#include <crypto/internal/simd.h> #include <linux/crc-t10dif.h> #include <linux/module.h> +#include "crc-pclmul-template.h" static DEFINE_STATIC_KEY_FALSE(have_pclmulqdq); -asmlinkage u16 crc_t10dif_pcl(u16 init_crc, const u8 *buf, size_t len); +DECLARE_CRC_PCLMUL_FUNCS(crc16_msb, u16); u16 crc_t10dif_arch(u16 crc, const u8 *p, size_t len) { - if (len >= 16 && - static_key_enabled(&have_pclmulqdq) && crypto_simd_usable()) { - kernel_fpu_begin(); - crc = crc_t10dif_pcl(crc, p, len); - kernel_fpu_end(); - return crc; - } + CRC_PCLMUL(crc, p, len, crc16_msb, crc16_msb_0x8bb7_consts, + have_pclmulqdq); return crc_t10dif_generic(crc, p, len); } EXPORT_SYMBOL(crc_t10dif_arch); static int __init crc_t10dif_x86_init(void) { - if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) + if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) { static_branch_enable(&have_pclmulqdq); + INIT_CRC_PCLMUL(crc16_msb); + } return 0; } arch_initcall(crc_t10dif_x86_init); @@ -41,11 +36,5 @@ static void __exit crc_t10dif_x86_exit(void) } module_exit(crc_t10dif_x86_exit); -bool crc_t10dif_is_optimized(void) -{ - return 
static_key_enabled(&have_pclmulqdq); -} -EXPORT_SYMBOL(crc_t10dif_is_optimized); - -MODULE_DESCRIPTION("CRC-T10DIF using PCLMULQDQ instructions"); +MODULE_DESCRIPTION("CRC-T10DIF using [V]PCLMULQDQ instructions"); MODULE_LICENSE("GPL"); diff --git a/arch/x86/lib/crc16-msb-pclmul.S b/arch/x86/lib/crc16-msb-pclmul.S new file mode 100644 index 000000000000..e9fe248093a8 --- /dev/null +++ b/arch/x86/lib/crc16-msb-pclmul.S @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +// Copyright 2025 Google LLC + +#include "crc-pclmul-template.S" + +DEFINE_CRC_PCLMUL_FUNCS(crc16_msb, /* bits= */ 16, /* lsb= */ 0) diff --git a/arch/x86/lib/crc32-glue.c b/arch/x86/lib/crc32-glue.c index 2dd18a886ded..e3f93b17ac3f 100644 --- a/arch/x86/lib/crc32-glue.c +++ b/arch/x86/lib/crc32-glue.c @@ -7,43 +7,20 @@ * Copyright 2024 Google LLC */ -#include <asm/cpufeatures.h> -#include <asm/simd.h> -#include <crypto/internal/simd.h> #include <linux/crc32.h> -#include <linux/linkage.h> #include <linux/module.h> - -/* minimum size of buffer for crc32_pclmul_le_16 */ -#define CRC32_PCLMUL_MIN_LEN 64 +#include "crc-pclmul-template.h" static DEFINE_STATIC_KEY_FALSE(have_crc32); static DEFINE_STATIC_KEY_FALSE(have_pclmulqdq); -u32 crc32_pclmul_le_16(u32 crc, const u8 *buffer, size_t len); +DECLARE_CRC_PCLMUL_FUNCS(crc32_lsb, u32); u32 crc32_le_arch(u32 crc, const u8 *p, size_t len) { - if (len >= CRC32_PCLMUL_MIN_LEN + 15 && - static_branch_likely(&have_pclmulqdq) && crypto_simd_usable()) { - size_t n = -(uintptr_t)p & 15; - - /* align p to 16-byte boundary */ - if (n) { - crc = crc32_le_base(crc, p, n); - p += n; - len -= n; - } - n = round_down(len, 16); - kernel_fpu_begin(); - crc = crc32_pclmul_le_16(crc, p, n); - kernel_fpu_end(); - p += n; - len -= n; - } - if (len) - crc = crc32_le_base(crc, p, len); - return crc; + CRC_PCLMUL(crc, p, len, crc32_lsb, crc32_lsb_0xedb88320_consts, + have_pclmulqdq); + return crc32_le_base(crc, p, len); } EXPORT_SYMBOL(crc32_le_arch); @@ -61,12 +38,12 @@ EXPORT_SYMBOL(crc32_le_arch); asmlinkage u32 crc32c_x86_3way(u32 crc, const u8 *buffer, size_t len); -u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len) +u32 crc32c_arch(u32 crc, const u8 *p, size_t len) { size_t num_longs; if (!static_branch_likely(&have_crc32)) - return crc32c_le_base(crc, p, len); + return crc32c_base(crc, p, len); if (IS_ENABLED(CONFIG_X86_64) && len >= CRC32C_PCLMUL_BREAKEVEN && static_branch_likely(&have_pclmulqdq) && crypto_simd_usable()) { @@ -78,14 +55,22 @@ u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len) for (num_longs = len / sizeof(unsigned long); num_longs != 0; num_longs--, p += sizeof(unsigned long)) - asm(CRC32_INST : "+r" (crc) : "rm" (*(unsigned long *)p)); + asm(CRC32_INST : "+r" (crc) : ASM_INPUT_RM (*(unsigned long *)p)); - for (len %= sizeof(unsigned long); len; len--, p++) - asm("crc32b %1, %0" : "+r" (crc) : "rm" (*p)); + if (sizeof(unsigned long) > 4 && (len & 4)) { + asm("crc32l %1, %0" : "+r" (crc) : ASM_INPUT_RM (*(u32 *)p)); + p += 4; + } + if (len & 2) { + asm("crc32w %1, %0" : "+r" (crc) : ASM_INPUT_RM (*(u16 *)p)); + p += 2; + } + if (len & 1) + asm("crc32b %1, %0" : "+r" (crc) : ASM_INPUT_RM (*p)); return crc; } -EXPORT_SYMBOL(crc32c_le_arch); +EXPORT_SYMBOL(crc32c_arch); u32 crc32_be_arch(u32 crc, const u8 *p, size_t len) { @@ -97,8 +82,10 @@ static int __init crc32_x86_init(void) { if (boot_cpu_has(X86_FEATURE_XMM4_2)) static_branch_enable(&have_crc32); - if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) + if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) { 
static_branch_enable(&have_pclmulqdq); + INIT_CRC_PCLMUL(crc32_lsb); + } return 0; } arch_initcall(crc32_x86_init); diff --git a/arch/x86/lib/crc32-pclmul.S b/arch/x86/lib/crc32-pclmul.S index f9637789cac1..f20f40fb0172 100644 --- a/arch/x86/lib/crc32-pclmul.S +++ b/arch/x86/lib/crc32-pclmul.S @@ -1,217 +1,6 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright 2012 Xyratex Technology Limited - * - * Using hardware provided PCLMULQDQ instruction to accelerate the CRC32 - * calculation. - * CRC32 polynomial:0x04c11db7(BE)/0xEDB88320(LE) - * PCLMULQDQ is a new instruction in Intel SSE4.2, the reference can be found - * at: - * http://www.intel.com/products/processor/manuals/ - * Intel(R) 64 and IA-32 Architectures Software Developer's Manual - * Volume 2B: Instruction Set Reference, N-Z - * - * Authors: Gregory Prestas <Gregory_Prestas@us.xyratex.com> - * Alexander Boyko <Alexander_Boyko@xyratex.com> - */ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +// Copyright 2025 Google LLC -#include <linux/linkage.h> +#include "crc-pclmul-template.S" - -.section .rodata -.align 16 -/* - * [x4*128+32 mod P(x) << 32)]' << 1 = 0x154442bd4 - * #define CONSTANT_R1 0x154442bd4LL - * - * [(x4*128-32 mod P(x) << 32)]' << 1 = 0x1c6e41596 - * #define CONSTANT_R2 0x1c6e41596LL - */ -.Lconstant_R2R1: - .octa 0x00000001c6e415960000000154442bd4 -/* - * [(x128+32 mod P(x) << 32)]' << 1 = 0x1751997d0 - * #define CONSTANT_R3 0x1751997d0LL - * - * [(x128-32 mod P(x) << 32)]' << 1 = 0x0ccaa009e - * #define CONSTANT_R4 0x0ccaa009eLL - */ -.Lconstant_R4R3: - .octa 0x00000000ccaa009e00000001751997d0 -/* - * [(x64 mod P(x) << 32)]' << 1 = 0x163cd6124 - * #define CONSTANT_R5 0x163cd6124LL - */ -.Lconstant_R5: - .octa 0x00000000000000000000000163cd6124 -.Lconstant_mask32: - .octa 0x000000000000000000000000FFFFFFFF -/* - * #define CRCPOLY_TRUE_LE_FULL 0x1DB710641LL - * - * Barrett Reduction constant (u64`) = u` = (x**64 / P(x))` = 0x1F7011641LL - * #define CONSTANT_RU 0x1F7011641LL - */ -.Lconstant_RUpoly: - .octa 0x00000001F701164100000001DB710641 - -#define CONSTANT %xmm0 - -#ifdef __x86_64__ -#define CRC %edi -#define BUF %rsi -#define LEN %rdx -#else -#define CRC %eax -#define BUF %edx -#define LEN %ecx -#endif - - - -.text -/** - * Calculate crc32 - * CRC - initial crc32 - * BUF - buffer (16 bytes aligned) - * LEN - sizeof buffer (16 bytes aligned), LEN should be greater than 63 - * return %eax crc32 - * u32 crc32_pclmul_le_16(u32 crc, const u8 *buffer, size_t len); - */ - -SYM_FUNC_START(crc32_pclmul_le_16) /* buffer and buffer size are 16 bytes aligned */ - movdqa (BUF), %xmm1 - movdqa 0x10(BUF), %xmm2 - movdqa 0x20(BUF), %xmm3 - movdqa 0x30(BUF), %xmm4 - movd CRC, CONSTANT - pxor CONSTANT, %xmm1 - sub $0x40, LEN - add $0x40, BUF - cmp $0x40, LEN - jb .Lless_64 - -#ifdef __x86_64__ - movdqa .Lconstant_R2R1(%rip), CONSTANT -#else - movdqa .Lconstant_R2R1, CONSTANT -#endif - -.Lloop_64:/* 64 bytes Full cache line folding */ - prefetchnta 0x40(BUF) - movdqa %xmm1, %xmm5 - movdqa %xmm2, %xmm6 - movdqa %xmm3, %xmm7 -#ifdef __x86_64__ - movdqa %xmm4, %xmm8 -#endif - pclmulqdq $0x00, CONSTANT, %xmm1 - pclmulqdq $0x00, CONSTANT, %xmm2 - pclmulqdq $0x00, CONSTANT, %xmm3 -#ifdef __x86_64__ - pclmulqdq $0x00, CONSTANT, %xmm4 -#endif - pclmulqdq $0x11, CONSTANT, %xmm5 - pclmulqdq $0x11, CONSTANT, %xmm6 - pclmulqdq $0x11, CONSTANT, %xmm7 -#ifdef __x86_64__ - pclmulqdq $0x11, CONSTANT, %xmm8 -#endif - pxor %xmm5, %xmm1 - pxor %xmm6, %xmm2 - pxor %xmm7, %xmm3 -#ifdef __x86_64__ - pxor %xmm8, %xmm4 -#else - /* xmm8 
unsupported for x32 */ - movdqa %xmm4, %xmm5 - pclmulqdq $0x00, CONSTANT, %xmm4 - pclmulqdq $0x11, CONSTANT, %xmm5 - pxor %xmm5, %xmm4 -#endif - - pxor (BUF), %xmm1 - pxor 0x10(BUF), %xmm2 - pxor 0x20(BUF), %xmm3 - pxor 0x30(BUF), %xmm4 - - sub $0x40, LEN - add $0x40, BUF - cmp $0x40, LEN - jge .Lloop_64 -.Lless_64:/* Folding cache line into 128bit */ -#ifdef __x86_64__ - movdqa .Lconstant_R4R3(%rip), CONSTANT -#else - movdqa .Lconstant_R4R3, CONSTANT -#endif - prefetchnta (BUF) - - movdqa %xmm1, %xmm5 - pclmulqdq $0x00, CONSTANT, %xmm1 - pclmulqdq $0x11, CONSTANT, %xmm5 - pxor %xmm5, %xmm1 - pxor %xmm2, %xmm1 - - movdqa %xmm1, %xmm5 - pclmulqdq $0x00, CONSTANT, %xmm1 - pclmulqdq $0x11, CONSTANT, %xmm5 - pxor %xmm5, %xmm1 - pxor %xmm3, %xmm1 - - movdqa %xmm1, %xmm5 - pclmulqdq $0x00, CONSTANT, %xmm1 - pclmulqdq $0x11, CONSTANT, %xmm5 - pxor %xmm5, %xmm1 - pxor %xmm4, %xmm1 - - cmp $0x10, LEN - jb .Lfold_64 -.Lloop_16:/* Folding rest buffer into 128bit */ - movdqa %xmm1, %xmm5 - pclmulqdq $0x00, CONSTANT, %xmm1 - pclmulqdq $0x11, CONSTANT, %xmm5 - pxor %xmm5, %xmm1 - pxor (BUF), %xmm1 - sub $0x10, LEN - add $0x10, BUF - cmp $0x10, LEN - jge .Lloop_16 - -.Lfold_64: - /* perform the last 64 bit fold, also adds 32 zeroes - * to the input stream */ - pclmulqdq $0x01, %xmm1, CONSTANT /* R4 * xmm1.low */ - psrldq $0x08, %xmm1 - pxor CONSTANT, %xmm1 - - /* final 32-bit fold */ - movdqa %xmm1, %xmm2 -#ifdef __x86_64__ - movdqa .Lconstant_R5(%rip), CONSTANT - movdqa .Lconstant_mask32(%rip), %xmm3 -#else - movdqa .Lconstant_R5, CONSTANT - movdqa .Lconstant_mask32, %xmm3 -#endif - psrldq $0x04, %xmm2 - pand %xmm3, %xmm1 - pclmulqdq $0x00, CONSTANT, %xmm1 - pxor %xmm2, %xmm1 - - /* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */ -#ifdef __x86_64__ - movdqa .Lconstant_RUpoly(%rip), CONSTANT -#else - movdqa .Lconstant_RUpoly, CONSTANT -#endif - movdqa %xmm1, %xmm2 - pand %xmm3, %xmm1 - pclmulqdq $0x10, CONSTANT, %xmm1 - pand %xmm3, %xmm1 - pclmulqdq $0x00, CONSTANT, %xmm1 - pxor %xmm2, %xmm1 - pextrd $0x01, %xmm1, %eax - - RET -SYM_FUNC_END(crc32_pclmul_le_16) +DEFINE_CRC_PCLMUL_FUNCS(crc32_lsb, /* bits= */ 32, /* lsb= */ 1) diff --git a/arch/x86/lib/crc64-glue.c b/arch/x86/lib/crc64-glue.c new file mode 100644 index 000000000000..b0e1b719ecbf --- /dev/null +++ b/arch/x86/lib/crc64-glue.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * CRC64 using [V]PCLMULQDQ instructions + * + * Copyright 2025 Google LLC + */ + +#include <linux/crc64.h> +#include <linux/module.h> +#include "crc-pclmul-template.h" + +static DEFINE_STATIC_KEY_FALSE(have_pclmulqdq); + +DECLARE_CRC_PCLMUL_FUNCS(crc64_msb, u64); +DECLARE_CRC_PCLMUL_FUNCS(crc64_lsb, u64); + +u64 crc64_be_arch(u64 crc, const u8 *p, size_t len) +{ + CRC_PCLMUL(crc, p, len, crc64_msb, crc64_msb_0x42f0e1eba9ea3693_consts, + have_pclmulqdq); + return crc64_be_generic(crc, p, len); +} +EXPORT_SYMBOL_GPL(crc64_be_arch); + +u64 crc64_nvme_arch(u64 crc, const u8 *p, size_t len) +{ + CRC_PCLMUL(crc, p, len, crc64_lsb, crc64_lsb_0x9a6c9329ac4bc9b5_consts, + have_pclmulqdq); + return crc64_nvme_generic(crc, p, len); +} +EXPORT_SYMBOL_GPL(crc64_nvme_arch); + +static int __init crc64_x86_init(void) +{ + if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) { + static_branch_enable(&have_pclmulqdq); + INIT_CRC_PCLMUL(crc64_msb); + INIT_CRC_PCLMUL(crc64_lsb); + } + return 0; +} +arch_initcall(crc64_x86_init); + +static void __exit crc64_x86_exit(void) +{ +} +module_exit(crc64_x86_exit); + +MODULE_DESCRIPTION("CRC64 using [V]PCLMULQDQ 
instructions"); +MODULE_LICENSE("GPL"); diff --git a/arch/x86/lib/crc64-pclmul.S b/arch/x86/lib/crc64-pclmul.S new file mode 100644 index 000000000000..4173051b5197 --- /dev/null +++ b/arch/x86/lib/crc64-pclmul.S @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +// Copyright 2025 Google LLC + +#include "crc-pclmul-template.S" + +DEFINE_CRC_PCLMUL_FUNCS(crc64_msb, /* bits= */ 64, /* lsb= */ 0) +DEFINE_CRC_PCLMUL_FUNCS(crc64_lsb, /* bits= */ 64, /* lsb= */ 1) diff --git a/arch/x86/lib/crct10dif-pcl-asm_64.S b/arch/x86/lib/crct10dif-pcl-asm_64.S deleted file mode 100644 index 5286db5b8165..000000000000 --- a/arch/x86/lib/crct10dif-pcl-asm_64.S +++ /dev/null @@ -1,332 +0,0 @@ -######################################################################## -# Implement fast CRC-T10DIF computation with SSE and PCLMULQDQ instructions -# -# Copyright (c) 2013, Intel Corporation -# -# Authors: -# Erdinc Ozturk <erdinc.ozturk@intel.com> -# Vinodh Gopal <vinodh.gopal@intel.com> -# James Guilford <james.guilford@intel.com> -# Tim Chen <tim.c.chen@linux.intel.com> -# -# This software is available to you under a choice of one of two -# licenses. You may choose to be licensed under the terms of the GNU -# General Public License (GPL) Version 2, available from the file -# COPYING in the main directory of this source tree, or the -# OpenIB.org BSD license below: -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the -# distribution. -# -# * Neither the name of the Intel Corporation nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# -# THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Reference paper titled "Fast CRC Computation for Generic -# Polynomials Using PCLMULQDQ Instruction" -# URL: http://www.intel.com/content/dam/www/public/us/en/documents -# /white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf -# - -#include <linux/linkage.h> - -.text - -#define init_crc %edi -#define buf %rsi -#define len %rdx - -#define FOLD_CONSTS %xmm10 -#define BSWAP_MASK %xmm11 - -# Fold reg1, reg2 into the next 32 data bytes, storing the result back into -# reg1, reg2. 
-.macro fold_32_bytes offset, reg1, reg2 - movdqu \offset(buf), %xmm9 - movdqu \offset+16(buf), %xmm12 - pshufb BSWAP_MASK, %xmm9 - pshufb BSWAP_MASK, %xmm12 - movdqa \reg1, %xmm8 - movdqa \reg2, %xmm13 - pclmulqdq $0x00, FOLD_CONSTS, \reg1 - pclmulqdq $0x11, FOLD_CONSTS, %xmm8 - pclmulqdq $0x00, FOLD_CONSTS, \reg2 - pclmulqdq $0x11, FOLD_CONSTS, %xmm13 - pxor %xmm9 , \reg1 - xorps %xmm8 , \reg1 - pxor %xmm12, \reg2 - xorps %xmm13, \reg2 -.endm - -# Fold src_reg into dst_reg. -.macro fold_16_bytes src_reg, dst_reg - movdqa \src_reg, %xmm8 - pclmulqdq $0x11, FOLD_CONSTS, \src_reg - pclmulqdq $0x00, FOLD_CONSTS, %xmm8 - pxor %xmm8, \dst_reg - xorps \src_reg, \dst_reg -.endm - -# -# u16 crc_t10dif_pcl(u16 init_crc, const *u8 buf, size_t len); -# -# Assumes len >= 16. -# -SYM_FUNC_START(crc_t10dif_pcl) - - movdqa .Lbswap_mask(%rip), BSWAP_MASK - - # For sizes less than 256 bytes, we can't fold 128 bytes at a time. - cmp $256, len - jl .Lless_than_256_bytes - - # Load the first 128 data bytes. Byte swapping is necessary to make the - # bit order match the polynomial coefficient order. - movdqu 16*0(buf), %xmm0 - movdqu 16*1(buf), %xmm1 - movdqu 16*2(buf), %xmm2 - movdqu 16*3(buf), %xmm3 - movdqu 16*4(buf), %xmm4 - movdqu 16*5(buf), %xmm5 - movdqu 16*6(buf), %xmm6 - movdqu 16*7(buf), %xmm7 - add $128, buf - pshufb BSWAP_MASK, %xmm0 - pshufb BSWAP_MASK, %xmm1 - pshufb BSWAP_MASK, %xmm2 - pshufb BSWAP_MASK, %xmm3 - pshufb BSWAP_MASK, %xmm4 - pshufb BSWAP_MASK, %xmm5 - pshufb BSWAP_MASK, %xmm6 - pshufb BSWAP_MASK, %xmm7 - - # XOR the first 16 data *bits* with the initial CRC value. - pxor %xmm8, %xmm8 - pinsrw $7, init_crc, %xmm8 - pxor %xmm8, %xmm0 - - movdqa .Lfold_across_128_bytes_consts(%rip), FOLD_CONSTS - - # Subtract 128 for the 128 data bytes just consumed. Subtract another - # 128 to simplify the termination condition of the following loop. - sub $256, len - - # While >= 128 data bytes remain (not counting xmm0-7), fold the 128 - # bytes xmm0-7 into them, storing the result back into xmm0-7. -.Lfold_128_bytes_loop: - fold_32_bytes 0, %xmm0, %xmm1 - fold_32_bytes 32, %xmm2, %xmm3 - fold_32_bytes 64, %xmm4, %xmm5 - fold_32_bytes 96, %xmm6, %xmm7 - add $128, buf - sub $128, len - jge .Lfold_128_bytes_loop - - # Now fold the 112 bytes in xmm0-xmm6 into the 16 bytes in xmm7. - - # Fold across 64 bytes. - movdqa .Lfold_across_64_bytes_consts(%rip), FOLD_CONSTS - fold_16_bytes %xmm0, %xmm4 - fold_16_bytes %xmm1, %xmm5 - fold_16_bytes %xmm2, %xmm6 - fold_16_bytes %xmm3, %xmm7 - # Fold across 32 bytes. - movdqa .Lfold_across_32_bytes_consts(%rip), FOLD_CONSTS - fold_16_bytes %xmm4, %xmm6 - fold_16_bytes %xmm5, %xmm7 - # Fold across 16 bytes. - movdqa .Lfold_across_16_bytes_consts(%rip), FOLD_CONSTS - fold_16_bytes %xmm6, %xmm7 - - # Add 128 to get the correct number of data bytes remaining in 0...127 - # (not counting xmm7), following the previous extra subtraction by 128. - # Then subtract 16 to simplify the termination condition of the - # following loop. - add $128-16, len - - # While >= 16 data bytes remain (not counting xmm7), fold the 16 bytes - # xmm7 into them, storing the result back into xmm7. 
- jl .Lfold_16_bytes_loop_done -.Lfold_16_bytes_loop: - movdqa %xmm7, %xmm8 - pclmulqdq $0x11, FOLD_CONSTS, %xmm7 - pclmulqdq $0x00, FOLD_CONSTS, %xmm8 - pxor %xmm8, %xmm7 - movdqu (buf), %xmm0 - pshufb BSWAP_MASK, %xmm0 - pxor %xmm0 , %xmm7 - add $16, buf - sub $16, len - jge .Lfold_16_bytes_loop - -.Lfold_16_bytes_loop_done: - # Add 16 to get the correct number of data bytes remaining in 0...15 - # (not counting xmm7), following the previous extra subtraction by 16. - add $16, len - je .Lreduce_final_16_bytes - -.Lhandle_partial_segment: - # Reduce the last '16 + len' bytes where 1 <= len <= 15 and the first 16 - # bytes are in xmm7 and the rest are the remaining data in 'buf'. To do - # this without needing a fold constant for each possible 'len', redivide - # the bytes into a first chunk of 'len' bytes and a second chunk of 16 - # bytes, then fold the first chunk into the second. - - movdqa %xmm7, %xmm2 - - # xmm1 = last 16 original data bytes - movdqu -16(buf, len), %xmm1 - pshufb BSWAP_MASK, %xmm1 - - # xmm2 = high order part of second chunk: xmm7 left-shifted by 'len' bytes. - lea .Lbyteshift_table+16(%rip), %rax - sub len, %rax - movdqu (%rax), %xmm0 - pshufb %xmm0, %xmm2 - - # xmm7 = first chunk: xmm7 right-shifted by '16-len' bytes. - pxor .Lmask1(%rip), %xmm0 - pshufb %xmm0, %xmm7 - - # xmm1 = second chunk: 'len' bytes from xmm1 (low-order bytes), - # then '16-len' bytes from xmm2 (high-order bytes). - pblendvb %xmm2, %xmm1 #xmm0 is implicit - - # Fold the first chunk into the second chunk, storing the result in xmm7. - movdqa %xmm7, %xmm8 - pclmulqdq $0x11, FOLD_CONSTS, %xmm7 - pclmulqdq $0x00, FOLD_CONSTS, %xmm8 - pxor %xmm8, %xmm7 - pxor %xmm1, %xmm7 - -.Lreduce_final_16_bytes: - # Reduce the 128-bit value M(x), stored in xmm7, to the final 16-bit CRC - - # Load 'x^48 * (x^48 mod G(x))' and 'x^48 * (x^80 mod G(x))'. - movdqa .Lfinal_fold_consts(%rip), FOLD_CONSTS - - # Fold the high 64 bits into the low 64 bits, while also multiplying by - # x^64. This produces a 128-bit value congruent to x^64 * M(x) and - # whose low 48 bits are 0. - movdqa %xmm7, %xmm0 - pclmulqdq $0x11, FOLD_CONSTS, %xmm7 # high bits * x^48 * (x^80 mod G(x)) - pslldq $8, %xmm0 - pxor %xmm0, %xmm7 # + low bits * x^64 - - # Fold the high 32 bits into the low 96 bits. This produces a 96-bit - # value congruent to x^64 * M(x) and whose low 48 bits are 0. - movdqa %xmm7, %xmm0 - pand .Lmask2(%rip), %xmm0 # zero high 32 bits - psrldq $12, %xmm7 # extract high 32 bits - pclmulqdq $0x00, FOLD_CONSTS, %xmm7 # high 32 bits * x^48 * (x^48 mod G(x)) - pxor %xmm0, %xmm7 # + low bits - - # Load G(x) and floor(x^48 / G(x)). - movdqa .Lbarrett_reduction_consts(%rip), FOLD_CONSTS - - # Use Barrett reduction to compute the final CRC value. - movdqa %xmm7, %xmm0 - pclmulqdq $0x11, FOLD_CONSTS, %xmm7 # high 32 bits * floor(x^48 / G(x)) - psrlq $32, %xmm7 # /= x^32 - pclmulqdq $0x00, FOLD_CONSTS, %xmm7 # *= G(x) - psrlq $48, %xmm0 - pxor %xmm7, %xmm0 # + low 16 nonzero bits - # Final CRC value (x^16 * M(x)) mod G(x) is in low 16 bits of xmm0. - - pextrw $0, %xmm0, %eax - RET - -.align 16 -.Lless_than_256_bytes: - # Checksumming a buffer of length 16...255 bytes - - # Load the first 16 data bytes. - movdqu (buf), %xmm7 - pshufb BSWAP_MASK, %xmm7 - add $16, buf - - # XOR the first 16 data *bits* with the initial CRC value. 
- pxor %xmm0, %xmm0 - pinsrw $7, init_crc, %xmm0 - pxor %xmm0, %xmm7 - - movdqa .Lfold_across_16_bytes_consts(%rip), FOLD_CONSTS - cmp $16, len - je .Lreduce_final_16_bytes # len == 16 - sub $32, len - jge .Lfold_16_bytes_loop # 32 <= len <= 255 - add $16, len - jmp .Lhandle_partial_segment # 17 <= len <= 31 -SYM_FUNC_END(crc_t10dif_pcl) - -.section .rodata, "a", @progbits -.align 16 - -# Fold constants precomputed from the polynomial 0x18bb7 -# G(x) = x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x^1 + x^0 -.Lfold_across_128_bytes_consts: - .quad 0x0000000000006123 # x^(8*128) mod G(x) - .quad 0x0000000000002295 # x^(8*128+64) mod G(x) -.Lfold_across_64_bytes_consts: - .quad 0x0000000000001069 # x^(4*128) mod G(x) - .quad 0x000000000000dd31 # x^(4*128+64) mod G(x) -.Lfold_across_32_bytes_consts: - .quad 0x000000000000857d # x^(2*128) mod G(x) - .quad 0x0000000000007acc # x^(2*128+64) mod G(x) -.Lfold_across_16_bytes_consts: - .quad 0x000000000000a010 # x^(1*128) mod G(x) - .quad 0x0000000000001faa # x^(1*128+64) mod G(x) -.Lfinal_fold_consts: - .quad 0x1368000000000000 # x^48 * (x^48 mod G(x)) - .quad 0x2d56000000000000 # x^48 * (x^80 mod G(x)) -.Lbarrett_reduction_consts: - .quad 0x0000000000018bb7 # G(x) - .quad 0x00000001f65a57f8 # floor(x^48 / G(x)) - -.section .rodata.cst16.mask1, "aM", @progbits, 16 -.align 16 -.Lmask1: - .octa 0x80808080808080808080808080808080 - -.section .rodata.cst16.mask2, "aM", @progbits, 16 -.align 16 -.Lmask2: - .octa 0x00000000FFFFFFFFFFFFFFFFFFFFFFFF - -.section .rodata.cst16.bswap_mask, "aM", @progbits, 16 -.align 16 -.Lbswap_mask: - .octa 0x000102030405060708090A0B0C0D0E0F - -.section .rodata.cst32.byteshift_table, "aM", @progbits, 32 -.align 16 -# For 1 <= len <= 15, the 16-byte vector beginning at &byteshift_table[16 - len] -# is the index vector to shift left by 'len' bytes, and is also {0x80, ..., -# 0x80} XOR the index vector to shift right by '16 - len' bytes. -.Lbyteshift_table: - .byte 0x0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87 - .byte 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f - .byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 - .byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe , 0x0 diff --git a/block/Kconfig b/block/Kconfig index 5b623b876d3b..df8973bc0539 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -63,7 +63,7 @@ config BLK_DEV_BSGLIB config BLK_DEV_INTEGRITY bool "Block layer data integrity support" select CRC_T10DIF - select CRC64_ROCKSOFT + select CRC64 help Some storage devices allow extra information to be stored/retrieved to help protect the data. The block layer diff --git a/block/t10-pi.c b/block/t10-pi.c index 2d05421f0fa5..2577114ff20c 100644 --- a/block/t10-pi.c +++ b/block/t10-pi.c @@ -210,7 +210,7 @@ static void t10_pi_type1_complete(struct request *rq, unsigned int nr_bytes) static __be64 ext_pi_crc64(u64 crc, void *data, unsigned int len) { - return cpu_to_be64(crc64_rocksoft_update(crc, data, len)); + return cpu_to_be64(crc64_nvme(crc, data, len)); } static void ext_pi_crc64_generate(struct blk_integrity_iter *iter, diff --git a/crypto/Kconfig b/crypto/Kconfig index 74ae5f52b784..b8a436edf4c3 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1081,26 +1081,6 @@ config CRYPTO_CRC32 Used by RoCEv2 and f2fs. -config CRYPTO_CRCT10DIF - tristate "CRCT10DIF" - select CRYPTO_HASH - select CRC_T10DIF - help - CRC16 CRC algorithm used for the T10 (SCSI) Data Integrity Field (DIF) - - CRC algorithm used by the SCSI Block Commands standard. 
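Illustrative sketch (not part of this patch): the PCLMULQDQ folding in the deleted crc_t10dif-pcl assembly above must agree with a plain bit-serial CRC-T10DIF. A minimal Python reference, assuming the generator polynomial G(x) = 0x18bb7 stated in the deleted constant table and the "abc" vector from the crct10dif_tv_template removed later in this patch:

# Bit-serial CRC-T10DIF: msb-first, 16-bit CRC, zero initial value, no final XOR.
def crc_t10dif(data, crc=0):
    for byte in data:
        crc ^= byte << 8
        for _ in range(8):
            # 0x18bb7 is G(x) including the x^16 term; when the top bit is
            # shifted out, the XOR cancels it, so crc stays within 16 bits.
            crc = (crc << 1) ^ (0x18bb7 if crc & 0x8000 else 0)
    return crc

assert crc_t10dif(b"abc") == 0x443b   # matches the removed crypto self-test vector

Any accelerated implementation, the deleted assembly or the new template-based one, has to reproduce this byte-at-a-time definition.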
- -config CRYPTO_CRC64_ROCKSOFT - tristate "CRC64 based on Rocksoft Model algorithm" - depends on CRC64 - select CRYPTO_HASH - help - CRC64 CRC algorithm based on the Rocksoft Model CRC Algorithm - - Used by the NVMe implementation of T10 DIF (BLK_DEV_INTEGRITY) - - See https://zlib.net/crc_v3.txt - endmenu menu "Compression" diff --git a/crypto/Makefile b/crypto/Makefile index f67e853c4690..2f7d64eeb008 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -155,9 +155,6 @@ obj-$(CONFIG_CRYPTO_CRC32C) += crc32c_generic.o obj-$(CONFIG_CRYPTO_CRC32) += crc32_generic.o CFLAGS_crc32c_generic.o += -DARCH=$(ARCH) CFLAGS_crc32_generic.o += -DARCH=$(ARCH) -obj-$(CONFIG_CRYPTO_CRCT10DIF) += crct10dif_generic.o -CFLAGS_crct10dif_generic.o += -DARCH=$(ARCH) -obj-$(CONFIG_CRYPTO_CRC64_ROCKSOFT) += crc64_rocksoft_generic.o obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o obj-$(CONFIG_CRYPTO_LZO) += lzo.o lzo-rle.o obj-$(CONFIG_CRYPTO_LZ4) += lz4.o diff --git a/crypto/crc32c_generic.c b/crypto/crc32c_generic.c index 985da981d6e2..b1a36d32dc50 100644 --- a/crypto/crc32c_generic.c +++ b/crypto/crc32c_generic.c @@ -85,7 +85,7 @@ static int chksum_update(struct shash_desc *desc, const u8 *data, { struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - ctx->crc = crc32c_le_base(ctx->crc, data, length); + ctx->crc = crc32c_base(ctx->crc, data, length); return 0; } @@ -94,7 +94,7 @@ static int chksum_update_arch(struct shash_desc *desc, const u8 *data, { struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - ctx->crc = __crc32c_le(ctx->crc, data, length); + ctx->crc = crc32c(ctx->crc, data, length); return 0; } @@ -108,14 +108,14 @@ static int chksum_final(struct shash_desc *desc, u8 *out) static int __chksum_finup(u32 *crcp, const u8 *data, unsigned int len, u8 *out) { - put_unaligned_le32(~crc32c_le_base(*crcp, data, len), out); + put_unaligned_le32(~crc32c_base(*crcp, data, len), out); return 0; } static int __chksum_finup_arch(u32 *crcp, const u8 *data, unsigned int len, u8 *out) { - put_unaligned_le32(~__crc32c_le(*crcp, data, len), out); + put_unaligned_le32(~crc32c(*crcp, data, len), out); return 0; } diff --git a/crypto/crc64_rocksoft_generic.c b/crypto/crc64_rocksoft_generic.c deleted file mode 100644 index ce0f3059b912..000000000000 --- a/crypto/crc64_rocksoft_generic.c +++ /dev/null @@ -1,89 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only - -#include <linux/crc64.h> -#include <linux/module.h> -#include <crypto/internal/hash.h> -#include <linux/unaligned.h> - -static int chksum_init(struct shash_desc *desc) -{ - u64 *crc = shash_desc_ctx(desc); - - *crc = 0; - - return 0; -} - -static int chksum_update(struct shash_desc *desc, const u8 *data, - unsigned int length) -{ - u64 *crc = shash_desc_ctx(desc); - - *crc = crc64_rocksoft_generic(*crc, data, length); - - return 0; -} - -static int chksum_final(struct shash_desc *desc, u8 *out) -{ - u64 *crc = shash_desc_ctx(desc); - - put_unaligned_le64(*crc, out); - return 0; -} - -static int __chksum_finup(u64 crc, const u8 *data, unsigned int len, u8 *out) -{ - crc = crc64_rocksoft_generic(crc, data, len); - put_unaligned_le64(crc, out); - return 0; -} - -static int chksum_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - u64 *crc = shash_desc_ctx(desc); - - return __chksum_finup(*crc, data, len, out); -} - -static int chksum_digest(struct shash_desc *desc, const u8 *data, - unsigned int length, u8 *out) -{ - return __chksum_finup(0, data, length, out); -} - -static struct shash_alg alg = { - .digestsize = sizeof(u64), - 
.init = chksum_init, - .update = chksum_update, - .final = chksum_final, - .finup = chksum_finup, - .digest = chksum_digest, - .descsize = sizeof(u64), - .base = { - .cra_name = CRC64_ROCKSOFT_STRING, - .cra_driver_name = "crc64-rocksoft-generic", - .cra_priority = 200, - .cra_blocksize = 1, - .cra_module = THIS_MODULE, - } -}; - -static int __init crc64_rocksoft_init(void) -{ - return crypto_register_shash(&alg); -} - -static void __exit crc64_rocksoft_exit(void) -{ - crypto_unregister_shash(&alg); -} - -module_init(crc64_rocksoft_init); -module_exit(crc64_rocksoft_exit); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Rocksoft model CRC64 calculation."); -MODULE_ALIAS_CRYPTO("crc64-rocksoft"); -MODULE_ALIAS_CRYPTO("crc64-rocksoft-generic"); diff --git a/crypto/crct10dif_generic.c b/crypto/crct10dif_generic.c deleted file mode 100644 index 259cb01932cb..000000000000 --- a/crypto/crct10dif_generic.c +++ /dev/null @@ -1,168 +0,0 @@ -/* - * Cryptographic API. - * - * T10 Data Integrity Field CRC16 Crypto Transform - * - * Copyright (c) 2007 Oracle Corporation. All rights reserved. - * Written by Martin K. Petersen <martin.petersen@oracle.com> - * Copyright (C) 2013 Intel Corporation - * Author: Tim Chen <tim.c.chen@linux.intel.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#include <linux/module.h> -#include <linux/crc-t10dif.h> -#include <crypto/internal/hash.h> -#include <linux/init.h> -#include <linux/kernel.h> - -struct chksum_desc_ctx { - __u16 crc; -}; - -/* - * Steps through buffer one byte at a time, calculates reflected - * crc using table. 
- */ - -static int chksum_init(struct shash_desc *desc) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - ctx->crc = 0; - - return 0; -} - -static int chksum_update(struct shash_desc *desc, const u8 *data, - unsigned int length) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - ctx->crc = crc_t10dif_generic(ctx->crc, data, length); - return 0; -} - -static int chksum_update_arch(struct shash_desc *desc, const u8 *data, - unsigned int length) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - ctx->crc = crc_t10dif_update(ctx->crc, data, length); - return 0; -} - -static int chksum_final(struct shash_desc *desc, u8 *out) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - *(__u16 *)out = ctx->crc; - return 0; -} - -static int __chksum_finup(__u16 crc, const u8 *data, unsigned int len, u8 *out) -{ - *(__u16 *)out = crc_t10dif_generic(crc, data, len); - return 0; -} - -static int __chksum_finup_arch(__u16 crc, const u8 *data, unsigned int len, - u8 *out) -{ - *(__u16 *)out = crc_t10dif_update(crc, data, len); - return 0; -} - -static int chksum_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - return __chksum_finup(ctx->crc, data, len, out); -} - -static int chksum_finup_arch(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - return __chksum_finup_arch(ctx->crc, data, len, out); -} - -static int chksum_digest(struct shash_desc *desc, const u8 *data, - unsigned int length, u8 *out) -{ - return __chksum_finup(0, data, length, out); -} - -static int chksum_digest_arch(struct shash_desc *desc, const u8 *data, - unsigned int length, u8 *out) -{ - return __chksum_finup_arch(0, data, length, out); -} - -static struct shash_alg algs[] = {{ - .digestsize = CRC_T10DIF_DIGEST_SIZE, - .init = chksum_init, - .update = chksum_update, - .final = chksum_final, - .finup = chksum_finup, - .digest = chksum_digest, - .descsize = sizeof(struct chksum_desc_ctx), - .base.cra_name = "crct10dif", - .base.cra_driver_name = "crct10dif-generic", - .base.cra_priority = 100, - .base.cra_blocksize = CRC_T10DIF_BLOCK_SIZE, - .base.cra_module = THIS_MODULE, -}, { - .digestsize = CRC_T10DIF_DIGEST_SIZE, - .init = chksum_init, - .update = chksum_update_arch, - .final = chksum_final, - .finup = chksum_finup_arch, - .digest = chksum_digest_arch, - .descsize = sizeof(struct chksum_desc_ctx), - .base.cra_name = "crct10dif", - .base.cra_driver_name = "crct10dif-" __stringify(ARCH), - .base.cra_priority = 150, - .base.cra_blocksize = CRC_T10DIF_BLOCK_SIZE, - .base.cra_module = THIS_MODULE, -}}; - -static int num_algs; - -static int __init crct10dif_mod_init(void) -{ - /* register the arch flavor only if it differs from the generic one */ - num_algs = 1 + crc_t10dif_is_optimized(); - - return crypto_register_shashes(algs, num_algs); -} - -static void __exit crct10dif_mod_fini(void) -{ - crypto_unregister_shashes(algs, num_algs); -} - -subsys_initcall(crct10dif_mod_init); -module_exit(crct10dif_mod_fini); - -MODULE_AUTHOR("Tim Chen <tim.c.chen@linux.intel.com>"); -MODULE_DESCRIPTION("T10 DIF CRC calculation."); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_CRYPTO("crct10dif"); -MODULE_ALIAS_CRYPTO("crct10dif-generic"); diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index e1a74cb2cfbe..879fc21dcc16 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -1654,10 +1654,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 
num_mb) ret = min(ret, tcrypt_test("ghash")); break; - case 47: - ret = min(ret, tcrypt_test("crct10dif")); - break; - case 48: ret = min(ret, tcrypt_test("sha3-224")); break; @@ -2272,10 +2268,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) test_hash_speed("crc32c", sec, generic_hash_speed_template); if (mode > 300 && mode < 400) break; fallthrough; - case 320: - test_hash_speed("crct10dif", sec, generic_hash_speed_template); - if (mode > 300 && mode < 400) break; - fallthrough; case 321: test_hash_speed("poly1305", sec, poly1305_speed_template); if (mode > 300 && mode < 400) break; diff --git a/crypto/testmgr.c b/crypto/testmgr.c index e61490ba4095..1e42582bc7f1 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -4760,20 +4760,6 @@ static const struct alg_test_desc alg_test_descs[] = { .hash = __VECS(crc32c_tv_template) } }, { - .alg = "crc64-rocksoft", - .test = alg_test_hash, - .fips_allowed = 1, - .suite = { - .hash = __VECS(crc64_rocksoft_tv_template) - } - }, { - .alg = "crct10dif", - .test = alg_test_hash, - .fips_allowed = 1, - .suite = { - .hash = __VECS(crct10dif_tv_template) - } - }, { .alg = "ctr(aes)", .test = alg_test_skcipher, .fips_allowed = 1, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index d754ab997186..d3a99d15a3e5 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -6017,309 +6017,6 @@ static const struct hash_testvec rmd160_tv_template[] = { } }; -static const u8 zeroes[4096] = { [0 ... 4095] = 0 }; -static const u8 ones[4096] = { [0 ... 4095] = 0xff }; - -static const struct hash_testvec crc64_rocksoft_tv_template[] = { - { - .plaintext = zeroes, - .psize = 4096, - .digest = "\x4e\xb6\x22\xeb\x67\xd3\x82\x64", - }, { - .plaintext = ones, - .psize = 4096, - .digest = "\xac\xa3\xec\x02\x73\xba\xdd\xc0", - } -}; - -static const struct hash_testvec crct10dif_tv_template[] = { - { - .plaintext = "abc", - .psize = 3, - .digest = (u8 *)(u16 []){ 0x443b }, - }, { - .plaintext = "1234567890123456789012345678901234567890" - "123456789012345678901234567890123456789", - .psize = 79, - .digest = (u8 *)(u16 []){ 0x4b70 }, - }, { - .plaintext = "abcdddddddddddddddddddddddddddddddddddddddd" - "ddddddddddddd", - .psize = 56, - .digest = (u8 *)(u16 []){ 0x9ce3 }, - }, { - .plaintext = "1234567890123456789012345678901234567890" - "1234567890123456789012345678901234567890" - "1234567890123456789012345678901234567890" - "1234567890123456789012345678901234567890" - "1234567890123456789012345678901234567890" - "1234567890123456789012345678901234567890" - "1234567890123456789012345678901234567890" - "123456789012345678901234567890123456789", - .psize = 319, - .digest = (u8 *)(u16 []){ 0x44c6 }, - }, { - .plaintext = "\x6e\x05\x79\x10\xa7\x1b\xb2\x49" - "\xe0\x54\xeb\x82\x19\x8d\x24\xbb" - "\x2f\xc6\x5d\xf4\x68\xff\x96\x0a" - "\xa1\x38\xcf\x43\xda\x71\x08\x7c" - "\x13\xaa\x1e\xb5\x4c\xe3\x57\xee" - "\x85\x1c\x90\x27\xbe\x32\xc9\x60" - "\xf7\x6b\x02\x99\x0d\xa4\x3b\xd2" - "\x46\xdd\x74\x0b\x7f\x16\xad\x21" - "\xb8\x4f\xe6\x5a\xf1\x88\x1f\x93" - "\x2a\xc1\x35\xcc\x63\xfa\x6e\x05" - "\x9c\x10\xa7\x3e\xd5\x49\xe0\x77" - "\x0e\x82\x19\xb0\x24\xbb\x52\xe9" - "\x5d\xf4\x8b\x22\x96\x2d\xc4\x38" - "\xcf\x66\xfd\x71\x08\x9f\x13\xaa" - "\x41\xd8\x4c\xe3\x7a\x11\x85\x1c" - "\xb3\x27\xbe\x55\xec\x60\xf7\x8e" - "\x02\x99\x30\xc7\x3b\xd2\x69\x00" - "\x74\x0b\xa2\x16\xad\x44\xdb\x4f" - "\xe6\x7d\x14\x88\x1f\xb6\x2a\xc1" - "\x58\xef\x63\xfa\x91\x05\x9c\x33" - "\xca\x3e\xd5\x6c\x03\x77\x0e\xa5" - "\x19\xb0\x47\xde\x52\xe9\x80\x17" - 
"\x8b\x22\xb9\x2d\xc4\x5b\xf2\x66" - "\xfd\x94\x08\x9f\x36\xcd\x41\xd8" - "\x6f\x06\x7a\x11\xa8\x1c\xb3\x4a" - "\xe1\x55\xec\x83\x1a\x8e\x25\xbc" - "\x30\xc7\x5e\xf5\x69\x00\x97\x0b" - "\xa2\x39\xd0\x44\xdb\x72\x09\x7d" - "\x14\xab\x1f\xb6\x4d\xe4\x58\xef" - "\x86\x1d\x91\x28\xbf\x33\xca\x61" - "\xf8\x6c\x03\x9a\x0e\xa5\x3c\xd3" - "\x47\xde\x75\x0c\x80\x17\xae\x22" - "\xb9\x50\xe7\x5b\xf2\x89\x20\x94" - "\x2b\xc2\x36\xcd\x64\xfb\x6f\x06" - "\x9d\x11\xa8\x3f\xd6\x4a\xe1\x78" - "\x0f\x83\x1a\xb1\x25\xbc\x53\xea" - "\x5e\xf5\x8c\x00\x97\x2e\xc5\x39" - "\xd0\x67\xfe\x72\x09\xa0\x14\xab" - "\x42\xd9\x4d\xe4\x7b\x12\x86\x1d" - "\xb4\x28\xbf\x56\xed\x61\xf8\x8f" - "\x03\x9a\x31\xc8\x3c\xd3\x6a\x01" - "\x75\x0c\xa3\x17\xae\x45\xdc\x50" - "\xe7\x7e\x15\x89\x20\xb7\x2b\xc2" - "\x59\xf0\x64\xfb\x92\x06\x9d\x34" - "\xcb\x3f\xd6\x6d\x04\x78\x0f\xa6" - "\x1a\xb1\x48\xdf\x53\xea\x81\x18" - "\x8c\x23\xba\x2e\xc5\x5c\xf3\x67" - "\xfe\x95\x09\xa0\x37\xce\x42\xd9" - "\x70\x07\x7b\x12\xa9\x1d\xb4\x4b" - "\xe2\x56\xed\x84\x1b\x8f\x26\xbd" - "\x31\xc8\x5f\xf6\x6a\x01\x98\x0c" - "\xa3\x3a\xd1\x45\xdc\x73\x0a\x7e" - "\x15\xac\x20\xb7\x4e\xe5\x59\xf0" - "\x87\x1e\x92\x29\xc0\x34\xcb\x62" - "\xf9\x6d\x04\x9b\x0f\xa6\x3d\xd4" - "\x48\xdf\x76\x0d\x81\x18\xaf\x23" - "\xba\x51\xe8\x5c\xf3\x8a\x21\x95" - "\x2c\xc3\x37\xce\x65\xfc\x70\x07" - "\x9e\x12\xa9\x40\xd7\x4b\xe2\x79" - "\x10\x84\x1b\xb2\x26\xbd\x54\xeb" - "\x5f\xf6\x8d\x01\x98\x2f\xc6\x3a" - "\xd1\x68\xff\x73\x0a\xa1\x15\xac" - "\x43\xda\x4e\xe5\x7c\x13\x87\x1e" - "\xb5\x29\xc0\x57\xee\x62\xf9\x90" - "\x04\x9b\x32\xc9\x3d\xd4\x6b\x02" - "\x76\x0d\xa4\x18\xaf\x46\xdd\x51" - "\xe8\x7f\x16\x8a\x21\xb8\x2c\xc3" - "\x5a\xf1\x65\xfc\x93\x07\x9e\x35" - "\xcc\x40\xd7\x6e\x05\x79\x10\xa7" - "\x1b\xb2\x49\xe0\x54\xeb\x82\x19" - "\x8d\x24\xbb\x2f\xc6\x5d\xf4\x68" - "\xff\x96\x0a\xa1\x38\xcf\x43\xda" - "\x71\x08\x7c\x13\xaa\x1e\xb5\x4c" - "\xe3\x57\xee\x85\x1c\x90\x27\xbe" - "\x32\xc9\x60\xf7\x6b\x02\x99\x0d" - "\xa4\x3b\xd2\x46\xdd\x74\x0b\x7f" - "\x16\xad\x21\xb8\x4f\xe6\x5a\xf1" - "\x88\x1f\x93\x2a\xc1\x35\xcc\x63" - "\xfa\x6e\x05\x9c\x10\xa7\x3e\xd5" - "\x49\xe0\x77\x0e\x82\x19\xb0\x24" - "\xbb\x52\xe9\x5d\xf4\x8b\x22\x96" - "\x2d\xc4\x38\xcf\x66\xfd\x71\x08" - "\x9f\x13\xaa\x41\xd8\x4c\xe3\x7a" - "\x11\x85\x1c\xb3\x27\xbe\x55\xec" - "\x60\xf7\x8e\x02\x99\x30\xc7\x3b" - "\xd2\x69\x00\x74\x0b\xa2\x16\xad" - "\x44\xdb\x4f\xe6\x7d\x14\x88\x1f" - "\xb6\x2a\xc1\x58\xef\x63\xfa\x91" - "\x05\x9c\x33\xca\x3e\xd5\x6c\x03" - "\x77\x0e\xa5\x19\xb0\x47\xde\x52" - "\xe9\x80\x17\x8b\x22\xb9\x2d\xc4" - "\x5b\xf2\x66\xfd\x94\x08\x9f\x36" - "\xcd\x41\xd8\x6f\x06\x7a\x11\xa8" - "\x1c\xb3\x4a\xe1\x55\xec\x83\x1a" - "\x8e\x25\xbc\x30\xc7\x5e\xf5\x69" - "\x00\x97\x0b\xa2\x39\xd0\x44\xdb" - "\x72\x09\x7d\x14\xab\x1f\xb6\x4d" - "\xe4\x58\xef\x86\x1d\x91\x28\xbf" - "\x33\xca\x61\xf8\x6c\x03\x9a\x0e" - "\xa5\x3c\xd3\x47\xde\x75\x0c\x80" - "\x17\xae\x22\xb9\x50\xe7\x5b\xf2" - "\x89\x20\x94\x2b\xc2\x36\xcd\x64" - "\xfb\x6f\x06\x9d\x11\xa8\x3f\xd6" - "\x4a\xe1\x78\x0f\x83\x1a\xb1\x25" - "\xbc\x53\xea\x5e\xf5\x8c\x00\x97" - "\x2e\xc5\x39\xd0\x67\xfe\x72\x09" - "\xa0\x14\xab\x42\xd9\x4d\xe4\x7b" - "\x12\x86\x1d\xb4\x28\xbf\x56\xed" - "\x61\xf8\x8f\x03\x9a\x31\xc8\x3c" - "\xd3\x6a\x01\x75\x0c\xa3\x17\xae" - "\x45\xdc\x50\xe7\x7e\x15\x89\x20" - "\xb7\x2b\xc2\x59\xf0\x64\xfb\x92" - "\x06\x9d\x34\xcb\x3f\xd6\x6d\x04" - "\x78\x0f\xa6\x1a\xb1\x48\xdf\x53" - "\xea\x81\x18\x8c\x23\xba\x2e\xc5" - "\x5c\xf3\x67\xfe\x95\x09\xa0\x37" - "\xce\x42\xd9\x70\x07\x7b\x12\xa9" - "\x1d\xb4\x4b\xe2\x56\xed\x84\x1b" - 
"\x8f\x26\xbd\x31\xc8\x5f\xf6\x6a" - "\x01\x98\x0c\xa3\x3a\xd1\x45\xdc" - "\x73\x0a\x7e\x15\xac\x20\xb7\x4e" - "\xe5\x59\xf0\x87\x1e\x92\x29\xc0" - "\x34\xcb\x62\xf9\x6d\x04\x9b\x0f" - "\xa6\x3d\xd4\x48\xdf\x76\x0d\x81" - "\x18\xaf\x23\xba\x51\xe8\x5c\xf3" - "\x8a\x21\x95\x2c\xc3\x37\xce\x65" - "\xfc\x70\x07\x9e\x12\xa9\x40\xd7" - "\x4b\xe2\x79\x10\x84\x1b\xb2\x26" - "\xbd\x54\xeb\x5f\xf6\x8d\x01\x98" - "\x2f\xc6\x3a\xd1\x68\xff\x73\x0a" - "\xa1\x15\xac\x43\xda\x4e\xe5\x7c" - "\x13\x87\x1e\xb5\x29\xc0\x57\xee" - "\x62\xf9\x90\x04\x9b\x32\xc9\x3d" - "\xd4\x6b\x02\x76\x0d\xa4\x18\xaf" - "\x46\xdd\x51\xe8\x7f\x16\x8a\x21" - "\xb8\x2c\xc3\x5a\xf1\x65\xfc\x93" - "\x07\x9e\x35\xcc\x40\xd7\x6e\x05" - "\x79\x10\xa7\x1b\xb2\x49\xe0\x54" - "\xeb\x82\x19\x8d\x24\xbb\x2f\xc6" - "\x5d\xf4\x68\xff\x96\x0a\xa1\x38" - "\xcf\x43\xda\x71\x08\x7c\x13\xaa" - "\x1e\xb5\x4c\xe3\x57\xee\x85\x1c" - "\x90\x27\xbe\x32\xc9\x60\xf7\x6b" - "\x02\x99\x0d\xa4\x3b\xd2\x46\xdd" - "\x74\x0b\x7f\x16\xad\x21\xb8\x4f" - "\xe6\x5a\xf1\x88\x1f\x93\x2a\xc1" - "\x35\xcc\x63\xfa\x6e\x05\x9c\x10" - "\xa7\x3e\xd5\x49\xe0\x77\x0e\x82" - "\x19\xb0\x24\xbb\x52\xe9\x5d\xf4" - "\x8b\x22\x96\x2d\xc4\x38\xcf\x66" - "\xfd\x71\x08\x9f\x13\xaa\x41\xd8" - "\x4c\xe3\x7a\x11\x85\x1c\xb3\x27" - "\xbe\x55\xec\x60\xf7\x8e\x02\x99" - "\x30\xc7\x3b\xd2\x69\x00\x74\x0b" - "\xa2\x16\xad\x44\xdb\x4f\xe6\x7d" - "\x14\x88\x1f\xb6\x2a\xc1\x58\xef" - "\x63\xfa\x91\x05\x9c\x33\xca\x3e" - "\xd5\x6c\x03\x77\x0e\xa5\x19\xb0" - "\x47\xde\x52\xe9\x80\x17\x8b\x22" - "\xb9\x2d\xc4\x5b\xf2\x66\xfd\x94" - "\x08\x9f\x36\xcd\x41\xd8\x6f\x06" - "\x7a\x11\xa8\x1c\xb3\x4a\xe1\x55" - "\xec\x83\x1a\x8e\x25\xbc\x30\xc7" - "\x5e\xf5\x69\x00\x97\x0b\xa2\x39" - "\xd0\x44\xdb\x72\x09\x7d\x14\xab" - "\x1f\xb6\x4d\xe4\x58\xef\x86\x1d" - "\x91\x28\xbf\x33\xca\x61\xf8\x6c" - "\x03\x9a\x0e\xa5\x3c\xd3\x47\xde" - "\x75\x0c\x80\x17\xae\x22\xb9\x50" - "\xe7\x5b\xf2\x89\x20\x94\x2b\xc2" - "\x36\xcd\x64\xfb\x6f\x06\x9d\x11" - "\xa8\x3f\xd6\x4a\xe1\x78\x0f\x83" - "\x1a\xb1\x25\xbc\x53\xea\x5e\xf5" - "\x8c\x00\x97\x2e\xc5\x39\xd0\x67" - "\xfe\x72\x09\xa0\x14\xab\x42\xd9" - "\x4d\xe4\x7b\x12\x86\x1d\xb4\x28" - "\xbf\x56\xed\x61\xf8\x8f\x03\x9a" - "\x31\xc8\x3c\xd3\x6a\x01\x75\x0c" - "\xa3\x17\xae\x45\xdc\x50\xe7\x7e" - "\x15\x89\x20\xb7\x2b\xc2\x59\xf0" - "\x64\xfb\x92\x06\x9d\x34\xcb\x3f" - "\xd6\x6d\x04\x78\x0f\xa6\x1a\xb1" - "\x48\xdf\x53\xea\x81\x18\x8c\x23" - "\xba\x2e\xc5\x5c\xf3\x67\xfe\x95" - "\x09\xa0\x37\xce\x42\xd9\x70\x07" - "\x7b\x12\xa9\x1d\xb4\x4b\xe2\x56" - "\xed\x84\x1b\x8f\x26\xbd\x31\xc8" - "\x5f\xf6\x6a\x01\x98\x0c\xa3\x3a" - "\xd1\x45\xdc\x73\x0a\x7e\x15\xac" - "\x20\xb7\x4e\xe5\x59\xf0\x87\x1e" - "\x92\x29\xc0\x34\xcb\x62\xf9\x6d" - "\x04\x9b\x0f\xa6\x3d\xd4\x48\xdf" - "\x76\x0d\x81\x18\xaf\x23\xba\x51" - "\xe8\x5c\xf3\x8a\x21\x95\x2c\xc3" - "\x37\xce\x65\xfc\x70\x07\x9e\x12" - "\xa9\x40\xd7\x4b\xe2\x79\x10\x84" - "\x1b\xb2\x26\xbd\x54\xeb\x5f\xf6" - "\x8d\x01\x98\x2f\xc6\x3a\xd1\x68" - "\xff\x73\x0a\xa1\x15\xac\x43\xda" - "\x4e\xe5\x7c\x13\x87\x1e\xb5\x29" - "\xc0\x57\xee\x62\xf9\x90\x04\x9b" - "\x32\xc9\x3d\xd4\x6b\x02\x76\x0d" - "\xa4\x18\xaf\x46\xdd\x51\xe8\x7f" - "\x16\x8a\x21\xb8\x2c\xc3\x5a\xf1" - "\x65\xfc\x93\x07\x9e\x35\xcc\x40" - "\xd7\x6e\x05\x79\x10\xa7\x1b\xb2" - "\x49\xe0\x54\xeb\x82\x19\x8d\x24" - "\xbb\x2f\xc6\x5d\xf4\x68\xff\x96" - "\x0a\xa1\x38\xcf\x43\xda\x71\x08" - "\x7c\x13\xaa\x1e\xb5\x4c\xe3\x57" - "\xee\x85\x1c\x90\x27\xbe\x32\xc9" - "\x60\xf7\x6b\x02\x99\x0d\xa4\x3b" - "\xd2\x46\xdd\x74\x0b\x7f\x16\xad" - "\x21\xb8\x4f\xe6\x5a\xf1\x88\x1f" - 
"\x93\x2a\xc1\x35\xcc\x63\xfa\x6e" - "\x05\x9c\x10\xa7\x3e\xd5\x49\xe0" - "\x77\x0e\x82\x19\xb0\x24\xbb\x52" - "\xe9\x5d\xf4\x8b\x22\x96\x2d\xc4" - "\x38\xcf\x66\xfd\x71\x08\x9f\x13" - "\xaa\x41\xd8\x4c\xe3\x7a\x11\x85" - "\x1c\xb3\x27\xbe\x55\xec\x60\xf7" - "\x8e\x02\x99\x30\xc7\x3b\xd2\x69" - "\x00\x74\x0b\xa2\x16\xad\x44\xdb" - "\x4f\xe6\x7d\x14\x88\x1f\xb6\x2a" - "\xc1\x58\xef\x63\xfa\x91\x05\x9c" - "\x33\xca\x3e\xd5\x6c\x03\x77\x0e" - "\xa5\x19\xb0\x47\xde\x52\xe9\x80" - "\x17\x8b\x22\xb9\x2d\xc4\x5b\xf2" - "\x66\xfd\x94\x08\x9f\x36\xcd\x41" - "\xd8\x6f\x06\x7a\x11\xa8\x1c\xb3" - "\x4a\xe1\x55\xec\x83\x1a\x8e\x25" - "\xbc\x30\xc7\x5e\xf5\x69\x00\x97" - "\x0b\xa2\x39\xd0\x44\xdb\x72\x09" - "\x7d\x14\xab\x1f\xb6\x4d\xe4\x58" - "\xef\x86\x1d\x91\x28\xbf\x33\xca" - "\x61\xf8\x6c\x03\x9a\x0e\xa5\x3c" - "\xd3\x47\xde\x75\x0c\x80\x17\xae" - "\x22\xb9\x50\xe7\x5b\xf2\x89\x20" - "\x94\x2b\xc2\x36\xcd\x64\xfb\x6f" - "\x06\x9d\x11\xa8\x3f\xd6\x4a\xe1" - "\x78\x0f\x83\x1a\xb1\x25\xbc\x53" - "\xea\x5e\xf5\x8c\x00\x97\x2e\xc5" - "\x39\xd0\x67\xfe\x72\x09\xa0\x14" - "\xab\x42\xd9\x4d\xe4\x7b\x12\x86" - "\x1d\xb4\x28\xbf\x56\xed\x61\xf8" - "\x8f\x03\x9a\x31\xc8\x3c\xd3\x6a" - "\x01\x75\x0c\xa3\x17\xae\x45\xdc" - "\x50\xe7\x7e\x15\x89\x20\xb7\x2b" - "\xc2\x59\xf0\x64\xfb\x92\x06\x9d" - "\x34\xcb\x3f\xd6\x6d\x04\x78\x0f" - "\xa6\x1a\xb1\x48\xdf\x53\xea\x81" - "\x18\x8c\x23\xba\x2e\xc5\x5c\xf3" - "\x67\xfe\x95\x09\xa0\x37\xce\x42" - "\xd9\x70\x07\x7b\x12\xa9\x1d\xb4" - "\x4b\xe2\x56\xed\x84\x1b\x8f\x26" - "\xbd\x31\xc8\x5f\xf6\x6a\x01\x98", - .psize = 2048, - .digest = (u8 *)(u16 []){ 0x23ca }, - } -}; - /* * Streebog test vectors from RFC 6986 and GOST R 34.11-2012 */ diff --git a/drivers/crypto/stm32/stm32-crc32.c b/drivers/crypto/stm32/stm32-crc32.c index de4d0402f133..fd29785a3ecf 100644 --- a/drivers/crypto/stm32/stm32-crc32.c +++ b/drivers/crypto/stm32/stm32-crc32.c @@ -162,7 +162,7 @@ static int burst_update(struct shash_desc *desc, const u8 *d8, if (mctx->poly == CRC32_POLY_LE) ctx->partial = crc32_le(ctx->partial, d8, length); else - ctx->partial = __crc32c_le(ctx->partial, d8, length); + ctx->partial = crc32c(ctx->partial, d8, length); goto pm_out; } diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h index ea5eee50dc39..4e692de1da93 100644 --- a/drivers/infiniband/sw/siw/siw.h +++ b/drivers/infiniband/sw/siw/siw.h @@ -676,8 +676,8 @@ static inline __wsum siw_csum_update(const void *buff, int len, __wsum sum) static inline __wsum siw_csum_combine(__wsum csum, __wsum csum2, int offset, int len) { - return (__force __wsum)__crc32c_le_combine((__force __u32)csum, - (__force __u32)csum2, len); + return (__force __wsum)crc32c_combine((__force __u32)csum, + (__force __u32)csum2, len); } static inline void siw_crc_skb(struct siw_rx_stream *srx, unsigned int len) diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index e530271cb86b..ba768ca7f422 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -714,7 +714,7 @@ static void r5l_submit_current_io(struct r5l_log *log) block = page_address(io->meta_page); block->meta_size = cpu_to_le32(io->meta_offset); - crc = crc32c_le(log->uuid_checksum, block, PAGE_SIZE); + crc = crc32c(log->uuid_checksum, block, PAGE_SIZE); block->checksum = cpu_to_le32(crc); log->current_io = NULL; @@ -1020,8 +1020,8 @@ int r5l_write_stripe(struct r5l_log *log, struct stripe_head *sh) if (test_bit(STRIPE_LOG_TRAPPED, &sh->state)) continue; addr = kmap_local_page(sh->dev[i].page); - sh->dev[i].log_checksum = 
crc32c_le(log->uuid_checksum, - addr, PAGE_SIZE); + sh->dev[i].log_checksum = crc32c(log->uuid_checksum, + addr, PAGE_SIZE); kunmap_local(addr); } parity_pages = 1 + !!(sh->qd_idx >= 0); @@ -1741,7 +1741,7 @@ static int r5l_recovery_read_meta_block(struct r5l_log *log, le64_to_cpu(mb->position) != ctx->pos) return -EINVAL; - crc = crc32c_le(log->uuid_checksum, mb, PAGE_SIZE); + crc = crc32c(log->uuid_checksum, mb, PAGE_SIZE); if (stored_crc != crc) return -EINVAL; @@ -1780,8 +1780,7 @@ static int r5l_log_write_empty_meta_block(struct r5l_log *log, sector_t pos, return -ENOMEM; r5l_recovery_create_empty_meta_block(log, page, pos, seq); mb = page_address(page); - mb->checksum = cpu_to_le32(crc32c_le(log->uuid_checksum, - mb, PAGE_SIZE)); + mb->checksum = cpu_to_le32(crc32c(log->uuid_checksum, mb, PAGE_SIZE)); if (!sync_page_io(log->rdev, pos, PAGE_SIZE, page, REQ_OP_WRITE | REQ_SYNC | REQ_FUA, false)) { __free_page(page); @@ -1976,7 +1975,7 @@ r5l_recovery_verify_data_checksum(struct r5l_log *log, r5l_recovery_read_page(log, ctx, page, log_offset); addr = kmap_local_page(page); - checksum = crc32c_le(log->uuid_checksum, addr, PAGE_SIZE); + checksum = crc32c(log->uuid_checksum, addr, PAGE_SIZE); kunmap_local(addr); return (le32_to_cpu(log_checksum) == checksum) ? 0 : -EINVAL; } @@ -2379,8 +2378,8 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log, raid5_compute_blocknr(sh, i, 0)); addr = kmap_local_page(dev->page); payload->checksum[0] = cpu_to_le32( - crc32c_le(log->uuid_checksum, addr, - PAGE_SIZE)); + crc32c(log->uuid_checksum, addr, + PAGE_SIZE)); kunmap_local(addr); sync_page_io(log->rdev, write_pos, PAGE_SIZE, dev->page, REQ_OP_WRITE, false); @@ -2392,8 +2391,8 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log, } } mb->meta_size = cpu_to_le32(offset); - mb->checksum = cpu_to_le32(crc32c_le(log->uuid_checksum, - mb, PAGE_SIZE)); + mb->checksum = cpu_to_le32(crc32c(log->uuid_checksum, + mb, PAGE_SIZE)); sync_page_io(log->rdev, ctx->pos, PAGE_SIZE, page, REQ_OP_WRITE | REQ_SYNC | REQ_FUA, false); sh->log_start = ctx->pos; @@ -2885,8 +2884,8 @@ int r5c_cache_data(struct r5l_log *log, struct stripe_head *sh) if (!test_bit(R5_Wantwrite, &sh->dev[i].flags)) continue; addr = kmap_local_page(sh->dev[i].page); - sh->dev[i].log_checksum = crc32c_le(log->uuid_checksum, - addr, PAGE_SIZE); + sh->dev[i].log_checksum = crc32c(log->uuid_checksum, + addr, PAGE_SIZE); kunmap_local(addr); pages++; } @@ -2969,7 +2968,7 @@ static int r5l_load_log(struct r5l_log *log) } stored_crc = le32_to_cpu(mb->checksum); mb->checksum = 0; - expected_crc = crc32c_le(log->uuid_checksum, mb, PAGE_SIZE); + expected_crc = crc32c(log->uuid_checksum, mb, PAGE_SIZE); if (stored_crc != expected_crc) { create_super = true; goto create; @@ -3077,8 +3076,8 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev) return -ENOMEM; log->rdev = rdev; log->need_cache_flush = bdev_write_cache(rdev->bdev); - log->uuid_checksum = crc32c_le(~0, rdev->mddev->uuid, - sizeof(rdev->mddev->uuid)); + log->uuid_checksum = crc32c(~0, rdev->mddev->uuid, + sizeof(rdev->mddev->uuid)); mutex_init(&log->io_mutex); diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c index 37c4da5311ca..c0fb335311aa 100644 --- a/drivers/md/raid5-ppl.c +++ b/drivers/md/raid5-ppl.c @@ -346,9 +346,9 @@ static int ppl_log_stripe(struct ppl_log *log, struct stripe_head *sh) if (!test_bit(STRIPE_FULL_WRITE, &sh->state)) { le32_add_cpu(&e->pp_size, PAGE_SIZE); io->pp_size += PAGE_SIZE; - e->checksum = 
cpu_to_le32(crc32c_le(le32_to_cpu(e->checksum), - page_address(sh->ppl_page), - PAGE_SIZE)); + e->checksum = cpu_to_le32(crc32c(le32_to_cpu(e->checksum), + page_address(sh->ppl_page), + PAGE_SIZE)); } list_add_tail(&sh->log_list, &io->stripe_list); @@ -454,7 +454,7 @@ static void ppl_submit_iounit(struct ppl_io_unit *io) } pplhdr->entries_count = cpu_to_le32(io->entries_count); - pplhdr->checksum = cpu_to_le32(~crc32c_le(~0, pplhdr, PPL_HEADER_SIZE)); + pplhdr->checksum = cpu_to_le32(~crc32c(~0, pplhdr, PPL_HEADER_SIZE)); /* Rewind the buffer if current PPL is larger then remaining space */ if (log->use_multippl && @@ -998,7 +998,7 @@ static int ppl_recover(struct ppl_log *log, struct ppl_header *pplhdr, goto out; } - crc = crc32c_le(crc, page_address(page), s); + crc = crc32c(crc, page_address(page), s); pp_size -= s; sector += s >> 9; @@ -1052,7 +1052,7 @@ static int ppl_write_empty_header(struct ppl_log *log) log->rdev->ppl.size, GFP_NOIO, 0); memset(pplhdr->reserved, 0xff, PPL_HDR_RESERVED); pplhdr->signature = cpu_to_le32(log->ppl_conf->signature); - pplhdr->checksum = cpu_to_le32(~crc32c_le(~0, pplhdr, PAGE_SIZE)); + pplhdr->checksum = cpu_to_le32(~crc32c(~0, pplhdr, PAGE_SIZE)); if (!sync_page_io(rdev, rdev->ppl.sector - rdev->data_offset, PPL_HEADER_SIZE, page, REQ_OP_WRITE | REQ_SYNC | @@ -1106,7 +1106,7 @@ static int ppl_load_distributed(struct ppl_log *log) /* check header validity */ crc_stored = le32_to_cpu(pplhdr->checksum); pplhdr->checksum = 0; - crc = ~crc32c_le(~0, pplhdr, PAGE_SIZE); + crc = ~crc32c(~0, pplhdr, PAGE_SIZE); if (crc_stored != crc) { pr_debug("%s: ppl header crc does not match: stored: 0x%x calculated: 0x%x (offset: %llu)\n", @@ -1390,7 +1390,7 @@ int ppl_init_log(struct r5conf *conf) spin_lock_init(&ppl_conf->no_mem_stripes_lock); if (!mddev->external) { - ppl_conf->signature = ~crc32c_le(~0, mddev->uuid, sizeof(mddev->uuid)); + ppl_conf->signature = ~crc32c(~0, mddev->uuid, sizeof(mddev->uuid)); ppl_conf->block_size = 512; } else { ppl_conf->block_size = diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c index 8e04552d2216..02c8213915a5 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c @@ -2593,7 +2593,7 @@ void bnx2x_init_rx_mode_obj(struct bnx2x *bp, /********************* Multicast verbs: SET, CLEAR ****************************/ static inline u8 bnx2x_mcast_bin_from_mac(u8 *mac) { - return (crc32c_le(0, mac, ETH_ALEN) >> 24) & 0xff; + return (crc32c(0, mac, ETH_ALEN) >> 24) & 0xff; } struct bnx2x_mcast_mac_elem { diff --git a/drivers/thunderbolt/ctl.c b/drivers/thunderbolt/ctl.c index dc1f456736dc..cd15e84c47f4 100644 --- a/drivers/thunderbolt/ctl.c +++ b/drivers/thunderbolt/ctl.c @@ -312,7 +312,7 @@ static void tb_cfg_print_error(struct tb_ctl *ctl, enum tb_cfg_space space, static __be32 tb_crc(const void *data, size_t len) { - return cpu_to_be32(~__crc32c_le(~0, data, len)); + return cpu_to_be32(~crc32c(~0, data, len)); } static void tb_ctl_pkg_free(struct ctl_pkg *pkg) diff --git a/drivers/thunderbolt/eeprom.c b/drivers/thunderbolt/eeprom.c index 9c1d65d26553..e66183a72cf9 100644 --- a/drivers/thunderbolt/eeprom.c +++ b/drivers/thunderbolt/eeprom.c @@ -211,7 +211,7 @@ static u8 tb_crc8(u8 *data, int len) static u32 tb_crc32(void *data, size_t len) { - return ~__crc32c_le(~0, data, len); + return ~crc32c(~0, data, len); } #define TB_DROM_DATA_START 13 diff --git a/include/linux/crc-t10dif.h b/include/linux/crc-t10dif.h index 
16787c1cee21..a559fdff3f7e 100644 --- a/include/linux/crc-t10dif.h +++ b/include/linux/crc-t10dif.h @@ -4,9 +4,6 @@ #include <linux/types.h> -#define CRC_T10DIF_DIGEST_SIZE 2 -#define CRC_T10DIF_BLOCK_SIZE 1 - u16 crc_t10dif_arch(u16 crc, const u8 *p, size_t len); u16 crc_t10dif_generic(u16 crc, const u8 *p, size_t len); @@ -22,13 +19,4 @@ static inline u16 crc_t10dif(const u8 *p, size_t len) return crc_t10dif_update(0, p, len); } -#if IS_ENABLED(CONFIG_CRC_T10DIF_ARCH) -bool crc_t10dif_is_optimized(void); -#else -static inline bool crc_t10dif_is_optimized(void) -{ - return false; -} -#endif - #endif diff --git a/include/linux/crc32.h b/include/linux/crc32.h index e9bd40056687..69c2e8bb3782 100644 --- a/include/linux/crc32.h +++ b/include/linux/crc32.h @@ -8,33 +8,32 @@ #include <linux/types.h> #include <linux/bitrev.h> -u32 __pure crc32_le_arch(u32 crc, const u8 *p, size_t len); -u32 __pure crc32_le_base(u32 crc, const u8 *p, size_t len); -u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len); -u32 __pure crc32_be_base(u32 crc, const u8 *p, size_t len); -u32 __pure crc32c_le_arch(u32 crc, const u8 *p, size_t len); -u32 __pure crc32c_le_base(u32 crc, const u8 *p, size_t len); +u32 crc32_le_arch(u32 crc, const u8 *p, size_t len); +u32 crc32_le_base(u32 crc, const u8 *p, size_t len); +u32 crc32_be_arch(u32 crc, const u8 *p, size_t len); +u32 crc32_be_base(u32 crc, const u8 *p, size_t len); +u32 crc32c_arch(u32 crc, const u8 *p, size_t len); +u32 crc32c_base(u32 crc, const u8 *p, size_t len); -static inline u32 __pure crc32_le(u32 crc, const u8 *p, size_t len) +static inline u32 crc32_le(u32 crc, const void *p, size_t len) { if (IS_ENABLED(CONFIG_CRC32_ARCH)) return crc32_le_arch(crc, p, len); return crc32_le_base(crc, p, len); } -static inline u32 __pure crc32_be(u32 crc, const u8 *p, size_t len) +static inline u32 crc32_be(u32 crc, const void *p, size_t len) { if (IS_ENABLED(CONFIG_CRC32_ARCH)) return crc32_be_arch(crc, p, len); return crc32_be_base(crc, p, len); } -/* TODO: leading underscores should be dropped once callers have been updated */ -static inline u32 __pure __crc32c_le(u32 crc, const u8 *p, size_t len) +static inline u32 crc32c(u32 crc, const void *p, size_t len) { if (IS_ENABLED(CONFIG_CRC32_ARCH)) - return crc32c_le_arch(crc, p, len); - return crc32c_le_base(crc, p, len); + return crc32c_arch(crc, p, len); + return crc32c_base(crc, p, len); } /* @@ -45,7 +44,7 @@ static inline u32 __pure __crc32c_le(u32 crc, const u8 *p, size_t len) */ #define CRC32_LE_OPTIMIZATION BIT(0) /* crc32_le() is optimized */ #define CRC32_BE_OPTIMIZATION BIT(1) /* crc32_be() is optimized */ -#define CRC32C_OPTIMIZATION BIT(2) /* __crc32c_le() is optimized */ +#define CRC32C_OPTIMIZATION BIT(2) /* crc32c() is optimized */ #if IS_ENABLED(CONFIG_CRC32_ARCH) u32 crc32_optimizations(void); #else @@ -70,36 +69,34 @@ static inline u32 crc32_optimizations(void) { return 0; } * with the same initializer as crc1, and crc2 seed was 0. See * also crc32_combine_test(). */ -u32 __attribute_const__ crc32_le_shift(u32 crc, size_t len); +u32 crc32_le_shift(u32 crc, size_t len); static inline u32 crc32_le_combine(u32 crc1, u32 crc2, size_t len2) { return crc32_le_shift(crc1, len2) ^ crc2; } +u32 crc32c_shift(u32 crc, size_t len); + /** - * __crc32c_le_combine - Combine two crc32c check values into one. For two - * sequences of bytes, seq1 and seq2 with lengths len1 - * and len2, __crc32c_le() check values were calculated - * for each, crc1 and crc2. + * crc32c_combine - Combine two crc32c check values into one. 
For two sequences + * of bytes, seq1 and seq2 with lengths len1 and len2, crc32c() + * check values were calculated for each, crc1 and crc2. * * @crc1: crc32c of the first block * @crc2: crc32c of the second block * @len2: length of the second block * - * Return: The __crc32c_le() check value of seq1 and seq2 concatenated, - * requiring only crc1, crc2, and len2. Note: If seq_full denotes - * the concatenated memory area of seq1 with seq2, and crc_full - * the __crc32c_le() value of seq_full, then crc_full == - * __crc32c_le_combine(crc1, crc2, len2) when crc_full was - * seeded with the same initializer as crc1, and crc2 seed - * was 0. See also crc32c_combine_test(). + * Return: The crc32c() check value of seq1 and seq2 concatenated, requiring + * only crc1, crc2, and len2. Note: If seq_full denotes the concatenated + * memory area of seq1 with seq2, and crc_full the crc32c() value of + * seq_full, then crc_full == crc32c_combine(crc1, crc2, len2) when + * crc_full was seeded with the same initializer as crc1, and crc2 seed + * was 0. See also crc_combine_test(). */ -u32 __attribute_const__ __crc32c_le_shift(u32 crc, size_t len); - -static inline u32 __crc32c_le_combine(u32 crc1, u32 crc2, size_t len2) +static inline u32 crc32c_combine(u32 crc1, u32 crc2, size_t len2) { - return __crc32c_le_shift(crc1, len2) ^ crc2; + return crc32c_shift(crc1, len2) ^ crc2; } #define crc32(seed, data, length) crc32_le(seed, (unsigned char const *)(data), length) diff --git a/include/linux/crc32c.h b/include/linux/crc32c.h index 47eb78003c26..b8cff2f4309a 100644 --- a/include/linux/crc32c.h +++ b/include/linux/crc32c.h @@ -4,12 +4,4 @@ #include <linux/crc32.h> -static inline u32 crc32c(u32 crc, const void *address, unsigned int length) -{ - return __crc32c_le(crc, address, length); -} - -/* This macro exists for backwards-compatibility. */ -#define crc32c_le crc32c - #endif /* _LINUX_CRC32C_H */ diff --git a/include/linux/crc64.h b/include/linux/crc64.h index e044c60d1e61..41de30b907df 100644 --- a/include/linux/crc64.h +++ b/include/linux/crc64.h @@ -7,12 +7,40 @@ #include <linux/types.h> -#define CRC64_ROCKSOFT_STRING "crc64-rocksoft" +u64 crc64_be_arch(u64 crc, const u8 *p, size_t len); +u64 crc64_be_generic(u64 crc, const u8 *p, size_t len); +u64 crc64_nvme_arch(u64 crc, const u8 *p, size_t len); +u64 crc64_nvme_generic(u64 crc, const u8 *p, size_t len); -u64 __pure crc64_be(u64 crc, const void *p, size_t len); -u64 __pure crc64_rocksoft_generic(u64 crc, const void *p, size_t len); +/** + * crc64_be - Calculate bitwise big-endian ECMA-182 CRC64 + * @crc: seed value for computation. 0 or (u64)~0 for a new CRC calculation, + * or the previous crc64 value if computing incrementally. + * @p: pointer to buffer over which CRC64 is run + * @len: length of buffer @p + */ +static inline u64 crc64_be(u64 crc, const void *p, size_t len) +{ + if (IS_ENABLED(CONFIG_CRC64_ARCH)) + return crc64_be_arch(crc, p, len); + return crc64_be_generic(crc, p, len); +} -u64 crc64_rocksoft(const unsigned char *buffer, size_t len); -u64 crc64_rocksoft_update(u64 crc, const unsigned char *buffer, size_t len); +/** + * crc64_nvme - Calculate CRC64-NVME + * @crc: seed value for computation. 0 for a new CRC calculation, or the + * previous crc64 value if computing incrementally. + * @p: pointer to buffer over which CRC64 is run + * @len: length of buffer @p + * + * This computes the CRC64 defined in the NVME NVM Command Set Specification, + * *including the bitwise inversion at the beginning and end*. 
+ */ +static inline u64 crc64_nvme(u64 crc, const void *p, size_t len) +{ + if (IS_ENABLED(CONFIG_CRC64_ARCH)) + return ~crc64_nvme_arch(~crc, p, len); + return ~crc64_nvme_generic(~crc, p, len); +} #endif /* _LINUX_CRC64_H */ diff --git a/include/linux/crc7.h b/include/linux/crc7.h index b462842f3c32..61d34749e437 100644 --- a/include/linux/crc7.h +++ b/include/linux/crc7.h @@ -3,13 +3,6 @@ #define _LINUX_CRC7_H #include <linux/types.h> -extern const u8 crc7_be_syndrome_table[256]; - -static inline u8 crc7_be_byte(u8 crc, u8 data) -{ - return crc7_be_syndrome_table[crc ^ data]; -} - extern u8 crc7_be(u8 crc, const u8 *buffer, size_t len); #endif diff --git a/include/net/sctp/checksum.h b/include/net/sctp/checksum.h index f514a0aa849e..291465c25810 100644 --- a/include/net/sctp/checksum.h +++ b/include/net/sctp/checksum.h @@ -30,17 +30,14 @@ static inline __wsum sctp_csum_update(const void *buff, int len, __wsum sum) { - /* This uses the crypto implementation of crc32c, which is either - * implemented w/ hardware support or resolves to __crc32c_le(). - */ return (__force __wsum)crc32c((__force __u32)sum, buff, len); } static inline __wsum sctp_csum_combine(__wsum csum, __wsum csum2, int offset, int len) { - return (__force __wsum)__crc32c_le_combine((__force __u32)csum, - (__force __u32)csum2, len); + return (__force __wsum)crc32c_combine((__force __u32)csum, + (__force __u32)csum2, len); } static const struct skb_checksum_ops sctp_csum_ops = { diff --git a/lib/Kconfig b/lib/Kconfig index dccb61b7d698..61cce0686b53 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -168,15 +168,6 @@ config CRC_T10DIF_ARCH tristate default CRC_T10DIF if ARCH_HAS_CRC_T10DIF && CRC_OPTIMIZATIONS -config CRC64_ROCKSOFT - tristate "CRC calculation for the Rocksoft model CRC64" - select CRC64 - select CRYPTO - select CRYPTO_CRC64_ROCKSOFT - help - This option provides a CRC64 API to a registered crypto driver. - This is used with the block layer's data integrity subsystem. - config CRC_ITU_T tristate "CRC ITU-T V.41 functions" help @@ -203,42 +194,30 @@ config CRC32_ARCH default CRC32 if ARCH_HAS_CRC32 && CRC_OPTIMIZATIONS config CRC64 - tristate "CRC64 functions" - help - This option is provided for the case where no in-kernel-tree - modules require CRC64 functions, but a module built outside - the kernel tree does. Such modules that use library CRC64 - functions require M here. + tristate + +config ARCH_HAS_CRC64 + bool + +config CRC64_ARCH + tristate + default CRC64 if ARCH_HAS_CRC64 && CRC_OPTIMIZATIONS config CRC4 - tristate "CRC4 functions" - help - This option is provided for the case where no in-kernel-tree - modules require CRC4 functions, but a module built outside - the kernel tree does. Such modules that use library CRC4 - functions require M here. + tristate config CRC7 - tristate "CRC7 functions" - help - This option is provided for the case where no in-kernel-tree - modules require CRC7 functions, but a module built outside - the kernel tree does. Such modules that use library CRC7 - functions require M here. + tristate config LIBCRC32C - tristate "CRC32c (Castagnoli, et al) Cyclic Redundancy-Check" + tristate select CRC32 help This option just selects CRC32 and is provided for compatibility purposes until the users are updated to select CRC32 directly. config CRC8 - tristate "CRC8 function" - help - This option provides CRC8 function. Drivers may select this - when they need to do cyclic redundancy check according CRC8 - algorithm. Module will be called crc8. 
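Illustrative sketch (not part of this patch): the crc64_nvme() wrapper above documents that the bitwise inversion at the beginning and end is included. A minimal bit-serial Python equivalent, assuming the CRC64_NVME_POLY value 0x9a6c9329ac4bc9b5 used by gen_crc64table.c later in this patch and the 4096-zero-byte digest from the removed crc64_rocksoft_tv_template:

CRC64_NVME_POLY = 0x9a6c9329ac4bc9b5               # generator, msb-first notation
POLY_LE = int(f'{CRC64_NVME_POLY:064b}'[::-1], 2)  # bit-reflected for lsb-first processing

def crc64_nvme(crc, data):
    crc = ~crc & 0xffffffffffffffff                # inversion at the beginning...
    for byte in data:
        crc ^= byte
        for _ in range(8):
            crc = (crc >> 1) ^ (POLY_LE if crc & 1 else 0)
    return crc ^ 0xffffffffffffffff                # ...and at the end

# The removed crc64-rocksoft self-test digest for 4096 zero bytes, read little-endian:
assert crc64_nvme(0, bytes(4096)) == 0x6482d367eb22b64e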
+ tristate config CRC_OPTIMIZATIONS bool "Enable optimized CRC implementations" if EXPERT diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index b1b92a9a8f24..c7086ed09cde 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2889,6 +2889,7 @@ config CRC_KUNIT_TEST tristate "KUnit tests for CRC functions" if !KUNIT_ALL_TESTS depends on KUNIT default KUNIT_ALL_TESTS + select CRC7 select CRC16 select CRC_T10DIF select CRC32 diff --git a/lib/Makefile b/lib/Makefile index b752fcbc83d2..0fdcd6d635ad 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -159,7 +159,6 @@ obj-$(CONFIG_CRC64) += crc64.o obj-$(CONFIG_CRC4) += crc4.o obj-$(CONFIG_CRC7) += crc7.o obj-$(CONFIG_CRC8) += crc8.o -obj-$(CONFIG_CRC64_ROCKSOFT) += crc64-rocksoft.o obj-$(CONFIG_XXHASH) += xxhash.o obj-$(CONFIG_GENERIC_ALLOCATOR) += genalloc.o diff --git a/lib/crc32.c b/lib/crc32.c index ede6131f66fc..fddd424ff224 100644 --- a/lib/crc32.c +++ b/lib/crc32.c @@ -37,7 +37,7 @@ MODULE_AUTHOR("Matt Domsch <Matt_Domsch@dell.com>"); MODULE_DESCRIPTION("Various CRC32 calculations"); MODULE_LICENSE("GPL"); -u32 __pure crc32_le_base(u32 crc, const u8 *p, size_t len) +u32 crc32_le_base(u32 crc, const u8 *p, size_t len) { while (len--) crc = (crc >> 8) ^ crc32table_le[(crc & 255) ^ *p++]; @@ -45,20 +45,20 @@ u32 __pure crc32_le_base(u32 crc, const u8 *p, size_t len) } EXPORT_SYMBOL(crc32_le_base); -u32 __pure crc32c_le_base(u32 crc, const u8 *p, size_t len) +u32 crc32c_base(u32 crc, const u8 *p, size_t len) { while (len--) crc = (crc >> 8) ^ crc32ctable_le[(crc & 255) ^ *p++]; return crc; } -EXPORT_SYMBOL(crc32c_le_base); +EXPORT_SYMBOL(crc32c_base); /* * This multiplies the polynomials x and y modulo the given modulus. * This follows the "little-endian" CRC convention that the lsbit * represents the highest power of x, and the msbit represents x^0. */ -static u32 __attribute_const__ gf2_multiply(u32 x, u32 y, u32 modulus) +static u32 gf2_multiply(u32 x, u32 y, u32 modulus) { u32 product = x & 1 ? y : 0; int i; @@ -84,8 +84,7 @@ static u32 __attribute_const__ gf2_multiply(u32 x, u32 y, u32 modulus) * as appending len bytes of zero to the data), in time proportional * to log(len). 
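Illustrative sketch (not part of this patch): crc32c_combine() is built on crc32c_shift(), and the comment on crc32_generic_shift() being reworked here notes that shifting is equivalent to appending len zero bytes to the data, just computed in logarithmic rather than linear time. A minimal Python check of that identity, using a plain bit-serial CRC32C with the reflected polynomial 0x82f63b78 and no inversions, matching how the low-level crc32c()/crc32c_shift() pair is used:

CRC32C_POLY_LE = 0x82f63b78

def crc32c_raw(crc, data):
    # Raw lsb-first CRC32C update; callers supply the ~0 seed / final ~ themselves.
    for byte in data:
        crc ^= byte
        for _ in range(8):
            crc = (crc >> 1) ^ (CRC32C_POLY_LE if crc & 1 else 0)
    return crc

def crc32c_shift_slow(crc, length):
    # Same effect as crc32c_shift(): run the CRC register over 'length' zero bytes.
    return crc32c_raw(crc, bytes(length))

seq1, seq2 = b"hello ", b"world"
crc1 = crc32c_raw(0, seq1)
crc2 = crc32c_raw(0, seq2)                         # crc2 must be seeded with 0
assert crc32c_shift_slow(crc1, len(seq2)) ^ crc2 == crc32c_raw(0, seq1 + seq2)

The kernel's crc32c_shift() reaches the same answer in O(log len) via gf2_multiply()/crc32_generic_shift(), which is what makes crc32c_combine() cheap even for long second blocks.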
*/ -static u32 __attribute_const__ crc32_generic_shift(u32 crc, size_t len, - u32 polynomial) +static u32 crc32_generic_shift(u32 crc, size_t len, u32 polynomial) { u32 power = polynomial; /* CRC of x^32 */ int i; @@ -114,19 +113,19 @@ static u32 __attribute_const__ crc32_generic_shift(u32 crc, size_t len, return crc; } -u32 __attribute_const__ crc32_le_shift(u32 crc, size_t len) +u32 crc32_le_shift(u32 crc, size_t len) { return crc32_generic_shift(crc, len, CRC32_POLY_LE); } +EXPORT_SYMBOL(crc32_le_shift); -u32 __attribute_const__ __crc32c_le_shift(u32 crc, size_t len) +u32 crc32c_shift(u32 crc, size_t len) { return crc32_generic_shift(crc, len, CRC32C_POLY_LE); } -EXPORT_SYMBOL(crc32_le_shift); -EXPORT_SYMBOL(__crc32c_le_shift); +EXPORT_SYMBOL(crc32c_shift); -u32 __pure crc32_be_base(u32 crc, const u8 *p, size_t len) +u32 crc32_be_base(u32 crc, const u8 *p, size_t len) { while (len--) crc = (crc << 8) ^ crc32table_be[(crc >> 24) ^ *p++]; diff --git a/lib/crc64-rocksoft.c b/lib/crc64-rocksoft.c deleted file mode 100644 index fc9ae0da5df7..000000000000 --- a/lib/crc64-rocksoft.c +++ /dev/null @@ -1,126 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only - -#include <linux/types.h> -#include <linux/module.h> -#include <linux/crc64.h> -#include <linux/err.h> -#include <linux/init.h> -#include <crypto/hash.h> -#include <crypto/algapi.h> -#include <linux/static_key.h> -#include <linux/notifier.h> - -static struct crypto_shash __rcu *crc64_rocksoft_tfm; -static DEFINE_STATIC_KEY_TRUE(crc64_rocksoft_fallback); -static DEFINE_MUTEX(crc64_rocksoft_mutex); -static struct work_struct crc64_rocksoft_rehash_work; - -static int crc64_rocksoft_notify(struct notifier_block *self, unsigned long val, void *data) -{ - struct crypto_alg *alg = data; - - if (val != CRYPTO_MSG_ALG_LOADED || - strcmp(alg->cra_name, CRC64_ROCKSOFT_STRING)) - return NOTIFY_DONE; - - schedule_work(&crc64_rocksoft_rehash_work); - return NOTIFY_OK; -} - -static void crc64_rocksoft_rehash(struct work_struct *work) -{ - struct crypto_shash *new, *old; - - mutex_lock(&crc64_rocksoft_mutex); - old = rcu_dereference_protected(crc64_rocksoft_tfm, - lockdep_is_held(&crc64_rocksoft_mutex)); - new = crypto_alloc_shash(CRC64_ROCKSOFT_STRING, 0, 0); - if (IS_ERR(new)) { - mutex_unlock(&crc64_rocksoft_mutex); - return; - } - rcu_assign_pointer(crc64_rocksoft_tfm, new); - mutex_unlock(&crc64_rocksoft_mutex); - - if (old) { - synchronize_rcu(); - crypto_free_shash(old); - } else { - static_branch_disable(&crc64_rocksoft_fallback); - } -} - -static struct notifier_block crc64_rocksoft_nb = { - .notifier_call = crc64_rocksoft_notify, -}; - -u64 crc64_rocksoft_update(u64 crc, const unsigned char *buffer, size_t len) -{ - struct { - struct shash_desc shash; - u64 crc; - } desc; - int err; - - if (static_branch_unlikely(&crc64_rocksoft_fallback)) - return crc64_rocksoft_generic(crc, buffer, len); - - rcu_read_lock(); - desc.shash.tfm = rcu_dereference(crc64_rocksoft_tfm); - desc.crc = crc; - err = crypto_shash_update(&desc.shash, buffer, len); - rcu_read_unlock(); - - BUG_ON(err); - - return desc.crc; -} -EXPORT_SYMBOL_GPL(crc64_rocksoft_update); - -u64 crc64_rocksoft(const unsigned char *buffer, size_t len) -{ - return crc64_rocksoft_update(0, buffer, len); -} -EXPORT_SYMBOL_GPL(crc64_rocksoft); - -static int __init crc64_rocksoft_mod_init(void) -{ - INIT_WORK(&crc64_rocksoft_rehash_work, crc64_rocksoft_rehash); - crypto_register_notifier(&crc64_rocksoft_nb); - crc64_rocksoft_rehash(&crc64_rocksoft_rehash_work); - return 0; -} - -static void __exit 
crc64_rocksoft_mod_fini(void) -{ - crypto_unregister_notifier(&crc64_rocksoft_nb); - cancel_work_sync(&crc64_rocksoft_rehash_work); - crypto_free_shash(rcu_dereference_protected(crc64_rocksoft_tfm, 1)); -} - -module_init(crc64_rocksoft_mod_init); -module_exit(crc64_rocksoft_mod_fini); - -static int crc64_rocksoft_transform_show(char *buffer, const struct kernel_param *kp) -{ - struct crypto_shash *tfm; - int len; - - if (static_branch_unlikely(&crc64_rocksoft_fallback)) - return sprintf(buffer, "fallback\n"); - - rcu_read_lock(); - tfm = rcu_dereference(crc64_rocksoft_tfm); - len = snprintf(buffer, PAGE_SIZE, "%s\n", - crypto_shash_driver_name(tfm)); - rcu_read_unlock(); - - return len; -} - -module_param_call(transform, NULL, crc64_rocksoft_transform_show, NULL, 0444); - -MODULE_AUTHOR("Keith Busch <kbusch@kernel.org>"); -MODULE_DESCRIPTION("Rocksoft model CRC64 calculation (library API)"); -MODULE_LICENSE("GPL"); -MODULE_SOFTDEP("pre: crc64"); diff --git a/lib/crc64.c b/lib/crc64.c index 61ae8dfb6a1c..5b1b17057f0a 100644 --- a/lib/crc64.c +++ b/lib/crc64.c @@ -22,8 +22,8 @@ * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 + * x^7 + x^4 + x + 1 * - * crc64rocksoft[256] table is from the Rocksoft specification polynomial - * defined as, + * crc64nvmetable[256] uses the CRC64 polynomial from the NVME NVM Command Set + * Specification and uses least-significant-bit first bit order: * * x^64 + x^63 + x^61 + x^59 + x^58 + x^56 + x^55 + x^52 + x^49 + x^48 + x^47 + * x^46 + x^44 + x^41 + x^37 + x^36 + x^34 + x^32 + x^31 + x^28 + x^26 + x^23 + @@ -41,45 +41,18 @@ MODULE_DESCRIPTION("CRC64 calculations"); MODULE_LICENSE("GPL v2"); -/** - * crc64_be - Calculate bitwise big-endian ECMA-182 CRC64 - * @crc: seed value for computation. 0 or (u64)~0 for a new CRC calculation, - * or the previous crc64 value if computing incrementally. - * @p: pointer to buffer over which CRC64 is run - * @len: length of buffer @p - */ -u64 __pure crc64_be(u64 crc, const void *p, size_t len) +u64 crc64_be_generic(u64 crc, const u8 *p, size_t len) { - size_t i, t; - - const unsigned char *_p = p; - - for (i = 0; i < len; i++) { - t = ((crc >> 56) ^ (*_p++)) & 0xFF; - crc = crc64table[t] ^ (crc << 8); - } - + while (len--) + crc = (crc << 8) ^ crc64table[(crc >> 56) ^ *p++]; return crc; } -EXPORT_SYMBOL_GPL(crc64_be); +EXPORT_SYMBOL_GPL(crc64_be_generic); -/** - * crc64_rocksoft_generic - Calculate bitwise Rocksoft CRC64 - * @crc: seed value for computation. 0 for a new CRC calculation, or the - * previous crc64 value if computing incrementally. - * @p: pointer to buffer over which CRC64 is run - * @len: length of buffer @p - */ -u64 __pure crc64_rocksoft_generic(u64 crc, const void *p, size_t len) +u64 crc64_nvme_generic(u64 crc, const u8 *p, size_t len) { - const unsigned char *_p = p; - size_t i; - - crc = ~crc; - - for (i = 0; i < len; i++) - crc = (crc >> 8) ^ crc64rocksofttable[(crc & 0xff) ^ *_p++]; - - return ~crc; + while (len--) + crc = (crc >> 8) ^ crc64nvmetable[(crc & 0xff) ^ *p++]; + return crc; } -EXPORT_SYMBOL_GPL(crc64_rocksoft_generic); +EXPORT_SYMBOL_GPL(crc64_nvme_generic); diff --git a/lib/crc7.c b/lib/crc7.c index 3848e313b722..8dd991cc6114 100644 --- a/lib/crc7.c +++ b/lib/crc7.c @@ -7,14 +7,13 @@ #include <linux/module.h> #include <linux/crc7.h> - /* * Table for CRC-7 (polynomial x^7 + x^3 + 1). * This is a big-endian CRC (msbit is highest power of x), * aligned so the msbit of the byte is the x^6 coefficient * and the lsbit is not used. 
*/ -const u8 crc7_be_syndrome_table[256] = { +static const u8 crc7_be_syndrome_table[256] = { 0x00, 0x12, 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e, 0x90, 0x82, 0xb4, 0xa6, 0xd8, 0xca, 0xfc, 0xee, 0x32, 0x20, 0x16, 0x04, 0x7a, 0x68, 0x5e, 0x4c, @@ -48,7 +47,6 @@ const u8 crc7_be_syndrome_table[256] = { 0x1c, 0x0e, 0x38, 0x2a, 0x54, 0x46, 0x70, 0x62, 0x8c, 0x9e, 0xa8, 0xba, 0xc4, 0xd6, 0xe0, 0xf2 }; -EXPORT_SYMBOL(crc7_be_syndrome_table); /** * crc7_be - update the CRC7 for the data buffer @@ -65,7 +63,7 @@ EXPORT_SYMBOL(crc7_be_syndrome_table); u8 crc7_be(u8 crc, const u8 *buffer, size_t len) { while (len--) - crc = crc7_be_byte(crc, *buffer++); + crc = crc7_be_syndrome_table[crc ^ *buffer++]; return crc; } EXPORT_SYMBOL(crc7_be); diff --git a/lib/gen_crc64table.c b/lib/gen_crc64table.c index 55e222acd0b8..e05a4230a0a0 100644 --- a/lib/gen_crc64table.c +++ b/lib/gen_crc64table.c @@ -17,10 +17,10 @@ #include <stdio.h> #define CRC64_ECMA182_POLY 0x42F0E1EBA9EA3693ULL -#define CRC64_ROCKSOFT_POLY 0x9A6C9329AC4BC9B5ULL +#define CRC64_NVME_POLY 0x9A6C9329AC4BC9B5ULL static uint64_t crc64_table[256] = {0}; -static uint64_t crc64_rocksoft_table[256] = {0}; +static uint64_t crc64_nvme_table[256] = {0}; static void generate_reflected_crc64_table(uint64_t table[256], uint64_t poly) { @@ -82,14 +82,14 @@ static void print_crc64_tables(void) printf("static const u64 ____cacheline_aligned crc64table[256] = {\n"); output_table(crc64_table); - printf("\nstatic const u64 ____cacheline_aligned crc64rocksofttable[256] = {\n"); - output_table(crc64_rocksoft_table); + printf("\nstatic const u64 ____cacheline_aligned crc64nvmetable[256] = {\n"); + output_table(crc64_nvme_table); } int main(int argc, char *argv[]) { generate_crc64_table(crc64_table, CRC64_ECMA182_POLY); - generate_reflected_crc64_table(crc64_rocksoft_table, CRC64_ROCKSOFT_POLY); + generate_reflected_crc64_table(crc64_nvme_table, CRC64_NVME_POLY); print_crc64_tables(); return 0; } diff --git a/lib/tests/crc_kunit.c b/lib/tests/crc_kunit.c index 6a61d4b5fd45..585c48b65cef 100644 --- a/lib/tests/crc_kunit.c +++ b/lib/tests/crc_kunit.c @@ -7,6 +7,7 @@ * Author: Eric Biggers <ebiggers@google.com> */ #include <kunit/test.h> +#include <linux/crc7.h> #include <linux/crc16.h> #include <linux/crc-t10dif.h> #include <linux/crc32.h> @@ -32,7 +33,9 @@ static size_t test_buflen; * @poly: The generator polynomial with the highest-order term omitted. * Bit-reversed if @le is true. * @func: The function to compute a CRC. The type signature uses u64 so that it - * can fit any CRC up to CRC-64. + * can fit any CRC up to CRC-64. The CRC is passed in, and is expected + * to be returned in, the least significant bits of the u64. The + * function is expected to *not* invert the CRC at the beginning and end. * @combine_func: Optional function to combine two CRCs. */ struct crc_variant { @@ -223,8 +226,9 @@ crc_benchmark(struct kunit *test, }; size_t len, i, j, num_iters; /* - * Some of the CRC library functions are marked as __pure, so use - * volatile to ensure that all calls are really made as intended. + * The CRC value that this function computes in a series of calls to + * crc_func is never actually used, so use volatile to ensure that the + * computations are done as intended and don't all get optimized out. 
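Illustrative sketch (not part of this patch): lib/crc7.c keeps the 7-bit CRC left-aligned in a byte (the msbit of the byte is the x^6 coefficient and the lsbit is unused), which is why the new kunit wrapper below shifts by one bit to convert conventions. A minimal bit-serial equivalent of crc7_be() in Python, assuming the polynomial x^7 + x^3 + 1 noted in the table comment:

def crc7_be(crc, data):
    # Left-aligned convention: the 7-bit CRC lives in bits 7..1, so the
    # polynomial 0x09 is applied as 0x09 << 1 = 0x12.
    for byte in data:
        crc ^= byte
        for _ in range(8):
            if crc & 0x80:
                crc = ((crc << 1) ^ 0x12) & 0xff
            else:
                crc = (crc << 1) & 0xff
    return crc

# Spot-check against the (now static) syndrome table above: byte 0x01 with a
# zero seed gives the table's entry 0x12, and byte 0x08 gives 0x90.
assert crc7_be(0, b"\x01") == 0x12
assert crc7_be(0, b"\x08") == 0x90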
 	 */
 	volatile u64 crc = 0;
 	u64 t;
@@ -251,6 +255,33 @@
 	}
 }
 
+/* crc7_be */
+
+static u64 crc7_be_wrapper(u64 crc, const u8 *p, size_t len)
+{
+	/*
+	 * crc7_be() left-aligns the 7-bit CRC in a u8, whereas the test wants a
+	 * right-aligned CRC (in a u64).  Convert between the conventions.
+	 */
+	return crc7_be(crc << 1, p, len) >> 1;
+}
+
+static const struct crc_variant crc_variant_crc7_be = {
+	.bits = 7,
+	.poly = 0x9,
+	.func = crc7_be_wrapper,
+};
+
+static void crc7_be_test(struct kunit *test)
+{
+	crc_test(test, &crc_variant_crc7_be);
+}
+
+static void crc7_be_benchmark(struct kunit *test)
+{
+	crc_benchmark(test, crc7_be_wrapper);
+}
+
 /* crc16 */
 
 static u64 crc16_wrapper(u64 crc, const u8 *p, size_t len)
@@ -362,7 +393,7 @@ static u64 crc32c_wrapper(u64 crc, const u8 *p, size_t len)
 
 static u64 crc32c_combine_wrapper(u64 crc1, u64 crc2, size_t len2)
 {
-	return __crc32c_le_combine(crc1, crc2, len2);
+	return crc32c_combine(crc1, crc2, len2);
 }
 
 static const struct crc_variant crc_variant_crc32c = {
@@ -407,7 +438,34 @@ static void crc64_be_benchmark(struct kunit *test)
 	crc_benchmark(test, crc64_be_wrapper);
 }
 
+/* crc64_nvme */
+
+static u64 crc64_nvme_wrapper(u64 crc, const u8 *p, size_t len)
+{
+	/* The inversions that crc64_nvme() does have to be undone here. */
+	return ~crc64_nvme(~crc, p, len);
+}
+
+static const struct crc_variant crc_variant_crc64_nvme = {
+	.bits = 64,
+	.le = true,
+	.poly = 0x9a6c9329ac4bc9b5,
+	.func = crc64_nvme_wrapper,
+};
+
+static void crc64_nvme_test(struct kunit *test)
+{
+	crc_test(test, &crc_variant_crc64_nvme);
+}
+
+static void crc64_nvme_benchmark(struct kunit *test)
+{
+	crc_benchmark(test, crc64_nvme_wrapper);
+}
+
 static struct kunit_case crc_test_cases[] = {
+	KUNIT_CASE(crc7_be_test),
+	KUNIT_CASE(crc7_be_benchmark),
 	KUNIT_CASE(crc16_test),
 	KUNIT_CASE(crc16_benchmark),
 	KUNIT_CASE(crc_t10dif_test),
@@ -420,6 +478,8 @@ static struct kunit_case crc_test_cases[] = {
 	KUNIT_CASE(crc32c_benchmark),
 	KUNIT_CASE(crc64_be_test),
 	KUNIT_CASE(crc64_be_benchmark),
+	KUNIT_CASE(crc64_nvme_test),
+	KUNIT_CASE(crc64_nvme_benchmark),
 	{},
 };
diff --git a/scripts/gen-crc-consts.py b/scripts/gen-crc-consts.py
new file mode 100755
index 000000000000..f9b44fc3a03f
--- /dev/null
+++ b/scripts/gen-crc-consts.py
@@ -0,0 +1,291 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0-or-later
+#
+# Script that generates constants for computing the given CRC variant(s).
+#
+# Copyright 2025 Google LLC
+#
+# Author: Eric Biggers <ebiggers@google.com>
+
+import sys
+
+# XOR (add) an iterable of polynomials.
+def xor(iterable):
+    res = 0
+    for val in iterable:
+        res ^= val
+    return res
+
+# Multiply two polynomials.
+def clmul(a, b):
+    return xor(a << i for i in range(b.bit_length()) if (b & (1 << i)) != 0)
+
+# Polynomial division floor(a / b).
+def div(a, b):
+    q = 0
+    while a.bit_length() >= b.bit_length():
+        q ^= 1 << (a.bit_length() - b.bit_length())
+        a ^= b << (a.bit_length() - b.bit_length())
+    return q
+
+# Reduce the polynomial 'a' modulo the polynomial 'b'.
+def reduce(a, b):
+    return a ^ clmul(div(a, b), b)
+
+# Reflect the bits of a polynomial.
+def bitreflect(poly, num_bits):
+    assert poly.bit_length() <= num_bits
+    return xor(((poly >> i) & 1) << (num_bits - 1 - i) for i in range(num_bits))
+
+# Format a polynomial as hex.  Bit-reflect it if the CRC is lsb-first.
+def fmt_poly(variant, poly, num_bits):
+    if variant.lsb:
+        poly = bitreflect(poly, num_bits)
+    return f'0x{poly:0{2*num_bits//8}x}'
+
+# Print a pair of 64-bit polynomial multipliers.  They are always passed in the
+# order [HI64_TERMS, LO64_TERMS] but will be printed in the appropriate order.
+def print_mult_pair(variant, mults):
+    mults = list(mults if variant.lsb else reversed(mults))
+    terms = ['HI64_TERMS', 'LO64_TERMS'] if variant.lsb else ['LO64_TERMS', 'HI64_TERMS']
+    for i in range(2):
+        print(f'\t\t{fmt_poly(variant, mults[i]["val"], 64)},\t/* {terms[i]}: {mults[i]["desc"]} */')
+
+# Pretty-print a polynomial.
+def pprint_poly(prefix, poly):
+    terms = [f'x^{i}' for i in reversed(range(poly.bit_length()))
+             if (poly & (1 << i)) != 0]
+    j = 0
+    while j < len(terms):
+        s = prefix + terms[j] + (' +' if j < len(terms) - 1 else '')
+        j += 1
+        while j < len(terms) and len(s) < 73:
+            s += ' ' + terms[j] + (' +' if j < len(terms) - 1 else '')
+            j += 1
+        print(s)
+        prefix = ' * ' + (' ' * (len(prefix) - 3))
+
+# Print a comment describing constants generated for the given CRC variant.
+def print_header(variant, what):
+    print('/*')
+    s = f'{"least" if variant.lsb else "most"}-significant-bit-first CRC-{variant.bits}'
+    print(f' * {what} generated for {s} using')
+    pprint_poly(' * G(x) = ', variant.G)
+    print(' */')
+
+class CrcVariant:
+    def __init__(self, bits, generator_poly, bit_order):
+        self.bits = bits
+        if bit_order not in ['lsb', 'msb']:
+            raise ValueError('Invalid value for bit_order')
+        self.lsb = bit_order == 'lsb'
+        self.name = f'crc{bits}_{bit_order}_0x{generator_poly:0{(2*bits+7)//8}x}'
+        if self.lsb:
+            generator_poly = bitreflect(generator_poly, bits)
+        self.G = generator_poly ^ (1 << bits)
+
+# Generate tables for CRC computation using the "slice-by-N" method.
+# N=1 corresponds to the traditional byte-at-a-time table.
+def gen_slicebyN_tables(variants, n):
+    for v in variants:
+        print('')
+        print_header(v, f'Slice-by-{n} CRC table')
+        print(f'static const u{v.bits} __maybe_unused {v.name}_table[{256*n}] = {{')
+        s = ''
+        for i in range(256 * n):
+            # The i'th table entry is the CRC of the message consisting of byte
+            # i % 256 followed by i // 256 zero bytes.
+            poly = (bitreflect(i % 256, 8) if v.lsb else i % 256) << (v.bits + 8*(i//256))
+            next_entry = fmt_poly(v, reduce(poly, v.G), v.bits) + ','
+            if len(s + next_entry) > 71:
+                print(f'\t{s}')
+                s = ''
+            s += (' ' if s else '') + next_entry
+        if s:
+            print(f'\t{s}')
+        print('};')
+
+def print_riscv_const(v, bits_per_long, name, val, desc):
+    print(f'\t.{name} = {fmt_poly(v, val, bits_per_long)}, /* {desc} */')
+
+def do_gen_riscv_clmul_consts(v, bits_per_long):
+    (G, n, lsb) = (v.G, v.bits, v.lsb)
+
+    pow_of_x = 3 * bits_per_long - (1 if lsb else 0)
+    print_riscv_const(v, bits_per_long, 'fold_across_2_longs_const_hi',
+                      reduce(1 << pow_of_x, G), f'x^{pow_of_x} mod G')
+    pow_of_x = 2 * bits_per_long - (1 if lsb else 0)
+    print_riscv_const(v, bits_per_long, 'fold_across_2_longs_const_lo',
+                      reduce(1 << pow_of_x, G), f'x^{pow_of_x} mod G')
+
+    pow_of_x = bits_per_long - 1 + n
+    print_riscv_const(v, bits_per_long, 'barrett_reduction_const_1',
+                      div(1 << pow_of_x, G), f'floor(x^{pow_of_x} / G)')
+
+    val = G - (1 << n)
+    desc = f'G - x^{n}'
+    if lsb:
+        val <<= bits_per_long - n
+        desc = f'({desc}) * x^{bits_per_long - n}'
+    print_riscv_const(v, bits_per_long, 'barrett_reduction_const_2', val, desc)
+
+def gen_riscv_clmul_consts(variants):
+    print('')
+    print('struct crc_clmul_consts {');
+    print('\tunsigned long fold_across_2_longs_const_hi;');
+    print('\tunsigned long fold_across_2_longs_const_lo;');
+    print('\tunsigned long barrett_reduction_const_1;');
+    print('\tunsigned long barrett_reduction_const_2;');
+    print('};');
+    for v in variants:
+        print('');
+        if v.bits > 32:
+            print_header(v, 'Constants')
+            print('#ifdef CONFIG_64BIT')
+            print(f'static const struct crc_clmul_consts {v.name}_consts __maybe_unused = {{')
+            do_gen_riscv_clmul_consts(v, 64)
+            print('};')
+            print('#endif')
+        else:
+            print_header(v, 'Constants')
+            print(f'static const struct crc_clmul_consts {v.name}_consts __maybe_unused = {{')
+            print('#ifdef CONFIG_64BIT')
+            do_gen_riscv_clmul_consts(v, 64)
+            print('#else')
+            do_gen_riscv_clmul_consts(v, 32)
+            print('#endif')
+            print('};')
+
+# Generate constants for carryless multiplication based CRC computation.
+def gen_x86_pclmul_consts(variants):
+    # These are the distances, in bits, to generate folding constants for.
+    FOLD_DISTANCES = [2048, 1024, 512, 256, 128]
+
+    for v in variants:
+        (G, n, lsb) = (v.G, v.bits, v.lsb)
+        print('')
+        print_header(v, 'CRC folding constants')
+        print('static const struct {')
+        if not lsb:
+            print('\tu8 bswap_mask[16];')
+        for i in FOLD_DISTANCES:
+            print(f'\tu64 fold_across_{i}_bits_consts[2];')
+        print('\tu8 shuf_table[48];')
+        print('\tu64 barrett_reduction_consts[2];')
+        print(f'}} {v.name}_consts ____cacheline_aligned __maybe_unused = {{')
+
+        # Byte-reflection mask, needed for msb-first CRCs
+        if not lsb:
+            print('\t.bswap_mask = {' + ', '.join(str(i) for i in reversed(range(16))) + '},')
+
+        # Fold constants for all distances down to 128 bits
+        for i in FOLD_DISTANCES:
+            print(f'\t.fold_across_{i}_bits_consts = {{')
+            # Given 64x64 => 128 bit carryless multiplication instructions, two
+            # 64-bit fold constants are needed per "fold distance" i: one for
+            # HI64_TERMS that is basically x^(i+64) mod G and one for LO64_TERMS
+            # that is basically x^i mod G.  The exact values however undergo a
+            # couple adjustments, described below.
+            mults = []
+            for j in [64, 0]:
+                pow_of_x = i + j
+                if lsb:
+                    # Each 64x64 => 128 bit carryless multiplication instruction
+                    # actually generates a 127-bit product in physical bits 0
+                    # through 126, which in the lsb-first case represent the
+                    # coefficients of x^1 through x^127, not x^0 through x^126.
+                    # Thus in the lsb-first case, each such instruction
+                    # implicitly adds an extra factor of x.  The below removes a
+                    # factor of x from each constant to compensate for this.
+                    # For n < 64 the x could be removed from either the reduced
+                    # part or unreduced part, but for n == 64 the reduced part
+                    # is the only option.  Just always use the reduced part.
+                    pow_of_x -= 1
+                # Make a factor of x^(64-n) be applied unreduced rather than
+                # reduced, to cause the product to use only the x^(64-n) and
+                # higher terms and always be zero in the lower terms.  Usually
+                # this makes no difference as it does not affect the product's
+                # congruence class mod G and the constant remains 64-bit, but
+                # part of the final reduction from 128 bits does rely on this
+                # property when it reuses one of the constants.
+                pow_of_x -= 64 - n
+                mults.append({ 'val': reduce(1 << pow_of_x, G) << (64 - n),
+                               'desc': f'(x^{pow_of_x} mod G) * x^{64-n}' })
+            print_mult_pair(v, mults)
+            print('\t},')
+
+        # Shuffle table for handling 1..15 bytes at end
+        print('\t.shuf_table = {')
+        print('\t\t' + (16*'-1, ').rstrip())
+        print('\t\t' + ''.join(f'{i:2}, ' for i in range(16)).rstrip())
+        print('\t\t' + (16*'-1, ').rstrip())
+        print('\t},')
+
+        # Barrett reduction constants for reducing 128 bits to the final CRC
+        print('\t.barrett_reduction_consts = {')
+        mults = []
+
+        val = div(1 << (63+n), G)
+        desc = f'floor(x^{63+n} / G)'
+        if not lsb:
+            val = (val << 1) - (1 << 64)
+            desc = f'({desc} * x) - x^64'
+        mults.append({ 'val': val, 'desc': desc })
+
+        val = G - (1 << n)
+        desc = f'G - x^{n}'
+        if lsb and n == 64:
+            assert (val & 1) != 0  # The x^0 term should always be nonzero.
+            val >>= 1
+            desc = f'({desc} - x^0) / x'
+        else:
+            pow_of_x = 64 - n - (1 if lsb else 0)
+            val <<= pow_of_x
+            desc = f'({desc}) * x^{pow_of_x}'
+        mults.append({ 'val': val, 'desc': desc })
+
+        print_mult_pair(v, mults)
+        print('\t},')
+
+        print('};')
+
+def parse_crc_variants(vars_string):
+    variants = []
+    for var_string in vars_string.split(','):
+        bits, bit_order, generator_poly = var_string.split('_')
+        assert bits.startswith('crc')
+        bits = int(bits.removeprefix('crc'))
+        assert generator_poly.startswith('0x')
+        generator_poly = generator_poly.removeprefix('0x')
+        assert len(generator_poly) % 2 == 0
+        generator_poly = int(generator_poly, 16)
+        variants.append(CrcVariant(bits, generator_poly, bit_order))
+    return variants
+
+if len(sys.argv) != 3:
+    sys.stderr.write(f'Usage: {sys.argv[0]} CONSTS_TYPE[,CONSTS_TYPE]... CRC_VARIANT[,CRC_VARIANT]...\n')
+    sys.stderr.write('   CONSTS_TYPE can be sliceby[1-8], riscv_clmul, or x86_pclmul\n')
+    sys.stderr.write('   CRC_VARIANT is crc${num_bits}_${bit_order}_${generator_poly_as_hex}\n')
+    sys.stderr.write('      E.g. crc16_msb_0x8bb7 or crc32_lsb_0xedb88320\n')
+    sys.stderr.write('      Polynomial must use the given bit_order and exclude x^{num_bits}\n')
+    sys.exit(1)
+
+print('/* SPDX-License-Identifier: GPL-2.0-or-later */')
+print('/*')
+print(' * CRC constants generated by:')
+print(' *')
+print(f' *\t{sys.argv[0]} {" ".join(sys.argv[1:])}')
+print(' *')
+print(' * Do not edit manually.')
+print(' */')
+consts_types = sys.argv[1].split(',')
+variants = parse_crc_variants(sys.argv[2])
+for consts_type in consts_types:
+    if consts_type.startswith('sliceby'):
+        gen_slicebyN_tables(variants, int(consts_type.removeprefix('sliceby')))
+    elif consts_type == 'riscv_clmul':
+        gen_riscv_clmul_consts(variants)
+    elif consts_type == 'x86_pclmul':
+        gen_x86_pclmul_consts(variants)
+    else:
+        raise ValueError(f'Unknown consts_type: {consts_type}')
diff --git a/sound/soc/codecs/aw88395/aw88395_device.c b/sound/soc/codecs/aw88395/aw88395_device.c
index 6b333d1c6e94..b7ea8be0d0cb 100644
--- a/sound/soc/codecs/aw88395/aw88395_device.c
+++ b/sound/soc/codecs/aw88395/aw88395_device.c
@@ -424,7 +424,7 @@ static int aw_dev_dsp_set_crc32(struct aw_device *aw_dev)
 		return -EINVAL;
 	}
 
-	crc_value = __crc32c_le(0xFFFFFFFF, crc_dsp_cfg->data, crc_data_len) ^ 0xFFFFFFFF;
+	crc_value = crc32c(0xFFFFFFFF, crc_dsp_cfg->data, crc_data_len) ^ 0xFFFFFFFF;
 
 	return aw_dev_dsp_write(aw_dev, AW88395_DSP_REG_CRC_ADDR, crc_value,
 						AW88395_DSP_32_DATA);
diff --git a/tools/testing/selftests/arm64/fp/kernel-test.c b/tools/testing/selftests/arm64/fp/kernel-test.c
index 348e8bef62c7..e3cec3723ffa 100644
--- a/tools/testing/selftests/arm64/fp/kernel-test.c
+++ b/tools/testing/selftests/arm64/fp/kernel-test.c
@@ -46,7 +46,6 @@ static void handle_kick_signal(int sig, siginfo_t *info, void *context)
 }
 
 static char *drivers[] = {
-	"crct10dif-arm64",
	"sha1-ce",
	"sha224-arm64",
	"sha224-arm64-neon",
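
The new crc64_nvme_generic() above is a reflected (lsb-first) table-driven CRC, and gen_crc64table.c builds its 256-entry table from the reflected NVMe polynomial 0x9A6C9329AC4BC9B5. A minimal standalone sketch of the same scheme (plain Python, not kernel code; the names below are illustrative only) shows how the table, the byte-at-a-time loop, and the ~crc convention that the kunit wrapper strips off fit together:

# Illustrative only: mirrors crc64_nvme_generic() and gen_crc64table.c's
# generate_reflected_crc64_table(), using the reflected NVMe polynomial.
CRC64_NVME_POLY_REFLECTED = 0x9A6C9329AC4BC9B5

def make_reflected_table(poly):
    table = []
    for i in range(256):
        crc = i
        for _ in range(8):
            crc = (crc >> 1) ^ poly if crc & 1 else crc >> 1
        table.append(crc)
    return table

CRC64_NVME_TABLE = make_reflected_table(CRC64_NVME_POLY_REFLECTED)

def crc64_nvme_generic(crc, data):
    # Same update as the kernel loop above; no inversion here.  The caller-facing
    # crc64_nvme() applies ~crc on entry and exit, which is why the kunit wrapper
    # computes ~crc64_nvme(~crc, p, len) to undo those inversions.
    for b in data:
        crc = (crc >> 8) ^ CRC64_NVME_TABLE[(crc & 0xff) ^ b]
    return crc

def crc64_bitwise(crc, data, poly=CRC64_NVME_POLY_REFLECTED):
    # Bit-at-a-time reference used only to sanity-check the table-driven loop.
    for b in data:
        crc ^= b
        for _ in range(8):
            crc = (crc >> 1) ^ poly if crc & 1 else crc >> 1
    return crc

msg = b'123456789'
assert crc64_nvme_generic(0, msg) == crc64_bitwise(0, msg)
print(hex(0xFFFFFFFFFFFFFFFF ^ crc64_nvme_generic(0xFFFFFFFFFFFFFFFF, msg)))

The table-driven loop is checked against a bit-at-a-time reference, so the sketch verifies itself without relying on any external test vector.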
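lib/crc7.c keeps its 7-bit CRC left-aligned in bits 7..1 of a byte, which is why the kunit wrapper shifts by one bit in each direction. A short sketch (again plain Python with illustrative names, not the kernel API) regenerates the syndrome table for x^7 + x^3 + 1 and applies it exactly as the reworked crc7_be() now does:

# Illustrative only: rebuild crc7_be_syndrome_table[] and use it the way the
# updated crc7_be() does, i.e. crc = table[crc ^ byte], with the CRC kept
# left-aligned in bits 7..1 of the byte.
CRC7_POLY_LEFT_ALIGNED = 0x12   # x^3 + 1 (x^7 implicit), shifted left by one bit

def make_crc7_be_table():
    table = []
    for i in range(256):
        reg = i
        for _ in range(8):
            reg = ((reg << 1) ^ (CRC7_POLY_LEFT_ALIGNED if reg & 0x80 else 0)) & 0xff
        table.append(reg)
    return table

CRC7_BE_TABLE = make_crc7_be_table()

# First row of the table as it appears in the lib/crc7.c hunk above.
assert CRC7_BE_TABLE[:8] == [0x00, 0x12, 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e]

def crc7_be(crc, data):
    for b in data:
        crc = CRC7_BE_TABLE[crc ^ b]
    return crc

def crc7_be_wrapper(crc, data):
    # Convert to and from the right-aligned convention, as the kunit wrapper does.
    return crc7_be((crc << 1) & 0xff, data) >> 1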
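For msb-first variants, gen-crc-consts.py computes each slice-by-1 table entry as reduce(i << bits, G), i.e. the i'th entry is i(x) * x^bits mod G(x). A quick sketch (plain Python; the helper names are mine) checks that this polynomial-arithmetic view matches the classic shift-register table construction for crc16_msb_0x8bb7, one of the example variants named in the script's usage text:

# Illustrative only: slice-by-1 table entry as a polynomial remainder versus the
# conventional shift-register construction, for the CRC-16 generator 0x8bb7.
BITS = 16
G = 0x8bb7 | (1 << BITS)        # full generator polynomial, x^16 term included

def poly_mod(a, g):
    # Carry-less remainder a mod g, equivalent to reduce() in the script.
    while a.bit_length() >= g.bit_length():
        a ^= g << (a.bit_length() - g.bit_length())
    return a

def table_entry(i):
    # What gen_slicebyN_tables() emits for n == 1 and an msb-first CRC.
    return poly_mod(i << BITS, G)

def shift_register_entry(i):
    # Classic byte-at-a-time table construction for an msb-first CRC-16.
    reg = i << 8
    for _ in range(8):
        if reg & 0x8000:
            reg = ((reg << 1) ^ (G & 0xffff)) & 0xffff
        else:
            reg = (reg << 1) & 0xffff
    return reg

assert all(table_entry(i) == shift_register_entry(i) for i in range(256))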
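The aw88395 hunk only swaps __crc32c_le() for crc32c() with the same arguments; the 0xFFFFFFFF seed and final XOR stay with the caller, since the library function itself does not invert the CRC. A small sketch of that calling convention (plain Python, not the kernel API; 0xE3069283 is the commonly published CRC-32C check value for "123456789"):

# Illustrative only: lsb-first CRC-32C (Castagnoli) with the seed and final XOR
# supplied by the caller, mirroring crc32c(0xFFFFFFFF, data, len) ^ 0xFFFFFFFF.
CRC32C_POLY_REFLECTED = 0x82F63B78

CRC32C_TABLE = []
for i in range(256):
    crc = i
    for _ in range(8):
        crc = (crc >> 1) ^ CRC32C_POLY_REFLECTED if crc & 1 else crc >> 1
    CRC32C_TABLE.append(crc)

def crc32c_raw(crc, data):
    # No inversion here, matching the kernel library function's behaviour.
    for b in data:
        crc = (crc >> 8) ^ CRC32C_TABLE[(crc ^ b) & 0xff]
    return crc

# Standard CRC-32C of "123456789" (widely published check value).
assert crc32c_raw(0xFFFFFFFF, b'123456789') ^ 0xFFFFFFFF == 0xE3069283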