summary refs log tree commit diff
diff options
context:
space:
mode:
author Alexandre Ghiti <alexghiti@rivosinc.com> 2025-05-06 08:19:57 +0000
committer Palmer Dabbelt <palmer@dabbelt.com> 2025-06-05 12:21:59 -0700
commit 847689d2a0c4b4778459c387ec561465e521d963 (patch)
tree 6ac43733a689227828655f4ffa5e8e9283744b9e
parent 415a8c81da3dab0a585bd4f8d505a11ad5a171a7 (diff)
parent eb87e56d651d0a72009842bc0d8eeae7b605c97d (diff)
Merge patch series "riscv: Add Zicbop & prefetchw support"
Alexandre Ghiti <alexghiti@rivosinc.com> says:

I found this lost series developed by Guo so here is a respin with the comments on v2 applied.

This patch series adds Zicbop support and then enables the Linux prefetch features.

* patches from https://lore.kernel.org/r/20250421142441.395849-1-alexghiti@rivosinc.com:
  riscv: xchg: Prefetch the destination word for sc.w
  riscv: Add ARCH_HAS_PREFETCH[W] support with Zicbop
  riscv: Add support for Zicbop
  riscv: Introduce Zicbop instructions

Link: https://lore.kernel.org/r/20250421142441.395849-1-alexghiti@rivosinc.com
Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>
-rw-r--r-- arch/riscv/Kconfig 15
-rw-r--r-- arch/riscv/include/asm/barrier.h 5
-rw-r--r-- arch/riscv/include/asm/cacheflush.h 1
-rw-r--r-- arch/riscv/include/asm/cmpxchg.h 4
-rw-r--r-- arch/riscv/include/asm/hwcap.h 1
-rw-r--r-- arch/riscv/include/asm/insn-def.h 66
-rw-r--r-- arch/riscv/include/asm/processor.h 25
-rw-r--r-- arch/riscv/kernel/cpufeature.c 21
-rw-r--r-- arch/riscv/mm/cacheflush.c 14
9 files changed, 142 insertions, 10 deletions
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 78640cd353fd..a93af30727ee 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -847,6 +847,21 @@ config RISCV_ISA_ZICBOZ
If you don't know what to do here, say Y.
+config RISCV_ISA_ZICBOP
+ bool "Zicbop extension support for cache block prefetch"
+ depends on MMU
+ depends on RISCV_ALTERNATIVE
+ default y
+ help
+ Adds support to dynamically detect the presence of the ZICBOP
+ extension (Cache Block Prefetch Operations) and enable its
+ usage.
+
+ The Zicbop extension can be used to prefetch cache blocks for
+ read/write fetch.
+
+ If you don't know what to do here, say Y.
+
config TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI
def_bool y
# https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=aed44286efa8ae8717a77d94b51ac3614e2ca6dc
diff --git a/arch/riscv/include/asm/barrier.h b/arch/riscv/include/asm/barrier.h
index e1d9bf1deca6..b8c5726d86ac 100644
--- a/arch/riscv/include/asm/barrier.h
+++ b/arch/riscv/include/asm/barrier.h
@@ -14,11 +14,6 @@
#include <asm/cmpxchg.h>
#include <asm/fence.h>
-#define nop() __asm__ __volatile__ ("nop")
-#define __nops(n) ".rept " #n "\nnop\n.endr\n"
-#define nops(n) __asm__ __volatile__ (__nops(n))
-
-
/* These barriers need to enforce ordering on both devices or memory. */
#define __mb() RISCV_FENCE(iorw, iorw)
#define __rmb() RISCV_FENCE(ir, ir)
diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h
index 8de73f91bfa3..effa02c2e682 100644
--- a/arch/riscv/include/asm/cacheflush.h
+++ b/arch/riscv/include/asm/cacheflush.h
@@ -80,6 +80,7 @@ void flush_icache_mm(struct mm_struct *mm, bool local);
extern unsigned int riscv_cbom_block_size;
extern unsigned int riscv_cboz_block_size;
+extern unsigned int riscv_cbop_block_size;
void riscv_init_cbo_blocksizes(void);
#ifdef CONFIG_RISCV_DMA_NONCOHERENT
diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
index 2ec119eb147b..0b749e710216 100644
--- a/arch/riscv/include/asm/cmpxchg.h
+++ b/arch/riscv/include/asm/cmpxchg.h
@@ -13,6 +13,7 @@
#include <asm/hwcap.h>
#include <asm/insn-def.h>
#include <asm/cpufeature-macros.h>
+#include <asm/processor.h>
#define __arch_xchg_masked(sc_sfx, swap_sfx, prepend, sc_append, \
swap_append, r, p, n) \
@@ -37,6 +38,7 @@
\
__asm__ __volatile__ ( \
prepend \
+ PREFETCHW_ASM(%5) \
"0: lr.w %0, %2\n" \
" and %1, %0, %z4\n" \
" or %1, %1, %z3\n" \
@@ -44,7 +46,7 @@
" bnez %1, 0b\n" \
sc_append \
: "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \
- : "rJ" (__newx), "rJ" (~__mask) \
+ : "rJ" (__newx), "rJ" (~__mask), "rJ" (__ptr32b) \
: "memory"); \
\
r = (__typeof__(*(p)))((__retx & __mask) >> __s); \
diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
index e3cbf203cdde..affd63e11b0a 100644
--- a/arch/riscv/include/asm/hwcap.h
+++ b/arch/riscv/include/asm/hwcap.h
@@ -105,6 +105,7 @@
#define RISCV_ISA_EXT_ZVFBFWMA 96
#define RISCV_ISA_EXT_ZAAMO 97
#define RISCV_ISA_EXT_ZALRSC 98
+#define RISCV_ISA_EXT_ZICBOP 99
#define RISCV_ISA_EXT_XLINUXENVCFG 127
diff --git a/arch/riscv/include/asm/insn-def.h b/arch/riscv/include/asm/insn-def.h
index 71060a2f838e..d5adbaec1d01 100644
--- a/arch/riscv/include/asm/insn-def.h
+++ b/arch/riscv/include/asm/insn-def.h
@@ -18,6 +18,13 @@
#define INSN_I_RD_SHIFT 7
#define INSN_I_OPCODE_SHIFT 0
+#define INSN_S_SIMM7_SHIFT 25
+#define INSN_S_RS2_SHIFT 20
+#define INSN_S_RS1_SHIFT 15
+#define INSN_S_FUNC3_SHIFT 12
+#define INSN_S_SIMM5_SHIFT 7
+#define INSN_S_OPCODE_SHIFT 0
+
#ifdef __ASSEMBLY__
#ifdef CONFIG_AS_HAS_INSN
@@ -30,6 +37,10 @@
.insn i \opcode, \func3, \rd, \rs1, \simm12
.endm
+ .macro insn_s, opcode, func3, rs2, simm12, rs1
+ .insn s \opcode, \func3, \rs2, \simm12(\rs1)
+ .endm
+
#else
#include <asm/gpr-num.h>
@@ -51,10 +62,20 @@
(\simm12 << INSN_I_SIMM12_SHIFT))
.endm
+ .macro insn_s, opcode, func3, rs2, simm12, rs1
+ .4byte ((\opcode << INSN_S_OPCODE_SHIFT) | \
+ (\func3 << INSN_S_FUNC3_SHIFT) | \
+ (.L__gpr_num_\rs2 << INSN_S_RS2_SHIFT) | \
+ (.L__gpr_num_\rs1 << INSN_S_RS1_SHIFT) | \
+ ((\simm12 & 0x1f) << INSN_S_SIMM5_SHIFT) | \
+ (((\simm12 >> 5) & 0x7f) << INSN_S_SIMM7_SHIFT))
+ .endm
+
#endif
#define __INSN_R(...) insn_r __VA_ARGS__
#define __INSN_I(...) insn_i __VA_ARGS__
+#define __INSN_S(...) insn_s __VA_ARGS__
#else /* ! __ASSEMBLY__ */
@@ -66,6 +87,9 @@
#define __INSN_I(opcode, func3, rd, rs1, simm12) \
".insn i " opcode ", " func3 ", " rd ", " rs1 ", " simm12 "\n"
+#define __INSN_S(opcode, func3, rs2, simm12, rs1) \
+ ".insn s " opcode ", " func3 ", " rs2 ", " simm12 "(" rs1 ")\n"
+
#else
#include <linux/stringify.h>
@@ -92,12 +116,26 @@
" (\\simm12 << " __stringify(INSN_I_SIMM12_SHIFT) "))\n" \
" .endm\n"
+#define DEFINE_INSN_S \
+ __DEFINE_ASM_GPR_NUMS \
+" .macro insn_s, opcode, func3, rs2, simm12, rs1\n" \
+" .4byte ((\\opcode << " __stringify(INSN_S_OPCODE_SHIFT) ") |" \
+" (\\func3 << " __stringify(INSN_S_FUNC3_SHIFT) ") |" \
+" (.L__gpr_num_\\rs2 << " __stringify(INSN_S_RS2_SHIFT) ") |" \
+" (.L__gpr_num_\\rs1 << " __stringify(INSN_S_RS1_SHIFT) ") |" \
+" ((\\simm12 & 0x1f) << " __stringify(INSN_S_SIMM5_SHIFT) ") |" \
+" (((\\simm12 >> 5) & 0x7f) << " __stringify(INSN_S_SIMM7_SHIFT) "))\n" \
+" .endm\n"
+
#define UNDEFINE_INSN_R \
" .purgem insn_r\n"
#define UNDEFINE_INSN_I \
" .purgem insn_i\n"
+#define UNDEFINE_INSN_S \
+" .purgem insn_s\n"
+
#define __INSN_R(opcode, func3, func7, rd, rs1, rs2) \
DEFINE_INSN_R \
"insn_r " opcode ", " func3 ", " func7 ", " rd ", " rs1 ", " rs2 "\n" \
@@ -108,6 +146,11 @@
"insn_i " opcode ", " func3 ", " rd ", " rs1 ", " simm12 "\n" \
UNDEFINE_INSN_I
+#define __INSN_S(opcode, func3, rs2, simm12, rs1) \
+ DEFINE_INSN_S \
+ "insn_s " opcode ", " func3 ", " rs2 ", " simm12 ", " rs1 "\n" \
+ UNDEFINE_INSN_S
+
#endif
#endif /* ! __ASSEMBLY__ */
@@ -120,6 +163,10 @@
__INSN_I(RV_##opcode, RV_##func3, RV_##rd, \
RV_##rs1, RV_##simm12)
+#define INSN_S(opcode, func3, rs2, simm12, rs1) \
+ __INSN_S(RV_##opcode, RV_##func3, RV_##rs2, \
+ RV_##simm12, RV_##rs1)
+
#define RV_OPCODE(v) __ASM_STR(v)
#define RV_FUNC3(v) __ASM_STR(v)
#define RV_FUNC7(v) __ASM_STR(v)
@@ -133,6 +180,7 @@
#define RV___RS2(v) __RV_REG(v)
#define RV_OPCODE_MISC_MEM RV_OPCODE(15)
+#define RV_OPCODE_OP_IMM RV_OPCODE(19)
#define RV_OPCODE_SYSTEM RV_OPCODE(115)
#define HFENCE_VVMA(vaddr, asid) \
@@ -196,6 +244,18 @@
INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0), \
RS1(base), SIMM12(4))
+#define PREFETCH_I(base, offset) \
+ INSN_S(OPCODE_OP_IMM, FUNC3(6), __RS2(0), \
+ SIMM12((offset) & 0xfe0), RS1(base))
+
+#define PREFETCH_R(base, offset) \
+ INSN_S(OPCODE_OP_IMM, FUNC3(6), __RS2(1), \
+ SIMM12((offset) & 0xfe0), RS1(base))
+
+#define PREFETCH_W(base, offset) \
+ INSN_S(OPCODE_OP_IMM, FUNC3(6), __RS2(3), \
+ SIMM12((offset) & 0xfe0), RS1(base))
+
#define RISCV_PAUSE ".4byte 0x100000f"
#define ZAWRS_WRS_NTO ".4byte 0x00d00073"
#define ZAWRS_WRS_STO ".4byte 0x01d00073"
@@ -203,4 +263,10 @@
#define RISCV_INSN_NOP4 _AC(0x00000013, U)
+#ifndef __ASSEMBLY__
+#define nop() __asm__ __volatile__ ("nop")
+#define __nops(n) ".rept " #n "\nnop\n.endr\n"
+#define nops(n) __asm__ __volatile__ (__nops(n))
+#endif
+
#endif /* __ASM_INSN_DEF_H */
diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
index 9c1cc716b891..e12c14fea340 100644
--- a/arch/riscv/include/asm/processor.h
+++ b/arch/riscv/include/asm/processor.h
@@ -13,6 +13,9 @@
#include <vdso/processor.h>
#include <asm/ptrace.h>
+#include <asm/insn-def.h>
+#include <asm/alternative-macros.h>
+#include <asm/hwcap.h>
#define arch_get_mmap_end(addr, len, flags) \
({ \
@@ -52,7 +55,6 @@
#endif
#ifndef __ASSEMBLY__
-#include <linux/cpumask.h>
struct task_struct;
struct pt_regs;
@@ -141,6 +143,27 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset,
#define KSTK_EIP(tsk) (task_pt_regs(tsk)->epc)
#define KSTK_ESP(tsk) (task_pt_regs(tsk)->sp)
+#define PREFETCH_ASM(x) \
+ ALTERNATIVE(__nops(1), PREFETCH_R(x, 0), 0, \
+ RISCV_ISA_EXT_ZICBOP, CONFIG_RISCV_ISA_ZICBOP)
+
+#define PREFETCHW_ASM(x) \
+ ALTERNATIVE(__nops(1), PREFETCH_W(x, 0), 0, \
+ RISCV_ISA_EXT_ZICBOP, CONFIG_RISCV_ISA_ZICBOP)
+
+#ifdef CONFIG_RISCV_ISA_ZICBOP
+#define ARCH_HAS_PREFETCH
+static inline void prefetch(const void *x)
+{
+ __asm__ __volatile__(PREFETCH_ASM(%0) : : "r" (x) : "memory");
+}
+
+#define ARCH_HAS_PREFETCHW
+static inline void prefetchw(const void *x)
+{
+ __asm__ __volatile__(PREFETCHW_ASM(%0) : : "r" (x) : "memory");
+}
+#endif /* CONFIG_RISCV_ISA_ZICBOP */
/* Do necessary setup to start up a newly executed thread. */
extern void start_thread(struct pt_regs *regs,
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index 2054f6c4b0ae..743d53415572 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -32,6 +32,7 @@
#define NUM_ALPHA_EXTS ('z' - 'a' + 1)
static bool any_cpu_has_zicboz;
+static bool any_cpu_has_zicbop;
static bool any_cpu_has_zicbom;
unsigned long elf_hwcap __read_mostly;
@@ -119,6 +120,21 @@ static int riscv_ext_zicboz_validate(const struct riscv_isa_ext_data *data,
return 0;
}
+static int riscv_ext_zicbop_validate(const struct riscv_isa_ext_data *data,
+ const unsigned long *isa_bitmap)
+{
+ if (!riscv_cbop_block_size) {
+ pr_err("Zicbop detected in ISA string, disabling as no cbop-block-size found\n");
+ return -EINVAL;
+ }
+ if (!is_power_of_2(riscv_cbop_block_size)) {
+ pr_err("Zicbop disabled as cbop-block-size present, but is not a power-of-2\n");
+ return -EINVAL;
+ }
+ any_cpu_has_zicbop = true;
+ return 0;
+}
+
static int riscv_ext_f_validate(const struct riscv_isa_ext_data *data,
const unsigned long *isa_bitmap)
{
@@ -442,6 +458,7 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = {
__RISCV_ISA_EXT_SUPERSET_VALIDATE(v, RISCV_ISA_EXT_v, riscv_v_exts, riscv_ext_vector_float_validate),
__RISCV_ISA_EXT_DATA(h, RISCV_ISA_EXT_h),
__RISCV_ISA_EXT_SUPERSET_VALIDATE(zicbom, RISCV_ISA_EXT_ZICBOM, riscv_xlinuxenvcfg_exts, riscv_ext_zicbom_validate),
+ __RISCV_ISA_EXT_DATA_VALIDATE(zicbop, RISCV_ISA_EXT_ZICBOP, riscv_ext_zicbop_validate),
__RISCV_ISA_EXT_SUPERSET_VALIDATE(zicboz, RISCV_ISA_EXT_ZICBOZ, riscv_xlinuxenvcfg_exts, riscv_ext_zicboz_validate),
__RISCV_ISA_EXT_DATA(ziccrse, RISCV_ISA_EXT_ZICCRSE),
__RISCV_ISA_EXT_DATA(zicntr, RISCV_ISA_EXT_ZICNTR),
@@ -1112,6 +1129,10 @@ void __init riscv_user_isa_enable(void)
current->thread.envcfg |= ENVCFG_CBCFE;
else if (any_cpu_has_zicbom)
pr_warn("Zicbom disabled as it is unavailable on some harts\n");
+
+ if (!riscv_has_extension_unlikely(RISCV_ISA_EXT_ZICBOP) &&
+ any_cpu_has_zicbop)
+ pr_warn("Zicbop disabled as it is unavailable on some harts\n");
}
#ifdef CONFIG_RISCV_ALTERNATIVE
diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c
index b2e4b81763f8..bf9a3b5aae73 100644
--- a/arch/riscv/mm/cacheflush.c
+++ b/arch/riscv/mm/cacheflush.c
@@ -114,6 +114,9 @@ EXPORT_SYMBOL_GPL(riscv_cbom_block_size);
unsigned int riscv_cboz_block_size;
EXPORT_SYMBOL_GPL(riscv_cboz_block_size);
+unsigned int riscv_cbop_block_size;
+EXPORT_SYMBOL_GPL(riscv_cbop_block_size);
+
static void __init cbo_get_block_size(struct device_node *node,
const char *name, u32 *block_size,
unsigned long *first_hartid)
@@ -138,8 +141,8 @@ static void __init cbo_get_block_size(struct device_node *node,
void __init riscv_init_cbo_blocksizes(void)
{
- unsigned long cbom_hartid, cboz_hartid;
- u32 cbom_block_size = 0, cboz_block_size = 0;
+ unsigned long cbom_hartid, cboz_hartid, cbop_hartid;
+ u32 cbom_block_size = 0, cboz_block_size = 0, cbop_block_size = 0;
struct device_node *node;
struct acpi_table_header *rhct;
acpi_status status;
@@ -151,13 +154,15 @@ void __init riscv_init_cbo_blocksizes(void)
&cbom_block_size, &cbom_hartid);
cbo_get_block_size(node, "riscv,cboz-block-size",
&cboz_block_size, &cboz_hartid);
+ cbo_get_block_size(node, "riscv,cbop-block-size",
+ &cbop_block_size, &cbop_hartid);
}
} else {
status = acpi_get_table(ACPI_SIG_RHCT, 0, &rhct);
if (ACPI_FAILURE(status))
return;
- acpi_get_cbo_block_size(rhct, &cbom_block_size, &cboz_block_size, NULL);
+ acpi_get_cbo_block_size(rhct, &cbom_block_size, &cboz_block_size, &cbop_block_size);
acpi_put_table((struct acpi_table_header *)rhct);
}
@@ -166,6 +171,9 @@ void __init riscv_init_cbo_blocksizes(void)
if (cboz_block_size)
riscv_cboz_block_size = cboz_block_size;
+
+ if (cbop_block_size)
+ riscv_cbop_block_size = cbop_block_size;
}
#ifdef CONFIG_SMP