-rw-r--r--  .mailmap | 3
-rw-r--r--  Documentation/arch/riscv/cmodx.rst | 46
-rw-r--r--  Documentation/arch/riscv/hwprobe.rst | 26
-rw-r--r--  Documentation/devicetree/bindings/riscv/extensions.yaml | 25
-rw-r--r--  Documentation/devicetree/bindings/spi/spi-sg2044-nor.yaml | 7
-rw-r--r--  Documentation/filesystems/overlayfs.rst | 7
-rw-r--r--  MAINTAINERS | 35
-rw-r--r--  arch/riscv/Kconfig | 38
-rw-r--r--  arch/riscv/Kconfig.vendor | 13
-rw-r--r--  arch/riscv/Makefile | 4
-rw-r--r--  arch/riscv/configs/defconfig | 24
-rw-r--r--  arch/riscv/include/asm/asm-prototypes.h | 2
-rw-r--r--  arch/riscv/include/asm/barrier.h | 5
-rw-r--r--  arch/riscv/include/asm/cacheflush.h | 1
-rw-r--r--  arch/riscv/include/asm/cmpxchg.h | 4
-rw-r--r--  arch/riscv/include/asm/cpufeature.h | 14
-rw-r--r--  arch/riscv/include/asm/ftrace.h | 62
-rw-r--r--  arch/riscv/include/asm/hwcap.h | 1
-rw-r--r--  arch/riscv/include/asm/hwprobe.h | 3
-rw-r--r--  arch/riscv/include/asm/image.h | 2
-rw-r--r--  arch/riscv/include/asm/insn-def.h | 66
-rw-r--r--  arch/riscv/include/asm/kexec.h | 6
-rw-r--r--  arch/riscv/include/asm/pgtable-64.h | 5
-rw-r--r--  arch/riscv/include/asm/pgtable.h | 97
-rw-r--r--  arch/riscv/include/asm/processor.h | 31
-rw-r--r--  arch/riscv/include/asm/ptrace.h | 2
-rw-r--r--  arch/riscv/include/asm/sbi.h | 60
-rw-r--r--  arch/riscv/include/asm/tlbflush.h | 2
-rw-r--r--  arch/riscv/include/asm/uaccess.h | 218
-rw-r--r--  arch/riscv/include/asm/vdso/getrandom.h | 30
-rw-r--r--  arch/riscv/include/asm/vector.h | 22
-rw-r--r--  arch/riscv/include/asm/vendor_extensions/sifive.h | 16
-rw-r--r--  arch/riscv/include/asm/vendor_extensions/sifive_hwprobe.h | 19
-rw-r--r--  arch/riscv/include/uapi/asm/hwprobe.h | 2
-rw-r--r--  arch/riscv/include/uapi/asm/vendor/sifive.h | 6
-rw-r--r--  arch/riscv/kernel/Makefile | 2
-rw-r--r--  arch/riscv/kernel/asm-offsets.c | 18
-rw-r--r--  arch/riscv/kernel/cpufeature.c | 21
-rw-r--r--  arch/riscv/kernel/elf_kexec.c | 485
-rw-r--r--  arch/riscv/kernel/entry.S | 9
-rw-r--r--  arch/riscv/kernel/ftrace.c | 242
-rw-r--r--  arch/riscv/kernel/kexec_elf.c | 144
-rw-r--r--  arch/riscv/kernel/kexec_image.c | 96
-rw-r--r--  arch/riscv/kernel/machine_kexec_file.c | 361
-rw-r--r--  arch/riscv/kernel/mcount-dyn.S | 117
-rw-r--r--  arch/riscv/kernel/module-sections.c | 81
-rw-r--r--  arch/riscv/kernel/process.c | 2
-rw-r--r--  arch/riscv/kernel/sbi.c | 81
-rw-r--r--  arch/riscv/kernel/sys_hwprobe.c | 6
-rw-r--r--  arch/riscv/kernel/traps_misaligned.c | 116
-rw-r--r--  arch/riscv/kernel/unaligned_access_speed.c | 8
-rw-r--r--  arch/riscv/kernel/vdso.c | 2
-rw-r--r--  arch/riscv/kernel/vdso/Makefile | 15
-rw-r--r--  arch/riscv/kernel/vdso/getrandom.c | 10
-rw-r--r--  arch/riscv/kernel/vdso/vdso.lds.S | 3
-rw-r--r--  arch/riscv/kernel/vdso/vgetrandom-chacha.S | 249
-rw-r--r--  arch/riscv/kernel/vendor_extensions.c | 10
-rw-r--r--  arch/riscv/kernel/vendor_extensions/Makefile | 2
-rw-r--r--  arch/riscv/kernel/vendor_extensions/sifive.c | 21
-rw-r--r--  arch/riscv/kernel/vendor_extensions/sifive_hwprobe.c | 22
-rw-r--r--  arch/riscv/lib/riscv_v_helpers.c | 11
-rw-r--r--  arch/riscv/lib/uaccess.S | 50
-rw-r--r--  arch/riscv/lib/uaccess_vector.S | 15
-rw-r--r--  arch/riscv/mm/cacheflush.c | 29
-rw-r--r--  arch/riscv/mm/pgtable.c | 10
-rw-r--r--  arch/riscv/mm/tlbflush.c | 38
-rw-r--r--  arch/s390/kvm/gaccess.c | 8
-rw-r--r--  arch/s390/mm/fault.c | 2
-rw-r--r--  drivers/crypto/Kconfig | 1
-rw-r--r--  drivers/scsi/aacraid/aacraid.h | 1
-rw-r--r--  drivers/scsi/aacraid/commsup.c | 10
-rw-r--r--  drivers/scsi/scsi_devinfo.c | 2
-rw-r--r--  drivers/spi/spi-bcm63xx-hsspi.c | 2
-rw-r--r--  drivers/spi/spi-bcm63xx.c | 2
-rw-r--r--  drivers/spi/spi-pci1xxxx.c | 48
-rw-r--r--  drivers/spi/spi-qpic-snand.c | 14
-rw-r--r--  drivers/ufs/core/ufshcd.c | 3
-rw-r--r--  drivers/ufs/host/ufs-qcom.c | 141
-rw-r--r--  fs/ceph/addr.c | 9
-rw-r--r--  fs/ceph/export.c | 21
-rw-r--r--  fs/ceph/file.c | 2
-rw-r--r--  fs/ceph/super.c | 4
-rw-r--r--  fs/overlayfs/file.c | 4
-rw-r--r--  fs/overlayfs/namei.c | 98
-rw-r--r--  fs/overlayfs/ovl_entry.h | 2
-rw-r--r--  fs/overlayfs/params.c | 40
-rw-r--r--  fs/overlayfs/readdir.c | 4
-rw-r--r--  fs/overlayfs/util.c | 9
-rw-r--r--  include/linux/codetag.h | 8
-rw-r--r--  include/linux/ftrace.h | 2
-rw-r--r--  include/linux/hugetlb.h | 3
-rw-r--r--  include/linux/raid/pq.h | 5
-rw-r--r--  kernel/module/main.c | 5
-rw-r--r--  kernel/rcu/tree_stall.h | 26
-rw-r--r--  kernel/trace/ftrace.c | 2
-rw-r--r--  lib/alloc_tag.c | 12
-rw-r--r--  lib/codetag.c | 34
-rw-r--r--  lib/iov_iter.c | 2
-rw-r--r--  lib/raid6/Makefile | 1
-rw-r--r--  lib/raid6/algos.c | 9
-rw-r--r--  lib/raid6/recov_rvv.c | 229
-rw-r--r--  lib/raid6/rvv.c | 1212
-rw-r--r--  lib/raid6/rvv.h | 39
-rw-r--r--  mm/damon/modules-common.c | 2
-rw-r--r--  mm/damon/modules-common.h | 2
-rw-r--r--  mm/damon/ops-common.c | 2
-rw-r--r--  mm/damon/ops-common.h | 2
-rw-r--r--  mm/damon/paddr.c | 2
-rw-r--r--  mm/damon/sysfs-common.c | 2
-rw-r--r--  mm/damon/sysfs-common.h | 2
-rw-r--r--  mm/damon/vaddr.c | 2
-rw-r--r--  mm/hugetlb.c | 67
-rw-r--r--  mm/kmsan/kmsan_test.c | 1
-rw-r--r--  mm/madvise.c | 5
-rw-r--r--  mm/mempolicy.c | 4
-rw-r--r--  mm/mremap.c | 2
-rw-r--r--  mm/vma.c | 27
-rw-r--r--  mm/vma.h | 7
-rw-r--r--  mm/vmstat.c | 1
-rw-r--r--  tools/perf/util/symbol-elf.c | 6
-rw-r--r--  tools/testing/selftests/mm/ksm_tests.c | 32
-rw-r--r--  tools/testing/selftests/mm/merge.c | 43
-rw-r--r--  tools/testing/selftests/mm/thuge-gen.c | 6
-rw-r--r--  tools/testing/selftests/mm/vm_util.c | 38
-rw-r--r--  tools/testing/selftests/mm/vm_util.h | 2
-rw-r--r--  tools/testing/selftests/vDSO/vgetrandom-chacha.S | 2
-rw-r--r--  tools/testing/vma/vma_internal.h | 2
127 files changed, 4351 insertions, 1106 deletions
diff --git a/.mailmap b/.mailmap
index 77bb62564ef7..119049034708 100644
--- a/.mailmap
+++ b/.mailmap
@@ -107,7 +107,8 @@ Asahi Lina <lina+kernel@asahilina.net> <lina@asahilina.net>
Ashok Raj Nagarajan <quic_arnagara@quicinc.com> <arnagara@codeaurora.org>
Ashwin Chaugule <quic_ashwinc@quicinc.com> <ashwinc@codeaurora.org>
Asutosh Das <quic_asutoshd@quicinc.com> <asutoshd@codeaurora.org>
-Atish Patra <atishp@atishpatra.org> <atish.patra@wdc.com>
+Atish Patra <atish.patra@linux.dev> <atishp@atishpatra.org>
+Atish Patra <atish.patra@linux.dev> <atish.patra@wdc.com>
Avaneesh Kumar Dwivedi <quic_akdwived@quicinc.com> <akdwived@codeaurora.org>
Axel Dyks <xl@xlsigned.net>
Axel Lin <axel.lin@gmail.com>
diff --git a/Documentation/arch/riscv/cmodx.rst b/Documentation/arch/riscv/cmodx.rst
index 8c48bcff3df9..40ba53bed5df 100644
--- a/Documentation/arch/riscv/cmodx.rst
+++ b/Documentation/arch/riscv/cmodx.rst
@@ -10,13 +10,45 @@ modified by the program itself. Instruction storage and the instruction cache
program must enforce its own synchronization with the unprivileged fence.i
instruction.
-However, the default Linux ABI prohibits the use of fence.i in userspace
-applications. At any point the scheduler may migrate a task onto a new hart. If
-migration occurs after the userspace synchronized the icache and instruction
-storage with fence.i, the icache on the new hart will no longer be clean. This
-is due to the behavior of fence.i only affecting the hart that it is called on.
-Thus, the hart that the task has been migrated to may not have synchronized
-instruction storage and icache.
+CMODX in the Kernel Space
+-------------------------
+
+Dynamic ftrace
+---------------------
+
+Essentially, dynamic ftrace directs the control flow by inserting a function
+call at each patchable function entry, and patches it dynamically at runtime to
+enable or disable the redirection. In the case of RISC-V, 2 instructions,
+AUIPC + JALR, are required to compose a function call. However, it is impossible
+to patch 2 instructions and expect that a concurrent read-side executes them
+without a race condition. This series makes atomic code patching possible in
+RISC-V ftrace. Kernel preemption makes things even worse, as it allows the old
+state to persist across the patching process with stop_machine().
+
+In order to get rid of stop_machine() and run dynamic ftrace with full kernel
+preemption, we partially initialize each patchable function entry at boot time,
+setting the first instruction to AUIPC, and the second to NOP. Now, atomic
+patching is possible because the kernel only has to update one instruction.
+According to Ziccif, as long as an instruction is naturally aligned, the ISA
+guarantees an atomic update.
+
+By pinning down the first instruction, AUIPC, the range of the ftrace trampoline
+is limited to +-2K from the predetermined target, ftrace_caller, due to the lack
+of immediate encoding space in RISC-V. To address the issue, we introduce
+CALL_OPS, where an 8B naturally aligned metadata entry is added in front of each
+patchable function. The metadata is resolved at the first trampoline, and
+execution can then be directed to another custom trampoline.
+
+CMODX in the User Space
+-----------------------
+
+Though fence.i is an unprivileged instruction, the default Linux ABI prohibits
+the use of fence.i in userspace applications. At any point the scheduler may
+migrate a task onto a new hart. If migration occurs after the userspace
+synchronized the icache and instruction storage with fence.i, the icache on the
+new hart will no longer be clean. This is due to the behavior of fence.i only
+affecting the hart that it is called on. Thus, the hart that the task has been
+migrated to may not have synchronized instruction storage and icache.
There are two ways to solve this problem: use the riscv_flush_icache() syscall,
or use the ``PR_RISCV_SET_ICACHE_FLUSH_CTX`` prctl() and emit fence.i in
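
To make the auipc+jalr patching scheme above concrete, here is a minimal
user-space C sketch of the encoding (mirroring the AUIPC_T0/JALR_T0 constants
and the make_call_t0() helper in arch/riscv/include/asm/ftrace.h later in this
diff); it is illustrative only, not kernel code:

    #include <stdint.h>
    #include <stdio.h>

    #define AUIPC_T0 0x00000297u              /* auipc t0, 0 */
    #define JALR_T0  0x000282e7u              /* jalr  t0, 0(t0) */

    /* Build the auipc+jalr pair for a given caller->callee offset. */
    static void make_call_t0(uint32_t offset, uint32_t insn[2])
    {
            uint32_t hi = offset & 0xfffff000u;
            uint32_t lo = offset & 0x00000fffu;

            /* jalr sign-extends its immediate; compensate in auipc. */
            if (offset & 0x800u)
                    hi += 0x1000u;

            insn[0] = hi | AUIPC_T0;          /* fixed once at boot */
            insn[1] = (lo << 20) | JALR_T0;   /* the only word patched */
    }

    int main(void)
    {
            uint32_t call[2];

            make_call_t0(0x1234, call);
            printf("auipc: %08x  jalr: %08x\n", call[0], call[1]);
            return 0;
    }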
diff --git a/Documentation/arch/riscv/hwprobe.rst b/Documentation/arch/riscv/hwprobe.rst
index f60bf5991755..2aa9be272d5d 100644
--- a/Documentation/arch/riscv/hwprobe.rst
+++ b/Documentation/arch/riscv/hwprobe.rst
@@ -271,6 +271,10 @@ The following keys are defined:
* :c:macro:`RISCV_HWPROBE_EXT_ZICBOM`: The Zicbom extension is supported, as
ratified in commit 3dd606f ("Create cmobase-v1.0.pdf") of riscv-CMOs.
+ * :c:macro:`RISCV_HWPROBE_EXT_ZABHA`: The Zabha extension is supported, as
+ ratified in commit 49f49c842ff9 ("Update to Ratified state") of
+ riscv-zabha.
+
* :c:macro:`RISCV_HWPROBE_KEY_CPUPERF_0`: Deprecated. Returns similar values to
:c:macro:`RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF`, but the key was
mistakenly classified as a bitmask rather than a value.
@@ -335,3 +339,25 @@ The following keys are defined:
* :c:macro:`RISCV_HWPROBE_KEY_ZICBOM_BLOCK_SIZE`: An unsigned int which
represents the size of the Zicbom block in bytes.
+
+* :c:macro:`RISCV_HWPROBE_KEY_VENDOR_EXT_SIFIVE_0`: A bitmask containing the
+ SiFive vendor extensions that are compatible with the
+ :c:macro:`RISCV_HWPROBE_BASE_BEHAVIOR_IMA` base system behavior.
+
+ * SIFIVE
+
+ * :c:macro:`RISCV_HWPROBE_VENDOR_EXT_XSFVQMACCDOD`: The Xsfvqmaccdod vendor
+ extension is supported in version 1.1 of SiFive Int8 Matrix Multiplication
+ Extensions Specification.
+
+ * :c:macro:`RISCV_HWPROBE_VENDOR_EXT_XSFVQMACCQOQ`: The Xsfvqmaccqoq vendor
+ extension is supported in version 1.1 of SiFive Int8 Matrix Multiplication
+ Instruction Extensions Specification.
+
+ * :c:macro:`RISCV_HWPROBE_VENDOR_EXT_XSFVFNRCLIPXFQF`: The Xsfvfnrclipxfqf
+ vendor extension is supported in version 1.0 of SiFive FP32-to-int8 Ranged
+ Clip Instructions Extensions Specification.
+
+ * :c:macro:`RISCV_HWPROBE_VENDOR_EXT_XSFVFWMACCQQQ`: The Xsfvfwmaccqqq
+ vendor extension is supported in version 1.0 of SiFive Matrix Multiply Accumulate
+ Instruction Extensions Specification. \ No newline at end of file
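
The new key is queried like any other hwprobe key. A minimal sketch, assuming
no libc wrapper (so raw syscall(2) is used) and that the vendor bit
definitions live in the uapi headers added by this series:

    #include <stdio.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <asm/hwprobe.h>
    #include <asm/vendor/sifive.h>   /* assumed location of the SiFive bits */

    int main(void)
    {
            struct riscv_hwprobe pair = {
                    .key = RISCV_HWPROBE_KEY_VENDOR_EXT_SIFIVE_0,
            };

            /* cpusetsize == 0, cpus == NULL: behavior common to all harts */
            if (syscall(__NR_riscv_hwprobe, &pair, 1, 0, NULL, 0))
                    return 1;

            if (pair.value & RISCV_HWPROBE_VENDOR_EXT_XSFVQMACCDOD)
                    printf("Xsfvqmaccdod supported\n");
            return 0;
    }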
diff --git a/Documentation/devicetree/bindings/riscv/extensions.yaml b/Documentation/devicetree/bindings/riscv/extensions.yaml
index bcab59e0cc2e..ede6a58ccf53 100644
--- a/Documentation/devicetree/bindings/riscv/extensions.yaml
+++ b/Documentation/devicetree/bindings/riscv/extensions.yaml
@@ -662,6 +662,31 @@ properties:
Registers in the AX45MP datasheet.
https://www.andestech.com/wp-content/uploads/AX45MP-1C-Rev.-5.0.0-Datasheet.pdf
+ # SiFive
+ - const: xsfvqmaccdod
+ description:
+ SiFive Int8 Matrix Multiplication Extensions Specification.
+ See more details in
+ https://www.sifive.com/document-file/sifive-int8-matrix-multiplication-extensions-specification
+
+ - const: xsfvqmaccqoq
+ description:
+ SiFive Int8 Matrix Multiplication Extensions Specification.
+ See more details in
+ https://www.sifive.com/document-file/sifive-int8-matrix-multiplication-extensions-specification
+
+ - const: xsfvfnrclipxfqf
+ description:
+ SiFive FP32-to-int8 Ranged Clip Instructions Extensions Specification.
+ See more details in
+ https://www.sifive.com/document-file/fp32-to-int8-ranged-clip-instructions
+
+ - const: xsfvfwmaccqqq
+ description:
+ SiFive Matrix Multiply Accumulate Instruction Extensions Specification.
+ See more details in
+ https://www.sifive.com/document-file/matrix-multiply-accumulate-instruction
+
# T-HEAD
- const: xtheadvector
description:
diff --git a/Documentation/devicetree/bindings/spi/spi-sg2044-nor.yaml b/Documentation/devicetree/bindings/spi/spi-sg2044-nor.yaml
index 948ff7a09643..66e54dedab14 100644
--- a/Documentation/devicetree/bindings/spi/spi-sg2044-nor.yaml
+++ b/Documentation/devicetree/bindings/spi/spi-sg2044-nor.yaml
@@ -14,7 +14,12 @@ allOf:
properties:
compatible:
- const: sophgo,sg2044-spifmc-nor
+ oneOf:
+ - const: sophgo,sg2044-spifmc-nor
+ - items:
+ - enum:
+ - sophgo,sg2042-spifmc-nor
+ - const: sophgo,sg2044-spifmc-nor
reg:
maxItems: 1
diff --git a/Documentation/filesystems/overlayfs.rst b/Documentation/filesystems/overlayfs.rst
index 2db379b4b31e..4133a336486d 100644
--- a/Documentation/filesystems/overlayfs.rst
+++ b/Documentation/filesystems/overlayfs.rst
@@ -443,6 +443,13 @@ Only the data of the files in the "data-only" lower layers may be visible
when a "metacopy" file in one of the lower layers above it, has a "redirect"
to the absolute path of the "lower data" file in the "data-only" lower layer.
+Instead of explicitly enabling "metacopy=on", it is sufficient to specify at
+least one data-only layer to enable redirection of data to a data-only layer.
+In this case other forms of metacopy are rejected. Note: this way data-only
+layers may be used together with "userxattr", in which case careful attention
+must be given to privileges needed to change the "user.overlay.redirect" xattr
+to prevent misuse.
+
Since kernel version v6.8, "data-only" lower layers can also be added using
the "datadir+" mount options and the fsconfig syscall from new mount api.
For example::
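
A minimal C sketch of the implicit-metacopy case described above, using
mount(2) directly (paths are hypothetical; the "::" separator marks the
data-only layer):

    #include <stdio.h>
    #include <sys/mount.h>

    int main(void)
    {
            /* "/do1" after the "::" is a data-only layer; redirection of
             * data to it is enabled without specifying "metacopy=on". */
            const char *opts =
                    "lowerdir=/l1:/l2::/do1,upperdir=/up,workdir=/work";

            if (mount("overlay", "/merged", "overlay", 0, opts)) {
                    perror("mount");
                    return 1;
            }
            return 0;
    }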
diff --git a/MAINTAINERS b/MAINTAINERS
index 71dcd9f2441e..5defb941c141 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10966,7 +10966,7 @@ F: drivers/crypto/hisilicon/sec2/sec_crypto.h
F: drivers/crypto/hisilicon/sec2/sec_main.c
HISILICON SPI Controller DRIVER FOR KUNPENG SOCS
-M: Jay Fang <f.fangjian@huawei.com>
+M: Yang Shen <shenyang39@huawei.com>
L: linux-spi@vger.kernel.org
S: Maintained
W: http://www.hisilicon.com
@@ -10992,7 +10992,7 @@ S: Maintained
F: drivers/crypto/hisilicon/trng/trng.c
HISILICON V3XX SPI NOR FLASH Controller Driver
-M: Jay Fang <f.fangjian@huawei.com>
+M: Yang Shen <shenyang39@huawei.com>
S: Maintained
W: http://www.hisilicon.com
F: drivers/spi/spi-hisi-sfc-v3xx.c
@@ -13270,7 +13270,7 @@ F: arch/powerpc/kvm/
KERNEL VIRTUAL MACHINE FOR RISC-V (KVM/riscv)
M: Anup Patel <anup@brainfault.org>
-R: Atish Patra <atishp@atishpatra.org>
+R: Atish Patra <atish.patra@linux.dev>
L: kvm@vger.kernel.org
L: kvm-riscv@lists.infradead.org
L: linux-riscv@lists.infradead.org
@@ -15818,6 +15818,7 @@ R: Rakie Kim <rakie.kim@sk.com>
R: Byungchul Park <byungchul@sk.com>
R: Gregory Price <gourry@gourry.net>
R: Ying Huang <ying.huang@linux.alibaba.com>
+R: Alistair Popple <apopple@nvidia.com>
L: linux-mm@kvack.org
S: Maintained
W: http://www.linux-mm.org
@@ -15889,6 +15890,25 @@ S: Maintained
F: include/linux/secretmem.h
F: mm/secretmem.c
+MEMORY MANAGEMENT - SWAP
+M: Andrew Morton <akpm@linux-foundation.org>
+R: Kemeng Shi <shikemeng@huaweicloud.com>
+R: Kairui Song <kasong@tencent.com>
+R: Nhat Pham <nphamcs@gmail.com>
+R: Baoquan He <bhe@redhat.com>
+R: Barry Song <baohua@kernel.org>
+R: Chris Li <chrisl@kernel.org>
+L: linux-mm@kvack.org
+S: Maintained
+F: include/linux/swap.h
+F: include/linux/swapfile.h
+F: include/linux/swapops.h
+F: mm/page_io.c
+F: mm/swap.c
+F: mm/swap.h
+F: mm/swap_state.c
+F: mm/swapfile.c
+
MEMORY MANAGEMENT - THP (TRANSPARENT HUGE PAGE)
M: Andrew Morton <akpm@linux-foundation.org>
M: David Hildenbrand <david@redhat.com>
@@ -16727,6 +16747,7 @@ L: linux-mm@kvack.org
S: Maintained
F: arch/*/include/asm/tlb.h
F: include/asm-generic/tlb.h
+F: include/trace/events/tlb.h
F: mm/mmu_gather.c
MN88472 MEDIA DRIVER
@@ -21332,7 +21353,7 @@ F: arch/riscv/boot/dts/sifive/
F: arch/riscv/boot/dts/starfive/
RISC-V PMU DRIVERS
-M: Atish Patra <atishp@atishpatra.org>
+M: Atish Patra <atish.patra@linux.dev>
R: Anup Patel <anup@brainfault.org>
L: linux-riscv@lists.infradead.org
S: Supported
@@ -25013,10 +25034,8 @@ F: mm/shmem.c
TOMOYO SECURITY MODULE
M: Kentaro Takeda <takedakn@nttdata.co.jp>
M: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
-L: tomoyo-dev-en@lists.osdn.me (subscribers-only, for developers in English)
-L: tomoyo-users-en@lists.osdn.me (subscribers-only, for users in English)
-L: tomoyo-dev@lists.osdn.me (subscribers-only, for developers in Japanese)
-L: tomoyo-users@lists.osdn.me (subscribers-only, for users in Japanese)
+L: tomoyo-users_en@lists.sourceforge.net (subscribers-only, English language)
+L: tomoyo-users_ja@lists.sourceforge.net (subscribers-only, Japanese language)
S: Maintained
W: https://tomoyo.sourceforge.net/
F: security/tomoyo/
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index bbec87b79309..36061f4732b7 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -70,6 +70,7 @@ config RISCV
# LLD >= 14: https://github.com/llvm/llvm-project/issues/50505
select ARCH_SUPPORTS_LTO_CLANG if LLD_VERSION >= 140000
select ARCH_SUPPORTS_LTO_CLANG_THIN if LLD_VERSION >= 140000
+ select ARCH_SUPPORTS_MSEAL_SYSTEM_MAPPINGS if 64BIT && MMU
select ARCH_SUPPORTS_PAGE_TABLE_CHECK if MMU
select ARCH_SUPPORTS_PER_VMA_LOCK if MMU
select ARCH_SUPPORTS_RT
@@ -99,6 +100,7 @@ config RISCV
select EDAC_SUPPORT
select FRAME_POINTER if PERF_EVENTS || (FUNCTION_TRACER && !DYNAMIC_FTRACE)
select FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY if DYNAMIC_FTRACE
+ select FUNCTION_ALIGNMENT_8B if DYNAMIC_FTRACE_WITH_CALL_OPS
select GENERIC_ARCH_TOPOLOGY
select GENERIC_ATOMIC64 if !64BIT
select GENERIC_CLOCKEVENTS_BROADCAST if SMP
@@ -143,6 +145,7 @@ config RISCV
select HAVE_ARCH_THREAD_STRUCT_WHITELIST
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE if 64BIT && MMU
+ select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if 64BIT && MMU
select HAVE_ARCH_USERFAULTFD_MINOR if 64BIT && USERFAULTFD
select HAVE_ARCH_VMAP_STACK if MMU && 64BIT
select HAVE_ASM_MODVERSIONS
@@ -150,13 +153,15 @@ config RISCV
select HAVE_DEBUG_KMEMLEAK
select HAVE_DMA_CONTIGUOUS if MMU
select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && MMU && (CLANG_SUPPORTS_DYNAMIC_FTRACE || GCC_SUPPORTS_DYNAMIC_FTRACE)
- select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+ select FUNCTION_ALIGNMENT_4B if HAVE_DYNAMIC_FTRACE && RISCV_ISA_C
+ select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS if HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS
+ select HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS if (DYNAMIC_FTRACE_WITH_ARGS && !CFI_CLANG)
select HAVE_DYNAMIC_FTRACE_WITH_ARGS if HAVE_DYNAMIC_FTRACE
select HAVE_FTRACE_GRAPH_FUNC
select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL
select HAVE_FUNCTION_GRAPH_TRACER if HAVE_DYNAMIC_FTRACE_WITH_ARGS
select HAVE_FUNCTION_GRAPH_FREGS
- select HAVE_FUNCTION_TRACER if !XIP_KERNEL && !PREEMPTION
+ select HAVE_FUNCTION_TRACER if !XIP_KERNEL
select HAVE_EBPF_JIT if MMU
select HAVE_GUP_FAST if MMU
select HAVE_FUNCTION_ARG_ACCESS_API
@@ -218,6 +223,7 @@ config RISCV
select THREAD_INFO_IN_TASK
select TRACE_IRQFLAGS_SUPPORT
select UACCESS_MEMCPY if !MMU
+ select VDSO_GETRANDOM if HAVE_GENERIC_VDSO
select USER_STACKTRACE_SUPPORT
select ZONE_DMA32 if 64BIT
@@ -236,6 +242,7 @@ config CLANG_SUPPORTS_DYNAMIC_FTRACE
config GCC_SUPPORTS_DYNAMIC_FTRACE
def_bool CC_IS_GCC
depends on $(cc-option,-fpatchable-function-entry=8)
+ depends on CC_HAS_MIN_FUNCTION_ALIGNMENT || !RISCV_ISA_C
config HAVE_SHADOW_CALL_STACK
def_bool $(cc-option,-fsanitize=shadow-call-stack)
@@ -664,12 +671,12 @@ config RISCV_ISA_V_PREEMPTIVE
default y
help
Usually, in-kernel SIMD routines are run with preemption disabled.
- Functions which envoke long running SIMD thus must yield core's
+ Functions which invoke long running SIMD thus must yield the core's
vector unit to prevent blocking other tasks for too long.
- This config allows kernel to run SIMD without explicitly disable
- preemption. Enabling this config will result in higher memory
- consumption due to the allocation of per-task's kernel Vector context.
+ This config allows the kernel to run SIMD without explicitly disabling
+ preemption. Enabling this config will result in higher memory consumption
+ due to the allocation of a per-task kernel Vector context.
config RISCV_ISA_ZAWRS
bool "Zawrs extension support for more efficient busy waiting"
@@ -842,6 +849,21 @@ config RISCV_ISA_ZICBOZ
If you don't know what to do here, say Y.
+config RISCV_ISA_ZICBOP
+ bool "Zicbop extension support for cache block prefetch"
+ depends on MMU
+ depends on RISCV_ALTERNATIVE
+ default y
+ help
+ Adds support to dynamically detect the presence of the ZICBOP
+ extension (Cache Block Prefetch Operations) and enable its
+ usage.
+
+ The Zicbop extension can be used to prefetch cache blocks for
+ read/write fetch.
+
+ If you don't know what to do here, say Y.
+
config TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI
def_bool y
# https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=aed44286efa8ae8717a77d94b51ac3614e2ca6dc
@@ -1171,8 +1193,8 @@ config CMDLINE_FALLBACK
config CMDLINE_EXTEND
bool "Extend bootloader kernel arguments"
help
- The command-line arguments provided during boot will be
- appended to the built-in command line. This is useful in
+ The built-in command line will be appended to the command-
+ line arguments provided during boot. This is useful in
cases where the provided arguments are insufficient and
you don't want to or cannot modify them.
diff --git a/arch/riscv/Kconfig.vendor b/arch/riscv/Kconfig.vendor
index b096548fe0ff..e14f26368963 100644
--- a/arch/riscv/Kconfig.vendor
+++ b/arch/riscv/Kconfig.vendor
@@ -16,6 +16,19 @@ config RISCV_ISA_VENDOR_EXT_ANDES
If you don't know what to do here, say Y.
endmenu
+menu "SiFive"
+config RISCV_ISA_VENDOR_EXT_SIFIVE
+ bool "SiFive vendor extension support"
+ select RISCV_ISA_VENDOR_EXT
+ default y
+ help
+ Say N here if you want to disable all SiFive vendor extension
+ support. This will cause any SiFive vendor extensions that are
+ requested by hardware probing to be ignored.
+
+ If you don't know what to do here, say Y.
+endmenu
+
menu "T-Head"
config RISCV_ISA_VENDOR_EXT_THEAD
bool "T-Head vendor extension support"
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index 539d2aef5cab..df57654a615e 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -15,9 +15,9 @@ ifeq ($(CONFIG_DYNAMIC_FTRACE),y)
LDFLAGS_vmlinux += --no-relax
KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
ifeq ($(CONFIG_RISCV_ISA_C),y)
- CC_FLAGS_FTRACE := -fpatchable-function-entry=4
+ CC_FLAGS_FTRACE := -fpatchable-function-entry=8,4
else
- CC_FLAGS_FTRACE := -fpatchable-function-entry=2
+ CC_FLAGS_FTRACE := -fpatchable-function-entry=4,2
endif
endif
diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig
index eea825ee58e1..fe8bd8afb418 100644
--- a/arch/riscv/configs/defconfig
+++ b/arch/riscv/configs/defconfig
@@ -18,12 +18,9 @@ CONFIG_CGROUP_DEVICE=y
CONFIG_CGROUP_CPUACCT=y
CONFIG_CGROUP_PERF=y
CONFIG_CGROUP_BPF=y
-CONFIG_NAMESPACES=y
CONFIG_USER_NS=y
CONFIG_CHECKPOINT_RESTORE=y
CONFIG_BLK_DEV_INITRD=y
-CONFIG_EXPERT=y
-# CONFIG_SYSFS_SYSCALL is not set
CONFIG_PROFILING=y
CONFIG_ARCH_MICROCHIP=y
CONFIG_ARCH_SIFIVE=y
@@ -182,6 +179,7 @@ CONFIG_REGULATOR_FIXED_VOLTAGE=y
CONFIG_REGULATOR_AXP20X=y
CONFIG_REGULATOR_GPIO=y
CONFIG_MEDIA_SUPPORT=m
+CONFIG_MEDIA_PLATFORM_SUPPORT=y
CONFIG_VIDEO_CADENCE_CSI2RX=m
CONFIG_DRM=m
CONFIG_DRM_RADEON=m
@@ -297,25 +295,7 @@ CONFIG_DEFAULT_SECURITY_DAC=y
CONFIG_CRYPTO_USER_API_HASH=y
CONFIG_CRYPTO_DEV_VIRTIO=y
CONFIG_PRINTK_TIME=y
+CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_FS=y
-CONFIG_DEBUG_PAGEALLOC=y
-CONFIG_SCHED_STACK_END_CHECK=y
-CONFIG_DEBUG_VM=y
-CONFIG_DEBUG_VM_PGFLAGS=y
-CONFIG_DEBUG_MEMORY_INIT=y
-CONFIG_DEBUG_PER_CPU_MAPS=y
-CONFIG_SOFTLOCKUP_DETECTOR=y
-CONFIG_WQ_WATCHDOG=y
-CONFIG_DEBUG_RT_MUTEXES=y
-CONFIG_DEBUG_SPINLOCK=y
-CONFIG_DEBUG_MUTEXES=y
-CONFIG_DEBUG_RWSEMS=y
-CONFIG_DEBUG_ATOMIC_SLEEP=y
-CONFIG_DEBUG_LIST=y
-CONFIG_DEBUG_PLIST=y
-CONFIG_DEBUG_SG=y
-# CONFIG_RCU_TRACE is not set
-CONFIG_RCU_EQS_DEBUG=y
-# CONFIG_FTRACE is not set
# CONFIG_RUNTIME_TESTING_MENU is not set
CONFIG_MEMTEST=y
diff --git a/arch/riscv/include/asm/asm-prototypes.h b/arch/riscv/include/asm/asm-prototypes.h
index bfc8ea5f9319..a9988bf21ec8 100644
--- a/arch/riscv/include/asm/asm-prototypes.h
+++ b/arch/riscv/include/asm/asm-prototypes.h
@@ -12,7 +12,7 @@ long long __ashlti3(long long a, int b);
#ifdef CONFIG_RISCV_ISA_V
#ifdef CONFIG_MMU
-asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n);
+asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n, bool enable_sum);
#endif /* CONFIG_MMU */
void xor_regs_2_(unsigned long bytes, unsigned long *__restrict p1,
diff --git a/arch/riscv/include/asm/barrier.h b/arch/riscv/include/asm/barrier.h
index e1d9bf1deca6..b8c5726d86ac 100644
--- a/arch/riscv/include/asm/barrier.h
+++ b/arch/riscv/include/asm/barrier.h
@@ -14,11 +14,6 @@
#include <asm/cmpxchg.h>
#include <asm/fence.h>
-#define nop() __asm__ __volatile__ ("nop")
-#define __nops(n) ".rept " #n "\nnop\n.endr\n"
-#define nops(n) __asm__ __volatile__ (__nops(n))
-
-
/* These barriers need to enforce ordering on both devices or memory. */
#define __mb() RISCV_FENCE(iorw, iorw)
#define __rmb() RISCV_FENCE(ir, ir)
diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h
index b59ffeb668d6..6086b38d5427 100644
--- a/arch/riscv/include/asm/cacheflush.h
+++ b/arch/riscv/include/asm/cacheflush.h
@@ -85,6 +85,7 @@ static inline void flush_icache_range(unsigned long start, unsigned long end)
extern unsigned int riscv_cbom_block_size;
extern unsigned int riscv_cboz_block_size;
+extern unsigned int riscv_cbop_block_size;
void riscv_init_cbo_blocksizes(void);
#ifdef CONFIG_RISCV_DMA_NONCOHERENT
diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
index 2ec119eb147b..0b749e710216 100644
--- a/arch/riscv/include/asm/cmpxchg.h
+++ b/arch/riscv/include/asm/cmpxchg.h
@@ -13,6 +13,7 @@
#include <asm/hwcap.h>
#include <asm/insn-def.h>
#include <asm/cpufeature-macros.h>
+#include <asm/processor.h>
#define __arch_xchg_masked(sc_sfx, swap_sfx, prepend, sc_append, \
swap_append, r, p, n) \
@@ -37,6 +38,7 @@
\
__asm__ __volatile__ ( \
prepend \
+ PREFETCHW_ASM(%5) \
"0: lr.w %0, %2\n" \
" and %1, %0, %z4\n" \
" or %1, %1, %z3\n" \
@@ -44,7 +46,7 @@
" bnez %1, 0b\n" \
sc_append \
: "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \
- : "rJ" (__newx), "rJ" (~__mask) \
+ : "rJ" (__newx), "rJ" (~__mask), "rJ" (__ptr32b) \
: "memory"); \
\
r = (__typeof__(*(p)))((__retx & __mask) >> __s); \
diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h
index f56b409361fb..fbd0e4306c93 100644
--- a/arch/riscv/include/asm/cpufeature.h
+++ b/arch/riscv/include/asm/cpufeature.h
@@ -67,11 +67,11 @@ void __init riscv_user_isa_enable(void);
_RISCV_ISA_EXT_DATA(_name, _id, _sub_exts, ARRAY_SIZE(_sub_exts), _validate)
bool __init check_unaligned_access_emulated_all_cpus(void);
+void unaligned_access_init(void);
+int cpu_online_unaligned_access_init(unsigned int cpu);
#if defined(CONFIG_RISCV_SCALAR_MISALIGNED)
-void check_unaligned_access_emulated(struct work_struct *work __always_unused);
void unaligned_emulation_finish(void);
bool unaligned_ctl_available(void);
-DECLARE_PER_CPU(long, misaligned_access_speed);
#else
static inline bool unaligned_ctl_available(void)
{
@@ -79,6 +79,16 @@ static inline bool unaligned_ctl_available(void)
}
#endif
+#if defined(CONFIG_RISCV_MISALIGNED)
+DECLARE_PER_CPU(long, misaligned_access_speed);
+bool misaligned_traps_can_delegate(void);
+#else
+static inline bool misaligned_traps_can_delegate(void)
+{
+ return false;
+}
+#endif
+
bool __init check_vector_unaligned_access_emulated_all_cpus(void);
#if defined(CONFIG_RISCV_VECTOR_MISALIGNED)
void check_vector_unaligned_access_emulated(struct work_struct *work __always_unused);
diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h
index d627f63ee289..22ebea3c2b26 100644
--- a/arch/riscv/include/asm/ftrace.h
+++ b/arch/riscv/include/asm/ftrace.h
@@ -20,10 +20,9 @@ extern void *return_address(unsigned int level);
#define ftrace_return_address(n) return_address(n)
void _mcount(void);
-static inline unsigned long ftrace_call_adjust(unsigned long addr)
-{
- return addr;
-}
+unsigned long ftrace_call_adjust(unsigned long addr);
+unsigned long arch_ftrace_get_symaddr(unsigned long fentry_ip);
+#define ftrace_get_symaddr(fentry_ip) arch_ftrace_get_symaddr(fentry_ip)
/*
* Let's do like x86/arm64 and ignore the compat syscalls.
@@ -57,12 +56,21 @@ struct dyn_arch_ftrace {
* 2) jalr: setting low-12 offset to ra, jump to ra, and set ra to
* return address (original pc + 4)
*
+ * The first 2 instructions of each traceable function are compiled to 2 nop
+ * instructions. Then, the kernel initializes the first instruction to auipc at
+ * boot time (<ftrace disable>). The second instruction is patched to jalr to
+ * start the trace.
+ *
+ *<Image>:
+ * 0: nop
+ * 4: nop
+ *
*<ftrace enable>:
- * 0: auipc t0/ra, 0x?
- * 4: jalr t0/ra, ?(t0/ra)
+ * 0: auipc t0, 0x?
+ * 4: jalr t0, ?(t0)
*
*<ftrace disable>:
- * 0: nop
+ * 0: auipc t0, 0x?
* 4: nop
*
* Dynamic ftrace generates probes to call sites, so we must deal with
@@ -75,10 +83,9 @@ struct dyn_arch_ftrace {
#define AUIPC_OFFSET_MASK (0xfffff000)
#define AUIPC_PAD (0x00001000)
#define JALR_SHIFT 20
-#define JALR_RA (0x000080e7)
-#define AUIPC_RA (0x00000097)
#define JALR_T0 (0x000282e7)
#define AUIPC_T0 (0x00000297)
+#define JALR_RANGE (JALR_SIGN_MASK - 1)
#define to_jalr_t0(offset) \
(((offset & JALR_OFFSET_MASK) << JALR_SHIFT) | JALR_T0)
@@ -96,26 +103,14 @@ do { \
call[1] = to_jalr_t0(offset); \
} while (0)
-#define to_jalr_ra(offset) \
- (((offset & JALR_OFFSET_MASK) << JALR_SHIFT) | JALR_RA)
-
-#define to_auipc_ra(offset) \
- ((offset & JALR_SIGN_MASK) ? \
- (((offset & AUIPC_OFFSET_MASK) + AUIPC_PAD) | AUIPC_RA) : \
- ((offset & AUIPC_OFFSET_MASK) | AUIPC_RA))
-
-#define make_call_ra(caller, callee, call) \
-do { \
- unsigned int offset = \
- (unsigned long) (callee) - (unsigned long) (caller); \
- call[0] = to_auipc_ra(offset); \
- call[1] = to_jalr_ra(offset); \
-} while (0)
-
/*
- * Let auipc+jalr be the basic *mcount unit*, so we make it 8 bytes here.
+ * Only the jalr insn in the auipc+jalr is patched, so we make it 4
+ * bytes here.
*/
-#define MCOUNT_INSN_SIZE 8
+#define MCOUNT_INSN_SIZE 4
+#define MCOUNT_AUIPC_SIZE 4
+#define MCOUNT_JALR_SIZE 4
+#define MCOUNT_NOP4_SIZE 4
#ifndef __ASSEMBLY__
struct dyn_ftrace;
@@ -135,6 +130,9 @@ struct __arch_ftrace_regs {
unsigned long sp;
unsigned long s0;
unsigned long t1;
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+ unsigned long direct_tramp;
+#endif
union {
unsigned long args[8];
struct {
@@ -146,6 +144,13 @@ struct __arch_ftrace_regs {
unsigned long a5;
unsigned long a6;
unsigned long a7;
+#ifdef CONFIG_CC_IS_CLANG
+ unsigned long t2;
+ unsigned long t3;
+ unsigned long t4;
+ unsigned long t5;
+ unsigned long t6;
+#endif
};
};
};
@@ -221,10 +226,13 @@ void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct ftrace_regs *fregs);
#define ftrace_graph_func ftrace_graph_func
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
static inline void arch_ftrace_set_direct_caller(struct ftrace_regs *fregs, unsigned long addr)
{
arch_ftrace_regs(fregs)->t1 = addr;
}
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
+
#endif /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */
#endif /* __ASSEMBLY__ */
diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
index e3cbf203cdde..affd63e11b0a 100644
--- a/arch/riscv/include/asm/hwcap.h
+++ b/arch/riscv/include/asm/hwcap.h
@@ -105,6 +105,7 @@
#define RISCV_ISA_EXT_ZVFBFWMA 96
#define RISCV_ISA_EXT_ZAAMO 97
#define RISCV_ISA_EXT_ZALRSC 98
+#define RISCV_ISA_EXT_ZICBOP 99
#define RISCV_ISA_EXT_XLINUXENVCFG 127
diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h
index 1f690fea0e03..7fe0a379474a 100644
--- a/arch/riscv/include/asm/hwprobe.h
+++ b/arch/riscv/include/asm/hwprobe.h
@@ -8,7 +8,7 @@
#include <uapi/asm/hwprobe.h>
-#define RISCV_HWPROBE_MAX_KEY 12
+#define RISCV_HWPROBE_MAX_KEY 13
static inline bool riscv_hwprobe_key_is_valid(__s64 key)
{
@@ -22,6 +22,7 @@ static inline bool hwprobe_key_is_bitmask(__s64 key)
case RISCV_HWPROBE_KEY_IMA_EXT_0:
case RISCV_HWPROBE_KEY_CPUPERF_0:
case RISCV_HWPROBE_KEY_VENDOR_EXT_THEAD_0:
+ case RISCV_HWPROBE_KEY_VENDOR_EXT_SIFIVE_0:
return true;
}
diff --git a/arch/riscv/include/asm/image.h b/arch/riscv/include/asm/image.h
index e0b319af3681..8927a6ea1127 100644
--- a/arch/riscv/include/asm/image.h
+++ b/arch/riscv/include/asm/image.h
@@ -30,6 +30,8 @@
RISCV_HEADER_VERSION_MINOR)
#ifndef __ASSEMBLY__
+#define riscv_image_flag_field(flags, field)\
+ (((flags) >> field##_SHIFT) & field##_MASK)
/**
* struct riscv_image_header - riscv kernel image header
* @code0: Executable code
diff --git a/arch/riscv/include/asm/insn-def.h b/arch/riscv/include/asm/insn-def.h
index 71060a2f838e..d5adbaec1d01 100644
--- a/arch/riscv/include/asm/insn-def.h
+++ b/arch/riscv/include/asm/insn-def.h
@@ -18,6 +18,13 @@
#define INSN_I_RD_SHIFT 7
#define INSN_I_OPCODE_SHIFT 0
+#define INSN_S_SIMM7_SHIFT 25
+#define INSN_S_RS2_SHIFT 20
+#define INSN_S_RS1_SHIFT 15
+#define INSN_S_FUNC3_SHIFT 12
+#define INSN_S_SIMM5_SHIFT 7
+#define INSN_S_OPCODE_SHIFT 0
+
#ifdef __ASSEMBLY__
#ifdef CONFIG_AS_HAS_INSN
@@ -30,6 +37,10 @@
.insn i \opcode, \func3, \rd, \rs1, \simm12
.endm
+ .macro insn_s, opcode, func3, rs2, simm12, rs1
+ .insn s \opcode, \func3, \rs2, \simm12(\rs1)
+ .endm
+
#else
#include <asm/gpr-num.h>
@@ -51,10 +62,20 @@
(\simm12 << INSN_I_SIMM12_SHIFT))
.endm
+ .macro insn_s, opcode, func3, rs2, simm12, rs1
+ .4byte ((\opcode << INSN_S_OPCODE_SHIFT) | \
+ (\func3 << INSN_S_FUNC3_SHIFT) | \
+ (.L__gpr_num_\rs2 << INSN_S_RS2_SHIFT) | \
+ (.L__gpr_num_\rs1 << INSN_S_RS1_SHIFT) | \
+ ((\simm12 & 0x1f) << INSN_S_SIMM5_SHIFT) | \
+ (((\simm12 >> 5) & 0x7f) << INSN_S_SIMM7_SHIFT))
+ .endm
+
#endif
#define __INSN_R(...) insn_r __VA_ARGS__
#define __INSN_I(...) insn_i __VA_ARGS__
+#define __INSN_S(...) insn_s __VA_ARGS__
#else /* ! __ASSEMBLY__ */
@@ -66,6 +87,9 @@
#define __INSN_I(opcode, func3, rd, rs1, simm12) \
".insn i " opcode ", " func3 ", " rd ", " rs1 ", " simm12 "\n"
+#define __INSN_S(opcode, func3, rs2, simm12, rs1) \
+ ".insn s " opcode ", " func3 ", " rs2 ", " simm12 "(" rs1 ")\n"
+
#else
#include <linux/stringify.h>
@@ -92,12 +116,26 @@
" (\\simm12 << " __stringify(INSN_I_SIMM12_SHIFT) "))\n" \
" .endm\n"
+#define DEFINE_INSN_S \
+ __DEFINE_ASM_GPR_NUMS \
+" .macro insn_s, opcode, func3, rs2, simm12, rs1\n" \
+" .4byte ((\\opcode << " __stringify(INSN_S_OPCODE_SHIFT) ") |" \
+" (\\func3 << " __stringify(INSN_S_FUNC3_SHIFT) ") |" \
+" (.L__gpr_num_\\rs2 << " __stringify(INSN_S_RS2_SHIFT) ") |" \
+" (.L__gpr_num_\\rs1 << " __stringify(INSN_S_RS1_SHIFT) ") |" \
+" ((\\simm12 & 0x1f) << " __stringify(INSN_S_SIMM5_SHIFT) ") |" \
+" (((\\simm12 >> 5) & 0x7f) << " __stringify(INSN_S_SIMM7_SHIFT) "))\n" \
+" .endm\n"
+
#define UNDEFINE_INSN_R \
" .purgem insn_r\n"
#define UNDEFINE_INSN_I \
" .purgem insn_i\n"
+#define UNDEFINE_INSN_S \
+" .purgem insn_s\n"
+
#define __INSN_R(opcode, func3, func7, rd, rs1, rs2) \
DEFINE_INSN_R \
"insn_r " opcode ", " func3 ", " func7 ", " rd ", " rs1 ", " rs2 "\n" \
@@ -108,6 +146,11 @@
"insn_i " opcode ", " func3 ", " rd ", " rs1 ", " simm12 "\n" \
UNDEFINE_INSN_I
+#define __INSN_S(opcode, func3, rs2, simm12, rs1) \
+ DEFINE_INSN_S \
+ "insn_s " opcode ", " func3 ", " rs2 ", " simm12 ", " rs1 "\n" \
+ UNDEFINE_INSN_S
+
#endif
#endif /* ! __ASSEMBLY__ */
@@ -120,6 +163,10 @@
__INSN_I(RV_##opcode, RV_##func3, RV_##rd, \
RV_##rs1, RV_##simm12)
+#define INSN_S(opcode, func3, rs2, simm12, rs1) \
+ __INSN_S(RV_##opcode, RV_##func3, RV_##rs2, \
+ RV_##simm12, RV_##rs1)
+
#define RV_OPCODE(v) __ASM_STR(v)
#define RV_FUNC3(v) __ASM_STR(v)
#define RV_FUNC7(v) __ASM_STR(v)
@@ -133,6 +180,7 @@
#define RV___RS2(v) __RV_REG(v)
#define RV_OPCODE_MISC_MEM RV_OPCODE(15)
+#define RV_OPCODE_OP_IMM RV_OPCODE(19)
#define RV_OPCODE_SYSTEM RV_OPCODE(115)
#define HFENCE_VVMA(vaddr, asid) \
@@ -196,6 +244,18 @@
INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0), \
RS1(base), SIMM12(4))
+#define PREFETCH_I(base, offset) \
+ INSN_S(OPCODE_OP_IMM, FUNC3(6), __RS2(0), \
+ SIMM12((offset) & 0xfe0), RS1(base))
+
+#define PREFETCH_R(base, offset) \
+ INSN_S(OPCODE_OP_IMM, FUNC3(6), __RS2(1), \
+ SIMM12((offset) & 0xfe0), RS1(base))
+
+#define PREFETCH_W(base, offset) \
+ INSN_S(OPCODE_OP_IMM, FUNC3(6), __RS2(3), \
+ SIMM12((offset) & 0xfe0), RS1(base))
+
#define RISCV_PAUSE ".4byte 0x100000f"
#define ZAWRS_WRS_NTO ".4byte 0x00d00073"
#define ZAWRS_WRS_STO ".4byte 0x01d00073"
@@ -203,4 +263,10 @@
#define RISCV_INSN_NOP4 _AC(0x00000013, U)
+#ifndef __ASSEMBLY__
+#define nop() __asm__ __volatile__ ("nop")
+#define __nops(n) ".rept " #n "\nnop\n.endr\n"
+#define nops(n) __asm__ __volatile__ (__nops(n))
+#endif
+
#endif /* __ASM_INSN_DEF_H */
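
For reference, a stand-alone C sketch of the S-type immediate split that the
INSN_S macros above perform, specialized to the Zicbop prefetch encoding
(OP-IMM opcode, funct3=6, prefetch type in the rs2 field); this mirrors the
PREFETCH_W definition and is illustrative only:

    #include <stdint.h>

    /* Encode prefetch.{i,r,w}: type 0 = i, 1 = r, 3 = w. The offset
     * must be a multiple of 32, so simm12[4:0] is always zero. */
    static uint32_t encode_prefetch(uint32_t type, uint32_t rs1,
                                    int32_t offset)
    {
            uint32_t imm = (uint32_t)offset & 0xfe0;

            return (((imm >> 5) & 0x7f) << 25) |  /* simm12[11:5] */
                   (type << 20) |                 /* rs2 field    */
                   (rs1 << 15) |
                   (6u << 12) |                   /* funct3       */
                   ((imm & 0x1f) << 7) |          /* simm12[4:0]  */
                   19u;                           /* OP-IMM       */
    }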
diff --git a/arch/riscv/include/asm/kexec.h b/arch/riscv/include/asm/kexec.h
index 2b56769cb530..b9ee8346cc8c 100644
--- a/arch/riscv/include/asm/kexec.h
+++ b/arch/riscv/include/asm/kexec.h
@@ -56,6 +56,7 @@ extern riscv_kexec_method riscv_kexec_norelocate;
#ifdef CONFIG_KEXEC_FILE
extern const struct kexec_file_ops elf_kexec_ops;
+extern const struct kexec_file_ops image_kexec_ops;
struct purgatory_info;
int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
@@ -67,6 +68,11 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
struct kimage;
int arch_kimage_file_post_load_cleanup(struct kimage *image);
#define arch_kimage_file_post_load_cleanup arch_kimage_file_post_load_cleanup
+
+int load_extra_segments(struct kimage *image, unsigned long kernel_start,
+ unsigned long kernel_len, char *initrd,
+ unsigned long initrd_len, char *cmdline,
+ unsigned long cmdline_len);
#endif
#endif
diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h
index 188fadc1c21f..7de05db7d3bd 100644
--- a/arch/riscv/include/asm/pgtable-64.h
+++ b/arch/riscv/include/asm/pgtable-64.h
@@ -184,7 +184,7 @@ static inline int pud_none(pud_t pud)
static inline int pud_bad(pud_t pud)
{
- return !pud_present(pud);
+ return !pud_present(pud) || (pud_val(pud) & _PAGE_LEAF);
}
#define pud_leaf pud_leaf
@@ -399,6 +399,7 @@ p4d_t *p4d_offset(pgd_t *pgd, unsigned long address);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline int pte_devmap(pte_t pte);
static inline pte_t pmd_pte(pmd_t pmd);
+static inline pte_t pud_pte(pud_t pud);
static inline int pmd_devmap(pmd_t pmd)
{
@@ -407,7 +408,7 @@ static inline int pmd_devmap(pmd_t pmd)
static inline int pud_devmap(pud_t pud)
{
- return 0;
+ return pte_devmap(pud_pte(pud));
}
static inline int pgd_devmap(pgd_t pgd)
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index f19240fd018e..a11816bbf9e7 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -900,6 +900,103 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
#define pmdp_collapse_flush pmdp_collapse_flush
extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp);
+
+static inline pud_t pud_wrprotect(pud_t pud)
+{
+ return pte_pud(pte_wrprotect(pud_pte(pud)));
+}
+
+static inline int pud_trans_huge(pud_t pud)
+{
+ return pud_leaf(pud);
+}
+
+static inline int pud_dirty(pud_t pud)
+{
+ return pte_dirty(pud_pte(pud));
+}
+
+static inline pud_t pud_mkyoung(pud_t pud)
+{
+ return pte_pud(pte_mkyoung(pud_pte(pud)));
+}
+
+static inline pud_t pud_mkold(pud_t pud)
+{
+ return pte_pud(pte_mkold(pud_pte(pud)));
+}
+
+static inline pud_t pud_mkdirty(pud_t pud)
+{
+ return pte_pud(pte_mkdirty(pud_pte(pud)));
+}
+
+static inline pud_t pud_mkclean(pud_t pud)
+{
+ return pte_pud(pte_mkclean(pud_pte(pud)));
+}
+
+static inline pud_t pud_mkwrite(pud_t pud)
+{
+ return pte_pud(pte_mkwrite_novma(pud_pte(pud)));
+}
+
+static inline pud_t pud_mkhuge(pud_t pud)
+{
+ return pud;
+}
+
+static inline pud_t pud_mkdevmap(pud_t pud)
+{
+ return pte_pud(pte_mkdevmap(pud_pte(pud)));
+}
+
+static inline int pudp_set_access_flags(struct vm_area_struct *vma,
+ unsigned long address, pud_t *pudp,
+ pud_t entry, int dirty)
+{
+ return ptep_set_access_flags(vma, address, (pte_t *)pudp, pud_pte(entry), dirty);
+}
+
+static inline int pudp_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long address, pud_t *pudp)
+{
+ return ptep_test_and_clear_young(vma, address, (pte_t *)pudp);
+}
+
+static inline int pud_young(pud_t pud)
+{
+ return pte_young(pud_pte(pud));
+}
+
+static inline void update_mmu_cache_pud(struct vm_area_struct *vma,
+ unsigned long address, pud_t *pudp)
+{
+ pte_t *ptep = (pte_t *)pudp;
+
+ update_mmu_cache(vma, address, ptep);
+}
+
+static inline pud_t pudp_establish(struct vm_area_struct *vma,
+ unsigned long address, pud_t *pudp, pud_t pud)
+{
+ page_table_check_pud_set(vma->vm_mm, pudp, pud);
+ return __pud(atomic_long_xchg((atomic_long_t *)pudp, pud_val(pud)));
+}
+
+static inline pud_t pud_mkinvalid(pud_t pud)
+{
+ return __pud(pud_val(pud) & ~(_PAGE_PRESENT | _PAGE_PROT_NONE));
+}
+
+extern pud_t pudp_invalidate(struct vm_area_struct *vma, unsigned long address,
+ pud_t *pudp);
+
+static inline pud_t pud_modify(pud_t pud, pgprot_t newprot)
+{
+ return pte_pud(pte_modify(pud_pte(pud), newprot));
+}
+
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
/*
diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
index 5f56eb9d114a..24d3af4d3807 100644
--- a/arch/riscv/include/asm/processor.h
+++ b/arch/riscv/include/asm/processor.h
@@ -13,6 +13,9 @@
#include <vdso/processor.h>
#include <asm/ptrace.h>
+#include <asm/insn-def.h>
+#include <asm/alternative-macros.h>
+#include <asm/hwcap.h>
#define arch_get_mmap_end(addr, len, flags) \
({ \
@@ -52,7 +55,6 @@
#endif
#ifndef __ASSEMBLY__
-#include <linux/cpumask.h>
struct task_struct;
struct pt_regs;
@@ -79,6 +81,10 @@ struct pt_regs;
* Thus, the task does not own preempt_v. Any use of Vector will have to
* save preempt_v, if dirty, and fallback to non-preemptible kernel-mode
* Vector.
+ * - bit 29: The thread voluntarily calls schedule() while holding an active
+ * preempt_v. All preempt_v context should be dropped in such case because
+ * V-regs are caller-saved. Only sstatus.VS=ON is persisted across a
+ * schedule() call.
* - bit 30: The in-kernel preempt_v context is saved, and requries to be
* restored when returning to the context that owns the preempt_v.
* - bit 31: The in-kernel preempt_v context is dirty, as signaled by the
@@ -93,6 +99,7 @@ struct pt_regs;
#define RISCV_PREEMPT_V 0x00000100
#define RISCV_PREEMPT_V_DIRTY 0x80000000
#define RISCV_PREEMPT_V_NEED_RESTORE 0x40000000
+#define RISCV_PREEMPT_V_IN_SCHEDULE 0x20000000
/* CPU-specific state of a task */
struct thread_struct {
@@ -103,6 +110,7 @@ struct thread_struct {
struct __riscv_d_ext_state fstate;
unsigned long bad_cause;
unsigned long envcfg;
+ unsigned long sum;
u32 riscv_v_flags;
u32 vstate_ctrl;
struct __riscv_v_ext_state vstate;
@@ -136,6 +144,27 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset,
#define KSTK_EIP(tsk) (task_pt_regs(tsk)->epc)
#define KSTK_ESP(tsk) (task_pt_regs(tsk)->sp)
+#define PREFETCH_ASM(x) \
+ ALTERNATIVE(__nops(1), PREFETCH_R(x, 0), 0, \
+ RISCV_ISA_EXT_ZICBOP, CONFIG_RISCV_ISA_ZICBOP)
+
+#define PREFETCHW_ASM(x) \
+ ALTERNATIVE(__nops(1), PREFETCH_W(x, 0), 0, \
+ RISCV_ISA_EXT_ZICBOP, CONFIG_RISCV_ISA_ZICBOP)
+
+#ifdef CONFIG_RISCV_ISA_ZICBOP
+#define ARCH_HAS_PREFETCH
+static inline void prefetch(const void *x)
+{
+ __asm__ __volatile__(PREFETCH_ASM(%0) : : "r" (x) : "memory");
+}
+
+#define ARCH_HAS_PREFETCHW
+static inline void prefetchw(const void *x)
+{
+ __asm__ __volatile__(PREFETCHW_ASM(%0) : : "r" (x) : "memory");
+}
+#endif /* CONFIG_RISCV_ISA_ZICBOP */
/* Do necessary setup to start up a newly executed thread. */
extern void start_thread(struct pt_regs *regs,
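
A sketch of how a caller might use the new helper (hypothetical structure,
not from this series; on hardware without Zicbop the prefetch is patched to
a nop by the alternative):

    struct counter_node {
            struct counter_node *next;
            unsigned long count;
    };

    /* Warm the next node for write before we touch the current one. */
    static void bump_all(struct counter_node *node)
    {
            while (node) {
                    if (node->next)
                            prefetchw(node->next);
                    node->count++;
                    node = node->next;
            }
    }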
diff --git a/arch/riscv/include/asm/ptrace.h b/arch/riscv/include/asm/ptrace.h
index 2910231977cb..a7dc0e330757 100644
--- a/arch/riscv/include/asm/ptrace.h
+++ b/arch/riscv/include/asm/ptrace.h
@@ -175,7 +175,7 @@ static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs,
return 0;
}
-static inline int regs_irqs_disabled(struct pt_regs *regs)
+static __always_inline bool regs_irqs_disabled(struct pt_regs *regs)
{
return !(regs->status & SR_PIE);
}
diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h
index 3d250824178b..341e74238aa0 100644
--- a/arch/riscv/include/asm/sbi.h
+++ b/arch/riscv/include/asm/sbi.h
@@ -35,6 +35,7 @@ enum sbi_ext_id {
SBI_EXT_DBCN = 0x4442434E,
SBI_EXT_STA = 0x535441,
SBI_EXT_NACL = 0x4E41434C,
+ SBI_EXT_FWFT = 0x46574654,
/* Experimentals extensions must lie within this range */
SBI_EXT_EXPERIMENTAL_START = 0x08000000,
@@ -402,6 +403,33 @@ enum sbi_ext_nacl_feature {
#define SBI_NACL_SHMEM_SRET_X(__i) ((__riscv_xlen / 8) * (__i))
#define SBI_NACL_SHMEM_SRET_X_LAST 31
+/* SBI function IDs for FW feature extension */
+#define SBI_EXT_FWFT_SET 0x0
+#define SBI_EXT_FWFT_GET 0x1
+
+enum sbi_fwft_feature_t {
+ SBI_FWFT_MISALIGNED_EXC_DELEG = 0x0,
+ SBI_FWFT_LANDING_PAD = 0x1,
+ SBI_FWFT_SHADOW_STACK = 0x2,
+ SBI_FWFT_DOUBLE_TRAP = 0x3,
+ SBI_FWFT_PTE_AD_HW_UPDATING = 0x4,
+ SBI_FWFT_POINTER_MASKING_PMLEN = 0x5,
+ SBI_FWFT_LOCAL_RESERVED_START = 0x6,
+ SBI_FWFT_LOCAL_RESERVED_END = 0x3fffffff,
+ SBI_FWFT_LOCAL_PLATFORM_START = 0x40000000,
+ SBI_FWFT_LOCAL_PLATFORM_END = 0x7fffffff,
+
+ SBI_FWFT_GLOBAL_RESERVED_START = 0x80000000,
+ SBI_FWFT_GLOBAL_RESERVED_END = 0xbfffffff,
+ SBI_FWFT_GLOBAL_PLATFORM_START = 0xc0000000,
+ SBI_FWFT_GLOBAL_PLATFORM_END = 0xffffffff,
+};
+
+#define SBI_FWFT_PLATFORM_FEATURE_BIT BIT(30)
+#define SBI_FWFT_GLOBAL_FEATURE_BIT BIT(31)
+
+#define SBI_FWFT_SET_FLAG_LOCK BIT(0)
+
/* SBI spec version fields */
#define SBI_SPEC_VERSION_DEFAULT 0x1
#define SBI_SPEC_VERSION_MAJOR_SHIFT 24
@@ -419,6 +447,11 @@ enum sbi_ext_nacl_feature {
#define SBI_ERR_ALREADY_STARTED -7
#define SBI_ERR_ALREADY_STOPPED -8
#define SBI_ERR_NO_SHMEM -9
+#define SBI_ERR_INVALID_STATE -10
+#define SBI_ERR_BAD_RANGE -11
+#define SBI_ERR_TIMEOUT -12
+#define SBI_ERR_IO -13
+#define SBI_ERR_DENIED_LOCKED -14
extern unsigned long sbi_spec_version;
struct sbiret {
@@ -470,6 +503,23 @@ int sbi_remote_hfence_vvma_asid(const struct cpumask *cpu_mask,
unsigned long asid);
long sbi_probe_extension(int ext);
+int sbi_fwft_set(u32 feature, unsigned long value, unsigned long flags);
+int sbi_fwft_set_cpumask(const cpumask_t *mask, u32 feature,
+ unsigned long value, unsigned long flags);
+/**
+ * sbi_fwft_set_online_cpus() - Set a feature on all online cpus
+ * @feature: The feature to be set
+ * @value: The feature value to be set
+ * @flags: FWFT feature set flags
+ *
+ * Return: 0 on success, appropriate linux error code otherwise.
+ */
+static inline int sbi_fwft_set_online_cpus(u32 feature, unsigned long value,
+ unsigned long flags)
+{
+ return sbi_fwft_set_cpumask(cpu_online_mask, feature, value, flags);
+}
+
/* Check if current SBI specification version is 0.1 or not */
static inline int sbi_spec_is_0_1(void)
{
@@ -503,11 +553,21 @@ static inline int sbi_err_map_linux_errno(int err)
case SBI_SUCCESS:
return 0;
case SBI_ERR_DENIED:
+ case SBI_ERR_DENIED_LOCKED:
return -EPERM;
case SBI_ERR_INVALID_PARAM:
+ case SBI_ERR_INVALID_STATE:
return -EINVAL;
+ case SBI_ERR_BAD_RANGE:
+ return -ERANGE;
case SBI_ERR_INVALID_ADDRESS:
return -EFAULT;
+ case SBI_ERR_NO_SHMEM:
+ return -ENOMEM;
+ case SBI_ERR_TIMEOUT:
+ return -ETIMEDOUT;
+ case SBI_ERR_IO:
+ return -EIO;
case SBI_ERR_NOT_SUPPORTED:
case SBI_ERR_FAILURE:
default:
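
A sketch of how a caller might use the new wrapper to enable
misaligned-exception delegation on the local hart (error handling elided;
the lock flag is shown only to illustrate the parameter):

    /* Ask the SBI firmware to delegate misaligned-access exceptions to
     * S-mode on this hart, and lock the setting against later changes. */
    static int delegate_misaligned_traps(void)
    {
            return sbi_fwft_set(SBI_FWFT_MISALIGNED_EXC_DELEG, 1,
                                SBI_FWFT_SET_FLAG_LOCK);
    }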
diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h
index ce0dd0fed764..1a20dd746a49 100644
--- a/arch/riscv/include/asm/tlbflush.h
+++ b/arch/riscv/include/asm/tlbflush.h
@@ -56,6 +56,8 @@ void local_flush_tlb_kernel_range(unsigned long start, unsigned long end);
#define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end);
+void flush_pud_tlb_range(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end);
#endif
bool arch_tlbbatch_should_defer(struct mm_struct *mm);
diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h
index fee56b0c8058..d472da4450e6 100644
--- a/arch/riscv/include/asm/uaccess.h
+++ b/arch/riscv/include/asm/uaccess.h
@@ -62,6 +62,19 @@ static inline unsigned long __untagged_addr_remote(struct mm_struct *mm, unsigne
__asm__ __volatile__ ("csrc sstatus, %0" : : "r" (SR_SUM) : "memory")
/*
+ * This is the smallest unsigned integer type that can fit a value
+ * (up to 'long long')
+ */
+#define __inttype(x) __typeof__( \
+ __typefits(x, char, \
+ __typefits(x, short, \
+ __typefits(x, int, \
+ __typefits(x, long, 0ULL)))))
+
+#define __typefits(x, type, not) \
+ __builtin_choose_expr(sizeof(x) <= sizeof(type), (unsigned type)0, not)
+
+/*
* The exception table consists of pairs of addresses: the first is the
* address of an instruction that is allowed to fault, and the second is
* the address at which the program should continue. No registers are
@@ -83,27 +96,58 @@ static inline unsigned long __untagged_addr_remote(struct mm_struct *mm, unsigne
* call.
*/
-#define __get_user_asm(insn, x, ptr, err) \
+#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
+#define __get_user_asm(insn, x, ptr, label) \
+ asm_goto_output( \
+ "1:\n" \
+ " " insn " %0, %1\n" \
+ _ASM_EXTABLE_UACCESS_ERR(1b, %l2, %0) \
+ : "=&r" (x) \
+ : "m" (*(ptr)) : : label)
+#else /* !CONFIG_CC_HAS_ASM_GOTO_OUTPUT */
+#define __get_user_asm(insn, x, ptr, label) \
do { \
- __typeof__(x) __x; \
+ long __gua_err = 0; \
__asm__ __volatile__ ( \
"1:\n" \
" " insn " %1, %2\n" \
"2:\n" \
_ASM_EXTABLE_UACCESS_ERR_ZERO(1b, 2b, %0, %1) \
- : "+r" (err), "=&r" (__x) \
+ : "+r" (__gua_err), "=&r" (x) \
: "m" (*(ptr))); \
- (x) = __x; \
+ if (__gua_err) \
+ goto label; \
} while (0)
+#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */
#ifdef CONFIG_64BIT
-#define __get_user_8(x, ptr, err) \
- __get_user_asm("ld", x, ptr, err)
+#define __get_user_8(x, ptr, label) \
+ __get_user_asm("ld", x, ptr, label)
#else /* !CONFIG_64BIT */
-#define __get_user_8(x, ptr, err) \
+
+#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
+#define __get_user_8(x, ptr, label) \
+ u32 __user *__ptr = (u32 __user *)(ptr); \
+ u32 __lo, __hi; \
+ asm_goto_output( \
+ "1:\n" \
+ " lw %0, %2\n" \
+ "2:\n" \
+ " lw %1, %3\n" \
+ _ASM_EXTABLE_UACCESS_ERR(1b, %l4, %0) \
+ _ASM_EXTABLE_UACCESS_ERR(2b, %l4, %0) \
+ : "=&r" (__lo), "=r" (__hi) \
+ : "m" (__ptr[__LSW]), "m" (__ptr[__MSW]) \
+ : : label); \
+ (x) = (__typeof__(x))((__typeof__((x) - (x)))( \
+ (((u64)__hi << 32) | __lo))); \
+
+#else /* !CONFIG_CC_HAS_ASM_GOTO_OUTPUT */
+#define __get_user_8(x, ptr, label) \
do { \
u32 __user *__ptr = (u32 __user *)(ptr); \
u32 __lo, __hi; \
+ long __gu8_err = 0; \
__asm__ __volatile__ ( \
"1:\n" \
" lw %1, %3\n" \
@@ -112,35 +156,62 @@ do { \
"3:\n" \
_ASM_EXTABLE_UACCESS_ERR_ZERO(1b, 3b, %0, %1) \
_ASM_EXTABLE_UACCESS_ERR_ZERO(2b, 3b, %0, %1) \
- : "+r" (err), "=&r" (__lo), "=r" (__hi) \
+ : "+r" (__gu8_err), "=&r" (__lo), "=r" (__hi) \
: "m" (__ptr[__LSW]), "m" (__ptr[__MSW])); \
- if (err) \
+ if (__gu8_err) { \
__hi = 0; \
- (x) = (__typeof__(x))((__typeof__((x)-(x)))( \
+ goto label; \
+ } \
+ (x) = (__typeof__(x))((__typeof__((x) - (x)))( \
(((u64)__hi << 32) | __lo))); \
} while (0)
+#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */
+
#endif /* CONFIG_64BIT */
-#define __get_user_nocheck(x, __gu_ptr, __gu_err) \
+unsigned long __must_check __asm_copy_to_user_sum_enabled(void __user *to,
+ const void *from, unsigned long n);
+unsigned long __must_check __asm_copy_from_user_sum_enabled(void *to,
+ const void __user *from, unsigned long n);
+
+#define __get_user_nocheck(x, __gu_ptr, label) \
do { \
+ if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && \
+ !IS_ALIGNED((uintptr_t)__gu_ptr, sizeof(*__gu_ptr))) { \
+ if (__asm_copy_from_user_sum_enabled(&(x), __gu_ptr, sizeof(*__gu_ptr))) \
+ goto label; \
+ break; \
+ } \
switch (sizeof(*__gu_ptr)) { \
case 1: \
- __get_user_asm("lb", (x), __gu_ptr, __gu_err); \
+ __get_user_asm("lb", (x), __gu_ptr, label); \
break; \
case 2: \
- __get_user_asm("lh", (x), __gu_ptr, __gu_err); \
+ __get_user_asm("lh", (x), __gu_ptr, label); \
break; \
case 4: \
- __get_user_asm("lw", (x), __gu_ptr, __gu_err); \
+ __get_user_asm("lw", (x), __gu_ptr, label); \
break; \
case 8: \
- __get_user_8((x), __gu_ptr, __gu_err); \
+ __get_user_8((x), __gu_ptr, label); \
break; \
default: \
BUILD_BUG(); \
} \
} while (0)
+#define __get_user_error(x, ptr, err) \
+do { \
+ __label__ __gu_failed; \
+ \
+ __get_user_nocheck(x, ptr, __gu_failed); \
+ err = 0; \
+ break; \
+__gu_failed: \
+ x = 0; \
+ err = -EFAULT; \
+} while (0)
+
/**
* __get_user: - Get a simple variable from user space, with less checking.
* @x: Variable to store result.
@@ -165,13 +236,16 @@ do { \
({ \
const __typeof__(*(ptr)) __user *__gu_ptr = untagged_addr(ptr); \
long __gu_err = 0; \
+ __typeof__(x) __gu_val; \
\
__chk_user_ptr(__gu_ptr); \
\
__enable_user_access(); \
- __get_user_nocheck(x, __gu_ptr, __gu_err); \
+ __get_user_error(__gu_val, __gu_ptr, __gu_err); \
__disable_user_access(); \
\
+ (x) = __gu_val; \
+ \
__gu_err; \
})
@@ -201,61 +275,73 @@ do { \
((x) = (__force __typeof__(x))0, -EFAULT); \
})
-#define __put_user_asm(insn, x, ptr, err) \
+#define __put_user_asm(insn, x, ptr, label) \
do { \
__typeof__(*(ptr)) __x = x; \
- __asm__ __volatile__ ( \
+ asm goto( \
"1:\n" \
- " " insn " %z2, %1\n" \
- "2:\n" \
- _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %0) \
- : "+r" (err), "=m" (*(ptr)) \
- : "rJ" (__x)); \
+ " " insn " %z0, %1\n" \
+ _ASM_EXTABLE(1b, %l2) \
+ : : "rJ" (__x), "m"(*(ptr)) : : label); \
} while (0)
#ifdef CONFIG_64BIT
-#define __put_user_8(x, ptr, err) \
- __put_user_asm("sd", x, ptr, err)
+#define __put_user_8(x, ptr, label) \
+ __put_user_asm("sd", x, ptr, label)
#else /* !CONFIG_64BIT */
-#define __put_user_8(x, ptr, err) \
+#define __put_user_8(x, ptr, label) \
do { \
u32 __user *__ptr = (u32 __user *)(ptr); \
u64 __x = (__typeof__((x)-(x)))(x); \
- __asm__ __volatile__ ( \
+ asm goto( \
"1:\n" \
- " sw %z3, %1\n" \
+ " sw %z0, %2\n" \
"2:\n" \
- " sw %z4, %2\n" \
- "3:\n" \
- _ASM_EXTABLE_UACCESS_ERR(1b, 3b, %0) \
- _ASM_EXTABLE_UACCESS_ERR(2b, 3b, %0) \
- : "+r" (err), \
- "=m" (__ptr[__LSW]), \
- "=m" (__ptr[__MSW]) \
- : "rJ" (__x), "rJ" (__x >> 32)); \
+ " sw %z1, %3\n" \
+ _ASM_EXTABLE(1b, %l4) \
+ _ASM_EXTABLE(2b, %l4) \
+ : : "rJ" (__x), "rJ" (__x >> 32), \
+ "m" (__ptr[__LSW]), \
+ "m" (__ptr[__MSW]) : : label); \
} while (0)
#endif /* CONFIG_64BIT */
-#define __put_user_nocheck(x, __gu_ptr, __pu_err) \
+#define __put_user_nocheck(x, __gu_ptr, label) \
do { \
+ if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && \
+ !IS_ALIGNED((uintptr_t)__gu_ptr, sizeof(*__gu_ptr))) { \
+ __inttype(x) val = (__inttype(x))x; \
+ if (__asm_copy_to_user_sum_enabled(__gu_ptr, &(val), sizeof(*__gu_ptr))) \
+ goto label; \
+ break; \
+ } \
switch (sizeof(*__gu_ptr)) { \
case 1: \
- __put_user_asm("sb", (x), __gu_ptr, __pu_err); \
+ __put_user_asm("sb", (x), __gu_ptr, label); \
break; \
case 2: \
- __put_user_asm("sh", (x), __gu_ptr, __pu_err); \
+ __put_user_asm("sh", (x), __gu_ptr, label); \
break; \
case 4: \
- __put_user_asm("sw", (x), __gu_ptr, __pu_err); \
+ __put_user_asm("sw", (x), __gu_ptr, label); \
break; \
case 8: \
- __put_user_8((x), __gu_ptr, __pu_err); \
+ __put_user_8((x), __gu_ptr, label); \
break; \
default: \
BUILD_BUG(); \
} \
} while (0)
+#define __put_user_error(x, ptr, err) \
+do { \
+ __label__ err_label; \
+ __put_user_nocheck(x, ptr, err_label); \
+ break; \
+err_label: \
+ (err) = -EFAULT; \
+} while (0)
+
/**
* __put_user: - Write a simple value into user space, with less checking.
* @x: Value to copy to user space.
@@ -286,7 +372,7 @@ do { \
__chk_user_ptr(__gu_ptr); \
\
__enable_user_access(); \
- __put_user_nocheck(__val, __gu_ptr, __pu_err); \
+ __put_user_error(__val, __gu_ptr, __pu_err); \
__disable_user_access(); \
\
__pu_err; \
@@ -351,23 +437,45 @@ unsigned long __must_check clear_user(void __user *to, unsigned long n)
}
#define __get_kernel_nofault(dst, src, type, err_label) \
-do { \
- long __kr_err = 0; \
- \
- __get_user_nocheck(*((type *)(dst)), (type *)(src), __kr_err); \
- if (unlikely(__kr_err)) \
- goto err_label; \
-} while (0)
+ __get_user_nocheck(*((type *)(dst)), (type *)(src), err_label)
#define __put_kernel_nofault(dst, src, type, err_label) \
-do { \
- long __kr_err = 0; \
- \
- __put_user_nocheck(*((type *)(src)), (type *)(dst), __kr_err); \
- if (unlikely(__kr_err)) \
- goto err_label; \
+ __put_user_nocheck(*((type *)(src)), (type *)(dst), err_label)
+
+static __must_check __always_inline bool user_access_begin(const void __user *ptr, size_t len)
+{
+ if (unlikely(!access_ok(ptr, len)))
+ return false;
+ __enable_user_access();
+ return true;
+}
+#define user_access_begin user_access_begin
+#define user_access_end __disable_user_access
+
+static inline unsigned long user_access_save(void) { return 0UL; }
+static inline void user_access_restore(unsigned long enabled) { }
+
+/*
+ * We want the unsafe accessors to always be inlined and use
+ * the error labels - thus the macro games.
+ */
+#define unsafe_put_user(x, ptr, label) \
+ __put_user_nocheck(x, (ptr), label)
+
+#define unsafe_get_user(x, ptr, label) do { \
+ __inttype(*(ptr)) __gu_val; \
+ __get_user_nocheck(__gu_val, (ptr), label); \
+ (x) = (__force __typeof__(*(ptr)))__gu_val; \
} while (0)
+#define unsafe_copy_to_user(_dst, _src, _len, label) \
+ if (__asm_copy_to_user_sum_enabled(_dst, _src, _len)) \
+ goto label;
+
+#define unsafe_copy_from_user(_dst, _src, _len, label) \
+ if (__asm_copy_from_user_sum_enabled(_dst, _src, _len)) \
+ goto label;
+
#else /* CONFIG_MMU */
#include <asm-generic/uaccess.h>
#endif /* CONFIG_MMU */
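A minimal sketch of how a caller is expected to use the new unsafe accessors (illustrative only; read_two_words() is a hypothetical helper, not part of this patch): open one fault window with user_access_begin(), issue label-based accesses, and close the window on both the success and the fault path.

static int read_two_words(const u32 __user *uptr, u32 *a, u32 *b)
{
	if (!user_access_begin(uptr, 2 * sizeof(u32)))
		return -EFAULT;
	unsafe_get_user(*a, &uptr[0], efault);
	unsafe_get_user(*b, &uptr[1], efault);
	user_access_end();
	return 0;
efault:
	user_access_end();
	return -EFAULT;
}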
diff --git a/arch/riscv/include/asm/vdso/getrandom.h b/arch/riscv/include/asm/vdso/getrandom.h
new file mode 100644
index 000000000000..8dc92441702a
--- /dev/null
+++ b/arch/riscv/include/asm/vdso/getrandom.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2025 Xi Ruoyao <xry111@xry111.site>. All Rights Reserved.
+ */
+#ifndef __ASM_VDSO_GETRANDOM_H
+#define __ASM_VDSO_GETRANDOM_H
+
+#ifndef __ASSEMBLY__
+
+#include <asm/unistd.h>
+
+static __always_inline ssize_t getrandom_syscall(void *_buffer, size_t _len, unsigned int _flags)
+{
+ register long ret asm("a0");
+ register long nr asm("a7") = __NR_getrandom;
+ register void *buffer asm("a0") = _buffer;
+ register size_t len asm("a1") = _len;
+ register unsigned int flags asm("a2") = _flags;
+
+ asm volatile ("ecall\n"
+ : "+r" (ret)
+ : "r" (nr), "r" (buffer), "r" (len), "r" (flags)
+ : "memory");
+
+ return ret;
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_GETRANDOM_H */
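The wrapper above hardcodes the RISC-V Linux syscall convention: number in a7, arguments in a0-a2, return value back in a0. A hedged userspace equivalent through the generic syscall(2) wrapper (assuming glibc; the vDSO fallback uses the inline asm precisely because it cannot link against libc):

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

int main(void)
{
	unsigned char buf[16];
	/* same kernel entry point the vDSO falls back to */
	long ret = syscall(SYS_getrandom, buf, sizeof(buf), 0);

	printf("getrandom returned %ld\n", ret);
	return ret == (long)sizeof(buf) ? 0 : 1;
}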
diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h
index e8a83f55be2b..45c9b426fcc5 100644
--- a/arch/riscv/include/asm/vector.h
+++ b/arch/riscv/include/asm/vector.h
@@ -120,6 +120,11 @@ static __always_inline void riscv_v_disable(void)
csr_clear(CSR_SSTATUS, SR_VS);
}
+static __always_inline bool riscv_v_is_on(void)
+{
+ return !!(csr_read(CSR_SSTATUS) & SR_VS);
+}
+
static __always_inline void __vstate_csr_save(struct __riscv_v_ext_state *dest)
{
asm volatile (
@@ -366,6 +371,11 @@ static inline void __switch_to_vector(struct task_struct *prev,
struct pt_regs *regs;
if (riscv_preempt_v_started(prev)) {
+ if (riscv_v_is_on()) {
+ WARN_ON(prev->thread.riscv_v_flags & RISCV_V_CTX_DEPTH_MASK);
+ riscv_v_disable();
+ prev->thread.riscv_v_flags |= RISCV_PREEMPT_V_IN_SCHEDULE;
+ }
if (riscv_preempt_v_dirty(prev)) {
__riscv_v_vstate_save(&prev->thread.kernel_vstate,
prev->thread.kernel_vstate.datap);
@@ -376,10 +386,16 @@ static inline void __switch_to_vector(struct task_struct *prev,
riscv_v_vstate_save(&prev->thread.vstate, regs);
}
- if (riscv_preempt_v_started(next))
- riscv_preempt_v_set_restore(next);
- else
+ if (riscv_preempt_v_started(next)) {
+ if (next->thread.riscv_v_flags & RISCV_PREEMPT_V_IN_SCHEDULE) {
+ next->thread.riscv_v_flags &= ~RISCV_PREEMPT_V_IN_SCHEDULE;
+ riscv_v_enable();
+ } else {
+ riscv_preempt_v_set_restore(next);
+ }
+ } else {
riscv_v_vstate_set_restore(next, task_pt_regs(next));
+ }
}
void riscv_v_vstate_ctrl_init(struct task_struct *tsk);
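A standalone model of the RISCV_PREEMPT_V_IN_SCHEDULE handshake added above (illustrative only, not kernel code): a task preempted with the vector unit live has V disabled at switch-out and flagged, and the flag short-circuits the lazy-restore path at switch-in.

#include <stdbool.h>
#include <stdio.h>

#define PREEMPT_V_IN_SCHEDULE	0x1

struct task { unsigned long v_flags; };

static bool v_unit_on;	/* models the SR_VS field of sstatus */

static void model_switch_out(struct task *prev)
{
	if (v_unit_on) {
		v_unit_on = false;			/* riscv_v_disable() */
		prev->v_flags |= PREEMPT_V_IN_SCHEDULE;
	}
}

static void model_switch_in(struct task *next)
{
	if (next->v_flags & PREEMPT_V_IN_SCHEDULE) {
		next->v_flags &= ~PREEMPT_V_IN_SCHEDULE;
		v_unit_on = true;			/* riscv_v_enable() */
	}
	/* else: mark the state for lazy restore on return to user */
}

int main(void)
{
	struct task t = { 0 };

	v_unit_on = true;	/* preempted in the middle of kernel-mode V */
	model_switch_out(&t);
	model_switch_in(&t);
	printf("V live again after resume: %d\n", v_unit_on);	/* 1 */
	return 0;
}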
diff --git a/arch/riscv/include/asm/vendor_extensions/sifive.h b/arch/riscv/include/asm/vendor_extensions/sifive.h
new file mode 100644
index 000000000000..ac00e500361c
--- /dev/null
+++ b/arch/riscv/include/asm/vendor_extensions/sifive.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_RISCV_VENDOR_EXTENSIONS_SIFIVE_H
+#define _ASM_RISCV_VENDOR_EXTENSIONS_SIFIVE_H
+
+#include <asm/vendor_extensions.h>
+
+#include <linux/types.h>
+
+#define RISCV_ISA_VENDOR_EXT_XSFVQMACCDOD 0
+#define RISCV_ISA_VENDOR_EXT_XSFVQMACCQOQ 1
+#define RISCV_ISA_VENDOR_EXT_XSFVFNRCLIPXFQF 2
+#define RISCV_ISA_VENDOR_EXT_XSFVFWMACCQQQ 3
+
+extern struct riscv_isa_vendor_ext_data_list riscv_isa_vendor_ext_list_sifive;
+
+#endif
diff --git a/arch/riscv/include/asm/vendor_extensions/sifive_hwprobe.h b/arch/riscv/include/asm/vendor_extensions/sifive_hwprobe.h
new file mode 100644
index 000000000000..90a61abd033c
--- /dev/null
+++ b/arch/riscv/include/asm/vendor_extensions/sifive_hwprobe.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_RISCV_VENDOR_EXTENSIONS_SIFIVE_HWPROBE_H
+#define _ASM_RISCV_VENDOR_EXTENSIONS_SIFIVE_HWPROBE_H
+
+#include <linux/cpumask.h>
+
+#include <uapi/asm/hwprobe.h>
+
+#ifdef CONFIG_RISCV_ISA_VENDOR_EXT_SIFIVE
+void hwprobe_isa_vendor_ext_sifive_0(struct riscv_hwprobe *pair, const struct cpumask *cpus);
+#else
+static inline void hwprobe_isa_vendor_ext_sifive_0(struct riscv_hwprobe *pair,
+ const struct cpumask *cpus)
+{
+ pair->value = 0;
+}
+#endif
+
+#endif
diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h
index 3c2fce939673..aaf6ad970499 100644
--- a/arch/riscv/include/uapi/asm/hwprobe.h
+++ b/arch/riscv/include/uapi/asm/hwprobe.h
@@ -81,6 +81,7 @@ struct riscv_hwprobe {
#define RISCV_HWPROBE_EXT_ZICBOM (1ULL << 55)
#define RISCV_HWPROBE_EXT_ZAAMO (1ULL << 56)
#define RISCV_HWPROBE_EXT_ZALRSC (1ULL << 57)
+#define RISCV_HWPROBE_EXT_ZABHA (1ULL << 58)
#define RISCV_HWPROBE_KEY_CPUPERF_0 5
#define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0)
#define RISCV_HWPROBE_MISALIGNED_EMULATED (1 << 0)
@@ -104,6 +105,7 @@ struct riscv_hwprobe {
#define RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED 4
#define RISCV_HWPROBE_KEY_VENDOR_EXT_THEAD_0 11
#define RISCV_HWPROBE_KEY_ZICBOM_BLOCK_SIZE 12
+#define RISCV_HWPROBE_KEY_VENDOR_EXT_SIFIVE_0 13
/* Increase RISCV_HWPROBE_MAX_KEY when adding items. */
/* Flags */
diff --git a/arch/riscv/include/uapi/asm/vendor/sifive.h b/arch/riscv/include/uapi/asm/vendor/sifive.h
new file mode 100644
index 000000000000..9f3278a4b298
--- /dev/null
+++ b/arch/riscv/include/uapi/asm/vendor/sifive.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+
+#define RISCV_HWPROBE_VENDOR_EXT_XSFVQMACCDOD (1 << 0)
+#define RISCV_HWPROBE_VENDOR_EXT_XSFVQMACCQOQ (1 << 1)
+#define RISCV_HWPROBE_VENDOR_EXT_XSFVFNRCLIPXFQF (1 << 2)
+#define RISCV_HWPROBE_VENDOR_EXT_XSFVFWMACCQQQ (1 << 3)
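Userspace discovers these bits through the riscv_hwprobe(2) key added in this series. A minimal sketch, assuming the updated uapi headers (<asm/hwprobe.h>, <asm/vendor/sifive.h>) are installed:

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <asm/hwprobe.h>
#include <asm/unistd.h>
#include <asm/vendor/sifive.h>

int main(void)
{
	struct riscv_hwprobe pair;

	memset(&pair, 0, sizeof(pair));
	pair.key = RISCV_HWPROBE_KEY_VENDOR_EXT_SIFIVE_0;

	/* riscv_hwprobe(pairs, pair_count, cpusetsize, cpus, flags) */
	if (syscall(__NR_riscv_hwprobe, &pair, 1, 0, NULL, 0))
		return 1;

	if (pair.value & RISCV_HWPROBE_VENDOR_EXT_XSFVFWMACCQQQ)
		printf("Xsfvfwmaccqqq supported on all harts\n");
	return 0;
}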
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index f7480c9c6f8d..7ce2307738c2 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -107,7 +107,7 @@ obj-$(CONFIG_HOTPLUG_CPU) += cpu-hotplug.o
obj-$(CONFIG_PARAVIRT) += paravirt.o
obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_KEXEC_CORE) += kexec_relocate.o crash_save_regs.o machine_kexec.o
-obj-$(CONFIG_KEXEC_FILE) += elf_kexec.o machine_kexec_file.o
+obj-$(CONFIG_KEXEC_FILE) += kexec_elf.o kexec_image.o machine_kexec_file.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-$(CONFIG_VMCORE_INFO) += vmcore_info.o
diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c
index 16490755304e..6e8c0d6feae9 100644
--- a/arch/riscv/kernel/asm-offsets.c
+++ b/arch/riscv/kernel/asm-offsets.c
@@ -34,6 +34,7 @@ void asm_offsets(void)
OFFSET(TASK_THREAD_S9, task_struct, thread.s[9]);
OFFSET(TASK_THREAD_S10, task_struct, thread.s[10]);
OFFSET(TASK_THREAD_S11, task_struct, thread.s[11]);
+ OFFSET(TASK_THREAD_SUM, task_struct, thread.sum);
OFFSET(TASK_TI_CPU, task_struct, thread_info.cpu);
OFFSET(TASK_TI_PREEMPT_COUNT, task_struct, thread_info.preempt_count);
@@ -346,6 +347,10 @@ void asm_offsets(void)
offsetof(struct task_struct, thread.s[11])
- offsetof(struct task_struct, thread.ra)
);
+ DEFINE(TASK_THREAD_SUM_RA,
+ offsetof(struct task_struct, thread.sum)
+ - offsetof(struct task_struct, thread.ra)
+ );
DEFINE(TASK_THREAD_F0_F0,
offsetof(struct task_struct, thread.fstate.f[0])
@@ -493,6 +498,12 @@ void asm_offsets(void)
DEFINE(STACKFRAME_SIZE_ON_STACK, ALIGN(sizeof(struct stackframe), STACK_ALIGN));
OFFSET(STACKFRAME_FP, stackframe, fp);
OFFSET(STACKFRAME_RA, stackframe, ra);
+#ifdef CONFIG_FUNCTION_TRACER
+ DEFINE(FTRACE_OPS_FUNC, offsetof(struct ftrace_ops, func));
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+ DEFINE(FTRACE_OPS_DIRECT_CALL, offsetof(struct ftrace_ops, direct_call));
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
+#endif
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
DEFINE(FREGS_SIZE_ON_STACK, ALIGN(sizeof(struct __arch_ftrace_regs), STACK_ALIGN));
@@ -501,6 +512,13 @@ void asm_offsets(void)
DEFINE(FREGS_SP, offsetof(struct __arch_ftrace_regs, sp));
DEFINE(FREGS_S0, offsetof(struct __arch_ftrace_regs, s0));
DEFINE(FREGS_T1, offsetof(struct __arch_ftrace_regs, t1));
+#ifdef CONFIG_CC_IS_CLANG
+ DEFINE(FREGS_T2, offsetof(struct __arch_ftrace_regs, t2));
+ DEFINE(FREGS_T3, offsetof(struct __arch_ftrace_regs, t3));
+ DEFINE(FREGS_T4, offsetof(struct __arch_ftrace_regs, t4));
+ DEFINE(FREGS_T5, offsetof(struct __arch_ftrace_regs, t5));
+ DEFINE(FREGS_T6, offsetof(struct __arch_ftrace_regs, t6));
+#endif
DEFINE(FREGS_A0, offsetof(struct __arch_ftrace_regs, a0));
DEFINE(FREGS_A1, offsetof(struct __arch_ftrace_regs, a1));
DEFINE(FREGS_A2, offsetof(struct __arch_ftrace_regs, a2));
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index 2054f6c4b0ae..743d53415572 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -32,6 +32,7 @@
#define NUM_ALPHA_EXTS ('z' - 'a' + 1)
static bool any_cpu_has_zicboz;
+static bool any_cpu_has_zicbop;
static bool any_cpu_has_zicbom;
unsigned long elf_hwcap __read_mostly;
@@ -119,6 +120,21 @@ static int riscv_ext_zicboz_validate(const struct riscv_isa_ext_data *data,
return 0;
}
+static int riscv_ext_zicbop_validate(const struct riscv_isa_ext_data *data,
+ const unsigned long *isa_bitmap)
+{
+ if (!riscv_cbop_block_size) {
+ pr_err("Zicbop detected in ISA string, disabling as no cbop-block-size found\n");
+ return -EINVAL;
+ }
+ if (!is_power_of_2(riscv_cbop_block_size)) {
+ pr_err("Zicbop disabled as cbop-block-size present, but is not a power-of-2\n");
+ return -EINVAL;
+ }
+ any_cpu_has_zicbop = true;
+ return 0;
+}
+
static int riscv_ext_f_validate(const struct riscv_isa_ext_data *data,
const unsigned long *isa_bitmap)
{
@@ -442,6 +458,7 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = {
__RISCV_ISA_EXT_SUPERSET_VALIDATE(v, RISCV_ISA_EXT_v, riscv_v_exts, riscv_ext_vector_float_validate),
__RISCV_ISA_EXT_DATA(h, RISCV_ISA_EXT_h),
__RISCV_ISA_EXT_SUPERSET_VALIDATE(zicbom, RISCV_ISA_EXT_ZICBOM, riscv_xlinuxenvcfg_exts, riscv_ext_zicbom_validate),
+ __RISCV_ISA_EXT_DATA_VALIDATE(zicbop, RISCV_ISA_EXT_ZICBOP, riscv_ext_zicbop_validate),
__RISCV_ISA_EXT_SUPERSET_VALIDATE(zicboz, RISCV_ISA_EXT_ZICBOZ, riscv_xlinuxenvcfg_exts, riscv_ext_zicboz_validate),
__RISCV_ISA_EXT_DATA(ziccrse, RISCV_ISA_EXT_ZICCRSE),
__RISCV_ISA_EXT_DATA(zicntr, RISCV_ISA_EXT_ZICNTR),
@@ -1112,6 +1129,10 @@ void __init riscv_user_isa_enable(void)
current->thread.envcfg |= ENVCFG_CBCFE;
else if (any_cpu_has_zicbom)
pr_warn("Zicbom disabled as it is unavailable on some harts\n");
+
+ if (!riscv_has_extension_unlikely(RISCV_ISA_EXT_ZICBOP) &&
+ any_cpu_has_zicbop)
+ pr_warn("Zicbop disabled as it is unavailable on some harts\n");
}
#ifdef CONFIG_RISCV_ALTERNATIVE
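The cbop-block-size rule enforced by riscv_ext_zicbop_validate() is easy to check in isolation; a small sketch of the same predicate (the kernel's is_power_of_2() from <linux/log2.h> performs the identical test):

#include <stdbool.h>
#include <stdio.h>

static bool is_power_of_2(unsigned long n)
{
	return n != 0 && (n & (n - 1)) == 0;	/* same test as <linux/log2.h> */
}

int main(void)
{
	unsigned long sizes[] = { 0, 24, 64 };

	for (unsigned int i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
		printf("cbop-block-size %lu: %s\n", sizes[i],
		       is_power_of_2(sizes[i]) ? "accepted" : "rejected");
	return 0;
}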
diff --git a/arch/riscv/kernel/elf_kexec.c b/arch/riscv/kernel/elf_kexec.c
deleted file mode 100644
index e783a72d051f..000000000000
--- a/arch/riscv/kernel/elf_kexec.c
+++ /dev/null
@@ -1,485 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Load ELF vmlinux file for the kexec_file_load syscall.
- *
- * Copyright (C) 2021 Huawei Technologies Co, Ltd.
- *
- * Author: Liao Chang (liaochang1@huawei.com)
- *
- * Based on kexec-tools' kexec-elf-riscv.c, heavily modified
- * for kernel.
- */
-
-#define pr_fmt(fmt) "kexec_image: " fmt
-
-#include <linux/elf.h>
-#include <linux/kexec.h>
-#include <linux/slab.h>
-#include <linux/of.h>
-#include <linux/libfdt.h>
-#include <linux/types.h>
-#include <linux/memblock.h>
-#include <linux/vmalloc.h>
-#include <asm/setup.h>
-
-int arch_kimage_file_post_load_cleanup(struct kimage *image)
-{
- kvfree(image->arch.fdt);
- image->arch.fdt = NULL;
-
- vfree(image->elf_headers);
- image->elf_headers = NULL;
- image->elf_headers_sz = 0;
-
- return kexec_image_post_load_cleanup_default(image);
-}
-
-static int riscv_kexec_elf_load(struct kimage *image, struct elfhdr *ehdr,
- struct kexec_elf_info *elf_info, unsigned long old_pbase,
- unsigned long new_pbase)
-{
- int i;
- int ret = 0;
- size_t size;
- struct kexec_buf kbuf;
- const struct elf_phdr *phdr;
-
- kbuf.image = image;
-
- for (i = 0; i < ehdr->e_phnum; i++) {
- phdr = &elf_info->proghdrs[i];
- if (phdr->p_type != PT_LOAD)
- continue;
-
- size = phdr->p_filesz;
- if (size > phdr->p_memsz)
- size = phdr->p_memsz;
-
- kbuf.buffer = (void *) elf_info->buffer + phdr->p_offset;
- kbuf.bufsz = size;
- kbuf.buf_align = phdr->p_align;
- kbuf.mem = phdr->p_paddr - old_pbase + new_pbase;
- kbuf.memsz = phdr->p_memsz;
- kbuf.top_down = false;
- ret = kexec_add_buffer(&kbuf);
- if (ret)
- break;
- }
-
- return ret;
-}
-
-/*
- * Go through the available phsyical memory regions and find one that hold
- * an image of the specified size.
- */
-static int elf_find_pbase(struct kimage *image, unsigned long kernel_len,
- struct elfhdr *ehdr, struct kexec_elf_info *elf_info,
- unsigned long *old_pbase, unsigned long *new_pbase)
-{
- int i;
- int ret;
- struct kexec_buf kbuf;
- const struct elf_phdr *phdr;
- unsigned long lowest_paddr = ULONG_MAX;
- unsigned long lowest_vaddr = ULONG_MAX;
-
- for (i = 0; i < ehdr->e_phnum; i++) {
- phdr = &elf_info->proghdrs[i];
- if (phdr->p_type != PT_LOAD)
- continue;
-
- if (lowest_paddr > phdr->p_paddr)
- lowest_paddr = phdr->p_paddr;
-
- if (lowest_vaddr > phdr->p_vaddr)
- lowest_vaddr = phdr->p_vaddr;
- }
-
- kbuf.image = image;
- kbuf.buf_min = lowest_paddr;
- kbuf.buf_max = ULONG_MAX;
-
- /*
- * Current riscv boot protocol requires 2MB alignment for
- * RV64 and 4MB alignment for RV32
- *
- */
- kbuf.buf_align = PMD_SIZE;
- kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
- kbuf.memsz = ALIGN(kernel_len, PAGE_SIZE);
- kbuf.top_down = false;
- ret = arch_kexec_locate_mem_hole(&kbuf);
- if (!ret) {
- *old_pbase = lowest_paddr;
- *new_pbase = kbuf.mem;
- image->start = ehdr->e_entry - lowest_vaddr + kbuf.mem;
- }
- return ret;
-}
-
-#ifdef CONFIG_CRASH_DUMP
-static int get_nr_ram_ranges_callback(struct resource *res, void *arg)
-{
- unsigned int *nr_ranges = arg;
-
- (*nr_ranges)++;
- return 0;
-}
-
-static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg)
-{
- struct crash_mem *cmem = arg;
-
- cmem->ranges[cmem->nr_ranges].start = res->start;
- cmem->ranges[cmem->nr_ranges].end = res->end;
- cmem->nr_ranges++;
-
- return 0;
-}
-
-static int prepare_elf_headers(void **addr, unsigned long *sz)
-{
- struct crash_mem *cmem;
- unsigned int nr_ranges;
- int ret;
-
- nr_ranges = 1; /* For exclusion of crashkernel region */
- walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback);
-
- cmem = kmalloc(struct_size(cmem, ranges, nr_ranges), GFP_KERNEL);
- if (!cmem)
- return -ENOMEM;
-
- cmem->max_nr_ranges = nr_ranges;
- cmem->nr_ranges = 0;
- ret = walk_system_ram_res(0, -1, cmem, prepare_elf64_ram_headers_callback);
- if (ret)
- goto out;
-
- /* Exclude crashkernel region */
- ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end);
- if (!ret)
- ret = crash_prepare_elf64_headers(cmem, true, addr, sz);
-
-out:
- kfree(cmem);
- return ret;
-}
-
-static char *setup_kdump_cmdline(struct kimage *image, char *cmdline,
- unsigned long cmdline_len)
-{
- int elfcorehdr_strlen;
- char *cmdline_ptr;
-
- cmdline_ptr = kzalloc(COMMAND_LINE_SIZE, GFP_KERNEL);
- if (!cmdline_ptr)
- return NULL;
-
- elfcorehdr_strlen = sprintf(cmdline_ptr, "elfcorehdr=0x%lx ",
- image->elf_load_addr);
-
- if (elfcorehdr_strlen + cmdline_len > COMMAND_LINE_SIZE) {
- pr_err("Appending elfcorehdr=<addr> exceeds cmdline size\n");
- kfree(cmdline_ptr);
- return NULL;
- }
-
- memcpy(cmdline_ptr + elfcorehdr_strlen, cmdline, cmdline_len);
- /* Ensure it's nul terminated */
- cmdline_ptr[COMMAND_LINE_SIZE - 1] = '\0';
- return cmdline_ptr;
-}
-#endif
-
-static void *elf_kexec_load(struct kimage *image, char *kernel_buf,
- unsigned long kernel_len, char *initrd,
- unsigned long initrd_len, char *cmdline,
- unsigned long cmdline_len)
-{
- int ret;
- void *fdt;
- unsigned long old_kernel_pbase = ULONG_MAX;
- unsigned long new_kernel_pbase = 0UL;
- unsigned long initrd_pbase = 0UL;
- unsigned long kernel_start;
- struct elfhdr ehdr;
- struct kexec_buf kbuf;
- struct kexec_elf_info elf_info;
- char *modified_cmdline = NULL;
-
- ret = kexec_build_elf_info(kernel_buf, kernel_len, &ehdr, &elf_info);
- if (ret)
- return ERR_PTR(ret);
-
- ret = elf_find_pbase(image, kernel_len, &ehdr, &elf_info,
- &old_kernel_pbase, &new_kernel_pbase);
- if (ret)
- goto out;
- kernel_start = image->start;
-
- /* Add the kernel binary to the image */
- ret = riscv_kexec_elf_load(image, &ehdr, &elf_info,
- old_kernel_pbase, new_kernel_pbase);
- if (ret)
- goto out;
-
- kbuf.image = image;
- kbuf.buf_min = new_kernel_pbase + kernel_len;
- kbuf.buf_max = ULONG_MAX;
-
-#ifdef CONFIG_CRASH_DUMP
- /* Add elfcorehdr */
- if (image->type == KEXEC_TYPE_CRASH) {
- void *headers;
- unsigned long headers_sz;
- ret = prepare_elf_headers(&headers, &headers_sz);
- if (ret) {
- pr_err("Preparing elf core header failed\n");
- goto out;
- }
-
- kbuf.buffer = headers;
- kbuf.bufsz = headers_sz;
- kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
- kbuf.memsz = headers_sz;
- kbuf.buf_align = ELF_CORE_HEADER_ALIGN;
- kbuf.top_down = true;
-
- ret = kexec_add_buffer(&kbuf);
- if (ret) {
- vfree(headers);
- goto out;
- }
- image->elf_headers = headers;
- image->elf_load_addr = kbuf.mem;
- image->elf_headers_sz = headers_sz;
-
- kexec_dprintk("Loaded elf core header at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
- image->elf_load_addr, kbuf.bufsz, kbuf.memsz);
-
- /* Setup cmdline for kdump kernel case */
- modified_cmdline = setup_kdump_cmdline(image, cmdline,
- cmdline_len);
- if (!modified_cmdline) {
- pr_err("Setting up cmdline for kdump kernel failed\n");
- ret = -EINVAL;
- goto out;
- }
- cmdline = modified_cmdline;
- }
-#endif
-
-#ifdef CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY
- /* Add purgatory to the image */
- kbuf.top_down = true;
- kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
- ret = kexec_load_purgatory(image, &kbuf);
- if (ret) {
- pr_err("Error loading purgatory ret=%d\n", ret);
- goto out;
- }
- kexec_dprintk("Loaded purgatory at 0x%lx\n", kbuf.mem);
-
- ret = kexec_purgatory_get_set_symbol(image, "riscv_kernel_entry",
- &kernel_start,
- sizeof(kernel_start), 0);
- if (ret)
- pr_err("Error update purgatory ret=%d\n", ret);
-#endif /* CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY */
-
- /* Add the initrd to the image */
- if (initrd != NULL) {
- kbuf.buffer = initrd;
- kbuf.bufsz = kbuf.memsz = initrd_len;
- kbuf.buf_align = PAGE_SIZE;
- kbuf.top_down = true;
- kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
- ret = kexec_add_buffer(&kbuf);
- if (ret)
- goto out;
- initrd_pbase = kbuf.mem;
- kexec_dprintk("Loaded initrd at 0x%lx\n", initrd_pbase);
- }
-
- /* Add the DTB to the image */
- fdt = of_kexec_alloc_and_setup_fdt(image, initrd_pbase,
- initrd_len, cmdline, 0);
- if (!fdt) {
- pr_err("Error setting up the new device tree.\n");
- ret = -EINVAL;
- goto out;
- }
-
- fdt_pack(fdt);
- kbuf.buffer = fdt;
- kbuf.bufsz = kbuf.memsz = fdt_totalsize(fdt);
- kbuf.buf_align = PAGE_SIZE;
- kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
- kbuf.top_down = true;
- ret = kexec_add_buffer(&kbuf);
- if (ret) {
- pr_err("Error add DTB kbuf ret=%d\n", ret);
- goto out_free_fdt;
- }
- /* Cache the fdt buffer address for memory cleanup */
- image->arch.fdt = fdt;
- kexec_dprintk("Loaded device tree at 0x%lx\n", kbuf.mem);
- goto out;
-
-out_free_fdt:
- kvfree(fdt);
-out:
- kfree(modified_cmdline);
- kexec_free_elf_info(&elf_info);
- return ret ? ERR_PTR(ret) : NULL;
-}
-
-#define RV_X(x, s, n) (((x) >> (s)) & ((1 << (n)) - 1))
-#define RISCV_IMM_BITS 12
-#define RISCV_IMM_REACH (1LL << RISCV_IMM_BITS)
-#define RISCV_CONST_HIGH_PART(x) \
- (((x) + (RISCV_IMM_REACH >> 1)) & ~(RISCV_IMM_REACH - 1))
-#define RISCV_CONST_LOW_PART(x) ((x) - RISCV_CONST_HIGH_PART(x))
-
-#define ENCODE_ITYPE_IMM(x) \
- (RV_X(x, 0, 12) << 20)
-#define ENCODE_BTYPE_IMM(x) \
- ((RV_X(x, 1, 4) << 8) | (RV_X(x, 5, 6) << 25) | \
- (RV_X(x, 11, 1) << 7) | (RV_X(x, 12, 1) << 31))
-#define ENCODE_UTYPE_IMM(x) \
- (RV_X(x, 12, 20) << 12)
-#define ENCODE_JTYPE_IMM(x) \
- ((RV_X(x, 1, 10) << 21) | (RV_X(x, 11, 1) << 20) | \
- (RV_X(x, 12, 8) << 12) | (RV_X(x, 20, 1) << 31))
-#define ENCODE_CBTYPE_IMM(x) \
- ((RV_X(x, 1, 2) << 3) | (RV_X(x, 3, 2) << 10) | (RV_X(x, 5, 1) << 2) | \
- (RV_X(x, 6, 2) << 5) | (RV_X(x, 8, 1) << 12))
-#define ENCODE_CJTYPE_IMM(x) \
- ((RV_X(x, 1, 3) << 3) | (RV_X(x, 4, 1) << 11) | (RV_X(x, 5, 1) << 2) | \
- (RV_X(x, 6, 1) << 7) | (RV_X(x, 7, 1) << 6) | (RV_X(x, 8, 2) << 9) | \
- (RV_X(x, 10, 1) << 8) | (RV_X(x, 11, 1) << 12))
-#define ENCODE_UJTYPE_IMM(x) \
- (ENCODE_UTYPE_IMM(RISCV_CONST_HIGH_PART(x)) | \
- (ENCODE_ITYPE_IMM(RISCV_CONST_LOW_PART(x)) << 32))
-#define ENCODE_UITYPE_IMM(x) \
- (ENCODE_UTYPE_IMM(x) | (ENCODE_ITYPE_IMM(x) << 32))
-
-#define CLEAN_IMM(type, x) \
- ((~ENCODE_##type##_IMM((uint64_t)(-1))) & (x))
-
-int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
- Elf_Shdr *section,
- const Elf_Shdr *relsec,
- const Elf_Shdr *symtab)
-{
- const char *strtab, *name, *shstrtab;
- const Elf_Shdr *sechdrs;
- Elf64_Rela *relas;
- int i, r_type;
-
- /* String & section header string table */
- sechdrs = (void *)pi->ehdr + pi->ehdr->e_shoff;
- strtab = (char *)pi->ehdr + sechdrs[symtab->sh_link].sh_offset;
- shstrtab = (char *)pi->ehdr + sechdrs[pi->ehdr->e_shstrndx].sh_offset;
-
- relas = (void *)pi->ehdr + relsec->sh_offset;
-
- for (i = 0; i < relsec->sh_size / sizeof(*relas); i++) {
- const Elf_Sym *sym; /* symbol to relocate */
- unsigned long addr; /* final location after relocation */
- unsigned long val; /* relocated symbol value */
- unsigned long sec_base; /* relocated symbol value */
- void *loc; /* tmp location to modify */
-
- sym = (void *)pi->ehdr + symtab->sh_offset;
- sym += ELF64_R_SYM(relas[i].r_info);
-
- if (sym->st_name)
- name = strtab + sym->st_name;
- else
- name = shstrtab + sechdrs[sym->st_shndx].sh_name;
-
- loc = pi->purgatory_buf;
- loc += section->sh_offset;
- loc += relas[i].r_offset;
-
- if (sym->st_shndx == SHN_ABS)
- sec_base = 0;
- else if (sym->st_shndx >= pi->ehdr->e_shnum) {
- pr_err("Invalid section %d for symbol %s\n",
- sym->st_shndx, name);
- return -ENOEXEC;
- } else
- sec_base = pi->sechdrs[sym->st_shndx].sh_addr;
-
- val = sym->st_value;
- val += sec_base;
- val += relas[i].r_addend;
-
- addr = section->sh_addr + relas[i].r_offset;
-
- r_type = ELF64_R_TYPE(relas[i].r_info);
-
- switch (r_type) {
- case R_RISCV_BRANCH:
- *(u32 *)loc = CLEAN_IMM(BTYPE, *(u32 *)loc) |
- ENCODE_BTYPE_IMM(val - addr);
- break;
- case R_RISCV_JAL:
- *(u32 *)loc = CLEAN_IMM(JTYPE, *(u32 *)loc) |
- ENCODE_JTYPE_IMM(val - addr);
- break;
- /*
- * With no R_RISCV_PCREL_LO12_S, R_RISCV_PCREL_LO12_I
- * sym is expected to be next to R_RISCV_PCREL_HI20
- * in purgatory relsec. Handle it like R_RISCV_CALL
- * sym, instead of searching the whole relsec.
- */
- case R_RISCV_PCREL_HI20:
- case R_RISCV_CALL_PLT:
- case R_RISCV_CALL:
- *(u64 *)loc = CLEAN_IMM(UITYPE, *(u64 *)loc) |
- ENCODE_UJTYPE_IMM(val - addr);
- break;
- case R_RISCV_RVC_BRANCH:
- *(u32 *)loc = CLEAN_IMM(CBTYPE, *(u32 *)loc) |
- ENCODE_CBTYPE_IMM(val - addr);
- break;
- case R_RISCV_RVC_JUMP:
- *(u32 *)loc = CLEAN_IMM(CJTYPE, *(u32 *)loc) |
- ENCODE_CJTYPE_IMM(val - addr);
- break;
- case R_RISCV_ADD16:
- *(u16 *)loc += val;
- break;
- case R_RISCV_SUB16:
- *(u16 *)loc -= val;
- break;
- case R_RISCV_ADD32:
- *(u32 *)loc += val;
- break;
- case R_RISCV_SUB32:
- *(u32 *)loc -= val;
- break;
- /* It has been applied by R_RISCV_PCREL_HI20 sym */
- case R_RISCV_PCREL_LO12_I:
- case R_RISCV_ALIGN:
- case R_RISCV_RELAX:
- break;
- case R_RISCV_64:
- *(u64 *)loc = val;
- break;
- default:
- pr_err("Unknown rela relocation: %d\n", r_type);
- return -ENOEXEC;
- }
- }
- return 0;
-}
-
-const struct kexec_file_ops elf_kexec_ops = {
- .probe = kexec_elf_probe,
- .load = elf_kexec_load,
-};
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index 0fb338000c6d..75656afa2d6b 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -401,9 +401,18 @@ SYM_FUNC_START(__switch_to)
REG_S s9, TASK_THREAD_S9_RA(a3)
REG_S s10, TASK_THREAD_S10_RA(a3)
REG_S s11, TASK_THREAD_S11_RA(a3)
+
+ /* save the user space access flag */
+ csrr s0, CSR_STATUS
+ REG_S s0, TASK_THREAD_SUM_RA(a3)
+
/* Save the kernel shadow call stack pointer */
scs_save_current
/* Restore context from next->thread */
+ REG_L s0, TASK_THREAD_SUM_RA(a4)
+ li s1, SR_SUM
+ and s0, s0, s1
+ csrs CSR_STATUS, s0
REG_L ra, TASK_THREAD_RA_RA(a4)
REG_L sp, TASK_THREAD_SP_RA(a4)
REG_L s0, TASK_THREAD_S0_RA(a4)
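An illustrative model of the save/restore added above (not kernel code): __switch_to now snapshots CSR_STATUS into thread.sum at switch-out and re-asserts only the SR_SUM bit at switch-in, so a task preempted inside a user-access window resumes with user-memory access still enabled.

#include <stdio.h>

#define SR_SUM 0x00040000UL	/* permit Supervisor User Memory access */

struct thread { unsigned long sum; };

static unsigned long csr_status;	/* models CSR_STATUS */

static void model_switch_to(struct thread *prev, struct thread *next)
{
	prev->sum = csr_status;			/* csrr + REG_S above */
	csr_status |= next->sum & SR_SUM;	/* li + and + csrs above */
}

int main(void)
{
	struct thread a = { 0 }, b = { 0 };

	csr_status = SR_SUM;		/* task a preempted mid-uaccess */
	model_switch_to(&a, &b);	/* a's SUM state is captured */
	csr_status = 0;			/* b runs with SUM clear */
	model_switch_to(&b, &a);	/* resuming a re-asserts SR_SUM */
	printf("SUM set after resume: %d\n", !!(csr_status & SR_SUM));
	return 0;
}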
diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c
index 674dcdfae7a1..4c6c24380cfd 100644
--- a/arch/riscv/kernel/ftrace.c
+++ b/arch/riscv/kernel/ftrace.c
@@ -8,98 +8,129 @@
#include <linux/ftrace.h>
#include <linux/uaccess.h>
#include <linux/memory.h>
+#include <linux/irqflags.h>
#include <linux/stop_machine.h>
#include <asm/cacheflush.h>
#include <asm/text-patching.h>
#ifdef CONFIG_DYNAMIC_FTRACE
-void ftrace_arch_code_modify_prepare(void) __acquires(&text_mutex)
+unsigned long ftrace_call_adjust(unsigned long addr)
{
- mutex_lock(&text_mutex);
+ if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS))
+ return addr + 8 + MCOUNT_AUIPC_SIZE;
- /*
- * The code sequences we use for ftrace can't be patched while the
- * kernel is running, so we need to use stop_machine() to modify them
- * for now. This doesn't play nice with text_mutex, we use this flag
- * to elide the check.
- */
- riscv_patch_in_stop_machine = true;
+ return addr + MCOUNT_AUIPC_SIZE;
+}
+
+unsigned long arch_ftrace_get_symaddr(unsigned long fentry_ip)
+{
+ return fentry_ip - MCOUNT_AUIPC_SIZE;
}
-void ftrace_arch_code_modify_post_process(void) __releases(&text_mutex)
+void arch_ftrace_update_code(int command)
{
- riscv_patch_in_stop_machine = false;
+ mutex_lock(&text_mutex);
+ command |= FTRACE_MAY_SLEEP;
+ ftrace_modify_all_code(command);
mutex_unlock(&text_mutex);
+ flush_icache_all();
}
-static int ftrace_check_current_call(unsigned long hook_pos,
- unsigned int *expected)
+static int __ftrace_modify_call(unsigned long source, unsigned long target, bool validate)
{
+ unsigned int call[2], offset;
unsigned int replaced[2];
- unsigned int nops[2] = {RISCV_INSN_NOP4, RISCV_INSN_NOP4};
- /* we expect nops at the hook position */
- if (!expected)
- expected = nops;
+ offset = target - source;
+ call[1] = to_jalr_t0(offset);
- /*
- * Read the text we want to modify;
- * return must be -EFAULT on read error
- */
- if (copy_from_kernel_nofault(replaced, (void *)hook_pos,
- MCOUNT_INSN_SIZE))
- return -EFAULT;
-
- /*
- * Make sure it is what we expect it to be;
- * return must be -EINVAL on failed comparison
- */
- if (memcmp(expected, replaced, sizeof(replaced))) {
- pr_err("%p: expected (%08x %08x) but got (%08x %08x)\n",
- (void *)hook_pos, expected[0], expected[1], replaced[0],
- replaced[1]);
- return -EINVAL;
+ if (validate) {
+ call[0] = to_auipc_t0(offset);
+ /*
+ * Read the text we want to modify;
+ * return must be -EFAULT on read error
+ */
+ if (copy_from_kernel_nofault(replaced, (void *)source, 2 * MCOUNT_INSN_SIZE))
+ return -EFAULT;
+
+ if (replaced[0] != call[0]) {
+ pr_err("%p: expected (%08x) but got (%08x)\n",
+ (void *)source, call[0], replaced[0]);
+ return -EINVAL;
+ }
}
+ /* Replace the jalr at once. Return -EPERM on write error. */
+ if (patch_insn_write((void *)(source + MCOUNT_AUIPC_SIZE), call + 1, MCOUNT_JALR_SIZE))
+ return -EPERM;
+
return 0;
}
-static int __ftrace_modify_call(unsigned long hook_pos, unsigned long target,
- bool enable, bool ra)
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS
+static const struct ftrace_ops *riscv64_rec_get_ops(struct dyn_ftrace *rec)
{
- unsigned int call[2];
- unsigned int nops[2] = {RISCV_INSN_NOP4, RISCV_INSN_NOP4};
+ const struct ftrace_ops *ops = NULL;
- if (ra)
- make_call_ra(hook_pos, target, call);
- else
- make_call_t0(hook_pos, target, call);
+ if (rec->flags & FTRACE_FL_CALL_OPS_EN) {
+ ops = ftrace_find_unique_ops(rec);
+ WARN_ON_ONCE(!ops);
+ }
- /* Replace the auipc-jalr pair at once. Return -EPERM on write error. */
- if (patch_insn_write((void *)hook_pos, enable ? call : nops, MCOUNT_INSN_SIZE))
- return -EPERM;
+ if (!ops)
+ ops = &ftrace_list_ops;
- return 0;
+ return ops;
+}
+
+static int ftrace_rec_set_ops(const struct dyn_ftrace *rec, const struct ftrace_ops *ops)
+{
+ unsigned long literal = ALIGN_DOWN(rec->ip - 12, 8);
+
+ return patch_text_nosync((void *)literal, &ops, sizeof(ops));
+}
+
+static int ftrace_rec_set_nop_ops(struct dyn_ftrace *rec)
+{
+ return ftrace_rec_set_ops(rec, &ftrace_nop_ops);
+}
+
+static int ftrace_rec_update_ops(struct dyn_ftrace *rec)
+{
+ return ftrace_rec_set_ops(rec, riscv64_rec_get_ops(rec));
}
+#else
+static int ftrace_rec_set_nop_ops(struct dyn_ftrace *rec) { return 0; }
+static int ftrace_rec_update_ops(struct dyn_ftrace *rec) { return 0; }
+#endif
int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
- unsigned int call[2];
+ unsigned long distance, orig_addr, pc = rec->ip - MCOUNT_AUIPC_SIZE;
+ int ret;
- make_call_t0(rec->ip, addr, call);
+ ret = ftrace_rec_update_ops(rec);
+ if (ret)
+ return ret;
- if (patch_insn_write((void *)rec->ip, call, MCOUNT_INSN_SIZE))
- return -EPERM;
+ orig_addr = (unsigned long)&ftrace_caller;
+ distance = addr > orig_addr ? addr - orig_addr : orig_addr - addr;
+ if (distance > JALR_RANGE)
+ addr = FTRACE_ADDR;
- return 0;
+ return __ftrace_modify_call(pc, addr, false);
}
-int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
- unsigned long addr)
+int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr)
{
- unsigned int nops[2] = {RISCV_INSN_NOP4, RISCV_INSN_NOP4};
+ u32 nop4 = RISCV_INSN_NOP4;
+ int ret;
- if (patch_insn_write((void *)rec->ip, nops, MCOUNT_INSN_SIZE))
+ ret = ftrace_rec_set_nop_ops(rec);
+ if (ret)
+ return ret;
+
+ if (patch_insn_write((void *)rec->ip, &nop4, MCOUNT_NOP4_SIZE))
return -EPERM;
return 0;
@@ -114,75 +145,71 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
*/
int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
{
- int out;
+ unsigned long pc = rec->ip - MCOUNT_AUIPC_SIZE;
+ unsigned int nops[2], offset;
+ int ret;
- mutex_lock(&text_mutex);
- out = ftrace_make_nop(mod, rec, MCOUNT_ADDR);
- mutex_unlock(&text_mutex);
+ ret = ftrace_rec_set_nop_ops(rec);
+ if (ret)
+ return ret;
- return out;
-}
+ offset = (unsigned long) &ftrace_caller - pc;
+ nops[0] = to_auipc_t0(offset);
+ nops[1] = RISCV_INSN_NOP4;
-int ftrace_update_ftrace_func(ftrace_func_t func)
-{
- int ret = __ftrace_modify_call((unsigned long)&ftrace_call,
- (unsigned long)func, true, true);
+ mutex_lock(&text_mutex);
+ ret = patch_insn_write((void *)pc, nops, 2 * MCOUNT_INSN_SIZE);
+ mutex_unlock(&text_mutex);
return ret;
}
-struct ftrace_modify_param {
- int command;
- atomic_t cpu_count;
-};
-
-static int __ftrace_modify_code(void *data)
+ftrace_func_t ftrace_call_dest = ftrace_stub;
+int ftrace_update_ftrace_func(ftrace_func_t func)
{
- struct ftrace_modify_param *param = data;
-
- if (atomic_inc_return(&param->cpu_count) == num_online_cpus()) {
- ftrace_modify_all_code(param->command);
- /*
- * Make sure the patching store is effective *before* we
- * increment the counter which releases all waiting CPUs
- * by using the release variant of atomic increment. The
- * release pairs with the call to local_flush_icache_all()
- * on the waiting CPU.
- */
- atomic_inc_return_release(&param->cpu_count);
- } else {
- while (atomic_read(&param->cpu_count) <= num_online_cpus())
- cpu_relax();
-
- local_flush_icache_all();
- }
+ /*
+ * When using CALL_OPS, the function to call is associated with the
+ * call site, and we don't have a global function pointer to update.
+ */
+ if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS))
+ return 0;
+ WRITE_ONCE(ftrace_call_dest, func);
+ /*
+ * The data fence ensures that the update to ftrace_call_dest happens
+ * before the write to function_trace_op later in the generic ftrace.
+ * If the ordering is not enforced, an old ftrace_call_dest may race
+ * with loading a new function_trace_op set in ftrace_modify_all_code.
+ */
+ smp_wmb();
+ /*
+ * Updating ftrace does not take the stop_machine path, so irqs should not
+ * be disabled.
+ */
+ WARN_ON(irqs_disabled());
+ smp_call_function(ftrace_sync_ipi, NULL, 1);
return 0;
}
-void arch_ftrace_update_code(int command)
+#else /* CONFIG_DYNAMIC_FTRACE */
+unsigned long ftrace_call_adjust(unsigned long addr)
{
- struct ftrace_modify_param param = { command, ATOMIC_INIT(0) };
-
- stop_machine(__ftrace_modify_code, &param, cpu_online_mask);
+ return addr;
}
-#endif
+#endif /* CONFIG_DYNAMIC_FTRACE */
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
unsigned long addr)
{
- unsigned int call[2];
- unsigned long caller = rec->ip;
+ unsigned long caller = rec->ip - MCOUNT_AUIPC_SIZE;
int ret;
- make_call_t0(caller, old_addr, call);
- ret = ftrace_check_current_call(caller, call);
-
+ ret = ftrace_rec_update_ops(rec);
if (ret)
return ret;
- return __ftrace_modify_call(caller, addr, true, false);
+ return __ftrace_modify_call(caller, FTRACE_ADDR, true);
}
#endif
@@ -210,7 +237,6 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
}
#ifdef CONFIG_DYNAMIC_FTRACE
-#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct ftrace_regs *fregs)
{
@@ -231,19 +257,5 @@ void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
if (!function_graph_enter_regs(old, ip, frame_pointer, parent, fregs))
*parent = return_hooker;
}
-#else /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */
-extern void ftrace_graph_call(void);
-int ftrace_enable_ftrace_graph_caller(void)
-{
- return __ftrace_modify_call((unsigned long)&ftrace_graph_call,
- (unsigned long)&prepare_ftrace_return, true, true);
-}
-
-int ftrace_disable_ftrace_graph_caller(void)
-{
- return __ftrace_modify_call((unsigned long)&ftrace_graph_call,
- (unsigned long)&prepare_ftrace_return, false, true);
-}
-#endif /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
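The patchable call site is now a fixed auipc/jalr pair, with to_auipc_t0() and to_jalr_t0() (from <asm/insn-def.h>) splitting the branch offset across the two instructions. A hedged userspace sketch of that split (helper names here are local to the example; the rounding is the standard RISC-V auipc+jalr sequence, where the +0x800 compensates for the jalr immediate being sign-extended):

#include <stdint.h>
#include <stdio.h>

static uint32_t auipc_hi20(uint32_t offset)
{
	return (offset + 0x800) & 0xfffff000;	/* high part, rounded */
}

static int32_t jalr_lo12(uint32_t offset)
{
	return (int32_t)(offset << 20) >> 20;	/* sign-extended low 12 bits */
}

int main(void)
{
	uint32_t off = 0x12345978;	/* arbitrary callsite->target offset */

	printf("auipc imm %#x, jalr imm %d\n", auipc_hi20(off), jalr_lo12(off));
	/* hi20 plus the sign-extended lo12 must reconstruct the offset */
	return auipc_hi20(off) + (uint32_t)jalr_lo12(off) == off ? 0 : 1;
}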
diff --git a/arch/riscv/kernel/kexec_elf.c b/arch/riscv/kernel/kexec_elf.c
new file mode 100644
index 000000000000..f4755d49b89e
--- /dev/null
+++ b/arch/riscv/kernel/kexec_elf.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Load ELF vmlinux file for the kexec_file_load syscall.
+ *
+ * Copyright (C) 2021 Huawei Technologies Co, Ltd.
+ *
+ * Author: Liao Chang (liaochang1@huawei.com)
+ *
+ * Based on kexec-tools' kexec-elf-riscv.c, heavily modified
+ * for kernel.
+ */
+
+#define pr_fmt(fmt) "kexec_elf: " fmt
+
+#include <linux/elf.h>
+#include <linux/kexec.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/libfdt.h>
+#include <linux/types.h>
+#include <linux/memblock.h>
+#include <asm/setup.h>
+
+static int riscv_kexec_elf_load(struct kimage *image, struct elfhdr *ehdr,
+ struct kexec_elf_info *elf_info, unsigned long old_pbase,
+ unsigned long new_pbase)
+{
+ int i;
+ int ret = 0;
+ size_t size;
+ struct kexec_buf kbuf;
+ const struct elf_phdr *phdr;
+
+ kbuf.image = image;
+
+ for (i = 0; i < ehdr->e_phnum; i++) {
+ phdr = &elf_info->proghdrs[i];
+ if (phdr->p_type != PT_LOAD)
+ continue;
+
+ size = phdr->p_filesz;
+ if (size > phdr->p_memsz)
+ size = phdr->p_memsz;
+
+ kbuf.buffer = (void *) elf_info->buffer + phdr->p_offset;
+ kbuf.bufsz = size;
+ kbuf.buf_align = phdr->p_align;
+ kbuf.mem = phdr->p_paddr - old_pbase + new_pbase;
+ kbuf.memsz = phdr->p_memsz;
+ kbuf.top_down = false;
+ ret = kexec_add_buffer(&kbuf);
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+
+/*
+ * Go through the available physical memory regions and find one that holds
+ * an image of the specified size.
+ */
+static int elf_find_pbase(struct kimage *image, unsigned long kernel_len,
+ struct elfhdr *ehdr, struct kexec_elf_info *elf_info,
+ unsigned long *old_pbase, unsigned long *new_pbase)
+{
+ int i;
+ int ret;
+ struct kexec_buf kbuf;
+ const struct elf_phdr *phdr;
+ unsigned long lowest_paddr = ULONG_MAX;
+ unsigned long lowest_vaddr = ULONG_MAX;
+
+ for (i = 0; i < ehdr->e_phnum; i++) {
+ phdr = &elf_info->proghdrs[i];
+ if (phdr->p_type != PT_LOAD)
+ continue;
+
+ if (lowest_paddr > phdr->p_paddr)
+ lowest_paddr = phdr->p_paddr;
+
+ if (lowest_vaddr > phdr->p_vaddr)
+ lowest_vaddr = phdr->p_vaddr;
+ }
+
+ kbuf.image = image;
+ kbuf.buf_min = lowest_paddr;
+ kbuf.buf_max = ULONG_MAX;
+
+ /*
+ * Current riscv boot protocol requires 2MB alignment for
+ * RV64 and 4MB alignment for RV32.
+ */
+ kbuf.buf_align = PMD_SIZE;
+ kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
+ kbuf.memsz = ALIGN(kernel_len, PAGE_SIZE);
+ kbuf.top_down = false;
+ ret = arch_kexec_locate_mem_hole(&kbuf);
+ if (!ret) {
+ *old_pbase = lowest_paddr;
+ *new_pbase = kbuf.mem;
+ image->start = ehdr->e_entry - lowest_vaddr + kbuf.mem;
+ }
+ return ret;
+}
+
+static void *elf_kexec_load(struct kimage *image, char *kernel_buf,
+ unsigned long kernel_len, char *initrd,
+ unsigned long initrd_len, char *cmdline,
+ unsigned long cmdline_len)
+{
+ int ret;
+ unsigned long old_kernel_pbase = ULONG_MAX;
+ unsigned long new_kernel_pbase = 0UL;
+ struct elfhdr ehdr;
+ struct kexec_elf_info elf_info;
+
+ ret = kexec_build_elf_info(kernel_buf, kernel_len, &ehdr, &elf_info);
+ if (ret)
+ return ERR_PTR(ret);
+
+ ret = elf_find_pbase(image, kernel_len, &ehdr, &elf_info,
+ &old_kernel_pbase, &new_kernel_pbase);
+ if (ret)
+ goto out;
+
+ /* Add the kernel binary to the image */
+ ret = riscv_kexec_elf_load(image, &ehdr, &elf_info,
+ old_kernel_pbase, new_kernel_pbase);
+ if (ret)
+ goto out;
+
+ ret = load_extra_segments(image, image->start, kernel_len,
+ initrd, initrd_len, cmdline, cmdline_len);
+out:
+ kexec_free_elf_info(&elf_info);
+ return ret ? ERR_PTR(ret) : NULL;
+}
+
+const struct kexec_file_ops elf_kexec_ops = {
+ .probe = kexec_elf_probe,
+ .load = elf_kexec_load,
+};
diff --git a/arch/riscv/kernel/kexec_image.c b/arch/riscv/kernel/kexec_image.c
new file mode 100644
index 000000000000..26a81774a78a
--- /dev/null
+++ b/arch/riscv/kernel/kexec_image.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * RISC-V Kexec image loader
+ */
+
+#define pr_fmt(fmt) "kexec_file(Image): " fmt
+
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/kexec.h>
+#include <linux/pe.h>
+#include <linux/string.h>
+#include <asm/byteorder.h>
+#include <asm/image.h>
+
+static int image_probe(const char *kernel_buf, unsigned long kernel_len)
+{
+ const struct riscv_image_header *h = (const struct riscv_image_header *)kernel_buf;
+
+ if (!h || kernel_len < sizeof(*h))
+ return -EINVAL;
+
+ /*
+ * According to Documentation/arch/riscv/boot-image-header.rst,
+ * use the "magic2" field to check when version >= 0.2.
+ */
+
+ if (h->version >= RISCV_HEADER_VERSION &&
+ memcmp(&h->magic2, RISCV_IMAGE_MAGIC2, sizeof(h->magic2)))
+ return -EINVAL;
+
+ return 0;
+}
+
+static void *image_load(struct kimage *image,
+ char *kernel, unsigned long kernel_len,
+ char *initrd, unsigned long initrd_len,
+ char *cmdline, unsigned long cmdline_len)
+{
+ struct riscv_image_header *h;
+ u64 flags;
+ bool be_image, be_kernel;
+ struct kexec_buf kbuf;
+ int ret;
+
+ /* Check Image header */
+ h = (struct riscv_image_header *)kernel;
+ if (!h->image_size) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* Check endianness */
+ flags = le64_to_cpu(h->flags);
+ be_image = riscv_image_flag_field(flags, RISCV_IMAGE_FLAG_BE);
+ be_kernel = IS_ENABLED(CONFIG_CPU_BIG_ENDIAN);
+ if (be_image != be_kernel) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* Load the kernel image */
+ kbuf.image = image;
+ kbuf.buf_min = 0;
+ kbuf.buf_max = ULONG_MAX;
+ kbuf.top_down = false;
+
+ kbuf.buffer = kernel;
+ kbuf.bufsz = kernel_len;
+ kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
+ kbuf.memsz = le64_to_cpu(h->image_size);
+ kbuf.buf_align = le64_to_cpu(h->text_offset);
+
+ ret = kexec_add_buffer(&kbuf);
+ if (ret) {
+ pr_err("Error add kernel image ret=%d\n", ret);
+ goto out;
+ }
+
+ image->start = kbuf.mem;
+
+ pr_info("Loaded kernel at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
+ kbuf.mem, kbuf.bufsz, kbuf.memsz);
+
+ ret = load_extra_segments(image, kbuf.mem, kbuf.memsz,
+ initrd, initrd_len, cmdline, cmdline_len);
+
+out:
+ return ret ? ERR_PTR(ret) : NULL;
+}
+
+const struct kexec_file_ops image_kexec_ops = {
+ .probe = image_probe,
+ .load = image_load,
+};
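A hedged userspace counterpart of image_probe(), assuming the header layout documented in Documentation/arch/riscv/boot-image-header.rst and a little-endian host (so the on-disk little-endian fields can be read directly):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct riscv_image_header {
	uint32_t code0, code1;
	uint64_t text_offset, image_size, flags;
	uint32_t version, res1;
	uint64_t res2, magic;
	uint32_t magic2, res3;
};

int main(int argc, char **argv)
{
	struct riscv_image_header h;
	FILE *f = argc > 1 ? fopen(argv[1], "rb") : NULL;

	if (!f || fread(&h, sizeof(h), 1, f) != 1)
		return 1;
	/* "RSC\x05", little endian, the same magic2 image_probe() checks */
	if (memcmp(&h.magic2, "RSC\x05", sizeof(h.magic2)) == 0)
		printf("valid Image, image_size=%llu text_offset=%llu\n",
		       (unsigned long long)h.image_size,
		       (unsigned long long)h.text_offset);
	return 0;
}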
diff --git a/arch/riscv/kernel/machine_kexec_file.c b/arch/riscv/kernel/machine_kexec_file.c
index b0bf8c1722c0..e36104af2e24 100644
--- a/arch/riscv/kernel/machine_kexec_file.c
+++ b/arch/riscv/kernel/machine_kexec_file.c
@@ -7,8 +7,369 @@
* Author: Liao Chang (liaochang1@huawei.com)
*/
#include <linux/kexec.h>
+#include <linux/elf.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/libfdt.h>
+#include <linux/types.h>
+#include <linux/memblock.h>
+#include <linux/vmalloc.h>
+#include <asm/setup.h>
const struct kexec_file_ops * const kexec_file_loaders[] = {
&elf_kexec_ops,
+ &image_kexec_ops,
NULL
};
+
+int arch_kimage_file_post_load_cleanup(struct kimage *image)
+{
+ kvfree(image->arch.fdt);
+ image->arch.fdt = NULL;
+
+ vfree(image->elf_headers);
+ image->elf_headers = NULL;
+ image->elf_headers_sz = 0;
+
+ return kexec_image_post_load_cleanup_default(image);
+}
+
+#ifdef CONFIG_CRASH_DUMP
+static int get_nr_ram_ranges_callback(struct resource *res, void *arg)
+{
+ unsigned int *nr_ranges = arg;
+
+ (*nr_ranges)++;
+ return 0;
+}
+
+static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg)
+{
+ struct crash_mem *cmem = arg;
+
+ cmem->ranges[cmem->nr_ranges].start = res->start;
+ cmem->ranges[cmem->nr_ranges].end = res->end;
+ cmem->nr_ranges++;
+
+ return 0;
+}
+
+static int prepare_elf_headers(void **addr, unsigned long *sz)
+{
+ struct crash_mem *cmem;
+ unsigned int nr_ranges;
+ int ret;
+
+ nr_ranges = 1; /* For exclusion of crashkernel region */
+ walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback);
+
+ cmem = kmalloc(struct_size(cmem, ranges, nr_ranges), GFP_KERNEL);
+ if (!cmem)
+ return -ENOMEM;
+
+ cmem->max_nr_ranges = nr_ranges;
+ cmem->nr_ranges = 0;
+ ret = walk_system_ram_res(0, -1, cmem, prepare_elf64_ram_headers_callback);
+ if (ret)
+ goto out;
+
+ /* Exclude crashkernel region */
+ ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end);
+ if (!ret)
+ ret = crash_prepare_elf64_headers(cmem, true, addr, sz);
+
+out:
+ kfree(cmem);
+ return ret;
+}
+
+static char *setup_kdump_cmdline(struct kimage *image, char *cmdline,
+ unsigned long cmdline_len)
+{
+ int elfcorehdr_strlen;
+ char *cmdline_ptr;
+
+ cmdline_ptr = kzalloc(COMMAND_LINE_SIZE, GFP_KERNEL);
+ if (!cmdline_ptr)
+ return NULL;
+
+ elfcorehdr_strlen = sprintf(cmdline_ptr, "elfcorehdr=0x%lx ",
+ image->elf_load_addr);
+
+ if (elfcorehdr_strlen + cmdline_len > COMMAND_LINE_SIZE) {
+ pr_err("Appending elfcorehdr=<addr> exceeds cmdline size\n");
+ kfree(cmdline_ptr);
+ return NULL;
+ }
+
+ memcpy(cmdline_ptr + elfcorehdr_strlen, cmdline, cmdline_len);
+ /* Ensure it's nul terminated */
+ cmdline_ptr[COMMAND_LINE_SIZE - 1] = '\0';
+ return cmdline_ptr;
+}
+#endif
+
+#define RV_X(x, s, n) (((x) >> (s)) & ((1 << (n)) - 1))
+#define RISCV_IMM_BITS 12
+#define RISCV_IMM_REACH (1LL << RISCV_IMM_BITS)
+#define RISCV_CONST_HIGH_PART(x) \
+ (((x) + (RISCV_IMM_REACH >> 1)) & ~(RISCV_IMM_REACH - 1))
+#define RISCV_CONST_LOW_PART(x) ((x) - RISCV_CONST_HIGH_PART(x))
+
+#define ENCODE_ITYPE_IMM(x) \
+ (RV_X(x, 0, 12) << 20)
+#define ENCODE_BTYPE_IMM(x) \
+ ((RV_X(x, 1, 4) << 8) | (RV_X(x, 5, 6) << 25) | \
+ (RV_X(x, 11, 1) << 7) | (RV_X(x, 12, 1) << 31))
+#define ENCODE_UTYPE_IMM(x) \
+ (RV_X(x, 12, 20) << 12)
+#define ENCODE_JTYPE_IMM(x) \
+ ((RV_X(x, 1, 10) << 21) | (RV_X(x, 11, 1) << 20) | \
+ (RV_X(x, 12, 8) << 12) | (RV_X(x, 20, 1) << 31))
+#define ENCODE_CBTYPE_IMM(x) \
+ ((RV_X(x, 1, 2) << 3) | (RV_X(x, 3, 2) << 10) | (RV_X(x, 5, 1) << 2) | \
+ (RV_X(x, 6, 2) << 5) | (RV_X(x, 8, 1) << 12))
+#define ENCODE_CJTYPE_IMM(x) \
+ ((RV_X(x, 1, 3) << 3) | (RV_X(x, 4, 1) << 11) | (RV_X(x, 5, 1) << 2) | \
+ (RV_X(x, 6, 1) << 7) | (RV_X(x, 7, 1) << 6) | (RV_X(x, 8, 2) << 9) | \
+ (RV_X(x, 10, 1) << 8) | (RV_X(x, 11, 1) << 12))
+#define ENCODE_UJTYPE_IMM(x) \
+ (ENCODE_UTYPE_IMM(RISCV_CONST_HIGH_PART(x)) | \
+ (ENCODE_ITYPE_IMM(RISCV_CONST_LOW_PART(x)) << 32))
+#define ENCODE_UITYPE_IMM(x) \
+ (ENCODE_UTYPE_IMM(x) | (ENCODE_ITYPE_IMM(x) << 32))
+
+#define CLEAN_IMM(type, x) \
+ ((~ENCODE_##type##_IMM((uint64_t)(-1))) & (x))
+
+int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
+ Elf_Shdr *section,
+ const Elf_Shdr *relsec,
+ const Elf_Shdr *symtab)
+{
+ const char *strtab, *name, *shstrtab;
+ const Elf_Shdr *sechdrs;
+ Elf64_Rela *relas;
+ int i, r_type;
+
+ /* String & section header string table */
+ sechdrs = (void *)pi->ehdr + pi->ehdr->e_shoff;
+ strtab = (char *)pi->ehdr + sechdrs[symtab->sh_link].sh_offset;
+ shstrtab = (char *)pi->ehdr + sechdrs[pi->ehdr->e_shstrndx].sh_offset;
+
+ relas = (void *)pi->ehdr + relsec->sh_offset;
+
+ for (i = 0; i < relsec->sh_size / sizeof(*relas); i++) {
+ const Elf_Sym *sym; /* symbol to relocate */
+ unsigned long addr; /* final location after relocation */
+ unsigned long val; /* relocated symbol value */
+ unsigned long sec_base; /* section base of the relocated symbol */
+ void *loc; /* tmp location to modify */
+
+ sym = (void *)pi->ehdr + symtab->sh_offset;
+ sym += ELF64_R_SYM(relas[i].r_info);
+
+ if (sym->st_name)
+ name = strtab + sym->st_name;
+ else
+ name = shstrtab + sechdrs[sym->st_shndx].sh_name;
+
+ loc = pi->purgatory_buf;
+ loc += section->sh_offset;
+ loc += relas[i].r_offset;
+
+ if (sym->st_shndx == SHN_ABS)
+ sec_base = 0;
+ else if (sym->st_shndx >= pi->ehdr->e_shnum) {
+ pr_err("Invalid section %d for symbol %s\n",
+ sym->st_shndx, name);
+ return -ENOEXEC;
+ } else
+ sec_base = pi->sechdrs[sym->st_shndx].sh_addr;
+
+ val = sym->st_value;
+ val += sec_base;
+ val += relas[i].r_addend;
+
+ addr = section->sh_addr + relas[i].r_offset;
+
+ r_type = ELF64_R_TYPE(relas[i].r_info);
+
+ switch (r_type) {
+ case R_RISCV_BRANCH:
+ *(u32 *)loc = CLEAN_IMM(BTYPE, *(u32 *)loc) |
+ ENCODE_BTYPE_IMM(val - addr);
+ break;
+ case R_RISCV_JAL:
+ *(u32 *)loc = CLEAN_IMM(JTYPE, *(u32 *)loc) |
+ ENCODE_JTYPE_IMM(val - addr);
+ break;
+ /*
+ * With no R_RISCV_PCREL_LO12_S, R_RISCV_PCREL_LO12_I
+ * sym is expected to be next to R_RISCV_PCREL_HI20
+ * in purgatory relsec. Handle it like R_RISCV_CALL
+ * sym, instead of searching the whole relsec.
+ */
+ case R_RISCV_PCREL_HI20:
+ case R_RISCV_CALL_PLT:
+ case R_RISCV_CALL:
+ *(u64 *)loc = CLEAN_IMM(UITYPE, *(u64 *)loc) |
+ ENCODE_UJTYPE_IMM(val - addr);
+ break;
+ case R_RISCV_RVC_BRANCH:
+ *(u32 *)loc = CLEAN_IMM(CBTYPE, *(u32 *)loc) |
+ ENCODE_CBTYPE_IMM(val - addr);
+ break;
+ case R_RISCV_RVC_JUMP:
+ *(u32 *)loc = CLEAN_IMM(CJTYPE, *(u32 *)loc) |
+ ENCODE_CJTYPE_IMM(val - addr);
+ break;
+ case R_RISCV_ADD16:
+ *(u16 *)loc += val;
+ break;
+ case R_RISCV_SUB16:
+ *(u16 *)loc -= val;
+ break;
+ case R_RISCV_ADD32:
+ *(u32 *)loc += val;
+ break;
+ case R_RISCV_SUB32:
+ *(u32 *)loc -= val;
+ break;
+ /* It has been applied by R_RISCV_PCREL_HI20 sym */
+ case R_RISCV_PCREL_LO12_I:
+ case R_RISCV_ALIGN:
+ case R_RISCV_RELAX:
+ break;
+ case R_RISCV_64:
+ *(u64 *)loc = val;
+ break;
+ default:
+ pr_err("Unknown rela relocation: %d\n", r_type);
+ return -ENOEXEC;
+ }
+ }
+ return 0;
+}
+
+int load_extra_segments(struct kimage *image, unsigned long kernel_start,
+ unsigned long kernel_len, char *initrd,
+ unsigned long initrd_len, char *cmdline,
+ unsigned long cmdline_len)
+{
+ int ret;
+ void *fdt;
+ unsigned long initrd_pbase = 0UL;
+ struct kexec_buf kbuf;
+ char *modified_cmdline = NULL;
+
+ kbuf.image = image;
+ kbuf.buf_min = kernel_start + kernel_len;
+ kbuf.buf_max = ULONG_MAX;
+
+#ifdef CONFIG_CRASH_DUMP
+ /* Add elfcorehdr */
+ if (image->type == KEXEC_TYPE_CRASH) {
+ void *headers;
+ unsigned long headers_sz;
+ ret = prepare_elf_headers(&headers, &headers_sz);
+ if (ret) {
+ pr_err("Preparing elf core header failed\n");
+ goto out;
+ }
+
+ kbuf.buffer = headers;
+ kbuf.bufsz = headers_sz;
+ kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
+ kbuf.memsz = headers_sz;
+ kbuf.buf_align = ELF_CORE_HEADER_ALIGN;
+ kbuf.top_down = true;
+
+ ret = kexec_add_buffer(&kbuf);
+ if (ret) {
+ vfree(headers);
+ goto out;
+ }
+ image->elf_headers = headers;
+ image->elf_load_addr = kbuf.mem;
+ image->elf_headers_sz = headers_sz;
+
+ kexec_dprintk("Loaded elf core header at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
+ image->elf_load_addr, kbuf.bufsz, kbuf.memsz);
+
+ /* Setup cmdline for kdump kernel case */
+ modified_cmdline = setup_kdump_cmdline(image, cmdline,
+ cmdline_len);
+ if (!modified_cmdline) {
+ pr_err("Setting up cmdline for kdump kernel failed\n");
+ ret = -EINVAL;
+ goto out;
+ }
+ cmdline = modified_cmdline;
+ }
+#endif
+
+#ifdef CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY
+ /* Add purgatory to the image */
+ kbuf.top_down = true;
+ kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
+ ret = kexec_load_purgatory(image, &kbuf);
+ if (ret) {
+ pr_err("Error loading purgatory ret=%d\n", ret);
+ goto out;
+ }
+ kexec_dprintk("Loaded purgatory at 0x%lx\n", kbuf.mem);
+
+ ret = kexec_purgatory_get_set_symbol(image, "riscv_kernel_entry",
+ &kernel_start,
+ sizeof(kernel_start), 0);
+ if (ret)
+ pr_err("Error update purgatory ret=%d\n", ret);
+#endif /* CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY */
+
+ /* Add the initrd to the image */
+ if (initrd != NULL) {
+ kbuf.buffer = initrd;
+ kbuf.bufsz = kbuf.memsz = initrd_len;
+ kbuf.buf_align = PAGE_SIZE;
+ kbuf.top_down = true;
+ kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
+ ret = kexec_add_buffer(&kbuf);
+ if (ret)
+ goto out;
+ initrd_pbase = kbuf.mem;
+ kexec_dprintk("Loaded initrd at 0x%lx\n", initrd_pbase);
+ }
+
+ /* Add the DTB to the image */
+ fdt = of_kexec_alloc_and_setup_fdt(image, initrd_pbase,
+ initrd_len, cmdline, 0);
+ if (!fdt) {
+ pr_err("Error setting up the new device tree.\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ fdt_pack(fdt);
+ kbuf.buffer = fdt;
+ kbuf.bufsz = kbuf.memsz = fdt_totalsize(fdt);
+ kbuf.buf_align = PAGE_SIZE;
+ kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
+ kbuf.top_down = true;
+ ret = kexec_add_buffer(&kbuf);
+ if (ret) {
+ pr_err("Error add DTB kbuf ret=%d\n", ret);
+ goto out_free_fdt;
+ }
+ /* Cache the fdt buffer address for memory cleanup */
+ image->arch.fdt = fdt;
+ kexec_dprintk("Loaded device tree at 0x%lx\n", kbuf.mem);
+ goto out;
+
+out_free_fdt:
+ kvfree(fdt);
+out:
+ kfree(modified_cmdline);
+ return ret;
+}
diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S
index 745dd4c4a69c..48f6c4f7dca0 100644
--- a/arch/riscv/kernel/mcount-dyn.S
+++ b/arch/riscv/kernel/mcount-dyn.S
@@ -13,7 +13,6 @@
.text
-#define FENTRY_RA_OFFSET 8
#define ABI_SIZE_ON_STACK 80
#define ABI_A0 0
#define ABI_A1 8
@@ -56,16 +55,13 @@
addi sp, sp, ABI_SIZE_ON_STACK
.endm
-#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
-
/**
* SAVE_ABI_REGS - save regs against the ftrace_regs struct
*
* After the stack is established,
*
* 0(sp) stores the PC of the traced function which can be accessed
-* by &(fregs)->epc in tracing function. Note that the real
-* function entry address should be computed with -FENTRY_RA_OFFSET.
+* by &(fregs)->epc in tracing function.
*
* 8(sp) stores the function return address (i.e. parent IP) that
* can be accessed by &(fregs)->ra in tracing function.
@@ -86,17 +82,20 @@
* +++++++++
**/
.macro SAVE_ABI_REGS
- mv t4, sp // Save original SP in T4
addi sp, sp, -FREGS_SIZE_ON_STACK
-
REG_S t0, FREGS_EPC(sp)
REG_S x1, FREGS_RA(sp)
- REG_S t4, FREGS_SP(sp) // Put original SP on stack
#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
REG_S x8, FREGS_S0(sp)
#endif
REG_S x6, FREGS_T1(sp)
-
+#ifdef CONFIG_CC_IS_CLANG
+ REG_S x7, FREGS_T2(sp)
+ REG_S x28, FREGS_T3(sp)
+ REG_S x29, FREGS_T4(sp)
+ REG_S x30, FREGS_T5(sp)
+ REG_S x31, FREGS_T6(sp)
+#endif
// save the arguments
REG_S x10, FREGS_A0(sp)
REG_S x11, FREGS_A1(sp)
@@ -106,16 +105,25 @@
REG_S x15, FREGS_A5(sp)
REG_S x16, FREGS_A6(sp)
REG_S x17, FREGS_A7(sp)
+ mv a0, sp
+ addi a0, a0, FREGS_SIZE_ON_STACK
+ REG_S a0, FREGS_SP(sp) // Put original SP on stack
.endm
- .macro RESTORE_ABI_REGS, all=0
+ .macro RESTORE_ABI_REGS
REG_L t0, FREGS_EPC(sp)
REG_L x1, FREGS_RA(sp)
#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
REG_L x8, FREGS_S0(sp)
#endif
REG_L x6, FREGS_T1(sp)
-
+#ifdef CONFIG_CC_IS_CLANG
+ REG_L x7, FREGS_T2(sp)
+ REG_L x28, FREGS_T3(sp)
+ REG_L x29, FREGS_T4(sp)
+ REG_L x30, FREGS_T5(sp)
+ REG_L x31, FREGS_T6(sp)
+#endif
// restore the arguments
REG_L x10, FREGS_A0(sp)
REG_L x11, FREGS_A1(sp)
@@ -130,60 +138,71 @@
.endm
.macro PREPARE_ARGS
- addi a0, t0, -FENTRY_RA_OFFSET
+ addi a0, t0, -MCOUNT_JALR_SIZE // ip (callsite's jalr insn)
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS
+ mv a1, ra // parent_ip
+ REG_L a2, -16(t0) // op
+ REG_L ra, FTRACE_OPS_FUNC(a2) // op->func
+#else
la a1, function_trace_op
- REG_L a2, 0(a1)
- mv a1, ra
- mv a3, sp
+ REG_L a2, 0(a1) // op
+ mv a1, ra // parent_ip
+#endif
+ mv a3, sp // regs
.endm
-#endif /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */
-
-#ifndef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
SYM_FUNC_START(ftrace_caller)
- SAVE_ABI
-
- addi a0, t0, -FENTRY_RA_OFFSET
- la a1, function_trace_op
- REG_L a2, 0(a1)
- mv a1, ra
- mv a3, sp
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS
+ /*
+ * When CALL_OPS is enabled (2 or 4) nops [8B] are placed before the
+ * function entry, these are later overwritten with the pointer to the
+ * associated struct ftrace_ops.
+ *
+ * -8: &ftrace_ops of the associated tracer function.
+ *<ftrace enable>:
+ * 0: auipc t0/ra, 0x?
+ * 4: jalr t0/ra, ?(t0/ra)
+ *
+ * -8: &ftrace_nop_ops
+ *<ftrace disable>:
+ * 0: nop
+ * 4: nop
+ *
+ * t0 is set to ip+8 after the jalr is executed at the callsite,
+ * so we find the associated op at t0-16.
+ */
+ REG_L t1, -16(t0) // op (the -16 literal should really be -2 * SZ_REG)
-SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)
- call ftrace_stub
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- addi a0, sp, ABI_RA
- REG_L a1, ABI_T0(sp)
- addi a1, a1, -FENTRY_RA_OFFSET
-#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
- mv a2, s0
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+ /*
+ * If the op has a direct call, handle it immediately without
+ * saving/restoring registers.
+ */
+ REG_L t1, FTRACE_OPS_DIRECT_CALL(t1)
+ bnez t1, ftrace_caller_direct
#endif
-SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL)
- call ftrace_stub
#endif
- RESTORE_ABI
- jr t0
-SYM_FUNC_END(ftrace_caller)
-
-#else /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */
-SYM_FUNC_START(ftrace_caller)
- mv t1, zero
SAVE_ABI_REGS
PREPARE_ARGS
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS
+ jalr ra
+#else
SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)
- call ftrace_stub
-
+ REG_L ra, ftrace_call_dest
+ jalr ra, 0(ra)
+#endif
RESTORE_ABI_REGS
- bnez t1, .Ldirect
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+ bnez t1, ftrace_caller_direct
+#endif
jr t0
-.Ldirect:
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+SYM_INNER_LABEL(ftrace_caller_direct, SYM_L_LOCAL)
jr t1
+#endif
SYM_FUNC_END(ftrace_caller)
-#endif /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */
-
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
SYM_CODE_START(ftrace_stub_direct_tramp)
jr t0
diff --git a/arch/riscv/kernel/module-sections.c b/arch/riscv/kernel/module-sections.c
index 91d0b355ceef..75551ac6504c 100644
--- a/arch/riscv/kernel/module-sections.c
+++ b/arch/riscv/kernel/module-sections.c
@@ -9,6 +9,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/moduleloader.h>
+#include <linux/sort.h>
unsigned long module_emit_got_entry(struct module *mod, unsigned long val)
{
@@ -55,44 +56,70 @@ unsigned long module_emit_plt_entry(struct module *mod, unsigned long val)
return (unsigned long)&plt[i];
}
-static int is_rela_equal(const Elf_Rela *x, const Elf_Rela *y)
+#define cmp_3way(a, b) ((a) < (b) ? -1 : (a) > (b))
+
+static int cmp_rela(const void *a, const void *b)
{
- return x->r_info == y->r_info && x->r_addend == y->r_addend;
+ const Elf_Rela *x = a, *y = b;
+ int i;
+
+ /* sort by type, symbol index and addend */
+ i = cmp_3way(x->r_info, y->r_info);
+ if (i == 0)
+ i = cmp_3way(x->r_addend, y->r_addend);
+ return i;
}
static bool duplicate_rela(const Elf_Rela *rela, int idx)
{
- int i;
- for (i = 0; i < idx; i++) {
- if (is_rela_equal(&rela[i], &rela[idx]))
- return true;
- }
- return false;
+ /*
+ * Entries are sorted by type, symbol index and addend. That means
+ * that, if a duplicate entry exists, it must be in the preceding slot.
+ */
+ return idx > 0 && cmp_rela(rela + idx, rela + idx - 1) == 0;
}
-static void count_max_entries(Elf_Rela *relas, int num,
+static void count_max_entries(const Elf_Rela *relas, size_t num,
unsigned int *plts, unsigned int *gots)
{
- for (int i = 0; i < num; i++) {
+ for (size_t i = 0; i < num; i++) {
+ if (duplicate_rela(relas, i))
+ continue;
+
switch (ELF_R_TYPE(relas[i].r_info)) {
case R_RISCV_CALL_PLT:
case R_RISCV_PLT32:
- if (!duplicate_rela(relas, i))
- (*plts)++;
+ (*plts)++;
break;
case R_RISCV_GOT_HI20:
- if (!duplicate_rela(relas, i))
- (*gots)++;
+ (*gots)++;
break;
+ default:
+ unreachable();
}
}
}
+static bool rela_needs_plt_got_entry(const Elf_Rela *rela)
+{
+ switch (ELF_R_TYPE(rela->r_info)) {
+ case R_RISCV_CALL_PLT:
+ case R_RISCV_GOT_HI20:
+ case R_RISCV_PLT32:
+ return true;
+ default:
+ return false;
+ }
+}
+
int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
char *secstrings, struct module *mod)
{
+ size_t num_scratch_relas = 0;
unsigned int num_plts = 0;
unsigned int num_gots = 0;
+ Elf_Rela *scratch = NULL;
+ size_t scratch_size = 0;
int i;
/*
@@ -122,9 +149,10 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
/* Calculate the maximum number of entries */
for (i = 0; i < ehdr->e_shnum; i++) {
+ size_t num_relas = sechdrs[i].sh_size / sizeof(Elf_Rela);
Elf_Rela *relas = (void *)ehdr + sechdrs[i].sh_offset;
- int num_rela = sechdrs[i].sh_size / sizeof(Elf_Rela);
Elf_Shdr *dst_sec = sechdrs + sechdrs[i].sh_info;
+ size_t scratch_size_needed;
if (sechdrs[i].sh_type != SHT_RELA)
continue;
@@ -133,7 +161,28 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
if (!(dst_sec->sh_flags & SHF_EXECINSTR))
continue;
- count_max_entries(relas, num_rela, &num_plts, &num_gots);
+ /*
+ * apply_relocate_add() relies on HI20 and LO12 relocation pairs being
+ * close together, so sort a copy of the section to avoid interfering with them.
+ */
+ scratch_size_needed = (num_scratch_relas + num_relas) * sizeof(*scratch);
+ if (scratch_size_needed > scratch_size) {
+ scratch_size = scratch_size_needed;
+ scratch = kvrealloc(scratch, scratch_size, GFP_KERNEL);
+ if (!scratch)
+ return -ENOMEM;
+ }
+
+ for (size_t j = 0; j < num_relas; j++)
+ if (rela_needs_plt_got_entry(&relas[j]))
+ scratch[num_scratch_relas++] = relas[j];
+ }
+
+ if (scratch) {
+ /* sort the accumulated PLT/GOT relocations so duplicates are adjacent */
+ sort(scratch, num_scratch_relas, sizeof(*scratch), cmp_rela, NULL);
+ count_max_entries(scratch, num_scratch_relas, &num_plts, &num_gots);
+ kvfree(scratch);
}
mod->arch.plt.shdr->sh_type = SHT_NOBITS;
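
The sort-then-compare-adjacent scheme used by cmp_rela() and duplicate_rela() above is easiest to see in isolation. A self-contained userspace sketch (illustrative only, with qsort standing in for the kernel's sort()):

	#include <stdio.h>
	#include <stdlib.h>

	struct rela { unsigned long r_info; long r_addend; };

	#define cmp_3way(a, b) ((a) < (b) ? -1 : (a) > (b))

	static int cmp_rela(const void *a, const void *b)
	{
		const struct rela *x = a, *y = b;
		int i = cmp_3way(x->r_info, y->r_info);

		return i ? i : cmp_3way(x->r_addend, y->r_addend);
	}

	int main(void)
	{
		struct rela r[] = { {2, 0}, {1, 4}, {2, 0}, {1, 4}, {3, 8} };
		size_t n = sizeof(r) / sizeof(r[0]), uniq = 0;

		qsort(r, n, sizeof(r[0]), cmp_rela);
		/* after sorting, any duplicate is in the preceding slot */
		for (size_t i = 0; i < n; i++)
			if (i == 0 || cmp_rela(&r[i], &r[i - 1]) != 0)
				uniq++;
		printf("%zu unique entries\n", uniq);	/* prints 3 */
		return 0;
	}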
diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
index bbf7ec6a75c0..a0a40889d79a 100644
--- a/arch/riscv/kernel/process.c
+++ b/arch/riscv/kernel/process.c
@@ -60,7 +60,7 @@ int get_unalign_ctl(struct task_struct *tsk, unsigned long adr)
if (!unaligned_ctl_available())
return -EINVAL;
- return put_user(tsk->thread.align_ctl, (unsigned long __user *)adr);
+ return put_user(tsk->thread.align_ctl, (unsigned int __user *)adr);
}
void __show_regs(struct pt_regs *regs)
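
The type change above matters because prctl(2) documents PR_GET_UNALIGN as storing its result through an (unsigned int *) argument, so a put_user() of an unsigned long would write past the 4-byte variable on 64-bit. A short userspace sketch of the consumer side:

	#include <stdio.h>
	#include <sys/prctl.h>

	int main(void)
	{
		unsigned int ctl;	/* PR_GET_UNALIGN stores via (unsigned int *) */

		if (prctl(PR_GET_UNALIGN, &ctl) == 0)
			printf("unalign ctl: %#x\n", ctl);
		return 0;
	}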
diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c
index 1989b8cade1b..53836a9235e3 100644
--- a/arch/riscv/kernel/sbi.c
+++ b/arch/riscv/kernel/sbi.c
@@ -299,6 +299,76 @@ static int __sbi_rfence_v02(int fid, const struct cpumask *cpu_mask,
return 0;
}
+static bool sbi_fwft_supported;
+
+struct fwft_set_req {
+ u32 feature;
+ unsigned long value;
+ unsigned long flags;
+ atomic_t error;
+};
+
+static void cpu_sbi_fwft_set(void *arg)
+{
+ struct fwft_set_req *req = arg;
+ int ret;
+
+ ret = sbi_fwft_set(req->feature, req->value, req->flags);
+ if (ret)
+ atomic_set(&req->error, ret);
+}
+
+/**
+ * sbi_fwft_set() - Set a feature on the local hart
+ * @feature: The feature ID to be set
+ * @value: The feature value to be set
+ * @flags: FWFT feature set flags
+ *
+ * Return: 0 on success, appropriate Linux error code otherwise.
+ */
+int sbi_fwft_set(u32 feature, unsigned long value, unsigned long flags)
+{
+ struct sbiret ret;
+
+ if (!sbi_fwft_supported)
+ return -EOPNOTSUPP;
+
+ ret = sbi_ecall(SBI_EXT_FWFT, SBI_EXT_FWFT_SET,
+ feature, value, flags, 0, 0, 0);
+
+ return sbi_err_map_linux_errno(ret.error);
+}
+
+/**
+ * sbi_fwft_set_cpumask() - Set a feature for the specified cpumask
+ * @mask: CPU mask of cpus that need the feature to be set
+ * @feature: The feature ID to be set
+ * @value: The feature value to be set
+ * @flags: FWFT feature set flags
+ *
+ * Return: 0 on success, appropriate Linux error code otherwise.
+ */
+int sbi_fwft_set_cpumask(const cpumask_t *mask, u32 feature,
+ unsigned long value, unsigned long flags)
+{
+ struct fwft_set_req req = {
+ .feature = feature,
+ .value = value,
+ .flags = flags,
+ .error = ATOMIC_INIT(0),
+ };
+
+ if (!sbi_fwft_supported)
+ return -EOPNOTSUPP;
+
+ if (feature & SBI_FWFT_GLOBAL_FEATURE_BIT)
+ return -EINVAL;
+
+ on_each_cpu_mask(mask, cpu_sbi_fwft_set, &req, 1);
+
+ return atomic_read(&req.error);
+}
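
As a usage sketch (illustrative; assumes the declarations above are in scope), delegating misaligned-access exceptions to all online CPUs could look like:

	int ret;

	ret = sbi_fwft_set_cpumask(cpu_online_mask,
				   SBI_FWFT_MISALIGNED_EXC_DELEG, 1, 0);
	if (ret)
		pr_warn("FWFT misaligned delegation failed: %d\n", ret);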
+
/**
* sbi_set_timer() - Program the timer for next timer event.
* @stime_value: The value after which next timer event should fire.
@@ -609,7 +679,7 @@ void __init sbi_init(void)
} else {
__sbi_rfence = __sbi_rfence_v01;
}
- if ((sbi_spec_version >= sbi_mk_version(0, 3)) &&
+ if (sbi_spec_version >= sbi_mk_version(0, 3) &&
sbi_probe_extension(SBI_EXT_SRST)) {
pr_info("SBI SRST extension detected\n");
pm_power_off = sbi_srst_power_off;
@@ -617,11 +687,16 @@ void __init sbi_init(void)
sbi_srst_reboot_nb.priority = 192;
register_restart_handler(&sbi_srst_reboot_nb);
}
- if ((sbi_spec_version >= sbi_mk_version(2, 0)) &&
- (sbi_probe_extension(SBI_EXT_DBCN) > 0)) {
+ if (sbi_spec_version >= sbi_mk_version(2, 0) &&
+ sbi_probe_extension(SBI_EXT_DBCN) > 0) {
pr_info("SBI DBCN extension detected\n");
sbi_debug_console_available = true;
}
+ if (sbi_spec_version >= sbi_mk_version(3, 0) &&
+ sbi_probe_extension(SBI_EXT_FWFT)) {
+ pr_info("SBI FWFT extension detected\n");
+ sbi_fwft_supported = true;
+ }
} else {
__sbi_set_timer = __sbi_set_timer_v01;
__sbi_send_ipi = __sbi_send_ipi_v01;
diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c
index 249aec8594a9..0b170e18a2be 100644
--- a/arch/riscv/kernel/sys_hwprobe.c
+++ b/arch/riscv/kernel/sys_hwprobe.c
@@ -15,6 +15,7 @@
#include <asm/uaccess.h>
#include <asm/unistd.h>
#include <asm/vector.h>
+#include <asm/vendor_extensions/sifive_hwprobe.h>
#include <asm/vendor_extensions/thead_hwprobe.h>
#include <vdso/vsyscall.h>
@@ -96,6 +97,7 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair,
* presence in the hart_isa bitmap, are made.
*/
EXT_KEY(ZAAMO);
+ EXT_KEY(ZABHA);
EXT_KEY(ZACAS);
EXT_KEY(ZALRSC);
EXT_KEY(ZAWRS);
@@ -300,6 +302,10 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair,
pair->value = riscv_timebase;
break;
+ case RISCV_HWPROBE_KEY_VENDOR_EXT_SIFIVE_0:
+ hwprobe_isa_vendor_ext_sifive_0(pair, cpus);
+ break;
+
case RISCV_HWPROBE_KEY_VENDOR_EXT_THEAD_0:
hwprobe_isa_vendor_ext_thead_0(pair, cpus);
break;
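
From userspace, the new key is queried like any other hwprobe key. A hedged sketch; the value-bit macro follows the naming pattern of the existing T-Head key and is an assumption here:

	#include <asm/hwprobe.h>
	#include <asm/vendor/sifive.h>
	#include <stdio.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	int main(void)
	{
		struct riscv_hwprobe pair = {
			.key = RISCV_HWPROBE_KEY_VENDOR_EXT_SIFIVE_0,
		};

		/* empty cpu set: report extensions common to all CPUs */
		if (syscall(__NR_riscv_hwprobe, &pair, 1, 0, NULL, 0) == 0)
			printf("xsfvqmaccdod: %s\n",
			       (pair.value & RISCV_HWPROBE_VENDOR_EXT_XSFVQMACCDOD) ?
			       "yes" : "no");
		return 0;
	}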
diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c
index 77c788660223..dd8e4af6583f 100644
--- a/arch/riscv/kernel/traps_misaligned.c
+++ b/arch/riscv/kernel/traps_misaligned.c
@@ -16,6 +16,7 @@
#include <asm/entry-common.h>
#include <asm/hwprobe.h>
#include <asm/cpufeature.h>
+#include <asm/sbi.h>
#include <asm/vector.h>
#define INSN_MATCH_LB 0x3
@@ -368,9 +369,7 @@ static int handle_scalar_misaligned_load(struct pt_regs *regs)
perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr);
-#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS
*this_cpu_ptr(&misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED;
-#endif
if (!unaligned_enabled)
return -1;
@@ -455,7 +454,7 @@ static int handle_scalar_misaligned_load(struct pt_regs *regs)
val.data_u64 = 0;
if (user_mode(regs)) {
- if (copy_from_user(&val, (u8 __user *)addr, len))
+ if (copy_from_user_nofault(&val, (u8 __user *)addr, len))
return -1;
} else {
memcpy(&val, (u8 *)addr, len);
@@ -556,7 +555,7 @@ static int handle_scalar_misaligned_store(struct pt_regs *regs)
return -EOPNOTSUPP;
if (user_mode(regs)) {
- if (copy_to_user((u8 __user *)addr, &val, len))
+ if (copy_to_user_nofault((u8 __user *)addr, &val, len))
return -1;
} else {
memcpy((u8 *)addr, &val, len);
@@ -626,6 +625,10 @@ bool __init check_vector_unaligned_access_emulated_all_cpus(void)
{
int cpu;
+ /*
+ * Although documented as very slow, schedule_on_each_cpu() is used here
+ * because kernel_vector_begin() expects irqs to be enabled and will
+ * panic() otherwise.
+ */
schedule_on_each_cpu(check_vector_unaligned_access_emulated);
for_each_online_cpu(cpu)
@@ -642,11 +645,23 @@ bool __init check_vector_unaligned_access_emulated_all_cpus(void)
}
#endif
+static bool all_cpus_unaligned_scalar_access_emulated(void)
+{
+ int cpu;
+
+ for_each_online_cpu(cpu)
+ if (per_cpu(misaligned_access_speed, cpu) !=
+ RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED)
+ return false;
+
+ return true;
+}
+
#ifdef CONFIG_RISCV_SCALAR_MISALIGNED
static bool unaligned_ctl __read_mostly;
-void check_unaligned_access_emulated(struct work_struct *work __always_unused)
+static void check_unaligned_access_emulated(void *arg __always_unused)
{
int cpu = smp_processor_id();
long *mas_ptr = per_cpu_ptr(&misaligned_access_speed, cpu);
@@ -657,6 +672,13 @@ void check_unaligned_access_emulated(struct work_struct *work __always_unused)
__asm__ __volatile__ (
" "REG_L" %[tmp], 1(%[ptr])\n"
: [tmp] "=r" (tmp_val) : [ptr] "r" (&tmp_var) : "memory");
+}
+
+static int cpu_online_check_unaligned_access_emulated(unsigned int cpu)
+{
+ long *mas_ptr = per_cpu_ptr(&misaligned_access_speed, cpu);
+
+ check_unaligned_access_emulated(NULL);
/*
* If unaligned_ctl is already set, this means that we detected that all
@@ -665,26 +687,23 @@ void check_unaligned_access_emulated(struct work_struct *work __always_unused)
*/
if (unlikely(unaligned_ctl && (*mas_ptr != RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED))) {
pr_crit("CPU misaligned accesses non homogeneous (expected all emulated)\n");
- while (true)
- cpu_relax();
+ return -EINVAL;
}
+
+ return 0;
}
bool __init check_unaligned_access_emulated_all_cpus(void)
{
- int cpu;
-
/*
* We can only support PR_UNALIGN controls if all CPUs have misaligned
* accesses emulated since tasks requesting such control can run on any
* CPU.
*/
- schedule_on_each_cpu(check_unaligned_access_emulated);
+ on_each_cpu(check_unaligned_access_emulated, NULL, 1);
- for_each_online_cpu(cpu)
- if (per_cpu(misaligned_access_speed, cpu)
- != RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED)
- return false;
+ if (!all_cpus_unaligned_scalar_access_emulated())
+ return false;
unaligned_ctl = true;
return true;
@@ -699,4 +718,73 @@ bool __init check_unaligned_access_emulated_all_cpus(void)
{
return false;
}
+static int cpu_online_check_unaligned_access_emulated(unsigned int cpu)
+{
+ return 0;
+}
+#endif
+
+static bool misaligned_traps_delegated;
+
+#ifdef CONFIG_RISCV_SBI
+
+static int cpu_online_sbi_unaligned_setup(unsigned int cpu)
+{
+ if (sbi_fwft_set(SBI_FWFT_MISALIGNED_EXC_DELEG, 1, 0) &&
+ misaligned_traps_delegated) {
+ pr_crit("Misaligned trap delegation non homogeneous (expected delegated)");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+void __init unaligned_access_init(void)
+{
+ int ret;
+
+ ret = sbi_fwft_set_online_cpus(SBI_FWFT_MISALIGNED_EXC_DELEG, 1, 0);
+ if (ret)
+ return;
+
+ misaligned_traps_delegated = true;
+ pr_info("SBI misaligned access exception delegation ok\n");
+ /*
+ * Note that we don't have to take any specific action here: if
+ * the delegation is successful, then
+ * check_unaligned_access_emulated() will verify that the
+ * platform indeed traps on misaligned accesses.
+ */
+}
+#else
+void __init unaligned_access_init(void) {}
+
+static int cpu_online_sbi_unaligned_setup(unsigned int cpu __always_unused)
+{
+ return 0;
+}
+
#endif
+
+int cpu_online_unaligned_access_init(unsigned int cpu)
+{
+ int ret;
+
+ ret = cpu_online_sbi_unaligned_setup(cpu);
+ if (ret)
+ return ret;
+
+ return cpu_online_check_unaligned_access_emulated(cpu);
+}
+
+bool misaligned_traps_can_delegate(void)
+{
+ /*
+ * Either we successfully requested misaligned traps delegation for all
+ * CPUs, or the SBI does not implement the FWFT extension but delegated
+ * the exception by default.
+ */
+ return misaligned_traps_delegated ||
+ all_cpus_unaligned_scalar_access_emulated();
+}
+EXPORT_SYMBOL_GPL(misaligned_traps_can_delegate);
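
A hedged sketch of how a consumer of the exported predicate might use it (the caller is hypothetical, e.g. a module that relies on misaligned accesses trapping into the kernel):

	if (!misaligned_traps_can_delegate()) {
		pr_err("misaligned traps are not delegated to the kernel\n");
		return -EOPNOTSUPP;
	}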
diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c
index b8ba13819d05..ae2068425fbc 100644
--- a/arch/riscv/kernel/unaligned_access_speed.c
+++ b/arch/riscv/kernel/unaligned_access_speed.c
@@ -236,6 +236,11 @@ arch_initcall_sync(lock_and_set_unaligned_access_static_branch);
static int riscv_online_cpu(unsigned int cpu)
{
+ int ret = cpu_online_unaligned_access_init(cpu);
+
+ if (ret)
+ return ret;
+
/* We are already set since the last check */
if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN) {
goto exit;
@@ -248,7 +253,6 @@ static int riscv_online_cpu(unsigned int cpu)
{
static struct page *buf;
- check_unaligned_access_emulated(NULL);
buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
if (!buf) {
pr_warn("Allocation failure, not measuring misaligned performance\n");
@@ -439,6 +443,8 @@ static int __init check_unaligned_access_all_cpus(void)
{
int cpu;
+ unaligned_access_init();
+
if (unaligned_scalar_speed_param != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN) {
pr_info("scalar unaligned access speed set to '%s' (%lu) by command line\n",
speed_str[unaligned_scalar_speed_param], unaligned_scalar_speed_param);
diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c
index cc2895d1fbc2..3a8e038b10a2 100644
--- a/arch/riscv/kernel/vdso.c
+++ b/arch/riscv/kernel/vdso.c
@@ -136,7 +136,7 @@ static int __setup_additional_pages(struct mm_struct *mm,
ret =
_install_special_mapping(mm, vdso_base, vdso_text_len,
- (VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC),
+ (VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC | VM_SEALED_SYSMAP),
vdso_info->cm);
if (IS_ERR(ret))
diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile
index ad73607abc28..9ebb5e590f93 100644
--- a/arch/riscv/kernel/vdso/Makefile
+++ b/arch/riscv/kernel/vdso/Makefile
@@ -13,9 +13,17 @@ vdso-syms += flush_icache
vdso-syms += hwprobe
vdso-syms += sys_hwprobe
+ifdef CONFIG_VDSO_GETRANDOM
+vdso-syms += getrandom
+endif
+
# Files to link into the vdso
obj-vdso = $(patsubst %, %.o, $(vdso-syms)) note.o
+ifdef CONFIG_VDSO_GETRANDOM
+obj-vdso += vgetrandom-chacha.o
+endif
+
ccflags-y := -fno-stack-protector
ccflags-y += -DDISABLE_BRANCH_PROFILING
ccflags-y += -fno-builtin
@@ -24,6 +32,10 @@ ifneq ($(c-gettimeofday-y),)
CFLAGS_vgettimeofday.o += -fPIC -include $(c-gettimeofday-y)
endif
+ifneq ($(c-getrandom-y),)
+ CFLAGS_getrandom.o += -fPIC -include $(c-getrandom-y)
+endif
+
CFLAGS_hwprobe.o += -fPIC
# Build rules
@@ -38,6 +50,7 @@ endif
# Disable -pg to prevent insert call site
CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS)
+CFLAGS_REMOVE_getrandom.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS)
CFLAGS_REMOVE_hwprobe.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS)
# Force dependency
@@ -47,7 +60,7 @@ $(obj)/vdso.o: $(obj)/vdso.so
$(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE
$(call if_changed,vdsold_and_check)
LDFLAGS_vdso.so.dbg = -shared -soname=linux-vdso.so.1 \
- --build-id=sha1 --hash-style=both --eh-frame-hdr
+ --build-id=sha1 --eh-frame-hdr
# strip rule for the .so file
$(obj)/%.so: OBJCOPYFLAGS := -S
diff --git a/arch/riscv/kernel/vdso/getrandom.c b/arch/riscv/kernel/vdso/getrandom.c
new file mode 100644
index 000000000000..f21922e8cebd
--- /dev/null
+++ b/arch/riscv/kernel/vdso/getrandom.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2025 Xi Ruoyao <xry111@xry111.site>. All Rights Reserved.
+ */
+#include <linux/types.h>
+
+ssize_t __vdso_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len)
+{
+ return __cvdso_getrandom(buffer, len, flags, opaque_state, opaque_len);
+}
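
Callers normally reach __vdso_getrandom() through the libc wrapper, but the raw protocol can be exercised directly. A minimal sketch, assuming the symbol has already been resolved from the vDSO (resolution via getauxval(AT_SYSINFO_EHDR) is omitted); per the generic vDSO getrandom ABI, opaque_len == ~0UL asks the function to describe the per-thread state the caller must allocate:

	#include <stddef.h>
	#include <sys/types.h>

	/* mirrors struct vgetrandom_opaque_params from include/vdso/getrandom.h */
	struct vgetrandom_opaque_params {
		unsigned int size_of_opaque_state;
		unsigned int mmap_prot;
		unsigned int mmap_flags;
		unsigned int reserved[13];
	};

	extern ssize_t (*vgetrandom_fn)(void *, size_t, unsigned int,
					void *, size_t);

	static size_t vgetrandom_state_size(void)
	{
		struct vgetrandom_opaque_params params;

		/* opaque_len == ~0UL asks the vDSO to describe its state needs */
		if (vgetrandom_fn(NULL, 0, 0, &params, ~0UL) != 0)
			return 0;
		return params.size_of_opaque_state;
	}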
diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S
index 8e86965a8aae..7c15b0f4ee3b 100644
--- a/arch/riscv/kernel/vdso/vdso.lds.S
+++ b/arch/riscv/kernel/vdso/vdso.lds.S
@@ -80,6 +80,9 @@ VERSION
#ifndef COMPAT_VDSO
__vdso_riscv_hwprobe;
#endif
+#if defined(CONFIG_VDSO_GETRANDOM) && !defined(COMPAT_VDSO)
+ __vdso_getrandom;
+#endif
local: *;
};
}
diff --git a/arch/riscv/kernel/vdso/vgetrandom-chacha.S b/arch/riscv/kernel/vdso/vgetrandom-chacha.S
new file mode 100644
index 000000000000..5f0dad8f2373
--- /dev/null
+++ b/arch/riscv/kernel/vdso/vgetrandom-chacha.S
@@ -0,0 +1,249 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2025 Xi Ruoyao <xry111@xry111.site>. All Rights Reserved.
+ *
+ * Based on arch/loongarch/vdso/vgetrandom-chacha.S.
+ */
+
+#include <asm/asm.h>
+#include <linux/linkage.h>
+
+.text
+
+.macro ROTRI rd rs imm
+ slliw t0, \rs, 32 - \imm
+ srliw \rd, \rs, \imm
+ or \rd, \rd, t0
+.endm
+
+.macro OP_4REG op d0 d1 d2 d3 s0 s1 s2 s3
+ \op \d0, \d0, \s0
+ \op \d1, \d1, \s1
+ \op \d2, \d2, \s2
+ \op \d3, \d3, \s3
+.endm
+
+/*
+ * a0: output bytes
+ * a1: 32-byte key input
+ * a2: 8-byte counter input/output
+ * a3: number of 64-byte blocks to write to output
+ */
+SYM_FUNC_START(__arch_chacha20_blocks_nostack)
+
+#define output a0
+#define key a1
+#define counter a2
+#define nblocks a3
+#define i a4
+#define state0 s0
+#define state1 s1
+#define state2 s2
+#define state3 s3
+#define state4 s4
+#define state5 s5
+#define state6 s6
+#define state7 s7
+#define state8 s8
+#define state9 s9
+#define state10 s10
+#define state11 s11
+#define state12 a5
+#define state13 a6
+#define state14 a7
+#define state15 t1
+#define cnt t2
+#define copy0 t3
+#define copy1 t4
+#define copy2 t5
+#define copy3 t6
+
+/* Packs to be used with OP_4REG */
+#define line0 state0, state1, state2, state3
+#define line1 state4, state5, state6, state7
+#define line2 state8, state9, state10, state11
+#define line3 state12, state13, state14, state15
+
+#define line1_perm state5, state6, state7, state4
+#define line2_perm state10, state11, state8, state9
+#define line3_perm state15, state12, state13, state14
+
+#define copy copy0, copy1, copy2, copy3
+
+#define _16 16, 16, 16, 16
+#define _20 20, 20, 20, 20
+#define _24 24, 24, 24, 24
+#define _25 25, 25, 25, 25
+
+ /*
+ * The ABI requires s0-s11 to be saved.
+ * This does not violate the stack-less requirement: no sensitive data
+ * is spilled onto the stack.
+ */
+ addi sp, sp, -12*SZREG
+ REG_S s0, (sp)
+ REG_S s1, SZREG(sp)
+ REG_S s2, 2*SZREG(sp)
+ REG_S s3, 3*SZREG(sp)
+ REG_S s4, 4*SZREG(sp)
+ REG_S s5, 5*SZREG(sp)
+ REG_S s6, 6*SZREG(sp)
+ REG_S s7, 7*SZREG(sp)
+ REG_S s8, 8*SZREG(sp)
+ REG_S s9, 9*SZREG(sp)
+ REG_S s10, 10*SZREG(sp)
+ REG_S s11, 11*SZREG(sp)
+
+ ld cnt, (counter)
+
+ li copy0, 0x61707865
+ li copy1, 0x3320646e
+ li copy2, 0x79622d32
+ li copy3, 0x6b206574
+
+.Lblock:
+ /* state[0,1,2,3] = "expand 32-byte k" */
+ mv state0, copy0
+ mv state1, copy1
+ mv state2, copy2
+ mv state3, copy3
+
+ /* state[4,5,..,11] = key */
+ lw state4, (key)
+ lw state5, 4(key)
+ lw state6, 8(key)
+ lw state7, 12(key)
+ lw state8, 16(key)
+ lw state9, 20(key)
+ lw state10, 24(key)
+ lw state11, 28(key)
+
+ /* state[12,13] = counter */
+ mv state12, cnt
+ srli state13, cnt, 32
+
+ /* state[14,15] = 0 */
+ mv state14, zero
+ mv state15, zero
+
+ li i, 10
+.Lpermute:
+ /* odd round */
+ OP_4REG addw line0, line1
+ OP_4REG xor line3, line0
+ OP_4REG ROTRI line3, _16
+
+ OP_4REG addw line2, line3
+ OP_4REG xor line1, line2
+ OP_4REG ROTRI line1, _20
+
+ OP_4REG addw line0, line1
+ OP_4REG xor line3, line0
+ OP_4REG ROTRI line3, _24
+
+ OP_4REG addw line2, line3
+ OP_4REG xor line1, line2
+ OP_4REG ROTRI line1, _25
+
+ /* even round */
+ OP_4REG addw line0, line1_perm
+ OP_4REG xor line3_perm, line0
+ OP_4REG ROTRI line3_perm, _16
+
+ OP_4REG addw line2_perm, line3_perm
+ OP_4REG xor line1_perm, line2_perm
+ OP_4REG ROTRI line1_perm, _20
+
+ OP_4REG addw line0, line1_perm
+ OP_4REG xor line3_perm, line0
+ OP_4REG ROTRI line3_perm, _24
+
+ OP_4REG addw line2_perm, line3_perm
+ OP_4REG xor line1_perm, line2_perm
+ OP_4REG ROTRI line1_perm, _25
+
+ addi i, i, -1
+ bnez i, .Lpermute
+
+ /* output[0,1,2,3] = copy[0,1,2,3] + state[0,1,2,3] */
+ OP_4REG addw line0, copy
+ sw state0, (output)
+ sw state1, 4(output)
+ sw state2, 8(output)
+ sw state3, 12(output)
+
+ /* from now on state[0,1,2,3] are scratch registers */
+
+ /* state[0,1,2,3] = lo(key) */
+ lw state0, (key)
+ lw state1, 4(key)
+ lw state2, 8(key)
+ lw state3, 12(key)
+
+ /* output[4,5,6,7] = state[0,1,2,3] + state[4,5,6,7] */
+ OP_4REG addw line1, line0
+ sw state4, 16(output)
+ sw state5, 20(output)
+ sw state6, 24(output)
+ sw state7, 28(output)
+
+ /* state[0,1,2,3] = hi(key) */
+ lw state0, 16(key)
+ lw state1, 20(key)
+ lw state2, 24(key)
+ lw state3, 28(key)
+
+ /* output[8,9,10,11] = state[0,1,2,3] + state[8,9,10,11] */
+ OP_4REG addw line2, line0
+ sw state8, 32(output)
+ sw state9, 36(output)
+ sw state10, 40(output)
+ sw state11, 44(output)
+
+ /* output[12,13,14,15] = state[12,13,14,15] + [cnt_lo, cnt_hi, 0, 0] */
+ addw state12, state12, cnt
+ srli state0, cnt, 32
+ addw state13, state13, state0
+ sw state12, 48(output)
+ sw state13, 52(output)
+ sw state14, 56(output)
+ sw state15, 60(output)
+
+ /* ++counter */
+ addi cnt, cnt, 1
+
+ /* output += 64 */
+ addi output, output, 64
+ /* --nblocks */
+ addi nblocks, nblocks, -1
+ bnez nblocks, .Lblock
+
+ /* counter = [cnt_lo, cnt_hi] */
+ sd cnt, (counter)
+
+ /*
+ * Zero out the potentially sensitive regs, in case nothing uses these
+ * again. As of now, copy[0,1,2,3] just contain "expand 32-byte k" and
+ * state[0,...,11] are s0-s11, which we restore in the epilogue, so we
+ * only need to zero state[12,...,15].
+ */
+ mv state12, zero
+ mv state13, zero
+ mv state14, zero
+ mv state15, zero
+
+ REG_L s0, (sp)
+ REG_L s1, SZREG(sp)
+ REG_L s2, 2*SZREG(sp)
+ REG_L s3, 3*SZREG(sp)
+ REG_L s4, 4*SZREG(sp)
+ REG_L s5, 5*SZREG(sp)
+ REG_L s6, 6*SZREG(sp)
+ REG_L s7, 7*SZREG(sp)
+ REG_L s8, 8*SZREG(sp)
+ REG_L s9, 9*SZREG(sp)
+ REG_L s10, 10*SZREG(sp)
+ REG_L s11, 11*SZREG(sp)
+ addi sp, sp, 12*SZREG
+
+ ret
+SYM_FUNC_END(__arch_chacha20_blocks_nostack)
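
For reference, each OP_4REG add/xor/ROTRI group above applies the standard ChaCha20 quarter-round to four columns (odd round) or diagonals (even round) at once. A minimal C rendering of one quarter-round, illustrative only; note the asm rotates right by 16/20/24/25, which equals rotating left by 16/12/8/7:

	#include <stdint.h>

	static inline uint32_t rotl32(uint32_t v, int n)
	{
		return (v << n) | (v >> (32 - n));
	}

	static void quarter_round(uint32_t *a, uint32_t *b,
				  uint32_t *c, uint32_t *d)
	{
		*a += *b; *d ^= *a; *d = rotl32(*d, 16);
		*c += *d; *b ^= *c; *b = rotl32(*b, 12);
		*a += *b; *d ^= *a; *d = rotl32(*d, 8);
		*c += *d; *b ^= *c; *b = rotl32(*b, 7);
	}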
diff --git a/arch/riscv/kernel/vendor_extensions.c b/arch/riscv/kernel/vendor_extensions.c
index 9feb7f67a0a3..92d8ff81f42c 100644
--- a/arch/riscv/kernel/vendor_extensions.c
+++ b/arch/riscv/kernel/vendor_extensions.c
@@ -6,6 +6,7 @@
#include <asm/vendorid_list.h>
#include <asm/vendor_extensions.h>
#include <asm/vendor_extensions/andes.h>
+#include <asm/vendor_extensions/sifive.h>
#include <asm/vendor_extensions/thead.h>
#include <linux/array_size.h>
@@ -15,6 +16,9 @@ struct riscv_isa_vendor_ext_data_list *riscv_isa_vendor_ext_list[] = {
#ifdef CONFIG_RISCV_ISA_VENDOR_EXT_ANDES
&riscv_isa_vendor_ext_list_andes,
#endif
+#ifdef CONFIG_RISCV_ISA_VENDOR_EXT_SIFIVE
+ &riscv_isa_vendor_ext_list_sifive,
+#endif
#ifdef CONFIG_RISCV_ISA_VENDOR_EXT_THEAD
&riscv_isa_vendor_ext_list_thead,
#endif
@@ -45,6 +49,12 @@ bool __riscv_isa_vendor_extension_available(int cpu, unsigned long vendor, unsig
cpu_bmap = riscv_isa_vendor_ext_list_andes.per_hart_isa_bitmap;
break;
#endif
+ #ifdef CONFIG_RISCV_ISA_VENDOR_EXT_SIFIVE
+ case SIFIVE_VENDOR_ID:
+ bmap = &riscv_isa_vendor_ext_list_sifive.all_harts_isa_bitmap;
+ cpu_bmap = riscv_isa_vendor_ext_list_sifive.per_hart_isa_bitmap;
+ break;
+ #endif
#ifdef CONFIG_RISCV_ISA_VENDOR_EXT_THEAD
case THEAD_VENDOR_ID:
bmap = &riscv_isa_vendor_ext_list_thead.all_harts_isa_bitmap;
diff --git a/arch/riscv/kernel/vendor_extensions/Makefile b/arch/riscv/kernel/vendor_extensions/Makefile
index 866414c81a9f..a4eca96d1c8a 100644
--- a/arch/riscv/kernel/vendor_extensions/Makefile
+++ b/arch/riscv/kernel/vendor_extensions/Makefile
@@ -1,5 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_ANDES) += andes.o
+obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_SIFIVE) += sifive.o
+obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_SIFIVE) += sifive_hwprobe.o
obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_THEAD) += thead.o
obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_THEAD) += thead_hwprobe.o
diff --git a/arch/riscv/kernel/vendor_extensions/sifive.c b/arch/riscv/kernel/vendor_extensions/sifive.c
new file mode 100644
index 000000000000..1411337dc1e6
--- /dev/null
+++ b/arch/riscv/kernel/vendor_extensions/sifive.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <asm/cpufeature.h>
+#include <asm/vendor_extensions.h>
+#include <asm/vendor_extensions/sifive.h>
+
+#include <linux/array_size.h>
+#include <linux/types.h>
+
+/* All SiFive vendor extensions supported in Linux */
+const struct riscv_isa_ext_data riscv_isa_vendor_ext_sifive[] = {
+ __RISCV_ISA_EXT_DATA(xsfvfnrclipxfqf, RISCV_ISA_VENDOR_EXT_XSFVFNRCLIPXFQF),
+ __RISCV_ISA_EXT_DATA(xsfvfwmaccqqq, RISCV_ISA_VENDOR_EXT_XSFVFWMACCQQQ),
+ __RISCV_ISA_EXT_DATA(xsfvqmaccdod, RISCV_ISA_VENDOR_EXT_XSFVQMACCDOD),
+ __RISCV_ISA_EXT_DATA(xsfvqmaccqoq, RISCV_ISA_VENDOR_EXT_XSFVQMACCQOQ),
+};
+
+struct riscv_isa_vendor_ext_data_list riscv_isa_vendor_ext_list_sifive = {
+ .ext_data_count = ARRAY_SIZE(riscv_isa_vendor_ext_sifive),
+ .ext_data = riscv_isa_vendor_ext_sifive,
+};
diff --git a/arch/riscv/kernel/vendor_extensions/sifive_hwprobe.c b/arch/riscv/kernel/vendor_extensions/sifive_hwprobe.c
new file mode 100644
index 000000000000..1f77f6309763
--- /dev/null
+++ b/arch/riscv/kernel/vendor_extensions/sifive_hwprobe.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <asm/vendor_extensions/sifive.h>
+#include <asm/vendor_extensions/sifive_hwprobe.h>
+#include <asm/vendor_extensions/vendor_hwprobe.h>
+
+#include <linux/cpumask.h>
+#include <linux/types.h>
+
+#include <uapi/asm/hwprobe.h>
+#include <uapi/asm/vendor/sifive.h>
+
+void hwprobe_isa_vendor_ext_sifive_0(struct riscv_hwprobe *pair, const struct cpumask *cpus)
+{
+ VENDOR_EXTENSION_SUPPORTED(pair, cpus,
+ riscv_isa_vendor_ext_list_sifive.per_hart_isa_bitmap, {
+ VENDOR_EXT_KEY(XSFVQMACCDOD);
+ VENDOR_EXT_KEY(XSFVQMACCQOQ);
+ VENDOR_EXT_KEY(XSFVFNRCLIPXFQF);
+ VENDOR_EXT_KEY(XSFVFWMACCQQQ);
+ });
+}
diff --git a/arch/riscv/lib/riscv_v_helpers.c b/arch/riscv/lib/riscv_v_helpers.c
index be38a93cedae..7bbdfc6d4552 100644
--- a/arch/riscv/lib/riscv_v_helpers.c
+++ b/arch/riscv/lib/riscv_v_helpers.c
@@ -16,8 +16,11 @@
#ifdef CONFIG_MMU
size_t riscv_v_usercopy_threshold = CONFIG_RISCV_ISA_V_UCOPY_THRESHOLD;
int __asm_vector_usercopy(void *dst, void *src, size_t n);
+int __asm_vector_usercopy_sum_enabled(void *dst, void *src, size_t n);
int fallback_scalar_usercopy(void *dst, void *src, size_t n);
-asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n)
+int fallback_scalar_usercopy_sum_enabled(void *dst, void *src, size_t n);
+asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n,
+ bool enable_sum)
{
size_t remain, copied;
@@ -26,7 +29,8 @@ asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n)
goto fallback;
kernel_vector_begin();
- remain = __asm_vector_usercopy(dst, src, n);
+ remain = enable_sum ? __asm_vector_usercopy(dst, src, n) :
+ __asm_vector_usercopy_sum_enabled(dst, src, n);
kernel_vector_end();
if (remain) {
@@ -40,6 +44,7 @@ asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n)
return remain;
fallback:
- return fallback_scalar_usercopy(dst, src, n);
+ return enable_sum ? fallback_scalar_usercopy(dst, src, n) :
+ fallback_scalar_usercopy_sum_enabled(dst, src, n);
}
#endif
diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S
index 6a9f116bb545..4efea1b3326c 100644
--- a/arch/riscv/lib/uaccess.S
+++ b/arch/riscv/lib/uaccess.S
@@ -17,14 +17,43 @@ SYM_FUNC_START(__asm_copy_to_user)
ALTERNATIVE("j fallback_scalar_usercopy", "nop", 0, RISCV_ISA_EXT_ZVE32X, CONFIG_RISCV_ISA_V)
REG_L t0, riscv_v_usercopy_threshold
bltu a2, t0, fallback_scalar_usercopy
- tail enter_vector_usercopy
+ li a3, 1
+ tail enter_vector_usercopy
#endif
-SYM_FUNC_START(fallback_scalar_usercopy)
+SYM_FUNC_END(__asm_copy_to_user)
+EXPORT_SYMBOL(__asm_copy_to_user)
+SYM_FUNC_ALIAS(__asm_copy_from_user, __asm_copy_to_user)
+EXPORT_SYMBOL(__asm_copy_from_user)
+SYM_FUNC_START(fallback_scalar_usercopy)
/* Enable access to user memory */
- li t6, SR_SUM
- csrs CSR_STATUS, t6
+ li t6, SR_SUM
+ csrs CSR_STATUS, t6
+ mv t6, ra
+ call fallback_scalar_usercopy_sum_enabled
+
+ /* Disable access to user memory */
+ mv ra, t6
+ li t6, SR_SUM
+ csrc CSR_STATUS, t6
+ ret
+SYM_FUNC_END(fallback_scalar_usercopy)
+
+SYM_FUNC_START(__asm_copy_to_user_sum_enabled)
+#ifdef CONFIG_RISCV_ISA_V
+ ALTERNATIVE("j fallback_scalar_usercopy_sum_enabled", "nop", 0, RISCV_ISA_EXT_ZVE32X, CONFIG_RISCV_ISA_V)
+ REG_L t0, riscv_v_usercopy_threshold
+ bltu a2, t0, fallback_scalar_usercopy_sum_enabled
+ li a3, 0
+ tail enter_vector_usercopy
+#endif
+SYM_FUNC_END(__asm_copy_to_user_sum_enabled)
+SYM_FUNC_ALIAS(__asm_copy_from_user_sum_enabled, __asm_copy_to_user_sum_enabled)
+EXPORT_SYMBOL(__asm_copy_from_user_sum_enabled)
+EXPORT_SYMBOL(__asm_copy_to_user_sum_enabled)
+
+SYM_FUNC_START(fallback_scalar_usercopy_sum_enabled)
/*
* Save the terminal address which will be used to compute the number
* of bytes copied in case of a fixup exception.
@@ -178,23 +207,12 @@ SYM_FUNC_START(fallback_scalar_usercopy)
bltu a0, t0, 4b /* t0 - end of dst */
.Lout_copy_user:
- /* Disable access to user memory */
- csrc CSR_STATUS, t6
li a0, 0
ret
-
- /* Exception fixup code */
10:
- /* Disable access to user memory */
- csrc CSR_STATUS, t6
sub a0, t5, a0
ret
-SYM_FUNC_END(__asm_copy_to_user)
-SYM_FUNC_END(fallback_scalar_usercopy)
-EXPORT_SYMBOL(__asm_copy_to_user)
-SYM_FUNC_ALIAS(__asm_copy_from_user, __asm_copy_to_user)
-EXPORT_SYMBOL(__asm_copy_from_user)
-
+SYM_FUNC_END(fallback_scalar_usercopy_sum_enabled)
SYM_FUNC_START(__clear_user)
diff --git a/arch/riscv/lib/uaccess_vector.S b/arch/riscv/lib/uaccess_vector.S
index 7c45f26de4f7..03b5560609a2 100644
--- a/arch/riscv/lib/uaccess_vector.S
+++ b/arch/riscv/lib/uaccess_vector.S
@@ -24,7 +24,18 @@ SYM_FUNC_START(__asm_vector_usercopy)
/* Enable access to user memory */
li t6, SR_SUM
csrs CSR_STATUS, t6
+ mv t6, ra
+ call __asm_vector_usercopy_sum_enabled
+
+ /* Disable access to user memory */
+ mv ra, t6
+ li t6, SR_SUM
+ csrc CSR_STATUS, t6
+ ret
+SYM_FUNC_END(__asm_vector_usercopy)
+
+SYM_FUNC_START(__asm_vector_usercopy_sum_enabled)
loop:
vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma
fixup vle8.v vData, (pSrc), 10f
@@ -36,8 +47,6 @@ loop:
/* Exception fixup for vector load is shared with normal exit */
10:
- /* Disable access to user memory */
- csrc CSR_STATUS, t6
mv a0, iNum
ret
@@ -49,4 +58,4 @@ loop:
csrr t2, CSR_VSTART
sub iNum, iNum, t2
j 10b
-SYM_FUNC_END(__asm_vector_usercopy)
+SYM_FUNC_END(__asm_vector_usercopy_sum_enabled)
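
The refactoring in these two files follows one pattern: each exported copy routine becomes a thin wrapper that toggles SR_SUM around a *_sum_enabled body, so callers that batch several copies can pay the CSR cost once. A C analogue of the wrapper, as a sketch only (the real code is the assembly above):

	long fallback_scalar_usercopy(void *dst, void *src, size_t n)
	{
		long remain;

		csr_set(CSR_STATUS, SR_SUM);	/* allow user memory access */
		remain = fallback_scalar_usercopy_sum_enabled(dst, src, n);
		csr_clear(CSR_STATUS, SR_SUM);	/* and revoke it again */
		return remain;
	}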
diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c
index b8e96dfff19d..4ca5aafce22e 100644
--- a/arch/riscv/mm/cacheflush.c
+++ b/arch/riscv/mm/cacheflush.c
@@ -24,7 +24,20 @@ void flush_icache_all(void)
if (num_online_cpus() < 2)
return;
- else if (riscv_use_sbi_for_rfence())
+
+ /*
+ * Make sure all previous writes to the D$ are ordered before sending
+ * the IPI. The RISC-V spec states that a hart must execute a data fence
+ * before triggering a remote fence.i in order to make the modification
+ * visible to remote harts.
+ *
+ * IPIs on RISC-V are triggered by MMIO writes to either the CLINT or
+ * the S-IMSIC, so the fence ensures previous data writes "happen before"
+ * the MMIO.
+ */
+ RISCV_FENCE(w, o);
+
+ if (riscv_use_sbi_for_rfence())
sbi_remote_fence_i(NULL);
else
on_each_cpu(ipi_remote_fence_i, NULL, 1);
@@ -101,6 +114,9 @@ EXPORT_SYMBOL_GPL(riscv_cbom_block_size);
unsigned int riscv_cboz_block_size;
EXPORT_SYMBOL_GPL(riscv_cboz_block_size);
+unsigned int riscv_cbop_block_size;
+EXPORT_SYMBOL_GPL(riscv_cbop_block_size);
+
static void __init cbo_get_block_size(struct device_node *node,
const char *name, u32 *block_size,
unsigned long *first_hartid)
@@ -125,8 +141,8 @@ static void __init cbo_get_block_size(struct device_node *node,
void __init riscv_init_cbo_blocksizes(void)
{
- unsigned long cbom_hartid, cboz_hartid;
- u32 cbom_block_size = 0, cboz_block_size = 0;
+ unsigned long cbom_hartid, cboz_hartid, cbop_hartid;
+ u32 cbom_block_size = 0, cboz_block_size = 0, cbop_block_size = 0;
struct device_node *node;
struct acpi_table_header *rhct;
acpi_status status;
@@ -138,13 +154,15 @@ void __init riscv_init_cbo_blocksizes(void)
&cbom_block_size, &cbom_hartid);
cbo_get_block_size(node, "riscv,cboz-block-size",
&cboz_block_size, &cboz_hartid);
+ cbo_get_block_size(node, "riscv,cbop-block-size",
+ &cbop_block_size, &cbop_hartid);
}
} else {
status = acpi_get_table(ACPI_SIG_RHCT, 0, &rhct);
if (ACPI_FAILURE(status))
return;
- acpi_get_cbo_block_size(rhct, &cbom_block_size, &cboz_block_size, NULL);
+ acpi_get_cbo_block_size(rhct, &cbom_block_size, &cboz_block_size, &cbop_block_size);
acpi_put_table((struct acpi_table_header *)rhct);
}
@@ -153,6 +171,9 @@ void __init riscv_init_cbo_blocksizes(void)
if (cboz_block_size)
riscv_cboz_block_size = cboz_block_size;
+
+ if (cbop_block_size)
+ riscv_cbop_block_size = cbop_block_size;
}
#ifdef CONFIG_SMP
diff --git a/arch/riscv/mm/pgtable.c b/arch/riscv/mm/pgtable.c
index 4ae67324f992..8b6c0a112a8d 100644
--- a/arch/riscv/mm/pgtable.c
+++ b/arch/riscv/mm/pgtable.c
@@ -154,4 +154,14 @@ pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
flush_tlb_mm(vma->vm_mm);
return pmd;
}
+
+pud_t pudp_invalidate(struct vm_area_struct *vma, unsigned long address,
+ pud_t *pudp)
+{
+ VM_WARN_ON_ONCE(!pud_present(*pudp));
+ pud_t old = pudp_establish(vma, address, pudp, pud_mkinvalid(*pudp));
+
+ flush_pud_tlb_range(vma, address, address + HPAGE_PUD_SIZE);
+ return old;
+}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
index f9e27ba1df99..e737ba7949b1 100644
--- a/arch/riscv/mm/tlbflush.c
+++ b/arch/riscv/mm/tlbflush.c
@@ -7,6 +7,27 @@
#include <linux/mmu_notifier.h>
#include <asm/sbi.h>
#include <asm/mmu_context.h>
+#include <asm/cpufeature.h>
+
+#define has_svinval() riscv_has_extension_unlikely(RISCV_ISA_EXT_SVINVAL)
+
+static inline void local_sfence_inval_ir(void)
+{
+ asm volatile(SFENCE_INVAL_IR() ::: "memory");
+}
+
+static inline void local_sfence_w_inval(void)
+{
+ asm volatile(SFENCE_W_INVAL() ::: "memory");
+}
+
+static inline void local_sinval_vma(unsigned long vma, unsigned long asid)
+{
+ if (asid != FLUSH_TLB_NO_ASID)
+ asm volatile(SINVAL_VMA(%0, %1) : : "r" (vma), "r" (asid) : "memory");
+ else
+ asm volatile(SINVAL_VMA(%0, zero) : : "r" (vma) : "memory");
+}
/*
* Flush entire TLB if number of entries to be flushed is greater
@@ -27,6 +48,16 @@ static void local_flush_tlb_range_threshold_asid(unsigned long start,
return;
}
+ if (has_svinval()) {
+ local_sfence_w_inval();
+ for (i = 0; i < nr_ptes_in_range; ++i) {
+ local_sinval_vma(start, asid);
+ start += stride;
+ }
+ local_sfence_inval_ir();
+ return;
+ }
+
for (i = 0; i < nr_ptes_in_range; ++i) {
local_flush_tlb_page_asid(start, asid);
start += stride;
@@ -182,6 +213,13 @@ void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
__flush_tlb_range(vma->vm_mm, mm_cpumask(vma->vm_mm),
start, end - start, PMD_SIZE);
}
+
+void flush_pud_tlb_range(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end)
+{
+ __flush_tlb_range(vma->vm_mm, mm_cpumask(vma->vm_mm),
+ start, end - start, PUD_SIZE);
+}
#endif
bool arch_tlbbatch_should_defer(struct mm_struct *mm)
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index e23670e1949c..21c2e61fece4 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -319,7 +319,7 @@ enum prot_type {
PROT_TYPE_DAT = 3,
PROT_TYPE_IEP = 4,
/* Dummy value for passing an initialized value when code != PGM_PROTECTION */
- PROT_NONE,
+ PROT_TYPE_DUMMY,
};
static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva, u8 ar,
@@ -335,7 +335,7 @@ static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva,
switch (code) {
case PGM_PROTECTION:
switch (prot) {
- case PROT_NONE:
+ case PROT_TYPE_DUMMY:
/* We should never get here, acts like termination */
WARN_ON_ONCE(1);
break;
@@ -805,7 +805,7 @@ static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
gpa = kvm_s390_real_to_abs(vcpu, ga);
if (!kvm_is_gpa_in_memslot(vcpu->kvm, gpa)) {
rc = PGM_ADDRESSING;
- prot = PROT_NONE;
+ prot = PROT_TYPE_DUMMY;
}
}
if (rc)
@@ -963,7 +963,7 @@ int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
if (rc == PGM_PROTECTION)
prot = PROT_TYPE_KEYC;
else
- prot = PROT_NONE;
+ prot = PROT_TYPE_DUMMY;
rc = trans_exc_ending(vcpu, rc, ga, ar, mode, prot, terminate);
}
out_unlock:
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 3829521450dd..e1ad05bfd28a 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -441,6 +441,8 @@ void do_secure_storage_access(struct pt_regs *regs)
if (rc)
BUG();
} else {
+ if (faulthandler_disabled())
+ return handle_fault_error_nolock(regs, 0);
mm = current->mm;
mmap_read_lock(mm);
vma = find_vma(mm, addr);
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 5686369779be..9f8a3a5bed7e 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -180,6 +180,7 @@ config CRYPTO_PAES_S390
depends on PKEY
select CRYPTO_ALGAPI
select CRYPTO_SKCIPHER
+ select CRYPTO_ENGINE
help
This is the s390 hardware accelerated implementation of the
AES cipher algorithms for use with protected key.
diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h
index 8c384c25dca1..0a5888b53d6d 100644
--- a/drivers/scsi/aacraid/aacraid.h
+++ b/drivers/scsi/aacraid/aacraid.h
@@ -93,7 +93,6 @@ enum {
#define AAC_NUM_MGT_FIB 8
#define AAC_NUM_IO_FIB (1024 - AAC_NUM_MGT_FIB)
-#define AAC_NUM_FIB (AAC_NUM_IO_FIB + AAC_NUM_MGT_FIB)
#define AAC_MAX_LUN 256
diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c
index ffef61c4aa01..7d9a4dce236b 100644
--- a/drivers/scsi/aacraid/commsup.c
+++ b/drivers/scsi/aacraid/commsup.c
@@ -48,15 +48,7 @@
static int fib_map_alloc(struct aac_dev *dev)
{
- if (dev->max_fib_size > AAC_MAX_NATIVE_SIZE)
- dev->max_cmd_size = AAC_MAX_NATIVE_SIZE;
- else
- dev->max_cmd_size = dev->max_fib_size;
- if (dev->max_fib_size < AAC_MAX_NATIVE_SIZE) {
- dev->max_cmd_size = AAC_MAX_NATIVE_SIZE;
- } else {
- dev->max_cmd_size = dev->max_fib_size;
- }
+ dev->max_cmd_size = AAC_MAX_NATIVE_SIZE;
dprintk((KERN_INFO
"allocate hardware fibs dma_alloc_coherent(%p, %d * (%d + %d), %p)\n",
diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c
index a348df895dca..8ff679e67bbf 100644
--- a/drivers/scsi/scsi_devinfo.c
+++ b/drivers/scsi/scsi_devinfo.c
@@ -836,7 +836,7 @@ int __init scsi_init_devinfo(void)
goto out;
for (i = 0; scsi_static_device_list[i].vendor; i++) {
- error = scsi_dev_info_list_add(1 /* compatibile */,
+ error = scsi_dev_info_list_add(1 /* compatible */,
scsi_static_device_list[i].vendor,
scsi_static_device_list[i].model,
NULL,
diff --git a/drivers/spi/spi-bcm63xx-hsspi.c b/drivers/spi/spi-bcm63xx-hsspi.c
index 644b44d2aef2..18261cbd413b 100644
--- a/drivers/spi/spi-bcm63xx-hsspi.c
+++ b/drivers/spi/spi-bcm63xx-hsspi.c
@@ -745,7 +745,7 @@ static int bcm63xx_hsspi_probe(struct platform_device *pdev)
if (IS_ERR(clk))
return PTR_ERR(clk);
- reset = devm_reset_control_get_optional_exclusive(dev, NULL);
+ reset = devm_reset_control_get_optional_shared(dev, NULL);
if (IS_ERR(reset))
return PTR_ERR(reset);
diff --git a/drivers/spi/spi-bcm63xx.c b/drivers/spi/spi-bcm63xx.c
index c8f64ec69344..b56210734caa 100644
--- a/drivers/spi/spi-bcm63xx.c
+++ b/drivers/spi/spi-bcm63xx.c
@@ -523,7 +523,7 @@ static int bcm63xx_spi_probe(struct platform_device *pdev)
return PTR_ERR(clk);
}
- reset = devm_reset_control_get_optional_exclusive(dev, NULL);
+ reset = devm_reset_control_get_optional_shared(dev, NULL);
if (IS_ERR(reset))
return PTR_ERR(reset);
diff --git a/drivers/spi/spi-pci1xxxx.c b/drivers/spi/spi-pci1xxxx.c
index 330078b1d50f..c6489e90b8b9 100644
--- a/drivers/spi/spi-pci1xxxx.c
+++ b/drivers/spi/spi-pci1xxxx.c
@@ -685,6 +685,17 @@ static irqreturn_t pci1xxxx_spi_isr(int irq, void *dev)
return pci1xxxx_spi_isr_io(irq, dev);
}
+static irqreturn_t pci1xxxx_spi_shared_isr(int irq, void *dev)
+{
+ struct pci1xxxx_spi *par = dev;
+ u8 i = 0;
+
+ for (i = 0; i < par->total_hw_instances; i++)
+ pci1xxxx_spi_isr(irq, par->spi_int[i]);
+
+ return IRQ_HANDLED;
+}
+
static bool pci1xxxx_spi_can_dma(struct spi_controller *host,
struct spi_device *spi,
struct spi_transfer *xfer)
@@ -702,6 +713,7 @@ static int pci1xxxx_spi_probe(struct pci_dev *pdev, const struct pci_device_id *
struct device *dev = &pdev->dev;
struct pci1xxxx_spi *spi_bus;
struct spi_controller *spi_host;
+ int num_vector = 0;
u32 regval;
int ret;
@@ -749,9 +761,9 @@ static int pci1xxxx_spi_probe(struct pci_dev *pdev, const struct pci_device_id *
if (!spi_bus->reg_base)
return -EINVAL;
- ret = pci_alloc_irq_vectors(pdev, hw_inst_cnt, hw_inst_cnt,
- PCI_IRQ_ALL_TYPES);
- if (ret < 0) {
+ num_vector = pci_alloc_irq_vectors(pdev, 1, hw_inst_cnt,
+ PCI_IRQ_ALL_TYPES);
+ if (num_vector < 0) {
dev_err(&pdev->dev, "Error allocating MSI vectors\n");
- return ret;
+ return num_vector;
}
@@ -765,9 +777,15 @@ static int pci1xxxx_spi_probe(struct pci_dev *pdev, const struct pci_device_id *
SPI_MST_EVENT_MASK_REG_OFFSET(spi_sub_ptr->hw_inst));
spi_sub_ptr->irq = pci_irq_vector(pdev, 0);
- ret = devm_request_irq(&pdev->dev, spi_sub_ptr->irq,
- pci1xxxx_spi_isr, PCI1XXXX_IRQ_FLAGS,
- pci_name(pdev), spi_sub_ptr);
+ if (num_vector >= hw_inst_cnt)
+ ret = devm_request_irq(&pdev->dev, spi_sub_ptr->irq,
+ pci1xxxx_spi_isr, PCI1XXXX_IRQ_FLAGS,
+ pci_name(pdev), spi_sub_ptr);
+ else
+ ret = devm_request_irq(&pdev->dev, spi_sub_ptr->irq,
+ pci1xxxx_spi_shared_isr,
+ PCI1XXXX_IRQ_FLAGS | IRQF_SHARED,
+ pci_name(pdev), spi_bus);
if (ret < 0) {
dev_err(&pdev->dev, "Unable to request irq : %d",
spi_sub_ptr->irq);
@@ -798,14 +816,16 @@ static int pci1xxxx_spi_probe(struct pci_dev *pdev, const struct pci_device_id *
regval &= ~SPI_INTR;
writel(regval, spi_bus->reg_base +
SPI_MST_EVENT_MASK_REG_OFFSET(spi_sub_ptr->hw_inst));
- spi_sub_ptr->irq = pci_irq_vector(pdev, iter);
- ret = devm_request_irq(&pdev->dev, spi_sub_ptr->irq,
- pci1xxxx_spi_isr, PCI1XXXX_IRQ_FLAGS,
- pci_name(pdev), spi_sub_ptr);
- if (ret < 0) {
- dev_err(&pdev->dev, "Unable to request irq : %d",
- spi_sub_ptr->irq);
- return -ENODEV;
+ if (num_vector >= hw_inst_cnt) {
+ spi_sub_ptr->irq = pci_irq_vector(pdev, iter);
+ ret = devm_request_irq(&pdev->dev, spi_sub_ptr->irq,
+ pci1xxxx_spi_isr, PCI1XXXX_IRQ_FLAGS,
+ pci_name(pdev), spi_sub_ptr);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "Unable to request irq : %d",
+ spi_sub_ptr->irq);
+ return -ENODEV;
+ }
}
}
diff --git a/drivers/spi/spi-qpic-snand.c b/drivers/spi/spi-qpic-snand.c
index 3f747fd61d19..77d9cc65477a 100644
--- a/drivers/spi/spi-qpic-snand.c
+++ b/drivers/spi/spi-qpic-snand.c
@@ -639,6 +639,20 @@ static int qcom_spi_check_error(struct qcom_nand_controller *snandc)
unsigned int stat;
stat = buffer & BS_CORRECTABLE_ERR_MSK;
+
+ /*
+ * The exact number of corrected bits is
+ * unknown because the hardware only reports the
+ * number of corrected bytes.
+ *
+ * Since we have no better solution at the moment,
+ * report that value as the number of bit errors
+ * even though it is inaccurate in most cases.
+ */
+ if (stat && stat != ecc_cfg->strength)
+ dev_warn_once(snandc->dev,
+ "Warning: due to hw limitation, the reported number of the corrected bits may be inaccurate\n");
+
snandc->qspi->ecc_stats.corrected += stat;
max_bitflips = max(max_bitflips, stat);
}
diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index 76cedd30c274..4410e7d93b7d 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -1397,6 +1397,7 @@ static int ufshcd_clock_scaling_prepare(struct ufs_hba *hba, u64 timeout_us)
* make sure that there are no outstanding requests when
* clock scaling is in progress
*/
+ mutex_lock(&hba->host->scan_mutex);
blk_mq_quiesce_tagset(&hba->host->tag_set);
mutex_lock(&hba->wb_mutex);
down_write(&hba->clk_scaling_lock);
@@ -1407,6 +1408,7 @@ static int ufshcd_clock_scaling_prepare(struct ufs_hba *hba, u64 timeout_us)
up_write(&hba->clk_scaling_lock);
mutex_unlock(&hba->wb_mutex);
blk_mq_unquiesce_tagset(&hba->host->tag_set);
+ mutex_unlock(&hba->host->scan_mutex);
goto out;
}
@@ -1428,6 +1430,7 @@ static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba, int err)
mutex_unlock(&hba->wb_mutex);
blk_mq_unquiesce_tagset(&hba->host->tag_set);
+ mutex_unlock(&hba->host->scan_mutex);
ufshcd_release(hba);
}
diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c
index 37887ec68412..18a978452001 100644
--- a/drivers/ufs/host/ufs-qcom.c
+++ b/drivers/ufs/host/ufs-qcom.c
@@ -122,7 +122,9 @@ static const struct {
};
static void ufs_qcom_get_default_testbus_cfg(struct ufs_qcom_host *host);
-static int ufs_qcom_set_core_clk_ctrl(struct ufs_hba *hba, unsigned long freq);
+static unsigned long ufs_qcom_opp_freq_to_clk_freq(struct ufs_hba *hba,
+ unsigned long freq, char *name);
+static int ufs_qcom_set_core_clk_ctrl(struct ufs_hba *hba, bool is_scale_up, unsigned long freq);
static struct ufs_qcom_host *rcdev_to_ufs_host(struct reset_controller_dev *rcd)
{
@@ -506,10 +508,9 @@ static int ufs_qcom_power_up_sequence(struct ufs_hba *hba)
if (ret)
return ret;
- if (phy->power_count) {
+ if (phy->power_count)
phy_power_off(phy);
- phy_exit(phy);
- }
+
/* phy initialization - calibrate the phy */
ret = phy_init(phy);
@@ -597,13 +598,14 @@ static int ufs_qcom_hce_enable_notify(struct ufs_hba *hba,
*
* @hba: host controller instance
* @is_pre_scale_up: flag indicating the pre-scale-up condition.
+ * @freq: target OPP frequency
* Return: zero for success and non-zero in case of a failure.
*/
-static int ufs_qcom_cfg_timers(struct ufs_hba *hba, bool is_pre_scale_up)
+static int ufs_qcom_cfg_timers(struct ufs_hba *hba, bool is_pre_scale_up, unsigned long freq)
{
struct ufs_qcom_host *host = ufshcd_get_variant(hba);
struct ufs_clk_info *clki;
- unsigned long core_clk_rate = 0;
+ unsigned long clk_freq = 0;
u32 core_clk_cycles_per_us;
/*
@@ -615,22 +617,34 @@ static int ufs_qcom_cfg_timers(struct ufs_hba *hba, bool is_pre_scale_up)
if (host->hw_ver.major < 4 && !ufshcd_is_intr_aggr_allowed(hba))
return 0;
+ if (hba->use_pm_opp && freq != ULONG_MAX) {
+ clk_freq = ufs_qcom_opp_freq_to_clk_freq(hba, freq, "core_clk");
+ if (clk_freq)
+ goto cfg_timers;
+ }
+
list_for_each_entry(clki, &hba->clk_list_head, list) {
if (!strcmp(clki->name, "core_clk")) {
+ if (freq == ULONG_MAX) {
+ clk_freq = clki->max_freq;
+ break;
+ }
+
if (is_pre_scale_up)
- core_clk_rate = clki->max_freq;
+ clk_freq = clki->max_freq;
else
- core_clk_rate = clk_get_rate(clki->clk);
+ clk_freq = clk_get_rate(clki->clk);
break;
}
}
+cfg_timers:
/* If frequency is smaller than 1MHz, set to 1MHz */
- if (core_clk_rate < DEFAULT_CLK_RATE_HZ)
- core_clk_rate = DEFAULT_CLK_RATE_HZ;
+ if (clk_freq < DEFAULT_CLK_RATE_HZ)
+ clk_freq = DEFAULT_CLK_RATE_HZ;
- core_clk_cycles_per_us = core_clk_rate / USEC_PER_SEC;
+ core_clk_cycles_per_us = clk_freq / USEC_PER_SEC;
if (ufshcd_readl(hba, REG_UFS_SYS1CLK_1US) != core_clk_cycles_per_us) {
ufshcd_writel(hba, core_clk_cycles_per_us, REG_UFS_SYS1CLK_1US);
/*
@@ -650,13 +664,13 @@ static int ufs_qcom_link_startup_notify(struct ufs_hba *hba,
switch (status) {
case PRE_CHANGE:
- if (ufs_qcom_cfg_timers(hba, false)) {
+ if (ufs_qcom_cfg_timers(hba, false, ULONG_MAX)) {
dev_err(hba->dev, "%s: ufs_qcom_cfg_timers() failed\n",
__func__);
return -EINVAL;
}
- err = ufs_qcom_set_core_clk_ctrl(hba, ULONG_MAX);
+ err = ufs_qcom_set_core_clk_ctrl(hba, true, ULONG_MAX);
if (err)
dev_err(hba->dev, "cfg core clk ctrl failed\n");
/*
@@ -928,17 +942,6 @@ static int ufs_qcom_pwr_change_notify(struct ufs_hba *hba,
break;
case POST_CHANGE:
- if (ufs_qcom_cfg_timers(hba, false)) {
- dev_err(hba->dev, "%s: ufs_qcom_cfg_timers() failed\n",
- __func__);
- /*
- * we return error code at the end of the routine,
- * but continue to configure UFS_PHY_TX_LANE_ENABLE
- * and bus voting as usual
- */
- ret = -EINVAL;
- }
-
/* cache the power mode parameters to use internally */
memcpy(&host->dev_req_params,
dev_req_params, sizeof(*dev_req_params));
@@ -1414,29 +1417,46 @@ static int ufs_qcom_set_clk_40ns_cycles(struct ufs_hba *hba,
return ufshcd_dme_set(hba, UIC_ARG_MIB(PA_VS_CORE_CLK_40NS_CYCLES), reg);
}
-static int ufs_qcom_set_core_clk_ctrl(struct ufs_hba *hba, unsigned long freq)
+static int ufs_qcom_set_core_clk_ctrl(struct ufs_hba *hba, bool is_scale_up, unsigned long freq)
{
struct ufs_qcom_host *host = ufshcd_get_variant(hba);
struct list_head *head = &hba->clk_list_head;
struct ufs_clk_info *clki;
u32 cycles_in_1us = 0;
u32 core_clk_ctrl_reg;
+ unsigned long clk_freq;
int err;
+ if (hba->use_pm_opp && freq != ULONG_MAX) {
+ clk_freq = ufs_qcom_opp_freq_to_clk_freq(hba, freq, "core_clk_unipro");
+ if (clk_freq) {
+ cycles_in_1us = ceil(clk_freq, HZ_PER_MHZ);
+ goto set_core_clk_ctrl;
+ }
+ }
+
list_for_each_entry(clki, head, list) {
if (!IS_ERR_OR_NULL(clki->clk) &&
!strcmp(clki->name, "core_clk_unipro")) {
- if (!clki->max_freq)
+ if (!clki->max_freq) {
cycles_in_1us = 150; /* default for backwards compatibility */
- else if (freq == ULONG_MAX)
+ break;
+ }
+
+ if (freq == ULONG_MAX) {
cycles_in_1us = ceil(clki->max_freq, HZ_PER_MHZ);
- else
- cycles_in_1us = ceil(freq, HZ_PER_MHZ);
+ break;
+ }
+ if (is_scale_up)
+ cycles_in_1us = ceil(clki->max_freq, HZ_PER_MHZ);
+ else
+ cycles_in_1us = ceil(clk_get_rate(clki->clk), HZ_PER_MHZ);
break;
}
}
+set_core_clk_ctrl:
err = ufshcd_dme_get(hba,
UIC_ARG_MIB(DME_VS_CORE_CLK_CTRL),
&core_clk_ctrl_reg);
@@ -1473,13 +1493,13 @@ static int ufs_qcom_clk_scale_up_pre_change(struct ufs_hba *hba, unsigned long f
{
int ret;
- ret = ufs_qcom_cfg_timers(hba, true);
+ ret = ufs_qcom_cfg_timers(hba, true, freq);
if (ret) {
dev_err(hba->dev, "%s ufs cfg timer failed\n", __func__);
return ret;
}
/* set unipro core clock attributes and clear clock divider */
- return ufs_qcom_set_core_clk_ctrl(hba, freq);
+ return ufs_qcom_set_core_clk_ctrl(hba, true, freq);
}
static int ufs_qcom_clk_scale_up_post_change(struct ufs_hba *hba)
@@ -1510,8 +1530,15 @@ static int ufs_qcom_clk_scale_down_pre_change(struct ufs_hba *hba)
static int ufs_qcom_clk_scale_down_post_change(struct ufs_hba *hba, unsigned long freq)
{
+ int ret;
+
+ ret = ufs_qcom_cfg_timers(hba, false, freq);
+ if (ret) {
+ dev_err(hba->dev, "%s: ufs_qcom_cfg_timers() failed\n", __func__);
+ return ret;
+ }
/* set unipro core clock attributes and clear clock divider */
- return ufs_qcom_set_core_clk_ctrl(hba, freq);
+ return ufs_qcom_set_core_clk_ctrl(hba, false, freq);
}
static int ufs_qcom_clk_scale_notify(struct ufs_hba *hba, bool scale_up,
@@ -2092,11 +2119,53 @@ static int ufs_qcom_config_esi(struct ufs_hba *hba)
return 0;
}
+static unsigned long ufs_qcom_opp_freq_to_clk_freq(struct ufs_hba *hba,
+ unsigned long freq, char *name)
+{
+ struct ufs_clk_info *clki;
+ struct dev_pm_opp *opp;
+ unsigned long clk_freq;
+ int idx = 0;
+ bool found = false;
+
+ opp = dev_pm_opp_find_freq_exact_indexed(hba->dev, freq, 0, true);
+ if (IS_ERR(opp)) {
+ dev_err(hba->dev, "Failed to find OPP for exact frequency %lu\n", freq);
+ return 0;
+ }
+
+ list_for_each_entry(clki, &hba->clk_list_head, list) {
+ if (!strcmp(clki->name, name)) {
+ found = true;
+ break;
+ }
+
+ idx++;
+ }
+
+ if (!found) {
+ dev_err(hba->dev, "Failed to find clock '%s' in clk list\n", name);
+ dev_pm_opp_put(opp);
+ return 0;
+ }
+
+ clk_freq = dev_pm_opp_get_freq_indexed(opp, idx);
+
+ dev_pm_opp_put(opp);
+
+ return clk_freq;
+}
+
static u32 ufs_qcom_freq_to_gear_speed(struct ufs_hba *hba, unsigned long freq)
{
- u32 gear = 0;
+ u32 gear = UFS_HS_DONT_CHANGE;
+ unsigned long unipro_freq;
- switch (freq) {
+ if (!hba->use_pm_opp)
+ return gear;
+
+ unipro_freq = ufs_qcom_opp_freq_to_clk_freq(hba, freq, "core_clk_unipro");
+ switch (unipro_freq) {
case 403000000:
gear = UFS_HS_G5;
break;
@@ -2116,10 +2185,10 @@ static u32 ufs_qcom_freq_to_gear_speed(struct ufs_hba *hba, unsigned long freq)
break;
default:
dev_err(hba->dev, "%s: Unsupported clock freq : %lu\n", __func__, freq);
- break;
+ return UFS_HS_DONT_CHANGE;
}
- return gear;
+ return min_t(u32, gear, hba->max_pwr_info.info.gear_rx);
}
/*
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index b95c4cb21c13..60a621b00c65 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -409,6 +409,15 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
struct page **pages;
size_t page_off;
+ /*
+ * FIXME: io_iter.count needs to be corrected to the aligned
+ * length. Otherwise, iov_iter_get_pages_alloc2() operates
+ * with the initial unaligned length value. As a result,
+ * ceph_msg_data_cursor_init() triggers BUG_ON() when
+ * msg->sparse_read_total > msg->data_length.
+ */
+ subreq->io_iter.count = len;
+
err = iov_iter_get_pages_alloc2(&subreq->io_iter, &pages, len, &page_off);
if (err < 0) {
doutc(cl, "%llx.%llx failed to allocate pages, %d\n",
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index 150076ced937..b2f2af104679 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -33,12 +33,19 @@ struct ceph_nfs_snapfh {
u32 hash;
} __attribute__ ((packed));
+#define BYTES_PER_U32 (sizeof(u32))
+#define CEPH_FH_BASIC_SIZE \
+ (sizeof(struct ceph_nfs_fh) / BYTES_PER_U32)
+#define CEPH_FH_WITH_PARENT_SIZE \
+ (sizeof(struct ceph_nfs_confh) / BYTES_PER_U32)
+#define CEPH_FH_SNAPPED_INODE_SIZE \
+ (sizeof(struct ceph_nfs_snapfh) / BYTES_PER_U32)
+
static int ceph_encode_snapfh(struct inode *inode, u32 *rawfh, int *max_len,
struct inode *parent_inode)
{
struct ceph_client *cl = ceph_inode_to_client(inode);
- static const int snap_handle_length =
- sizeof(struct ceph_nfs_snapfh) >> 2;
+ static const int snap_handle_length = CEPH_FH_SNAPPED_INODE_SIZE;
struct ceph_nfs_snapfh *sfh = (void *)rawfh;
u64 snapid = ceph_snap(inode);
int ret;
@@ -88,10 +95,8 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len,
struct inode *parent_inode)
{
struct ceph_client *cl = ceph_inode_to_client(inode);
- static const int handle_length =
- sizeof(struct ceph_nfs_fh) >> 2;
- static const int connected_handle_length =
- sizeof(struct ceph_nfs_confh) >> 2;
+ static const int handle_length = CEPH_FH_BASIC_SIZE;
+ static const int connected_handle_length = CEPH_FH_WITH_PARENT_SIZE;
int type;
if (ceph_snap(inode) != CEPH_NOSNAP)
@@ -308,7 +313,7 @@ static struct dentry *ceph_fh_to_dentry(struct super_block *sb,
if (fh_type != FILEID_INO32_GEN &&
fh_type != FILEID_INO32_GEN_PARENT)
return NULL;
- if (fh_len < sizeof(*fh) / 4)
+ if (fh_len < sizeof(*fh) / BYTES_PER_U32)
return NULL;
doutc(fsc->client, "%llx\n", fh->ino);
@@ -427,7 +432,7 @@ static struct dentry *ceph_fh_to_parent(struct super_block *sb,
if (fh_type != FILEID_INO32_GEN_PARENT)
return NULL;
- if (fh_len < sizeof(*cfh) / 4)
+ if (fh_len < sizeof(*cfh) / BYTES_PER_U32)
return NULL;
doutc(fsc->client, "%llx\n", cfh->parent_ino);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 851d70200c6b..a7254cab44cc 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -2616,7 +2616,7 @@ static int ceph_zero_objects(struct inode *inode, loff_t offset, loff_t length)
s32 stripe_unit = ci->i_layout.stripe_unit;
s32 stripe_count = ci->i_layout.stripe_count;
s32 object_size = ci->i_layout.object_size;
- u64 object_set_size = object_size * stripe_count;
+ u64 object_set_size = (u64) object_size * stripe_count;
u64 nearly, t;
/* round offset up to next period boundary */
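
A worked example of why the (u64) cast above is needed: with object_size = 64 MiB (0x4000000) and stripe_count = 1024, the 32-bit product 0x4000000 * 1024 = 2^36 wraps to 0, while the widened (u64) multiplication yields the intended 64 GiB object-set size.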
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index f3951253e393..2b8438d8a324 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -1033,8 +1033,7 @@ void ceph_umount_begin(struct super_block *sb)
struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb);
doutc(fsc->client, "starting forced umount\n");
- if (!fsc)
- return;
+
fsc->mount_state = CEPH_MOUNT_SHUTDOWN;
__ceph_umount_begin(fsc);
}
@@ -1227,6 +1226,7 @@ static int ceph_set_super(struct super_block *s, struct fs_context *fc)
s->s_time_min = 0;
s->s_time_max = U32_MAX;
s->s_flags |= SB_NODIRATIME | SB_NOATIME;
+ s->s_magic = CEPH_SUPER_MAGIC;
ceph_fscrypt_set_ops(s);
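Setting s_magic in ceph_set_super() means the superblock advertises its filesystem type as soon as it is instantiated, so statfs() callers see CEPH_SUPER_MAGIC even on early mount paths. A small userspace check (the mount point path is illustrative):

	#include <stdio.h>
	#include <sys/vfs.h>

	#define CEPH_SUPER_MAGIC 0x00c36400	/* include/uapi/linux/magic.h */

	int main(void)
	{
		struct statfs st;

		if (statfs("/mnt/ceph", &st) == 0 && st.f_type == CEPH_SUPER_MAGIC)
			puts("ceph filesystem");
		return 0;
	}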
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 969b458100fe..dfea7bd800cb 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -48,8 +48,8 @@ static struct file *ovl_open_realfile(const struct file *file,
if (!inode_owner_or_capable(real_idmap, realinode))
flags &= ~O_NOATIME;
- realfile = backing_file_open(&file->f_path, flags, realpath,
- current_cred());
+ realfile = backing_file_open(file_user_path((struct file *) file),
+ flags, realpath, current_cred());
}
ovl_revert_creds(old_cred);
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index bf722daf19a9..0b8b28392eb7 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -16,6 +16,7 @@
struct ovl_lookup_data {
struct super_block *sb;
+ struct dentry *dentry;
const struct ovl_layer *layer;
struct qstr name;
bool is_dir;
@@ -24,6 +25,7 @@ struct ovl_lookup_data {
bool stop;
bool last;
char *redirect;
+ char *upperredirect;
int metacopy;
/* Referring to last redirect xattr */
bool absolute_redirect;
@@ -1024,6 +1026,31 @@ int ovl_verify_lowerdata(struct dentry *dentry)
return ovl_maybe_validate_verity(dentry);
}
+/*
+ * Following redirects/metacopy can have security consequences: it's like a
+ * symlink into the lower layer without the permission checks.
+ *
+ * This is only a problem if the upper layer is untrusted (e.g. comes from a USB
+ * drive). This can allow a non-readable file or directory to become readable.
+ *
+ * Following redirects only when redirects are enabled closes this attack
+ * vector when it is not needed.
+ */
+static bool ovl_check_follow_redirect(struct ovl_lookup_data *d)
+{
+ struct ovl_fs *ofs = OVL_FS(d->sb);
+
+ if (d->metacopy && !ofs->config.metacopy) {
+ pr_warn_ratelimited("refusing to follow metacopy origin for (%pd2)\n", d->dentry);
+ return false;
+ }
+ if ((d->redirect || d->upperredirect) && !ovl_redirect_follow(ofs)) {
+ pr_warn_ratelimited("refusing to follow redirect for (%pd2)\n", d->dentry);
+ return false;
+ }
+ return true;
+}
+
struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
unsigned int flags)
{
@@ -1039,7 +1066,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
unsigned int ctr = 0;
struct inode *inode = NULL;
bool upperopaque = false;
- char *upperredirect = NULL;
+ bool check_redirect = (ovl_redirect_follow(ofs) || ofs->numdatalayer);
struct dentry *this;
unsigned int i;
int err;
@@ -1047,12 +1074,14 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
int metacopy_size = 0;
struct ovl_lookup_data d = {
.sb = dentry->d_sb,
+ .dentry = dentry,
.name = dentry->d_name,
.is_dir = false,
.opaque = false,
.stop = false,
- .last = ovl_redirect_follow(ofs) ? false : !ovl_numlower(poe),
+ .last = check_redirect ? false : !ovl_numlower(poe),
.redirect = NULL,
+ .upperredirect = NULL,
.metacopy = 0,
};
@@ -1094,8 +1123,8 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
if (d.redirect) {
err = -ENOMEM;
- upperredirect = kstrdup(d.redirect, GFP_KERNEL);
- if (!upperredirect)
+ d.upperredirect = kstrdup(d.redirect, GFP_KERNEL);
+ if (!d.upperredirect)
goto out_put_upper;
if (d.redirect[0] == '/')
poe = roe;
@@ -1113,7 +1142,12 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
for (i = 0; !d.stop && i < ovl_numlower(poe); i++) {
struct ovl_path lower = ovl_lowerstack(poe)[i];
- if (!ovl_redirect_follow(ofs))
+ if (!ovl_check_follow_redirect(&d)) {
+ err = -EPERM;
+ goto out_put;
+ }
+
+ if (!check_redirect)
d.last = i == ovl_numlower(poe) - 1;
else if (d.is_dir || !ofs->numdatalayer)
d.last = lower.layer->idx == ovl_numlower(roe);
@@ -1126,13 +1160,6 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
if (!this)
continue;
- if ((uppermetacopy || d.metacopy) && !ofs->config.metacopy) {
- dput(this);
- err = -EPERM;
- pr_warn_ratelimited("refusing to follow metacopy origin for (%pd2)\n", dentry);
- goto out_put;
- }
-
/*
* If no origin fh is stored in upper of a merge dir, store fh
* of lower dir and set upper parent "impure".
@@ -1185,23 +1212,6 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
ctr++;
}
- /*
- * Following redirects can have security consequences: it's like
- * a symlink into the lower layer without the permission checks.
- * This is only a problem if the upper layer is untrusted (e.g
- * comes from an USB drive). This can allow a non-readable file
- * or directory to become readable.
- *
- * Only following redirects when redirects are enabled disables
- * this attack vector when not necessary.
- */
- err = -EPERM;
- if (d.redirect && !ovl_redirect_follow(ofs)) {
- pr_warn_ratelimited("refusing to follow redirect for (%pd2)\n",
- dentry);
- goto out_put;
- }
-
if (d.stop)
break;
@@ -1212,10 +1222,16 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
}
}
- /* Defer lookup of lowerdata in data-only layers to first access */
+ /*
+ * Defer lookup of lowerdata in data-only layers to first access.
+ * Don't require redirect=follow and metacopy=on in this case.
+ */
if (d.metacopy && ctr && ofs->numdatalayer && d.absolute_redirect) {
d.metacopy = 0;
ctr++;
+ } else if (!ovl_check_follow_redirect(&d)) {
+ err = -EPERM;
+ goto out_put;
}
/*
@@ -1298,20 +1314,26 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
/*
* It's safe to assign upperredirect here: the previous
- * assignment of happens only if upperdentry is non-NULL, and
+ * assignment happens only if upperdentry is non-NULL, and
* this one only if upperdentry is NULL.
*/
- upperredirect = ovl_get_redirect_xattr(ofs, &upperpath, 0);
- if (IS_ERR(upperredirect)) {
- err = PTR_ERR(upperredirect);
- upperredirect = NULL;
+ d.upperredirect = ovl_get_redirect_xattr(ofs, &upperpath, 0);
+ if (IS_ERR(d.upperredirect)) {
+ err = PTR_ERR(d.upperredirect);
+ d.upperredirect = NULL;
goto out_free_oe;
}
+
err = ovl_check_metacopy_xattr(ofs, &upperpath, NULL);
if (err < 0)
goto out_free_oe;
- uppermetacopy = err;
+ d.metacopy = uppermetacopy = err;
metacopy_size = err;
+
+ if (!ovl_check_follow_redirect(&d)) {
+ err = -EPERM;
+ goto out_free_oe;
+ }
}
if (upperdentry || ctr) {
@@ -1319,7 +1341,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
.upperdentry = upperdentry,
.oe = oe,
.index = index,
- .redirect = upperredirect,
+ .redirect = d.upperredirect,
};
/* Store lowerdata redirect for lazy lookup */
@@ -1361,7 +1383,7 @@ out_put_upper:
kfree(origin_path);
}
dput(upperdentry);
- kfree(upperredirect);
+ kfree(d.upperredirect);
out:
kfree(d.redirect);
ovl_revert_creds(old_cred);
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index cb449ab310a7..afb7762f873f 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -51,7 +51,7 @@ struct ovl_path {
struct ovl_entry {
unsigned int __numlower;
- struct ovl_path __lowerstack[];
+ struct ovl_path __lowerstack[] __counted_by(__numlower);
};
/* private information held for overlayfs's superblock */
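__counted_by(__numlower) lets the compiler (and FORTIFY/UBSAN at runtime) bounds-check accesses to the flexible array against the stored element count, and it pairs with the struct_size() allocations that util.c and readdir.c switch to further down. A generic sketch of the pattern, not overlayfs code:

	#include <linux/overflow.h>
	#include <linux/slab.h>

	struct demo {
		unsigned int n;
		int items[] __counted_by(n);
	};

	static struct demo *demo_alloc(unsigned int n)
	{
		/* struct_size() computes sizeof(*d) + n * sizeof(int) with
		 * overflow checking; __counted_by() tells the toolchain that
		 * items[] holds n elements once n is assigned.
		 */
		struct demo *d = kzalloc(struct_size(d, items, n), GFP_KERNEL);

		if (d)
			d->n = n;
		return d;
	}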
diff --git a/fs/overlayfs/params.c b/fs/overlayfs/params.c
index 6759f7d040c8..f42488c01957 100644
--- a/fs/overlayfs/params.c
+++ b/fs/overlayfs/params.c
@@ -871,18 +871,6 @@ int ovl_fs_params_verify(const struct ovl_fs_context *ctx,
config->uuid = OVL_UUID_NULL;
}
- /* Resolve verity -> metacopy dependency */
- if (config->verity_mode && !config->metacopy) {
- /* Don't allow explicit specified conflicting combinations */
- if (set.metacopy) {
- pr_err("conflicting options: metacopy=off,verity=%s\n",
- ovl_verity_mode(config));
- return -EINVAL;
- }
- /* Otherwise automatically enable metacopy. */
- config->metacopy = true;
- }
-
/*
* This is to make the logic below simpler. It doesn't make any other
* difference, since redirect_dir=on is only used for upper.
@@ -890,18 +878,13 @@ int ovl_fs_params_verify(const struct ovl_fs_context *ctx,
if (!config->upperdir && config->redirect_mode == OVL_REDIRECT_FOLLOW)
config->redirect_mode = OVL_REDIRECT_ON;
- /* Resolve verity -> metacopy -> redirect_dir dependency */
+ /* metacopy -> redirect_dir dependency */
if (config->metacopy && config->redirect_mode != OVL_REDIRECT_ON) {
if (set.metacopy && set.redirect) {
pr_err("conflicting options: metacopy=on,redirect_dir=%s\n",
ovl_redirect_mode(config));
return -EINVAL;
}
- if (config->verity_mode && set.redirect) {
- pr_err("conflicting options: verity=%s,redirect_dir=%s\n",
- ovl_verity_mode(config), ovl_redirect_mode(config));
- return -EINVAL;
- }
if (set.redirect) {
/*
* There was an explicit redirect_dir=... that resulted
@@ -970,7 +953,7 @@ int ovl_fs_params_verify(const struct ovl_fs_context *ctx,
}
- /* Resolve userxattr -> !redirect && !metacopy && !verity dependency */
+ /* Resolve userxattr -> !redirect && !metacopy dependency */
if (config->userxattr) {
if (set.redirect &&
config->redirect_mode != OVL_REDIRECT_NOFOLLOW) {
@@ -982,11 +965,6 @@ int ovl_fs_params_verify(const struct ovl_fs_context *ctx,
pr_err("conflicting options: userxattr,metacopy=on\n");
return -EINVAL;
}
- if (config->verity_mode) {
- pr_err("conflicting options: userxattr,verity=%s\n",
- ovl_verity_mode(config));
- return -EINVAL;
- }
/*
* Silently disable default setting of redirect and metacopy.
* This shall be the default in the future as well: these
@@ -1025,11 +1003,6 @@ int ovl_fs_params_verify(const struct ovl_fs_context *ctx,
*/
}
- if (ctx->nr_data > 0 && !config->metacopy) {
- pr_err("lower data-only dirs require metacopy support.\n");
- return -EINVAL;
- }
-
return 0;
}
@@ -1078,17 +1051,16 @@ int ovl_show_options(struct seq_file *m, struct dentry *dentry)
seq_printf(m, ",redirect_dir=%s",
ovl_redirect_mode(&ofs->config));
if (ofs->config.index != ovl_index_def)
- seq_printf(m, ",index=%s", ofs->config.index ? "on" : "off");
+ seq_printf(m, ",index=%s", str_on_off(ofs->config.index));
if (ofs->config.uuid != ovl_uuid_def())
seq_printf(m, ",uuid=%s", ovl_uuid_mode(&ofs->config));
if (ofs->config.nfs_export != ovl_nfs_export_def)
- seq_printf(m, ",nfs_export=%s", ofs->config.nfs_export ?
- "on" : "off");
+ seq_printf(m, ",nfs_export=%s",
+ str_on_off(ofs->config.nfs_export));
if (ofs->config.xino != ovl_xino_def() && !ovl_same_fs(ofs))
seq_printf(m, ",xino=%s", ovl_xino_mode(&ofs->config));
if (ofs->config.metacopy != ovl_metacopy_def)
- seq_printf(m, ",metacopy=%s",
- ofs->config.metacopy ? "on" : "off");
+ seq_printf(m, ",metacopy=%s", str_on_off(ofs->config.metacopy));
if (ofs->config.ovl_volatile)
seq_puts(m, ",volatile");
if (ofs->config.userxattr)
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index 44e208da417c..474c80d210d1 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -13,6 +13,7 @@
#include <linux/security.h>
#include <linux/cred.h>
#include <linux/ratelimit.h>
+#include <linux/overflow.h>
#include "overlayfs.h"
struct ovl_cache_entry {
@@ -147,9 +148,8 @@ static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
u64 ino, unsigned int d_type)
{
struct ovl_cache_entry *p;
- size_t size = offsetof(struct ovl_cache_entry, name[len + 1]);
- p = kmalloc(size, GFP_KERNEL);
+ p = kmalloc(struct_size(p, name, len + 1), GFP_KERNEL);
if (!p)
return NULL;
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 0819c739cc2f..dcccb4b4a66c 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -15,6 +15,7 @@
#include <linux/uuid.h>
#include <linux/namei.h>
#include <linux/ratelimit.h>
+#include <linux/overflow.h>
#include "overlayfs.h"
/* Get write access to upper mnt - may fail if upper sb was remounted ro */
@@ -145,9 +146,9 @@ void ovl_stack_free(struct ovl_path *stack, unsigned int n)
struct ovl_entry *ovl_alloc_entry(unsigned int numlower)
{
- size_t size = offsetof(struct ovl_entry, __lowerstack[numlower]);
- struct ovl_entry *oe = kzalloc(size, GFP_KERNEL);
+ struct ovl_entry *oe;
+ oe = kzalloc(struct_size(oe, __lowerstack, numlower), GFP_KERNEL);
if (oe)
oe->__numlower = numlower;
@@ -305,7 +306,9 @@ enum ovl_path_type ovl_path_realdata(struct dentry *dentry, struct path *path)
struct dentry *ovl_dentry_upper(struct dentry *dentry)
{
- return ovl_upperdentry_dereference(OVL_I(d_inode(dentry)));
+ struct inode *inode = d_inode(dentry);
+
+ return inode ? ovl_upperdentry_dereference(OVL_I(inode)) : NULL;
}
struct dentry *ovl_dentry_lower(struct dentry *dentry)
diff --git a/include/linux/codetag.h b/include/linux/codetag.h
index 0ee4c21c6dbc..5f2b9a1f722c 100644
--- a/include/linux/codetag.h
+++ b/include/linux/codetag.h
@@ -36,8 +36,8 @@ union codetag_ref {
struct codetag_type_desc {
const char *section;
size_t tag_size;
- void (*module_load)(struct module *mod,
- struct codetag *start, struct codetag *end);
+ int (*module_load)(struct module *mod,
+ struct codetag *start, struct codetag *end);
void (*module_unload)(struct module *mod,
struct codetag *start, struct codetag *end);
#ifdef CONFIG_MODULES
@@ -89,7 +89,7 @@ void *codetag_alloc_module_section(struct module *mod, const char *name,
unsigned long align);
void codetag_free_module_sections(struct module *mod);
void codetag_module_replaced(struct module *mod, struct module *new_mod);
-void codetag_load_module(struct module *mod);
+int codetag_load_module(struct module *mod);
void codetag_unload_module(struct module *mod);
#else /* defined(CONFIG_CODE_TAGGING) && defined(CONFIG_MODULES) */
@@ -103,7 +103,7 @@ codetag_alloc_module_section(struct module *mod, const char *name,
unsigned long align) { return NULL; }
static inline void codetag_free_module_sections(struct module *mod) {}
static inline void codetag_module_replaced(struct module *mod, struct module *new_mod) {}
-static inline void codetag_load_module(struct module *mod) {}
+static inline int codetag_load_module(struct module *mod) { return 0; }
static inline void codetag_unload_module(struct module *mod) {}
#endif /* defined(CONFIG_CODE_TAGGING) && defined(CONFIG_MODULES) */
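With module_load now returning int, a codetag consumer can veto module loading instead of continuing half-initialized; on failure, codetag_module_init() (see the lib/codetag.c hunk below) drops the idr entry and load_module() aborts. A hedged sketch of a hook on the new signature; my_alloc_tag_state() is a hypothetical per-tag initializer, not a real API:

	static int my_module_load(struct module *mod, struct codetag *start,
				  struct codetag *stop)
	{
		/* core-kernel tags need no per-module state */
		if (!mod)
			return 0;

		return my_alloc_tag_state(start, stop) ? 0 : -ENOMEM;
	}

	static const struct codetag_type_desc my_desc = {
		.section	= "my_tags",
		.tag_size	= sizeof(struct codetag),
		.module_load	= my_module_load,
	};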
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index a4d3816d252a..b672ca15f265 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -635,6 +635,8 @@ enum {
#define ftrace_get_symaddr(fentry_ip) (0)
#endif
+void ftrace_sync_ipi(void *data);
+
#ifdef CONFIG_DYNAMIC_FTRACE
void ftrace_arch_code_modify_prepare(void);
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 0598f36931de..42f374e828a2 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -279,6 +279,7 @@ bool is_hugetlb_entry_migration(pte_t pte);
bool is_hugetlb_entry_hwpoisoned(pte_t pte);
void hugetlb_unshare_all_pmds(struct vm_area_struct *vma);
void fixup_hugetlb_reservations(struct vm_area_struct *vma);
+void hugetlb_split(struct vm_area_struct *vma, unsigned long addr);
#else /* !CONFIG_HUGETLB_PAGE */
@@ -476,6 +477,8 @@ static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma)
{
}
+static inline void hugetlb_split(struct vm_area_struct *vma, unsigned long addr) {}
+
#endif /* !CONFIG_HUGETLB_PAGE */
#ifndef pgd_write
diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h
index 98030accf641..72ff44cca864 100644
--- a/include/linux/raid/pq.h
+++ b/include/linux/raid/pq.h
@@ -108,6 +108,10 @@ extern const struct raid6_calls raid6_vpermxor4;
extern const struct raid6_calls raid6_vpermxor8;
extern const struct raid6_calls raid6_lsx;
extern const struct raid6_calls raid6_lasx;
+extern const struct raid6_calls raid6_rvvx1;
+extern const struct raid6_calls raid6_rvvx2;
+extern const struct raid6_calls raid6_rvvx4;
+extern const struct raid6_calls raid6_rvvx8;
struct raid6_recov_calls {
void (*data2)(int, size_t, int, int, void **);
@@ -125,6 +129,7 @@ extern const struct raid6_recov_calls raid6_recov_s390xc;
extern const struct raid6_recov_calls raid6_recov_neon;
extern const struct raid6_recov_calls raid6_recov_lsx;
extern const struct raid6_recov_calls raid6_recov_lasx;
+extern const struct raid6_recov_calls raid6_recov_rvv;
extern const struct raid6_calls raid6_neonx1;
extern const struct raid6_calls raid6_neonx2;
diff --git a/kernel/module/main.c b/kernel/module/main.c
index 3d64e69cc03e..08b59c37735e 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -3386,11 +3386,12 @@ static int load_module(struct load_info *info, const char __user *uargs,
goto sysfs_cleanup;
}
+ if (codetag_load_module(mod))
+ goto sysfs_cleanup;
+
/* Get rid of temporary copy. */
free_copy(info, flags);
- codetag_load_module(mod);
-
/* Done! */
trace_module_load(mod);
diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
index 56b21219442b..486c00536207 100644
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -20,6 +20,28 @@
int sysctl_panic_on_rcu_stall __read_mostly;
int sysctl_max_rcu_stall_to_panic __read_mostly;
+#ifdef CONFIG_SYSFS
+
+static unsigned int rcu_stall_count;
+
+static ssize_t rcu_stall_count_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *page)
+{
+ return sysfs_emit(page, "%u\n", rcu_stall_count);
+}
+
+static struct kobj_attribute rcu_stall_count_attr = __ATTR_RO(rcu_stall_count);
+
+static __init int kernel_rcu_stall_sysfs_init(void)
+{
+ sysfs_add_file_to_group(kernel_kobj, &rcu_stall_count_attr.attr, NULL);
+ return 0;
+}
+
+late_initcall(kernel_rcu_stall_sysfs_init);
+
+#endif // CONFIG_SYSFS
+
#ifdef CONFIG_PROVE_RCU
#define RCU_STALL_DELAY_DELTA (5 * HZ)
#else
@@ -784,6 +806,10 @@ static void check_cpu_stall(struct rcu_data *rdp)
if (kvm_check_and_clear_guest_paused())
return;
+#ifdef CONFIG_SYSFS
+ ++rcu_stall_count;
+#endif
+
rcu_stall_notifier_call_chain(RCU_STALL_NOTIFY_NORM, (void *)j - gps);
if (READ_ONCE(csd_lock_suppress_rcu_stall) && csd_lock_is_stuck()) {
pr_err("INFO: %s detected stall, but suppressed full report due to a stuck CSD-lock.\n", rcu_state.name);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index a7291685902e..4203fad56b6c 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -188,7 +188,7 @@ static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip,
op->saved_func(ip, parent_ip, op, fregs);
}
-static void ftrace_sync_ipi(void *data)
+void ftrace_sync_ipi(void *data)
{
/* Probably not needed, but do it anyway */
smp_rmb();
diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c
index 45dae7da70e1..d48b80f3f007 100644
--- a/lib/alloc_tag.c
+++ b/lib/alloc_tag.c
@@ -607,15 +607,16 @@ out:
mas_unlock(&mas);
}
-static void load_module(struct module *mod, struct codetag *start, struct codetag *stop)
+static int load_module(struct module *mod, struct codetag *start, struct codetag *stop)
{
/* Allocate module alloc_tag percpu counters */
struct alloc_tag *start_tag;
struct alloc_tag *stop_tag;
struct alloc_tag *tag;
+ /* percpu counters for core allocations are already statically allocated */
if (!mod)
- return;
+ return 0;
start_tag = ct_to_alloc_tag(start);
stop_tag = ct_to_alloc_tag(stop);
@@ -627,12 +628,13 @@ static void load_module(struct module *mod, struct codetag *start, struct codeta
free_percpu(tag->counters);
tag->counters = NULL;
}
- shutdown_mem_profiling(true);
- pr_err("Failed to allocate memory for allocation tag percpu counters in the module %s. Memory allocation profiling is disabled!\n",
+ pr_err("Failed to allocate memory for allocation tag percpu counters in the module %s\n",
mod->name);
- break;
+ return -ENOMEM;
}
}
+
+ return 0;
}
static void replace_module(struct module *mod, struct module *new_mod)
diff --git a/lib/codetag.c b/lib/codetag.c
index de332e98d6f5..650d54d7e14d 100644
--- a/lib/codetag.c
+++ b/lib/codetag.c
@@ -167,6 +167,7 @@ static int codetag_module_init(struct codetag_type *cttype, struct module *mod)
{
struct codetag_range range;
struct codetag_module *cmod;
+ int mod_id;
int err;
range = get_section_range(mod, cttype->desc.section);
@@ -190,11 +191,20 @@ static int codetag_module_init(struct codetag_type *cttype, struct module *mod)
cmod->range = range;
down_write(&cttype->mod_lock);
- err = idr_alloc(&cttype->mod_idr, cmod, 0, 0, GFP_KERNEL);
- if (err >= 0) {
- cttype->count += range_size(cttype, &range);
- if (cttype->desc.module_load)
- cttype->desc.module_load(mod, range.start, range.stop);
+ mod_id = idr_alloc(&cttype->mod_idr, cmod, 0, 0, GFP_KERNEL);
+ if (mod_id >= 0) {
+ if (cttype->desc.module_load) {
+ err = cttype->desc.module_load(mod, range.start, range.stop);
+ if (!err)
+ cttype->count += range_size(cttype, &range);
+ else
+ idr_remove(&cttype->mod_idr, mod_id);
+ } else {
+ cttype->count += range_size(cttype, &range);
+ err = 0;
+ }
+ } else {
+ err = mod_id;
}
up_write(&cttype->mod_lock);
@@ -295,17 +305,23 @@ void codetag_module_replaced(struct module *mod, struct module *new_mod)
mutex_unlock(&codetag_lock);
}
-void codetag_load_module(struct module *mod)
+int codetag_load_module(struct module *mod)
{
struct codetag_type *cttype;
+ int ret = 0;
if (!mod)
- return;
+ return 0;
mutex_lock(&codetag_lock);
- list_for_each_entry(cttype, &codetag_types, link)
- codetag_module_init(cttype, mod);
+ list_for_each_entry(cttype, &codetag_types, link) {
+ ret = codetag_module_init(cttype, mod);
+ if (ret)
+ break;
+ }
mutex_unlock(&codetag_lock);
+
+ return ret;
}
void codetag_unload_module(struct module *mod)
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 969d4ad510df..f9193f952f49 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -817,7 +817,7 @@ static bool iov_iter_aligned_bvec(const struct iov_iter *i, unsigned addr_mask,
size_t size = i->count;
do {
- size_t len = bvec->bv_len;
+ size_t len = bvec->bv_len - skip;
if (len > size)
len = size;
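The fix matters on the first bvec of an advanced iterator: bvec->bv_len is the segment's full length, while bv_len - skip is what actually remains. With illustrative numbers: a 4096-byte bvec with the iterator advanced 512 bytes in (skip == 512) has only 3584 bytes left; using bv_len would let a 4096-byte run pass the len_mask alignment check even though the usable run is shorter.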
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile
index 29127dd05d63..5be0a4e60ab1 100644
--- a/lib/raid6/Makefile
+++ b/lib/raid6/Makefile
@@ -10,6 +10,7 @@ raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o \
raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o recov_neon.o recov_neon_inner.o
raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o
raid6_pq-$(CONFIG_LOONGARCH) += loongarch_simd.o recov_loongarch_simd.o
+raid6_pq-$(CONFIG_RISCV_ISA_V) += rvv.o recov_rvv.o
hostprogs += mktables
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index dfd3f800ac9b..75ce3e134b7c 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -77,6 +77,12 @@ const struct raid6_calls * const raid6_algos[] = {
&raid6_lsx,
#endif
#endif
+#ifdef CONFIG_RISCV_ISA_V
+ &raid6_rvvx1,
+ &raid6_rvvx2,
+ &raid6_rvvx4,
+ &raid6_rvvx8,
+#endif
&raid6_intx8,
&raid6_intx4,
&raid6_intx2,
@@ -110,6 +116,9 @@ const struct raid6_recov_calls *const raid6_recov_algos[] = {
&raid6_recov_lsx,
#endif
#endif
+#ifdef CONFIG_RISCV_ISA_V
+ &raid6_recov_rvv,
+#endif
&raid6_recov_intx1,
NULL
};
diff --git a/lib/raid6/recov_rvv.c b/lib/raid6/recov_rvv.c
new file mode 100644
index 000000000000..f29303795ccf
--- /dev/null
+++ b/lib/raid6/recov_rvv.c
@@ -0,0 +1,229 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2024 Institute of Software, CAS.
+ * Author: Chunyan Zhang <zhangchunyan@iscas.ac.cn>
+ */
+
+#include <asm/simd.h>
+#include <asm/vector.h>
+#include <crypto/internal/simd.h>
+#include <linux/raid/pq.h>
+
+static int rvv_has_vector(void)
+{
+ return has_vector();
+}
+
+static void __raid6_2data_recov_rvv(int bytes, u8 *p, u8 *q, u8 *dp,
+ u8 *dq, const u8 *pbmul,
+ const u8 *qmul)
+{
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vsetvli x0, %[avl], e8, m1, ta, ma\n"
+ ".option pop\n"
+ : :
+ [avl]"r"(16)
+ );
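+	/*
+	 * vl is now min(16, VLMAX); the ratified V extension requires
+	 * VLEN >= 128, so vl == 16 here and the loops below can safely
+	 * advance their pointers by 16 bytes per iteration.
+	 */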
+
+ /*
+	 * while (bytes--) {
+ * uint8_t px, qx, db;
+ *
+ * px = *p ^ *dp;
+ * qx = qmul[*q ^ *dq];
+ * *dq++ = db = pbmul[px] ^ qx;
+ * *dp++ = db ^ px;
+ * p++; q++;
+ * }
+ */
+ while (bytes) {
+ /*
+ * v0:px, v1:dp,
+ * v2:qx, v3:dq,
+ * v4:vx, v5:vy,
+ * v6:qm0, v7:qm1,
+ * v8:pm0, v9:pm1,
+ * v14:p/qm[vx], v15:p/qm[vy]
+ */
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vle8.v v0, (%[px])\n"
+ "vle8.v v1, (%[dp])\n"
+ "vxor.vv v0, v0, v1\n"
+ "vle8.v v2, (%[qx])\n"
+ "vle8.v v3, (%[dq])\n"
+ "vxor.vv v4, v2, v3\n"
+ "vsrl.vi v5, v4, 4\n"
+ "vand.vi v4, v4, 0xf\n"
+ "vle8.v v6, (%[qm0])\n"
+ "vle8.v v7, (%[qm1])\n"
+ "vrgather.vv v14, v6, v4\n" /* v14 = qm[vx] */
+ "vrgather.vv v15, v7, v5\n" /* v15 = qm[vy] */
+ "vxor.vv v2, v14, v15\n" /* v2 = qmul[*q ^ *dq] */
+
+ "vsrl.vi v5, v0, 4\n"
+ "vand.vi v4, v0, 0xf\n"
+ "vle8.v v8, (%[pm0])\n"
+ "vle8.v v9, (%[pm1])\n"
+ "vrgather.vv v14, v8, v4\n" /* v14 = pm[vx] */
+ "vrgather.vv v15, v9, v5\n" /* v15 = pm[vy] */
+ "vxor.vv v4, v14, v15\n" /* v4 = pbmul[px] */
+ "vxor.vv v3, v4, v2\n" /* v3 = db = pbmul[px] ^ qx */
+ "vxor.vv v1, v3, v0\n" /* v1 = db ^ px; */
+ "vse8.v v3, (%[dq])\n"
+ "vse8.v v1, (%[dp])\n"
+ ".option pop\n"
+ : :
+ [px]"r"(p),
+ [dp]"r"(dp),
+ [qx]"r"(q),
+ [dq]"r"(dq),
+ [qm0]"r"(qmul),
+ [qm1]"r"(qmul + 16),
+ [pm0]"r"(pbmul),
+ [pm1]"r"(pbmul + 16)
+ :);
+
+ bytes -= 16;
+ p += 16;
+ q += 16;
+ dp += 16;
+ dq += 16;
+ }
+}
+
+static void __raid6_datap_recov_rvv(int bytes, u8 *p, u8 *q,
+ u8 *dq, const u8 *qmul)
+{
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vsetvli x0, %[avl], e8, m1, ta, ma\n"
+ ".option pop\n"
+ : :
+ [avl]"r"(16)
+ );
+
+ /*
+ * while (bytes--) {
+ * *p++ ^= *dq = qmul[*q ^ *dq];
+ * q++; dq++;
+ * }
+ */
+ while (bytes) {
+ /*
+ * v0:vx, v1:vy,
+ * v2:dq, v3:p,
+ * v4:qm0, v5:qm1,
+ * v10:m[vx], v11:m[vy]
+ */
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vle8.v v0, (%[vx])\n"
+ "vle8.v v2, (%[dq])\n"
+ "vxor.vv v0, v0, v2\n"
+ "vsrl.vi v1, v0, 4\n"
+ "vand.vi v0, v0, 0xf\n"
+ "vle8.v v4, (%[qm0])\n"
+ "vle8.v v5, (%[qm1])\n"
+ "vrgather.vv v10, v4, v0\n"
+ "vrgather.vv v11, v5, v1\n"
+ "vxor.vv v0, v10, v11\n"
+ "vle8.v v1, (%[vy])\n"
+ "vxor.vv v1, v0, v1\n"
+ "vse8.v v0, (%[dq])\n"
+ "vse8.v v1, (%[vy])\n"
+ ".option pop\n"
+ : :
+ [vx]"r"(q),
+ [vy]"r"(p),
+ [dq]"r"(dq),
+ [qm0]"r"(qmul),
+ [qm1]"r"(qmul + 16)
+ :);
+
+ bytes -= 16;
+ p += 16;
+ q += 16;
+ dq += 16;
+ }
+}
+
+static void raid6_2data_recov_rvv(int disks, size_t bytes, int faila,
+ int failb, void **ptrs)
+{
+ u8 *p, *q, *dp, *dq;
+ const u8 *pbmul; /* P multiplier table for B data */
+ const u8 *qmul; /* Q multiplier table (for both) */
+
+ p = (u8 *)ptrs[disks - 2];
+ q = (u8 *)ptrs[disks - 1];
+
+ /*
+ * Compute syndrome with zero for the missing data pages
+ * Use the dead data pages as temporary storage for
+ * delta p and delta q
+ */
+ dp = (u8 *)ptrs[faila];
+ ptrs[faila] = (void *)raid6_empty_zero_page;
+ ptrs[disks - 2] = dp;
+ dq = (u8 *)ptrs[failb];
+ ptrs[failb] = (void *)raid6_empty_zero_page;
+ ptrs[disks - 1] = dq;
+
+ raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+ /* Restore pointer table */
+ ptrs[faila] = dp;
+ ptrs[failb] = dq;
+ ptrs[disks - 2] = p;
+ ptrs[disks - 1] = q;
+
+ /* Now, pick the proper data tables */
+ pbmul = raid6_vgfmul[raid6_gfexi[failb - faila]];
+ qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
+ raid6_gfexp[failb]]];
+
+ kernel_vector_begin();
+ __raid6_2data_recov_rvv(bytes, p, q, dp, dq, pbmul, qmul);
+ kernel_vector_end();
+}
+
+static void raid6_datap_recov_rvv(int disks, size_t bytes, int faila,
+ void **ptrs)
+{
+ u8 *p, *q, *dq;
+ const u8 *qmul; /* Q multiplier table */
+
+ p = (u8 *)ptrs[disks - 2];
+ q = (u8 *)ptrs[disks - 1];
+
+ /*
+ * Compute syndrome with zero for the missing data page
+ * Use the dead data page as temporary storage for delta q
+ */
+ dq = (u8 *)ptrs[faila];
+ ptrs[faila] = (void *)raid6_empty_zero_page;
+ ptrs[disks - 1] = dq;
+
+ raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+ /* Restore pointer table */
+ ptrs[faila] = dq;
+ ptrs[disks - 1] = q;
+
+ /* Now, pick the proper data tables */
+ qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
+
+ kernel_vector_begin();
+ __raid6_datap_recov_rvv(bytes, p, q, dq, qmul);
+ kernel_vector_end();
+}
+
+const struct raid6_recov_calls raid6_recov_rvv = {
+ .data2 = raid6_2data_recov_rvv,
+ .datap = raid6_datap_recov_rvv,
+ .valid = rvv_has_vector,
+ .name = "rvv",
+ .priority = 1,
+};
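For reference, a conceptual scalar form of the recovery the vector code above implements (lifted from the in-file comments; the vector version merely splits the tables into low/high-nibble halves for vrgather, the GF(2^8) arithmetic is the same, and here qmul/pbmul stand for full 256-entry multiply tables):

	static void raid6_2data_recov_ref(int bytes, u8 *p, u8 *q, u8 *dp,
					  u8 *dq, const u8 *pbmul,
					  const u8 *qmul)
	{
		while (bytes--) {
			u8 px = *p ^ *dp;		/* P delta */
			u8 qx = qmul[*q ^ *dq];		/* scaled Q delta */
			u8 db = pbmul[px] ^ qx;		/* recovered data B */

			*dq++ = db;
			*dp++ = db ^ px;		/* recovered data A */
			p++;
			q++;
		}
	}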
diff --git a/lib/raid6/rvv.c b/lib/raid6/rvv.c
new file mode 100644
index 000000000000..f0887344b274
--- /dev/null
+++ b/lib/raid6/rvv.c
@@ -0,0 +1,1212 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * RAID-6 syndrome calculation using RISC-V vector instructions
+ *
+ * Copyright 2024 Institute of Software, CAS.
+ * Author: Chunyan Zhang <zhangchunyan@iscas.ac.cn>
+ *
+ * Based on neon.uc:
+ * Copyright 2002-2004 H. Peter Anvin
+ */
+
+#include <asm/simd.h>
+#include <asm/vector.h>
+#include <crypto/internal/simd.h>
+#include <linux/raid/pq.h>
+#include <linux/types.h>
+#include "rvv.h"
+
+#define NSIZE (riscv_v_vsize / 32) /* NSIZE = vlenb */
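+/*
+ * riscv_v_vsize is the save area for all 32 vector registers
+ * (32 * vlenb), so NSIZE works out to vlenb: the byte width of a
+ * single vector register, i.e. one vle8/vse8 chunk per register here.
+ */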
+
+static int rvv_has_vector(void)
+{
+ return has_vector();
+}
+
+static void raid6_rvv1_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs)
+{
+ u8 **dptr = (u8 **)ptrs;
+ unsigned long d;
+ int z, z0;
+ u8 *p, *q;
+
+ z0 = disks - 3; /* Highest data disk */
+ p = dptr[z0 + 1]; /* XOR parity */
+ q = dptr[z0 + 2]; /* RS syndrome */
+
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vsetvli t0, x0, e8, m1, ta, ma\n"
+ ".option pop\n"
+ );
+
+ /* v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 */
+ for (d = 0; d < bytes; d += NSIZE * 1) {
+ /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vle8.v v0, (%[wp0])\n"
+ "vle8.v v1, (%[wp0])\n"
+ ".option pop\n"
+ : :
+ [wp0]"r"(&dptr[z0][d + 0 * NSIZE])
+ );
+
+ for (z = z0 - 1 ; z >= 0 ; z--) {
+ /*
+ * w2$$ = MASK(wq$$);
+ * w1$$ = SHLBYTE(wq$$);
+ * w2$$ &= NBYTES(0x1d);
+ * w1$$ ^= w2$$;
+ * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
+ * wq$$ = w1$$ ^ wd$$;
+ * wp$$ ^= wd$$;
+ */
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vsra.vi v2, v1, 7\n"
+ "vsll.vi v3, v1, 1\n"
+ "vand.vx v2, v2, %[x1d]\n"
+ "vxor.vv v3, v3, v2\n"
+ "vle8.v v2, (%[wd0])\n"
+ "vxor.vv v1, v3, v2\n"
+ "vxor.vv v0, v0, v2\n"
+ ".option pop\n"
+ : :
+ [wd0]"r"(&dptr[z][d + 0 * NSIZE]),
+ [x1d]"r"(0x1d)
+ );
+ }
+
+ /*
+ * *(unative_t *)&p[d+NSIZE*$$] = wp$$;
+ * *(unative_t *)&q[d+NSIZE*$$] = wq$$;
+ */
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vse8.v v0, (%[wp0])\n"
+ "vse8.v v1, (%[wq0])\n"
+ ".option pop\n"
+ : :
+ [wp0]"r"(&p[d + NSIZE * 0]),
+ [wq0]"r"(&q[d + NSIZE * 0])
+ );
+ }
+}
+
+static void raid6_rvv1_xor_syndrome_real(int disks, int start, int stop,
+ unsigned long bytes, void **ptrs)
+{
+ u8 **dptr = (u8 **)ptrs;
+ u8 *p, *q;
+ unsigned long d;
+ int z, z0;
+
+ z0 = stop; /* P/Q right side optimization */
+ p = dptr[disks - 2]; /* XOR parity */
+ q = dptr[disks - 1]; /* RS syndrome */
+
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vsetvli t0, x0, e8, m1, ta, ma\n"
+ ".option pop\n"
+ );
+
+ /* v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 */
+ for (d = 0 ; d < bytes ; d += NSIZE * 1) {
+ /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vle8.v v0, (%[wp0])\n"
+ "vle8.v v1, (%[wp0])\n"
+ ".option pop\n"
+ : :
+ [wp0]"r"(&dptr[z0][d + 0 * NSIZE])
+ );
+
+ /* P/Q data pages */
+ for (z = z0 - 1; z >= start; z--) {
+ /*
+ * w2$$ = MASK(wq$$);
+ * w1$$ = SHLBYTE(wq$$);
+ * w2$$ &= NBYTES(0x1d);
+ * w1$$ ^= w2$$;
+ * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
+ * wq$$ = w1$$ ^ wd$$;
+ * wp$$ ^= wd$$;
+ */
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vsra.vi v2, v1, 7\n"
+ "vsll.vi v3, v1, 1\n"
+ "vand.vx v2, v2, %[x1d]\n"
+ "vxor.vv v3, v3, v2\n"
+ "vle8.v v2, (%[wd0])\n"
+ "vxor.vv v1, v3, v2\n"
+ "vxor.vv v0, v0, v2\n"
+ ".option pop\n"
+ : :
+ [wd0]"r"(&dptr[z][d + 0 * NSIZE]),
+ [x1d]"r"(0x1d)
+ );
+ }
+
+ /* P/Q left side optimization */
+ for (z = start - 1; z >= 0; z--) {
+ /*
+ * w2$$ = MASK(wq$$);
+ * w1$$ = SHLBYTE(wq$$);
+ * w2$$ &= NBYTES(0x1d);
+ * wq$$ = w1$$ ^ w2$$;
+ */
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vsra.vi v2, v1, 7\n"
+ "vsll.vi v3, v1, 1\n"
+ "vand.vx v2, v2, %[x1d]\n"
+ "vxor.vv v1, v3, v2\n"
+ ".option pop\n"
+ : :
+ [x1d]"r"(0x1d)
+ );
+ }
+
+ /*
+ * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
+ * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
+ * v0:wp0, v1:wq0, v2:p0, v3:q0
+ */
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vle8.v v2, (%[wp0])\n"
+ "vle8.v v3, (%[wq0])\n"
+ "vxor.vv v2, v2, v0\n"
+ "vxor.vv v3, v3, v1\n"
+ "vse8.v v2, (%[wp0])\n"
+ "vse8.v v3, (%[wq0])\n"
+ ".option pop\n"
+ : :
+ [wp0]"r"(&p[d + NSIZE * 0]),
+ [wq0]"r"(&q[d + NSIZE * 0])
+ );
+ }
+}
+
+static void raid6_rvv2_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs)
+{
+ u8 **dptr = (u8 **)ptrs;
+ unsigned long d;
+ int z, z0;
+ u8 *p, *q;
+
+ z0 = disks - 3; /* Highest data disk */
+ p = dptr[z0 + 1]; /* XOR parity */
+ q = dptr[z0 + 2]; /* RS syndrome */
+
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vsetvli t0, x0, e8, m1, ta, ma\n"
+ ".option pop\n"
+ );
+
+ /*
+ * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10
+ * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11
+ */
+ for (d = 0; d < bytes; d += NSIZE * 2) {
+ /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vle8.v v0, (%[wp0])\n"
+ "vle8.v v1, (%[wp0])\n"
+ "vle8.v v4, (%[wp1])\n"
+ "vle8.v v5, (%[wp1])\n"
+ ".option pop\n"
+ : :
+ [wp0]"r"(&dptr[z0][d + 0 * NSIZE]),
+ [wp1]"r"(&dptr[z0][d + 1 * NSIZE])
+ );
+
+ for (z = z0 - 1; z >= 0; z--) {
+ /*
+ * w2$$ = MASK(wq$$);
+ * w1$$ = SHLBYTE(wq$$);
+ * w2$$ &= NBYTES(0x1d);
+ * w1$$ ^= w2$$;
+ * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
+ * wq$$ = w1$$ ^ wd$$;
+ * wp$$ ^= wd$$;
+ */
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vsra.vi v2, v1, 7\n"
+ "vsll.vi v3, v1, 1\n"
+ "vand.vx v2, v2, %[x1d]\n"
+ "vxor.vv v3, v3, v2\n"
+ "vle8.v v2, (%[wd0])\n"
+ "vxor.vv v1, v3, v2\n"
+ "vxor.vv v0, v0, v2\n"
+
+ "vsra.vi v6, v5, 7\n"
+ "vsll.vi v7, v5, 1\n"
+ "vand.vx v6, v6, %[x1d]\n"
+ "vxor.vv v7, v7, v6\n"
+ "vle8.v v6, (%[wd1])\n"
+ "vxor.vv v5, v7, v6\n"
+ "vxor.vv v4, v4, v6\n"
+ ".option pop\n"
+ : :
+ [wd0]"r"(&dptr[z][d + 0 * NSIZE]),
+ [wd1]"r"(&dptr[z][d + 1 * NSIZE]),
+ [x1d]"r"(0x1d)
+ );
+ }
+
+ /*
+ * *(unative_t *)&p[d+NSIZE*$$] = wp$$;
+ * *(unative_t *)&q[d+NSIZE*$$] = wq$$;
+ */
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vse8.v v0, (%[wp0])\n"
+ "vse8.v v1, (%[wq0])\n"
+ "vse8.v v4, (%[wp1])\n"
+ "vse8.v v5, (%[wq1])\n"
+ ".option pop\n"
+ : :
+ [wp0]"r"(&p[d + NSIZE * 0]),
+ [wq0]"r"(&q[d + NSIZE * 0]),
+ [wp1]"r"(&p[d + NSIZE * 1]),
+ [wq1]"r"(&q[d + NSIZE * 1])
+ );
+ }
+}
+
+static void raid6_rvv2_xor_syndrome_real(int disks, int start, int stop,
+ unsigned long bytes, void **ptrs)
+{
+ u8 **dptr = (u8 **)ptrs;
+ u8 *p, *q;
+ unsigned long d;
+ int z, z0;
+
+ z0 = stop; /* P/Q right side optimization */
+ p = dptr[disks - 2]; /* XOR parity */
+ q = dptr[disks - 1]; /* RS syndrome */
+
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vsetvli t0, x0, e8, m1, ta, ma\n"
+ ".option pop\n"
+ );
+
+ /*
+ * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10
+ * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11
+ */
+ for (d = 0; d < bytes; d += NSIZE * 2) {
+ /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vle8.v v0, (%[wp0])\n"
+ "vle8.v v1, (%[wp0])\n"
+ "vle8.v v4, (%[wp1])\n"
+ "vle8.v v5, (%[wp1])\n"
+ ".option pop\n"
+ : :
+ [wp0]"r"(&dptr[z0][d + 0 * NSIZE]),
+ [wp1]"r"(&dptr[z0][d + 1 * NSIZE])
+ );
+
+ /* P/Q data pages */
+ for (z = z0 - 1; z >= start; z--) {
+ /*
+ * w2$$ = MASK(wq$$);
+ * w1$$ = SHLBYTE(wq$$);
+ * w2$$ &= NBYTES(0x1d);
+ * w1$$ ^= w2$$;
+ * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
+ * wq$$ = w1$$ ^ wd$$;
+ * wp$$ ^= wd$$;
+ */
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vsra.vi v2, v1, 7\n"
+ "vsll.vi v3, v1, 1\n"
+ "vand.vx v2, v2, %[x1d]\n"
+ "vxor.vv v3, v3, v2\n"
+ "vle8.v v2, (%[wd0])\n"
+ "vxor.vv v1, v3, v2\n"
+ "vxor.vv v0, v0, v2\n"
+
+ "vsra.vi v6, v5, 7\n"
+ "vsll.vi v7, v5, 1\n"
+ "vand.vx v6, v6, %[x1d]\n"
+ "vxor.vv v7, v7, v6\n"
+ "vle8.v v6, (%[wd1])\n"
+ "vxor.vv v5, v7, v6\n"
+ "vxor.vv v4, v4, v6\n"
+ ".option pop\n"
+ : :
+ [wd0]"r"(&dptr[z][d + 0 * NSIZE]),
+ [wd1]"r"(&dptr[z][d + 1 * NSIZE]),
+ [x1d]"r"(0x1d)
+ );
+ }
+
+ /* P/Q left side optimization */
+ for (z = start - 1; z >= 0; z--) {
+ /*
+ * w2$$ = MASK(wq$$);
+ * w1$$ = SHLBYTE(wq$$);
+ * w2$$ &= NBYTES(0x1d);
+ * wq$$ = w1$$ ^ w2$$;
+ */
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vsra.vi v2, v1, 7\n"
+ "vsll.vi v3, v1, 1\n"
+ "vand.vx v2, v2, %[x1d]\n"
+ "vxor.vv v1, v3, v2\n"
+
+ "vsra.vi v6, v5, 7\n"
+ "vsll.vi v7, v5, 1\n"
+ "vand.vx v6, v6, %[x1d]\n"
+ "vxor.vv v5, v7, v6\n"
+ ".option pop\n"
+ : :
+ [x1d]"r"(0x1d)
+ );
+ }
+
+ /*
+ * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
+ * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
+ * v0:wp0, v1:wq0, v2:p0, v3:q0
+ * v4:wp1, v5:wq1, v6:p1, v7:q1
+ */
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vle8.v v2, (%[wp0])\n"
+ "vle8.v v3, (%[wq0])\n"
+ "vxor.vv v2, v2, v0\n"
+ "vxor.vv v3, v3, v1\n"
+ "vse8.v v2, (%[wp0])\n"
+ "vse8.v v3, (%[wq0])\n"
+
+ "vle8.v v6, (%[wp1])\n"
+ "vle8.v v7, (%[wq1])\n"
+ "vxor.vv v6, v6, v4\n"
+ "vxor.vv v7, v7, v5\n"
+ "vse8.v v6, (%[wp1])\n"
+ "vse8.v v7, (%[wq1])\n"
+ ".option pop\n"
+ : :
+ [wp0]"r"(&p[d + NSIZE * 0]),
+ [wq0]"r"(&q[d + NSIZE * 0]),
+ [wp1]"r"(&p[d + NSIZE * 1]),
+ [wq1]"r"(&q[d + NSIZE * 1])
+ );
+ }
+}
+
+static void raid6_rvv4_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs)
+{
+ u8 **dptr = (u8 **)ptrs;
+ unsigned long d;
+ int z, z0;
+ u8 *p, *q;
+
+ z0 = disks - 3; /* Highest data disk */
+ p = dptr[z0 + 1]; /* XOR parity */
+ q = dptr[z0 + 2]; /* RS syndrome */
+
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vsetvli t0, x0, e8, m1, ta, ma\n"
+ ".option pop\n"
+ );
+
+ /*
+ * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10
+ * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11
+ * v8:wp2, v9:wq2, v10:wd2/w22, v11:w12
+ * v12:wp3, v13:wq3, v14:wd3/w23, v15:w13
+ */
+ for (d = 0; d < bytes; d += NSIZE * 4) {
+ /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vle8.v v0, (%[wp0])\n"
+ "vle8.v v1, (%[wp0])\n"
+ "vle8.v v4, (%[wp1])\n"
+ "vle8.v v5, (%[wp1])\n"
+ "vle8.v v8, (%[wp2])\n"
+ "vle8.v v9, (%[wp2])\n"
+ "vle8.v v12, (%[wp3])\n"
+ "vle8.v v13, (%[wp3])\n"
+ ".option pop\n"
+ : :
+ [wp0]"r"(&dptr[z0][d + 0 * NSIZE]),
+ [wp1]"r"(&dptr[z0][d + 1 * NSIZE]),
+ [wp2]"r"(&dptr[z0][d + 2 * NSIZE]),
+ [wp3]"r"(&dptr[z0][d + 3 * NSIZE])
+ );
+
+ for (z = z0 - 1; z >= 0; z--) {
+ /*
+ * w2$$ = MASK(wq$$);
+ * w1$$ = SHLBYTE(wq$$);
+ * w2$$ &= NBYTES(0x1d);
+ * w1$$ ^= w2$$;
+ * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
+ * wq$$ = w1$$ ^ wd$$;
+ * wp$$ ^= wd$$;
+ */
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vsra.vi v2, v1, 7\n"
+ "vsll.vi v3, v1, 1\n"
+ "vand.vx v2, v2, %[x1d]\n"
+ "vxor.vv v3, v3, v2\n"
+ "vle8.v v2, (%[wd0])\n"
+ "vxor.vv v1, v3, v2\n"
+ "vxor.vv v0, v0, v2\n"
+
+ "vsra.vi v6, v5, 7\n"
+ "vsll.vi v7, v5, 1\n"
+ "vand.vx v6, v6, %[x1d]\n"
+ "vxor.vv v7, v7, v6\n"
+ "vle8.v v6, (%[wd1])\n"
+ "vxor.vv v5, v7, v6\n"
+ "vxor.vv v4, v4, v6\n"
+
+ "vsra.vi v10, v9, 7\n"
+ "vsll.vi v11, v9, 1\n"
+ "vand.vx v10, v10, %[x1d]\n"
+ "vxor.vv v11, v11, v10\n"
+ "vle8.v v10, (%[wd2])\n"
+ "vxor.vv v9, v11, v10\n"
+ "vxor.vv v8, v8, v10\n"
+
+ "vsra.vi v14, v13, 7\n"
+ "vsll.vi v15, v13, 1\n"
+ "vand.vx v14, v14, %[x1d]\n"
+ "vxor.vv v15, v15, v14\n"
+ "vle8.v v14, (%[wd3])\n"
+ "vxor.vv v13, v15, v14\n"
+ "vxor.vv v12, v12, v14\n"
+ ".option pop\n"
+ : :
+ [wd0]"r"(&dptr[z][d + 0 * NSIZE]),
+ [wd1]"r"(&dptr[z][d + 1 * NSIZE]),
+ [wd2]"r"(&dptr[z][d + 2 * NSIZE]),
+ [wd3]"r"(&dptr[z][d + 3 * NSIZE]),
+ [x1d]"r"(0x1d)
+ );
+ }
+
+ /*
+ * *(unative_t *)&p[d+NSIZE*$$] = wp$$;
+ * *(unative_t *)&q[d+NSIZE*$$] = wq$$;
+ */
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vse8.v v0, (%[wp0])\n"
+ "vse8.v v1, (%[wq0])\n"
+ "vse8.v v4, (%[wp1])\n"
+ "vse8.v v5, (%[wq1])\n"
+ "vse8.v v8, (%[wp2])\n"
+ "vse8.v v9, (%[wq2])\n"
+ "vse8.v v12, (%[wp3])\n"
+ "vse8.v v13, (%[wq3])\n"
+ ".option pop\n"
+ : :
+ [wp0]"r"(&p[d + NSIZE * 0]),
+ [wq0]"r"(&q[d + NSIZE * 0]),
+ [wp1]"r"(&p[d + NSIZE * 1]),
+ [wq1]"r"(&q[d + NSIZE * 1]),
+ [wp2]"r"(&p[d + NSIZE * 2]),
+ [wq2]"r"(&q[d + NSIZE * 2]),
+ [wp3]"r"(&p[d + NSIZE * 3]),
+ [wq3]"r"(&q[d + NSIZE * 3])
+ );
+ }
+}
+
+static void raid6_rvv4_xor_syndrome_real(int disks, int start, int stop,
+ unsigned long bytes, void **ptrs)
+{
+ u8 **dptr = (u8 **)ptrs;
+ u8 *p, *q;
+ unsigned long d;
+ int z, z0;
+
+ z0 = stop; /* P/Q right side optimization */
+ p = dptr[disks - 2]; /* XOR parity */
+ q = dptr[disks - 1]; /* RS syndrome */
+
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vsetvli t0, x0, e8, m1, ta, ma\n"
+ ".option pop\n"
+ );
+
+ /*
+ * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10
+ * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11
+ * v8:wp2, v9:wq2, v10:wd2/w22, v11:w12
+ * v12:wp3, v13:wq3, v14:wd3/w23, v15:w13
+ */
+ for (d = 0; d < bytes; d += NSIZE * 4) {
+ /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vle8.v v0, (%[wp0])\n"
+ "vle8.v v1, (%[wp0])\n"
+ "vle8.v v4, (%[wp1])\n"
+ "vle8.v v5, (%[wp1])\n"
+ "vle8.v v8, (%[wp2])\n"
+ "vle8.v v9, (%[wp2])\n"
+ "vle8.v v12, (%[wp3])\n"
+ "vle8.v v13, (%[wp3])\n"
+ ".option pop\n"
+ : :
+ [wp0]"r"(&dptr[z0][d + 0 * NSIZE]),
+ [wp1]"r"(&dptr[z0][d + 1 * NSIZE]),
+ [wp2]"r"(&dptr[z0][d + 2 * NSIZE]),
+ [wp3]"r"(&dptr[z0][d + 3 * NSIZE])
+ );
+
+ /* P/Q data pages */
+ for (z = z0 - 1; z >= start; z--) {
+ /*
+ * w2$$ = MASK(wq$$);
+ * w1$$ = SHLBYTE(wq$$);
+ * w2$$ &= NBYTES(0x1d);
+ * w1$$ ^= w2$$;
+ * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
+ * wq$$ = w1$$ ^ wd$$;
+ * wp$$ ^= wd$$;
+ */
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vsra.vi v2, v1, 7\n"
+ "vsll.vi v3, v1, 1\n"
+ "vand.vx v2, v2, %[x1d]\n"
+ "vxor.vv v3, v3, v2\n"
+ "vle8.v v2, (%[wd0])\n"
+ "vxor.vv v1, v3, v2\n"
+ "vxor.vv v0, v0, v2\n"
+
+ "vsra.vi v6, v5, 7\n"
+ "vsll.vi v7, v5, 1\n"
+ "vand.vx v6, v6, %[x1d]\n"
+ "vxor.vv v7, v7, v6\n"
+ "vle8.v v6, (%[wd1])\n"
+ "vxor.vv v5, v7, v6\n"
+ "vxor.vv v4, v4, v6\n"
+
+ "vsra.vi v10, v9, 7\n"
+ "vsll.vi v11, v9, 1\n"
+ "vand.vx v10, v10, %[x1d]\n"
+ "vxor.vv v11, v11, v10\n"
+ "vle8.v v10, (%[wd2])\n"
+ "vxor.vv v9, v11, v10\n"
+ "vxor.vv v8, v8, v10\n"
+
+ "vsra.vi v14, v13, 7\n"
+ "vsll.vi v15, v13, 1\n"
+ "vand.vx v14, v14, %[x1d]\n"
+ "vxor.vv v15, v15, v14\n"
+ "vle8.v v14, (%[wd3])\n"
+ "vxor.vv v13, v15, v14\n"
+ "vxor.vv v12, v12, v14\n"
+ ".option pop\n"
+ : :
+ [wd0]"r"(&dptr[z][d + 0 * NSIZE]),
+ [wd1]"r"(&dptr[z][d + 1 * NSIZE]),
+ [wd2]"r"(&dptr[z][d + 2 * NSIZE]),
+ [wd3]"r"(&dptr[z][d + 3 * NSIZE]),
+ [x1d]"r"(0x1d)
+ );
+ }
+
+ /* P/Q left side optimization */
+ for (z = start - 1; z >= 0; z--) {
+ /*
+ * w2$$ = MASK(wq$$);
+ * w1$$ = SHLBYTE(wq$$);
+ * w2$$ &= NBYTES(0x1d);
+ * wq$$ = w1$$ ^ w2$$;
+ */
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vsra.vi v2, v1, 7\n"
+ "vsll.vi v3, v1, 1\n"
+ "vand.vx v2, v2, %[x1d]\n"
+ "vxor.vv v1, v3, v2\n"
+
+ "vsra.vi v6, v5, 7\n"
+ "vsll.vi v7, v5, 1\n"
+ "vand.vx v6, v6, %[x1d]\n"
+ "vxor.vv v5, v7, v6\n"
+
+ "vsra.vi v10, v9, 7\n"
+ "vsll.vi v11, v9, 1\n"
+ "vand.vx v10, v10, %[x1d]\n"
+ "vxor.vv v9, v11, v10\n"
+
+ "vsra.vi v14, v13, 7\n"
+ "vsll.vi v15, v13, 1\n"
+ "vand.vx v14, v14, %[x1d]\n"
+ "vxor.vv v13, v15, v14\n"
+ ".option pop\n"
+ : :
+ [x1d]"r"(0x1d)
+ );
+ }
+
+ /*
+ * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
+ * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
+ * v0:wp0, v1:wq0, v2:p0, v3:q0
+ * v4:wp1, v5:wq1, v6:p1, v7:q1
+ * v8:wp2, v9:wq2, v10:p2, v11:q2
+ * v12:wp3, v13:wq3, v14:p3, v15:q3
+ */
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vle8.v v2, (%[wp0])\n"
+ "vle8.v v3, (%[wq0])\n"
+ "vxor.vv v2, v2, v0\n"
+ "vxor.vv v3, v3, v1\n"
+ "vse8.v v2, (%[wp0])\n"
+ "vse8.v v3, (%[wq0])\n"
+
+ "vle8.v v6, (%[wp1])\n"
+ "vle8.v v7, (%[wq1])\n"
+ "vxor.vv v6, v6, v4\n"
+ "vxor.vv v7, v7, v5\n"
+ "vse8.v v6, (%[wp1])\n"
+ "vse8.v v7, (%[wq1])\n"
+
+ "vle8.v v10, (%[wp2])\n"
+ "vle8.v v11, (%[wq2])\n"
+ "vxor.vv v10, v10, v8\n"
+ "vxor.vv v11, v11, v9\n"
+ "vse8.v v10, (%[wp2])\n"
+ "vse8.v v11, (%[wq2])\n"
+
+ "vle8.v v14, (%[wp3])\n"
+ "vle8.v v15, (%[wq3])\n"
+ "vxor.vv v14, v14, v12\n"
+ "vxor.vv v15, v15, v13\n"
+ "vse8.v v14, (%[wp3])\n"
+ "vse8.v v15, (%[wq3])\n"
+ ".option pop\n"
+ : :
+ [wp0]"r"(&p[d + NSIZE * 0]),
+ [wq0]"r"(&q[d + NSIZE * 0]),
+ [wp1]"r"(&p[d + NSIZE * 1]),
+ [wq1]"r"(&q[d + NSIZE * 1]),
+ [wp2]"r"(&p[d + NSIZE * 2]),
+ [wq2]"r"(&q[d + NSIZE * 2]),
+ [wp3]"r"(&p[d + NSIZE * 3]),
+ [wq3]"r"(&q[d + NSIZE * 3])
+ );
+ }
+}
+
+static void raid6_rvv8_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs)
+{
+ u8 **dptr = (u8 **)ptrs;
+ unsigned long d;
+ int z, z0;
+ u8 *p, *q;
+
+ z0 = disks - 3; /* Highest data disk */
+ p = dptr[z0 + 1]; /* XOR parity */
+ q = dptr[z0 + 2]; /* RS syndrome */
+
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vsetvli t0, x0, e8, m1, ta, ma\n"
+ ".option pop\n"
+ );
+
+ /*
+ * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10
+ * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11
+ * v8:wp2, v9:wq2, v10:wd2/w22, v11:w12
+ * v12:wp3, v13:wq3, v14:wd3/w23, v15:w13
+ * v16:wp4, v17:wq4, v18:wd4/w24, v19:w14
+ * v20:wp5, v21:wq5, v22:wd5/w25, v23:w15
+ * v24:wp6, v25:wq6, v26:wd6/w26, v27:w16
+ * v28:wp7, v29:wq7, v30:wd7/w27, v31:w17
+ */
+ for (d = 0; d < bytes; d += NSIZE * 8) {
+ /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vle8.v v0, (%[wp0])\n"
+ "vle8.v v1, (%[wp0])\n"
+ "vle8.v v4, (%[wp1])\n"
+ "vle8.v v5, (%[wp1])\n"
+ "vle8.v v8, (%[wp2])\n"
+ "vle8.v v9, (%[wp2])\n"
+ "vle8.v v12, (%[wp3])\n"
+ "vle8.v v13, (%[wp3])\n"
+ "vle8.v v16, (%[wp4])\n"
+ "vle8.v v17, (%[wp4])\n"
+ "vle8.v v20, (%[wp5])\n"
+ "vle8.v v21, (%[wp5])\n"
+ "vle8.v v24, (%[wp6])\n"
+ "vle8.v v25, (%[wp6])\n"
+ "vle8.v v28, (%[wp7])\n"
+ "vle8.v v29, (%[wp7])\n"
+ ".option pop\n"
+ : :
+ [wp0]"r"(&dptr[z0][d + 0 * NSIZE]),
+ [wp1]"r"(&dptr[z0][d + 1 * NSIZE]),
+ [wp2]"r"(&dptr[z0][d + 2 * NSIZE]),
+ [wp3]"r"(&dptr[z0][d + 3 * NSIZE]),
+ [wp4]"r"(&dptr[z0][d + 4 * NSIZE]),
+ [wp5]"r"(&dptr[z0][d + 5 * NSIZE]),
+ [wp6]"r"(&dptr[z0][d + 6 * NSIZE]),
+ [wp7]"r"(&dptr[z0][d + 7 * NSIZE])
+ );
+
+ for (z = z0 - 1; z >= 0; z--) {
+ /*
+ * w2$$ = MASK(wq$$);
+ * w1$$ = SHLBYTE(wq$$);
+ * w2$$ &= NBYTES(0x1d);
+ * w1$$ ^= w2$$;
+ * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
+ * wq$$ = w1$$ ^ wd$$;
+ * wp$$ ^= wd$$;
+ */
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vsra.vi v2, v1, 7\n"
+ "vsll.vi v3, v1, 1\n"
+ "vand.vx v2, v2, %[x1d]\n"
+ "vxor.vv v3, v3, v2\n"
+ "vle8.v v2, (%[wd0])\n"
+ "vxor.vv v1, v3, v2\n"
+ "vxor.vv v0, v0, v2\n"
+
+ "vsra.vi v6, v5, 7\n"
+ "vsll.vi v7, v5, 1\n"
+ "vand.vx v6, v6, %[x1d]\n"
+ "vxor.vv v7, v7, v6\n"
+ "vle8.v v6, (%[wd1])\n"
+ "vxor.vv v5, v7, v6\n"
+ "vxor.vv v4, v4, v6\n"
+
+ "vsra.vi v10, v9, 7\n"
+ "vsll.vi v11, v9, 1\n"
+ "vand.vx v10, v10, %[x1d]\n"
+ "vxor.vv v11, v11, v10\n"
+ "vle8.v v10, (%[wd2])\n"
+ "vxor.vv v9, v11, v10\n"
+ "vxor.vv v8, v8, v10\n"
+
+ "vsra.vi v14, v13, 7\n"
+ "vsll.vi v15, v13, 1\n"
+ "vand.vx v14, v14, %[x1d]\n"
+ "vxor.vv v15, v15, v14\n"
+ "vle8.v v14, (%[wd3])\n"
+ "vxor.vv v13, v15, v14\n"
+ "vxor.vv v12, v12, v14\n"
+
+ "vsra.vi v18, v17, 7\n"
+ "vsll.vi v19, v17, 1\n"
+ "vand.vx v18, v18, %[x1d]\n"
+ "vxor.vv v19, v19, v18\n"
+ "vle8.v v18, (%[wd4])\n"
+ "vxor.vv v17, v19, v18\n"
+ "vxor.vv v16, v16, v18\n"
+
+ "vsra.vi v22, v21, 7\n"
+ "vsll.vi v23, v21, 1\n"
+ "vand.vx v22, v22, %[x1d]\n"
+ "vxor.vv v23, v23, v22\n"
+ "vle8.v v22, (%[wd5])\n"
+ "vxor.vv v21, v23, v22\n"
+ "vxor.vv v20, v20, v22\n"
+
+ "vsra.vi v26, v25, 7\n"
+ "vsll.vi v27, v25, 1\n"
+ "vand.vx v26, v26, %[x1d]\n"
+ "vxor.vv v27, v27, v26\n"
+ "vle8.v v26, (%[wd6])\n"
+ "vxor.vv v25, v27, v26\n"
+ "vxor.vv v24, v24, v26\n"
+
+ "vsra.vi v30, v29, 7\n"
+ "vsll.vi v31, v29, 1\n"
+ "vand.vx v30, v30, %[x1d]\n"
+ "vxor.vv v31, v31, v30\n"
+ "vle8.v v30, (%[wd7])\n"
+ "vxor.vv v29, v31, v30\n"
+ "vxor.vv v28, v28, v30\n"
+ ".option pop\n"
+ : :
+ [wd0]"r"(&dptr[z][d + 0 * NSIZE]),
+ [wd1]"r"(&dptr[z][d + 1 * NSIZE]),
+ [wd2]"r"(&dptr[z][d + 2 * NSIZE]),
+ [wd3]"r"(&dptr[z][d + 3 * NSIZE]),
+ [wd4]"r"(&dptr[z][d + 4 * NSIZE]),
+ [wd5]"r"(&dptr[z][d + 5 * NSIZE]),
+ [wd6]"r"(&dptr[z][d + 6 * NSIZE]),
+ [wd7]"r"(&dptr[z][d + 7 * NSIZE]),
+ [x1d]"r"(0x1d)
+ );
+ }
+
+ /*
+ * *(unative_t *)&p[d+NSIZE*$$] = wp$$;
+ * *(unative_t *)&q[d+NSIZE*$$] = wq$$;
+ */
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vse8.v v0, (%[wp0])\n"
+ "vse8.v v1, (%[wq0])\n"
+ "vse8.v v4, (%[wp1])\n"
+ "vse8.v v5, (%[wq1])\n"
+ "vse8.v v8, (%[wp2])\n"
+ "vse8.v v9, (%[wq2])\n"
+ "vse8.v v12, (%[wp3])\n"
+ "vse8.v v13, (%[wq3])\n"
+ "vse8.v v16, (%[wp4])\n"
+ "vse8.v v17, (%[wq4])\n"
+ "vse8.v v20, (%[wp5])\n"
+ "vse8.v v21, (%[wq5])\n"
+ "vse8.v v24, (%[wp6])\n"
+ "vse8.v v25, (%[wq6])\n"
+ "vse8.v v28, (%[wp7])\n"
+ "vse8.v v29, (%[wq7])\n"
+ ".option pop\n"
+ : :
+ [wp0]"r"(&p[d + NSIZE * 0]),
+ [wq0]"r"(&q[d + NSIZE * 0]),
+ [wp1]"r"(&p[d + NSIZE * 1]),
+ [wq1]"r"(&q[d + NSIZE * 1]),
+ [wp2]"r"(&p[d + NSIZE * 2]),
+ [wq2]"r"(&q[d + NSIZE * 2]),
+ [wp3]"r"(&p[d + NSIZE * 3]),
+ [wq3]"r"(&q[d + NSIZE * 3]),
+ [wp4]"r"(&p[d + NSIZE * 4]),
+ [wq4]"r"(&q[d + NSIZE * 4]),
+ [wp5]"r"(&p[d + NSIZE * 5]),
+ [wq5]"r"(&q[d + NSIZE * 5]),
+ [wp6]"r"(&p[d + NSIZE * 6]),
+ [wq6]"r"(&q[d + NSIZE * 6]),
+ [wp7]"r"(&p[d + NSIZE * 7]),
+ [wq7]"r"(&q[d + NSIZE * 7])
+ );
+ }
+}
+
+static void raid6_rvv8_xor_syndrome_real(int disks, int start, int stop,
+ unsigned long bytes, void **ptrs)
+{
+ u8 **dptr = (u8 **)ptrs;
+ u8 *p, *q;
+ unsigned long d;
+ int z, z0;
+
+ z0 = stop; /* P/Q right side optimization */
+ p = dptr[disks - 2]; /* XOR parity */
+ q = dptr[disks - 1]; /* RS syndrome */
+
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vsetvli t0, x0, e8, m1, ta, ma\n"
+ ".option pop\n"
+ );
+
+ /*
+ * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10
+ * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11
+ * v8:wp2, v9:wq2, v10:wd2/w22, v11:w12
+ * v12:wp3, v13:wq3, v14:wd3/w23, v15:w13
+ * v16:wp4, v17:wq4, v18:wd4/w24, v19:w14
+ * v20:wp5, v21:wq5, v22:wd5/w25, v23:w15
+ * v24:wp6, v25:wq6, v26:wd6/w26, v27:w16
+ * v28:wp7, v29:wq7, v30:wd7/w27, v31:w17
+ */
+ for (d = 0; d < bytes; d += NSIZE * 8) {
+ /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vle8.v v0, (%[wp0])\n"
+ "vle8.v v1, (%[wp0])\n"
+ "vle8.v v4, (%[wp1])\n"
+ "vle8.v v5, (%[wp1])\n"
+ "vle8.v v8, (%[wp2])\n"
+ "vle8.v v9, (%[wp2])\n"
+ "vle8.v v12, (%[wp3])\n"
+ "vle8.v v13, (%[wp3])\n"
+ "vle8.v v16, (%[wp4])\n"
+ "vle8.v v17, (%[wp4])\n"
+ "vle8.v v20, (%[wp5])\n"
+ "vle8.v v21, (%[wp5])\n"
+ "vle8.v v24, (%[wp6])\n"
+ "vle8.v v25, (%[wp6])\n"
+ "vle8.v v28, (%[wp7])\n"
+ "vle8.v v29, (%[wp7])\n"
+ ".option pop\n"
+ : :
+ [wp0]"r"(&dptr[z0][d + 0 * NSIZE]),
+ [wp1]"r"(&dptr[z0][d + 1 * NSIZE]),
+ [wp2]"r"(&dptr[z0][d + 2 * NSIZE]),
+ [wp3]"r"(&dptr[z0][d + 3 * NSIZE]),
+ [wp4]"r"(&dptr[z0][d + 4 * NSIZE]),
+ [wp5]"r"(&dptr[z0][d + 5 * NSIZE]),
+ [wp6]"r"(&dptr[z0][d + 6 * NSIZE]),
+ [wp7]"r"(&dptr[z0][d + 7 * NSIZE])
+ );
+
+ /* P/Q data pages */
+ for (z = z0 - 1; z >= start; z--) {
+ /*
+ * w2$$ = MASK(wq$$);
+ * w1$$ = SHLBYTE(wq$$);
+ * w2$$ &= NBYTES(0x1d);
+ * w1$$ ^= w2$$;
+ * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
+ * wq$$ = w1$$ ^ wd$$;
+ * wp$$ ^= wd$$;
+ */
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vsra.vi v2, v1, 7\n"
+ "vsll.vi v3, v1, 1\n"
+ "vand.vx v2, v2, %[x1d]\n"
+ "vxor.vv v3, v3, v2\n"
+ "vle8.v v2, (%[wd0])\n"
+ "vxor.vv v1, v3, v2\n"
+ "vxor.vv v0, v0, v2\n"
+
+ "vsra.vi v6, v5, 7\n"
+ "vsll.vi v7, v5, 1\n"
+ "vand.vx v6, v6, %[x1d]\n"
+ "vxor.vv v7, v7, v6\n"
+ "vle8.v v6, (%[wd1])\n"
+ "vxor.vv v5, v7, v6\n"
+ "vxor.vv v4, v4, v6\n"
+
+ "vsra.vi v10, v9, 7\n"
+ "vsll.vi v11, v9, 1\n"
+ "vand.vx v10, v10, %[x1d]\n"
+ "vxor.vv v11, v11, v10\n"
+ "vle8.v v10, (%[wd2])\n"
+ "vxor.vv v9, v11, v10\n"
+ "vxor.vv v8, v8, v10\n"
+
+ "vsra.vi v14, v13, 7\n"
+ "vsll.vi v15, v13, 1\n"
+ "vand.vx v14, v14, %[x1d]\n"
+ "vxor.vv v15, v15, v14\n"
+ "vle8.v v14, (%[wd3])\n"
+ "vxor.vv v13, v15, v14\n"
+ "vxor.vv v12, v12, v14\n"
+
+ "vsra.vi v18, v17, 7\n"
+ "vsll.vi v19, v17, 1\n"
+ "vand.vx v18, v18, %[x1d]\n"
+ "vxor.vv v19, v19, v18\n"
+ "vle8.v v18, (%[wd4])\n"
+ "vxor.vv v17, v19, v18\n"
+ "vxor.vv v16, v16, v18\n"
+
+ "vsra.vi v22, v21, 7\n"
+ "vsll.vi v23, v21, 1\n"
+ "vand.vx v22, v22, %[x1d]\n"
+ "vxor.vv v23, v23, v22\n"
+ "vle8.v v22, (%[wd5])\n"
+ "vxor.vv v21, v23, v22\n"
+ "vxor.vv v20, v20, v22\n"
+
+ "vsra.vi v26, v25, 7\n"
+ "vsll.vi v27, v25, 1\n"
+ "vand.vx v26, v26, %[x1d]\n"
+ "vxor.vv v27, v27, v26\n"
+ "vle8.v v26, (%[wd6])\n"
+ "vxor.vv v25, v27, v26\n"
+ "vxor.vv v24, v24, v26\n"
+
+ "vsra.vi v30, v29, 7\n"
+ "vsll.vi v31, v29, 1\n"
+ "vand.vx v30, v30, %[x1d]\n"
+ "vxor.vv v31, v31, v30\n"
+ "vle8.v v30, (%[wd7])\n"
+ "vxor.vv v29, v31, v30\n"
+ "vxor.vv v28, v28, v30\n"
+ ".option pop\n"
+ : :
+ [wd0]"r"(&dptr[z][d + 0 * NSIZE]),
+ [wd1]"r"(&dptr[z][d + 1 * NSIZE]),
+ [wd2]"r"(&dptr[z][d + 2 * NSIZE]),
+ [wd3]"r"(&dptr[z][d + 3 * NSIZE]),
+ [wd4]"r"(&dptr[z][d + 4 * NSIZE]),
+ [wd5]"r"(&dptr[z][d + 5 * NSIZE]),
+ [wd6]"r"(&dptr[z][d + 6 * NSIZE]),
+ [wd7]"r"(&dptr[z][d + 7 * NSIZE]),
+ [x1d]"r"(0x1d)
+ );
+ }
+
+ /* P/Q left side optimization */
+ for (z = start - 1; z >= 0; z--) {
+ /*
+ * w2$$ = MASK(wq$$);
+ * w1$$ = SHLBYTE(wq$$);
+ * w2$$ &= NBYTES(0x1d);
+ * wq$$ = w1$$ ^ w2$$;
+ */
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vsra.vi v2, v1, 7\n"
+ "vsll.vi v3, v1, 1\n"
+ "vand.vx v2, v2, %[x1d]\n"
+ "vxor.vv v1, v3, v2\n"
+
+ "vsra.vi v6, v5, 7\n"
+ "vsll.vi v7, v5, 1\n"
+ "vand.vx v6, v6, %[x1d]\n"
+ "vxor.vv v5, v7, v6\n"
+
+ "vsra.vi v10, v9, 7\n"
+ "vsll.vi v11, v9, 1\n"
+ "vand.vx v10, v10, %[x1d]\n"
+ "vxor.vv v9, v11, v10\n"
+
+ "vsra.vi v14, v13, 7\n"
+ "vsll.vi v15, v13, 1\n"
+ "vand.vx v14, v14, %[x1d]\n"
+ "vxor.vv v13, v15, v14\n"
+
+ "vsra.vi v18, v17, 7\n"
+ "vsll.vi v19, v17, 1\n"
+ "vand.vx v18, v18, %[x1d]\n"
+ "vxor.vv v17, v19, v18\n"
+
+ "vsra.vi v22, v21, 7\n"
+ "vsll.vi v23, v21, 1\n"
+ "vand.vx v22, v22, %[x1d]\n"
+ "vxor.vv v21, v23, v22\n"
+
+ "vsra.vi v26, v25, 7\n"
+ "vsll.vi v27, v25, 1\n"
+ "vand.vx v26, v26, %[x1d]\n"
+ "vxor.vv v25, v27, v26\n"
+
+ "vsra.vi v30, v29, 7\n"
+ "vsll.vi v31, v29, 1\n"
+ "vand.vx v30, v30, %[x1d]\n"
+ "vxor.vv v29, v31, v30\n"
+ ".option pop\n"
+ : :
+ [x1d]"r"(0x1d)
+ );
+ }
+
+ /*
+ * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
+ * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
+ * v0:wp0, v1:wq0, v2:p0, v3:q0
+ * v4:wp1, v5:wq1, v6:p1, v7:q1
+ * v8:wp2, v9:wq2, v10:p2, v11:q2
+ * v12:wp3, v13:wq3, v14:p3, v15:q3
+ * v16:wp4, v17:wq4, v18:p4, v19:q4
+ * v20:wp5, v21:wq5, v22:p5, v23:q5
+ * v24:wp6, v25:wq6, v26:p6, v27:q6
+ * v28:wp7, v29:wq7, v30:p7, v31:q7
+ */
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vle8.v v2, (%[wp0])\n"
+ "vle8.v v3, (%[wq0])\n"
+ "vxor.vv v2, v2, v0\n"
+ "vxor.vv v3, v3, v1\n"
+ "vse8.v v2, (%[wp0])\n"
+ "vse8.v v3, (%[wq0])\n"
+
+ "vle8.v v6, (%[wp1])\n"
+ "vle8.v v7, (%[wq1])\n"
+ "vxor.vv v6, v6, v4\n"
+ "vxor.vv v7, v7, v5\n"
+ "vse8.v v6, (%[wp1])\n"
+ "vse8.v v7, (%[wq1])\n"
+
+ "vle8.v v10, (%[wp2])\n"
+ "vle8.v v11, (%[wq2])\n"
+ "vxor.vv v10, v10, v8\n"
+ "vxor.vv v11, v11, v9\n"
+ "vse8.v v10, (%[wp2])\n"
+ "vse8.v v11, (%[wq2])\n"
+
+ "vle8.v v14, (%[wp3])\n"
+ "vle8.v v15, (%[wq3])\n"
+ "vxor.vv v14, v14, v12\n"
+ "vxor.vv v15, v15, v13\n"
+ "vse8.v v14, (%[wp3])\n"
+ "vse8.v v15, (%[wq3])\n"
+
+ "vle8.v v18, (%[wp4])\n"
+ "vle8.v v19, (%[wq4])\n"
+ "vxor.vv v18, v18, v16\n"
+ "vxor.vv v19, v19, v17\n"
+ "vse8.v v18, (%[wp4])\n"
+ "vse8.v v19, (%[wq4])\n"
+
+ "vle8.v v22, (%[wp5])\n"
+ "vle8.v v23, (%[wq5])\n"
+ "vxor.vv v22, v22, v20\n"
+ "vxor.vv v23, v23, v21\n"
+ "vse8.v v22, (%[wp5])\n"
+ "vse8.v v23, (%[wq5])\n"
+
+ "vle8.v v26, (%[wp6])\n"
+ "vle8.v v27, (%[wq6])\n"
+ "vxor.vv v26, v26, v24\n"
+ "vxor.vv v27, v27, v25\n"
+ "vse8.v v26, (%[wp6])\n"
+ "vse8.v v27, (%[wq6])\n"
+
+ "vle8.v v30, (%[wp7])\n"
+ "vle8.v v31, (%[wq7])\n"
+ "vxor.vv v30, v30, v28\n"
+ "vxor.vv v31, v31, v29\n"
+ "vse8.v v30, (%[wp7])\n"
+ "vse8.v v31, (%[wq7])\n"
+ ".option pop\n"
+ : :
+ [wp0]"r"(&p[d + NSIZE * 0]),
+ [wq0]"r"(&q[d + NSIZE * 0]),
+ [wp1]"r"(&p[d + NSIZE * 1]),
+ [wq1]"r"(&q[d + NSIZE * 1]),
+ [wp2]"r"(&p[d + NSIZE * 2]),
+ [wq2]"r"(&q[d + NSIZE * 2]),
+ [wp3]"r"(&p[d + NSIZE * 3]),
+ [wq3]"r"(&q[d + NSIZE * 3]),
+ [wp4]"r"(&p[d + NSIZE * 4]),
+ [wq4]"r"(&q[d + NSIZE * 4]),
+ [wp5]"r"(&p[d + NSIZE * 5]),
+ [wq5]"r"(&q[d + NSIZE * 5]),
+ [wp6]"r"(&p[d + NSIZE * 6]),
+ [wq6]"r"(&q[d + NSIZE * 6]),
+ [wp7]"r"(&p[d + NSIZE * 7]),
+ [wq7]"r"(&q[d + NSIZE * 7])
+ );
+ }
+}
+
+RAID6_RVV_WRAPPER(1);
+RAID6_RVV_WRAPPER(2);
+RAID6_RVV_WRAPPER(4);
+RAID6_RVV_WRAPPER(8);
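Editor's note: the MASK()/SHLBYTE()/0x1d pseudocode in the comments above is the standard multiply-by-two in GF(2^8) that every RAID6 syndrome kernel implements; each vsra.vi/vand.vx/vsll.vi/vxor.vv quartet performs it across a vector of bytes. A minimal self-contained scalar sketch of the same step (illustrative only; gf2_mul2 is not a name from this patch):

#include <stdint.h>
#include <stdio.h>

/*
 * Multiply one GF(2^8) element by 2 modulo the RAID6 polynomial
 * x^8 + x^4 + x^3 + x^2 + 1, whose low byte is 0x1d.
 */
static uint8_t gf2_mul2(uint8_t v)
{
	uint8_t mask = (v & 0x80) ? 0x1d : 0x00; /* MASK(): vsra.vi 7 + vand.vx 0x1d */
	return (uint8_t)(v << 1) ^ mask;         /* SHLBYTE(): vsll.vi 1, then vxor.vv */
}

int main(void)
{
	printf("%#x\n", gf2_mul2(0x80)); /* 0x1d: the top bit wrapped, so the reduction applies */
	return 0;
}

The Q syndrome is built by applying this step once per remaining data disk and XORing that disk's data in, which is exactly what the unrolled loops above do in v1, v5, v9, and so on.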
diff --git a/lib/raid6/rvv.h b/lib/raid6/rvv.h
new file mode 100644
index 000000000000..94044a1b707b
--- /dev/null
+++ b/lib/raid6/rvv.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2024 Institute of Software, CAS.
+ *
+ * raid6/rvv.h
+ *
+ * Definitions for RISC-V RAID-6 code
+ */
+
+#define RAID6_RVV_WRAPPER(_n) \
+ static void raid6_rvv ## _n ## _gen_syndrome(int disks, \
+ size_t bytes, void **ptrs) \
+ { \
+ void raid6_rvv ## _n ## _gen_syndrome_real(int d, \
+ unsigned long b, void **p); \
+ kernel_vector_begin(); \
+ raid6_rvv ## _n ## _gen_syndrome_real(disks, \
+ (unsigned long)bytes, ptrs); \
+ kernel_vector_end(); \
+ } \
+ static void raid6_rvv ## _n ## _xor_syndrome(int disks, \
+ int start, int stop, \
+ size_t bytes, void **ptrs) \
+ { \
+ void raid6_rvv ## _n ## _xor_syndrome_real(int d, \
+ int s1, int s2, \
+ unsigned long b, void **p); \
+ kernel_vector_begin(); \
+ raid6_rvv ## _n ## _xor_syndrome_real(disks, \
+ start, stop, (unsigned long)bytes, ptrs); \
+ kernel_vector_end(); \
+ } \
+ struct raid6_calls const raid6_rvvx ## _n = { \
+ raid6_rvv ## _n ## _gen_syndrome, \
+ raid6_rvv ## _n ## _xor_syndrome, \
+ rvv_has_vector, \
+ "rvvx" #_n, \
+ 0 \
+ }
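Editor's note: the token pasting is easier to audit expanded. For _n = 1 the macro yields roughly the following (the xor_syndrome wrapper, which expands the same way, is elided):

static void raid6_rvv1_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	void raid6_rvv1_gen_syndrome_real(int d, unsigned long b, void **p);

	kernel_vector_begin();
	raid6_rvv1_gen_syndrome_real(disks, (unsigned long)bytes, ptrs);
	kernel_vector_end();
}

struct raid6_calls const raid6_rvvx1 = {
	raid6_rvv1_gen_syndrome,
	raid6_rvv1_xor_syndrome,
	rvv_has_vector,
	"rvvx1",
	0
};

The kernel_vector_begin()/kernel_vector_end() bracketing is the point of the wrappers: the _real routines clobber V registers, which kernel code may only do inside such a region.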
diff --git a/mm/damon/modules-common.c b/mm/damon/modules-common.c
index 7cf96574cde7..86d58f8c4f63 100644
--- a/mm/damon/modules-common.c
+++ b/mm/damon/modules-common.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Common Primitives for DAMON Modules
+ * Common Code for DAMON Modules
*
* Author: SeongJae Park <sj@kernel.org>
*/
diff --git a/mm/damon/modules-common.h b/mm/damon/modules-common.h
index f49cdb417005..f103ad556368 100644
--- a/mm/damon/modules-common.h
+++ b/mm/damon/modules-common.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
- * Common Primitives for DAMON Modules
+ * Common Code for DAMON Modules
*
* Author: SeongJae Park <sj@kernel.org>
*/
diff --git a/mm/damon/ops-common.c b/mm/damon/ops-common.c
index 0db1fc70c84d..b43620fee6bb 100644
--- a/mm/damon/ops-common.c
+++ b/mm/damon/ops-common.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Common Primitives for Data Access Monitoring
+ * Common Code for Data Access Monitoring
*
* Author: SeongJae Park <sj@kernel.org>
*/
diff --git a/mm/damon/ops-common.h b/mm/damon/ops-common.h
index 18d837d11bce..cc9f5da9c012 100644
--- a/mm/damon/ops-common.h
+++ b/mm/damon/ops-common.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
- * Common Primitives for Data Access Monitoring
+ * Common Code for Data Access Monitoring
*
* Author: SeongJae Park <sj@kernel.org>
*/
diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c
index e8464f7e0014..4102a8c5f992 100644
--- a/mm/damon/paddr.c
+++ b/mm/damon/paddr.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * DAMON Primitives for The Physical Address Space
+ * DAMON Code for The Physical Address Space
*
* Author: SeongJae Park <sj@kernel.org>
*/
diff --git a/mm/damon/sysfs-common.c b/mm/damon/sysfs-common.c
index 70edf45c2174..ffaf285e241a 100644
--- a/mm/damon/sysfs-common.c
+++ b/mm/damon/sysfs-common.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Common Primitives for DAMON Sysfs Interface
+ * Common Code for DAMON Sysfs Interface
*
* Author: SeongJae Park <sj@kernel.org>
*/
diff --git a/mm/damon/sysfs-common.h b/mm/damon/sysfs-common.h
index 70d84bdc9f5f..2099adee11d0 100644
--- a/mm/damon/sysfs-common.h
+++ b/mm/damon/sysfs-common.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
- * Common Primitives for DAMON Sysfs Interface
+ * Common Code for DAMON Sysfs Interface
*
* Author: SeongJae Park <sj@kernel.org>
*/
diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c
index e6d99106a7f9..46554e49a478 100644
--- a/mm/damon/vaddr.c
+++ b/mm/damon/vaddr.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * DAMON Primitives for Virtual Address Spaces
+ * DAMON Code for Virtual Address Spaces
*
* Author: SeongJae Park <sj@kernel.org>
*/
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index f0b1d53079f9..8746ed2fec13 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -121,7 +121,7 @@ static void hugetlb_vma_lock_free(struct vm_area_struct *vma);
static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma);
static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma);
static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
- unsigned long start, unsigned long end);
+ unsigned long start, unsigned long end, bool take_locks);
static struct resv_map *vma_resv_map(struct vm_area_struct *vma);
static void hugetlb_free_folio(struct folio *folio)
@@ -5426,26 +5426,40 @@ static int hugetlb_vm_op_split(struct vm_area_struct *vma, unsigned long addr)
{
if (addr & ~(huge_page_mask(hstate_vma(vma))))
return -EINVAL;
+ return 0;
+}
+void hugetlb_split(struct vm_area_struct *vma, unsigned long addr)
+{
/*
* PMD sharing is only possible for PUD_SIZE-aligned address ranges
* in HugeTLB VMAs. If we will lose PUD_SIZE alignment due to this
* split, unshare PMDs in the PUD_SIZE interval surrounding addr now.
+ * This function is called in the middle of a VMA split operation, with
+ * MM, VMA and rmap all write-locked to prevent concurrent page table
+ * walks (except hardware and gup_fast()).
*/
+ vma_assert_write_locked(vma);
+ i_mmap_assert_write_locked(vma->vm_file->f_mapping);
+
if (addr & ~PUD_MASK) {
- /*
- * hugetlb_vm_op_split is called right before we attempt to
- * split the VMA. We will need to unshare PMDs in the old and
- * new VMAs, so let's unshare before we split.
- */
unsigned long floor = addr & PUD_MASK;
unsigned long ceil = floor + PUD_SIZE;
- if (floor >= vma->vm_start && ceil <= vma->vm_end)
- hugetlb_unshare_pmds(vma, floor, ceil);
+ if (floor >= vma->vm_start && ceil <= vma->vm_end) {
+ /*
+ * Locking:
+ * Use take_locks=false here.
+ * The file rmap lock is already held.
+ * The hugetlb VMA lock can't be taken when we already
+ * hold the file rmap lock, and we don't need it because
+ * its purpose is to synchronize against concurrent page
+ * table walks, which are not possible thanks to the
+ * locks held by our caller.
+ */
+ hugetlb_unshare_pmds(vma, floor, ceil, /* take_locks = */ false);
+ }
}
-
- return 0;
}
static unsigned long hugetlb_vm_op_pagesize(struct vm_area_struct *vma)
@@ -7615,6 +7629,13 @@ int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
return 0;
pud_clear(pud);
+ /*
+ * Once our caller drops the rmap lock, some other process might be
+ * using this page table as a normal, non-hugetlb page table.
+ * Wait for pending gup_fast() in other threads to finish before letting
+ * that happen.
+ */
+ tlb_remove_table_sync_one();
ptdesc_pmd_pts_dec(virt_to_ptdesc(ptep));
mm_dec_nr_pmds(mm);
return 1;
@@ -7885,9 +7906,16 @@ void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int re
spin_unlock_irq(&hugetlb_lock);
}
+/*
+ * If @take_locks is false, the caller must ensure that no concurrent page table
+ * access can happen (except for gup_fast() and hardware page walks).
+ * If @take_locks is true, we take the hugetlb VMA lock (to lock out things like
+ * concurrent page fault handling) and the file rmap lock.
+ */
static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
unsigned long start,
- unsigned long end)
+ unsigned long end,
+ bool take_locks)
{
struct hstate *h = hstate_vma(vma);
unsigned long sz = huge_page_size(h);
@@ -7911,8 +7939,12 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm,
start, end);
mmu_notifier_invalidate_range_start(&range);
- hugetlb_vma_lock_write(vma);
- i_mmap_lock_write(vma->vm_file->f_mapping);
+ if (take_locks) {
+ hugetlb_vma_lock_write(vma);
+ i_mmap_lock_write(vma->vm_file->f_mapping);
+ } else {
+ i_mmap_assert_write_locked(vma->vm_file->f_mapping);
+ }
for (address = start; address < end; address += PUD_SIZE) {
ptep = hugetlb_walk(vma, address, sz);
if (!ptep)
@@ -7922,8 +7954,10 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
spin_unlock(ptl);
}
flush_hugetlb_tlb_range(vma, start, end);
- i_mmap_unlock_write(vma->vm_file->f_mapping);
- hugetlb_vma_unlock_write(vma);
+ if (take_locks) {
+ i_mmap_unlock_write(vma->vm_file->f_mapping);
+ hugetlb_vma_unlock_write(vma);
+ }
/*
* No need to call mmu_notifier_arch_invalidate_secondary_tlbs(), see
* Documentation/mm/mmu_notifier.rst.
@@ -7938,7 +7972,8 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
void hugetlb_unshare_all_pmds(struct vm_area_struct *vma)
{
hugetlb_unshare_pmds(vma, ALIGN(vma->vm_start, PUD_SIZE),
- ALIGN_DOWN(vma->vm_end, PUD_SIZE));
+ ALIGN_DOWN(vma->vm_end, PUD_SIZE),
+ /* take_locks = */ true);
}
/*
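Editor's note: hugetlb_unshare_pmds() now has two calling conventions, one per call site in the hunks above; condensed for contrast:

/* Mid-split path (hugetlb_split): mm, VMA and rmap locks already write-held. */
hugetlb_unshare_pmds(vma, floor, ceil, /* take_locks = */ false);

/* Standalone path (hugetlb_unshare_all_pmds): nothing held, take locks inside. */
hugetlb_unshare_pmds(vma, ALIGN(vma->vm_start, PUD_SIZE),
		     ALIGN_DOWN(vma->vm_end, PUD_SIZE),
		     /* take_locks = */ true);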
diff --git a/mm/kmsan/kmsan_test.c b/mm/kmsan/kmsan_test.c
index 9733a22c46c1..c6c5b2bbede0 100644
--- a/mm/kmsan/kmsan_test.c
+++ b/mm/kmsan/kmsan_test.c
@@ -732,3 +732,4 @@ kunit_test_suites(&kmsan_test_suite);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Alexander Potapenko <glider@google.com>");
+MODULE_DESCRIPTION("Test cases for KMSAN");
diff --git a/mm/madvise.c b/mm/madvise.c
index 8433ac9b27e0..5f7a66a1617e 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -1881,7 +1881,9 @@ static ssize_t vector_madvise(struct mm_struct *mm, struct iov_iter *iter,
/* Drop and reacquire lock to unwind race. */
madvise_finish_tlb(&madv_behavior);
madvise_unlock(mm, behavior);
- madvise_lock(mm, behavior);
+ ret = madvise_lock(mm, behavior);
+ if (ret)
+ goto out;
madvise_init_tlb(&madv_behavior, mm);
continue;
}
@@ -1892,6 +1894,7 @@ static ssize_t vector_madvise(struct mm_struct *mm, struct iov_iter *iter,
madvise_finish_tlb(&madv_behavior);
madvise_unlock(mm, behavior);
+out:
ret = (total_len - iov_iter_count(iter)) ? : ret;
return ret;
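Editor's note: the fix makes the retry path honor a failed relock instead of continuing unlocked. Condensed shape of the corrected flow; the GNU a ?: b form on the shared exit keeps readv()-style partial-success semantics:

madvise_unlock(mm, behavior);
ret = madvise_lock(mm, behavior);	/* no longer assumed to succeed */
if (ret)
	goto out;
...
out:
	/* bytes processed if any iovecs completed, otherwise the error */
	ret = (total_len - iov_iter_count(iter)) ? : ret;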
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 72fd72e156b1..3b1dfd08338b 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -3708,15 +3708,13 @@ static void wi_state_free(void)
lockdep_is_held(&wi_state_lock));
if (!old_wi_state) {
mutex_unlock(&wi_state_lock);
- goto out;
+ return;
}
rcu_assign_pointer(wi_state, NULL);
mutex_unlock(&wi_state_lock);
synchronize_rcu();
kfree(old_wi_state);
-out:
- kfree(&wi_group->wi_kobj);
}
static struct kobj_attribute wi_auto_attr =
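Editor's note: the deleted kfree(&wi_group->wi_kobj) handed kfree() the address of an embedded kobject, which is never a kmalloc() return value; an embedded kobject is freed through its release callback via kobject_put(), not by the caller. What remains is the usual RCU teardown sequence:

rcu_assign_pointer(wi_state, NULL);	/* unpublish: no new readers */
mutex_unlock(&wi_state_lock);
synchronize_rcu();			/* wait out readers already in flight */
kfree(old_wi_state);			/* only now is the free safe */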
diff --git a/mm/mremap.c b/mm/mremap.c
index 83e359754961..60f6b8d0d5f0 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -237,6 +237,8 @@ static int move_ptes(struct pagetable_move_control *pmc,
for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE,
new_pte++, new_addr += PAGE_SIZE) {
+ VM_WARN_ON_ONCE(!pte_none(*new_pte));
+
if (pte_none(ptep_get(old_pte)))
continue;
diff --git a/mm/vma.c b/mm/vma.c
index 1c6595f282e5..726b2a31ce59 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -169,6 +169,9 @@ static void init_multi_vma_prep(struct vma_prepare *vp,
vp->file = vma->vm_file;
if (vp->file)
vp->mapping = vma->vm_file->f_mapping;
+
+ if (vmg && vmg->skip_vma_uprobe)
+ vp->skip_vma_uprobe = true;
}
/*
@@ -358,10 +361,13 @@ static void vma_complete(struct vma_prepare *vp, struct vma_iterator *vmi,
if (vp->file) {
i_mmap_unlock_write(vp->mapping);
- uprobe_mmap(vp->vma);
- if (vp->adj_next)
- uprobe_mmap(vp->adj_next);
+ if (!vp->skip_vma_uprobe) {
+ uprobe_mmap(vp->vma);
+
+ if (vp->adj_next)
+ uprobe_mmap(vp->adj_next);
+ }
}
if (vp->remove) {
@@ -539,7 +545,14 @@ __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
init_vma_prep(&vp, vma);
vp.insert = new;
vma_prepare(&vp);
+
+ /*
+ * Get rid of huge pages and shared page tables straddling the split
+ * boundary.
+ */
vma_adjust_trans_huge(vma, vma->vm_start, addr, NULL);
+ if (is_vm_hugetlb_page(vma))
+ hugetlb_split(vma, addr);
if (new_below) {
vma->vm_start = addr;
@@ -1823,6 +1836,14 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
faulted_in_anon_vma = false;
}
+ /*
+ * If the VMA we are copying might contain a uprobe PTE, ensure
+ * that we do not establish one upon merge. Otherwise, when mremap()
+ * moves page tables, it will orphan the newly created PTE.
+ */
+ if (vma->vm_file)
+ vmg.skip_vma_uprobe = true;
+
new_vma = find_vma_prev(mm, addr, &vmg.prev);
if (new_vma && new_vma->vm_start < addr + len)
return NULL; /* should never get here */
diff --git a/mm/vma.h b/mm/vma.h
index 9a8af9be29a8..0db066e7a45d 100644
--- a/mm/vma.h
+++ b/mm/vma.h
@@ -19,6 +19,8 @@ struct vma_prepare {
struct vm_area_struct *insert;
struct vm_area_struct *remove;
struct vm_area_struct *remove2;
+
+ bool skip_vma_uprobe :1;
};
struct unlink_vma_file_batch {
@@ -120,6 +122,11 @@ struct vma_merge_struct {
*/
bool give_up_on_oom :1;
+ /*
+ * If set, skip calling uprobe_mmap() on the merged VMA.
+ */
+ bool skip_vma_uprobe :1;
+
/* Internal flags set during merge process: */
/*
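Editor's note: the new bit threads through three hops; a condensed call-flow sketch of the pieces added above (not literal code):

copy_vma()				/* mremap() moving a file-backed VMA */
    vmg.skip_vma_uprobe = true;
    -> init_multi_vma_prep()
           vp->skip_vma_uprobe = true;
       -> vma_complete()
              if (!vp->skip_vma_uprobe)
                      uprobe_mmap(vp->vma);	/* skipped, so no duplicate uprobe PTE to orphan */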
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 6f740f070b3d..429ae5339bfe 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1201,7 +1201,6 @@ const char * const vmstat_text[] = {
"nr_zone_unevictable",
"nr_zone_write_pending",
"nr_mlock",
- "nr_bounce",
#if IS_ENABLED(CONFIG_ZSMALLOC)
"nr_zspages",
#endif
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 01818abd02df..6d2c280a1730 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -1668,6 +1668,12 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss,
continue;
}
+ /* Reject RISCV ELF "mapping symbols" */
+ if (ehdr.e_machine == EM_RISCV) {
+ if (elf_name[0] == '$' && strchr("dx", elf_name[1]))
+ continue;
+ }
+
if (runtime_ss->opdsec && sym.st_shndx == runtime_ss->opdidx) {
u32 offset = sym.st_value - syms_ss->opdshdr.sh_addr;
u64 *opd = opddata->d_buf + offset;
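Editor's note: like arm and arm64, RISC-V emits ELF "mapping symbols": per the RISC-V psABI, $x marks the start of an instruction region (optionally with an ISA suffix, e.g. $xrv64i2p1) and $d the start of a data region; neither names a function, so the symbolizer must skip them. An equivalent standalone helper (is_riscv_mapping_symbol is a hypothetical name, not from this patch); explicit comparisons also sidestep strchr() matching the NUL terminator for a bare "$":

#include <stdbool.h>
#include <elf.h>

static bool is_riscv_mapping_symbol(unsigned int e_machine, const char *name)
{
	if (e_machine != EM_RISCV)
		return false;
	return name[0] == '$' && (name[1] == 'd' || name[1] == 'x');
}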
diff --git a/tools/testing/selftests/mm/ksm_tests.c b/tools/testing/selftests/mm/ksm_tests.c
index dcdd5bb20f3d..e80deac1436b 100644
--- a/tools/testing/selftests/mm/ksm_tests.c
+++ b/tools/testing/selftests/mm/ksm_tests.c
@@ -58,40 +58,12 @@ int debug;
static int ksm_write_sysfs(const char *file_path, unsigned long val)
{
- FILE *f = fopen(file_path, "w");
-
- if (!f) {
- fprintf(stderr, "f %s\n", file_path);
- perror("fopen");
- return 1;
- }
- if (fprintf(f, "%lu", val) < 0) {
- perror("fprintf");
- fclose(f);
- return 1;
- }
- fclose(f);
-
- return 0;
+ return write_sysfs(file_path, val);
}
static int ksm_read_sysfs(const char *file_path, unsigned long *val)
{
- FILE *f = fopen(file_path, "r");
-
- if (!f) {
- fprintf(stderr, "f %s\n", file_path);
- perror("fopen");
- return 1;
- }
- if (fscanf(f, "%lu", val) != 1) {
- perror("fscanf");
- fclose(f);
- return 1;
- }
- fclose(f);
-
- return 0;
+ return read_sysfs(file_path, val);
}
static void ksm_print_sysfs(void)
diff --git a/tools/testing/selftests/mm/merge.c b/tools/testing/selftests/mm/merge.c
index c76646cdf6e6..bbae66fc5038 100644
--- a/tools/testing/selftests/mm/merge.c
+++ b/tools/testing/selftests/mm/merge.c
@@ -2,11 +2,14 @@
#define _GNU_SOURCE
#include "../kselftest_harness.h"
+#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/mman.h>
+#include <sys/syscall.h>
#include <sys/wait.h>
+#include <linux/perf_event.h>
#include "vm_util.h"
FIXTURE(merge)
@@ -452,4 +455,44 @@ TEST_F(merge, forked_source_vma)
ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr2 + 5 * page_size);
}
+TEST_F(merge, handle_uprobe_upon_merged_vma)
+{
+ const size_t attr_sz = sizeof(struct perf_event_attr);
+ unsigned int page_size = self->page_size;
+ const char *probe_file = "./foo";
+ char *carveout = self->carveout;
+ struct perf_event_attr attr;
+ unsigned long type;
+ void *ptr1, *ptr2;
+ int fd;
+
+ fd = open(probe_file, O_RDWR|O_CREAT, 0600);
+ ASSERT_GE(fd, 0);
+
+ ASSERT_EQ(ftruncate(fd, page_size), 0);
+ ASSERT_EQ(read_sysfs("/sys/bus/event_source/devices/uprobe/type", &type), 0);
+
+ memset(&attr, 0, attr_sz);
+ attr.size = attr_sz;
+ attr.type = type;
+ attr.config1 = (__u64)(long)probe_file;
+ attr.config2 = 0x0;
+
+ ASSERT_GE(syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0), 0);
+
+ ptr1 = mmap(&carveout[page_size], 10 * page_size, PROT_EXEC,
+ MAP_PRIVATE | MAP_FIXED, fd, 0);
+ ASSERT_NE(ptr1, MAP_FAILED);
+
+ ptr2 = mremap(ptr1, page_size, 2 * page_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED, ptr1 + 5 * page_size);
+ ASSERT_NE(ptr2, MAP_FAILED);
+
+ ASSERT_NE(mremap(ptr2, page_size, page_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED, ptr1), MAP_FAILED);
+
+ close(fd);
+ remove(probe_file);
+}
+
TEST_HARNESS_MAIN
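Editor's note: in the new test, the perf_event_attr unions carry the uprobe parameters; in include/uapi/linux/perf_event.h, config1 aliases uprobe_path and config2 aliases probe_offset, so the setup amounts to:

struct perf_event_attr attr = {
	.size    = sizeof(attr),
	.type    = type,			/* dynamic uprobe PMU id read from sysfs */
	.config1 = (__u64)(long)probe_file,	/* uprobe_path: file to probe */
	.config2 = 0x0,				/* probe_offset: byte 0 of that file */
};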
diff --git a/tools/testing/selftests/mm/thuge-gen.c b/tools/testing/selftests/mm/thuge-gen.c
index a41bc1234b37..95b6f043a3cb 100644
--- a/tools/testing/selftests/mm/thuge-gen.c
+++ b/tools/testing/selftests/mm/thuge-gen.c
@@ -77,7 +77,7 @@ void show(unsigned long ps)
system(buf);
}
-unsigned long read_sysfs(int warn, char *fmt, ...)
+unsigned long thuge_read_sysfs(int warn, char *fmt, ...)
{
char *line = NULL;
size_t linelen = 0;
@@ -106,7 +106,7 @@ unsigned long read_sysfs(int warn, char *fmt, ...)
unsigned long read_free(unsigned long ps)
{
- return read_sysfs(ps != getpagesize(),
+ return thuge_read_sysfs(ps != getpagesize(),
"/sys/kernel/mm/hugepages/hugepages-%lukB/free_hugepages",
ps >> 10);
}
@@ -195,7 +195,7 @@ void find_pagesizes(void)
}
globfree(&g);
- if (read_sysfs(0, "/proc/sys/kernel/shmmax") < NUM_PAGES * largest)
+ if (thuge_read_sysfs(0, "/proc/sys/kernel/shmmax") < NUM_PAGES * largest)
ksft_exit_fail_msg("Please do echo %lu > /proc/sys/kernel/shmmax",
largest * NUM_PAGES);
diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c
index 61d7bf1f8c62..5492e3f784df 100644
--- a/tools/testing/selftests/mm/vm_util.c
+++ b/tools/testing/selftests/mm/vm_util.c
@@ -486,3 +486,41 @@ int close_procmap(struct procmap_fd *procmap)
{
return close(procmap->fd);
}
+
+int write_sysfs(const char *file_path, unsigned long val)
+{
+ FILE *f = fopen(file_path, "w");
+
+ if (!f) {
+ fprintf(stderr, "f %s\n", file_path);
+ perror("fopen");
+ return 1;
+ }
+ if (fprintf(f, "%lu", val) < 0) {
+ perror("fprintf");
+ fclose(f);
+ return 1;
+ }
+ fclose(f);
+
+ return 0;
+}
+
+int read_sysfs(const char *file_path, unsigned long *val)
+{
+ FILE *f = fopen(file_path, "r");
+
+ if (!f) {
+ fprintf(stderr, "f %s\n", file_path);
+ perror("fopen");
+ return 1;
+ }
+ if (fscanf(f, "%lu", val) != 1) {
+ perror("fscanf");
+ fclose(f);
+ return 1;
+ }
+ fclose(f);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h
index adb5d294a220..b8136d12a0f8 100644
--- a/tools/testing/selftests/mm/vm_util.h
+++ b/tools/testing/selftests/mm/vm_util.h
@@ -88,6 +88,8 @@ int open_procmap(pid_t pid, struct procmap_fd *procmap_out);
int query_procmap(struct procmap_fd *procmap);
bool find_vma_procmap(struct procmap_fd *procmap, void *address);
int close_procmap(struct procmap_fd *procmap);
+int write_sysfs(const char *file_path, unsigned long val);
+int read_sysfs(const char *file_path, unsigned long *val);
static inline int open_self_procmap(struct procmap_fd *procmap_out)
{
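Editor's note: a usage sketch for the two new helpers (the sysfs path is illustrative; both helpers return 0 on success and 1 on failure, per the bodies added above):

#include "vm_util.h"

int main(void)
{
	unsigned long ksm_run;

	if (read_sysfs("/sys/kernel/mm/ksm/run", &ksm_run))
		return 1;				/* save the current value */
	if (write_sysfs("/sys/kernel/mm/ksm/run", 1))
		return 1;				/* enable KSM */
	/* ... exercise KSM here ... */
	return write_sysfs("/sys/kernel/mm/ksm/run", ksm_run);	/* restore */
}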
diff --git a/tools/testing/selftests/vDSO/vgetrandom-chacha.S b/tools/testing/selftests/vDSO/vgetrandom-chacha.S
index d6e09af7c0a9..a4a82e1c28a9 100644
--- a/tools/testing/selftests/vDSO/vgetrandom-chacha.S
+++ b/tools/testing/selftests/vDSO/vgetrandom-chacha.S
@@ -11,6 +11,8 @@
#include "../../../../arch/loongarch/vdso/vgetrandom-chacha.S"
#elif defined(__powerpc__) || defined(__powerpc64__)
#include "../../../../arch/powerpc/kernel/vdso/vgetrandom-chacha.S"
+#elif defined(__riscv) && __riscv_xlen == 64
+#include "../../../../arch/riscv/kernel/vdso/vgetrandom-chacha.S"
#elif defined(__s390x__)
#include "../../../../arch/s390/kernel/vdso64/vgetrandom-chacha.S"
#elif defined(__x86_64__)
diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h
index 441feb21aa5a..4505b1c31be1 100644
--- a/tools/testing/vma/vma_internal.h
+++ b/tools/testing/vma/vma_internal.h
@@ -932,6 +932,8 @@ static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
(void)next;
}
+static inline void hugetlb_split(struct vm_area_struct *, unsigned long) {}
+
static inline void vma_iter_free(struct vma_iterator *vmi)
{
mas_destroy(&vmi->mas);