-rw-r--r-- Documentation/virt/kvm/api.rst | 20
-rw-r--r-- Documentation/virt/kvm/devices/arm-vgic-v3.rst | 3
-rw-r--r-- arch/arm64/include/asm/el2_setup.h | 38
-rw-r--r-- arch/arm64/include/asm/kvm_host.h | 50
-rw-r--r-- arch/arm64/include/asm/sysreg.h | 11
-rw-r--r-- arch/arm64/kernel/entry-common.c | 8
-rw-r--r-- arch/arm64/kvm/arch_timer.c | 105
-rw-r--r-- arch/arm64/kvm/arm.c | 7
-rw-r--r-- arch/arm64/kvm/at.c | 7
-rw-r--r-- arch/arm64/kvm/config.c | 90
-rw-r--r-- arch/arm64/kvm/debug.c | 15
-rw-r--r-- arch/arm64/kvm/guest.c | 70
-rw-r--r-- arch/arm64/kvm/handle_exit.c | 7
-rw-r--r-- arch/arm64/kvm/hyp/include/hyp/switch.h | 148
-rw-r--r-- arch/arm64/kvm/hyp/nvhe/pkvm.c | 1
-rw-r--r-- arch/arm64/kvm/nested.c | 9
-rw-r--r-- arch/arm64/kvm/sys_regs.c | 131
-rw-r--r-- arch/arm64/kvm/sys_regs.h | 6
-rw-r--r-- arch/arm64/kvm/vgic/vgic-v3.c | 5
-rw-r--r-- arch/powerpc/kernel/fadump.c | 3
-rw-r--r-- arch/powerpc/kvm/book3s_xive.c | 12
-rw-r--r-- arch/powerpc/platforms/powernv/vas.c | 2
-rw-r--r-- arch/powerpc/platforms/pseries/msi.c | 3
-rw-r--r-- arch/powerpc/sysdev/xive/common.c | 2
-rw-r--r-- arch/x86/kvm/pmu.c | 8
-rw-r--r-- arch/x86/kvm/x86.c | 7
-rw-r--r-- drivers/misc/ocxl/afu_irq.c | 2
-rw-r--r-- fs/exfat/exfat_fs.h | 1
-rw-r--r-- fs/exfat/file.c | 7
-rw-r--r-- fs/exfat/namei.c | 8
-rw-r--r-- fs/exfat/nls.c | 3
-rw-r--r-- fs/nfs/flexfilelayout/flexfilelayout.c | 35
-rw-r--r-- fs/nfs/nfs4client.c | 1
-rw-r--r-- fs/nfs/nfs4proc.c | 13
-rw-r--r-- fs/nfs/write.c | 3
-rw-r--r-- fs/smb/client/Kconfig | 7
-rw-r--r-- fs/smb/client/cifsacl.c | 5
-rw-r--r-- fs/smb/client/cifsencrypt.c | 201
-rw-r--r-- fs/smb/client/cifsfs.c | 4
-rw-r--r-- fs/smb/client/cifsglob.h | 22
-rw-r--r-- fs/smb/client/cifsproto.h | 10
-rw-r--r-- fs/smb/client/inode.c | 6
-rw-r--r-- fs/smb/client/link.c | 31
-rw-r--r-- fs/smb/client/misc.c | 17
-rw-r--r-- fs/smb/client/sess.c | 2
-rw-r--r-- fs/smb/client/smb2misc.c | 53
-rw-r--r-- fs/smb/client/smb2ops.c | 8
-rw-r--r-- fs/smb/client/smb2proto.h | 8
-rw-r--r-- fs/smb/client/smb2transport.c | 164
-rw-r--r-- fs/smb/client/smbdirect.c | 321
-rw-r--r-- fs/smb/client/smbdirect.h | 2
-rw-r--r-- fs/smb/client/xattr.c | 1
-rw-r--r-- fs/smb/common/cifsglob.h | 30
-rw-r--r-- fs/smb/common/smbdirect/smbdirect_socket.h | 11
-rw-r--r-- fs/smb/server/smb_common.h | 14
-rw-r--r-- include/kvm/arm_arch_timer.h | 24
-rw-r--r-- include/linux/kvm_host.h | 12
-rw-r--r-- include/linux/nfs_xdr.h | 1
-rw-r--r-- include/uapi/linux/kvm.h | 5
-rw-r--r-- mm/slub.c | 16
-rw-r--r-- tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c | 2
-rw-r--r-- tools/testing/selftests/kvm/arm64/external_aborts.c | 43
-rw-r--r-- tools/testing/selftests/kvm/arm64/get-reg-list.c | 99
-rw-r--r-- tools/testing/selftests/kvm/arm64/set_id_regs.c | 3
-rw-r--r-- tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c | 3
-rw-r--r-- tools/testing/selftests/kvm/guest_memfd_test.c | 169
-rw-r--r-- tools/testing/selftests/kvm/include/arm64/processor.h | 12
-rw-r--r-- tools/testing/selftests/kvm/include/kvm_util.h | 27
-rw-r--r-- tools/testing/selftests/kvm/include/test_util.h | 19
-rw-r--r-- tools/testing/selftests/kvm/irqfd_test.c | 14
-rw-r--r-- tools/testing/selftests/kvm/lib/arm64/processor.c | 5
-rw-r--r-- tools/testing/selftests/kvm/lib/kvm_util.c | 49
-rw-r--r-- tools/testing/selftests/kvm/lib/s390/processor.c | 5
-rw-r--r-- tools/testing/selftests/kvm/lib/test_util.c | 7
-rw-r--r-- tools/testing/selftests/kvm/lib/x86/processor.c | 5
-rw-r--r-- tools/testing/selftests/kvm/mmu_stress_test.c | 5
-rw-r--r-- tools/testing/selftests/kvm/pre_fault_memory_test.c | 131
-rw-r--r-- tools/testing/selftests/kvm/s390/ucontrol_test.c | 16
-rw-r--r-- tools/testing/selftests/kvm/set_memory_region_test.c | 17
-rw-r--r-- virt/kvm/Kconfig | 1
-rw-r--r-- virt/kvm/guest_memfd.c | 75
-rw-r--r-- virt/kvm/kvm_main.c | 4
82 files changed, 1445 insertions, 1082 deletions
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 6ae24c5ca559..57061fa29e6a 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -1229,6 +1229,9 @@ It is not possible to read back a pending external abort (injected via
KVM_SET_VCPU_EVENTS or otherwise) because such an exception is always delivered
directly to the virtual CPU.
+Calling this ioctl on a vCPU that hasn't been initialized will return
+-ENOEXEC.
+
::
struct kvm_vcpu_events {
@@ -1309,6 +1312,8 @@ exceptions by manipulating individual registers using the KVM_SET_ONE_REG API.
See KVM_GET_VCPU_EVENTS for the data structure.
+Calling this ioctl on a vCPU that hasn't been initialized will return
+-ENOEXEC.
4.33 KVM_GET_DEBUGREGS
----------------------
@@ -6432,9 +6437,18 @@ most one mapping per page, i.e. binding multiple memory regions to a single
guest_memfd range is not allowed (any number of memory regions can be bound to
a single guest_memfd file, but the bound ranges must not overlap).
-When the capability KVM_CAP_GUEST_MEMFD_MMAP is supported, the 'flags' field
-supports GUEST_MEMFD_FLAG_MMAP. Setting this flag on guest_memfd creation
-enables mmap() and faulting of guest_memfd memory to host userspace.
+The capability KVM_CAP_GUEST_MEMFD_FLAGS enumerates the `flags` that can be
+specified via KVM_CREATE_GUEST_MEMFD. Currently defined flags:
+
+ ============================ ================================================
+ GUEST_MEMFD_FLAG_MMAP Enable using mmap() on the guest_memfd file
+ descriptor.
+ GUEST_MEMFD_FLAG_INIT_SHARED Make all memory in the file shared during
+ KVM_CREATE_GUEST_MEMFD (memory files created
+ without INIT_SHARED will be marked private).
+ Shared memory can be faulted into host userspace
+ page tables. Private memory cannot.
+ ============================ ================================================
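
For illustration, a minimal userspace sketch (not part of this patch) that
creates a guest_memfd which is both mmap()-able and initially shared; the
helper name and the pre-existing ``vm_fd`` are assumptions::

    #include <linux/kvm.h>
    #include <sys/ioctl.h>
    #include <sys/mman.h>

    /* vm_fd: an existing VM fd from KVM_CREATE_VM (hypothetical caller). */
    static int create_shared_gmem(int vm_fd, __u64 size, void **mem)
    {
            struct kvm_create_guest_memfd gmem = {
                    .size  = size,
                    .flags = GUEST_MEMFD_FLAG_MMAP |
                             GUEST_MEMFD_FLAG_INIT_SHARED,
            };
            int fd = ioctl(vm_fd, KVM_CREATE_GUEST_MEMFD, &gmem);

            if (fd < 0)
                    return -1;

            /* INIT_SHARED memory may be faulted into host page tables. */
            *mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
                        fd, 0);
            return *mem == MAP_FAILED ? -1 : fd;
    }

Per the table above, dropping GUEST_MEMFD_FLAG_INIT_SHARED would leave the
memory private, so the mmap() region could not be faulted by the host.
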
When the KVM MMU performs a PFN lookup to service a guest fault and the backing
guest_memfd has the GUEST_MEMFD_FLAG_MMAP set, then the fault will always be
diff --git a/Documentation/virt/kvm/devices/arm-vgic-v3.rst b/Documentation/virt/kvm/devices/arm-vgic-v3.rst
index ff02102f7141..5395ee66fc32 100644
--- a/Documentation/virt/kvm/devices/arm-vgic-v3.rst
+++ b/Documentation/virt/kvm/devices/arm-vgic-v3.rst
@@ -13,7 +13,8 @@ will act as the VM interrupt controller, requiring emulated user-space devices
to inject interrupts to the VGIC instead of directly to CPUs. It is not
possible to create both a GICv3 and GICv2 on the same VM.
-Creating a guest GICv3 device requires a host GICv3 as well.
+Creating a guest GICv3 device requires a GICv3 host, or a GICv5 host with
+support for FEAT_GCIE_LEGACY.
Groups:
diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
index b37da3ee8529..99a7c0235e6d 100644
--- a/arch/arm64/include/asm/el2_setup.h
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -24,22 +24,48 @@
* ID_AA64MMFR4_EL1.E2H0 < 0. On such CPUs HCR_EL2.E2H is RES1, but it
* can reset into an UNKNOWN state and might not read as 1 until it has
* been initialized explicitly.
- *
- * Fruity CPUs seem to have HCR_EL2.E2H set to RAO/WI, but
- * don't advertise it (they predate this relaxation).
- *
 * Initialize HCR_EL2.E2H so that later code can rely upon HCR_EL2.E2H
* indicating whether the CPU is running in E2H mode.
*/
mrs_s x1, SYS_ID_AA64MMFR4_EL1
sbfx x1, x1, #ID_AA64MMFR4_EL1_E2H0_SHIFT, #ID_AA64MMFR4_EL1_E2H0_WIDTH
cmp x1, #0
- b.ge .LnVHE_\@
+ b.lt .LnE2H0_\@
+ /*
+ * Unfortunately, HCR_EL2.E2H can be RES1 even if not advertised
+ * as such via ID_AA64MMFR4_EL1.E2H0:
+ *
+ * - Fruity CPUs predate the !FEAT_E2H0 relaxation, and seem to
+ * have HCR_EL2.E2H implemented as RAO/WI.
+ *
+ * - On CPUs that lack FEAT_FGT, a hypervisor can't trap guest
+ * reads of ID_AA64MMFR4_EL1 to advertise !FEAT_E2H0. NV
+ * guests on these hosts can write to HCR_EL2.E2H without
+ * trapping to the hypervisor, but these writes have no
+ * functional effect.
+ *
+ * Handle both cases by checking for an essential VHE property
+ * (system register remapping) to decide whether we're
+ * effectively VHE-only or not.
+ */
+ msr_hcr_el2 x0 // Setup HCR_EL2 as nVHE
+ isb
+ mov x1, #1 // Write something to FAR_EL1
+ msr far_el1, x1
+ isb
+ mov x1, #2 // Try to overwrite it via FAR_EL2
+ msr far_el2, x1
+ isb
+ mrs x1, far_el1 // If we see the latest write in FAR_EL1,
+ cmp x1, #2 // we can safely assume we are VHE only.
+ b.ne .LnVHE_\@ // Otherwise, we know that nVHE works.
+
+.LnE2H0_\@:
orr x0, x0, #HCR_E2H
-.LnVHE_\@:
msr_hcr_el2 x0
isb
+.LnVHE_\@:
.endm
.macro __init_el2_sctlr
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index b763293281c8..64302c438355 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -816,6 +816,11 @@ struct kvm_vcpu_arch {
u64 hcrx_el2;
u64 mdcr_el2;
+ struct {
+ u64 r;
+ u64 w;
+ } fgt[__NR_FGT_GROUP_IDS__];
+
/* Exception Information */
struct kvm_vcpu_fault_info fault;
@@ -1600,6 +1605,51 @@ static inline bool kvm_arch_has_irq_bypass(void)
void compute_fgu(struct kvm *kvm, enum fgt_group_id fgt);
void get_reg_fixed_bits(struct kvm *kvm, enum vcpu_sysreg reg, u64 *res0, u64 *res1);
void check_feature_map(void);
+void kvm_vcpu_load_fgt(struct kvm_vcpu *vcpu);
+
+static __always_inline enum fgt_group_id __fgt_reg_to_group_id(enum vcpu_sysreg reg)
+{
+ switch (reg) {
+ case HFGRTR_EL2:
+ case HFGWTR_EL2:
+ return HFGRTR_GROUP;
+ case HFGITR_EL2:
+ return HFGITR_GROUP;
+ case HDFGRTR_EL2:
+ case HDFGWTR_EL2:
+ return HDFGRTR_GROUP;
+ case HAFGRTR_EL2:
+ return HAFGRTR_GROUP;
+ case HFGRTR2_EL2:
+ case HFGWTR2_EL2:
+ return HFGRTR2_GROUP;
+ case HFGITR2_EL2:
+ return HFGITR2_GROUP;
+ case HDFGRTR2_EL2:
+ case HDFGWTR2_EL2:
+ return HDFGRTR2_GROUP;
+ default:
+ BUILD_BUG_ON(1);
+ }
+}
+#define vcpu_fgt(vcpu, reg) \
+ ({ \
+ enum fgt_group_id id = __fgt_reg_to_group_id(reg); \
+ u64 *p; \
+ switch (reg) { \
+ case HFGWTR_EL2: \
+ case HDFGWTR_EL2: \
+ case HFGWTR2_EL2: \
+ case HDFGWTR2_EL2: \
+ p = &(vcpu)->arch.fgt[id].w; \
+ break; \
+ default: \
+ p = &(vcpu)->arch.fgt[id].r; \
+ break; \
+ } \
+ \
+ p; \
+ })
#endif /* __ARM64_KVM_HOST_H__ */
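
As a usage sketch (hypothetical helper, kernel context assumed), the
accessor yields a pointer into the per-vCPU shadow, selecting the .w slot
for the write-trap registers and the .r slot for everything else:

	/* Force write traps on TCR_EL1 via the HFGWTR_EL2 shadow. */
	static void example_trap_tcr_writes(struct kvm_vcpu *vcpu)
	{
		*vcpu_fgt(vcpu, HFGWTR_EL2) |= HFGWTR_EL2_TCR_EL1;
	}

This is exactly the pattern the Ampere erratum workaround in
arch/arm64/kvm/config.c uses further down in this diff.
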
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 6455db1b54fd..c231d2a3e515 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -1220,10 +1220,19 @@
__val; \
})
+/*
+ * The "Z" constraint combined with the "%x0" template should be enough
+ * to force XZR generation if (v) is a constant 0 value, but LLVM does not
+ * yet understand that modifier/constraint combo, so a conditional is
+ * required to nudge the compiler into using XZR as a source for a
+ * constant 0 value.
+ */
#define write_sysreg_s(v, r) do { \
u64 __val = (u64)(v); \
u32 __maybe_unused __check_r = (u32)(r); \
- asm volatile(__msr_s(r, "%x0") : : "rZ" (__val)); \
+ if (__builtin_constant_p(__val) && __val == 0) \
+ asm volatile(__msr_s(r, "xzr")); \
+ else \
+ asm volatile(__msr_s(r, "%x0") : : "r" (__val)); \
} while (0)
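
For example (illustration only, not part of the patch), a constant zero now
assembles to an XZR move while a runtime value still goes through a
general-purpose register:

	write_sysreg_s(0, SYS_HFGRTR_EL2);	/* emits: msr ..., xzr  */
	write_sysreg_s(val, SYS_HFGRTR_EL2);	/* emits: msr ..., x<n> */
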
/*
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index f546a914f041..a9c81715ce59 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -697,6 +697,8 @@ static void noinstr el0_breakpt(struct pt_regs *regs, unsigned long esr)
static void noinstr el0_softstp(struct pt_regs *regs, unsigned long esr)
{
+ bool step_done;
+
if (!is_ttbr0_addr(regs->pc))
arm64_apply_bp_hardening();
@@ -707,10 +709,10 @@ static void noinstr el0_softstp(struct pt_regs *regs, unsigned long esr)
* If we are stepping a suspended breakpoint there's nothing more to do:
* the single-step is complete.
*/
- if (!try_step_suspended_breakpoints(regs)) {
- local_daif_restore(DAIF_PROCCTX);
+ step_done = try_step_suspended_breakpoints(regs);
+ local_daif_restore(DAIF_PROCCTX);
+ if (!step_done)
do_el0_softstep(esr, regs);
- }
arm64_exit_to_user_mode(regs);
}
diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index dbd74e4885e2..3f675875abea 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -66,7 +66,7 @@ static int nr_timers(struct kvm_vcpu *vcpu)
u32 timer_get_ctl(struct arch_timer_context *ctxt)
{
- struct kvm_vcpu *vcpu = ctxt->vcpu;
+ struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt);
switch(arch_timer_ctx_index(ctxt)) {
case TIMER_VTIMER:
@@ -85,7 +85,7 @@ u32 timer_get_ctl(struct arch_timer_context *ctxt)
u64 timer_get_cval(struct arch_timer_context *ctxt)
{
- struct kvm_vcpu *vcpu = ctxt->vcpu;
+ struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt);
switch(arch_timer_ctx_index(ctxt)) {
case TIMER_VTIMER:
@@ -104,7 +104,7 @@ u64 timer_get_cval(struct arch_timer_context *ctxt)
static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl)
{
- struct kvm_vcpu *vcpu = ctxt->vcpu;
+ struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt);
switch(arch_timer_ctx_index(ctxt)) {
case TIMER_VTIMER:
@@ -126,7 +126,7 @@ static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl)
static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval)
{
- struct kvm_vcpu *vcpu = ctxt->vcpu;
+ struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt);
switch(arch_timer_ctx_index(ctxt)) {
case TIMER_VTIMER:
@@ -146,16 +146,6 @@ static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval)
}
}
-static void timer_set_offset(struct arch_timer_context *ctxt, u64 offset)
-{
- if (!ctxt->offset.vm_offset) {
- WARN(offset, "timer %ld\n", arch_timer_ctx_index(ctxt));
- return;
- }
-
- WRITE_ONCE(*ctxt->offset.vm_offset, offset);
-}
-
u64 kvm_phys_timer_read(void)
{
return timecounter->cc->read(timecounter->cc);
@@ -343,7 +333,7 @@ static enum hrtimer_restart kvm_hrtimer_expire(struct hrtimer *hrt)
u64 ns;
ctx = container_of(hrt, struct arch_timer_context, hrtimer);
- vcpu = ctx->vcpu;
+ vcpu = timer_context_to_vcpu(ctx);
trace_kvm_timer_hrtimer_expire(ctx);
@@ -436,8 +426,9 @@ static void kvm_timer_update_status(struct arch_timer_context *ctx, bool level)
*
* But hey, it's fast, right?
*/
- if (is_hyp_ctxt(ctx->vcpu) &&
- (ctx == vcpu_vtimer(ctx->vcpu) || ctx == vcpu_ptimer(ctx->vcpu))) {
+ struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctx);
+ if (is_hyp_ctxt(vcpu) &&
+ (ctx == vcpu_vtimer(vcpu) || ctx == vcpu_ptimer(vcpu))) {
unsigned long val = timer_get_ctl(ctx);
__assign_bit(__ffs(ARCH_TIMER_CTRL_IT_STAT), &val, level);
timer_set_ctl(ctx, val);
@@ -470,7 +461,7 @@ static void timer_emulate(struct arch_timer_context *ctx)
trace_kvm_timer_emulate(ctx, should_fire);
if (should_fire != ctx->irq.level)
- kvm_timer_update_irq(ctx->vcpu, should_fire, ctx);
+ kvm_timer_update_irq(timer_context_to_vcpu(ctx), should_fire, ctx);
kvm_timer_update_status(ctx, should_fire);
@@ -498,7 +489,7 @@ static void set_cntpoff(u64 cntpoff)
static void timer_save_state(struct arch_timer_context *ctx)
{
- struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu);
+ struct arch_timer_cpu *timer = vcpu_timer(timer_context_to_vcpu(ctx));
enum kvm_arch_timers index = arch_timer_ctx_index(ctx);
unsigned long flags;
@@ -609,7 +600,7 @@ static void kvm_timer_unblocking(struct kvm_vcpu *vcpu)
static void timer_restore_state(struct arch_timer_context *ctx)
{
- struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu);
+ struct arch_timer_cpu *timer = vcpu_timer(timer_context_to_vcpu(ctx));
enum kvm_arch_timers index = arch_timer_ctx_index(ctx);
unsigned long flags;
@@ -668,7 +659,7 @@ static inline void set_timer_irq_phys_active(struct arch_timer_context *ctx, boo
static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx)
{
- struct kvm_vcpu *vcpu = ctx->vcpu;
+ struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctx);
bool phys_active = false;
/*
@@ -677,7 +668,7 @@ static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx)
* this point and the register restoration, we'll take the
* interrupt anyway.
*/
- kvm_timer_update_irq(ctx->vcpu, kvm_timer_should_fire(ctx), ctx);
+ kvm_timer_update_irq(vcpu, kvm_timer_should_fire(ctx), ctx);
if (irqchip_in_kernel(vcpu->kvm))
phys_active = kvm_vgic_map_is_active(vcpu, timer_irq(ctx));
@@ -1063,7 +1054,7 @@ static void timer_context_init(struct kvm_vcpu *vcpu, int timerid)
struct arch_timer_context *ctxt = vcpu_get_timer(vcpu, timerid);
struct kvm *kvm = vcpu->kvm;
- ctxt->vcpu = vcpu;
+ ctxt->timer_id = timerid;
if (timerid == TIMER_VTIMER)
ctxt->offset.vm_offset = &kvm->arch.timer_data.voffset;
@@ -1121,49 +1112,6 @@ void kvm_timer_cpu_down(void)
disable_percpu_irq(host_ptimer_irq);
}
-int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
-{
- struct arch_timer_context *timer;
-
- switch (regid) {
- case KVM_REG_ARM_TIMER_CTL:
- timer = vcpu_vtimer(vcpu);
- kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value);
- break;
- case KVM_REG_ARM_TIMER_CNT:
- if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET,
- &vcpu->kvm->arch.flags)) {
- timer = vcpu_vtimer(vcpu);
- timer_set_offset(timer, kvm_phys_timer_read() - value);
- }
- break;
- case KVM_REG_ARM_TIMER_CVAL:
- timer = vcpu_vtimer(vcpu);
- kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value);
- break;
- case KVM_REG_ARM_PTIMER_CTL:
- timer = vcpu_ptimer(vcpu);
- kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value);
- break;
- case KVM_REG_ARM_PTIMER_CNT:
- if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET,
- &vcpu->kvm->arch.flags)) {
- timer = vcpu_ptimer(vcpu);
- timer_set_offset(timer, kvm_phys_timer_read() - value);
- }
- break;
- case KVM_REG_ARM_PTIMER_CVAL:
- timer = vcpu_ptimer(vcpu);
- kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value);
- break;
-
- default:
- return -1;
- }
-
- return 0;
-}
-
static u64 read_timer_ctl(struct arch_timer_context *timer)
{
/*
@@ -1180,31 +1128,6 @@ static u64 read_timer_ctl(struct arch_timer_context *timer)
return ctl;
}
-u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
-{
- switch (regid) {
- case KVM_REG_ARM_TIMER_CTL:
- return kvm_arm_timer_read(vcpu,
- vcpu_vtimer(vcpu), TIMER_REG_CTL);
- case KVM_REG_ARM_TIMER_CNT:
- return kvm_arm_timer_read(vcpu,
- vcpu_vtimer(vcpu), TIMER_REG_CNT);
- case KVM_REG_ARM_TIMER_CVAL:
- return kvm_arm_timer_read(vcpu,
- vcpu_vtimer(vcpu), TIMER_REG_CVAL);
- case KVM_REG_ARM_PTIMER_CTL:
- return kvm_arm_timer_read(vcpu,
- vcpu_ptimer(vcpu), TIMER_REG_CTL);
- case KVM_REG_ARM_PTIMER_CNT:
- return kvm_arm_timer_read(vcpu,
- vcpu_ptimer(vcpu), TIMER_REG_CNT);
- case KVM_REG_ARM_PTIMER_CVAL:
- return kvm_arm_timer_read(vcpu,
- vcpu_ptimer(vcpu), TIMER_REG_CVAL);
- }
- return (u64)-1;
-}
-
static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
struct arch_timer_context *timer,
enum kvm_arch_timer_regs treg)
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index f21d1b7f20f8..870953b4a8a7 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -642,6 +642,7 @@ nommu:
vcpu->arch.hcr_el2 |= HCR_TWI;
vcpu_set_pauth_traps(vcpu);
+ kvm_vcpu_load_fgt(vcpu);
if (is_protected_kvm_enabled()) {
kvm_call_hyp_nvhe(__pkvm_vcpu_load,
@@ -1794,6 +1795,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
case KVM_GET_VCPU_EVENTS: {
struct kvm_vcpu_events events;
+ if (!kvm_vcpu_initialized(vcpu))
+ return -ENOEXEC;
+
if (kvm_arm_vcpu_get_events(vcpu, &events))
return -EINVAL;
@@ -1805,6 +1809,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
case KVM_SET_VCPU_EVENTS: {
struct kvm_vcpu_events events;
+ if (!kvm_vcpu_initialized(vcpu))
+ return -ENOEXEC;
+
if (copy_from_user(&events, argp, sizeof(events)))
return -EFAULT;
diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c
index 20bb9af125b1..be26d5aa668c 100644
--- a/arch/arm64/kvm/at.c
+++ b/arch/arm64/kvm/at.c
@@ -91,7 +91,6 @@ static enum trans_regime compute_translation_regime(struct kvm_vcpu *vcpu, u32 o
case OP_AT_S1E2W:
case OP_AT_S1E2A:
return vcpu_el2_e2h_is_set(vcpu) ? TR_EL20 : TR_EL2;
- break;
default:
return (vcpu_el2_e2h_is_set(vcpu) &&
vcpu_el2_tge_is_set(vcpu)) ? TR_EL20 : TR_EL10;
@@ -1602,13 +1601,17 @@ int __kvm_find_s1_desc_level(struct kvm_vcpu *vcpu, u64 va, u64 ipa, int *level)
.fn = match_s1_desc,
.priv = &dm,
},
- .regime = TR_EL10,
.as_el0 = false,
.pan = false,
};
struct s1_walk_result wr = {};
int ret;
+ if (is_hyp_ctxt(vcpu))
+ wi.regime = vcpu_el2_e2h_is_set(vcpu) ? TR_EL20 : TR_EL2;
+ else
+ wi.regime = TR_EL10;
+
ret = setup_s1_walk(vcpu, &wi, &wr, va);
if (ret)
return ret;
diff --git a/arch/arm64/kvm/config.c b/arch/arm64/kvm/config.c
index fbd8944a3dea..24bb3f36e9d5 100644
--- a/arch/arm64/kvm/config.c
+++ b/arch/arm64/kvm/config.c
@@ -5,6 +5,8 @@
*/
#include <linux/kvm_host.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_nested.h>
#include <asm/sysreg.h>
/*
@@ -1428,3 +1430,91 @@ void get_reg_fixed_bits(struct kvm *kvm, enum vcpu_sysreg reg, u64 *res0, u64 *r
break;
}
}
+
+static __always_inline struct fgt_masks *__fgt_reg_to_masks(enum vcpu_sysreg reg)
+{
+ switch (reg) {
+ case HFGRTR_EL2:
+ return &hfgrtr_masks;
+ case HFGWTR_EL2:
+ return &hfgwtr_masks;
+ case HFGITR_EL2:
+ return &hfgitr_masks;
+ case HDFGRTR_EL2:
+ return &hdfgrtr_masks;
+ case HDFGWTR_EL2:
+ return &hdfgwtr_masks;
+ case HAFGRTR_EL2:
+ return &hafgrtr_masks;
+ case HFGRTR2_EL2:
+ return &hfgrtr2_masks;
+ case HFGWTR2_EL2:
+ return &hfgwtr2_masks;
+ case HFGITR2_EL2:
+ return &hfgitr2_masks;
+ case HDFGRTR2_EL2:
+ return &hdfgrtr2_masks;
+ case HDFGWTR2_EL2:
+ return &hdfgwtr2_masks;
+ default:
+ BUILD_BUG_ON(1);
+ }
+}
+
+static __always_inline void __compute_fgt(struct kvm_vcpu *vcpu, enum vcpu_sysreg reg)
+{
+ u64 fgu = vcpu->kvm->arch.fgu[__fgt_reg_to_group_id(reg)];
+ struct fgt_masks *m = __fgt_reg_to_masks(reg);
+ u64 clear = 0, set = 0, val = m->nmask;
+
+ set |= fgu & m->mask;
+ clear |= fgu & m->nmask;
+
+ if (is_nested_ctxt(vcpu)) {
+ u64 nested = __vcpu_sys_reg(vcpu, reg);
+ set |= nested & m->mask;
+ clear |= ~nested & m->nmask;
+ }
+
+ val |= set;
+ val &= ~clear;
+ *vcpu_fgt(vcpu, reg) = val;
+}
+
+static void __compute_hfgwtr(struct kvm_vcpu *vcpu)
+{
+ __compute_fgt(vcpu, HFGWTR_EL2);
+
+ if (cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38))
+ *vcpu_fgt(vcpu, HFGWTR_EL2) |= HFGWTR_EL2_TCR_EL1;
+}
+
+static void __compute_hdfgwtr(struct kvm_vcpu *vcpu)
+{
+ __compute_fgt(vcpu, HDFGWTR_EL2);
+
+ if (is_hyp_ctxt(vcpu))
+ *vcpu_fgt(vcpu, HDFGWTR_EL2) |= HDFGWTR_EL2_MDSCR_EL1;
+}
+
+void kvm_vcpu_load_fgt(struct kvm_vcpu *vcpu)
+{
+ if (!cpus_have_final_cap(ARM64_HAS_FGT))
+ return;
+
+ __compute_fgt(vcpu, HFGRTR_EL2);
+ __compute_hfgwtr(vcpu);
+ __compute_fgt(vcpu, HFGITR_EL2);
+ __compute_fgt(vcpu, HDFGRTR_EL2);
+ __compute_hdfgwtr(vcpu);
+ __compute_fgt(vcpu, HAFGRTR_EL2);
+
+ if (!cpus_have_final_cap(ARM64_HAS_FGT2))
+ return;
+
+ __compute_fgt(vcpu, HFGRTR2_EL2);
+ __compute_fgt(vcpu, HFGWTR2_EL2);
+ __compute_fgt(vcpu, HFGITR2_EL2);
+ __compute_fgt(vcpu, HDFGRTR2_EL2);
+ __compute_fgt(vcpu, HDFGWTR2_EL2);
+}
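
A worked sketch of the combine step in __compute_fgt() (hypothetical 4-bit
masks): positive-polarity bits (m->mask) trap when set, negative-polarity
bits (m->nmask) trap when clear, so the no-trap baseline is m->nmask and
trap requests land in set and clear respectively:

	u64 mask  = 0b0011;	/* positive polarity: 1 => trap      */
	u64 nmask = 0b1100;	/* negative polarity: 0 => trap      */
	u64 set   = 0b0001;	/* trap request on a positive bit    */
	u64 clear = 0b0100;	/* trap request on a negative bit    */
	u64 val   = (nmask | set) & ~clear;	/* == 0b1001 */
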
diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
index 3515a273eaa2..3ad6b7c6e4ba 100644
--- a/arch/arm64/kvm/debug.c
+++ b/arch/arm64/kvm/debug.c
@@ -15,6 +15,12 @@
#include <asm/kvm_arm.h>
#include <asm/kvm_emulate.h>
+static int cpu_has_spe(u64 dfr0)
+{
+ return cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_PMSVer_SHIFT) &&
+ !(read_sysreg_s(SYS_PMBIDR_EL1) & PMBIDR_EL1_P);
+}
+
/**
* kvm_arm_setup_mdcr_el2 - configure vcpu mdcr_el2 value
*
@@ -77,13 +83,12 @@ void kvm_init_host_debug_data(void)
*host_data_ptr(debug_brps) = SYS_FIELD_GET(ID_AA64DFR0_EL1, BRPs, dfr0);
*host_data_ptr(debug_wrps) = SYS_FIELD_GET(ID_AA64DFR0_EL1, WRPs, dfr0);
+ if (cpu_has_spe(dfr0))
+ host_data_set_flag(HAS_SPE);
+
if (has_vhe())
return;
- if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_PMSVer_SHIFT) &&
- !(read_sysreg_s(SYS_PMBIDR_EL1) & PMBIDR_EL1_P))
- host_data_set_flag(HAS_SPE);
-
/* Check if we have BRBE implemented and available at the host */
if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_BRBE_SHIFT))
host_data_set_flag(HAS_BRBE);
@@ -102,7 +107,7 @@ void kvm_init_host_debug_data(void)
void kvm_debug_init_vhe(void)
{
/* Clear PMSCR_EL1.E{0,1}SPE which reset to UNKNOWN values. */
- if (SYS_FIELD_GET(ID_AA64DFR0_EL1, PMSVer, read_sysreg(id_aa64dfr0_el1)))
+ if (host_data_test_flag(HAS_SPE))
write_sysreg_el1(0, SYS_PMSCR);
}
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 16ba5e9ac86c..1c87699fd886 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -591,64 +591,6 @@ static unsigned long num_core_regs(const struct kvm_vcpu *vcpu)
return copy_core_reg_indices(vcpu, NULL);
}
-static const u64 timer_reg_list[] = {
- KVM_REG_ARM_TIMER_CTL,
- KVM_REG_ARM_TIMER_CNT,
- KVM_REG_ARM_TIMER_CVAL,
- KVM_REG_ARM_PTIMER_CTL,
- KVM_REG_ARM_PTIMER_CNT,
- KVM_REG_ARM_PTIMER_CVAL,
-};
-
-#define NUM_TIMER_REGS ARRAY_SIZE(timer_reg_list)
-
-static bool is_timer_reg(u64 index)
-{
- switch (index) {
- case KVM_REG_ARM_TIMER_CTL:
- case KVM_REG_ARM_TIMER_CNT:
- case KVM_REG_ARM_TIMER_CVAL:
- case KVM_REG_ARM_PTIMER_CTL:
- case KVM_REG_ARM_PTIMER_CNT:
- case KVM_REG_ARM_PTIMER_CVAL:
- return true;
- }
- return false;
-}
-
-static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
-{
- for (int i = 0; i < NUM_TIMER_REGS; i++) {
- if (put_user(timer_reg_list[i], uindices))
- return -EFAULT;
- uindices++;
- }
-
- return 0;
-}
-
-static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
-{
- void __user *uaddr = (void __user *)(long)reg->addr;
- u64 val;
- int ret;
-
- ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id));
- if (ret != 0)
- return -EFAULT;
-
- return kvm_arm_timer_set_reg(vcpu, reg->id, val);
-}
-
-static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
-{
- void __user *uaddr = (void __user *)(long)reg->addr;
- u64 val;
-
- val = kvm_arm_timer_get_reg(vcpu, reg->id);
- return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)) ? -EFAULT : 0;
-}
-
static unsigned long num_sve_regs(const struct kvm_vcpu *vcpu)
{
const unsigned int slices = vcpu_sve_slices(vcpu);
@@ -724,7 +666,6 @@ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu)
res += num_sve_regs(vcpu);
res += kvm_arm_num_sys_reg_descs(vcpu);
res += kvm_arm_get_fw_num_regs(vcpu);
- res += NUM_TIMER_REGS;
return res;
}
@@ -755,11 +696,6 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
return ret;
uindices += kvm_arm_get_fw_num_regs(vcpu);
- ret = copy_timer_indices(vcpu, uindices);
- if (ret < 0)
- return ret;
- uindices += NUM_TIMER_REGS;
-
return kvm_arm_copy_sys_reg_indices(vcpu, uindices);
}
@@ -777,9 +713,6 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
case KVM_REG_ARM64_SVE: return get_sve_reg(vcpu, reg);
}
- if (is_timer_reg(reg->id))
- return get_timer_reg(vcpu, reg);
-
return kvm_arm_sys_reg_get_reg(vcpu, reg);
}
@@ -797,9 +730,6 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
case KVM_REG_ARM64_SVE: return set_sve_reg(vcpu, reg);
}
- if (is_timer_reg(reg->id))
- return set_timer_reg(vcpu, reg);
-
return kvm_arm_sys_reg_set_reg(vcpu, reg);
}
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index bca8c80e11da..cc7d5d1709cb 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -147,7 +147,12 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu)
if (esr & ESR_ELx_WFx_ISS_RV) {
u64 val, now;
- now = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_TIMER_CNT);
+ now = kvm_phys_timer_read();
+ if (is_hyp_ctxt(vcpu) && vcpu_el2_e2h_is_set(vcpu))
+ now -= timer_get_offset(vcpu_hvtimer(vcpu));
+ else
+ now -= timer_get_offset(vcpu_vtimer(vcpu));
+
val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu));
if (now >= val)
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index b6682202edf3..c5d5e5b86eaf 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -195,123 +195,6 @@ static inline void __deactivate_cptr_traps(struct kvm_vcpu *vcpu)
__deactivate_cptr_traps_nvhe(vcpu);
}
-#define reg_to_fgt_masks(reg) \
- ({ \
- struct fgt_masks *m; \
- switch(reg) { \
- case HFGRTR_EL2: \
- m = &hfgrtr_masks; \
- break; \
- case HFGWTR_EL2: \
- m = &hfgwtr_masks; \
- break; \
- case HFGITR_EL2: \
- m = &hfgitr_masks; \
- break; \
- case HDFGRTR_EL2: \
- m = &hdfgrtr_masks; \
- break; \
- case HDFGWTR_EL2: \
- m = &hdfgwtr_masks; \
- break; \
- case HAFGRTR_EL2: \
- m = &hafgrtr_masks; \
- break; \
- case HFGRTR2_EL2: \
- m = &hfgrtr2_masks; \
- break; \
- case HFGWTR2_EL2: \
- m = &hfgwtr2_masks; \
- break; \
- case HFGITR2_EL2: \
- m = &hfgitr2_masks; \
- break; \
- case HDFGRTR2_EL2: \
- m = &hdfgrtr2_masks; \
- break; \
- case HDFGWTR2_EL2: \
- m = &hdfgwtr2_masks; \
- break; \
- default: \
- BUILD_BUG_ON(1); \
- } \
- \
- m; \
- })
-
-#define compute_clr_set(vcpu, reg, clr, set) \
- do { \
- u64 hfg = __vcpu_sys_reg(vcpu, reg); \
- struct fgt_masks *m = reg_to_fgt_masks(reg); \
- set |= hfg & m->mask; \
- clr |= ~hfg & m->nmask; \
- } while(0)
-
-#define reg_to_fgt_group_id(reg) \
- ({ \
- enum fgt_group_id id; \
- switch(reg) { \
- case HFGRTR_EL2: \
- case HFGWTR_EL2: \
- id = HFGRTR_GROUP; \
- break; \
- case HFGITR_EL2: \
- id = HFGITR_GROUP; \
- break; \
- case HDFGRTR_EL2: \
- case HDFGWTR_EL2: \
- id = HDFGRTR_GROUP; \
- break; \
- case HAFGRTR_EL2: \
- id = HAFGRTR_GROUP; \
- break; \
- case HFGRTR2_EL2: \
- case HFGWTR2_EL2: \
- id = HFGRTR2_GROUP; \
- break; \
- case HFGITR2_EL2: \
- id = HFGITR2_GROUP; \
- break; \
- case HDFGRTR2_EL2: \
- case HDFGWTR2_EL2: \
- id = HDFGRTR2_GROUP; \
- break; \
- default: \
- BUILD_BUG_ON(1); \
- } \
- \
- id; \
- })
-
-#define compute_undef_clr_set(vcpu, kvm, reg, clr, set) \
- do { \
- u64 hfg = kvm->arch.fgu[reg_to_fgt_group_id(reg)]; \
- struct fgt_masks *m = reg_to_fgt_masks(reg); \
- set |= hfg & m->mask; \
- clr |= hfg & m->nmask; \
- } while(0)
-
-#define update_fgt_traps_cs(hctxt, vcpu, kvm, reg, clr, set) \
- do { \
- struct fgt_masks *m = reg_to_fgt_masks(reg); \
- u64 c = clr, s = set; \
- u64 val; \
- \
- ctxt_sys_reg(hctxt, reg) = read_sysreg_s(SYS_ ## reg); \
- if (is_nested_ctxt(vcpu)) \
- compute_clr_set(vcpu, reg, c, s); \
- \
- compute_undef_clr_set(vcpu, kvm, reg, c, s); \
- \
- val = m->nmask; \
- val |= s; \
- val &= ~c; \
- write_sysreg_s(val, SYS_ ## reg); \
- } while(0)
-
-#define update_fgt_traps(hctxt, vcpu, kvm, reg) \
- update_fgt_traps_cs(hctxt, vcpu, kvm, reg, 0, 0)
-
static inline bool cpu_has_amu(void)
{
u64 pfr0 = read_sysreg_s(SYS_ID_AA64PFR0_EL1);
@@ -320,33 +203,36 @@ static inline bool cpu_has_amu(void)
ID_AA64PFR0_EL1_AMU_SHIFT);
}
+#define __activate_fgt(hctxt, vcpu, reg) \
+ do { \
+ ctxt_sys_reg(hctxt, reg) = read_sysreg_s(SYS_ ## reg); \
+ write_sysreg_s(*vcpu_fgt(vcpu, reg), SYS_ ## reg); \
+ } while (0)
+
static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu)
{
struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt);
- struct kvm *kvm = kern_hyp_va(vcpu->kvm);
if (!cpus_have_final_cap(ARM64_HAS_FGT))
return;
- update_fgt_traps(hctxt, vcpu, kvm, HFGRTR_EL2);
- update_fgt_traps_cs(hctxt, vcpu, kvm, HFGWTR_EL2, 0,
- cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38) ?
- HFGWTR_EL2_TCR_EL1_MASK : 0);
- update_fgt_traps(hctxt, vcpu, kvm, HFGITR_EL2);
- update_fgt_traps(hctxt, vcpu, kvm, HDFGRTR_EL2);
- update_fgt_traps(hctxt, vcpu, kvm, HDFGWTR_EL2);
+ __activate_fgt(hctxt, vcpu, HFGRTR_EL2);
+ __activate_fgt(hctxt, vcpu, HFGWTR_EL2);
+ __activate_fgt(hctxt, vcpu, HFGITR_EL2);
+ __activate_fgt(hctxt, vcpu, HDFGRTR_EL2);
+ __activate_fgt(hctxt, vcpu, HDFGWTR_EL2);
if (cpu_has_amu())
- update_fgt_traps(hctxt, vcpu, kvm, HAFGRTR_EL2);
+ __activate_fgt(hctxt, vcpu, HAFGRTR_EL2);
if (!cpus_have_final_cap(ARM64_HAS_FGT2))
return;
- update_fgt_traps(hctxt, vcpu, kvm, HFGRTR2_EL2);
- update_fgt_traps(hctxt, vcpu, kvm, HFGWTR2_EL2);
- update_fgt_traps(hctxt, vcpu, kvm, HFGITR2_EL2);
- update_fgt_traps(hctxt, vcpu, kvm, HDFGRTR2_EL2);
- update_fgt_traps(hctxt, vcpu, kvm, HDFGWTR2_EL2);
+ __activate_fgt(hctxt, vcpu, HFGRTR2_EL2);
+ __activate_fgt(hctxt, vcpu, HFGWTR2_EL2);
+ __activate_fgt(hctxt, vcpu, HFGITR2_EL2);
+ __activate_fgt(hctxt, vcpu, HDFGRTR2_EL2);
+ __activate_fgt(hctxt, vcpu, HDFGWTR2_EL2);
}
#define __deactivate_fgt(htcxt, vcpu, reg) \
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index 05774aed09cb..43bde061b65d 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -172,6 +172,7 @@ static int pkvm_vcpu_init_traps(struct pkvm_hyp_vcpu *hyp_vcpu)
/* Trust the host for non-protected vcpu features. */
vcpu->arch.hcrx_el2 = host_vcpu->arch.hcrx_el2;
+ memcpy(vcpu->arch.fgt, host_vcpu->arch.fgt, sizeof(vcpu->arch.fgt));
return 0;
}
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index 7a045cad6bdf..f04cda40545b 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -1859,13 +1859,16 @@ void kvm_nested_setup_mdcr_el2(struct kvm_vcpu *vcpu)
{
u64 guest_mdcr = __vcpu_sys_reg(vcpu, MDCR_EL2);
+ if (is_nested_ctxt(vcpu))
+ vcpu->arch.mdcr_el2 |= (guest_mdcr & NV_MDCR_GUEST_INCLUDE);
/*
* In yet another example where FEAT_NV2 is fscking broken, accesses
* to MDSCR_EL1 are redirected to the VNCR despite having an effect
* at EL2. Use a big hammer to apply sanity.
+ *
+ * Unless of course we have FEAT_FGT, in which case we can precisely
+ * trap MDSCR_EL1.
*/
- if (is_hyp_ctxt(vcpu))
+ else if (!cpus_have_final_cap(ARM64_HAS_FGT))
vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA;
- else
- vcpu->arch.mdcr_el2 |= (guest_mdcr & NV_MDCR_GUEST_INCLUDE);
}
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 91053aa832d0..e67eb39ddc11 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -203,7 +203,6 @@ static void locate_register(const struct kvm_vcpu *vcpu, enum vcpu_sysreg reg,
MAPPED_EL2_SYSREG(AMAIR_EL2, AMAIR_EL1, NULL );
MAPPED_EL2_SYSREG(ELR_EL2, ELR_EL1, NULL );
MAPPED_EL2_SYSREG(SPSR_EL2, SPSR_EL1, NULL );
- MAPPED_EL2_SYSREG(ZCR_EL2, ZCR_EL1, NULL );
MAPPED_EL2_SYSREG(CONTEXTIDR_EL2, CONTEXTIDR_EL1, NULL );
MAPPED_EL2_SYSREG(SCTLR2_EL2, SCTLR2_EL1, NULL );
case CNTHCTL_EL2:
@@ -1595,14 +1594,47 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu,
return true;
}
-static bool access_hv_timer(struct kvm_vcpu *vcpu,
- struct sys_reg_params *p,
- const struct sys_reg_desc *r)
+static int arch_timer_set_user(struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd,
+ u64 val)
{
- if (!vcpu_el2_e2h_is_set(vcpu))
- return undef_access(vcpu, p, r);
+ switch (reg_to_encoding(rd)) {
+ case SYS_CNTV_CTL_EL0:
+ case SYS_CNTP_CTL_EL0:
+ case SYS_CNTHV_CTL_EL2:
+ case SYS_CNTHP_CTL_EL2:
+ val &= ~ARCH_TIMER_CTRL_IT_STAT;
+ break;
+ case SYS_CNTVCT_EL0:
+ if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &vcpu->kvm->arch.flags))
+ timer_set_offset(vcpu_vtimer(vcpu), kvm_phys_timer_read() - val);
+ return 0;
+ case SYS_CNTPCT_EL0:
+ if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &vcpu->kvm->arch.flags))
+ timer_set_offset(vcpu_ptimer(vcpu), kvm_phys_timer_read() - val);
+ return 0;
+ }
- return access_arch_timer(vcpu, p, r);
+ __vcpu_assign_sys_reg(vcpu, rd->reg, val);
+ return 0;
+}
+
+static int arch_timer_get_user(struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd,
+ u64 *val)
+{
+ switch (reg_to_encoding(rd)) {
+ case SYS_CNTVCT_EL0:
+ *val = kvm_phys_timer_read() - timer_get_offset(vcpu_vtimer(vcpu));
+ break;
+ case SYS_CNTPCT_EL0:
+ *val = kvm_phys_timer_read() - timer_get_offset(vcpu_ptimer(vcpu));
+ break;
+ default:
+ *val = __vcpu_sys_reg(vcpu, rd->reg);
+ }
+
+ return 0;
}
static s64 kvm_arm64_ftr_safe_value(u32 id, const struct arm64_ftr_bits *ftrp,
@@ -2507,15 +2539,20 @@ static bool bad_redir_trap(struct kvm_vcpu *vcpu,
"trap of EL2 register redirected to EL1");
}
-#define EL2_REG_FILTERED(name, acc, rst, v, filter) { \
+#define SYS_REG_USER_FILTER(name, acc, rst, v, gu, su, filter) { \
SYS_DESC(SYS_##name), \
.access = acc, \
.reset = rst, \
.reg = name, \
+ .get_user = gu, \
+ .set_user = su, \
.visibility = filter, \
.val = v, \
}
+#define EL2_REG_FILTERED(name, acc, rst, v, filter) \
+ SYS_REG_USER_FILTER(name, acc, rst, v, NULL, NULL, filter)
+
#define EL2_REG(name, acc, rst, v) \
EL2_REG_FILTERED(name, acc, rst, v, el2_visibility)
@@ -2526,6 +2563,10 @@ static bool bad_redir_trap(struct kvm_vcpu *vcpu,
EL2_REG_VNCR_FILT(name, hidden_visibility)
#define EL2_REG_REDIR(name, rst, v) EL2_REG(name, bad_redir_trap, rst, v)
+#define TIMER_REG(name, vis) \
+ SYS_REG_USER_FILTER(name, access_arch_timer, reset_val, 0, \
+ arch_timer_get_user, arch_timer_set_user, vis)
+
/*
* Since reset() callback and field val are not used for idregs, they will be
* used for specific purposes for idregs.
@@ -2705,18 +2746,17 @@ static bool access_zcr_el2(struct kvm_vcpu *vcpu,
if (guest_hyp_sve_traps_enabled(vcpu)) {
kvm_inject_nested_sve_trap(vcpu);
- return true;
+ return false;
}
if (!p->is_write) {
- p->regval = vcpu_read_sys_reg(vcpu, ZCR_EL2);
+ p->regval = __vcpu_sys_reg(vcpu, ZCR_EL2);
return true;
}
vq = SYS_FIELD_GET(ZCR_ELx, LEN, p->regval) + 1;
vq = min(vq, vcpu_sve_max_vq(vcpu));
- vcpu_write_sys_reg(vcpu, vq - 1, ZCR_EL2);
-
+ __vcpu_assign_sys_reg(vcpu, ZCR_EL2, vq - 1);
return true;
}
@@ -2833,6 +2873,16 @@ static unsigned int s1pie_el2_visibility(const struct kvm_vcpu *vcpu,
return __el2_visibility(vcpu, rd, s1pie_visibility);
}
+static unsigned int cnthv_visibility(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
+{
+ if (vcpu_has_nv(vcpu) &&
+ !vcpu_has_feature(vcpu, KVM_ARM_VCPU_HAS_EL2_E2H0))
+ return 0;
+
+ return REG_HIDDEN;
+}
+
static bool access_mdcr(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
@@ -3482,17 +3532,19 @@ static const struct sys_reg_desc sys_reg_descs[] = {
AMU_AMEVTYPER1_EL0(14),
AMU_AMEVTYPER1_EL0(15),
- { SYS_DESC(SYS_CNTPCT_EL0), access_arch_timer },
- { SYS_DESC(SYS_CNTVCT_EL0), access_arch_timer },
+ { SYS_DESC(SYS_CNTPCT_EL0), .access = access_arch_timer,
+ .get_user = arch_timer_get_user, .set_user = arch_timer_set_user },
+ { SYS_DESC(SYS_CNTVCT_EL0), .access = access_arch_timer,
+ .get_user = arch_timer_get_user, .set_user = arch_timer_set_user },
{ SYS_DESC(SYS_CNTPCTSS_EL0), access_arch_timer },
{ SYS_DESC(SYS_CNTVCTSS_EL0), access_arch_timer },
{ SYS_DESC(SYS_CNTP_TVAL_EL0), access_arch_timer },
- { SYS_DESC(SYS_CNTP_CTL_EL0), access_arch_timer },
- { SYS_DESC(SYS_CNTP_CVAL_EL0), access_arch_timer },
+ TIMER_REG(CNTP_CTL_EL0, NULL),
+ TIMER_REG(CNTP_CVAL_EL0, NULL),
{ SYS_DESC(SYS_CNTV_TVAL_EL0), access_arch_timer },
- { SYS_DESC(SYS_CNTV_CTL_EL0), access_arch_timer },
- { SYS_DESC(SYS_CNTV_CVAL_EL0), access_arch_timer },
+ TIMER_REG(CNTV_CTL_EL0, NULL),
+ TIMER_REG(CNTV_CVAL_EL0, NULL),
/* PMEVCNTRn_EL0 */
PMU_PMEVCNTR_EL0(0),
@@ -3690,12 +3742,12 @@ static const struct sys_reg_desc sys_reg_descs[] = {
EL2_REG_VNCR(CNTVOFF_EL2, reset_val, 0),
EL2_REG(CNTHCTL_EL2, access_rw, reset_val, 0),
{ SYS_DESC(SYS_CNTHP_TVAL_EL2), access_arch_timer },
- EL2_REG(CNTHP_CTL_EL2, access_arch_timer, reset_val, 0),
- EL2_REG(CNTHP_CVAL_EL2, access_arch_timer, reset_val, 0),
+ TIMER_REG(CNTHP_CTL_EL2, el2_visibility),
+ TIMER_REG(CNTHP_CVAL_EL2, el2_visibility),
- { SYS_DESC(SYS_CNTHV_TVAL_EL2), access_hv_timer },
- EL2_REG(CNTHV_CTL_EL2, access_hv_timer, reset_val, 0),
- EL2_REG(CNTHV_CVAL_EL2, access_hv_timer, reset_val, 0),
+ { SYS_DESC(SYS_CNTHV_TVAL_EL2), access_arch_timer, .visibility = cnthv_visibility },
+ TIMER_REG(CNTHV_CTL_EL2, cnthv_visibility),
+ TIMER_REG(CNTHV_CVAL_EL2, cnthv_visibility),
{ SYS_DESC(SYS_CNTKCTL_EL12), access_cntkctl_el12 },
@@ -5233,15 +5285,28 @@ static int demux_c15_set(struct kvm_vcpu *vcpu, u64 id, void __user *uaddr)
}
}
+static u64 kvm_one_reg_to_id(const struct kvm_one_reg *reg)
+{
+ switch(reg->id) {
+ case KVM_REG_ARM_TIMER_CVAL:
+ return TO_ARM64_SYS_REG(CNTV_CVAL_EL0);
+ case KVM_REG_ARM_TIMER_CNT:
+ return TO_ARM64_SYS_REG(CNTVCT_EL0);
+ default:
+ return reg->id;
+ }
+}
+
int kvm_sys_reg_get_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg,
const struct sys_reg_desc table[], unsigned int num)
{
u64 __user *uaddr = (u64 __user *)(unsigned long)reg->addr;
const struct sys_reg_desc *r;
+ u64 id = kvm_one_reg_to_id(reg);
u64 val;
int ret;
- r = id_to_sys_reg_desc(vcpu, reg->id, table, num);
+ r = id_to_sys_reg_desc(vcpu, id, table, num);
if (!r || sysreg_hidden(vcpu, r))
return -ENOENT;
@@ -5274,13 +5339,14 @@ int kvm_sys_reg_set_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg,
{
u64 __user *uaddr = (u64 __user *)(unsigned long)reg->addr;
const struct sys_reg_desc *r;
+ u64 id = kvm_one_reg_to_id(reg);
u64 val;
int ret;
if (get_user(val, uaddr))
return -EFAULT;
- r = id_to_sys_reg_desc(vcpu, reg->id, table, num);
+ r = id_to_sys_reg_desc(vcpu, id, table, num);
if (!r || sysreg_hidden(vcpu, r))
return -ENOENT;
@@ -5340,10 +5406,23 @@ static u64 sys_reg_to_index(const struct sys_reg_desc *reg)
static bool copy_reg_to_user(const struct sys_reg_desc *reg, u64 __user **uind)
{
+ u64 idx;
+
if (!*uind)
return true;
- if (put_user(sys_reg_to_index(reg), *uind))
+ switch (reg_to_encoding(reg)) {
+ case SYS_CNTV_CVAL_EL0:
+ idx = KVM_REG_ARM_TIMER_CVAL;
+ break;
+ case SYS_CNTVCT_EL0:
+ idx = KVM_REG_ARM_TIMER_CNT;
+ break;
+ default:
+ idx = sys_reg_to_index(reg);
+ }
+
+ if (put_user(idx, *uind))
return false;
(*uind)++;
diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
index 317abc490368..b3f904472fac 100644
--- a/arch/arm64/kvm/sys_regs.h
+++ b/arch/arm64/kvm/sys_regs.h
@@ -257,4 +257,10 @@ int kvm_finalize_sys_regs(struct kvm_vcpu *vcpu);
(val); \
})
+#define TO_ARM64_SYS_REG(r) ARM64_SYS_REG(sys_reg_Op0(SYS_ ## r), \
+ sys_reg_Op1(SYS_ ## r), \
+ sys_reg_CRn(SYS_ ## r), \
+ sys_reg_CRm(SYS_ ## r), \
+ sys_reg_Op2(SYS_ ## r))
+
#endif /* __ARM64_KVM_SYS_REGS_LOCAL_H__ */
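
The user-visible effect (sketch, assuming an already-initialized vcpu_fd):
the legacy timer indices keep working through the ONE_REG API, now served
by the sysreg table via arch_timer_get_user()/arch_timer_set_user():

	__u64 cnt;
	struct kvm_one_reg reg = {
		.id   = KVM_REG_ARM_TIMER_CNT,	/* remapped to CNTVCT_EL0 */
		.addr = (__u64)&cnt,
	};
	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
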
diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c
index f1c153106c56..6fbb4b099855 100644
--- a/arch/arm64/kvm/vgic/vgic-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-v3.c
@@ -297,8 +297,11 @@ void vcpu_set_ich_hcr(struct kvm_vcpu *vcpu)
{
struct vgic_v3_cpu_if *vgic_v3 = &vcpu->arch.vgic_cpu.vgic_v3;
+ if (!vgic_is_v3(vcpu->kvm))
+ return;
+
/* Hide GICv3 sysreg if necessary */
- if (!kvm_has_gicv3(vcpu->kvm)) {
+ if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) {
vgic_v3->vgic_hcr |= (ICH_HCR_EL2_TALL0 | ICH_HCR_EL2_TALL1 |
ICH_HCR_EL2_TC);
return;
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 5782e743fd27..4ebc333dd786 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -1747,6 +1747,9 @@ void __init fadump_setup_param_area(void)
{
phys_addr_t range_start, range_end;
+ if (!fw_dump.fadump_enabled)
+ return;
+
if (!fw_dump.param_area_supported || fw_dump.dump_active)
return;
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index 1302b5ac5672..89a1b8c21ab4 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -916,8 +916,7 @@ int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio,
* it fires once.
*/
if (single_escalation) {
- struct irq_data *d = irq_get_irq_data(xc->esc_virq[prio]);
- struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
+ struct xive_irq_data *xd = irq_get_chip_data(xc->esc_virq[prio]);
xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_01);
vcpu->arch.xive_esc_raddr = xd->eoi_page;
@@ -1612,7 +1611,7 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq,
/* Grab info about irq */
state->pt_number = hw_irq;
- state->pt_data = irq_data_get_irq_handler_data(host_data);
+ state->pt_data = irq_data_get_irq_chip_data(host_data);
/*
* Configure the IRQ to match the existing configuration of
@@ -1787,8 +1786,7 @@ void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu)
*/
void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu, int irq)
{
- struct irq_data *d = irq_get_irq_data(irq);
- struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
+ struct xive_irq_data *xd = irq_get_chip_data(irq);
/*
* This slightly odd sequence gives the right result
@@ -2827,9 +2825,7 @@ int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu)
i0, i1);
}
if (xc->esc_virq[i]) {
- struct irq_data *d = irq_get_irq_data(xc->esc_virq[i]);
- struct xive_irq_data *xd =
- irq_data_get_irq_handler_data(d);
+ struct xive_irq_data *xd = irq_get_chip_data(xc->esc_virq[i]);
u64 pq = xive_vm_esb_load(xd, XIVE_ESB_GET);
seq_printf(m, " ESC %d %c%c EOI @%llx",
diff --git a/arch/powerpc/platforms/powernv/vas.c b/arch/powerpc/platforms/powernv/vas.c
index b65256a63e87..9c9650319f3b 100644
--- a/arch/powerpc/platforms/powernv/vas.c
+++ b/arch/powerpc/platforms/powernv/vas.c
@@ -121,7 +121,7 @@ static int init_vas_instance(struct platform_device *pdev)
return -EINVAL;
}
- xd = irq_get_handler_data(vinst->virq);
+ xd = irq_get_chip_data(vinst->virq);
if (!xd) {
pr_err("Inst%d: Invalid virq %d\n",
vinst->vas_id, vinst->virq);
diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c
index 825f9432e03d..a82aaa786e9e 100644
--- a/arch/powerpc/platforms/pseries/msi.c
+++ b/arch/powerpc/platforms/pseries/msi.c
@@ -443,8 +443,7 @@ static int pseries_msi_ops_prepare(struct irq_domain *domain, struct device *dev
*/
static void pseries_msi_ops_teardown(struct irq_domain *domain, msi_alloc_info_t *arg)
{
- struct msi_desc *desc = arg->desc;
- struct pci_dev *pdev = msi_desc_to_pci_dev(desc);
+ struct pci_dev *pdev = to_pci_dev(domain->dev);
rtas_disable_msi(pdev);
}
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index 625361a15424..8d0123b0ae84 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -1580,7 +1580,7 @@ static void xive_flush_cpu_queue(unsigned int cpu, struct xive_cpu *xc)
cpu, irq);
#endif
raw_spin_lock(&desc->lock);
- xd = irq_desc_get_handler_data(desc);
+ xd = irq_desc_get_chip_data(desc);
/*
* Clear saved_p to indicate that it's no longer pending
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 40ac4cb44ed2..487ad19a236e 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -108,16 +108,18 @@ void kvm_init_pmu_capability(const struct kvm_pmu_ops *pmu_ops)
bool is_intel = boot_cpu_data.x86_vendor == X86_VENDOR_INTEL;
int min_nr_gp_ctrs = pmu_ops->MIN_NR_GP_COUNTERS;
- perf_get_x86_pmu_capability(&kvm_host_pmu);
-
/*
* Hybrid PMUs don't play nice with virtualization without careful
* configuration by userspace, and KVM's APIs for reporting supported
* vPMU features do not account for hybrid PMUs. Disable vPMU support
* for hybrid PMUs until KVM gains a way to let userspace opt-in.
*/
- if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU))
+ if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) {
enable_pmu = false;
+ memset(&kvm_host_pmu, 0, sizeof(kvm_host_pmu));
+ } else {
+ perf_get_x86_pmu_capability(&kvm_host_pmu);
+ }
if (enable_pmu) {
/*
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 42ecd093bb4c..b4b5d2d09634 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -13941,10 +13941,11 @@ bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
#ifdef CONFIG_KVM_GUEST_MEMFD
/*
- * KVM doesn't yet support mmap() on guest_memfd for VMs with private memory
- * (the private vs. shared tracking needs to be moved into guest_memfd).
+ * KVM doesn't yet support initializing guest_memfd memory as shared for VMs
+ * with private memory (the private vs. shared tracking needs to be moved into
+ * guest_memfd).
*/
-bool kvm_arch_supports_gmem_mmap(struct kvm *kvm)
+bool kvm_arch_supports_gmem_init_shared(struct kvm *kvm)
{
return !kvm_arch_has_private_mem(kvm);
}
diff --git a/drivers/misc/ocxl/afu_irq.c b/drivers/misc/ocxl/afu_irq.c
index 36f7379b8e2d..f6b821fc274c 100644
--- a/drivers/misc/ocxl/afu_irq.c
+++ b/drivers/misc/ocxl/afu_irq.c
@@ -203,7 +203,7 @@ u64 ocxl_afu_irq_get_addr(struct ocxl_context *ctx, int irq_id)
mutex_lock(&ctx->irq_lock);
irq = idr_find(&ctx->irq_idr, irq_id);
if (irq) {
- xd = irq_get_handler_data(irq->virq);
+ xd = irq_get_chip_data(irq->virq);
addr = xd ? xd->trig_page : 0;
}
mutex_unlock(&ctx->irq_lock);
diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h
index 329697c89d09..38210fb6901c 100644
--- a/fs/exfat/exfat_fs.h
+++ b/fs/exfat/exfat_fs.h
@@ -29,7 +29,6 @@ enum exfat_error_mode {
enum {
NLS_NAME_NO_LOSSY = 0, /* no lossy */
NLS_NAME_LOSSY = 1 << 0, /* just detected incorrect filename(s) */
- NLS_NAME_OVERLEN = 1 << 1, /* the length is over than its limit */
};
#define EXFAT_HASH_BITS 8
diff --git a/fs/exfat/file.c b/fs/exfat/file.c
index f246cf439588..adc37b4d7fc2 100644
--- a/fs/exfat/file.c
+++ b/fs/exfat/file.c
@@ -509,8 +509,8 @@ static int exfat_ioctl_get_volume_label(struct super_block *sb, unsigned long ar
static int exfat_ioctl_set_volume_label(struct super_block *sb,
unsigned long arg)
{
- int ret = 0, lossy;
- char label[FSLABEL_MAX];
+ int ret = 0, lossy, label_len;
+ char label[FSLABEL_MAX] = {0};
struct exfat_uni_name uniname;
if (!capable(CAP_SYS_ADMIN))
@@ -520,8 +520,9 @@ static int exfat_ioctl_set_volume_label(struct super_block *sb,
return -EFAULT;
memset(&uniname, 0, sizeof(uniname));
+ label_len = strnlen(label, FSLABEL_MAX - 1);
if (label[0]) {
- ret = exfat_nls_to_utf16(sb, label, FSLABEL_MAX,
+ ret = exfat_nls_to_utf16(sb, label, label_len,
&uniname, &lossy);
if (ret < 0)
return ret;
diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c
index 7eb9c67fd35f..745dce29ddb5 100644
--- a/fs/exfat/namei.c
+++ b/fs/exfat/namei.c
@@ -442,7 +442,7 @@ static int __exfat_resolve_path(struct inode *inode, const unsigned char *path,
return namelen; /* return error value */
if ((lossy && !lookup) || !namelen)
- return (lossy & NLS_NAME_OVERLEN) ? -ENAMETOOLONG : -EINVAL;
+ return -EINVAL;
return 0;
}
@@ -642,10 +642,14 @@ static int exfat_find(struct inode *dir, const struct qstr *qname,
info->type = exfat_get_entry_type(ep);
info->attr = le16_to_cpu(ep->dentry.file.attr);
- info->size = le64_to_cpu(ep2->dentry.stream.valid_size);
info->valid_size = le64_to_cpu(ep2->dentry.stream.valid_size);
info->size = le64_to_cpu(ep2->dentry.stream.size);
+ if (info->valid_size < 0) {
+ exfat_fs_error(sb, "data valid size is invalid(%lld)", info->valid_size);
+ return -EIO;
+ }
+
if (unlikely(EXFAT_B_TO_CLU_ROUND_UP(info->size, sbi) > sbi->used_clusters)) {
exfat_fs_error(sb, "data size is invalid(%lld)", info->size);
return -EIO;
diff --git a/fs/exfat/nls.c b/fs/exfat/nls.c
index 8243d94ceaf4..57db08a5271c 100644
--- a/fs/exfat/nls.c
+++ b/fs/exfat/nls.c
@@ -616,9 +616,6 @@ static int exfat_nls_to_ucs2(struct super_block *sb,
unilen++;
}
- if (p_cstring[i] != '\0')
- lossy |= NLS_NAME_OVERLEN;
-
*uniname = '\0';
p_uniname->name_len = unilen;
p_uniname->name_hash = exfat_calc_chksum16(upname, unilen << 1, 0,
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index df01d2876b68..9056f05a67dc 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -270,19 +270,31 @@ ff_layout_remove_mirror(struct nfs4_ff_layout_mirror *mirror)
mirror->layout = NULL;
}
-static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(gfp_t gfp_flags)
+static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(u32 dss_count,
+ gfp_t gfp_flags)
{
struct nfs4_ff_layout_mirror *mirror;
- u32 dss_id;
mirror = kzalloc(sizeof(*mirror), gfp_flags);
- if (mirror != NULL) {
- spin_lock_init(&mirror->lock);
- refcount_set(&mirror->ref, 1);
- INIT_LIST_HEAD(&mirror->mirrors);
- for (dss_id = 0; dss_id < mirror->dss_count; dss_id++)
- nfs_localio_file_init(&mirror->dss[dss_id].nfl);
+ if (mirror == NULL)
+ return NULL;
+
+ spin_lock_init(&mirror->lock);
+ refcount_set(&mirror->ref, 1);
+ INIT_LIST_HEAD(&mirror->mirrors);
+
+ mirror->dss_count = dss_count;
+ mirror->dss =
+ kcalloc(dss_count, sizeof(struct nfs4_ff_layout_ds_stripe),
+ gfp_flags);
+ if (mirror->dss == NULL) {
+ kfree(mirror);
+ return NULL;
}
+
+ for (u32 dss_id = 0; dss_id < mirror->dss_count; dss_id++)
+ nfs_localio_file_init(&mirror->dss[dss_id].nfl);
+
return mirror;
}
@@ -507,17 +519,12 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
if (dss_count > 1 && stripe_unit == 0)
goto out_err_free;
- fls->mirror_array[i] = ff_layout_alloc_mirror(gfp_flags);
+ fls->mirror_array[i] = ff_layout_alloc_mirror(dss_count, gfp_flags);
if (fls->mirror_array[i] == NULL) {
rc = -ENOMEM;
goto out_err_free;
}
- fls->mirror_array[i]->dss_count = dss_count;
- fls->mirror_array[i]->dss =
- kcalloc(dss_count, sizeof(struct nfs4_ff_layout_ds_stripe),
- gfp_flags);
-
for (dss_id = 0; dss_id < dss_count; dss_id++) {
dss_info = &fls->mirror_array[i]->dss[dss_id];
dss_info->mirror = fls->mirror_array[i];
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 6fddf43d729c..5998d6bd8a4f 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -222,6 +222,7 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init)
clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion];
clp->cl_mig_gen = 1;
+ clp->cl_last_renewal = jiffies;
#if IS_ENABLED(CONFIG_NFS_V4_1)
init_waitqueue_head(&clp->cl_lock_waitq);
#endif
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index f58098417142..411776718494 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3636,6 +3636,7 @@ struct nfs4_closedata {
} lr;
struct nfs_fattr fattr;
unsigned long timestamp;
+ unsigned short retrans;
};
static void nfs4_free_closedata(void *data)
@@ -3664,6 +3665,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
.state = state,
.inode = calldata->inode,
.stateid = &calldata->arg.stateid,
+ .retrans = calldata->retrans,
};
if (!nfs4_sequence_done(task, &calldata->res.seq_res))
@@ -3711,6 +3713,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
default:
task->tk_status = nfs4_async_handle_exception(task,
server, task->tk_status, &exception);
+ calldata->retrans = exception.retrans;
if (exception.retry)
goto out_restart;
}
@@ -5593,9 +5596,11 @@ static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_pgio_header *hdr)
.inode = hdr->inode,
.state = hdr->args.context->state,
.stateid = &hdr->args.stateid,
+ .retrans = hdr->retrans,
};
task->tk_status = nfs4_async_handle_exception(task,
server, task->tk_status, &exception);
+ hdr->retrans = exception.retrans;
if (exception.retry) {
rpc_restart_call_prepare(task);
return -EAGAIN;
@@ -5709,10 +5714,12 @@ static int nfs4_write_done_cb(struct rpc_task *task,
.inode = hdr->inode,
.state = hdr->args.context->state,
.stateid = &hdr->args.stateid,
+ .retrans = hdr->retrans,
};
task->tk_status = nfs4_async_handle_exception(task,
NFS_SERVER(inode), task->tk_status,
&exception);
+ hdr->retrans = exception.retrans;
if (exception.retry) {
rpc_restart_call_prepare(task);
return -EAGAIN;
@@ -6726,6 +6733,7 @@ struct nfs4_delegreturndata {
struct nfs_fh fh;
nfs4_stateid stateid;
unsigned long timestamp;
+ unsigned short retrans;
struct {
struct nfs4_layoutreturn_args arg;
struct nfs4_layoutreturn_res res;
@@ -6746,6 +6754,7 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
.inode = data->inode,
.stateid = &data->stateid,
.task_is_privileged = data->args.seq_args.sa_privileged,
+ .retrans = data->retrans,
};
if (!nfs4_sequence_done(task, &data->res.seq_res))
@@ -6817,6 +6826,7 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
task->tk_status = nfs4_async_handle_exception(task,
data->res.server, task->tk_status,
&exception);
+ data->retrans = exception.retrans;
if (exception.retry)
goto out_restart;
}
@@ -7093,6 +7103,7 @@ struct nfs4_unlockdata {
struct file_lock fl;
struct nfs_server *server;
unsigned long timestamp;
+ unsigned short retrans;
};
static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl,
@@ -7147,6 +7158,7 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
struct nfs4_exception exception = {
.inode = calldata->lsp->ls_state->inode,
.stateid = &calldata->arg.stateid,
+ .retrans = calldata->retrans,
};
if (!nfs4_sequence_done(task, &calldata->res.seq_res))
@@ -7180,6 +7192,7 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
task->tk_status = nfs4_async_handle_exception(task,
calldata->server, task->tk_status,
&exception);
+ calldata->retrans = exception.retrans;
if (exception.retry)
rpc_restart_call_prepare(task);
}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 0fb6905736d5..336c510f3750 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1535,7 +1535,8 @@ static int nfs_writeback_done(struct rpc_task *task,
/* Deal with the suid/sgid bit corner case */
if (nfs_should_remove_suid(inode)) {
spin_lock(&inode->i_lock);
- nfs_set_cache_invalid(inode, NFS_INO_INVALID_MODE);
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_MODE
+ | NFS_INO_REVAL_FORCED);
spin_unlock(&inode->i_lock);
}
return 0;
diff --git a/fs/smb/client/Kconfig b/fs/smb/client/Kconfig
index a4c02199fef4..17bd368574e9 100644
--- a/fs/smb/client/Kconfig
+++ b/fs/smb/client/Kconfig
@@ -5,17 +5,16 @@ config CIFS
select NLS
select NLS_UCS2_UTILS
select CRYPTO
- select CRYPTO_MD5
- select CRYPTO_SHA256
- select CRYPTO_SHA512
select CRYPTO_CMAC
- select CRYPTO_HMAC
select CRYPTO_AEAD2
select CRYPTO_CCM
select CRYPTO_GCM
select CRYPTO_ECB
select CRYPTO_AES
select CRYPTO_LIB_ARC4
+ select CRYPTO_LIB_MD5
+ select CRYPTO_LIB_SHA256
+ select CRYPTO_LIB_SHA512
select KEYS
select DNS_RESOLVER
select ASN1
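
The hash selects move from the crypto_shash drivers to the library implementations because the call sites below are rewritten against the lib interfaces: stack-allocated contexts, plain function calls, no transform allocation and no failure paths. A sketch of the library style, kernel-side code using only calls that appear later in this diff:

#include <crypto/md5.h>

/* Incremental form; cannot fail, needs no tfm allocation. */
static void digest_buf(const u8 *buf, size_t len, u8 out[MD5_DIGEST_SIZE])
{
	struct md5_ctx ctx;

	md5_init(&ctx);
	md5_update(&ctx, buf, len);
	md5_final(&ctx, out);
}

The one-shot md5() used by link.c below is the same idea collapsed into a single call; CRYPTO_CMAC stays selected because SMB3 signing still goes through a crypto_shash.
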
diff --git a/fs/smb/client/cifsacl.c b/fs/smb/client/cifsacl.c
index 63b3b1290bed..ce2ebc213a1d 100644
--- a/fs/smb/client/cifsacl.c
+++ b/fs/smb/client/cifsacl.c
@@ -339,7 +339,6 @@ int
sid_to_id(struct cifs_sb_info *cifs_sb, struct smb_sid *psid,
struct cifs_fattr *fattr, uint sidtype)
{
- int rc = 0;
struct key *sidkey;
char *sidstr;
const struct cred *saved_cred;
@@ -446,12 +445,12 @@ out_revert_creds:
* fails then we just fall back to using the ctx->linux_uid/linux_gid.
*/
got_valid_id:
- rc = 0;
if (sidtype == SIDOWNER)
fattr->cf_uid = fuid;
else
fattr->cf_gid = fgid;
- return rc;
+
+ return 0;
}
int
diff --git a/fs/smb/client/cifsencrypt.c b/fs/smb/client/cifsencrypt.c
index 7b7c8c38fdd0..801824825ecf 100644
--- a/fs/smb/client/cifsencrypt.c
+++ b/fs/smb/client/cifsencrypt.c
@@ -24,14 +24,43 @@
#include <linux/iov_iter.h>
#include <crypto/aead.h>
#include <crypto/arc4.h>
+#include <crypto/md5.h>
+#include <crypto/sha2.h>
-static size_t cifs_shash_step(void *iter_base, size_t progress, size_t len,
- void *priv, void *priv2)
+static int cifs_sig_update(struct cifs_calc_sig_ctx *ctx,
+ const u8 *data, size_t len)
{
- struct shash_desc *shash = priv;
+ if (ctx->md5) {
+ md5_update(ctx->md5, data, len);
+ return 0;
+ }
+ if (ctx->hmac) {
+ hmac_sha256_update(ctx->hmac, data, len);
+ return 0;
+ }
+ return crypto_shash_update(ctx->shash, data, len);
+}
+
+static int cifs_sig_final(struct cifs_calc_sig_ctx *ctx, u8 *out)
+{
+ if (ctx->md5) {
+ md5_final(ctx->md5, out);
+ return 0;
+ }
+ if (ctx->hmac) {
+ hmac_sha256_final(ctx->hmac, out);
+ return 0;
+ }
+ return crypto_shash_final(ctx->shash, out);
+}
+
+static size_t cifs_sig_step(void *iter_base, size_t progress, size_t len,
+ void *priv, void *priv2)
+{
+ struct cifs_calc_sig_ctx *ctx = priv;
int ret, *pret = priv2;
- ret = crypto_shash_update(shash, iter_base, len);
+ ret = cifs_sig_update(ctx, iter_base, len);
if (ret < 0) {
*pret = ret;
return len;
@@ -42,21 +71,20 @@ static size_t cifs_shash_step(void *iter_base, size_t progress, size_t len,
/*
* Pass the data from an iterator into a hash.
*/
-static int cifs_shash_iter(const struct iov_iter *iter, size_t maxsize,
- struct shash_desc *shash)
+static int cifs_sig_iter(const struct iov_iter *iter, size_t maxsize,
+ struct cifs_calc_sig_ctx *ctx)
{
struct iov_iter tmp_iter = *iter;
int err = -EIO;
- if (iterate_and_advance_kernel(&tmp_iter, maxsize, shash, &err,
- cifs_shash_step) != maxsize)
+ if (iterate_and_advance_kernel(&tmp_iter, maxsize, ctx, &err,
+ cifs_sig_step) != maxsize)
return err;
return 0;
}
-int __cifs_calc_signature(struct smb_rqst *rqst,
- struct TCP_Server_Info *server, char *signature,
- struct shash_desc *shash)
+int __cifs_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server,
+ char *signature, struct cifs_calc_sig_ctx *ctx)
{
int i;
ssize_t rc;
@@ -82,8 +110,7 @@ int __cifs_calc_signature(struct smb_rqst *rqst,
return -EIO;
}
- rc = crypto_shash_update(shash,
- iov[i].iov_base, iov[i].iov_len);
+ rc = cifs_sig_update(ctx, iov[i].iov_base, iov[i].iov_len);
if (rc) {
cifs_dbg(VFS, "%s: Could not update with payload\n",
__func__);
@@ -91,11 +118,11 @@ int __cifs_calc_signature(struct smb_rqst *rqst,
}
}
- rc = cifs_shash_iter(&rqst->rq_iter, iov_iter_count(&rqst->rq_iter), shash);
+ rc = cifs_sig_iter(&rqst->rq_iter, iov_iter_count(&rqst->rq_iter), ctx);
if (rc < 0)
return rc;
- rc = crypto_shash_final(shash, signature);
+ rc = cifs_sig_final(ctx, signature);
if (rc)
cifs_dbg(VFS, "%s: Could not generate hash\n", __func__);
@@ -112,29 +139,22 @@ int __cifs_calc_signature(struct smb_rqst *rqst,
static int cifs_calc_signature(struct smb_rqst *rqst,
struct TCP_Server_Info *server, char *signature)
{
- int rc;
+ struct md5_ctx ctx;
if (!rqst->rq_iov || !signature || !server)
return -EINVAL;
-
- rc = cifs_alloc_hash("md5", &server->secmech.md5);
- if (rc)
- return -1;
-
- rc = crypto_shash_init(server->secmech.md5);
- if (rc) {
- cifs_dbg(VFS, "%s: Could not init md5\n", __func__);
- return rc;
+ if (fips_enabled) {
+ cifs_dbg(VFS,
+ "MD5 signature support is disabled due to FIPS\n");
+ return -EOPNOTSUPP;
}
- rc = crypto_shash_update(server->secmech.md5,
- server->session_key.response, server->session_key.len);
- if (rc) {
- cifs_dbg(VFS, "%s: Could not update with response\n", __func__);
- return rc;
- }
+ md5_init(&ctx);
+ md5_update(&ctx, server->session_key.response, server->session_key.len);
- return __cifs_calc_signature(rqst, server, signature, server->secmech.md5);
+ return __cifs_calc_signature(
+ rqst, server, signature,
+ &(struct cifs_calc_sig_ctx){ .md5 = &ctx });
}
/* must be called with server->srv_mutex held */
@@ -405,11 +425,11 @@ static __le64 find_timestamp(struct cifs_ses *ses)
}
static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash,
- const struct nls_table *nls_cp, struct shash_desc *hmacmd5)
+ const struct nls_table *nls_cp)
{
- int rc = 0;
int len;
char nt_hash[CIFS_NTHASH_SIZE];
+ struct hmac_md5_ctx hmac_ctx;
__le16 *user;
wchar_t *domain;
wchar_t *server;
@@ -417,17 +437,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash,
/* calculate md4 hash of password */
E_md4hash(ses->password, nt_hash, nls_cp);
- rc = crypto_shash_setkey(hmacmd5->tfm, nt_hash, CIFS_NTHASH_SIZE);
- if (rc) {
- cifs_dbg(VFS, "%s: Could not set NT hash as a key, rc=%d\n", __func__, rc);
- return rc;
- }
-
- rc = crypto_shash_init(hmacmd5);
- if (rc) {
- cifs_dbg(VFS, "%s: Could not init HMAC-MD5, rc=%d\n", __func__, rc);
- return rc;
- }
+ hmac_md5_init_usingrawkey(&hmac_ctx, nt_hash, CIFS_NTHASH_SIZE);
/* convert ses->user_name to unicode */
len = ses->user_name ? strlen(ses->user_name) : 0;
@@ -442,12 +452,8 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash,
*(u16 *)user = 0;
}
- rc = crypto_shash_update(hmacmd5, (char *)user, 2 * len);
+ hmac_md5_update(&hmac_ctx, (const u8 *)user, 2 * len);
kfree(user);
- if (rc) {
- cifs_dbg(VFS, "%s: Could not update with user, rc=%d\n", __func__, rc);
- return rc;
- }
/* convert ses->domainName to unicode and uppercase */
if (ses->domainName) {
@@ -459,12 +465,8 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash,
len = cifs_strtoUTF16((__le16 *)domain, ses->domainName, len,
nls_cp);
- rc = crypto_shash_update(hmacmd5, (char *)domain, 2 * len);
+ hmac_md5_update(&hmac_ctx, (const u8 *)domain, 2 * len);
kfree(domain);
- if (rc) {
- cifs_dbg(VFS, "%s: Could not update with domain, rc=%d\n", __func__, rc);
- return rc;
- }
} else {
/* We use ses->ip_addr if no domain name available */
len = strlen(ses->ip_addr);
@@ -474,25 +476,16 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash,
return -ENOMEM;
len = cifs_strtoUTF16((__le16 *)server, ses->ip_addr, len, nls_cp);
- rc = crypto_shash_update(hmacmd5, (char *)server, 2 * len);
+ hmac_md5_update(&hmac_ctx, (const u8 *)server, 2 * len);
kfree(server);
- if (rc) {
- cifs_dbg(VFS, "%s: Could not update with server, rc=%d\n", __func__, rc);
- return rc;
- }
}
- rc = crypto_shash_final(hmacmd5, ntlmv2_hash);
- if (rc)
- cifs_dbg(VFS, "%s: Could not generate MD5 hash, rc=%d\n", __func__, rc);
-
- return rc;
+ hmac_md5_final(&hmac_ctx, ntlmv2_hash);
+ return 0;
}
-static int
-CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash, struct shash_desc *hmacmd5)
+static void CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash)
{
- int rc;
struct ntlmv2_resp *ntlmv2 = (struct ntlmv2_resp *)
(ses->auth_key.response + CIFS_SESS_KEY_SIZE);
unsigned int hash_len;
@@ -501,35 +494,15 @@ CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash, struct shash_
hash_len = ses->auth_key.len - (CIFS_SESS_KEY_SIZE +
offsetof(struct ntlmv2_resp, challenge.key[0]));
- rc = crypto_shash_setkey(hmacmd5->tfm, ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE);
- if (rc) {
- cifs_dbg(VFS, "%s: Could not set NTLMv2 hash as a key, rc=%d\n", __func__, rc);
- return rc;
- }
-
- rc = crypto_shash_init(hmacmd5);
- if (rc) {
- cifs_dbg(VFS, "%s: Could not init HMAC-MD5, rc=%d\n", __func__, rc);
- return rc;
- }
-
if (ses->server->negflavor == CIFS_NEGFLAVOR_EXTENDED)
memcpy(ntlmv2->challenge.key, ses->ntlmssp->cryptkey, CIFS_SERVER_CHALLENGE_SIZE);
else
memcpy(ntlmv2->challenge.key, ses->server->cryptkey, CIFS_SERVER_CHALLENGE_SIZE);
- rc = crypto_shash_update(hmacmd5, ntlmv2->challenge.key, hash_len);
- if (rc) {
- cifs_dbg(VFS, "%s: Could not update with response, rc=%d\n", __func__, rc);
- return rc;
- }
-
- /* Note that the MD5 digest over writes anon.challenge_key.key */
- rc = crypto_shash_final(hmacmd5, ntlmv2->ntlmv2_hash);
- if (rc)
- cifs_dbg(VFS, "%s: Could not generate MD5 hash, rc=%d\n", __func__, rc);
-
- return rc;
+ /* Note that the HMAC-MD5 value overwrites ntlmv2->challenge.key */
+ hmac_md5_usingrawkey(ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE,
+ ntlmv2->challenge.key, hash_len,
+ ntlmv2->ntlmv2_hash);
}
/*
@@ -586,7 +559,6 @@ out:
int
setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp)
{
- struct shash_desc *hmacmd5 = NULL;
unsigned char *tiblob = NULL; /* target info blob */
struct ntlmv2_resp *ntlmv2;
char ntlmv2_hash[16];
@@ -657,51 +629,29 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp)
ntlmv2->client_chal = cc;
ntlmv2->reserved2 = 0;
- rc = cifs_alloc_hash("hmac(md5)", &hmacmd5);
- if (rc) {
- cifs_dbg(VFS, "Could not allocate HMAC-MD5, rc=%d\n", rc);
+ if (fips_enabled) {
+ cifs_dbg(VFS, "NTLMv2 support is disabled due to FIPS\n");
+ rc = -EOPNOTSUPP;
goto unlock;
}
/* calculate ntlmv2_hash */
- rc = calc_ntlmv2_hash(ses, ntlmv2_hash, nls_cp, hmacmd5);
+ rc = calc_ntlmv2_hash(ses, ntlmv2_hash, nls_cp);
if (rc) {
cifs_dbg(VFS, "Could not get NTLMv2 hash, rc=%d\n", rc);
goto unlock;
}
/* calculate first part of the client response (CR1) */
- rc = CalcNTLMv2_response(ses, ntlmv2_hash, hmacmd5);
- if (rc) {
- cifs_dbg(VFS, "Could not calculate CR1, rc=%d\n", rc);
- goto unlock;
- }
+ CalcNTLMv2_response(ses, ntlmv2_hash);
/* now calculate the session key for NTLMv2 */
- rc = crypto_shash_setkey(hmacmd5->tfm, ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE);
- if (rc) {
- cifs_dbg(VFS, "%s: Could not set NTLMv2 hash as a key, rc=%d\n", __func__, rc);
- goto unlock;
- }
-
- rc = crypto_shash_init(hmacmd5);
- if (rc) {
- cifs_dbg(VFS, "%s: Could not init HMAC-MD5, rc=%d\n", __func__, rc);
- goto unlock;
- }
-
- rc = crypto_shash_update(hmacmd5, ntlmv2->ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE);
- if (rc) {
- cifs_dbg(VFS, "%s: Could not update with response, rc=%d\n", __func__, rc);
- goto unlock;
- }
-
- rc = crypto_shash_final(hmacmd5, ses->auth_key.response);
- if (rc)
- cifs_dbg(VFS, "%s: Could not generate MD5 hash, rc=%d\n", __func__, rc);
+ hmac_md5_usingrawkey(ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE,
+ ntlmv2->ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE,
+ ses->auth_key.response);
+ rc = 0;
unlock:
cifs_server_unlock(ses->server);
- cifs_free_hash(&hmacmd5);
setup_ntlmv2_rsp_ret:
kfree_sensitive(tiblob);
@@ -743,9 +693,6 @@ void
cifs_crypto_secmech_release(struct TCP_Server_Info *server)
{
cifs_free_hash(&server->secmech.aes_cmac);
- cifs_free_hash(&server->secmech.hmacsha256);
- cifs_free_hash(&server->secmech.md5);
- cifs_free_hash(&server->secmech.sha512);
if (server->secmech.enc) {
crypto_free_aead(server->secmech.enc);
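
The net effect in cifsencrypt.c: each multi-step shash sequence (setkey, init, update, final, with an error branch at every step) becomes one or two infallible library calls, which is why calc_ntlmv2_hash() loses its rc plumbing and CalcNTLMv2_response() can return void. The one-shot HMAC form in isolation, with illustrative buffer names:

#include <crypto/md5.h>

static void ntlmv2_mac(const u8 key[MD5_DIGEST_SIZE],
		       const u8 *blob, size_t blob_len,
		       u8 mac[MD5_DIGEST_SIZE])
{
	/* Replaces setkey + init + update + final; none of these
	 * steps can fail in the library API. */
	hmac_md5_usingrawkey(key, MD5_DIGEST_SIZE, blob, blob_len, mac);
}

The explicit fips_enabled checks replace failures that previously surfaced indirectly, when allocating an "md5" or "hmac(md5)" transform returned an error under FIPS.
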
diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c
index 05b1fa76e8cc..4f959f1e08d2 100644
--- a/fs/smb/client/cifsfs.c
+++ b/fs/smb/client/cifsfs.c
@@ -2139,13 +2139,9 @@ MODULE_DESCRIPTION
"also older servers complying with the SNIA CIFS Specification)");
MODULE_VERSION(CIFS_VERSION);
MODULE_SOFTDEP("ecb");
-MODULE_SOFTDEP("hmac");
-MODULE_SOFTDEP("md5");
MODULE_SOFTDEP("nls");
MODULE_SOFTDEP("aes");
MODULE_SOFTDEP("cmac");
-MODULE_SOFTDEP("sha256");
-MODULE_SOFTDEP("sha512");
MODULE_SOFTDEP("aead2");
MODULE_SOFTDEP("ccm");
MODULE_SOFTDEP("gcm");
diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
index 8f6f567d7474..16a00a61fd2c 100644
--- a/fs/smb/client/cifsglob.h
+++ b/fs/smb/client/cifsglob.h
@@ -24,6 +24,7 @@
#include "cifsacl.h"
#include <crypto/internal/hash.h>
#include <uapi/linux/cifs/cifs_mount.h>
+#include "../common/cifsglob.h"
#include "../common/smb2pdu.h"
#include "smb2pdu.h"
#include <linux/filelock.h>
@@ -221,9 +222,6 @@ struct session_key {
/* crypto hashing related structure/fields, not specific to a sec mech */
struct cifs_secmech {
- struct shash_desc *md5; /* md5 hash function, for CIFS/SMB1 signatures */
- struct shash_desc *hmacsha256; /* hmac-sha256 hash function, for SMB2 signatures */
- struct shash_desc *sha512; /* sha512 hash function, for SMB3.1.1 preauth hash */
struct shash_desc *aes_cmac; /* block-cipher based MAC function, for SMB3 signatures */
struct crypto_aead *enc; /* smb3 encryption AEAD TFM (AES-CCM and AES-GCM) */
@@ -702,12 +700,6 @@ get_rfc1002_length(void *buf)
return be32_to_cpu(*((__be32 *)buf)) & 0xffffff;
}
-static inline void
-inc_rfc1001_len(void *buf, int count)
-{
- be32_add_cpu((__be32 *)buf, count);
-}
-
struct TCP_Server_Info {
struct list_head tcp_ses_list;
struct list_head smb_ses_list;
@@ -1021,8 +1013,6 @@ compare_mid(__u16 mid, const struct smb_hdr *smb)
#define CIFS_MAX_RFC1002_WSIZE ((1<<17) - 1 - sizeof(WRITE_REQ) + 4)
#define CIFS_MAX_RFC1002_RSIZE ((1<<17) - 1 - sizeof(READ_RSP) + 4)
-#define CIFS_DEFAULT_IOSIZE (1024 * 1024)
-
/*
* Windows only supports a max of 60kb reads and 65535 byte writes. Default to
* those values when posix extensions aren't in force. In actuality here, we
@@ -2148,30 +2138,20 @@ extern mempool_t cifs_io_request_pool;
extern mempool_t cifs_io_subrequest_pool;
/* Operations for different SMB versions */
-#define SMB1_VERSION_STRING "1.0"
-#define SMB20_VERSION_STRING "2.0"
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
extern struct smb_version_operations smb1_operations;
extern struct smb_version_values smb1_values;
extern struct smb_version_operations smb20_operations;
extern struct smb_version_values smb20_values;
#endif /* CIFS_ALLOW_INSECURE_LEGACY */
-#define SMB21_VERSION_STRING "2.1"
extern struct smb_version_operations smb21_operations;
extern struct smb_version_values smb21_values;
-#define SMBDEFAULT_VERSION_STRING "default"
extern struct smb_version_values smbdefault_values;
-#define SMB3ANY_VERSION_STRING "3"
extern struct smb_version_values smb3any_values;
-#define SMB30_VERSION_STRING "3.0"
extern struct smb_version_operations smb30_operations;
extern struct smb_version_values smb30_values;
-#define SMB302_VERSION_STRING "3.02"
-#define ALT_SMB302_VERSION_STRING "3.0.2"
/*extern struct smb_version_operations smb302_operations;*/ /* not needed yet */
extern struct smb_version_values smb302_values;
-#define SMB311_VERSION_STRING "3.1.1"
-#define ALT_SMB311_VERSION_STRING "3.11"
extern struct smb_version_operations smb311_operations;
extern struct smb_version_values smb311_values;
diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h
index e8fba98690ce..4976be2c47c1 100644
--- a/fs/smb/client/cifsproto.h
+++ b/fs/smb/client/cifsproto.h
@@ -632,9 +632,13 @@ int cifs_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon,
struct cifs_sb_info *cifs_sb,
const unsigned char *path, char *pbuf,
unsigned int *pbytes_written);
-int __cifs_calc_signature(struct smb_rqst *rqst,
- struct TCP_Server_Info *server, char *signature,
- struct shash_desc *shash);
+struct cifs_calc_sig_ctx {
+ struct md5_ctx *md5;
+ struct hmac_sha256_ctx *hmac;
+ struct shash_desc *shash;
+};
+int __cifs_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server,
+ char *signature, struct cifs_calc_sig_ctx *ctx);
enum securityEnum cifs_select_sectype(struct TCP_Server_Info *,
enum securityEnum);
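
struct cifs_calc_sig_ctx is a one-of-three dispatch: callers set exactly one member, and cifs_sig_update()/cifs_sig_final() feed every iov (and the rq_iter tail) through whichever algorithm is non-NULL. An illustrative caller, modeled on the smb2_calc_signature() hunk later in this diff:

static int sign_with_hmac(struct smb_rqst *rqst,
			  struct TCP_Server_Info *server,
			  const u8 *key, size_t key_len, char *sig)
{
	struct hmac_sha256_ctx hmac_ctx;

	hmac_sha256_init_usingrawkey(&hmac_ctx, key, key_len);
	return __cifs_calc_signature(rqst, server, sig,
			&(struct cifs_calc_sig_ctx){ .hmac = &hmac_ctx });
}

The .shash member remains for SMB3's AES-CMAC signing, which has no library equivalent here.
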
diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c
index 239dd84a336f..098a79b7a959 100644
--- a/fs/smb/client/inode.c
+++ b/fs/smb/client/inode.c
@@ -2431,8 +2431,10 @@ cifs_do_rename(const unsigned int xid, struct dentry *from_dentry,
tcon = tlink_tcon(tlink);
server = tcon->ses->server;
- if (!server->ops->rename)
- return -ENOSYS;
+ if (!server->ops->rename) {
+ rc = -ENOSYS;
+ goto do_rename_exit;
+ }
/* try path-based rename first */
rc = server->ops->rename(xid, tcon, from_dentry,
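
The early return skipped the cleanup at do_rename_exit, leaking the tlink reference taken at the top of cifs_do_rename(); routing the failure through the exit label drops it. (The two smb2ops.c hunks below fix the same shape with a put_tlink label.) A standalone model of the bug, with hypothetical get_ref()/put_ref() helpers standing in for the tlink get/put:

#include <stdio.h>

static int refs;
static void *get_ref(void) { refs++; return &refs; }
static void put_ref(void *r) { (void)r; refs--; }
static int op_supported(void) { return 0; }

static int do_op(void)
{
	void *ref = get_ref();
	int rc;

	if (!op_supported()) {
		rc = -38;	/* -ENOSYS; was "return -38", leaking ref */
		goto out;
	}
	rc = 0;
out:
	put_ref(ref);
	return rc;
}

int main(void)
{
	do_op();
	printf("outstanding refs: %d\n", refs);	/* 0 with the fix */
	return 0;
}
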
diff --git a/fs/smb/client/link.c b/fs/smb/client/link.c
index fe80e711cd75..70f3c0c67eeb 100644
--- a/fs/smb/client/link.c
+++ b/fs/smb/client/link.c
@@ -5,6 +5,7 @@
* Author(s): Steve French (sfrench@us.ibm.com)
*
*/
+#include <crypto/md5.h>
#include <linux/fs.h>
#include <linux/stat.h>
#include <linux/slab.h>
@@ -37,23 +38,6 @@
#define CIFS_MF_SYMLINK_MD5_ARGS(md5_hash) md5_hash
static int
-symlink_hash(unsigned int link_len, const char *link_str, u8 *md5_hash)
-{
- int rc;
- struct shash_desc *md5 = NULL;
-
- rc = cifs_alloc_hash("md5", &md5);
- if (rc)
- return rc;
-
- rc = crypto_shash_digest(md5, link_str, link_len, md5_hash);
- if (rc)
- cifs_dbg(VFS, "%s: Could not generate md5 hash\n", __func__);
- cifs_free_hash(&md5);
- return rc;
-}
-
-static int
parse_mf_symlink(const u8 *buf, unsigned int buf_len, unsigned int *_link_len,
char **_link_str)
{
@@ -77,11 +61,7 @@ parse_mf_symlink(const u8 *buf, unsigned int buf_len, unsigned int *_link_len,
if (link_len > CIFS_MF_SYMLINK_LINK_MAXLEN)
return -EINVAL;
- rc = symlink_hash(link_len, link_str, md5_hash);
- if (rc) {
- cifs_dbg(FYI, "%s: MD5 hash failure: %d\n", __func__, rc);
- return rc;
- }
+ md5(link_str, link_len, md5_hash);
scnprintf(md5_str2, sizeof(md5_str2),
CIFS_MF_SYMLINK_MD5_FORMAT,
@@ -103,7 +83,6 @@ parse_mf_symlink(const u8 *buf, unsigned int buf_len, unsigned int *_link_len,
static int
format_mf_symlink(u8 *buf, unsigned int buf_len, const char *link_str)
{
- int rc;
unsigned int link_len;
unsigned int ofs;
u8 md5_hash[16];
@@ -116,11 +95,7 @@ format_mf_symlink(u8 *buf, unsigned int buf_len, const char *link_str)
if (link_len > CIFS_MF_SYMLINK_LINK_MAXLEN)
return -ENAMETOOLONG;
- rc = symlink_hash(link_len, link_str, md5_hash);
- if (rc) {
- cifs_dbg(FYI, "%s: MD5 hash failure: %d\n", __func__, rc);
- return rc;
- }
+ md5(link_str, link_len, md5_hash);
scnprintf(buf, buf_len,
CIFS_MF_SYMLINK_LEN_FORMAT CIFS_MF_SYMLINK_MD5_FORMAT,
diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c
index dda6dece802a..e10123d8cd7d 100644
--- a/fs/smb/client/misc.c
+++ b/fs/smb/client/misc.c
@@ -916,6 +916,14 @@ parse_dfs_referrals(struct get_dfs_referral_rsp *rsp, u32 rsp_size,
char *data_end;
struct dfs_referral_level_3 *ref;
+ if (rsp_size < sizeof(*rsp)) {
+ cifs_dbg(VFS | ONCE,
+ "%s: header is malformed (size is %u, must be %zu)\n",
+ __func__, rsp_size, sizeof(*rsp));
+ rc = -EINVAL;
+ goto parse_DFS_referrals_exit;
+ }
+
*num_of_nodes = le16_to_cpu(rsp->NumberOfReferrals);
if (*num_of_nodes < 1) {
@@ -925,6 +933,15 @@ parse_dfs_referrals(struct get_dfs_referral_rsp *rsp, u32 rsp_size,
goto parse_DFS_referrals_exit;
}
+ if (sizeof(*rsp) + *num_of_nodes * sizeof(REFERRAL3) > rsp_size) {
+ cifs_dbg(VFS | ONCE,
+ "%s: malformed buffer (size is %u, must be at least %zu)\n",
+ __func__, rsp_size,
+ sizeof(*rsp) + *num_of_nodes * sizeof(REFERRAL3));
+ rc = -EINVAL;
+ goto parse_DFS_referrals_exit;
+ }
+
ref = (struct dfs_referral_level_3 *) &(rsp->referrals);
if (ref->VersionNumber != cpu_to_le16(3)) {
cifs_dbg(VFS, "Referrals of V%d version are not supported, should be V3\n",
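
The two added checks validate in dependency order: the fixed-size header must be present before NumberOfReferrals can be read from it, and only then can the payload size that count implies be checked against the actual buffer. A simplified standalone model (field names and sizes are illustrative, not the SMB wire format):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct hdr { uint16_t nrefs; };
struct ref { char data[34]; };

static int parse(const void *buf, uint32_t size)
{
	struct hdr h;

	if (size < sizeof(h))			/* check 1: header itself */
		return -22;			/* -EINVAL */
	memcpy(&h, buf, sizeof(h));
	if (sizeof(h) + (size_t)h.nrefs * sizeof(struct ref) > size)
		return -22;			/* check 2: implied payload */
	return h.nrefs;
}

int main(void)
{
	char truncated[1] = { 0 };

	printf("%d\n", parse(truncated, sizeof(truncated)));	/* -22 */
	return 0;
}
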
diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c
index 0a8c2fcc9ded..ef3b498b0a02 100644
--- a/fs/smb/client/sess.c
+++ b/fs/smb/client/sess.c
@@ -584,7 +584,7 @@ cifs_ses_add_channel(struct cifs_ses *ses,
* to sign packets before we generate the channel signing key
* (we sign with the session key)
*/
- rc = smb311_crypto_shash_allocate(chan->server);
+ rc = smb3_crypto_shash_allocate(chan->server);
if (rc) {
cifs_dbg(VFS, "%s: crypto alloc failed\n", __func__);
mutex_unlock(&ses->session_mutex);
diff --git a/fs/smb/client/smb2misc.c b/fs/smb/client/smb2misc.c
index 89d933b4a8bc..96bfe4c63ccf 100644
--- a/fs/smb/client/smb2misc.c
+++ b/fs/smb/client/smb2misc.c
@@ -7,6 +7,7 @@
* Pavel Shilovsky (pshilovsky@samba.org) 2012
*
*/
+#include <crypto/sha2.h>
#include <linux/ctype.h>
#include "cifsglob.h"
#include "cifsproto.h"
@@ -888,13 +889,13 @@ smb2_handle_cancelled_mid(struct mid_q_entry *mid, struct TCP_Server_Info *serve
* @iov: array containing the SMB request we will send to the server
* @nvec: number of array entries for the iov
*/
-int
+void
smb311_update_preauth_hash(struct cifs_ses *ses, struct TCP_Server_Info *server,
struct kvec *iov, int nvec)
{
- int i, rc;
+ int i;
struct smb2_hdr *hdr;
- struct shash_desc *sha512 = NULL;
+ struct sha512_ctx sha_ctx;
hdr = (struct smb2_hdr *)iov[0].iov_base;
/* neg prot are always taken */
@@ -907,52 +908,22 @@ smb311_update_preauth_hash(struct cifs_ses *ses, struct TCP_Server_Info *server,
* and we can test it. Preauth requires 3.1.1 for now.
*/
if (server->dialect != SMB311_PROT_ID)
- return 0;
+ return;
if (hdr->Command != SMB2_SESSION_SETUP)
- return 0;
+ return;
/* skip last sess setup response */
if ((hdr->Flags & SMB2_FLAGS_SERVER_TO_REDIR)
&& (hdr->Status == NT_STATUS_OK
|| (hdr->Status !=
cpu_to_le32(NT_STATUS_MORE_PROCESSING_REQUIRED))))
- return 0;
+ return;
ok:
- rc = smb311_crypto_shash_allocate(server);
- if (rc)
- return rc;
-
- sha512 = server->secmech.sha512;
- rc = crypto_shash_init(sha512);
- if (rc) {
- cifs_dbg(VFS, "%s: Could not init sha512 shash\n", __func__);
- return rc;
- }
-
- rc = crypto_shash_update(sha512, ses->preauth_sha_hash,
- SMB2_PREAUTH_HASH_SIZE);
- if (rc) {
- cifs_dbg(VFS, "%s: Could not update sha512 shash\n", __func__);
- return rc;
- }
-
- for (i = 0; i < nvec; i++) {
- rc = crypto_shash_update(sha512, iov[i].iov_base, iov[i].iov_len);
- if (rc) {
- cifs_dbg(VFS, "%s: Could not update sha512 shash\n",
- __func__);
- return rc;
- }
- }
-
- rc = crypto_shash_final(sha512, ses->preauth_sha_hash);
- if (rc) {
- cifs_dbg(VFS, "%s: Could not finalize sha512 shash\n",
- __func__);
- return rc;
- }
-
- return 0;
+ sha512_init(&sha_ctx);
+ sha512_update(&sha_ctx, ses->preauth_sha_hash, SMB2_PREAUTH_HASH_SIZE);
+ for (i = 0; i < nvec; i++)
+ sha512_update(&sha_ctx, iov[i].iov_base, iov[i].iov_len);
+ sha512_final(&sha_ctx, ses->preauth_sha_hash);
}
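
The preauth integrity computation is a chained fold: the running 64-byte hash is itself the first input to the next digest, so each session-setup message extends H as H' = SHA-512(H || msg). With the library API that needs no allocation and cannot fail, which is what lets the function return void. The fold in isolation, a kernel-side sketch using the same <crypto/sha2.h> calls:

#include <crypto/sha2.h>

static void preauth_fold(u8 hash[SHA512_DIGEST_SIZE],
			 const void *msg, size_t len)
{
	struct sha512_ctx ctx;

	sha512_init(&ctx);
	sha512_update(&ctx, hash, SHA512_DIGEST_SIZE);	/* previous H */
	sha512_update(&ctx, msg, len);			/* this message */
	sha512_final(&ctx, hash);			/* overwrite in place */
}
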
diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c
index 7c392cf5940b..95cd484cfbba 100644
--- a/fs/smb/client/smb2ops.c
+++ b/fs/smb/client/smb2ops.c
@@ -3212,8 +3212,7 @@ get_smb2_acl_by_path(struct cifs_sb_info *cifs_sb,
utf16_path = cifs_convert_path_to_utf16(path, cifs_sb);
if (!utf16_path) {
rc = -ENOMEM;
- free_xid(xid);
- return ERR_PTR(rc);
+ goto put_tlink;
}
oparms = (struct cifs_open_parms) {
@@ -3245,6 +3244,7 @@ get_smb2_acl_by_path(struct cifs_sb_info *cifs_sb,
SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
}
+put_tlink:
cifs_put_tlink(tlink);
free_xid(xid);
@@ -3285,8 +3285,7 @@ set_smb2_acl(struct smb_ntsd *pnntsd, __u32 acllen,
utf16_path = cifs_convert_path_to_utf16(path, cifs_sb);
if (!utf16_path) {
rc = -ENOMEM;
- free_xid(xid);
- return rc;
+ goto put_tlink;
}
oparms = (struct cifs_open_parms) {
@@ -3307,6 +3306,7 @@ set_smb2_acl(struct smb_ntsd *pnntsd, __u32 acllen,
SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
}
+put_tlink:
cifs_put_tlink(tlink);
free_xid(xid);
return rc;
diff --git a/fs/smb/client/smb2proto.h b/fs/smb/client/smb2proto.h
index b3f1398c9f79..6eb86d134abc 100644
--- a/fs/smb/client/smb2proto.h
+++ b/fs/smb/client/smb2proto.h
@@ -295,10 +295,10 @@ extern int smb2_validate_and_copy_iov(unsigned int offset,
extern void smb2_copy_fs_info_to_kstatfs(
struct smb2_fs_full_size_info *pfs_inf,
struct kstatfs *kst);
-extern int smb311_crypto_shash_allocate(struct TCP_Server_Info *server);
-extern int smb311_update_preauth_hash(struct cifs_ses *ses,
- struct TCP_Server_Info *server,
- struct kvec *iov, int nvec);
+extern int smb3_crypto_shash_allocate(struct TCP_Server_Info *server);
+extern void smb311_update_preauth_hash(struct cifs_ses *ses,
+ struct TCP_Server_Info *server,
+ struct kvec *iov, int nvec);
extern int smb2_query_info_compound(const unsigned int xid,
struct cifs_tcon *tcon,
const char *path, u32 desired_access,
diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c
index 33f33013b392..ad6068e17a2a 100644
--- a/fs/smb/client/smb2transport.c
+++ b/fs/smb/client/smb2transport.c
@@ -19,6 +19,7 @@
#include <linux/mempool.h>
#include <linux/highmem.h>
#include <crypto/aead.h>
+#include <crypto/sha2.h>
#include "cifsglob.h"
#include "cifsproto.h"
#include "smb2proto.h"
@@ -26,53 +27,14 @@
#include "../common/smb2status.h"
#include "smb2glob.h"
-static int
+int
smb3_crypto_shash_allocate(struct TCP_Server_Info *server)
{
struct cifs_secmech *p = &server->secmech;
- int rc;
-
- rc = cifs_alloc_hash("hmac(sha256)", &p->hmacsha256);
- if (rc)
- goto err;
- rc = cifs_alloc_hash("cmac(aes)", &p->aes_cmac);
- if (rc)
- goto err;
-
- return 0;
-err:
- cifs_free_hash(&p->hmacsha256);
- return rc;
+ return cifs_alloc_hash("cmac(aes)", &p->aes_cmac);
}
-int
-smb311_crypto_shash_allocate(struct TCP_Server_Info *server)
-{
- struct cifs_secmech *p = &server->secmech;
- int rc = 0;
-
- rc = cifs_alloc_hash("hmac(sha256)", &p->hmacsha256);
- if (rc)
- return rc;
-
- rc = cifs_alloc_hash("cmac(aes)", &p->aes_cmac);
- if (rc)
- goto err;
-
- rc = cifs_alloc_hash("sha512", &p->sha512);
- if (rc)
- goto err;
-
- return 0;
-
-err:
- cifs_free_hash(&p->aes_cmac);
- cifs_free_hash(&p->hmacsha256);
- return rc;
-}
-
-
static
int smb3_get_sign_key(__u64 ses_id, struct TCP_Server_Info *server, u8 *key)
{
@@ -253,10 +215,9 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server,
{
int rc;
unsigned char smb2_signature[SMB2_HMACSHA256_SIZE];
- unsigned char *sigptr = smb2_signature;
struct kvec *iov = rqst->rq_iov;
struct smb2_hdr *shdr = (struct smb2_hdr *)iov[0].iov_base;
- struct shash_desc *shash = NULL;
+ struct hmac_sha256_ctx hmac_ctx;
struct smb_rqst drqst;
__u64 sid = le64_to_cpu(shdr->SessionId);
u8 key[SMB2_NTLMV2_SESSKEY_SIZE];
@@ -271,30 +232,7 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server,
memset(smb2_signature, 0x0, SMB2_HMACSHA256_SIZE);
memset(shdr->Signature, 0x0, SMB2_SIGNATURE_SIZE);
- if (allocate_crypto) {
- rc = cifs_alloc_hash("hmac(sha256)", &shash);
- if (rc) {
- cifs_server_dbg(VFS,
- "%s: sha256 alloc failed\n", __func__);
- goto out;
- }
- } else {
- shash = server->secmech.hmacsha256;
- }
-
- rc = crypto_shash_setkey(shash->tfm, key, sizeof(key));
- if (rc) {
- cifs_server_dbg(VFS,
- "%s: Could not update with response\n",
- __func__);
- goto out;
- }
-
- rc = crypto_shash_init(shash);
- if (rc) {
- cifs_server_dbg(VFS, "%s: Could not init sha256", __func__);
- goto out;
- }
+ hmac_sha256_init_usingrawkey(&hmac_ctx, key, sizeof(key));
/*
* For SMB2+, __cifs_calc_signature() expects to sign only the actual
@@ -305,25 +243,17 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server,
*/
drqst = *rqst;
if (drqst.rq_nvec >= 2 && iov[0].iov_len == 4) {
- rc = crypto_shash_update(shash, iov[0].iov_base,
- iov[0].iov_len);
- if (rc) {
- cifs_server_dbg(VFS,
- "%s: Could not update with payload\n",
- __func__);
- goto out;
- }
+ hmac_sha256_update(&hmac_ctx, iov[0].iov_base, iov[0].iov_len);
drqst.rq_iov++;
drqst.rq_nvec--;
}
- rc = __cifs_calc_signature(&drqst, server, sigptr, shash);
+ rc = __cifs_calc_signature(
+ &drqst, server, smb2_signature,
+ &(struct cifs_calc_sig_ctx){ .hmac = &hmac_ctx });
if (!rc)
- memcpy(shdr->Signature, sigptr, SMB2_SIGNATURE_SIZE);
+ memcpy(shdr->Signature, smb2_signature, SMB2_SIGNATURE_SIZE);
-out:
- if (allocate_crypto)
- cifs_free_hash(&shash);
return rc;
}
@@ -336,8 +266,8 @@ static int generate_key(struct cifs_ses *ses, struct kvec label,
__u8 L256[4] = {0, 0, 1, 0};
int rc = 0;
unsigned char prfhash[SMB2_HMACSHA256_SIZE];
- unsigned char *hashptr = prfhash;
struct TCP_Server_Info *server = ses->server;
+ struct hmac_sha256_ctx hmac_ctx;
memset(prfhash, 0x0, SMB2_HMACSHA256_SIZE);
memset(key, 0x0, key_size);
@@ -345,67 +275,26 @@ static int generate_key(struct cifs_ses *ses, struct kvec label,
rc = smb3_crypto_shash_allocate(server);
if (rc) {
cifs_server_dbg(VFS, "%s: crypto alloc failed\n", __func__);
- goto smb3signkey_ret;
- }
-
- rc = crypto_shash_setkey(server->secmech.hmacsha256->tfm,
- ses->auth_key.response, SMB2_NTLMV2_SESSKEY_SIZE);
- if (rc) {
- cifs_server_dbg(VFS, "%s: Could not set with session key\n", __func__);
- goto smb3signkey_ret;
- }
-
- rc = crypto_shash_init(server->secmech.hmacsha256);
- if (rc) {
- cifs_server_dbg(VFS, "%s: Could not init sign hmac\n", __func__);
- goto smb3signkey_ret;
- }
-
- rc = crypto_shash_update(server->secmech.hmacsha256, i, 4);
- if (rc) {
- cifs_server_dbg(VFS, "%s: Could not update with n\n", __func__);
- goto smb3signkey_ret;
- }
-
- rc = crypto_shash_update(server->secmech.hmacsha256, label.iov_base, label.iov_len);
- if (rc) {
- cifs_server_dbg(VFS, "%s: Could not update with label\n", __func__);
- goto smb3signkey_ret;
+ return rc;
}
- rc = crypto_shash_update(server->secmech.hmacsha256, &zero, 1);
- if (rc) {
- cifs_server_dbg(VFS, "%s: Could not update with zero\n", __func__);
- goto smb3signkey_ret;
- }
-
- rc = crypto_shash_update(server->secmech.hmacsha256, context.iov_base, context.iov_len);
- if (rc) {
- cifs_server_dbg(VFS, "%s: Could not update with context\n", __func__);
- goto smb3signkey_ret;
- }
+ hmac_sha256_init_usingrawkey(&hmac_ctx, ses->auth_key.response,
+ SMB2_NTLMV2_SESSKEY_SIZE);
+ hmac_sha256_update(&hmac_ctx, i, 4);
+ hmac_sha256_update(&hmac_ctx, label.iov_base, label.iov_len);
+ hmac_sha256_update(&hmac_ctx, &zero, 1);
+ hmac_sha256_update(&hmac_ctx, context.iov_base, context.iov_len);
if ((server->cipher_type == SMB2_ENCRYPTION_AES256_CCM) ||
(server->cipher_type == SMB2_ENCRYPTION_AES256_GCM)) {
- rc = crypto_shash_update(server->secmech.hmacsha256, L256, 4);
+ hmac_sha256_update(&hmac_ctx, L256, 4);
} else {
- rc = crypto_shash_update(server->secmech.hmacsha256, L128, 4);
- }
- if (rc) {
- cifs_server_dbg(VFS, "%s: Could not update with L\n", __func__);
- goto smb3signkey_ret;
+ hmac_sha256_update(&hmac_ctx, L128, 4);
}
+ hmac_sha256_final(&hmac_ctx, prfhash);
- rc = crypto_shash_final(server->secmech.hmacsha256, hashptr);
- if (rc) {
- cifs_server_dbg(VFS, "%s: Could not generate sha256 hash\n", __func__);
- goto smb3signkey_ret;
- }
-
- memcpy(key, hashptr, key_size);
-
-smb3signkey_ret:
- return rc;
+ memcpy(key, prfhash, key_size);
+ return 0;
}
struct derivation {
@@ -582,7 +471,6 @@ smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server,
{
int rc;
unsigned char smb3_signature[SMB2_CMACAES_SIZE];
- unsigned char *sigptr = smb3_signature;
struct kvec *iov = rqst->rq_iov;
struct smb2_hdr *shdr = (struct smb2_hdr *)iov[0].iov_base;
struct shash_desc *shash = NULL;
@@ -643,9 +531,11 @@ smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server,
drqst.rq_nvec--;
}
- rc = __cifs_calc_signature(&drqst, server, sigptr, shash);
+ rc = __cifs_calc_signature(
+ &drqst, server, smb3_signature,
+ &(struct cifs_calc_sig_ctx){ .shash = shash });
if (!rc)
- memcpy(shdr->Signature, sigptr, SMB2_SIGNATURE_SIZE);
+ memcpy(shdr->Signature, smb3_signature, SMB2_SIGNATURE_SIZE);
out:
if (allocate_crypto)
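
generate_key() is a one-block SP800-108 counter-mode KDF: HMAC-SHA256 keyed with the session key over i=1 || label || 0x00 || context || L, where L is the output length in bits (128, or 256 for the AES-256 ciphers). One PRF block suffices because every key derived here is at most 32 bytes. The shape of the computation as a kernel-side sketch (function name and parameters are illustrative):

#include <crypto/sha2.h>

static void kdf_one_block(const u8 *key, size_t key_len,
			  const u8 *label, size_t label_len,
			  const u8 *context, size_t context_len,
			  u32 out_bits, u8 out[SHA256_DIGEST_SIZE])
{
	struct hmac_sha256_ctx ctx;
	const u8 i[4] = { 0, 0, 0, 1 };		/* block counter, big endian */
	const u8 zero = 0;
	const u8 L[4] = { (u8)(out_bits >> 24), (u8)(out_bits >> 16),
			  (u8)(out_bits >> 8), (u8)out_bits };

	hmac_sha256_init_usingrawkey(&ctx, key, key_len);
	hmac_sha256_update(&ctx, i, 4);
	hmac_sha256_update(&ctx, label, label_len);
	hmac_sha256_update(&ctx, &zero, 1);	/* NUL label separator */
	hmac_sha256_update(&ctx, context, context_len);
	hmac_sha256_update(&ctx, L, 4);		/* output length in bits */
	hmac_sha256_final(&ctx, out);
}
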
diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c
index 316f398c70f4..49e2df3ad1f0 100644
--- a/fs/smb/client/smbdirect.c
+++ b/fs/smb/client/smbdirect.c
@@ -1575,12 +1575,12 @@ void smbd_destroy(struct TCP_Server_Info *server)
disable_work_sync(&sc->disconnect_work);
log_rdma_event(INFO, "destroying rdma session\n");
- if (sc->status < SMBDIRECT_SOCKET_DISCONNECTING) {
+ if (sc->status < SMBDIRECT_SOCKET_DISCONNECTING)
smbd_disconnect_rdma_work(&sc->disconnect_work);
+ if (sc->status < SMBDIRECT_SOCKET_DISCONNECTED) {
log_rdma_event(INFO, "wait for transport being disconnected\n");
- wait_event_interruptible(
- sc->status_wait,
- sc->status == SMBDIRECT_SOCKET_DISCONNECTED);
+ wait_event(sc->status_wait, sc->status == SMBDIRECT_SOCKET_DISCONNECTED);
+ log_rdma_event(INFO, "waited for transport being disconnected\n");
}
/*
@@ -1624,19 +1624,7 @@ void smbd_destroy(struct TCP_Server_Info *server)
log_rdma_event(INFO, "free receive buffers\n");
destroy_receive_buffers(sc);
- /*
- * For performance reasons, memory registration and deregistration
- * are not locked by srv_mutex. It is possible some processes are
- * blocked on transport srv_mutex while holding memory registration.
- * Release the transport srv_mutex to allow them to hit the failure
- * path when sending data, and then release memory registrations.
- */
log_rdma_event(INFO, "freeing mr list\n");
- while (atomic_read(&sc->mr_io.used.count)) {
- cifs_server_unlock(server);
- msleep(1000);
- cifs_server_lock(server);
- }
destroy_mr_list(sc);
ib_free_cq(sc->ib.send_cq);
@@ -2352,18 +2340,84 @@ static void smbd_mr_recovery_work(struct work_struct *work)
}
}
+static void smbd_mr_disable_locked(struct smbdirect_mr_io *mr)
+{
+ struct smbdirect_socket *sc = mr->socket;
+
+ lockdep_assert_held(&mr->mutex);
+
+ if (mr->state == SMBDIRECT_MR_DISABLED)
+ return;
+
+ if (mr->mr)
+ ib_dereg_mr(mr->mr);
+ if (mr->sgt.nents)
+ ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir);
+ kfree(mr->sgt.sgl);
+
+ mr->mr = NULL;
+ mr->sgt.sgl = NULL;
+ mr->sgt.nents = 0;
+
+ mr->state = SMBDIRECT_MR_DISABLED;
+}
+
+static void smbd_mr_free_locked(struct kref *kref)
+{
+ struct smbdirect_mr_io *mr =
+ container_of(kref, struct smbdirect_mr_io, kref);
+
+ lockdep_assert_held(&mr->mutex);
+
+ /*
+ * smbd_mr_disable_locked() should already be called!
+ */
+ if (WARN_ON_ONCE(mr->state != SMBDIRECT_MR_DISABLED))
+ smbd_mr_disable_locked(mr);
+
+ mutex_unlock(&mr->mutex);
+ mutex_destroy(&mr->mutex);
+ kfree(mr);
+}
+
static void destroy_mr_list(struct smbdirect_socket *sc)
{
struct smbdirect_mr_io *mr, *tmp;
+ LIST_HEAD(all_list);
+ unsigned long flags;
disable_work_sync(&sc->mr_io.recovery_work);
- list_for_each_entry_safe(mr, tmp, &sc->mr_io.all.list, list) {
- if (mr->state == SMBDIRECT_MR_INVALIDATED)
- ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl,
- mr->sgt.nents, mr->dir);
- ib_dereg_mr(mr->mr);
- kfree(mr->sgt.sgl);
- kfree(mr);
+
+ spin_lock_irqsave(&sc->mr_io.all.lock, flags);
+ list_splice_tail_init(&sc->mr_io.all.list, &all_list);
+ spin_unlock_irqrestore(&sc->mr_io.all.lock, flags);
+
+ list_for_each_entry_safe(mr, tmp, &all_list, list) {
+ mutex_lock(&mr->mutex);
+
+ smbd_mr_disable_locked(mr);
+ list_del(&mr->list);
+ mr->socket = NULL;
+
+ /*
+ * No kref_put_mutex() as it's already locked.
+ *
+		 * If kref_put() returned 1, smbd_mr_free_locked()
+		 * ran, the mutex was unlocked, and the mr is gone.
+		 *
+		 * If kref_put() returned 0 we know that
+		 * smbd_mr_free_locked() didn't run, not for us
+		 * nor for anyone else, as we still hold the
+		 * mutex, so we need to unlock.
+		 *
+		 * If the mr is still registered it will be left
+		 * dangling (detached from the connection), waiting
+		 * for smbd_deregister_mr() to be called in order
+		 * to free the memory.
+ */
+ if (!kref_put(&mr->kref, smbd_mr_free_locked))
+ mutex_unlock(&mr->mutex);
}
}
@@ -2377,10 +2431,9 @@ static void destroy_mr_list(struct smbdirect_socket *sc)
static int allocate_mr_list(struct smbdirect_socket *sc)
{
struct smbdirect_socket_parameters *sp = &sc->parameters;
- int i;
- struct smbdirect_mr_io *smbdirect_mr, *tmp;
-
- INIT_WORK(&sc->mr_io.recovery_work, smbd_mr_recovery_work);
+ struct smbdirect_mr_io *mr;
+ int ret;
+ u32 i;
if (sp->responder_resources == 0) {
log_rdma_mr(ERR, "responder_resources negotiated as 0\n");
@@ -2389,42 +2442,52 @@ static int allocate_mr_list(struct smbdirect_socket *sc)
/* Allocate more MRs (2x) than hardware responder_resources */
for (i = 0; i < sp->responder_resources * 2; i++) {
- smbdirect_mr = kzalloc(sizeof(*smbdirect_mr), GFP_KERNEL);
- if (!smbdirect_mr)
- goto cleanup_entries;
- smbdirect_mr->mr = ib_alloc_mr(sc->ib.pd, sc->mr_io.type,
- sp->max_frmr_depth);
- if (IS_ERR(smbdirect_mr->mr)) {
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr) {
+ ret = -ENOMEM;
+ goto kzalloc_mr_failed;
+ }
+
+ kref_init(&mr->kref);
+ mutex_init(&mr->mutex);
+
+ mr->mr = ib_alloc_mr(sc->ib.pd,
+ sc->mr_io.type,
+ sp->max_frmr_depth);
+ if (IS_ERR(mr->mr)) {
+ ret = PTR_ERR(mr->mr);
log_rdma_mr(ERR, "ib_alloc_mr failed mr_type=%x max_frmr_depth=%x\n",
sc->mr_io.type, sp->max_frmr_depth);
- goto out;
+ goto ib_alloc_mr_failed;
}
- smbdirect_mr->sgt.sgl = kcalloc(sp->max_frmr_depth,
- sizeof(struct scatterlist),
- GFP_KERNEL);
- if (!smbdirect_mr->sgt.sgl) {
+
+ mr->sgt.sgl = kcalloc(sp->max_frmr_depth,
+ sizeof(struct scatterlist),
+ GFP_KERNEL);
+ if (!mr->sgt.sgl) {
+ ret = -ENOMEM;
log_rdma_mr(ERR, "failed to allocate sgl\n");
- ib_dereg_mr(smbdirect_mr->mr);
- goto out;
+ goto kcalloc_sgl_failed;
}
- smbdirect_mr->state = SMBDIRECT_MR_READY;
- smbdirect_mr->socket = sc;
+ mr->state = SMBDIRECT_MR_READY;
+ mr->socket = sc;
- list_add_tail(&smbdirect_mr->list, &sc->mr_io.all.list);
+ list_add_tail(&mr->list, &sc->mr_io.all.list);
atomic_inc(&sc->mr_io.ready.count);
}
+
+ INIT_WORK(&sc->mr_io.recovery_work, smbd_mr_recovery_work);
+
return 0;
-out:
- kfree(smbdirect_mr);
-cleanup_entries:
- list_for_each_entry_safe(smbdirect_mr, tmp, &sc->mr_io.all.list, list) {
- list_del(&smbdirect_mr->list);
- ib_dereg_mr(smbdirect_mr->mr);
- kfree(smbdirect_mr->sgt.sgl);
- kfree(smbdirect_mr);
- }
- return -ENOMEM;
+kcalloc_sgl_failed:
+ ib_dereg_mr(mr->mr);
+ib_alloc_mr_failed:
+ mutex_destroy(&mr->mutex);
+ kfree(mr);
+kzalloc_mr_failed:
+ destroy_mr_list(sc);
+ return ret;
}
/*
@@ -2458,6 +2521,7 @@ again:
list_for_each_entry(ret, &sc->mr_io.all.list, list) {
if (ret->state == SMBDIRECT_MR_READY) {
ret->state = SMBDIRECT_MR_REGISTERED;
+ kref_get(&ret->kref);
spin_unlock_irqrestore(&sc->mr_io.all.lock, flags);
atomic_dec(&sc->mr_io.ready.count);
atomic_inc(&sc->mr_io.used.count);
@@ -2504,9 +2568,8 @@ struct smbdirect_mr_io *smbd_register_mr(struct smbd_connection *info,
{
struct smbdirect_socket *sc = &info->socket;
struct smbdirect_socket_parameters *sp = &sc->parameters;
- struct smbdirect_mr_io *smbdirect_mr;
+ struct smbdirect_mr_io *mr;
int rc, num_pages;
- enum dma_data_direction dir;
struct ib_reg_wr *reg_wr;
num_pages = iov_iter_npages(iter, sp->max_frmr_depth + 1);
@@ -2517,49 +2580,47 @@ struct smbdirect_mr_io *smbd_register_mr(struct smbd_connection *info,
return NULL;
}
- smbdirect_mr = get_mr(sc);
- if (!smbdirect_mr) {
+ mr = get_mr(sc);
+ if (!mr) {
log_rdma_mr(ERR, "get_mr returning NULL\n");
return NULL;
}
- dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
- smbdirect_mr->dir = dir;
- smbdirect_mr->need_invalidate = need_invalidate;
- smbdirect_mr->sgt.nents = 0;
- smbdirect_mr->sgt.orig_nents = 0;
+ mutex_lock(&mr->mutex);
+
+ mr->dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
+ mr->need_invalidate = need_invalidate;
+ mr->sgt.nents = 0;
+ mr->sgt.orig_nents = 0;
log_rdma_mr(INFO, "num_pages=0x%x count=0x%zx depth=%u\n",
num_pages, iov_iter_count(iter), sp->max_frmr_depth);
- smbd_iter_to_mr(iter, &smbdirect_mr->sgt, sp->max_frmr_depth);
+ smbd_iter_to_mr(iter, &mr->sgt, sp->max_frmr_depth);
- rc = ib_dma_map_sg(sc->ib.dev, smbdirect_mr->sgt.sgl,
- smbdirect_mr->sgt.nents, dir);
+ rc = ib_dma_map_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir);
if (!rc) {
log_rdma_mr(ERR, "ib_dma_map_sg num_pages=%x dir=%x rc=%x\n",
- num_pages, dir, rc);
+ num_pages, mr->dir, rc);
goto dma_map_error;
}
- rc = ib_map_mr_sg(smbdirect_mr->mr, smbdirect_mr->sgt.sgl,
- smbdirect_mr->sgt.nents, NULL, PAGE_SIZE);
- if (rc != smbdirect_mr->sgt.nents) {
+ rc = ib_map_mr_sg(mr->mr, mr->sgt.sgl, mr->sgt.nents, NULL, PAGE_SIZE);
+ if (rc != mr->sgt.nents) {
log_rdma_mr(ERR,
- "ib_map_mr_sg failed rc = %d nents = %x\n",
- rc, smbdirect_mr->sgt.nents);
+ "ib_map_mr_sg failed rc = %d nents = %x\n",
+ rc, mr->sgt.nents);
goto map_mr_error;
}
- ib_update_fast_reg_key(smbdirect_mr->mr,
- ib_inc_rkey(smbdirect_mr->mr->rkey));
- reg_wr = &smbdirect_mr->wr;
+ ib_update_fast_reg_key(mr->mr, ib_inc_rkey(mr->mr->rkey));
+ reg_wr = &mr->wr;
reg_wr->wr.opcode = IB_WR_REG_MR;
- smbdirect_mr->cqe.done = register_mr_done;
- reg_wr->wr.wr_cqe = &smbdirect_mr->cqe;
+ mr->cqe.done = register_mr_done;
+ reg_wr->wr.wr_cqe = &mr->cqe;
reg_wr->wr.num_sge = 0;
reg_wr->wr.send_flags = IB_SEND_SIGNALED;
- reg_wr->mr = smbdirect_mr->mr;
- reg_wr->key = smbdirect_mr->mr->rkey;
+ reg_wr->mr = mr->mr;
+ reg_wr->key = mr->mr->rkey;
reg_wr->access = writing ?
IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
IB_ACCESS_REMOTE_READ;
@@ -2570,24 +2631,51 @@ struct smbdirect_mr_io *smbd_register_mr(struct smbd_connection *info,
* on the next ib_post_send when we actually send I/O to remote peer
*/
rc = ib_post_send(sc->ib.qp, &reg_wr->wr, NULL);
- if (!rc)
- return smbdirect_mr;
+ if (!rc) {
+ /*
+ * get_mr() gave us a reference
+		 * via kref_get(&mr->kref); we keep that and let
+ * the caller use smbd_deregister_mr()
+ * to remove it again.
+ */
+ mutex_unlock(&mr->mutex);
+ return mr;
+ }
log_rdma_mr(ERR, "ib_post_send failed rc=%x reg_wr->key=%x\n",
rc, reg_wr->key);
/* If all failed, attempt to recover this MR by setting it SMBDIRECT_MR_ERROR*/
map_mr_error:
- ib_dma_unmap_sg(sc->ib.dev, smbdirect_mr->sgt.sgl,
- smbdirect_mr->sgt.nents, smbdirect_mr->dir);
+ ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir);
dma_map_error:
- smbdirect_mr->state = SMBDIRECT_MR_ERROR;
+ mr->sgt.nents = 0;
+ mr->state = SMBDIRECT_MR_ERROR;
if (atomic_dec_and_test(&sc->mr_io.used.count))
wake_up(&sc->mr_io.cleanup.wait_queue);
smbd_disconnect_rdma_connection(sc);
+ /*
+ * get_mr() gave us a reference
+	 * via kref_get(&mr->kref); we need to remove it again
+ * on error.
+ *
+ * No kref_put_mutex() as it's already locked.
+ *
+	 * If kref_put() returned 1, smbd_mr_free_locked()
+	 * ran, the mutex was unlocked, and the mr is gone.
+	 *
+	 * If kref_put() returned 0 we know that
+	 * smbd_mr_free_locked() didn't run, not for us
+	 * nor for anyone else, as we still hold the
+	 * mutex, so we need to unlock.
+ */
+ if (!kref_put(&mr->kref, smbd_mr_free_locked))
+ mutex_unlock(&mr->mutex);
+
return NULL;
}
@@ -2612,44 +2700,55 @@ static void local_inv_done(struct ib_cq *cq, struct ib_wc *wc)
 * and we have to locally invalidate the buffer to prevent data from being
 * modified by the remote peer after the upper layer consumes it
*/
-int smbd_deregister_mr(struct smbdirect_mr_io *smbdirect_mr)
+void smbd_deregister_mr(struct smbdirect_mr_io *mr)
{
- struct ib_send_wr *wr;
- struct smbdirect_socket *sc = smbdirect_mr->socket;
- int rc = 0;
+ struct smbdirect_socket *sc = mr->socket;
+
+ mutex_lock(&mr->mutex);
+ if (mr->state == SMBDIRECT_MR_DISABLED)
+ goto put_kref;
+
+ if (sc->status != SMBDIRECT_SOCKET_CONNECTED) {
+ smbd_mr_disable_locked(mr);
+ goto put_kref;
+ }
+
+ if (mr->need_invalidate) {
+ struct ib_send_wr *wr = &mr->inv_wr;
+ int rc;
- if (smbdirect_mr->need_invalidate) {
/* Need to finish local invalidation before returning */
- wr = &smbdirect_mr->inv_wr;
wr->opcode = IB_WR_LOCAL_INV;
- smbdirect_mr->cqe.done = local_inv_done;
- wr->wr_cqe = &smbdirect_mr->cqe;
+ mr->cqe.done = local_inv_done;
+ wr->wr_cqe = &mr->cqe;
wr->num_sge = 0;
- wr->ex.invalidate_rkey = smbdirect_mr->mr->rkey;
+ wr->ex.invalidate_rkey = mr->mr->rkey;
wr->send_flags = IB_SEND_SIGNALED;
- init_completion(&smbdirect_mr->invalidate_done);
+ init_completion(&mr->invalidate_done);
rc = ib_post_send(sc->ib.qp, wr, NULL);
if (rc) {
log_rdma_mr(ERR, "ib_post_send failed rc=%x\n", rc);
+ smbd_mr_disable_locked(mr);
smbd_disconnect_rdma_connection(sc);
goto done;
}
- wait_for_completion(&smbdirect_mr->invalidate_done);
- smbdirect_mr->need_invalidate = false;
+ wait_for_completion(&mr->invalidate_done);
+ mr->need_invalidate = false;
} else
/*
* For remote invalidation, just set it to SMBDIRECT_MR_INVALIDATED
* and defer to mr_recovery_work to recover the MR for next use
*/
- smbdirect_mr->state = SMBDIRECT_MR_INVALIDATED;
+ mr->state = SMBDIRECT_MR_INVALIDATED;
- if (smbdirect_mr->state == SMBDIRECT_MR_INVALIDATED) {
- ib_dma_unmap_sg(
- sc->ib.dev, smbdirect_mr->sgt.sgl,
- smbdirect_mr->sgt.nents,
- smbdirect_mr->dir);
- smbdirect_mr->state = SMBDIRECT_MR_READY;
+ if (mr->sgt.nents) {
+ ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir);
+ mr->sgt.nents = 0;
+ }
+
+ if (mr->state == SMBDIRECT_MR_INVALIDATED) {
+ mr->state = SMBDIRECT_MR_READY;
if (atomic_inc_return(&sc->mr_io.ready.count) == 1)
wake_up(&sc->mr_io.ready.wait_queue);
} else
@@ -2663,7 +2762,23 @@ done:
if (atomic_dec_and_test(&sc->mr_io.used.count))
wake_up(&sc->mr_io.cleanup.wait_queue);
- return rc;
+put_kref:
+ /*
+ * No kref_put_mutex() as it's already locked.
+ *
+	 * If kref_put() returned 1, smbd_mr_free_locked()
+	 * ran, the mutex was unlocked, and the mr is gone.
+	 *
+	 * If kref_put() returned 0 we know that
+	 * smbd_mr_free_locked() didn't run, not for us
+	 * nor for anyone else, as we still hold the
+	 * mutex, so we need to unlock
+ * and keep the mr in SMBDIRECT_MR_READY or
+ * SMBDIRECT_MR_ERROR state.
+ */
+ if (!kref_put(&mr->kref, smbd_mr_free_locked))
+ mutex_unlock(&mr->mutex);
}
static bool smb_set_sge(struct smb_extract_to_rdma *rdma,
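
Every drop in the new smbdirect.c code uses the same idiom: the mutex is already held, so kref_put_mutex() cannot be used, and the release callback (smbd_mr_free_locked) both unlocks and destroys the mutex on its way out. Only a non-final put leaves the lock for the caller to release. The idiom extracted, with a hypothetical helper name:

static void mr_put_locked(struct smbdirect_mr_io *mr)
{
	lockdep_assert_held(&mr->mutex);

	/* On the final put, smbd_mr_free_locked() unlocks and
	 * destroys mr->mutex itself; otherwise we still hold it. */
	if (!kref_put(&mr->kref, smbd_mr_free_locked))
		mutex_unlock(&mr->mutex);
}
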
diff --git a/fs/smb/client/smbdirect.h b/fs/smb/client/smbdirect.h
index d67ac5ddaff4..577d37dbeb8a 100644
--- a/fs/smb/client/smbdirect.h
+++ b/fs/smb/client/smbdirect.h
@@ -60,7 +60,7 @@ int smbd_send(struct TCP_Server_Info *server,
struct smbdirect_mr_io *smbd_register_mr(
struct smbd_connection *info, struct iov_iter *iter,
bool writing, bool need_invalidate);
-int smbd_deregister_mr(struct smbdirect_mr_io *mr);
+void smbd_deregister_mr(struct smbdirect_mr_io *mr);
#else
#define cifs_rdma_enabled(server) 0
diff --git a/fs/smb/client/xattr.c b/fs/smb/client/xattr.c
index b88fa04f5792..029910d56c22 100644
--- a/fs/smb/client/xattr.c
+++ b/fs/smb/client/xattr.c
@@ -178,7 +178,6 @@ static int cifs_xattr_set(const struct xattr_handler *handler,
memcpy(pacl, value, size);
if (pTcon->ses->server->ops->set_acl) {
int aclflags = 0;
- rc = 0;
switch (handler->flags) {
case XATTR_CIFS_NTSD_FULL:
diff --git a/fs/smb/common/cifsglob.h b/fs/smb/common/cifsglob.h
new file mode 100644
index 000000000000..00fd215e3eb5
--- /dev/null
+++ b/fs/smb/common/cifsglob.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
+/*
+ *
+ * Copyright (C) International Business Machines Corp., 2002,2008
+ * Author(s): Steve French (sfrench@us.ibm.com)
+ * Jeremy Allison (jra@samba.org)
+ *
+ */
+#ifndef _COMMON_CIFS_GLOB_H
+#define _COMMON_CIFS_GLOB_H
+
+static inline void inc_rfc1001_len(void *buf, int count)
+{
+ be32_add_cpu((__be32 *)buf, count);
+}
+
+#define SMB1_VERSION_STRING "1.0"
+#define SMB20_VERSION_STRING "2.0"
+#define SMB21_VERSION_STRING "2.1"
+#define SMBDEFAULT_VERSION_STRING "default"
+#define SMB3ANY_VERSION_STRING "3"
+#define SMB30_VERSION_STRING "3.0"
+#define SMB302_VERSION_STRING "3.02"
+#define ALT_SMB302_VERSION_STRING "3.0.2"
+#define SMB311_VERSION_STRING "3.1.1"
+#define ALT_SMB311_VERSION_STRING "3.11"
+
+#define CIFS_DEFAULT_IOSIZE (1024 * 1024)
+
+#endif /* _COMMON_CIFS_GLOB_H */
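
The new header exists so the client and server stop keeping private copies of these definitions (the deletions below remove the duplicates). inc_rfc1001_len() bumps the PDU length kept in the low 24 bits of the big-endian 4-byte NetBIOS session header, matching the 0xffffff mask in get_rfc1002_length() above. A standalone model of the arithmetic:

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void inc_len(uint8_t hdr[4], int count)
{
	uint32_t v;

	memcpy(&v, hdr, sizeof(v));
	v = htonl(ntohl(v) + count);	/* be32_add_cpu() equivalent */
	memcpy(hdr, &v, sizeof(v));
}

int main(void)
{
	uint8_t hdr[4] = { 0, 0, 0, 10 };

	inc_len(hdr, 5);
	printf("%d\n", hdr[3]);		/* 15 */
	return 0;
}
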
diff --git a/fs/smb/common/smbdirect/smbdirect_socket.h b/fs/smb/common/smbdirect/smbdirect_socket.h
index db22a1d0546b..361db7f9f623 100644
--- a/fs/smb/common/smbdirect/smbdirect_socket.h
+++ b/fs/smb/common/smbdirect/smbdirect_socket.h
@@ -437,13 +437,22 @@ enum smbdirect_mr_state {
SMBDIRECT_MR_READY,
SMBDIRECT_MR_REGISTERED,
SMBDIRECT_MR_INVALIDATED,
- SMBDIRECT_MR_ERROR
+ SMBDIRECT_MR_ERROR,
+ SMBDIRECT_MR_DISABLED
};
struct smbdirect_mr_io {
struct smbdirect_socket *socket;
struct ib_cqe cqe;
+ /*
+ * We can have up to two references:
+ * 1. by the connection
+ * 2. by the registration
+ */
+ struct kref kref;
+ struct mutex mutex;
+
struct list_head list;
enum smbdirect_mr_state state;
diff --git a/fs/smb/server/smb_common.h b/fs/smb/server/smb_common.h
index d742ba754348..863716207a0d 100644
--- a/fs/smb/server/smb_common.h
+++ b/fs/smb/server/smb_common.h
@@ -10,6 +10,7 @@
#include "glob.h"
#include "nterr.h"
+#include "../common/cifsglob.h"
#include "../common/smb2pdu.h"
#include "smb2pdu.h"
@@ -26,16 +27,8 @@
#define SMB311_PROT 6
#define BAD_PROT 0xFFFF
-#define SMB1_VERSION_STRING "1.0"
-#define SMB20_VERSION_STRING "2.0"
-#define SMB21_VERSION_STRING "2.1"
-#define SMB30_VERSION_STRING "3.0"
-#define SMB302_VERSION_STRING "3.02"
-#define SMB311_VERSION_STRING "3.1.1"
-
#define SMB_ECHO_INTERVAL (60 * HZ)
-#define CIFS_DEFAULT_IOSIZE (64 * 1024)
#define MAX_CIFS_SMALL_BUFFER_SIZE 448 /* big enough for most */
#define MAX_STREAM_PROT_LEN 0x00FFFFFF
@@ -464,9 +457,4 @@ static inline unsigned int get_rfc1002_len(void *buf)
{
return be32_to_cpu(*((__be32 *)buf)) & 0xffffff;
}
-
-static inline void inc_rfc1001_len(void *buf, int count)
-{
- be32_add_cpu((__be32 *)buf, count);
-}
#endif /* __SMB_COMMON_H__ */
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index 681cf0c8b9df..7310841f4512 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -51,8 +51,6 @@ struct arch_timer_vm_data {
};
struct arch_timer_context {
- struct kvm_vcpu *vcpu;
-
/* Emulated Timer (may be unused) */
struct hrtimer hrtimer;
u64 ns_frac;
@@ -71,6 +69,9 @@ struct arch_timer_context {
bool level;
} irq;
+ /* Who am I? */
+ enum kvm_arch_timers timer_id;
+
/* Duplicated state from arch_timer.c for convenience */
u32 host_timer_irq;
};
@@ -106,9 +107,6 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu);
void kvm_timer_init_vm(struct kvm *kvm);
-u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid);
-int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value);
-
int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr);
int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr);
int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr);
@@ -127,9 +125,9 @@ void kvm_timer_init_vhe(void);
#define vcpu_hvtimer(v) (&(v)->arch.timer_cpu.timers[TIMER_HVTIMER])
#define vcpu_hptimer(v) (&(v)->arch.timer_cpu.timers[TIMER_HPTIMER])
-#define arch_timer_ctx_index(ctx) ((ctx) - vcpu_timer((ctx)->vcpu)->timers)
-
-#define timer_vm_data(ctx) (&(ctx)->vcpu->kvm->arch.timer_data)
+#define arch_timer_ctx_index(ctx) ((ctx)->timer_id)
+#define timer_context_to_vcpu(ctx) container_of((ctx), struct kvm_vcpu, arch.timer_cpu.timers[(ctx)->timer_id])
+#define timer_vm_data(ctx) (&(timer_context_to_vcpu(ctx)->kvm->arch.timer_data))
#define timer_irq(ctx) (timer_vm_data(ctx)->ppi[arch_timer_ctx_index(ctx)])
u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu,
@@ -178,4 +176,14 @@ static inline u64 timer_get_offset(struct arch_timer_context *ctxt)
return offset;
}
+static inline void timer_set_offset(struct arch_timer_context *ctxt, u64 offset)
+{
+ if (!ctxt->offset.vm_offset) {
+ WARN(offset, "timer %d\n", arch_timer_ctx_index(ctxt));
+ return;
+ }
+
+ WRITE_ONCE(*ctxt->offset.vm_offset, offset);
+}
+
#endif
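
timer_context_to_vcpu() removes the back-pointer by exploiting the containment: a context that knows its own index in the timers[] array can compute the enclosing vcpu with container_of() alone. A standalone model of that pointer arithmetic (the structure layout is illustrative):

#include <stddef.h>
#include <stdio.h>

struct ctx { int timer_id; };
struct vcpu { char other_state[16]; struct ctx timers[4]; };

/* Mirrors the container_of() trick: subtract the member's offset,
 * using the index stored in the member itself. */
#define ctx_to_vcpu(c) \
	((struct vcpu *)((char *)(c) - offsetof(struct vcpu, timers[(c)->timer_id])))

int main(void)
{
	struct vcpu v;
	int i;

	for (i = 0; i < 4; i++)
		v.timers[i].timer_id = i;
	printf("%d\n", ctx_to_vcpu(&v.timers[2]) == &v);	/* 1 */
	return 0;
}

This is why the struct gains the "Who am I?" timer_id field: without it, neither the index nor the parent could be recovered from a bare context pointer.
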
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index fa36e70df088..5bd76cf394fa 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -729,7 +729,17 @@ static inline bool kvm_arch_has_private_mem(struct kvm *kvm)
#endif
#ifdef CONFIG_KVM_GUEST_MEMFD
-bool kvm_arch_supports_gmem_mmap(struct kvm *kvm);
+bool kvm_arch_supports_gmem_init_shared(struct kvm *kvm);
+
+static inline u64 kvm_gmem_get_supported_flags(struct kvm *kvm)
+{
+ u64 flags = GUEST_MEMFD_FLAG_MMAP;
+
+ if (!kvm || kvm_arch_supports_gmem_init_shared(kvm))
+ flags |= GUEST_MEMFD_FLAG_INIT_SHARED;
+
+ return flags;
+}
#endif
#ifndef kvm_arch_has_readonly_mem
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index d56583572c98..31463286402f 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1659,6 +1659,7 @@ struct nfs_pgio_header {
void *netfs;
#endif
+ unsigned short retrans;
int pnfs_error;
int error; /* merge with pnfs_error */
unsigned int good_bytes; /* boundary of good data */
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 6efa98a57ec1..52f6000ab020 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -962,7 +962,7 @@ struct kvm_enable_cap {
#define KVM_CAP_ARM_EL2_E2H0 241
#define KVM_CAP_RISCV_MP_STATE_RESET 242
#define KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED 243
-#define KVM_CAP_GUEST_MEMFD_MMAP 244
+#define KVM_CAP_GUEST_MEMFD_FLAGS 244
struct kvm_irq_routing_irqchip {
__u32 irqchip;
@@ -1599,7 +1599,8 @@ struct kvm_memory_attributes {
#define KVM_MEMORY_ATTRIBUTE_PRIVATE (1ULL << 3)
#define KVM_CREATE_GUEST_MEMFD _IOWR(KVMIO, 0xd4, struct kvm_create_guest_memfd)
-#define GUEST_MEMFD_FLAG_MMAP (1ULL << 0)
+#define GUEST_MEMFD_FLAG_MMAP (1ULL << 0)
+#define GUEST_MEMFD_FLAG_INIT_SHARED (1ULL << 1)
struct kvm_create_guest_memfd {
__u64 size;
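
Renaming the capability from MMAP to FLAGS turns it into a mask: userspace queries KVM_CAP_GUEST_MEMFD_FLAGS and gets back the full set of supported flags rather than a yes/no for one bit. Illustrative userspace usage, assuming a VM fd and uapi headers carrying the definitions above (error handling elided):

#include <linux/kvm.h>
#include <sys/ioctl.h>

static int create_shared_gmem(int vm_fd, unsigned long long size)
{
	struct kvm_create_guest_memfd gmem = {
		.size  = size,
		.flags = GUEST_MEMFD_FLAG_MMAP | GUEST_MEMFD_FLAG_INIT_SHARED,
	};

	/* Returns a new guest_memfd file descriptor on success. */
	return ioctl(vm_fd, KVM_CREATE_GUEST_MEMFD, &gmem);
}

Per kvm_gmem_get_supported_flags() above, INIT_SHARED is reported only when the architecture supports shared-by-default memory for that VM (or when queried without a VM).
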
diff --git a/mm/slub.c b/mm/slub.c
index b1f15598fbfd..a8fcc7e6f25a 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2170,8 +2170,15 @@ static inline void free_slab_obj_exts(struct slab *slab)
struct slabobj_ext *obj_exts;
obj_exts = slab_obj_exts(slab);
- if (!obj_exts)
+ if (!obj_exts) {
+ /*
+ * If obj_exts allocation failed, slab->obj_exts is set to
+ * OBJEXTS_ALLOC_FAIL. In this case, we end up here and should
+ * clear the flag.
+ */
+ slab->obj_exts = 0;
return;
+ }
/*
* obj_exts was created with __GFP_NO_OBJ_EXT flag, therefore its
@@ -6443,15 +6450,16 @@ static void free_deferred_objects(struct irq_work *work)
slab = virt_to_slab(x);
s = slab->slab_cache;
+ /* Point 'x' back to the beginning of allocated object */
+ x -= s->offset;
+
/*
* We used freepointer in 'x' to link 'x' into df->objects.
* Clear it to NULL to avoid false positive detection
* of "Freepointer corruption".
*/
- *(void **)x = NULL;
+ set_freepointer(s, x, NULL);
- /* Point 'x' back to the beginning of allocated object */
- x -= s->offset;
__slab_free(s, slab, x, x, 1, _THIS_IP_);
}
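
The reorder matters because set_freepointer() takes the start of the object and applies any freelist-pointer encoding (e.g. under CONFIG_SLAB_FREELIST_HARDENED) when storing at object + s->offset; the old open-coded store wrote a raw NULL. A simplified standalone model of the adjusted ordering:

#include <stdio.h>
#include <string.h>

struct cache { size_t offset; };

/* Stand-in for set_freepointer(): stores the link at obj + offset
 * (the real one may also encode the pointer). */
static void set_freepointer(struct cache *s, char *obj, void *val)
{
	memcpy(obj + s->offset, &val, sizeof(val));
}

int main(void)
{
	struct cache s = { .offset = 8 };
	char object[32] = { 0 };
	char *x = object + s.offset;	/* 'x' pointed at the link field */

	x -= s.offset;			/* first: back to the object start */
	set_freepointer(&s, x, NULL);	/* then: clear the link properly */
	printf("link cleared at offset %zu\n", s.offset);
	return 0;
}
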
diff --git a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c
index 91906414a474..993c9e38e729 100644
--- a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c
+++ b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c
@@ -1020,7 +1020,7 @@ static void set_counter_defaults(void)
{
const uint64_t MIN_ROLLOVER_SECS = 40ULL * 365 * 24 * 3600;
uint64_t freq = read_sysreg(CNTFRQ_EL0);
- uint64_t width = ilog2(MIN_ROLLOVER_SECS * freq);
+ int width = ilog2(MIN_ROLLOVER_SECS * freq);
width = clamp(width, 56, 64);
CVAL_MAX = GENMASK_ULL(width - 1, 0);
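
ilog2() returns an int, so the intermediate no longer needs to be a uint64_t only to be clamped against the int bounds 56 and 64; keeping it an int lets clamp() operate on a single type. A worked example of the computation, assuming a 62.5 MHz counter (the real test reads CNTFRQ_EL0 from the hardware):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t min_rollover_secs = 40ULL * 365 * 24 * 3600;
	uint64_t freq = 62500000;			/* assumed 62.5 MHz */
	uint64_t ticks = min_rollover_secs * freq;	/* ~7.9e16 */
	int width = 63 - __builtin_clzll(ticks);	/* ilog2(): 56 */

	if (width < 56)
		width = 56;
	if (width > 64)
		width = 64;				/* clamp(): still 56 */
	printf("width=%d -> CVAL_MAX=2^%d - 1\n", width, width);
	return 0;
}
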
diff --git a/tools/testing/selftests/kvm/arm64/external_aborts.c b/tools/testing/selftests/kvm/arm64/external_aborts.c
index 592b26ded779..d8fe17a6cc59 100644
--- a/tools/testing/selftests/kvm/arm64/external_aborts.c
+++ b/tools/testing/selftests/kvm/arm64/external_aborts.c
@@ -359,6 +359,44 @@ static void test_mmio_ease(void)
kvm_vm_free(vm);
}
+static void test_serror_amo_guest(void)
+{
+ /*
+ * The ISB is entirely unnecessary (and highlights how FEAT_NV2 is borked)
+ * since the write is redirected to memory. But don't write (intentionally)
+ * broken code!
+ */
+ sysreg_clear_set(hcr_el2, HCR_EL2_AMO | HCR_EL2_TGE, 0);
+ isb();
+
+ GUEST_SYNC(0);
+ GUEST_ASSERT(read_sysreg(isr_el1) & ISR_EL1_A);
+
+ /*
+ * KVM treats the effective value of AMO as 1 when
+ * HCR_EL2.{E2H,TGE} = {1, 0}, meaning the SError will be taken when
+ * unmasked.
+ */
+ local_serror_enable();
+ isb();
+ local_serror_disable();
+
+ GUEST_FAIL("Should've taken pending SError exception");
+}
+
+static void test_serror_amo(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_amo_guest,
+ unexpected_dabt_handler);
+
+ vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_serror_handler);
+ vcpu_run_expect_sync(vcpu);
+ vcpu_inject_serror(vcpu);
+ vcpu_run_expect_done(vcpu);
+ kvm_vm_free(vm);
+}
+
int main(void)
{
test_mmio_abort();
@@ -369,4 +407,9 @@ int main(void)
test_serror_emulated();
test_mmio_ease();
test_s1ptw_abort();
+
+ if (!test_supports_el2())
+ return 0;
+
+ test_serror_amo();
}
diff --git a/tools/testing/selftests/kvm/arm64/get-reg-list.c b/tools/testing/selftests/kvm/arm64/get-reg-list.c
index 011fad95dd02..c9b84eeaab6b 100644
--- a/tools/testing/selftests/kvm/arm64/get-reg-list.c
+++ b/tools/testing/selftests/kvm/arm64/get-reg-list.c
@@ -65,6 +65,9 @@ static struct feature_id_reg feat_id_regs[] = {
REG_FEAT(SCTLR2_EL1, ID_AA64MMFR3_EL1, SCTLRX, IMP),
REG_FEAT(VDISR_EL2, ID_AA64PFR0_EL1, RAS, IMP),
REG_FEAT(VSESR_EL2, ID_AA64PFR0_EL1, RAS, IMP),
+ REG_FEAT(VNCR_EL2, ID_AA64MMFR4_EL1, NV_frac, NV2_ONLY),
+ REG_FEAT(CNTHV_CTL_EL2, ID_AA64MMFR1_EL1, VH, IMP),
+ REG_FEAT(CNTHV_CVAL_EL2, ID_AA64MMFR1_EL1, VH, IMP),
};
bool filter_reg(__u64 reg)
@@ -345,9 +348,20 @@ static __u64 base_regs[] = {
KVM_REG_ARM_FW_FEAT_BMAP_REG(1), /* KVM_REG_ARM_STD_HYP_BMAP */
KVM_REG_ARM_FW_FEAT_BMAP_REG(2), /* KVM_REG_ARM_VENDOR_HYP_BMAP */
KVM_REG_ARM_FW_FEAT_BMAP_REG(3), /* KVM_REG_ARM_VENDOR_HYP_BMAP_2 */
- ARM64_SYS_REG(3, 3, 14, 3, 1), /* CNTV_CTL_EL0 */
- ARM64_SYS_REG(3, 3, 14, 3, 2), /* CNTV_CVAL_EL0 */
- ARM64_SYS_REG(3, 3, 14, 0, 2),
+
+ /*
+ * EL0 Virtual Timer Registers
+ *
+ * WARNING:
+ * KVM_REG_ARM_TIMER_CVAL and KVM_REG_ARM_TIMER_CNT are not defined
+ * with the appropriate register encodings. Their values have been
+ * accidentally swapped. As this is established ABI, the definitions
+ * here must be used, rather than ones derived from the encodings.
+ */
+ KVM_ARM64_SYS_REG(SYS_CNTV_CTL_EL0),
+ KVM_REG_ARM_TIMER_CVAL,
+ KVM_REG_ARM_TIMER_CNT,
+
ARM64_SYS_REG(3, 0, 0, 0, 0), /* MIDR_EL1 */
ARM64_SYS_REG(3, 0, 0, 0, 6), /* REVIDR_EL1 */
ARM64_SYS_REG(3, 1, 0, 0, 1), /* CLIDR_EL1 */
@@ -755,6 +769,10 @@ static __u64 el2_regs[] = {
SYS_REG(VSESR_EL2),
};
+static __u64 el2_e2h0_regs[] = {
+ /* Empty */
+};
+
#define BASE_SUBLIST \
{ "base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), }
#define VREGS_SUBLIST \
@@ -789,6 +807,15 @@ static __u64 el2_regs[] = {
.regs = el2_regs, \
.regs_n = ARRAY_SIZE(el2_regs), \
}
+#define EL2_E2H0_SUBLIST \
+ EL2_SUBLIST, \
+ { \
+ .name = "EL2 E2H0", \
+ .capability = KVM_CAP_ARM_EL2_E2H0, \
+ .feature = KVM_ARM_VCPU_HAS_EL2_E2H0, \
+ .regs = el2_e2h0_regs, \
+ .regs_n = ARRAY_SIZE(el2_e2h0_regs), \
+ }
static struct vcpu_reg_list vregs_config = {
.sublists = {
@@ -897,6 +924,65 @@ static struct vcpu_reg_list el2_pauth_pmu_config = {
},
};
+static struct vcpu_reg_list el2_e2h0_vregs_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_E2H0_SUBLIST,
+ VREGS_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_e2h0_vregs_pmu_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_E2H0_SUBLIST,
+ VREGS_SUBLIST,
+ PMU_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_e2h0_sve_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_E2H0_SUBLIST,
+ SVE_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_e2h0_sve_pmu_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_E2H0_SUBLIST,
+ SVE_SUBLIST,
+ PMU_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_e2h0_pauth_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_E2H0_SUBLIST,
+ VREGS_SUBLIST,
+ PAUTH_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_e2h0_pauth_pmu_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_E2H0_SUBLIST,
+ VREGS_SUBLIST,
+ PAUTH_SUBLIST,
+ PMU_SUBLIST,
+ {0},
+ },
+};
+
struct vcpu_reg_list *vcpu_configs[] = {
&vregs_config,
&vregs_pmu_config,
@@ -911,5 +997,12 @@ struct vcpu_reg_list *vcpu_configs[] = {
&el2_sve_pmu_config,
&el2_pauth_config,
&el2_pauth_pmu_config,
+
+ &el2_e2h0_vregs_config,
+ &el2_e2h0_vregs_pmu_config,
+ &el2_e2h0_sve_config,
+ &el2_e2h0_sve_pmu_config,
+ &el2_e2h0_pauth_config,
+ &el2_e2h0_pauth_pmu_config,
};
int vcpu_configs_n = ARRAY_SIZE(vcpu_configs);
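The swap called out in the WARNING above is visible in the UAPI constants
(shown for reference; check arch/arm64/include/uapi/asm/kvm.h for the
authoritative definitions):

	/*
	 * Architecturally, CNTV_CVAL_EL0 is op0=3,op1=3,CRn=14,CRm=3,op2=2 and
	 * CNTVCT_EL0 is op0=3,op1=3,CRn=14,CRm=0,op2=2 -- the UAPI constants
	 * have those two encodings crossed:
	 */
	#define KVM_REG_ARM_TIMER_CTL	ARM64_SYS_REG(3, 3, 14, 3, 1)	/* CNTV_CTL_EL0 */
	#define KVM_REG_ARM_TIMER_CVAL	ARM64_SYS_REG(3, 3, 14, 0, 2)	/* CNTVCT_EL0's encoding */
	#define KVM_REG_ARM_TIMER_CNT	ARM64_SYS_REG(3, 3, 14, 3, 2)	/* CNTV_CVAL_EL0's encoding */

Hence the test must list the UAPI names rather than regenerate the values
from the architectural encodings.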
diff --git a/tools/testing/selftests/kvm/arm64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c
index 8ff1e853f7f8..5e24f77868b5 100644
--- a/tools/testing/selftests/kvm/arm64/set_id_regs.c
+++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c
@@ -249,11 +249,14 @@ static void guest_code(void)
GUEST_REG_SYNC(SYS_ID_AA64ISAR2_EL1);
GUEST_REG_SYNC(SYS_ID_AA64ISAR3_EL1);
GUEST_REG_SYNC(SYS_ID_AA64PFR0_EL1);
+ GUEST_REG_SYNC(SYS_ID_AA64PFR1_EL1);
GUEST_REG_SYNC(SYS_ID_AA64MMFR0_EL1);
GUEST_REG_SYNC(SYS_ID_AA64MMFR1_EL1);
GUEST_REG_SYNC(SYS_ID_AA64MMFR2_EL1);
GUEST_REG_SYNC(SYS_ID_AA64MMFR3_EL1);
GUEST_REG_SYNC(SYS_ID_AA64ZFR0_EL1);
+ GUEST_REG_SYNC(SYS_MPIDR_EL1);
+ GUEST_REG_SYNC(SYS_CLIDR_EL1);
GUEST_REG_SYNC(SYS_CTR_EL0);
GUEST_REG_SYNC(SYS_MIDR_EL1);
GUEST_REG_SYNC(SYS_REVIDR_EL1);
diff --git a/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c
index 87922a89b134..687d04463983 100644
--- a/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c
+++ b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c
@@ -123,6 +123,7 @@ static void guest_setup_gic(void)
static void guest_code(size_t nr_lpis)
{
guest_setup_gic();
+ local_irq_enable();
GUEST_SYNC(0);
@@ -331,7 +332,7 @@ static void setup_vm(void)
{
int i;
- vcpus = malloc(test_data.nr_cpus * sizeof(struct kvm_vcpu));
+ vcpus = malloc(test_data.nr_cpus * sizeof(struct kvm_vcpu *));
TEST_ASSERT(vcpus, "Failed to allocate vCPU array");
vm = vm_create_with_vcpus(test_data.nr_cpus, guest_code, vcpus);
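The allocation fix above is the classic sizeof(element) vs sizeof(pointer)
slip; here the old form merely over-allocated, but the inverse mistake
under-allocates and corrupts memory. A stand-alone illustration with a stub
struct (sizes illustrative):

	#include <stdio.h>
	#include <stdlib.h>

	struct vcpu_stub { char state[512]; };	/* stand-in for struct kvm_vcpu */

	int main(void)
	{
		int nr = 8;
		/* Before: reserves nr full structs for what is an array of pointers. */
		struct vcpu_stub **old = malloc(nr * sizeof(struct vcpu_stub));
		/* After: one pointer-sized slot per entry, matching the array type. */
		struct vcpu_stub **new = malloc(nr * sizeof(struct vcpu_stub *));

		printf("%zu vs %zu bytes\n",
		       nr * sizeof(struct vcpu_stub), nr * sizeof(struct vcpu_stub *));
		free(old);
		free(new);
		return 0;
	}

Writing `sizeof(*vcpus)` instead of naming the type sidesteps the slip
entirely.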
diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c
index b3ca6737f304..e7d9aeb418d3 100644
--- a/tools/testing/selftests/kvm/guest_memfd_test.c
+++ b/tools/testing/selftests/kvm/guest_memfd_test.c
@@ -14,8 +14,6 @@
#include <linux/bitmap.h>
#include <linux/falloc.h>
#include <linux/sizes.h>
-#include <setjmp.h>
-#include <signal.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
@@ -24,7 +22,9 @@
#include "test_util.h"
#include "ucall_common.h"
-static void test_file_read_write(int fd)
+static size_t page_size;
+
+static void test_file_read_write(int fd, size_t total_size)
{
char buf[64];
@@ -38,18 +38,22 @@ static void test_file_read_write(int fd)
"pwrite on a guest_mem fd should fail");
}
-static void test_mmap_supported(int fd, size_t page_size, size_t total_size)
+static void test_mmap_cow(int fd, size_t size)
+{
+ void *mem;
+
+ mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+ TEST_ASSERT(mem == MAP_FAILED, "Copy-on-write not allowed by guest_memfd.");
+}
+
+static void test_mmap_supported(int fd, size_t total_size)
{
const char val = 0xaa;
char *mem;
size_t i;
int ret;
- mem = mmap(NULL, total_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
- TEST_ASSERT(mem == MAP_FAILED, "Copy-on-write not allowed by guest_memfd.");
-
- mem = mmap(NULL, total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
- TEST_ASSERT(mem != MAP_FAILED, "mmap() for guest_memfd should succeed.");
+ mem = kvm_mmap(total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
memset(mem, val, total_size);
for (i = 0; i < total_size; i++)
@@ -68,45 +72,37 @@ static void test_mmap_supported(int fd, size_t page_size, size_t total_size)
for (i = 0; i < total_size; i++)
TEST_ASSERT_EQ(READ_ONCE(mem[i]), val);
- ret = munmap(mem, total_size);
- TEST_ASSERT(!ret, "munmap() should succeed.");
-}
-
-static sigjmp_buf jmpbuf;
-void fault_sigbus_handler(int signum)
-{
- siglongjmp(jmpbuf, 1);
+ kvm_munmap(mem, total_size);
}
-static void test_fault_overflow(int fd, size_t page_size, size_t total_size)
+static void test_fault_sigbus(int fd, size_t accessible_size, size_t map_size)
{
- struct sigaction sa_old, sa_new = {
- .sa_handler = fault_sigbus_handler,
- };
- size_t map_size = total_size * 4;
const char val = 0xaa;
char *mem;
size_t i;
- int ret;
- mem = mmap(NULL, map_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
- TEST_ASSERT(mem != MAP_FAILED, "mmap() for guest_memfd should succeed.");
+ mem = kvm_mmap(map_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
- sigaction(SIGBUS, &sa_new, &sa_old);
- if (sigsetjmp(jmpbuf, 1) == 0) {
- memset(mem, 0xaa, map_size);
- TEST_ASSERT(false, "memset() should have triggered SIGBUS.");
- }
- sigaction(SIGBUS, &sa_old, NULL);
+ TEST_EXPECT_SIGBUS(memset(mem, val, map_size));
+ TEST_EXPECT_SIGBUS((void)READ_ONCE(mem[accessible_size]));
- for (i = 0; i < total_size; i++)
+ for (i = 0; i < accessible_size; i++)
TEST_ASSERT_EQ(READ_ONCE(mem[i]), val);
- ret = munmap(mem, map_size);
- TEST_ASSERT(!ret, "munmap() should succeed.");
+ kvm_munmap(mem, map_size);
+}
+
+static void test_fault_overflow(int fd, size_t total_size)
+{
+ test_fault_sigbus(fd, total_size, total_size * 4);
+}
+
+static void test_fault_private(int fd, size_t total_size)
+{
+ test_fault_sigbus(fd, 0, total_size);
}
-static void test_mmap_not_supported(int fd, size_t page_size, size_t total_size)
+static void test_mmap_not_supported(int fd, size_t total_size)
{
char *mem;
@@ -117,7 +113,7 @@ static void test_mmap_not_supported(int fd, size_t page_size, size_t total_size)
TEST_ASSERT_EQ(mem, MAP_FAILED);
}
-static void test_file_size(int fd, size_t page_size, size_t total_size)
+static void test_file_size(int fd, size_t total_size)
{
struct stat sb;
int ret;
@@ -128,7 +124,7 @@ static void test_file_size(int fd, size_t page_size, size_t total_size)
TEST_ASSERT_EQ(sb.st_blksize, page_size);
}
-static void test_fallocate(int fd, size_t page_size, size_t total_size)
+static void test_fallocate(int fd, size_t total_size)
{
int ret;
@@ -165,7 +161,7 @@ static void test_fallocate(int fd, size_t page_size, size_t total_size)
TEST_ASSERT(!ret, "fallocate to restore punched hole should succeed");
}
-static void test_invalid_punch_hole(int fd, size_t page_size, size_t total_size)
+static void test_invalid_punch_hole(int fd, size_t total_size)
{
struct {
off_t offset;
@@ -196,8 +192,7 @@ static void test_invalid_punch_hole(int fd, size_t page_size, size_t total_size)
}
static void test_create_guest_memfd_invalid_sizes(struct kvm_vm *vm,
- uint64_t guest_memfd_flags,
- size_t page_size)
+ uint64_t guest_memfd_flags)
{
size_t size;
int fd;
@@ -214,7 +209,6 @@ static void test_create_guest_memfd_multiple(struct kvm_vm *vm)
{
int fd1, fd2, ret;
struct stat st1, st2;
- size_t page_size = getpagesize();
fd1 = __vm_create_guest_memfd(vm, page_size, 0);
TEST_ASSERT(fd1 != -1, "memfd creation should succeed");
@@ -239,9 +233,9 @@ static void test_create_guest_memfd_multiple(struct kvm_vm *vm)
close(fd1);
}
-static void test_guest_memfd_flags(struct kvm_vm *vm, uint64_t valid_flags)
+static void test_guest_memfd_flags(struct kvm_vm *vm)
{
- size_t page_size = getpagesize();
+ uint64_t valid_flags = vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS);
uint64_t flag;
int fd;
@@ -260,43 +254,57 @@ static void test_guest_memfd_flags(struct kvm_vm *vm, uint64_t valid_flags)
}
}
-static void test_guest_memfd(unsigned long vm_type)
+#define gmem_test(__test, __vm, __flags) \
+do { \
+ int fd = vm_create_guest_memfd(__vm, page_size * 4, __flags); \
+ \
+ test_##__test(fd, page_size * 4); \
+ close(fd); \
+} while (0)
+
+static void __test_guest_memfd(struct kvm_vm *vm, uint64_t flags)
{
- uint64_t flags = 0;
- struct kvm_vm *vm;
- size_t total_size;
- size_t page_size;
- int fd;
+ test_create_guest_memfd_multiple(vm);
+ test_create_guest_memfd_invalid_sizes(vm, flags);
- page_size = getpagesize();
- total_size = page_size * 4;
+ gmem_test(file_read_write, vm, flags);
- vm = vm_create_barebones_type(vm_type);
+ if (flags & GUEST_MEMFD_FLAG_MMAP) {
+ if (flags & GUEST_MEMFD_FLAG_INIT_SHARED) {
+ gmem_test(mmap_supported, vm, flags);
+ gmem_test(fault_overflow, vm, flags);
+ } else {
+ gmem_test(fault_private, vm, flags);
+ }
- if (vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_MMAP))
- flags |= GUEST_MEMFD_FLAG_MMAP;
+ gmem_test(mmap_cow, vm, flags);
+ } else {
+ gmem_test(mmap_not_supported, vm, flags);
+ }
- test_create_guest_memfd_multiple(vm);
- test_create_guest_memfd_invalid_sizes(vm, flags, page_size);
+ gmem_test(file_size, vm, flags);
+ gmem_test(fallocate, vm, flags);
+ gmem_test(invalid_punch_hole, vm, flags);
+}
- fd = vm_create_guest_memfd(vm, total_size, flags);
+static void test_guest_memfd(unsigned long vm_type)
+{
+ struct kvm_vm *vm = vm_create_barebones_type(vm_type);
+ uint64_t flags;
- test_file_read_write(fd);
+ test_guest_memfd_flags(vm);
- if (flags & GUEST_MEMFD_FLAG_MMAP) {
- test_mmap_supported(fd, page_size, total_size);
- test_fault_overflow(fd, page_size, total_size);
- } else {
- test_mmap_not_supported(fd, page_size, total_size);
- }
+ __test_guest_memfd(vm, 0);
- test_file_size(fd, page_size, total_size);
- test_fallocate(fd, page_size, total_size);
- test_invalid_punch_hole(fd, page_size, total_size);
+ flags = vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS);
+ if (flags & GUEST_MEMFD_FLAG_MMAP)
+ __test_guest_memfd(vm, GUEST_MEMFD_FLAG_MMAP);
- test_guest_memfd_flags(vm, flags);
+ /* MMAP should always be supported if INIT_SHARED is supported. */
+ if (flags & GUEST_MEMFD_FLAG_INIT_SHARED)
+ __test_guest_memfd(vm, GUEST_MEMFD_FLAG_MMAP |
+ GUEST_MEMFD_FLAG_INIT_SHARED);
- close(fd);
kvm_vm_free(vm);
}
@@ -328,22 +336,26 @@ static void test_guest_memfd_guest(void)
size_t size;
int fd, i;
- if (!kvm_has_cap(KVM_CAP_GUEST_MEMFD_MMAP))
+ if (!kvm_check_cap(KVM_CAP_GUEST_MEMFD_FLAGS))
return;
vm = __vm_create_shape_with_one_vcpu(VM_SHAPE_DEFAULT, &vcpu, 1, guest_code);
- TEST_ASSERT(vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_MMAP),
- "Default VM type should always support guest_memfd mmap()");
+ TEST_ASSERT(vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS) & GUEST_MEMFD_FLAG_MMAP,
+ "Default VM type should support MMAP, supported flags = 0x%x",
+ vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS));
+ TEST_ASSERT(vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS) & GUEST_MEMFD_FLAG_INIT_SHARED,
+ "Default VM type should support INIT_SHARED, supported flags = 0x%x",
+ vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS));
size = vm->page_size;
- fd = vm_create_guest_memfd(vm, size, GUEST_MEMFD_FLAG_MMAP);
+ fd = vm_create_guest_memfd(vm, size, GUEST_MEMFD_FLAG_MMAP |
+ GUEST_MEMFD_FLAG_INIT_SHARED);
vm_set_user_memory_region2(vm, slot, KVM_MEM_GUEST_MEMFD, gpa, size, NULL, fd, 0);
- mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
- TEST_ASSERT(mem != MAP_FAILED, "mmap() on guest_memfd failed");
+ mem = kvm_mmap(size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
memset(mem, 0xaa, size);
- munmap(mem, size);
+ kvm_munmap(mem, size);
virt_pg_map(vm, gpa, gpa);
vcpu_args_set(vcpu, 2, gpa, size);
@@ -351,8 +363,7 @@ static void test_guest_memfd_guest(void)
TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
- mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
- TEST_ASSERT(mem != MAP_FAILED, "mmap() on guest_memfd failed");
+ mem = kvm_mmap(size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
for (i = 0; i < size; i++)
TEST_ASSERT_EQ(mem[i], 0xff);
@@ -366,6 +377,8 @@ int main(int argc, char *argv[])
TEST_REQUIRE(kvm_has_cap(KVM_CAP_GUEST_MEMFD));
+ page_size = getpagesize();
+
/*
* Not all architectures support KVM_CAP_VM_TYPES. However, those that
* support guest_memfd have that support for the default VM type.
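For callers outside this test, KVM_CAP_GUEST_MEMFD_FLAGS reports a flag mask
rather than a boolean; a usage sketch within the selftest harness (names as
used in the test above):

	uint64_t supported = vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS);
	uint64_t want = GUEST_MEMFD_FLAG_MMAP | GUEST_MEMFD_FLAG_INIT_SHARED;
	int fd;

	/* Request mmap()able, shared-by-default memory only if both are supported. */
	if ((supported & want) == want)
		fd = vm_create_guest_memfd(vm, size, want);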
diff --git a/tools/testing/selftests/kvm/include/arm64/processor.h b/tools/testing/selftests/kvm/include/arm64/processor.h
index 6f481475c135..ff928716574d 100644
--- a/tools/testing/selftests/kvm/include/arm64/processor.h
+++ b/tools/testing/selftests/kvm/include/arm64/processor.h
@@ -305,7 +305,17 @@ void test_wants_mte(void);
void test_disable_default_vgic(void);
bool vm_supports_el2(struct kvm_vm *vm);
-static bool vcpu_has_el2(struct kvm_vcpu *vcpu)
+
+static inline bool test_supports_el2(void)
+{
+ struct kvm_vm *vm = vm_create(1);
+ bool supported = vm_supports_el2(vm);
+
+ kvm_vm_free(vm);
+ return supported;
+}
+
+static inline bool vcpu_has_el2(struct kvm_vcpu *vcpu)
{
return vcpu->init.features[0] & BIT(KVM_ARM_VCPU_HAS_EL2);
}
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 26cc30290e76..d3f3e455c031 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -286,6 +286,31 @@ static inline bool kvm_has_cap(long cap)
#define __KVM_SYSCALL_ERROR(_name, _ret) \
"%s failed, rc: %i errno: %i (%s)", (_name), (_ret), errno, strerror(errno)
+static inline void *__kvm_mmap(size_t size, int prot, int flags, int fd,
+ off_t offset)
+{
+ void *mem;
+
+ mem = mmap(NULL, size, prot, flags, fd, offset);
+ TEST_ASSERT(mem != MAP_FAILED, __KVM_SYSCALL_ERROR("mmap()",
+ (int)(unsigned long)MAP_FAILED));
+
+ return mem;
+}
+
+static inline void *kvm_mmap(size_t size, int prot, int flags, int fd)
+{
+ return __kvm_mmap(size, prot, flags, fd, 0);
+}
+
+static inline void kvm_munmap(void *mem, size_t size)
+{
+ int ret;
+
+ ret = munmap(mem, size);
+ TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
+}
+
/*
* Use the "inner", double-underscore macro when reporting errors from within
* other macros so that the name of ioctl() and not its literal numeric value
@@ -1273,4 +1298,6 @@ bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr);
uint32_t guest_get_vcpuid(void);
+bool kvm_arch_has_default_irqchip(void);
+
#endif /* SELFTEST_KVM_UTIL_H */
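A usage sketch for the new wrappers: because they TEST_ASSERT() internally,
call sites shed their MAP_FAILED and return-value checks entirely.

	void *mem = kvm_mmap(size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);

	memset(mem, 0xaa, size);	/* no MAP_FAILED check needed */
	kvm_munmap(mem, size);		/* asserts that munmap() succeeded */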
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
index c6ef895fbd9a..b4872ba8ed12 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -8,6 +8,8 @@
#ifndef SELFTEST_KVM_TEST_UTIL_H
#define SELFTEST_KVM_TEST_UTIL_H
+#include <setjmp.h>
+#include <signal.h>
#include <stdlib.h>
#include <stdarg.h>
#include <stdbool.h>
@@ -78,6 +80,23 @@ do { \
__builtin_unreachable(); \
} while (0)
+extern sigjmp_buf expect_sigbus_jmpbuf;
+void expect_sigbus_handler(int signum);
+
+#define TEST_EXPECT_SIGBUS(action) \
+do { \
+ struct sigaction sa_old, sa_new = { \
+ .sa_handler = expect_sigbus_handler, \
+ }; \
+ \
+ sigaction(SIGBUS, &sa_new, &sa_old); \
+ if (sigsetjmp(expect_sigbus_jmpbuf, 1) == 0) { \
+ action; \
+ TEST_FAIL("'%s' should have triggered SIGBUS", #action); \
+ } \
+ sigaction(SIGBUS, &sa_old, NULL); \
+} while (0)
+
size_t parse_size(const char *size);
int64_t timespec_to_ns(struct timespec ts);
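A usage sketch for the macro above, mirroring the guest_memfd test earlier in
this series: the wrapped statement must raise SIGBUS, or the test fails, and
the previous SIGBUS handler is restored either way.

	char *mem = kvm_mmap(map_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);

	TEST_EXPECT_SIGBUS(memset(mem, 0xaa, map_size));
	TEST_EXPECT_SIGBUS((void)READ_ONCE(mem[0]));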
diff --git a/tools/testing/selftests/kvm/irqfd_test.c b/tools/testing/selftests/kvm/irqfd_test.c
index 7c301b4c7005..5d7590d01868 100644
--- a/tools/testing/selftests/kvm/irqfd_test.c
+++ b/tools/testing/selftests/kvm/irqfd_test.c
@@ -89,11 +89,19 @@ static void juggle_eventfd_primary(struct kvm_vm *vm, int eventfd)
int main(int argc, char *argv[])
{
pthread_t racing_thread;
+ struct kvm_vcpu *unused;
int r, i;
- /* Create "full" VMs, as KVM_IRQFD requires an in-kernel IRQ chip. */
- vm1 = vm_create(1);
- vm2 = vm_create(1);
+ TEST_REQUIRE(kvm_arch_has_default_irqchip());
+
+ /*
+ * Create "full" VMs, as KVM_IRQFD requires an in-kernel IRQ chip. Also
+ * create an unused vCPU as certain architectures (like arm64) need to
+ * complete IRQ chip initialization after all possible vCPUs for a VM
+ * have been created.
+ */
+ vm1 = vm_create_with_one_vcpu(&unused, NULL);
+ vm2 = vm_create_with_one_vcpu(&unused, NULL);
WRITE_ONCE(__eventfd, kvm_new_eventfd());
diff --git a/tools/testing/selftests/kvm/lib/arm64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c
index 369a4c87dd8f..54f6d17c78f7 100644
--- a/tools/testing/selftests/kvm/lib/arm64/processor.c
+++ b/tools/testing/selftests/kvm/lib/arm64/processor.c
@@ -725,3 +725,8 @@ void kvm_arch_vm_release(struct kvm_vm *vm)
if (vm->arch.has_gic)
close(vm->arch.gic_fd);
}
+
+bool kvm_arch_has_default_irqchip(void)
+{
+ return request_vgic && kvm_supports_vgic_v3();
+}
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 6743fbd9bd67..1a93d6361671 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -741,13 +741,11 @@ static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
int ret;
if (vcpu->dirty_gfns) {
- ret = munmap(vcpu->dirty_gfns, vm->dirty_ring_size);
- TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
+ kvm_munmap(vcpu->dirty_gfns, vm->dirty_ring_size);
vcpu->dirty_gfns = NULL;
}
- ret = munmap(vcpu->run, vcpu_mmap_sz());
- TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
+ kvm_munmap(vcpu->run, vcpu_mmap_sz());
ret = close(vcpu->fd);
TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));
@@ -783,20 +781,16 @@ void kvm_vm_release(struct kvm_vm *vmp)
static void __vm_mem_region_delete(struct kvm_vm *vm,
struct userspace_mem_region *region)
{
- int ret;
-
rb_erase(&region->gpa_node, &vm->regions.gpa_tree);
rb_erase(&region->hva_node, &vm->regions.hva_tree);
hash_del(&region->slot_node);
sparsebit_free(&region->unused_phy_pages);
sparsebit_free(&region->protected_phy_pages);
- ret = munmap(region->mmap_start, region->mmap_size);
- TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
+ kvm_munmap(region->mmap_start, region->mmap_size);
if (region->fd >= 0) {
/* There's an extra map when using shared memory. */
- ret = munmap(region->mmap_alias, region->mmap_size);
- TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
+ kvm_munmap(region->mmap_alias, region->mmap_size);
close(region->fd);
}
if (region->region.guest_memfd >= 0)
@@ -1053,12 +1047,9 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
region->fd = kvm_memfd_alloc(region->mmap_size,
src_type == VM_MEM_SRC_SHARED_HUGETLB);
- region->mmap_start = mmap(NULL, region->mmap_size,
- PROT_READ | PROT_WRITE,
- vm_mem_backing_src_alias(src_type)->flag,
- region->fd, 0);
- TEST_ASSERT(region->mmap_start != MAP_FAILED,
- __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED));
+ region->mmap_start = kvm_mmap(region->mmap_size, PROT_READ | PROT_WRITE,
+ vm_mem_backing_src_alias(src_type)->flag,
+ region->fd);
TEST_ASSERT(!is_backing_src_hugetlb(src_type) ||
region->mmap_start == align_ptr_up(region->mmap_start, backing_src_pagesz),
@@ -1129,12 +1120,10 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
/* If shared memory, create an alias. */
if (region->fd >= 0) {
- region->mmap_alias = mmap(NULL, region->mmap_size,
- PROT_READ | PROT_WRITE,
- vm_mem_backing_src_alias(src_type)->flag,
- region->fd, 0);
- TEST_ASSERT(region->mmap_alias != MAP_FAILED,
- __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED));
+ region->mmap_alias = kvm_mmap(region->mmap_size,
+ PROT_READ | PROT_WRITE,
+ vm_mem_backing_src_alias(src_type)->flag,
+ region->fd);
/* Align host alias address */
region->host_alias = align_ptr_up(region->mmap_alias, alignment);
@@ -1344,10 +1333,8 @@ struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->run), "vcpu mmap size "
"smaller than expected, vcpu_mmap_sz: %zi expected_min: %zi",
vcpu_mmap_sz(), sizeof(*vcpu->run));
- vcpu->run = (struct kvm_run *) mmap(NULL, vcpu_mmap_sz(),
- PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0);
- TEST_ASSERT(vcpu->run != MAP_FAILED,
- __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED));
+ vcpu->run = kvm_mmap(vcpu_mmap_sz(), PROT_READ | PROT_WRITE,
+ MAP_SHARED, vcpu->fd);
if (kvm_has_cap(KVM_CAP_BINARY_STATS_FD))
vcpu->stats.fd = vcpu_get_stats_fd(vcpu);
@@ -1794,9 +1781,8 @@ void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu)
page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped exec");
- addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd,
- page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
- TEST_ASSERT(addr != MAP_FAILED, "Dirty ring map failed");
+ addr = __kvm_mmap(size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd,
+ page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
vcpu->dirty_gfns = addr;
vcpu->dirty_gfns_count = size / sizeof(struct kvm_dirty_gfn);
@@ -2344,3 +2330,8 @@ bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr)
pg = paddr >> vm->page_shift;
return sparsebit_is_set(region->protected_phy_pages, pg);
}
+
+__weak bool kvm_arch_has_default_irqchip(void)
+{
+ return false;
+}
diff --git a/tools/testing/selftests/kvm/lib/s390/processor.c b/tools/testing/selftests/kvm/lib/s390/processor.c
index 20cfe970e3e3..8ceeb17c819a 100644
--- a/tools/testing/selftests/kvm/lib/s390/processor.c
+++ b/tools/testing/selftests/kvm/lib/s390/processor.c
@@ -221,3 +221,8 @@ void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
{
}
+
+bool kvm_arch_has_default_irqchip(void)
+{
+ return true;
+}
diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c
index 03eb99af9b8d..8a1848586a85 100644
--- a/tools/testing/selftests/kvm/lib/test_util.c
+++ b/tools/testing/selftests/kvm/lib/test_util.c
@@ -18,6 +18,13 @@
#include "test_util.h"
+sigjmp_buf expect_sigbus_jmpbuf;
+
+void __attribute__((used)) expect_sigbus_handler(int signum)
+{
+ siglongjmp(expect_sigbus_jmpbuf, 1);
+}
+
/*
* Random number generator that is usable from guest code. This is the
* Park-Miller LCG using standard constants.
diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c
index c748cd9b2eef..b418502c5ecc 100644
--- a/tools/testing/selftests/kvm/lib/x86/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86/processor.c
@@ -1318,3 +1318,8 @@ bool sys_clocksource_is_based_on_tsc(void)
return ret;
}
+
+bool kvm_arch_has_default_irqchip(void)
+{
+ return true;
+}
diff --git a/tools/testing/selftests/kvm/mmu_stress_test.c b/tools/testing/selftests/kvm/mmu_stress_test.c
index 6a437d2be9fa..37b7e6524533 100644
--- a/tools/testing/selftests/kvm/mmu_stress_test.c
+++ b/tools/testing/selftests/kvm/mmu_stress_test.c
@@ -339,8 +339,7 @@ int main(int argc, char *argv[])
TEST_ASSERT(max_gpa > (4 * slot_size), "MAXPHYADDR <4gb ");
fd = kvm_memfd_alloc(slot_size, hugepages);
- mem = mmap(NULL, slot_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
- TEST_ASSERT(mem != MAP_FAILED, "mmap() failed");
+ mem = kvm_mmap(slot_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
TEST_ASSERT(!madvise(mem, slot_size, MADV_NOHUGEPAGE), "madvise() failed");
@@ -413,7 +412,7 @@ int main(int argc, char *argv[])
for (slot = (slot - 1) & ~1ull; slot >= first_slot; slot -= 2)
vm_set_user_memory_region(vm, slot, 0, 0, 0, NULL);
- munmap(mem, slot_size / 2);
+ kvm_munmap(mem, slot_size / 2);
/* Sanity check that the vCPUs actually ran. */
for (i = 0; i < nr_vcpus; i++)
diff --git a/tools/testing/selftests/kvm/pre_fault_memory_test.c b/tools/testing/selftests/kvm/pre_fault_memory_test.c
index 0350a8896a2f..f04768c1d2e4 100644
--- a/tools/testing/selftests/kvm/pre_fault_memory_test.c
+++ b/tools/testing/selftests/kvm/pre_fault_memory_test.c
@@ -10,6 +10,7 @@
#include <test_util.h>
#include <kvm_util.h>
#include <processor.h>
+#include <pthread.h>
/* Arbitrarily chosen values */
#define TEST_SIZE (SZ_2M + PAGE_SIZE)
@@ -30,18 +31,66 @@ static void guest_code(uint64_t base_gpa)
GUEST_DONE();
}
-static void pre_fault_memory(struct kvm_vcpu *vcpu, u64 gpa, u64 size,
- u64 left)
+struct slot_worker_data {
+ struct kvm_vm *vm;
+ u64 gpa;
+ uint32_t flags;
+ bool worker_ready;
+ bool prefault_ready;
+ bool recreate_slot;
+};
+
+static void *delete_slot_worker(void *__data)
+{
+ struct slot_worker_data *data = __data;
+ struct kvm_vm *vm = data->vm;
+
+ WRITE_ONCE(data->worker_ready, true);
+
+ while (!READ_ONCE(data->prefault_ready))
+ cpu_relax();
+
+ vm_mem_region_delete(vm, TEST_SLOT);
+
+ while (!READ_ONCE(data->recreate_slot))
+ cpu_relax();
+
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, data->gpa,
+ TEST_SLOT, TEST_NPAGES, data->flags);
+
+ return NULL;
+}
+
+static void pre_fault_memory(struct kvm_vcpu *vcpu, u64 base_gpa, u64 offset,
+ u64 size, u64 expected_left, bool private)
{
struct kvm_pre_fault_memory range = {
- .gpa = gpa,
+ .gpa = base_gpa + offset,
.size = size,
.flags = 0,
};
- u64 prev;
+ struct slot_worker_data data = {
+ .vm = vcpu->vm,
+ .gpa = base_gpa,
+ .flags = private ? KVM_MEM_GUEST_MEMFD : 0,
+ };
+ bool slot_recreated = false;
+ pthread_t slot_worker;
int ret, save_errno;
+ u64 prev;
+
+ /*
+ * Concurrently delete (and recreate) the slot to test KVM's handling
+ * of a racing memslot deletion with prefaulting.
+ */
+ pthread_create(&slot_worker, NULL, delete_slot_worker, &data);
- do {
+ while (!READ_ONCE(data.worker_ready))
+ cpu_relax();
+
+ WRITE_ONCE(data.prefault_ready, true);
+
+ for (;;) {
prev = range.size;
ret = __vcpu_ioctl(vcpu, KVM_PRE_FAULT_MEMORY, &range);
save_errno = errno;
@@ -49,18 +98,65 @@ static void pre_fault_memory(struct kvm_vcpu *vcpu, u64 gpa, u64 size,
"%sexpecting range.size to change on %s",
ret < 0 ? "not " : "",
ret < 0 ? "failure" : "success");
- } while (ret >= 0 ? range.size : save_errno == EINTR);
- TEST_ASSERT(range.size == left,
- "Completed with %lld bytes left, expected %" PRId64,
- range.size, left);
+ /*
+ * Immediately retry prefaulting if KVM was interrupted by an
+ * unrelated signal/event.
+ */
+ if (ret < 0 && save_errno == EINTR)
+ continue;
+
+ /*
+ * Tell the worker to recreate the slot in order to complete
+ * prefaulting (if prefault didn't already succeed before the
+ * slot was deleted) and/or to prepare for the next testcase.
+ * Wait for the worker to exit so that the next invocation of
+ * prefaulting is guaranteed to complete (assuming no KVM bugs).
+ */
+ if (!slot_recreated) {
+ WRITE_ONCE(data.recreate_slot, true);
+ pthread_join(slot_worker, NULL);
+ slot_recreated = true;
+
+ /*
+ * Retry prefaulting to get a stable result, i.e. to
+ * avoid seeing random EAGAIN failures. Don't retry if
+ * prefaulting already succeeded, as KVM disallows
+ * prefaulting with size=0, i.e. blindly retrying would
+ * result in test failures due to EINVAL. KVM should
+ * always return success if all bytes are prefaulted,
+ * i.e. there is no need to guard against EAGAIN being
+ * returned.
+ */
+ if (range.size)
+ continue;
+ }
+
+ /*
+ * All done if there are no remaining bytes to prefault, or if
+ * prefaulting failed (EINTR was handled above, and EAGAIN due
+ * to prefaulting a memslot that's being actively deleted should
+ * be impossible since the memslot has already been recreated).
+ */
+ if (!range.size || ret < 0)
+ break;
+ }
- if (left == 0)
- __TEST_ASSERT_VM_VCPU_IOCTL(!ret, "KVM_PRE_FAULT_MEMORY", ret, vcpu->vm);
+ TEST_ASSERT(range.size == expected_left,
+ "Completed with %llu bytes left, expected %lu",
+ range.size, expected_left);
+
+ /*
+ * Assert success if prefaulting the entire range should succeed, i.e.
+ * complete with no bytes remaining. Otherwise prefaulting should have
+ * failed due to ENOENT (due to RET_PF_EMULATE for emulated MMIO when
+ * no memslot exists).
+ */
+ if (!expected_left)
+ TEST_ASSERT_VM_VCPU_IOCTL(!ret, KVM_PRE_FAULT_MEMORY, ret, vcpu->vm);
else
- /* No memory slot causes RET_PF_EMULATE. it results in -ENOENT. */
- __TEST_ASSERT_VM_VCPU_IOCTL(ret && save_errno == ENOENT,
- "KVM_PRE_FAULT_MEMORY", ret, vcpu->vm);
+ TEST_ASSERT_VM_VCPU_IOCTL(ret && save_errno == ENOENT,
+ KVM_PRE_FAULT_MEMORY, ret, vcpu->vm);
}
static void __test_pre_fault_memory(unsigned long vm_type, bool private)
@@ -97,9 +193,10 @@ static void __test_pre_fault_memory(unsigned long vm_type, bool private)
if (private)
vm_mem_set_private(vm, guest_test_phys_mem, TEST_SIZE);
- pre_fault_memory(vcpu, guest_test_phys_mem, SZ_2M, 0);
- pre_fault_memory(vcpu, guest_test_phys_mem + SZ_2M, PAGE_SIZE * 2, PAGE_SIZE);
- pre_fault_memory(vcpu, guest_test_phys_mem + TEST_SIZE, PAGE_SIZE, PAGE_SIZE);
+
+ pre_fault_memory(vcpu, guest_test_phys_mem, 0, SZ_2M, 0, private);
+ pre_fault_memory(vcpu, guest_test_phys_mem, SZ_2M, PAGE_SIZE * 2, PAGE_SIZE, private);
+ pre_fault_memory(vcpu, guest_test_phys_mem, TEST_SIZE, PAGE_SIZE, PAGE_SIZE, private);
vcpu_args_set(vcpu, 1, guest_test_virt_mem);
vcpu_run(vcpu);
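The prefault/delete race above is sequenced with a simple flag handshake. A
condensed, self-contained sketch of the pattern, with stand-ins for the
READ_ONCE/WRITE_ONCE/cpu_relax helpers the selftest headers provide:

	#include <pthread.h>
	#include <stdbool.h>
	#include <stdio.h>

	#define READ_ONCE(x)		(*(volatile typeof(x) *)&(x))
	#define WRITE_ONCE(x, v)	(*(volatile typeof(x) *)&(x) = (v))
	#define cpu_relax()		do { } while (0)	/* arch barrier in the real headers */

	static bool worker_ready, go;

	static void *worker(void *arg)
	{
		WRITE_ONCE(worker_ready, true);		/* signal: worker is spinning */
		while (!READ_ONCE(go))
			cpu_relax();
		/* ...racing operation goes here (slot deletion in the test)... */
		return NULL;
	}

	int main(void)
	{
		pthread_t t;

		pthread_create(&t, NULL, worker, NULL);
		while (!READ_ONCE(worker_ready))
			cpu_relax();
		WRITE_ONCE(go, true);			/* both sides now proceed together */
		pthread_join(t, NULL);
		puts("handshake complete");
		return 0;
	}

Waiting for worker_ready before setting go maximizes the overlap between the
prefault loop and the slot deletion, which is the point of the test.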
diff --git a/tools/testing/selftests/kvm/s390/ucontrol_test.c b/tools/testing/selftests/kvm/s390/ucontrol_test.c
index d265b34c54be..50bc1c38225a 100644
--- a/tools/testing/selftests/kvm/s390/ucontrol_test.c
+++ b/tools/testing/selftests/kvm/s390/ucontrol_test.c
@@ -142,19 +142,17 @@ FIXTURE_SETUP(uc_kvm)
self->kvm_run_size = ioctl(self->kvm_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
ASSERT_GE(self->kvm_run_size, sizeof(struct kvm_run))
TH_LOG(KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, self->kvm_run_size));
- self->run = (struct kvm_run *)mmap(NULL, self->kvm_run_size,
- PROT_READ | PROT_WRITE, MAP_SHARED, self->vcpu_fd, 0);
- ASSERT_NE(self->run, MAP_FAILED);
+ self->run = kvm_mmap(self->kvm_run_size, PROT_READ | PROT_WRITE,
+ MAP_SHARED, self->vcpu_fd);
/**
* For virtual cpus that have been created with S390 user controlled
* virtual machines, the resulting vcpu fd can be memory mapped at page
* offset KVM_S390_SIE_PAGE_OFFSET in order to obtain a memory map of
* the virtual cpu's hardware control block.
*/
- self->sie_block = (struct kvm_s390_sie_block *)mmap(NULL, PAGE_SIZE,
- PROT_READ | PROT_WRITE, MAP_SHARED,
- self->vcpu_fd, KVM_S390_SIE_PAGE_OFFSET << PAGE_SHIFT);
- ASSERT_NE(self->sie_block, MAP_FAILED);
+ self->sie_block = __kvm_mmap(PAGE_SIZE, PROT_READ | PROT_WRITE,
+ MAP_SHARED, self->vcpu_fd,
+ KVM_S390_SIE_PAGE_OFFSET << PAGE_SHIFT);
TH_LOG("VM created %p %p", self->run, self->sie_block);
@@ -186,8 +184,8 @@ FIXTURE_SETUP(uc_kvm)
FIXTURE_TEARDOWN(uc_kvm)
{
- munmap(self->sie_block, PAGE_SIZE);
- munmap(self->run, self->kvm_run_size);
+ kvm_munmap(self->sie_block, PAGE_SIZE);
+ kvm_munmap(self->run, self->kvm_run_size);
close(self->vcpu_fd);
close(self->vm_fd);
close(self->kvm_fd);
diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c
index ce3ac0fd6dfb..7fe427ff9b38 100644
--- a/tools/testing/selftests/kvm/set_memory_region_test.c
+++ b/tools/testing/selftests/kvm/set_memory_region_test.c
@@ -433,10 +433,10 @@ static void test_add_max_memory_regions(void)
pr_info("Adding slots 0..%i, each memory region with %dK size\n",
(max_mem_slots - 1), MEM_REGION_SIZE >> 10);
- mem = mmap(NULL, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment,
- PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
- TEST_ASSERT(mem != MAP_FAILED, "Failed to mmap() host");
+
+ mem = kvm_mmap((size_t)max_mem_slots * MEM_REGION_SIZE + alignment,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1);
mem_aligned = (void *)(((size_t) mem + alignment - 1) & ~(alignment - 1));
for (slot = 0; slot < max_mem_slots; slot++)
@@ -446,9 +446,8 @@ static void test_add_max_memory_regions(void)
mem_aligned + (uint64_t)slot * MEM_REGION_SIZE);
/* Check that memory slots beyond the limit cannot be added */
- mem_extra = mmap(NULL, MEM_REGION_SIZE, PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
- TEST_ASSERT(mem_extra != MAP_FAILED, "Failed to mmap() host");
+ mem_extra = kvm_mmap(MEM_REGION_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1);
ret = __vm_set_user_memory_region(vm, max_mem_slots, 0,
(uint64_t)max_mem_slots * MEM_REGION_SIZE,
@@ -456,8 +455,8 @@ static void test_add_max_memory_regions(void)
TEST_ASSERT(ret == -1 && errno == EINVAL,
"Adding one more memory slot should fail with EINVAL");
- munmap(mem, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment);
- munmap(mem_extra, MEM_REGION_SIZE);
+ kvm_munmap(mem, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment);
+ kvm_munmap(mem_extra, MEM_REGION_SIZE);
kvm_vm_free(vm);
}
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 0227e13cd8dd..5f0015c5dd95 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -113,6 +113,7 @@ config KVM_GENERIC_MEMORY_ATTRIBUTES
bool
config KVM_GUEST_MEMFD
+ depends on KVM_GENERIC_MMU_NOTIFIER
select XARRAY_MULTI
bool
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index 94bafd6c558c..fbca8c0972da 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -102,8 +102,17 @@ static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index)
return filemap_grab_folio(inode->i_mapping, index);
}
-static void kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start,
- pgoff_t end)
+static enum kvm_gfn_range_filter kvm_gmem_get_invalidate_filter(struct inode *inode)
+{
+ if ((u64)inode->i_private & GUEST_MEMFD_FLAG_INIT_SHARED)
+ return KVM_FILTER_SHARED;
+
+ return KVM_FILTER_PRIVATE;
+}
+
+static void __kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start,
+ pgoff_t end,
+ enum kvm_gfn_range_filter attr_filter)
{
bool flush = false, found_memslot = false;
struct kvm_memory_slot *slot;
@@ -118,8 +127,7 @@ static void kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start,
.end = slot->base_gfn + min(pgoff + slot->npages, end) - pgoff,
.slot = slot,
.may_block = true,
- /* guest memfd is relevant to only private mappings. */
- .attr_filter = KVM_FILTER_PRIVATE,
+ .attr_filter = attr_filter,
};
if (!found_memslot) {
@@ -139,8 +147,21 @@ static void kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start,
KVM_MMU_UNLOCK(kvm);
}
-static void kvm_gmem_invalidate_end(struct kvm_gmem *gmem, pgoff_t start,
- pgoff_t end)
+static void kvm_gmem_invalidate_begin(struct inode *inode, pgoff_t start,
+ pgoff_t end)
+{
+ struct list_head *gmem_list = &inode->i_mapping->i_private_list;
+ enum kvm_gfn_range_filter attr_filter;
+ struct kvm_gmem *gmem;
+
+ attr_filter = kvm_gmem_get_invalidate_filter(inode);
+
+ list_for_each_entry(gmem, gmem_list, entry)
+ __kvm_gmem_invalidate_begin(gmem, start, end, attr_filter);
+}
+
+static void __kvm_gmem_invalidate_end(struct kvm_gmem *gmem, pgoff_t start,
+ pgoff_t end)
{
struct kvm *kvm = gmem->kvm;
@@ -151,12 +172,20 @@ static void kvm_gmem_invalidate_end(struct kvm_gmem *gmem, pgoff_t start,
}
}
-static long kvm_gmem_punch_hole(struct inode *inode, loff_t offset, loff_t len)
+static void kvm_gmem_invalidate_end(struct inode *inode, pgoff_t start,
+ pgoff_t end)
{
struct list_head *gmem_list = &inode->i_mapping->i_private_list;
+ struct kvm_gmem *gmem;
+
+ list_for_each_entry(gmem, gmem_list, entry)
+ __kvm_gmem_invalidate_end(gmem, start, end);
+}
+
+static long kvm_gmem_punch_hole(struct inode *inode, loff_t offset, loff_t len)
+{
pgoff_t start = offset >> PAGE_SHIFT;
pgoff_t end = (offset + len) >> PAGE_SHIFT;
- struct kvm_gmem *gmem;
/*
* Bindings must be stable across invalidation to ensure the start+end
@@ -164,13 +193,11 @@ static long kvm_gmem_punch_hole(struct inode *inode, loff_t offset, loff_t len)
*/
filemap_invalidate_lock(inode->i_mapping);
- list_for_each_entry(gmem, gmem_list, entry)
- kvm_gmem_invalidate_begin(gmem, start, end);
+ kvm_gmem_invalidate_begin(inode, start, end);
truncate_inode_pages_range(inode->i_mapping, offset, offset + len - 1);
- list_for_each_entry(gmem, gmem_list, entry)
- kvm_gmem_invalidate_end(gmem, start, end);
+ kvm_gmem_invalidate_end(inode, start, end);
filemap_invalidate_unlock(inode->i_mapping);
@@ -280,8 +307,9 @@ static int kvm_gmem_release(struct inode *inode, struct file *file)
* Zap all SPTEs pointed at by this file. Do not free the backing
* memory, as its lifetime is associated with the inode, not the file.
*/
- kvm_gmem_invalidate_begin(gmem, 0, -1ul);
- kvm_gmem_invalidate_end(gmem, 0, -1ul);
+ __kvm_gmem_invalidate_begin(gmem, 0, -1ul,
+ kvm_gmem_get_invalidate_filter(inode));
+ __kvm_gmem_invalidate_end(gmem, 0, -1ul);
list_del(&gmem->entry);
@@ -328,6 +356,9 @@ static vm_fault_t kvm_gmem_fault_user_mapping(struct vm_fault *vmf)
if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode))
return VM_FAULT_SIGBUS;
+ if (!((u64)inode->i_private & GUEST_MEMFD_FLAG_INIT_SHARED))
+ return VM_FAULT_SIGBUS;
+
folio = kvm_gmem_get_folio(inode, vmf->pgoff);
if (IS_ERR(folio)) {
int err = PTR_ERR(folio);
@@ -400,8 +431,6 @@ static int kvm_gmem_migrate_folio(struct address_space *mapping,
static int kvm_gmem_error_folio(struct address_space *mapping, struct folio *folio)
{
- struct list_head *gmem_list = &mapping->i_private_list;
- struct kvm_gmem *gmem;
pgoff_t start, end;
filemap_invalidate_lock_shared(mapping);
@@ -409,8 +438,7 @@ static int kvm_gmem_error_folio(struct address_space *mapping, struct folio *fol
start = folio->index;
end = start + folio_nr_pages(folio);
- list_for_each_entry(gmem, gmem_list, entry)
- kvm_gmem_invalidate_begin(gmem, start, end);
+ kvm_gmem_invalidate_begin(mapping->host, start, end);
/*
* Do not truncate the range, what action is taken in response to the
@@ -421,8 +449,7 @@ static int kvm_gmem_error_folio(struct address_space *mapping, struct folio *fol
* error to userspace.
*/
- list_for_each_entry(gmem, gmem_list, entry)
- kvm_gmem_invalidate_end(gmem, start, end);
+ kvm_gmem_invalidate_end(mapping->host, start, end);
filemap_invalidate_unlock_shared(mapping);
@@ -458,7 +485,7 @@ static const struct inode_operations kvm_gmem_iops = {
.setattr = kvm_gmem_setattr,
};
-bool __weak kvm_arch_supports_gmem_mmap(struct kvm *kvm)
+bool __weak kvm_arch_supports_gmem_init_shared(struct kvm *kvm)
{
return true;
}
@@ -522,12 +549,8 @@ int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args)
{
loff_t size = args->size;
u64 flags = args->flags;
- u64 valid_flags = 0;
-
- if (kvm_arch_supports_gmem_mmap(kvm))
- valid_flags |= GUEST_MEMFD_FLAG_MMAP;
- if (flags & ~valid_flags)
+ if (flags & ~kvm_gmem_get_supported_flags(kvm))
return -EINVAL;
if (size <= 0 || !PAGE_ALIGNED(size))
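kvm_gmem_get_supported_flags() is not shown in this hunk. Judging from the
call sites here and in kvm_main.c below (where it also serves the system-wide
capability query with kvm == NULL), a plausible shape — an assumption, not the
verbatim helper — is:

	static inline u64 kvm_gmem_get_supported_flags(struct kvm *kvm)
	{
		u64 flags = GUEST_MEMFD_FLAG_MMAP;

		/* !kvm covers the system-scoped KVM_CHECK_EXTENSION query. */
		if (!kvm || kvm_arch_supports_gmem_init_shared(kvm))
			flags |= GUEST_MEMFD_FLAG_INIT_SHARED;

		return flags;
	}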
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 226faeaa8e56..b7a0ae2a7b20 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -4928,8 +4928,8 @@ static int kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
#ifdef CONFIG_KVM_GUEST_MEMFD
case KVM_CAP_GUEST_MEMFD:
return 1;
- case KVM_CAP_GUEST_MEMFD_MMAP:
- return !kvm || kvm_arch_supports_gmem_mmap(kvm);
+ case KVM_CAP_GUEST_MEMFD_FLAGS:
+ return kvm_gmem_get_supported_flags(kvm);
#endif
default:
break;