-rw-r--r-- | Documentation/virt/kvm/api.rst | 10
-rw-r--r-- | MAINTAINERS | 12
-rw-r--r-- | arch/arm64/include/uapi/asm/kvm.h | 3
-rw-r--r-- | arch/loongarch/include/asm/kvm_host.h | 1
-rw-r--r-- | arch/loongarch/include/asm/kvm_para.h | 3
-rw-r--r-- | arch/loongarch/include/asm/kvm_vcpu.h | 1
-rw-r--r-- | arch/loongarch/include/uapi/asm/kvm_para.h | 1
-rw-r--r-- | arch/loongarch/kvm/exit.c | 30
-rw-r--r-- | arch/loongarch/kvm/main.c | 18
-rw-r--r-- | arch/loongarch/kvm/vcpu.c | 7
-rw-r--r-- | arch/riscv/include/asm/kvm_host.h | 5
-rw-r--r-- | arch/riscv/include/asm/kvm_vcpu_sbi.h | 1
-rw-r--r-- | arch/riscv/include/uapi/asm/kvm.h | 7
-rw-r--r-- | arch/riscv/kvm/Makefile | 1
-rw-r--r-- | arch/riscv/kvm/vcpu.c | 7
-rw-r--r-- | arch/riscv/kvm/vcpu_exit.c | 37
-rw-r--r-- | arch/riscv/kvm/vcpu_onereg.c | 6
-rw-r--r-- | arch/riscv/kvm/vcpu_sbi.c | 4
-rw-r--r-- | arch/riscv/kvm/vcpu_sbi_system.c | 73
-rw-r--r-- | arch/x86/include/asm/kvm-x86-ops.h | 6
-rw-r--r-- | arch/x86/include/asm/kvm_host.h | 107
-rw-r--r-- | arch/x86/include/uapi/asm/kvm.h | 1
-rw-r--r-- | arch/x86/kvm/cpuid.c | 972
-rw-r--r-- | arch/x86/kvm/cpuid.h | 128
-rw-r--r-- | arch/x86/kvm/governed_features.h | 22
-rw-r--r-- | arch/x86/kvm/hyperv.c | 2
-rw-r--r-- | arch/x86/kvm/kvm_emulate.h | 2
-rw-r--r-- | arch/x86/kvm/lapic.c | 31
-rw-r--r-- | arch/x86/kvm/lapic.h | 1
-rw-r--r-- | arch/x86/kvm/mmu.h | 33
-rw-r--r-- | arch/x86/kvm/mmu/mmu.c | 82
-rw-r--r-- | arch/x86/kvm/mmu/mmu_internal.h | 80
-rw-r--r-- | arch/x86/kvm/mmu/spte.h | 5
-rw-r--r-- | arch/x86/kvm/mmu/tdp_iter.c | 10
-rw-r--r-- | arch/x86/kvm/mmu/tdp_iter.h | 21
-rw-r--r-- | arch/x86/kvm/mmu/tdp_mmu.c | 325
-rw-r--r-- | arch/x86/kvm/mmu/tdp_mmu.h | 51
-rw-r--r-- | arch/x86/kvm/pmu.c | 1
-rw-r--r-- | arch/x86/kvm/reverse_cpuid.h | 23
-rw-r--r-- | arch/x86/kvm/smm.c | 10
-rw-r--r-- | arch/x86/kvm/svm/nested.c | 22
-rw-r--r-- | arch/x86/kvm/svm/pmu.c | 8
-rw-r--r-- | arch/x86/kvm/svm/sev.c | 43
-rw-r--r-- | arch/x86/kvm/svm/svm.c | 78
-rw-r--r-- | arch/x86/kvm/svm/svm.h | 21
-rw-r--r-- | arch/x86/kvm/trace.h | 17
-rw-r--r-- | arch/x86/kvm/vmx/hyperv.h | 2
-rw-r--r-- | arch/x86/kvm/vmx/main.c | 4
-rw-r--r-- | arch/x86/kvm/vmx/nested.c | 102
-rw-r--r-- | arch/x86/kvm/vmx/pmu_intel.c | 4
-rw-r--r-- | arch/x86/kvm/vmx/sgx.c | 14
-rw-r--r-- | arch/x86/kvm/vmx/vmx.c | 176
-rw-r--r-- | arch/x86/kvm/vmx/vmx.h | 6
-rw-r--r-- | arch/x86/kvm/vmx/x86_ops.h | 6
-rw-r--r-- | arch/x86/kvm/x86.c | 261
-rw-r--r-- | arch/x86/kvm/x86.h | 34
-rw-r--r-- | include/linux/call_once.h | 45
-rw-r--r-- | include/linux/kvm_host.h | 37
-rw-r--r-- | include/uapi/linux/kvm.h | 8
-rw-r--r-- | tools/testing/selftests/kvm/.gitignore | 1
-rw-r--r-- | tools/testing/selftests/kvm/Makefile | 345
-rw-r--r-- | tools/testing/selftests/kvm/Makefile.kvm | 330
-rw-r--r-- | tools/testing/selftests/kvm/arm64/aarch32_id_regs.c (renamed from tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c) | 10
-rw-r--r-- | tools/testing/selftests/kvm/arm64/arch_timer.c (renamed from tools/testing/selftests/kvm/aarch64/arch_timer.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c (renamed from tools/testing/selftests/kvm/aarch64/arch_timer_edge_cases.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/arm64/debug-exceptions.c (renamed from tools/testing/selftests/kvm/aarch64/debug-exceptions.c) | 4
-rw-r--r-- | tools/testing/selftests/kvm/arm64/get-reg-list.c (renamed from tools/testing/selftests/kvm/aarch64/get-reg-list.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/arm64/hypercalls.c (renamed from tools/testing/selftests/kvm/aarch64/hypercalls.c) | 6
-rw-r--r-- | tools/testing/selftests/kvm/arm64/mmio_abort.c (renamed from tools/testing/selftests/kvm/aarch64/mmio_abort.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/arm64/no-vgic-v3.c (renamed from tools/testing/selftests/kvm/aarch64/no-vgic-v3.c) | 2
-rw-r--r-- | tools/testing/selftests/kvm/arm64/page_fault_test.c (renamed from tools/testing/selftests/kvm/aarch64/page_fault_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/arm64/psci_test.c (renamed from tools/testing/selftests/kvm/aarch64/psci_test.c) | 8
-rw-r--r-- | tools/testing/selftests/kvm/arm64/set_id_regs.c (renamed from tools/testing/selftests/kvm/aarch64/set_id_regs.c) | 22
-rw-r--r-- | tools/testing/selftests/kvm/arm64/smccc_filter.c (renamed from tools/testing/selftests/kvm/aarch64/smccc_filter.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/arm64/vcpu_width_config.c (renamed from tools/testing/selftests/kvm/aarch64/vcpu_width_config.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/arm64/vgic_init.c (renamed from tools/testing/selftests/kvm/aarch64/vgic_init.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/arm64/vgic_irq.c (renamed from tools/testing/selftests/kvm/aarch64/vgic_irq.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c (renamed from tools/testing/selftests/kvm/aarch64/vgic_lpi_stress.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/arm64/vpmu_counter_access.c (renamed from tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c) | 19
-rw-r--r-- | tools/testing/selftests/kvm/dirty_log_perf_test.c | 2
-rw-r--r-- | tools/testing/selftests/kvm/include/arm64/arch_timer.h (renamed from tools/testing/selftests/kvm/include/aarch64/arch_timer.h) | 0
-rw-r--r-- | tools/testing/selftests/kvm/include/arm64/delay.h (renamed from tools/testing/selftests/kvm/include/aarch64/delay.h) | 0
-rw-r--r-- | tools/testing/selftests/kvm/include/arm64/gic.h (renamed from tools/testing/selftests/kvm/include/aarch64/gic.h) | 0
-rw-r--r-- | tools/testing/selftests/kvm/include/arm64/gic_v3.h (renamed from tools/testing/selftests/kvm/include/aarch64/gic_v3.h) | 0
-rw-r--r-- | tools/testing/selftests/kvm/include/arm64/gic_v3_its.h (renamed from tools/testing/selftests/kvm/include/aarch64/gic_v3_its.h) | 0
-rw-r--r-- | tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h (renamed from tools/testing/selftests/kvm/include/aarch64/kvm_util_arch.h) | 0
-rw-r--r-- | tools/testing/selftests/kvm/include/arm64/processor.h (renamed from tools/testing/selftests/kvm/include/aarch64/processor.h) | 0
-rw-r--r-- | tools/testing/selftests/kvm/include/arm64/spinlock.h (renamed from tools/testing/selftests/kvm/include/aarch64/spinlock.h) | 0
-rw-r--r-- | tools/testing/selftests/kvm/include/arm64/ucall.h (renamed from tools/testing/selftests/kvm/include/aarch64/ucall.h) | 0
-rw-r--r-- | tools/testing/selftests/kvm/include/arm64/vgic.h (renamed from tools/testing/selftests/kvm/include/aarch64/vgic.h) | 0
-rw-r--r-- | tools/testing/selftests/kvm/include/kvm_util.h | 10
-rw-r--r-- | tools/testing/selftests/kvm/include/s390/debug_print.h (renamed from tools/testing/selftests/kvm/include/s390x/debug_print.h) | 0
-rw-r--r-- | tools/testing/selftests/kvm/include/s390/diag318_test_handler.h (renamed from tools/testing/selftests/kvm/include/s390x/diag318_test_handler.h) | 0
-rw-r--r-- | tools/testing/selftests/kvm/include/s390/facility.h (renamed from tools/testing/selftests/kvm/include/s390x/facility.h) | 0
-rw-r--r-- | tools/testing/selftests/kvm/include/s390/kvm_util_arch.h (renamed from tools/testing/selftests/kvm/include/s390x/kvm_util_arch.h) | 0
-rw-r--r-- | tools/testing/selftests/kvm/include/s390/processor.h (renamed from tools/testing/selftests/kvm/include/s390x/processor.h) | 0
-rw-r--r-- | tools/testing/selftests/kvm/include/s390/sie.h (renamed from tools/testing/selftests/kvm/include/s390x/sie.h) | 0
-rw-r--r-- | tools/testing/selftests/kvm/include/s390/ucall.h (renamed from tools/testing/selftests/kvm/include/s390x/ucall.h) | 0
-rw-r--r-- | tools/testing/selftests/kvm/include/x86/apic.h (renamed from tools/testing/selftests/kvm/include/x86_64/apic.h) | 2
-rw-r--r-- | tools/testing/selftests/kvm/include/x86/evmcs.h (renamed from tools/testing/selftests/kvm/include/x86_64/evmcs.h) | 3
-rw-r--r-- | tools/testing/selftests/kvm/include/x86/hyperv.h (renamed from tools/testing/selftests/kvm/include/x86_64/hyperv.h) | 3
-rw-r--r-- | tools/testing/selftests/kvm/include/x86/kvm_util_arch.h (renamed from tools/testing/selftests/kvm/include/x86_64/kvm_util_arch.h) | 0
-rw-r--r-- | tools/testing/selftests/kvm/include/x86/mce.h (renamed from tools/testing/selftests/kvm/include/x86_64/mce.h) | 2
-rw-r--r-- | tools/testing/selftests/kvm/include/x86/pmu.h (renamed from tools/testing/selftests/kvm/include/x86_64/pmu.h) | 0
-rw-r--r-- | tools/testing/selftests/kvm/include/x86/processor.h (renamed from tools/testing/selftests/kvm/include/x86_64/processor.h) | 27
-rw-r--r-- | tools/testing/selftests/kvm/include/x86/sev.h (renamed from tools/testing/selftests/kvm/include/x86_64/sev.h) | 0
-rw-r--r-- | tools/testing/selftests/kvm/include/x86/svm.h (renamed from tools/testing/selftests/kvm/include/x86_64/svm.h) | 6
-rw-r--r-- | tools/testing/selftests/kvm/include/x86/svm_util.h (renamed from tools/testing/selftests/kvm/include/x86_64/svm_util.h) | 3
-rw-r--r-- | tools/testing/selftests/kvm/include/x86/ucall.h (renamed from tools/testing/selftests/kvm/include/x86_64/ucall.h) | 0
-rw-r--r-- | tools/testing/selftests/kvm/include/x86/vmx.h (renamed from tools/testing/selftests/kvm/include/x86_64/vmx.h) | 2
-rw-r--r-- | tools/testing/selftests/kvm/lib/arm64/gic.c (renamed from tools/testing/selftests/kvm/lib/aarch64/gic.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/lib/arm64/gic_private.h (renamed from tools/testing/selftests/kvm/lib/aarch64/gic_private.h) | 0
-rw-r--r-- | tools/testing/selftests/kvm/lib/arm64/gic_v3.c (renamed from tools/testing/selftests/kvm/lib/aarch64/gic_v3.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c (renamed from tools/testing/selftests/kvm/lib/aarch64/gic_v3_its.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/lib/arm64/handlers.S (renamed from tools/testing/selftests/kvm/lib/aarch64/handlers.S) | 0
-rw-r--r-- | tools/testing/selftests/kvm/lib/arm64/processor.c (renamed from tools/testing/selftests/kvm/lib/aarch64/processor.c) | 8
-rw-r--r-- | tools/testing/selftests/kvm/lib/arm64/spinlock.c (renamed from tools/testing/selftests/kvm/lib/aarch64/spinlock.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/lib/arm64/ucall.c (renamed from tools/testing/selftests/kvm/lib/aarch64/ucall.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/lib/arm64/vgic.c (renamed from tools/testing/selftests/kvm/lib/aarch64/vgic.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/lib/kvm_util.c | 3
-rw-r--r-- | tools/testing/selftests/kvm/lib/riscv/processor.c | 66
-rw-r--r-- | tools/testing/selftests/kvm/lib/s390/diag318_test_handler.c (renamed from tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/lib/s390/facility.c (renamed from tools/testing/selftests/kvm/lib/s390x/facility.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/lib/s390/processor.c (renamed from tools/testing/selftests/kvm/lib/s390x/processor.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/lib/s390/ucall.c (renamed from tools/testing/selftests/kvm/lib/s390x/ucall.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/lib/x86/apic.c (renamed from tools/testing/selftests/kvm/lib/x86_64/apic.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/lib/x86/handlers.S (renamed from tools/testing/selftests/kvm/lib/x86_64/handlers.S) | 0
-rw-r--r-- | tools/testing/selftests/kvm/lib/x86/hyperv.c (renamed from tools/testing/selftests/kvm/lib/x86_64/hyperv.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/lib/x86/memstress.c (renamed from tools/testing/selftests/kvm/lib/x86_64/memstress.c) | 2
-rw-r--r-- | tools/testing/selftests/kvm/lib/x86/pmu.c (renamed from tools/testing/selftests/kvm/lib/x86_64/pmu.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/lib/x86/processor.c (renamed from tools/testing/selftests/kvm/lib/x86_64/processor.c) | 2
-rw-r--r-- | tools/testing/selftests/kvm/lib/x86/sev.c (renamed from tools/testing/selftests/kvm/lib/x86_64/sev.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/lib/x86/svm.c (renamed from tools/testing/selftests/kvm/lib/x86_64/svm.c) | 1
-rw-r--r-- | tools/testing/selftests/kvm/lib/x86/ucall.c (renamed from tools/testing/selftests/kvm/lib/x86_64/ucall.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/lib/x86/vmx.c (renamed from tools/testing/selftests/kvm/lib/x86_64/vmx.c) | 2
-rw-r--r-- | tools/testing/selftests/kvm/mmu_stress_test.c (renamed from tools/testing/selftests/kvm/max_guest_memory_test.c) | 162
-rw-r--r-- | tools/testing/selftests/kvm/riscv/arch_timer.c | 2
-rw-r--r-- | tools/testing/selftests/kvm/riscv/ebreak_test.c | 2
-rw-r--r-- | tools/testing/selftests/kvm/riscv/get-reg-list.c | 18
-rw-r--r-- | tools/testing/selftests/kvm/riscv/sbi_pmu_test.c | 2
-rw-r--r-- | tools/testing/selftests/kvm/s390/cmma_test.c (renamed from tools/testing/selftests/kvm/s390x/cmma_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/s390/config (renamed from tools/testing/selftests/kvm/s390x/config) | 0
-rw-r--r-- | tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c (renamed from tools/testing/selftests/kvm/s390x/cpumodel_subfuncs_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/s390/debug_test.c (renamed from tools/testing/selftests/kvm/s390x/debug_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/s390/memop.c (renamed from tools/testing/selftests/kvm/s390x/memop.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/s390/resets.c (renamed from tools/testing/selftests/kvm/s390x/resets.c) | 2
-rw-r--r-- | tools/testing/selftests/kvm/s390/shared_zeropage_test.c (renamed from tools/testing/selftests/kvm/s390x/shared_zeropage_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/s390/sync_regs_test.c (renamed from tools/testing/selftests/kvm/s390x/sync_regs_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/s390/tprot.c (renamed from tools/testing/selftests/kvm/s390x/tprot.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/s390/ucontrol_test.c (renamed from tools/testing/selftests/kvm/s390x/ucontrol_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/set_memory_region_test.c | 59
-rw-r--r-- | tools/testing/selftests/kvm/steal_time.c | 3
-rw-r--r-- | tools/testing/selftests/kvm/x86/amx_test.c (renamed from tools/testing/selftests/kvm/x86_64/amx_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/apic_bus_clock_test.c (renamed from tools/testing/selftests/kvm/x86_64/apic_bus_clock_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/cpuid_test.c (renamed from tools/testing/selftests/kvm/x86_64/cpuid_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/cr4_cpuid_sync_test.c (renamed from tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/debug_regs.c (renamed from tools/testing/selftests/kvm/x86_64/debug_regs.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c (renamed from tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/exit_on_emulation_failure_test.c (renamed from tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/feature_msrs_test.c (renamed from tools/testing/selftests/kvm/x86_64/feature_msrs_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/fix_hypercall_test.c (renamed from tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/flds_emulation.h (renamed from tools/testing/selftests/kvm/x86_64/flds_emulation.h) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/hwcr_msr_test.c (renamed from tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/hyperv_clock.c (renamed from tools/testing/selftests/kvm/x86_64/hyperv_clock.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/hyperv_cpuid.c (renamed from tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/hyperv_evmcs.c (renamed from tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c (renamed from tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/hyperv_features.c (renamed from tools/testing/selftests/kvm/x86_64/hyperv_features.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/hyperv_ipi.c (renamed from tools/testing/selftests/kvm/x86_64/hyperv_ipi.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/hyperv_svm_test.c (renamed from tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c (renamed from tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/kvm_clock_test.c (renamed from tools/testing/selftests/kvm/x86_64/kvm_clock_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/kvm_pv_test.c (renamed from tools/testing/selftests/kvm/x86_64/kvm_pv_test.c) | 38
-rw-r--r-- | tools/testing/selftests/kvm/x86/max_vcpuid_cap_test.c (renamed from tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/monitor_mwait_test.c (renamed from tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/nested_exceptions_test.c (renamed from tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/nx_huge_pages_test.c (renamed from tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c) | 0
-rwxr-xr-x | tools/testing/selftests/kvm/x86/nx_huge_pages_test.sh (renamed from tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/platform_info_test.c (renamed from tools/testing/selftests/kvm/x86_64/platform_info_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/pmu_counters_test.c (renamed from tools/testing/selftests/kvm/x86_64/pmu_counters_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/pmu_event_filter_test.c (renamed from tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/private_mem_conversions_test.c (renamed from tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c (renamed from tools/testing/selftests/kvm/x86_64/private_mem_kvm_exits_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/recalc_apic_map_test.c (renamed from tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/set_boot_cpu_id.c (renamed from tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/set_sregs_test.c (renamed from tools/testing/selftests/kvm/x86_64/set_sregs_test.c) | 63
-rw-r--r-- | tools/testing/selftests/kvm/x86/sev_init2_tests.c (renamed from tools/testing/selftests/kvm/x86_64/sev_init2_tests.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/sev_migrate_tests.c (renamed from tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/sev_smoke_test.c (renamed from tools/testing/selftests/kvm/x86_64/sev_smoke_test.c) | 2
-rw-r--r-- | tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c (renamed from tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/smm_test.c (renamed from tools/testing/selftests/kvm/x86_64/smm_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/state_test.c (renamed from tools/testing/selftests/kvm/x86_64/state_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/svm_int_ctl_test.c (renamed from tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/svm_nested_shutdown_test.c (renamed from tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c (renamed from tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/svm_vmcall_test.c (renamed from tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/sync_regs_test.c (renamed from tools/testing/selftests/kvm/x86_64/sync_regs_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/triple_fault_event_test.c (renamed from tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/tsc_msrs_test.c (renamed from tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/tsc_scaling_sync.c (renamed from tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/ucna_injection_test.c (renamed from tools/testing/selftests/kvm/x86_64/ucna_injection_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/userspace_io_test.c (renamed from tools/testing/selftests/kvm/x86_64/userspace_io_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c (renamed from tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/vmx_apic_access_test.c (renamed from tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/vmx_close_while_nested_test.c (renamed from tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c (renamed from tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c (renamed from tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/vmx_invalid_nested_guest_state.c (renamed from tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/vmx_msrs_test.c (renamed from tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/vmx_nested_tsc_scaling_test.c (renamed from tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c (renamed from tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/vmx_preemption_timer_test.c (renamed from tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/vmx_set_nested_state_test.c (renamed from tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c (renamed from tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/xapic_ipi_test.c (renamed from tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/xapic_state_test.c (renamed from tools/testing/selftests/kvm/x86_64/xapic_state_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c (renamed from tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/xen_shinfo_test.c (renamed from tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/xen_vmcall_test.c (renamed from tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c) | 0
-rw-r--r-- | tools/testing/selftests/kvm/x86/xss_msr_test.c (renamed from tools/testing/selftests/kvm/x86_64/xss_msr_test.c) | 0
-rw-r--r-- | virt/kvm/guest_memfd.c | 36
-rw-r--r-- | virt/kvm/kvm_main.c | 115
222 files changed, 2893 insertions, 1530 deletions
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 3514c4018f40..0d1c3a820ce6 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -1825,15 +1825,18 @@ emulate them efficiently. The fields in each entry are defined as follows:
          the values returned by the cpuid instruction for this
          function/index combination
 
-The TSC deadline timer feature (CPUID leaf 1, ecx[24]) is always returned
-as false, since the feature depends on KVM_CREATE_IRQCHIP for local APIC
-support.  Instead it is reported via::
+x2APIC (CPUID leaf 1, ecx[21]) and TSC deadline timer (CPUID leaf 1, ecx[24])
+may be returned as true, but they depend on KVM_CREATE_IRQCHIP for in-kernel
+emulation of the local APIC.  TSC deadline timer support is also reported via::
 
   ioctl(KVM_CHECK_EXTENSION, KVM_CAP_TSC_DEADLINE_TIMER)
 
 if that returns true and you use KVM_CREATE_IRQCHIP, or if you emulate the
 feature in userspace, then you can enable the feature for KVM_SET_CPUID2.
 
+Enabling x2APIC in KVM_SET_CPUID2 requires KVM_CREATE_IRQCHIP as KVM doesn't
+support forwarding x2APIC MSR accesses to userspace, i.e. KVM does not support
+emulating x2APIC in userspace.
 
 4.47 KVM_PPC_GET_PVINFO
 -----------------------
@@ -7673,6 +7676,7 @@ branch to guests' 0x200 interrupt vector.
 :Architectures: x86
 :Parameters: args[0] defines which exits are disabled
 :Returns: 0 on success, -EINVAL when args[0] contains invalid exits
+          or if any vCPUs have already been created
 
 Valid bits in args[0] are::
diff --git a/MAINTAINERS b/MAINTAINERS
index 28e8de3a19e5..70bca3a67d5c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12686,8 +12686,8 @@ F: arch/arm64/include/asm/kvm*
 F: arch/arm64/include/uapi/asm/kvm*
 F: arch/arm64/kvm/
 F: include/kvm/arm_*
-F: tools/testing/selftests/kvm/*/aarch64/
-F: tools/testing/selftests/kvm/aarch64/
+F: tools/testing/selftests/kvm/*/arm64/
+F: tools/testing/selftests/kvm/arm64/
 
 KERNEL VIRTUAL MACHINE FOR LOONGARCH (KVM/LoongArch)
 M: Tianrui Zhao <zhaotianrui@loongson.cn>
@@ -12758,8 +12758,8 @@ F: arch/s390/kvm/
 F: arch/s390/mm/gmap.c
 F: drivers/s390/char/uvdevice.c
 F: tools/testing/selftests/drivers/s390x/uvdevice/
-F: tools/testing/selftests/kvm/*/s390x/
-F: tools/testing/selftests/kvm/s390x/
+F: tools/testing/selftests/kvm/*/s390/
+F: tools/testing/selftests/kvm/s390/
 
 KERNEL VIRTUAL MACHINE FOR X86 (KVM/x86)
 M: Sean Christopherson <seanjc@google.com>
@@ -12776,8 +12776,8 @@ F: arch/x86/include/uapi/asm/svm.h
 F: arch/x86/include/uapi/asm/vmx.h
 F: arch/x86/kvm/
 F: arch/x86/kvm/*/
-F: tools/testing/selftests/kvm/*/x86_64/
-F: tools/testing/selftests/kvm/x86_64/
+F: tools/testing/selftests/kvm/*/x86/
+F: tools/testing/selftests/kvm/x86/
 
 KERNFS
 M: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 66736ff04011..568bf858f319 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -43,9 +43,6 @@
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 #define KVM_DIRTY_LOG_PAGE_OFFSET 64
 
-#define KVM_REG_SIZE(id)						\
-	(1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
-
 struct kvm_regs {
 	struct user_pt_regs regs;	/* sp = sp_el0 */
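To make the api.rst change above concrete, here is a minimal userspace sketch of the order of operations it implies: verify KVM_CAP_TSC_DEADLINE_TIMER, create the in-kernel irqchip, and only then set the feature bits via KVM_SET_CPUID2. The vm_fd/vcpu_fd descriptors and the surrounding VM setup are assumed boilerplate, not part of this patch, and error handling is elided:

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    /* Hypothetical helper: vm_fd/vcpu_fd come from the usual
     * KVM_CREATE_VM / KVM_CREATE_VCPU sequence. */
    static int enable_tsc_deadline_and_x2apic(int vm_fd, int vcpu_fd,
                                              struct kvm_cpuid2 *cpuid)
    {
            unsigned int i;

            if (!ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_TSC_DEADLINE_TIMER))
                    return -1;              /* host/KVM can't provide it */

            /* Both features depend on the in-kernel local APIC. */
            if (ioctl(vm_fd, KVM_CREATE_IRQCHIP, 0))
                    return -1;

            for (i = 0; i < cpuid->nent; i++) {
                    if (cpuid->entries[i].function == 1) {
                            cpuid->entries[i].ecx |= 1u << 21;  /* x2APIC */
                            cpuid->entries[i].ecx |= 1u << 24;  /* TSC deadline */
                    }
            }
            return ioctl(vcpu_fd, KVM_SET_CPUID2, cpuid);
    }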
diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
index 7b8367c39da8..590982cd986e 100644
--- a/arch/loongarch/include/asm/kvm_host.h
+++ b/arch/loongarch/include/asm/kvm_host.h
@@ -162,6 +162,7 @@ enum emulation_result {
 #define LOONGARCH_PV_FEAT_UPDATED	BIT_ULL(63)
 #define LOONGARCH_PV_FEAT_MASK		(BIT(KVM_FEATURE_IPI) |		\
 					 BIT(KVM_FEATURE_STEAL_TIME) |	\
+					 BIT(KVM_FEATURE_USER_HCALL) |	\
 					 BIT(KVM_FEATURE_VIRT_EXTIOI))
 
 struct kvm_vcpu_arch {
diff --git a/arch/loongarch/include/asm/kvm_para.h b/arch/loongarch/include/asm/kvm_para.h
index c4e84227280d..3e4b397f423f 100644
--- a/arch/loongarch/include/asm/kvm_para.h
+++ b/arch/loongarch/include/asm/kvm_para.h
@@ -13,6 +13,7 @@
 
 #define KVM_HCALL_CODE_SERVICE		0
 #define KVM_HCALL_CODE_SWDBG		1
+#define KVM_HCALL_CODE_USER_SERVICE	2
 
 #define KVM_HCALL_SERVICE	HYPERCALL_ENCODE(HYPERVISOR_KVM, KVM_HCALL_CODE_SERVICE)
 #define KVM_HCALL_FUNC_IPI	1
@@ -20,6 +21,8 @@
 
 #define KVM_HCALL_SWDBG		HYPERCALL_ENCODE(HYPERVISOR_KVM, KVM_HCALL_CODE_SWDBG)
 
+#define KVM_HCALL_USER_SERVICE	HYPERCALL_ENCODE(HYPERVISOR_KVM, KVM_HCALL_CODE_USER_SERVICE)
+
 /*
  * LoongArch hypercall return code
  */
diff --git a/arch/loongarch/include/asm/kvm_vcpu.h b/arch/loongarch/include/asm/kvm_vcpu.h
index d7e8f7d50ee0..2c349f961bfb 100644
--- a/arch/loongarch/include/asm/kvm_vcpu.h
+++ b/arch/loongarch/include/asm/kvm_vcpu.h
@@ -43,6 +43,7 @@ int kvm_emu_mmio_read(struct kvm_vcpu *vcpu, larch_inst inst);
 int kvm_emu_mmio_write(struct kvm_vcpu *vcpu, larch_inst inst);
 int kvm_complete_mmio_read(struct kvm_vcpu *vcpu, struct kvm_run *run);
 int kvm_complete_iocsr_read(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_complete_user_service(struct kvm_vcpu *vcpu, struct kvm_run *run);
 int kvm_emu_idle(struct kvm_vcpu *vcpu);
 int kvm_pending_timer(struct kvm_vcpu *vcpu);
 int kvm_handle_fault(struct kvm_vcpu *vcpu, int fault);
diff --git a/arch/loongarch/include/uapi/asm/kvm_para.h b/arch/loongarch/include/uapi/asm/kvm_para.h
index b0604aa9b4bb..76d802ef01ce 100644
--- a/arch/loongarch/include/uapi/asm/kvm_para.h
+++ b/arch/loongarch/include/uapi/asm/kvm_para.h
@@ -17,5 +17,6 @@
 #define KVM_FEATURE_STEAL_TIME		2
 /* BIT 24 - 31 are features configurable by user space vmm */
 #define KVM_FEATURE_VIRT_EXTIOI		24
+#define KVM_FEATURE_USER_HCALL		25
 
 #endif /* _UAPI_ASM_KVM_PARA_H */
diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
index a7893bd01e73..c1e8ec5b941b 100644
--- a/arch/loongarch/kvm/exit.c
+++ b/arch/loongarch/kvm/exit.c
@@ -709,6 +709,14 @@ static int kvm_handle_write_fault(struct kvm_vcpu *vcpu)
 	return kvm_handle_rdwr_fault(vcpu, true);
 }
 
+int kvm_complete_user_service(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	update_pc(&vcpu->arch);
+	kvm_write_reg(vcpu, LOONGARCH_GPR_A0, run->hypercall.ret);
+
+	return 0;
+}
+
 /**
  * kvm_handle_fpu_disabled() - Guest used fpu however it is disabled at host
  * @vcpu:	Virtual CPU context.
@@ -873,6 +881,28 @@ static int kvm_handle_hypercall(struct kvm_vcpu *vcpu)
 		vcpu->stat.hypercall_exits++;
 		kvm_handle_service(vcpu);
 		break;
+	case KVM_HCALL_USER_SERVICE:
+		if (!kvm_guest_has_pv_feature(vcpu, KVM_FEATURE_USER_HCALL)) {
+			kvm_write_reg(vcpu, LOONGARCH_GPR_A0, KVM_HCALL_INVALID_CODE);
+			break;
+		}
+
+		vcpu->stat.hypercall_exits++;
+		vcpu->run->exit_reason = KVM_EXIT_HYPERCALL;
+		vcpu->run->hypercall.nr = KVM_HCALL_USER_SERVICE;
+		vcpu->run->hypercall.args[0] = kvm_read_reg(vcpu, LOONGARCH_GPR_A0);
+		vcpu->run->hypercall.args[1] = kvm_read_reg(vcpu, LOONGARCH_GPR_A1);
+		vcpu->run->hypercall.args[2] = kvm_read_reg(vcpu, LOONGARCH_GPR_A2);
+		vcpu->run->hypercall.args[3] = kvm_read_reg(vcpu, LOONGARCH_GPR_A3);
+		vcpu->run->hypercall.args[4] = kvm_read_reg(vcpu, LOONGARCH_GPR_A4);
+		vcpu->run->hypercall.args[5] = kvm_read_reg(vcpu, LOONGARCH_GPR_A5);
+		vcpu->run->hypercall.flags = 0;
+		/*
+		 * Set invalid return value by default, let user-mode VMM modify it.
+		 */
+		vcpu->run->hypercall.ret = KVM_HCALL_INVALID_CODE;
+		ret = RESUME_HOST;
+		break;
 	case KVM_HCALL_SWDBG:
 		/* KVM_HCALL_SWDBG only in effective when SW_BP is enabled */
 		if (vcpu->guest_debug & KVM_GUESTDBG_SW_BP_MASK) {
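A hedged sketch of the userspace side of this new LoongArch exit: when KVM_RUN returns with KVM_EXIT_HYPERCALL, the VMM services the call described by run->hypercall and writes the result into run->hypercall.ret, which kvm_complete_user_service() above copies into the guest's A0 on the next KVM_RUN. handle_user_hcall() is a hypothetical VMM service routine, not part of the patch:

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    /* Hypothetical VMM routine that implements the user hypercall. */
    extern __u64 handle_user_hcall(__u64 nr, const __u64 *args);

    /* run points at the mmap'ed struct kvm_run of this vCPU. */
    static void vcpu_run_once(int vcpu_fd, struct kvm_run *run)
    {
            if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)
                    return;

            if (run->exit_reason == KVM_EXIT_HYPERCALL) {
                    /* args[0..5] hold the guest's A0-A5 at the ecall. */
                    run->hypercall.ret = handle_user_hcall(run->hypercall.nr,
                                                           run->hypercall.args);
                    /* ret is consumed by KVM on re-entry. */
            }
    }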
diff --git a/arch/loongarch/kvm/main.c b/arch/loongarch/kvm/main.c
index 396fed2665a5..bf9268bf26d5 100644
--- a/arch/loongarch/kvm/main.c
+++ b/arch/loongarch/kvm/main.c
@@ -245,6 +245,24 @@ void kvm_check_vpid(struct kvm_vcpu *vcpu)
 			trace_kvm_vpid_change(vcpu, vcpu->arch.vpid);
 		vcpu->cpu = cpu;
 		kvm_clear_request(KVM_REQ_TLB_FLUSH_GPA, vcpu);
+
+		/*
+		 * LLBCTL is a separated guest CSR register from host, a general
+		 * exception ERET instruction clears the host LLBCTL register in
+		 * host mode, and clears the guest LLBCTL register in guest mode.
+		 * ERET in tlb refill exception does not clear LLBCTL register.
+		 *
+		 * When secondary mmu mapping is changed, guest OS does not know
+		 * even if the content is changed after mapping is changed.
+		 *
+		 * Here clear WCLLB of the guest LLBCTL register when mapping is
+		 * changed. Otherwise, if mmu mapping is changed while guest is
+		 * executing LL/SC pair, LL loads with the old address and set
+		 * the LLBCTL flag, SC checks the LLBCTL flag and will store the
+		 * new address successfully since LLBCTL_WCLLB is on, even if
+		 * memory with new address is changed on other VCPUs.
+		 */
+		set_gcsr_llbctl(CSR_LLBCTL_WCLLB);
 	}
 
 	/* Restore GSTAT(0x50).vpid */
diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
index d18a4a270415..fb72095c8077 100644
--- a/arch/loongarch/kvm/vcpu.c
+++ b/arch/loongarch/kvm/vcpu.c
@@ -1732,9 +1732,14 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 		vcpu->mmio_needed = 0;
 	}
 
-	if (run->exit_reason == KVM_EXIT_LOONGARCH_IOCSR) {
+	switch (run->exit_reason) {
+	case KVM_EXIT_HYPERCALL:
+		kvm_complete_user_service(vcpu, run);
+		break;
+	case KVM_EXIT_LOONGARCH_IOCSR:
 		if (!run->iocsr_io.is_write)
 			kvm_complete_iocsr_read(vcpu, run);
+		break;
 	}
 
 	if (!vcpu->wants_to_run)
diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h
index 35eab6e0f4ae..cc33e35cd628 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -87,6 +87,11 @@ struct kvm_vcpu_stat {
 	u64 csr_exit_kernel;
 	u64 signal_exits;
 	u64 exits;
+	u64 instr_illegal_exits;
+	u64 load_misaligned_exits;
+	u64 store_misaligned_exits;
+	u64 load_access_exits;
+	u64 store_access_exits;
 };
 
 struct kvm_arch_memory_slot {
diff --git a/arch/riscv/include/asm/kvm_vcpu_sbi.h b/arch/riscv/include/asm/kvm_vcpu_sbi.h
index b96705258cf9..4ed6203cdd30 100644
--- a/arch/riscv/include/asm/kvm_vcpu_sbi.h
+++ b/arch/riscv/include/asm/kvm_vcpu_sbi.h
@@ -85,6 +85,7 @@ extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_rfence;
 extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_srst;
 extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_hsm;
 extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_dbcn;
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_susp;
 extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_sta;
 extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_experimental;
 extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_vendor;
diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h
index 3482c9a73d1b..f06bc5efcd79 100644
--- a/arch/riscv/include/uapi/asm/kvm.h
+++ b/arch/riscv/include/uapi/asm/kvm.h
@@ -179,6 +179,9 @@ enum KVM_RISCV_ISA_EXT_ID {
 	KVM_RISCV_ISA_EXT_SSNPM,
 	KVM_RISCV_ISA_EXT_SVADE,
 	KVM_RISCV_ISA_EXT_SVADU,
+	KVM_RISCV_ISA_EXT_SVVPTC,
+	KVM_RISCV_ISA_EXT_ZABHA,
+	KVM_RISCV_ISA_EXT_ZICCRSE,
 	KVM_RISCV_ISA_EXT_MAX,
 };
 
@@ -198,6 +201,7 @@ enum KVM_RISCV_SBI_EXT_ID {
 	KVM_RISCV_SBI_EXT_VENDOR,
 	KVM_RISCV_SBI_EXT_DBCN,
 	KVM_RISCV_SBI_EXT_STA,
+	KVM_RISCV_SBI_EXT_SUSP,
 	KVM_RISCV_SBI_EXT_MAX,
 };
 
@@ -211,9 +215,6 @@ struct kvm_riscv_sbi_sta {
 #define KVM_RISCV_TIMER_STATE_OFF	0
 #define KVM_RISCV_TIMER_STATE_ON	1
 
-#define KVM_REG_SIZE(id)		\
-	(1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
-
 /* If you need to interpret the index values, here is the key: */
 #define KVM_REG_RISCV_TYPE_MASK		0x00000000FF000000
 #define KVM_REG_RISCV_TYPE_SHIFT	24
diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile
index 0fb1840c3e0a..4e0bba91d284 100644
--- a/arch/riscv/kvm/Makefile
+++ b/arch/riscv/kvm/Makefile
@@ -30,6 +30,7 @@ kvm-y += vcpu_sbi_hsm.o
 kvm-$(CONFIG_RISCV_PMU_SBI) += vcpu_sbi_pmu.o
 kvm-y += vcpu_sbi_replace.o
 kvm-y += vcpu_sbi_sta.o
+kvm-y += vcpu_sbi_system.o
 kvm-$(CONFIG_RISCV_SBI_V01) += vcpu_sbi_v01.o
 kvm-y += vcpu_switch.o
 kvm-y += vcpu_timer.o
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index e048dcc6e65e..60d684c76c58 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -34,7 +34,12 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
 	STATS_DESC_COUNTER(VCPU, csr_exit_user),
 	STATS_DESC_COUNTER(VCPU, csr_exit_kernel),
 	STATS_DESC_COUNTER(VCPU, signal_exits),
-	STATS_DESC_COUNTER(VCPU, exits)
+	STATS_DESC_COUNTER(VCPU, exits),
+	STATS_DESC_COUNTER(VCPU, instr_illegal_exits),
+	STATS_DESC_COUNTER(VCPU, load_misaligned_exits),
+	STATS_DESC_COUNTER(VCPU, store_misaligned_exits),
+	STATS_DESC_COUNTER(VCPU, load_access_exits),
+	STATS_DESC_COUNTER(VCPU, store_access_exits),
 };
 
 const struct kvm_stats_header kvm_vcpu_stats_header = {
diff --git a/arch/riscv/kvm/vcpu_exit.c b/arch/riscv/kvm/vcpu_exit.c
index fa98e5c024b2..6e0c18412795 100644
--- a/arch/riscv/kvm/vcpu_exit.c
+++ b/arch/riscv/kvm/vcpu_exit.c
@@ -165,6 +165,17 @@ void kvm_riscv_vcpu_trap_redirect(struct kvm_vcpu *vcpu,
 	vcpu->arch.guest_context.sstatus |= SR_SPP;
 }
 
+static inline int vcpu_redirect(struct kvm_vcpu *vcpu, struct kvm_cpu_trap *trap)
+{
+	int ret = -EFAULT;
+
+	if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV) {
+		kvm_riscv_vcpu_trap_redirect(vcpu, trap);
+		ret = 1;
+	}
+	return ret;
+}
+
 /*
  * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on
  * proper exit to userspace.
@@ -183,14 +194,32 @@ int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
 	run->exit_reason = KVM_EXIT_UNKNOWN;
 	switch (trap->scause) {
 	case EXC_INST_ILLEGAL:
+		kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_ILLEGAL_INSN);
+		vcpu->stat.instr_illegal_exits++;
+		ret = vcpu_redirect(vcpu, trap);
+		break;
 	case EXC_LOAD_MISALIGNED:
+		kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_MISALIGNED_LOAD);
+		vcpu->stat.load_misaligned_exits++;
+		ret = vcpu_redirect(vcpu, trap);
+		break;
 	case EXC_STORE_MISALIGNED:
+		kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_MISALIGNED_STORE);
+		vcpu->stat.store_misaligned_exits++;
+		ret = vcpu_redirect(vcpu, trap);
+		break;
 	case EXC_LOAD_ACCESS:
+		kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_ACCESS_LOAD);
+		vcpu->stat.load_access_exits++;
+		ret = vcpu_redirect(vcpu, trap);
+		break;
 	case EXC_STORE_ACCESS:
-		if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV) {
-			kvm_riscv_vcpu_trap_redirect(vcpu, trap);
-			ret = 1;
-		}
+		kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_ACCESS_STORE);
+		vcpu->stat.store_access_exits++;
+		ret = vcpu_redirect(vcpu, trap);
+		break;
+	case EXC_INST_ACCESS:
+		ret = vcpu_redirect(vcpu, trap);
 		break;
 	case EXC_VIRTUAL_INST_FAULT:
 		if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV)
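The new counters are exported through KVM's binary stats interface. A rough userspace sketch (error handling elided, record layout per the kvm_stats_header/kvm_stats_desc definitions in include/uapi/linux/kvm.h) that prints every "*_exits" counter of one vCPU:

    #include <linux/kvm.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <unistd.h>

    static void dump_exit_stats(int vcpu_fd)
    {
            struct kvm_stats_header hdr;
            int fd = ioctl(vcpu_fd, KVM_GET_STATS_FD, NULL);
            unsigned int i;

            if (fd < 0)
                    return;
            pread(fd, &hdr, sizeof(hdr), 0);

            for (i = 0; i < hdr.num_desc; i++) {
                    /* Each descriptor record is desc + name_size bytes. */
                    char buf[sizeof(struct kvm_stats_desc) + hdr.name_size];
                    struct kvm_stats_desc *desc = (void *)buf;
                    __u64 val;

                    pread(fd, desc, sizeof(buf),
                          hdr.desc_offset + (long)i * sizeof(buf));
                    if (!strstr(desc->name, "_exits"))
                            continue;
                    /* desc->offset is relative to the data block. */
                    pread(fd, &val, sizeof(val), hdr.data_offset + desc->offset);
                    printf("%s: %llu\n", desc->name, (unsigned long long)val);
            }
            close(fd);
    }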
diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c
index 753f66c8b70a..f6d27b59c641 100644
--- a/arch/riscv/kvm/vcpu_onereg.c
+++ b/arch/riscv/kvm/vcpu_onereg.c
@@ -46,6 +46,8 @@ static const unsigned long kvm_isa_ext_arr[] = {
 	KVM_ISA_EXT_ARR(SVINVAL),
 	KVM_ISA_EXT_ARR(SVNAPOT),
 	KVM_ISA_EXT_ARR(SVPBMT),
+	KVM_ISA_EXT_ARR(SVVPTC),
+	KVM_ISA_EXT_ARR(ZABHA),
 	KVM_ISA_EXT_ARR(ZACAS),
 	KVM_ISA_EXT_ARR(ZAWRS),
 	KVM_ISA_EXT_ARR(ZBA),
@@ -65,6 +67,7 @@ static const unsigned long kvm_isa_ext_arr[] = {
 	KVM_ISA_EXT_ARR(ZFHMIN),
 	KVM_ISA_EXT_ARR(ZICBOM),
 	KVM_ISA_EXT_ARR(ZICBOZ),
+	KVM_ISA_EXT_ARR(ZICCRSE),
 	KVM_ISA_EXT_ARR(ZICNTR),
 	KVM_ISA_EXT_ARR(ZICOND),
 	KVM_ISA_EXT_ARR(ZICSR),
@@ -145,6 +148,8 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext)
 	case KVM_RISCV_ISA_EXT_SSTC:
 	case KVM_RISCV_ISA_EXT_SVINVAL:
 	case KVM_RISCV_ISA_EXT_SVNAPOT:
+	case KVM_RISCV_ISA_EXT_SVVPTC:
+	case KVM_RISCV_ISA_EXT_ZABHA:
 	case KVM_RISCV_ISA_EXT_ZACAS:
 	case KVM_RISCV_ISA_EXT_ZAWRS:
 	case KVM_RISCV_ISA_EXT_ZBA:
@@ -162,6 +167,7 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext)
 	case KVM_RISCV_ISA_EXT_ZFA:
 	case KVM_RISCV_ISA_EXT_ZFH:
 	case KVM_RISCV_ISA_EXT_ZFHMIN:
+	case KVM_RISCV_ISA_EXT_ZICCRSE:
 	case KVM_RISCV_ISA_EXT_ZICNTR:
 	case KVM_RISCV_ISA_EXT_ZICOND:
 	case KVM_RISCV_ISA_EXT_ZICSR:
diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c
index 6e704ed86a83..d1c83a77735e 100644
--- a/arch/riscv/kvm/vcpu_sbi.c
+++ b/arch/riscv/kvm/vcpu_sbi.c
@@ -71,6 +71,10 @@ static const struct kvm_riscv_sbi_extension_entry sbi_ext[] = {
 		.ext_ptr = &vcpu_sbi_ext_dbcn,
 	},
 	{
+		.ext_idx = KVM_RISCV_SBI_EXT_SUSP,
+		.ext_ptr = &vcpu_sbi_ext_susp,
+	},
+	{
 		.ext_idx = KVM_RISCV_SBI_EXT_STA,
 		.ext_ptr = &vcpu_sbi_ext_sta,
 	},
diff --git a/arch/riscv/kvm/vcpu_sbi_system.c b/arch/riscv/kvm/vcpu_sbi_system.c
new file mode 100644
index 000000000000..5d55e08791fa
--- /dev/null
+++ b/arch/riscv/kvm/vcpu_sbi_system.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2024 Ventana Micro Systems Inc.
+ */
+
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_vcpu_sbi.h>
+#include <asm/sbi.h>
+
+static int kvm_sbi_ext_susp_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
+				    struct kvm_vcpu_sbi_return *retdata)
+{
+	struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+	struct kvm_cpu_context *reset_cntx;
+	unsigned long funcid = cp->a6;
+	unsigned long hva, i;
+	struct kvm_vcpu *tmp;
+
+	switch (funcid) {
+	case SBI_EXT_SUSP_SYSTEM_SUSPEND:
+		if (cp->a0 != SBI_SUSP_SLEEP_TYPE_SUSPEND_TO_RAM) {
+			retdata->err_val = SBI_ERR_INVALID_PARAM;
+			return 0;
+		}
+
+		if (!(cp->sstatus & SR_SPP)) {
+			retdata->err_val = SBI_ERR_FAILURE;
+			return 0;
+		}
+
+		hva = kvm_vcpu_gfn_to_hva_prot(vcpu, cp->a1 >> PAGE_SHIFT, NULL);
+		if (kvm_is_error_hva(hva)) {
+			retdata->err_val = SBI_ERR_INVALID_ADDRESS;
+			return 0;
+		}
+
+		kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
+			if (tmp == vcpu)
+				continue;
+			if (!kvm_riscv_vcpu_stopped(tmp)) {
+				retdata->err_val = SBI_ERR_DENIED;
+				return 0;
+			}
+		}
+
+		spin_lock(&vcpu->arch.reset_cntx_lock);
+		reset_cntx = &vcpu->arch.guest_reset_context;
+		reset_cntx->sepc = cp->a1;
+		reset_cntx->a0 = vcpu->vcpu_id;
+		reset_cntx->a1 = cp->a2;
+		spin_unlock(&vcpu->arch.reset_cntx_lock);
+
+		kvm_make_request(KVM_REQ_VCPU_RESET, vcpu);
+
+		/* userspace provides the suspend implementation */
+		kvm_riscv_vcpu_sbi_forward(vcpu, run);
+		retdata->uexit = true;
+		break;
+	default:
+		retdata->err_val = SBI_ERR_NOT_SUPPORTED;
+		break;
+	}
+
+	return 0;
+}
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_susp = {
+	.extid_start = SBI_EXT_SUSP,
+	.extid_end = SBI_EXT_SUSP,
+	.default_disabled = true,
+	.handler = kvm_sbi_ext_susp_handler,
+};
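For context, a sketch of how a guest would invoke the SUSP extension that the new handler above services. Per the SBI v2.0 spec (values taken from the spec, not from this patch): a7 carries the extension ID "SUSP" (0x53555350), a6 the SYSTEM_SUSPEND function ID (0), a0 the sleep type, a1 the resume address, and a2 an opaque argument:

    /* SBI SUSP system_suspend(sleep_type, resume_addr, opaque) — guest side. */
    struct sbiret { long error; long value; };

    static struct sbiret sbi_system_suspend(unsigned long sleep_type,
                                            unsigned long resume_addr,
                                            unsigned long opaque)
    {
            register unsigned long a0 asm("a0") = sleep_type;
            register unsigned long a1 asm("a1") = resume_addr;
            register unsigned long a2 asm("a2") = opaque;
            register unsigned long a6 asm("a6") = 0;            /* SYSTEM_SUSPEND */
            register unsigned long a7 asm("a7") = 0x53555350;   /* "SUSP" */

            asm volatile("ecall"
                         : "+r"(a0), "+r"(a1)
                         : "r"(a2), "r"(a6), "r"(a7)
                         : "memory");
            return (struct sbiret){ (long)a0, (long)a1 };
    }

Note that the extension is default_disabled in KVM, i.e. userspace must explicitly enable KVM_RISCV_SBI_EXT_SUSP, and the suspend itself is forwarded to userspace rather than handled in-kernel.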
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 5aff7222e40f..c35550581da0 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -83,7 +83,6 @@ KVM_X86_OP(enable_nmi_window)
 KVM_X86_OP(enable_irq_window)
 KVM_X86_OP_OPTIONAL(update_cr8_intercept)
 KVM_X86_OP(refresh_apicv_exec_ctrl)
-KVM_X86_OP_OPTIONAL(hwapic_irr_update)
 KVM_X86_OP_OPTIONAL(hwapic_isr_update)
 KVM_X86_OP_OPTIONAL(load_eoi_exitmap)
 KVM_X86_OP_OPTIONAL(set_virtual_apic_mode)
@@ -94,12 +93,17 @@ KVM_X86_OP_OPTIONAL_RET0(set_tss_addr)
 KVM_X86_OP_OPTIONAL_RET0(set_identity_map_addr)
 KVM_X86_OP_OPTIONAL_RET0(get_mt_mask)
 KVM_X86_OP(load_mmu_pgd)
+KVM_X86_OP_OPTIONAL(link_external_spt)
+KVM_X86_OP_OPTIONAL(set_external_spte)
+KVM_X86_OP_OPTIONAL(free_external_spt)
+KVM_X86_OP_OPTIONAL(remove_external_spte)
 KVM_X86_OP(has_wbinvd_exit)
 KVM_X86_OP(get_l2_tsc_offset)
 KVM_X86_OP(get_l2_tsc_multiplier)
 KVM_X86_OP(write_tsc_offset)
 KVM_X86_OP(write_tsc_multiplier)
 KVM_X86_OP(get_exit_info)
+KVM_X86_OP(get_entry_info)
 KVM_X86_OP(check_intercept)
 KVM_X86_OP(handle_exit_irqoff)
 KVM_X86_OP_OPTIONAL(update_cpu_dirty_logging)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index e8aeb4b4f868..b15cde0a9b5c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -26,6 +26,7 @@
 #include <linux/irqbypass.h>
 #include <linux/kfifo.h>
 #include <linux/sched/vhost_task.h>
+#include <linux/call_once.h>
 
 #include <asm/apic.h>
 #include <asm/pvclock-abi.h>
@@ -312,10 +313,11 @@ struct kvm_kernel_irq_routing_entry;
 * the number of unique SPs that can theoretically be created is 2^n, where n
 * is the number of bits that are used to compute the role.
 *
- * But, even though there are 19 bits in the mask below, not all combinations
+ * But, even though there are 20 bits in the mask below, not all combinations
 * of modes and flags are possible:
 *
- *   - invalid shadow pages are not accounted, so the bits are effectively 18
+ *   - invalid shadow pages are not accounted, mirror pages are not shadowed,
+ *     so the bits are effectively 18.
 *
 *   - quadrant will only be used if has_4_byte_gpte=1 (non-PAE paging);
 *     execonly and ad_disabled are only used for nested EPT which has
@@ -348,7 +350,8 @@ union kvm_mmu_page_role {
 		unsigned ad_disabled:1;
 		unsigned guest_mode:1;
 		unsigned passthrough:1;
-		unsigned :5;
+		unsigned is_mirror:1;
+		unsigned :4;
 
 		/*
 		 * This is left at the top of the word so that
@@ -456,6 +459,7 @@ struct kvm_mmu {
 	int (*sync_spte)(struct kvm_vcpu *vcpu,
 			 struct kvm_mmu_page *sp, int i);
 	struct kvm_mmu_root_info root;
+	hpa_t mirror_root_hpa;
 	union kvm_cpu_role cpu_role;
 	union kvm_mmu_page_role root_role;
@@ -738,6 +742,23 @@ struct kvm_queued_exception {
 	bool has_payload;
 };
 
+/*
+ * Hardware-defined CPUID leafs that are either scattered by the kernel or are
+ * unknown to the kernel, but need to be directly used by KVM.  Note, these
+ * word values conflict with the kernel's "bug" caps, but KVM doesn't use those.
+ */
+enum kvm_only_cpuid_leafs {
+	CPUID_12_EAX	 = NCAPINTS,
+	CPUID_7_1_EDX,
+	CPUID_8000_0007_EDX,
+	CPUID_8000_0022_EAX,
+	CPUID_7_2_EDX,
+	CPUID_24_0_EBX,
+	NR_KVM_CPU_CAPS,
+
+	NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS,
+};
+
 struct kvm_vcpu_arch {
 	/*
 	 * rip and regs accesses must go through
@@ -812,6 +833,11 @@ struct kvm_vcpu_arch {
 	struct kvm_mmu_memory_cache mmu_shadow_page_cache;
 	struct kvm_mmu_memory_cache mmu_shadowed_info_cache;
 	struct kvm_mmu_memory_cache mmu_page_header_cache;
+	/*
+	 * This cache is to allocate external page table. E.g. private EPT used
+	 * by the TDX module.
+	 */
+	struct kvm_mmu_memory_cache mmu_external_spt_cache;
 
 	/*
 	 * QEMU userspace and the guest each have their own FPU state.
@@ -853,27 +879,23 @@ struct kvm_vcpu_arch {
 
 	int cpuid_nent;
 	struct kvm_cpuid_entry2 *cpuid_entries;
-	struct kvm_hypervisor_cpuid kvm_cpuid;
 	bool is_amd_compatible;
 
 	/*
-	 * FIXME: Drop this macro and use KVM_NR_GOVERNED_FEATURES directly
-	 * when "struct kvm_vcpu_arch" is no longer defined in an
-	 * arch/x86/include/asm header.  The max is mostly arbitrary, i.e.
-	 * can be increased as necessary.
-	 */
-#define KVM_MAX_NR_GOVERNED_FEATURES BITS_PER_LONG
-
-	/*
-	 * Track whether or not the guest is allowed to use features that are
-	 * governed by KVM, where "governed" means KVM needs to manage state
-	 * and/or explicitly enable the feature in hardware.  Typically, but
-	 * not always, governed features can be used by the guest if and only
-	 * if both KVM and userspace want to expose the feature to the guest.
+	 * cpu_caps holds the effective guest capabilities, i.e. the features
+	 * the vCPU is allowed to use.  Typically, but not always, features can
+	 * be used by the guest if and only if both KVM and userspace want to
+	 * expose the feature to the guest.
+	 *
+	 * A common exception is for virtualization holes, i.e. when KVM can't
+	 * prevent the guest from using a feature, in which case the vCPU "has"
+	 * the feature regardless of what KVM or userspace desires.
+	 *
+	 * Note, features that don't require KVM involvement in any way are
+	 * NOT enforced/sanitized by KVM, i.e. are taken verbatim from the
+	 * guest CPUID provided by userspace.
 	 */
-	struct {
-		DECLARE_BITMAP(enabled, KVM_MAX_NR_GOVERNED_FEATURES);
-	} governed_features;
+	u32 cpu_caps[NR_KVM_CPU_CAPS];
 
 	u64 reserved_gpa_bits;
 	int maxphyaddr;
@@ -1444,6 +1466,7 @@ struct kvm_arch {
 	struct kvm_x86_pmu_event_filter __rcu *pmu_event_filter;
 	struct vhost_task *nx_huge_page_recovery_thread;
 	u64 nx_huge_page_last;
+	struct once nx_once;
 
 #ifdef CONFIG_X86_64
 	/* The number of TDP MMU pages across all roots. */
@@ -1535,6 +1558,8 @@ struct kvm_arch {
 	 */
 #define SPLIT_DESC_CACHE_MIN_NR_OBJECTS (SPTE_ENT_PER_PAGE + 1)
 	struct kvm_mmu_memory_cache split_desc_cache;
+
+	gfn_t gfn_direct_bits;
 };
 
 struct kvm_vm_stat {
@@ -1733,8 +1758,7 @@ struct kvm_x86_ops {
 	const unsigned long required_apicv_inhibits;
 	bool allow_apicv_in_x2apic_without_x2apic_virtualization;
 	void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu);
-	void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
-	void (*hwapic_isr_update)(int isr);
+	void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr);
 	void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
 	void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu);
 	void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu);
@@ -1748,6 +1772,21 @@ struct kvm_x86_ops {
 	void (*load_mmu_pgd)(struct kvm_vcpu *vcpu, hpa_t root_hpa,
 			     int root_level);
 
+	/* Update external mapping with page table link. */
+	int (*link_external_spt)(struct kvm *kvm, gfn_t gfn, enum pg_level level,
+				 void *external_spt);
+	/* Update the external page table from spte getting set. */
+	int (*set_external_spte)(struct kvm *kvm, gfn_t gfn, enum pg_level level,
+				 kvm_pfn_t pfn_for_gfn);
+
+	/* Update external page tables for page table about to be freed. */
+	int (*free_external_spt)(struct kvm *kvm, gfn_t gfn, enum pg_level level,
+				 void *external_spt);
+
+	/* Update external page table from spte getting removed, and flush TLB. */
+	int (*remove_external_spte)(struct kvm *kvm, gfn_t gfn, enum pg_level level,
+				    kvm_pfn_t pfn_for_gfn);
+
 	bool (*has_wbinvd_exit)(void);
 
 	u64 (*get_l2_tsc_offset)(struct kvm_vcpu *vcpu);
@@ -1756,12 +1795,15 @@ struct kvm_x86_ops {
 	void (*write_tsc_multiplier)(struct kvm_vcpu *vcpu);
 
 	/*
-	 * Retrieve somewhat arbitrary exit information.  Intended to
+	 * Retrieve somewhat arbitrary exit/entry information.  Intended to
 	 * be used only from within tracepoints or error paths.
 	 */
 	void (*get_exit_info)(struct kvm_vcpu *vcpu, u32 *reason,
 			      u64 *info1, u64 *info2,
-			      u32 *exit_int_info, u32 *exit_int_info_err_code);
+			      u32 *intr_info, u32 *error_code);
+
+	void (*get_entry_info)(struct kvm_vcpu *vcpu,
+			       u32 *intr_info, u32 *error_code);
 
 	int (*check_intercept)(struct kvm_vcpu *vcpu,
 			       struct x86_instruction_info *info,
@@ -2018,8 +2060,8 @@ u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu);
 *			VMware backdoor emulation handles select instructions
 *			and reinjects the #GP for all other cases.
 *
- * EMULTYPE_PF - Set when emulating MMIO by way of an intercepted #PF, in which
- *		 case the CR2/GPA value pass on the stack is valid.
+ * EMULTYPE_PF - Set when an intercepted #PF triggers the emulation, in which case
+ *		 the CR2/GPA value pass on the stack is valid.
 *
 * EMULTYPE_COMPLETE_USER_EXIT - Set when the emulator should update interruptibility
 *				 state and inject single-step #DBs after skipping
@@ -2054,6 +2096,11 @@ u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu);
 #define EMULTYPE_COMPLETE_USER_EXIT (1 << 7)
 #define EMULTYPE_WRITE_PF_TO_SP	    (1 << 8)
 
+static inline bool kvm_can_emulate_event_vectoring(int emul_type)
+{
+	return !(emul_type & EMULTYPE_PF);
+}
+
 int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type);
 int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
 					void *insn, int insn_len);
@@ -2061,6 +2108,8 @@ void __kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu,
 					  u64 *data, u8 ndata);
 void kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu);
 
+void kvm_prepare_event_vectoring_exit(struct kvm_vcpu *vcpu, gpa_t gpa);
+
 void kvm_enable_efer_bits(u64);
 bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer);
 int kvm_get_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 *data);
@@ -2180,12 +2229,6 @@ static inline void kvm_clear_apicv_inhibit(struct kvm *kvm,
 	kvm_set_or_clear_apicv_inhibit(kvm, reason, false);
 }
 
-unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr,
-				      unsigned long a0, unsigned long a1,
-				      unsigned long a2, unsigned long a3,
-				      int op_64_bit, int cpl);
-int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
-
 int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
 		       void *insn, int insn_len);
 void kvm_mmu_print_sptes(struct kvm_vcpu *vcpu, gpa_t gpa, const char *msg);
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 88585c1de416..9e75da97bce0 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -925,5 +925,6 @@ struct kvm_hyperv_eventfd {
 #define KVM_X86_SEV_VM		2
 #define KVM_X86_SEV_ES_VM	3
 #define KVM_X86_SNP_VM		4
+#define KVM_X86_TDX_VM		5
 
 #endif /* _ASM_X86_KVM_H */
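The new KVM_X86_TDX_VM value extends the set of x86 VM types passed as the "machine type" argument to KVM_CREATE_VM. A minimal sketch of how userspace would select a VM type, assuming the host actually advertises it via the KVM_CAP_VM_TYPES bitmask (error handling elided):

    #include <fcntl.h>
    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    static int create_vm_of_type(unsigned long type)
    {
            int kvm_fd = open("/dev/kvm", O_RDWR);

            if (kvm_fd < 0)
                    return -1;
            /* Supported VM types are reported as a bitmask. */
            if (!(ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_VM_TYPES) & (1 << type)))
                    return -1;
            return ioctl(kvm_fd, KVM_CREATE_VM, type);  /* e.g. KVM_X86_TDX_VM */
    }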
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index f7e222953cab..2cbb3874ad39 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -82,15 +82,6 @@ u32 xstate_required_size(u64 xstate_bv, bool compacted)
 	return ret;
 }
 
-#define F feature_bit
-
-/* Scattered Flag - For features that are scattered by cpufeatures.h. */
-#define SF(name)						\
-({								\
-	BUILD_BUG_ON(X86_FEATURE_##name >= MAX_CPU_FEATURES);	\
-	(boot_cpu_has(X86_FEATURE_##name) ? F(name) : 0);	\
-})
-
 /*
 * Magic value used by KVM when querying userspace-provided CPUID entries and
 * doesn't care about the CPIUD index because the index of the function in
@@ -100,8 +91,8 @@ u32 xstate_required_size(u64 xstate_bv, bool compacted)
 */
 #define KVM_CPUID_INDEX_NOT_SIGNIFICANT -1ull
 
-static inline struct kvm_cpuid_entry2 *cpuid_entry2_find(
-	struct kvm_cpuid_entry2 *entries, int nent, u32 function, u64 index)
+static struct kvm_cpuid_entry2 *cpuid_entry2_find(struct kvm_vcpu *vcpu,
+						  u32 function, u64 index)
 {
 	struct kvm_cpuid_entry2 *e;
 	int i;
@@ -118,8 +109,8 @@ static inline struct kvm_cpuid_entry2 *cpuid_entry2_find(
 	 */
 	lockdep_assert_irqs_enabled();
 
-	for (i = 0; i < nent; i++) {
-		e = &entries[i];
+	for (i = 0; i < vcpu->arch.cpuid_nent; i++) {
+		e = &vcpu->arch.cpuid_entries[i];
 
 		if (e->function != function)
 			continue;
@@ -151,9 +142,27 @@ static inline struct kvm_cpuid_entry2 *cpuid_entry2_find(
 	return NULL;
 }
 
-static int kvm_check_cpuid(struct kvm_vcpu *vcpu,
-			   struct kvm_cpuid_entry2 *entries,
-			   int nent)
+struct kvm_cpuid_entry2 *kvm_find_cpuid_entry_index(struct kvm_vcpu *vcpu,
+						    u32 function, u32 index)
+{
+	return cpuid_entry2_find(vcpu, function, index);
+}
+EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry_index);
+
+struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
+					      u32 function)
+{
+	return cpuid_entry2_find(vcpu, function, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
+}
+EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry);
+
+/*
+ * cpuid_entry2_find() and KVM_CPUID_INDEX_NOT_SIGNIFICANT should never be used
+ * directly outside of kvm_find_cpuid_entry() and kvm_find_cpuid_entry_index().
+ */
+#undef KVM_CPUID_INDEX_NOT_SIGNIFICANT
+
+static int kvm_check_cpuid(struct kvm_vcpu *vcpu)
 {
 	struct kvm_cpuid_entry2 *best;
 	u64 xfeatures;
@@ -162,8 +171,7 @@ static int kvm_check_cpuid(struct kvm_vcpu *vcpu,
 	 * The existing code assumes virtual address is 48-bit or 57-bit in the
 	 * canonical address checks; exit if it is ever changed.
 	 */
-	best = cpuid_entry2_find(entries, nent, 0x80000008,
-				 KVM_CPUID_INDEX_NOT_SIGNIFICANT);
+	best = kvm_find_cpuid_entry(vcpu, 0x80000008);
 	if (best) {
 		int vaddr_bits = (best->eax & 0xff00) >> 8;
 
@@ -175,7 +183,7 @@ static int kvm_check_cpuid(struct kvm_vcpu *vcpu,
 	 * Exposing dynamic xfeatures to the guest requires additional
 	 * enabling in the FPU, e.g. to expand the guest XSAVE state size.
 	 */
-	best = cpuid_entry2_find(entries, nent, 0xd, 0);
+	best = kvm_find_cpuid_entry_index(vcpu, 0xd, 0);
 	if (!best)
 		return 0;
 
@@ -187,6 +195,8 @@ static int kvm_check_cpuid(struct kvm_vcpu *vcpu,
 	return fpu_enable_guest_xfd_features(&vcpu->arch.guest_fpu, xfeatures);
 }
 
+static u32 kvm_apply_cpuid_pv_features_quirk(struct kvm_vcpu *vcpu);
+
 /* Check whether the supplied CPUID data is equal to what is already set for the vCPU. */
 static int kvm_cpuid_check_equal(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
 				 int nent)
@@ -194,6 +204,15 @@ static int kvm_cpuid_check_equal(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2
 	struct kvm_cpuid_entry2 *orig;
 	int i;
 
+	/*
+	 * Apply runtime CPUID updates to the incoming CPUID entries to avoid
+	 * false positives due to mismatches on KVM-owned feature flags.
+	 *
+	 * Note!  @e2 and @nent track the _old_ CPUID entries!
+ */ + kvm_update_cpuid_runtime(vcpu); + kvm_apply_cpuid_pv_features_quirk(vcpu); + if (nent != vcpu->arch.cpuid_nent) return -EINVAL; @@ -210,15 +229,15 @@ static int kvm_cpuid_check_equal(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 return 0; } -static struct kvm_hypervisor_cpuid __kvm_get_hypervisor_cpuid(struct kvm_cpuid_entry2 *entries, - int nent, const char *sig) +static struct kvm_hypervisor_cpuid kvm_get_hypervisor_cpuid(struct kvm_vcpu *vcpu, + const char *sig) { struct kvm_hypervisor_cpuid cpuid = {}; struct kvm_cpuid_entry2 *entry; u32 base; for_each_possible_hypervisor_cpuid_base(base) { - entry = cpuid_entry2_find(entries, nent, base, KVM_CPUID_INDEX_NOT_SIGNIFICANT); + entry = kvm_find_cpuid_entry(vcpu, base); if (entry) { u32 signature[3]; @@ -238,118 +257,90 @@ static struct kvm_hypervisor_cpuid __kvm_get_hypervisor_cpuid(struct kvm_cpuid_e return cpuid; } -static struct kvm_hypervisor_cpuid kvm_get_hypervisor_cpuid(struct kvm_vcpu *vcpu, - const char *sig) +static u32 kvm_apply_cpuid_pv_features_quirk(struct kvm_vcpu *vcpu) { - return __kvm_get_hypervisor_cpuid(vcpu->arch.cpuid_entries, - vcpu->arch.cpuid_nent, sig); -} - -static struct kvm_cpuid_entry2 *__kvm_find_kvm_cpuid_features(struct kvm_cpuid_entry2 *entries, - int nent, u32 kvm_cpuid_base) -{ - return cpuid_entry2_find(entries, nent, kvm_cpuid_base | KVM_CPUID_FEATURES, - KVM_CPUID_INDEX_NOT_SIGNIFICANT); -} - -static struct kvm_cpuid_entry2 *kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu) -{ - u32 base = vcpu->arch.kvm_cpuid.base; + struct kvm_hypervisor_cpuid kvm_cpuid; + struct kvm_cpuid_entry2 *best; - if (!base) - return NULL; + kvm_cpuid = kvm_get_hypervisor_cpuid(vcpu, KVM_SIGNATURE); + if (!kvm_cpuid.base) + return 0; - return __kvm_find_kvm_cpuid_features(vcpu->arch.cpuid_entries, - vcpu->arch.cpuid_nent, base); -} + best = kvm_find_cpuid_entry(vcpu, kvm_cpuid.base | KVM_CPUID_FEATURES); + if (!best) + return 0; -void kvm_update_pv_runtime(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid_entry2 *best = kvm_find_kvm_cpuid_features(vcpu); + if (kvm_hlt_in_guest(vcpu->kvm)) + best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT); - /* - * save the feature bitmap to avoid cpuid lookup for every PV - * operation - */ - if (best) - vcpu->arch.pv_cpuid.features = best->eax; + return best->eax; } /* * Calculate guest's supported XCR0 taking into account guest CPUID data and * KVM's supported XCR0 (comprised of host's XCR0 and KVM_SUPPORTED_XCR0). 
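 *
 * Worked example with assumed values (illustrative only): if guest
 * CPUID.0xD.0x0 reports EAX = 0x7 and EDX = 0x0 while
 * kvm_caps.supported_xcr0 is 0x2e7, the result below is
 * (0x7 | ((u64)0x0 << 32)) & 0x2e7 = 0x7, i.e. only x87/SSE/AVX state is
 * exposed to the guest.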
*/ -static u64 cpuid_get_supported_xcr0(struct kvm_cpuid_entry2 *entries, int nent) +static u64 cpuid_get_supported_xcr0(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; - best = cpuid_entry2_find(entries, nent, 0xd, 0); + best = kvm_find_cpuid_entry_index(vcpu, 0xd, 0); if (!best) return 0; return (best->eax | ((u64)best->edx << 32)) & kvm_caps.supported_xcr0; } -static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *entries, - int nent) +static __always_inline void kvm_update_feature_runtime(struct kvm_vcpu *vcpu, + struct kvm_cpuid_entry2 *entry, + unsigned int x86_feature, + bool has_feature) +{ + cpuid_entry_change(entry, x86_feature, has_feature); + guest_cpu_cap_change(vcpu, x86_feature, has_feature); +} + +void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; - struct kvm_hypervisor_cpuid kvm_cpuid; - best = cpuid_entry2_find(entries, nent, 1, KVM_CPUID_INDEX_NOT_SIGNIFICANT); + best = kvm_find_cpuid_entry(vcpu, 1); if (best) { - /* Update OSXSAVE bit */ - if (boot_cpu_has(X86_FEATURE_XSAVE)) - cpuid_entry_change(best, X86_FEATURE_OSXSAVE, + kvm_update_feature_runtime(vcpu, best, X86_FEATURE_OSXSAVE, kvm_is_cr4_bit_set(vcpu, X86_CR4_OSXSAVE)); - cpuid_entry_change(best, X86_FEATURE_APIC, - vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE); + kvm_update_feature_runtime(vcpu, best, X86_FEATURE_APIC, + vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE); + + if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT)) + kvm_update_feature_runtime(vcpu, best, X86_FEATURE_MWAIT, + vcpu->arch.ia32_misc_enable_msr & + MSR_IA32_MISC_ENABLE_MWAIT); } - best = cpuid_entry2_find(entries, nent, 7, 0); - if (best && boot_cpu_has(X86_FEATURE_PKU) && best->function == 0x7) - cpuid_entry_change(best, X86_FEATURE_OSPKE, - kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE)); + best = kvm_find_cpuid_entry_index(vcpu, 7, 0); + if (best) + kvm_update_feature_runtime(vcpu, best, X86_FEATURE_OSPKE, + kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE)); + - best = cpuid_entry2_find(entries, nent, 0xD, 0); + best = kvm_find_cpuid_entry_index(vcpu, 0xD, 0); if (best) best->ebx = xstate_required_size(vcpu->arch.xcr0, false); - best = cpuid_entry2_find(entries, nent, 0xD, 1); + best = kvm_find_cpuid_entry_index(vcpu, 0xD, 1); if (best && (cpuid_entry_has(best, X86_FEATURE_XSAVES) || cpuid_entry_has(best, X86_FEATURE_XSAVEC))) best->ebx = xstate_required_size(vcpu->arch.xcr0, true); - - kvm_cpuid = __kvm_get_hypervisor_cpuid(entries, nent, KVM_SIGNATURE); - if (kvm_cpuid.base) { - best = __kvm_find_kvm_cpuid_features(entries, nent, kvm_cpuid.base); - if (kvm_hlt_in_guest(vcpu->kvm) && best) - best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT); - } - - if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT)) { - best = cpuid_entry2_find(entries, nent, 0x1, KVM_CPUID_INDEX_NOT_SIGNIFICANT); - if (best) - cpuid_entry_change(best, X86_FEATURE_MWAIT, - vcpu->arch.ia32_misc_enable_msr & - MSR_IA32_MISC_ENABLE_MWAIT); - } -} - -void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu) -{ - __kvm_update_cpuid_runtime(vcpu, vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent); } EXPORT_SYMBOL_GPL(kvm_update_cpuid_runtime); -static bool kvm_cpuid_has_hyperv(struct kvm_cpuid_entry2 *entries, int nent) +static bool kvm_cpuid_has_hyperv(struct kvm_vcpu *vcpu) { #ifdef CONFIG_KVM_HYPERV struct kvm_cpuid_entry2 *entry; - entry = cpuid_entry2_find(entries, nent, HYPERV_CPUID_INTERFACE, - KVM_CPUID_INDEX_NOT_SIGNIFICANT); + entry = kvm_find_cpuid_entry(vcpu, 
HYPERV_CPUID_INTERFACE); return entry && entry->eax == HYPERV_CPUID_SIGNATURE_EAX; #else return false; @@ -368,15 +359,71 @@ static bool guest_cpuid_is_amd_or_hygon(struct kvm_vcpu *vcpu) is_guest_vendor_hygon(entry->ebx, entry->ecx, entry->edx); } -static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) +/* + * This isn't truly "unsafe", but except for the cpu_caps initialization code, + * all register lookups should use __cpuid_entry_get_reg(), which provides + * compile-time validation of the input. + */ +static u32 cpuid_get_reg_unsafe(struct kvm_cpuid_entry2 *entry, u32 reg) +{ + switch (reg) { + case CPUID_EAX: + return entry->eax; + case CPUID_EBX: + return entry->ebx; + case CPUID_ECX: + return entry->ecx; + case CPUID_EDX: + return entry->edx; + default: + WARN_ON_ONCE(1); + return 0; + } +} + +static int cpuid_func_emulated(struct kvm_cpuid_entry2 *entry, u32 func, + bool include_partially_emulated); + +void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; struct kvm_cpuid_entry2 *best; + struct kvm_cpuid_entry2 *entry; bool allow_gbpages; + int i; + + memset(vcpu->arch.cpu_caps, 0, sizeof(vcpu->arch.cpu_caps)); + BUILD_BUG_ON(ARRAY_SIZE(reverse_cpuid) != NR_KVM_CPU_CAPS); - BUILD_BUG_ON(KVM_NR_GOVERNED_FEATURES > KVM_MAX_NR_GOVERNED_FEATURES); - bitmap_zero(vcpu->arch.governed_features.enabled, - KVM_MAX_NR_GOVERNED_FEATURES); + /* + * Reset guest capabilities to userspace's guest CPUID definition, i.e. + * honor userspace's definition for features that don't require KVM or + * hardware management/support (or that KVM simply doesn't care about). + */ + for (i = 0; i < NR_KVM_CPU_CAPS; i++) { + const struct cpuid_reg cpuid = reverse_cpuid[i]; + struct kvm_cpuid_entry2 emulated; + + if (!cpuid.function) + continue; + + entry = kvm_find_cpuid_entry_index(vcpu, cpuid.function, cpuid.index); + if (!entry) + continue; + + cpuid_func_emulated(&emulated, cpuid.function, true); + + /* + * A vCPU has a feature if it's supported by KVM and is enabled + * in guest CPUID. Note, this includes features that are + * supported by KVM but aren't advertised to userspace! + */ + vcpu->arch.cpu_caps[i] = kvm_cpu_caps[i] | + cpuid_get_reg_unsafe(&emulated, cpuid.reg); + vcpu->arch.cpu_caps[i] &= cpuid_get_reg_unsafe(entry, cpuid.reg); + } + + kvm_update_cpuid_runtime(vcpu); /* * If TDP is enabled, let the guest use GBPAGES if they're supported in @@ -390,9 +437,8 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) * and can install smaller shadow pages if the host lacks 1GiB support. */ allow_gbpages = tdp_enabled ? 
boot_cpu_has(X86_FEATURE_GBPAGES) : - guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES); - if (allow_gbpages) - kvm_governed_feature_set(vcpu, X86_FEATURE_GBPAGES); + guest_cpu_cap_has(vcpu, X86_FEATURE_GBPAGES); + guest_cpu_cap_change(vcpu, X86_FEATURE_GBPAGES, allow_gbpages); best = kvm_find_cpuid_entry(vcpu, 1); if (best && apic) { @@ -404,21 +450,22 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) kvm_apic_set_version(vcpu); } - vcpu->arch.guest_supported_xcr0 = - cpuid_get_supported_xcr0(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent); + vcpu->arch.guest_supported_xcr0 = cpuid_get_supported_xcr0(vcpu); - kvm_update_pv_runtime(vcpu); + vcpu->arch.pv_cpuid.features = kvm_apply_cpuid_pv_features_quirk(vcpu); vcpu->arch.is_amd_compatible = guest_cpuid_is_amd_or_hygon(vcpu); vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); vcpu->arch.reserved_gpa_bits = kvm_vcpu_reserved_gpa_bits_raw(vcpu); kvm_pmu_refresh(vcpu); - vcpu->arch.cr4_guest_rsvd_bits = - __cr4_reserved_bits(guest_cpuid_has, vcpu); - kvm_hv_set_cpuid(vcpu, kvm_cpuid_has_hyperv(vcpu->arch.cpuid_entries, - vcpu->arch.cpuid_nent)); +#define __kvm_cpu_cap_has(UNUSED_, f) kvm_cpu_cap_has(f) + vcpu->arch.cr4_guest_rsvd_bits = __cr4_reserved_bits(__kvm_cpu_cap_has, UNUSED_) | + __cr4_reserved_bits(guest_cpu_cap_has, vcpu); +#undef __kvm_cpu_cap_has + + kvm_hv_set_cpuid(vcpu, kvm_cpuid_has_hyperv(vcpu)); /* Invoke the vendor callback only after the above state is updated. */ kvm_x86_call(vcpu_after_set_cpuid)(vcpu); @@ -457,9 +504,25 @@ u64 kvm_vcpu_reserved_gpa_bits_raw(struct kvm_vcpu *vcpu) static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2, int nent) { + u32 vcpu_caps[NR_KVM_CPU_CAPS]; int r; - __kvm_update_cpuid_runtime(vcpu, e2, nent); + /* + * Swap the existing (old) entries with the incoming (new) entries in + * order to massage the new entries, e.g. to account for dynamic bits + * that KVM controls, without clobbering the current guest CPUID, which + * KVM needs to preserve in order to unwind on failure. + * + * Similarly, save the vCPU's current cpu_caps so that the capabilities + * can be updated alongside the CPUID entries when performing runtime + * updates. Full initialization is done if and only if the vCPU hasn't + * run, i.e. only if userspace is potentially changing CPUID features. 
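+	 *
+	 * A rough sketch of the resulting flow (illustrative, not literal
+	 * code from this patch):
+	 *
+	 *	swap(old, new);         // operate on the new state
+	 *	if (any check fails)
+	 *		swap(old, new); // swap back to unwind
+	 *	else
+	 *		kvfree(new);    // "new" now holds the old entries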
+ */ + swap(vcpu->arch.cpuid_entries, e2); + swap(vcpu->arch.cpuid_nent, nent); + + memcpy(vcpu_caps, vcpu->arch.cpu_caps, sizeof(vcpu_caps)); + BUILD_BUG_ON(sizeof(vcpu_caps) != sizeof(vcpu->arch.cpu_caps)); /* * KVM does not correctly handle changing guest CPUID after KVM_RUN, as @@ -475,35 +538,36 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2, if (kvm_vcpu_has_run(vcpu)) { r = kvm_cpuid_check_equal(vcpu, e2, nent); if (r) - return r; - - kvfree(e2); - return 0; + goto err; + goto success; } #ifdef CONFIG_KVM_HYPERV - if (kvm_cpuid_has_hyperv(e2, nent)) { + if (kvm_cpuid_has_hyperv(vcpu)) { r = kvm_hv_vcpu_init(vcpu); if (r) - return r; + goto err; } #endif - r = kvm_check_cpuid(vcpu, e2, nent); + r = kvm_check_cpuid(vcpu); if (r) - return r; - - kvfree(vcpu->arch.cpuid_entries); - vcpu->arch.cpuid_entries = e2; - vcpu->arch.cpuid_nent = nent; + goto err; - vcpu->arch.kvm_cpuid = kvm_get_hypervisor_cpuid(vcpu, KVM_SIGNATURE); #ifdef CONFIG_KVM_XEN vcpu->arch.xen.cpuid = kvm_get_hypervisor_cpuid(vcpu, XEN_SIGNATURE); #endif kvm_vcpu_after_set_cpuid(vcpu); +success: + kvfree(e2); return 0; + +err: + memcpy(vcpu->arch.cpu_caps, vcpu_caps, sizeof(vcpu_caps)); + swap(vcpu->arch.cpuid_entries, e2); + swap(vcpu->arch.cpuid_nent, nent); + return r; } /* when an old userspace process fills a new kernel module */ @@ -590,107 +654,294 @@ int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, return 0; } -/* Mask kvm_cpu_caps for @leaf with the raw CPUID capabilities of this CPU. */ -static __always_inline void __kvm_cpu_cap_mask(unsigned int leaf) +static __always_inline u32 raw_cpuid_get(struct cpuid_reg cpuid) { - const struct cpuid_reg cpuid = x86_feature_cpuid(leaf * 32); struct kvm_cpuid_entry2 entry; + u32 base; - reverse_cpuid_check(leaf); + /* + * KVM only supports features defined by Intel (0x0), AMD (0x80000000), + * and Centaur (0xc0000000). WARN if a feature for new vendor base is + * defined, as this and other code would need to be updated. + */ + base = cpuid.function & 0xffff0000; + if (WARN_ON_ONCE(base && base != 0x80000000 && base != 0xc0000000)) + return 0; + + if (cpuid_eax(base) < cpuid.function) + return 0; cpuid_count(cpuid.function, cpuid.index, &entry.eax, &entry.ebx, &entry.ecx, &entry.edx); - kvm_cpu_caps[leaf] &= *__cpuid_entry_get_reg(&entry, cpuid.reg); + return *__cpuid_entry_get_reg(&entry, cpuid.reg); } -static __always_inline -void kvm_cpu_cap_init_kvm_defined(enum kvm_only_cpuid_leafs leaf, u32 mask) -{ - /* Use kvm_cpu_cap_mask for leafs that aren't KVM-only. */ - BUILD_BUG_ON(leaf < NCAPINTS); +/* + * For kernel-defined leafs, mask KVM's supported feature set with the kernel's + * capabilities as well as raw CPUID. For KVM-defined leafs, consult only raw + * CPUID, as KVM is the one and only authority (in the kernel). + */ +#define kvm_cpu_cap_init(leaf, feature_initializers...) 
\
+do {									\
+	const struct cpuid_reg cpuid = x86_feature_cpuid(leaf * 32);	\
+	const u32 __maybe_unused kvm_cpu_cap_init_in_progress = leaf;	\
+	const u32 *kernel_cpu_caps = boot_cpu_data.x86_capability;	\
+	u32 kvm_cpu_cap_passthrough = 0;				\
+	u32 kvm_cpu_cap_synthesized = 0;				\
+	u32 kvm_cpu_cap_emulated = 0;					\
+	u32 kvm_cpu_cap_features = 0;					\
+									\
+	feature_initializers						\
+									\
+	kvm_cpu_caps[leaf] = kvm_cpu_cap_features;			\
+									\
+	if (leaf < NCAPINTS)						\
+		kvm_cpu_caps[leaf] &= kernel_cpu_caps[leaf];		\
+									\
+	kvm_cpu_caps[leaf] |= kvm_cpu_cap_passthrough;			\
+	kvm_cpu_caps[leaf] &= (raw_cpuid_get(cpuid) |			\
+			       kvm_cpu_cap_synthesized);		\
+	kvm_cpu_caps[leaf] |= kvm_cpu_cap_emulated;			\
+} while (0)
 
-	kvm_cpu_caps[leaf] = mask;
+/*
+ * Assert that the feature bit being declared, e.g. via F(), is in the CPUID
+ * word that's being initialized.  Exempt 0x8000_0001.EDX usage of 0x1.EDX
+ * features, as AMD duplicated many 0x1.EDX features into 0x8000_0001.EDX.
+ */
+#define KVM_VALIDATE_CPU_CAP_USAGE(name)				\
+do {									\
+	u32 __leaf = __feature_leaf(X86_FEATURE_##name);		\
+									\
+	BUILD_BUG_ON(__leaf != kvm_cpu_cap_init_in_progress);		\
+} while (0)
+
+#define F(name)							\
+({									\
+	KVM_VALIDATE_CPU_CAP_USAGE(name);				\
+	kvm_cpu_cap_features |= feature_bit(name);			\
+})
 
-	__kvm_cpu_cap_mask(leaf);
-}
+/* Scattered Flag - For features that are scattered by cpufeatures.h. */
+#define SCATTERED_F(name)						\
+({									\
+	BUILD_BUG_ON(X86_FEATURE_##name >= MAX_CPU_FEATURES);		\
+	KVM_VALIDATE_CPU_CAP_USAGE(name);				\
+	if (boot_cpu_has(X86_FEATURE_##name))				\
+		F(name);						\
+})
 
-static __always_inline void kvm_cpu_cap_mask(enum cpuid_leafs leaf, u32 mask)
-{
-	/* Use kvm_cpu_cap_init_kvm_defined for KVM-only leafs. */
-	BUILD_BUG_ON(leaf >= NCAPINTS);
+/* Features that KVM supports only on 64-bit kernels. */
+#define X86_64_F(name)							\
+({									\
+	KVM_VALIDATE_CPU_CAP_USAGE(name);				\
+	if (IS_ENABLED(CONFIG_X86_64))					\
+		F(name);						\
+})
 
-	kvm_cpu_caps[leaf] &= mask;
+/*
+ * Emulated Feature - For features that KVM emulates in software irrespective
+ * of host CPU/kernel support.
+ */
+#define EMULATED_F(name)						\
+({									\
+	kvm_cpu_cap_emulated |= feature_bit(name);			\
+	F(name);							\
+})
 
-	__kvm_cpu_cap_mask(leaf);
-}
+/*
+ * Synthesized Feature - For features that are synthesized into boot_cpu_data,
+ * i.e. may not be present in the raw CPUID, but can still be advertised to
+ * userspace.  Primarily used for mitigation related feature flags.
+ */
+#define SYNTHESIZED_F(name)						\
+({									\
+	kvm_cpu_cap_synthesized |= feature_bit(name);			\
+	F(name);							\
+})
+
+/*
+ * Passthrough Feature - For features that KVM supports based purely on raw
+ * hardware CPUID, i.e. that KVM virtualizes even if the host kernel doesn't
+ * use the feature.  Simply force set the feature in KVM's capabilities, raw
+ * CPUID support will be factored in by kvm_cpu_cap_init().
+ */
+#define PASSTHROUGH_F(name)						\
+({									\
+	kvm_cpu_cap_passthrough |= feature_bit(name);			\
+	F(name);							\
+})
+
+/*
+ * Aliased Features - For features in 0x8000_0001.EDX that are duplicates of
+ * identical 0x1.EDX features, and thus are aliased from 0x1 to 0x8000_0001.
+ */
+#define ALIASED_1_EDX_F(name)						\
+({									\
+	BUILD_BUG_ON(__feature_leaf(X86_FEATURE_##name) != CPUID_1_EDX); \
+	BUILD_BUG_ON(kvm_cpu_cap_init_in_progress != CPUID_8000_0001_EDX); \
+	kvm_cpu_cap_features |= feature_bit(name);			\
+})
+
+/*
+ * Vendor Features - For features that KVM supports, but that are only set
+ * later because they require additional vendor enabling.
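+ *
+ * For example (illustrative): VENDOR_F(VMX) only validates that the flag
+ * belongs to the word being initialized and leaves the bit clear; the
+ * vendor module is then expected to set it, e.g. vmx_set_cpu_caps() doing
+ * kvm_cpu_cap_set(X86_FEATURE_VMX) when nested support is enabled.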
+ */ +#define VENDOR_F(name) \ +({ \ + KVM_VALIDATE_CPU_CAP_USAGE(name); \ +}) + +/* + * Runtime Features - For features that KVM dynamically sets/clears at runtime, + * e.g. when CR4 changes, but which are never advertised to userspace. + */ +#define RUNTIME_F(name) \ +({ \ + KVM_VALIDATE_CPU_CAP_USAGE(name); \ +}) + +/* + * Undefine the MSR bit macro to avoid token concatenation issues when + * processing X86_FEATURE_SPEC_CTRL_SSBD. + */ +#undef SPEC_CTRL_SSBD + +/* DS is defined by ptrace-abi.h on 32-bit builds. */ +#undef DS void kvm_set_cpu_caps(void) { -#ifdef CONFIG_X86_64 - unsigned int f_gbpages = F(GBPAGES); - unsigned int f_lm = F(LM); - unsigned int f_xfd = F(XFD); -#else - unsigned int f_gbpages = 0; - unsigned int f_lm = 0; - unsigned int f_xfd = 0; -#endif memset(kvm_cpu_caps, 0, sizeof(kvm_cpu_caps)); BUILD_BUG_ON(sizeof(kvm_cpu_caps) - (NKVMCAPINTS * sizeof(*kvm_cpu_caps)) > sizeof(boot_cpu_data.x86_capability)); - memcpy(&kvm_cpu_caps, &boot_cpu_data.x86_capability, - sizeof(kvm_cpu_caps) - (NKVMCAPINTS * sizeof(*kvm_cpu_caps))); - - kvm_cpu_cap_mask(CPUID_1_ECX, + kvm_cpu_cap_init(CPUID_1_ECX, + F(XMM3), + F(PCLMULQDQ), + VENDOR_F(DTES64), /* * NOTE: MONITOR (and MWAIT) are emulated as NOP, but *not* - * advertised to guests via CPUID! + * advertised to guests via CPUID! MWAIT is also technically a + * runtime flag thanks to IA32_MISC_ENABLES; mark it as such so + * that KVM is aware that it's a known, unadvertised flag. */ - F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ | - 0 /* DS-CPL, VMX, SMX, EST */ | - 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ | - F(FMA) | F(CX16) | 0 /* xTPR Update */ | F(PDCM) | - F(PCID) | 0 /* Reserved, DCA */ | F(XMM4_1) | - F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) | - 0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) | - F(F16C) | F(RDRAND) + RUNTIME_F(MWAIT), + /* DS-CPL */ + VENDOR_F(VMX), + /* SMX, EST */ + /* TM2 */ + F(SSSE3), + /* CNXT-ID */ + /* Reserved */ + F(FMA), + F(CX16), + /* xTPR Update */ + F(PDCM), + F(PCID), + /* Reserved, DCA */ + F(XMM4_1), + F(XMM4_2), + EMULATED_F(X2APIC), + F(MOVBE), + F(POPCNT), + EMULATED_F(TSC_DEADLINE_TIMER), + F(AES), + F(XSAVE), + RUNTIME_F(OSXSAVE), + F(AVX), + F(F16C), + F(RDRAND), + EMULATED_F(HYPERVISOR), ); - /* KVM emulates x2apic in software irrespective of host support. 
*/ - kvm_cpu_cap_set(X86_FEATURE_X2APIC); - - kvm_cpu_cap_mask(CPUID_1_EDX, - F(FPU) | F(VME) | F(DE) | F(PSE) | - F(TSC) | F(MSR) | F(PAE) | F(MCE) | - F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) | - F(MTRR) | F(PGE) | F(MCA) | F(CMOV) | - F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLUSH) | - 0 /* Reserved, DS, ACPI */ | F(MMX) | - F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) | - 0 /* HTT, TM, Reserved, PBE */ + + kvm_cpu_cap_init(CPUID_1_EDX, + F(FPU), + F(VME), + F(DE), + F(PSE), + F(TSC), + F(MSR), + F(PAE), + F(MCE), + F(CX8), + F(APIC), + /* Reserved */ + F(SEP), + F(MTRR), + F(PGE), + F(MCA), + F(CMOV), + F(PAT), + F(PSE36), + /* PSN */ + F(CLFLUSH), + /* Reserved */ + VENDOR_F(DS), + /* ACPI */ + F(MMX), + F(FXSR), + F(XMM), + F(XMM2), + F(SELFSNOOP), + /* HTT, TM, Reserved, PBE */ ); - kvm_cpu_cap_mask(CPUID_7_0_EBX, - F(FSGSBASE) | F(SGX) | F(BMI1) | F(HLE) | F(AVX2) | - F(FDP_EXCPTN_ONLY) | F(SMEP) | F(BMI2) | F(ERMS) | F(INVPCID) | - F(RTM) | F(ZERO_FCS_FDS) | 0 /*MPX*/ | F(AVX512F) | - F(AVX512DQ) | F(RDSEED) | F(ADX) | F(SMAP) | F(AVX512IFMA) | - F(CLFLUSHOPT) | F(CLWB) | 0 /*INTEL_PT*/ | F(AVX512PF) | - F(AVX512ER) | F(AVX512CD) | F(SHA_NI) | F(AVX512BW) | - F(AVX512VL)); - - kvm_cpu_cap_mask(CPUID_7_ECX, - F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | F(RDPID) | - F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) | - F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) | - F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/ | - F(SGX_LC) | F(BUS_LOCK_DETECT) + kvm_cpu_cap_init(CPUID_7_0_EBX, + F(FSGSBASE), + EMULATED_F(TSC_ADJUST), + F(SGX), + F(BMI1), + F(HLE), + F(AVX2), + F(FDP_EXCPTN_ONLY), + F(SMEP), + F(BMI2), + F(ERMS), + F(INVPCID), + F(RTM), + F(ZERO_FCS_FDS), + VENDOR_F(MPX), + F(AVX512F), + F(AVX512DQ), + F(RDSEED), + F(ADX), + F(SMAP), + F(AVX512IFMA), + F(CLFLUSHOPT), + F(CLWB), + VENDOR_F(INTEL_PT), + F(AVX512PF), + F(AVX512ER), + F(AVX512CD), + F(SHA_NI), + F(AVX512BW), + F(AVX512VL), + ); + + kvm_cpu_cap_init(CPUID_7_ECX, + F(AVX512VBMI), + PASSTHROUGH_F(LA57), + F(PKU), + RUNTIME_F(OSPKE), + F(RDPID), + F(AVX512_VPOPCNTDQ), + F(UMIP), + F(AVX512_VBMI2), + F(GFNI), + F(VAES), + F(VPCLMULQDQ), + F(AVX512_VNNI), + F(AVX512_BITALG), + F(CLDEMOTE), + F(MOVDIRI), + F(MOVDIR64B), + VENDOR_F(WAITPKG), + F(SGX_LC), + F(BUS_LOCK_DETECT), ); - /* Set LA57 based on hardware capability. */ - if (cpuid_ecx(7) & F(LA57)) - kvm_cpu_cap_set(X86_FEATURE_LA57); /* * PKU not yet implemented for shadow paging and requires OSPKE @@ -699,18 +950,25 @@ void kvm_set_cpu_caps(void) if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE)) kvm_cpu_cap_clear(X86_FEATURE_PKU); - kvm_cpu_cap_mask(CPUID_7_EDX, - F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | - F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) | - F(MD_CLEAR) | F(AVX512_VP2INTERSECT) | F(FSRM) | - F(SERIALIZE) | F(TSXLDTRK) | F(AVX512_FP16) | - F(AMX_TILE) | F(AMX_INT8) | F(AMX_BF16) | F(FLUSH_L1D) + kvm_cpu_cap_init(CPUID_7_EDX, + F(AVX512_4VNNIW), + F(AVX512_4FMAPS), + F(SPEC_CTRL), + F(SPEC_CTRL_SSBD), + EMULATED_F(ARCH_CAPABILITIES), + F(INTEL_STIBP), + F(MD_CLEAR), + F(AVX512_VP2INTERSECT), + F(FSRM), + F(SERIALIZE), + F(TSXLDTRK), + F(AVX512_FP16), + F(AMX_TILE), + F(AMX_INT8), + F(AMX_BF16), + F(FLUSH_L1D), ); - /* TSC_ADJUST and ARCH_CAPABILITIES are emulated in software. 
*/ - kvm_cpu_cap_set(X86_FEATURE_TSC_ADJUST); - kvm_cpu_cap_set(X86_FEATURE_ARCH_CAPABILITIES); - if (boot_cpu_has(X86_FEATURE_AMD_IBPB_RET) && boot_cpu_has(X86_FEATURE_AMD_IBPB) && boot_cpu_has(X86_FEATURE_AMD_IBRS)) @@ -720,65 +978,133 @@ void kvm_set_cpu_caps(void) if (boot_cpu_has(X86_FEATURE_AMD_SSBD)) kvm_cpu_cap_set(X86_FEATURE_SPEC_CTRL_SSBD); - kvm_cpu_cap_mask(CPUID_7_1_EAX, - F(SHA512) | F(SM3) | F(SM4) | F(AVX_VNNI) | F(AVX512_BF16) | - F(CMPCCXADD) | F(FZRM) | F(FSRS) | F(FSRC) | F(AMX_FP16) | - F(AVX_IFMA) | F(LAM) + kvm_cpu_cap_init(CPUID_7_1_EAX, + F(SHA512), + F(SM3), + F(SM4), + F(AVX_VNNI), + F(AVX512_BF16), + F(CMPCCXADD), + F(FZRM), + F(FSRS), + F(FSRC), + F(AMX_FP16), + F(AVX_IFMA), + F(LAM), ); - kvm_cpu_cap_init_kvm_defined(CPUID_7_1_EDX, - F(AVX_VNNI_INT8) | F(AVX_NE_CONVERT) | F(AMX_COMPLEX) | - F(AVX_VNNI_INT16) | F(PREFETCHITI) | F(AVX10) + kvm_cpu_cap_init(CPUID_7_1_EDX, + F(AVX_VNNI_INT8), + F(AVX_NE_CONVERT), + F(AMX_COMPLEX), + F(AVX_VNNI_INT16), + F(PREFETCHITI), + F(AVX10), ); - kvm_cpu_cap_init_kvm_defined(CPUID_7_2_EDX, - F(INTEL_PSFD) | F(IPRED_CTRL) | F(RRSBA_CTRL) | F(DDPD_U) | - F(BHI_CTRL) | F(MCDT_NO) + kvm_cpu_cap_init(CPUID_7_2_EDX, + F(INTEL_PSFD), + F(IPRED_CTRL), + F(RRSBA_CTRL), + F(DDPD_U), + F(BHI_CTRL), + F(MCDT_NO), ); - kvm_cpu_cap_mask(CPUID_D_1_EAX, - F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | F(XSAVES) | f_xfd + kvm_cpu_cap_init(CPUID_D_1_EAX, + F(XSAVEOPT), + F(XSAVEC), + F(XGETBV1), + F(XSAVES), + X86_64_F(XFD), ); - kvm_cpu_cap_init_kvm_defined(CPUID_12_EAX, - SF(SGX1) | SF(SGX2) | SF(SGX_EDECCSSA) + kvm_cpu_cap_init(CPUID_12_EAX, + SCATTERED_F(SGX1), + SCATTERED_F(SGX2), + SCATTERED_F(SGX_EDECCSSA), ); - kvm_cpu_cap_init_kvm_defined(CPUID_24_0_EBX, - F(AVX10_128) | F(AVX10_256) | F(AVX10_512) + kvm_cpu_cap_init(CPUID_24_0_EBX, + F(AVX10_128), + F(AVX10_256), + F(AVX10_512), ); - kvm_cpu_cap_mask(CPUID_8000_0001_ECX, - F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ | - F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) | - F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) | - 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM) | - F(TOPOEXT) | 0 /* PERFCTR_CORE */ + kvm_cpu_cap_init(CPUID_8000_0001_ECX, + F(LAHF_LM), + F(CMP_LEGACY), + VENDOR_F(SVM), + /* ExtApicSpace */ + F(CR8_LEGACY), + F(ABM), + F(SSE4A), + F(MISALIGNSSE), + F(3DNOWPREFETCH), + F(OSVW), + /* IBS */ + F(XOP), + /* SKINIT, WDT, LWP */ + F(FMA4), + F(TBM), + F(TOPOEXT), + VENDOR_F(PERFCTR_CORE), ); - kvm_cpu_cap_mask(CPUID_8000_0001_EDX, - F(FPU) | F(VME) | F(DE) | F(PSE) | - F(TSC) | F(MSR) | F(PAE) | F(MCE) | - F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) | - F(MTRR) | F(PGE) | F(MCA) | F(CMOV) | - F(PAT) | F(PSE36) | 0 /* Reserved */ | - F(NX) | 0 /* Reserved */ | F(MMXEXT) | F(MMX) | - F(FXSR) | F(FXSR_OPT) | f_gbpages | F(RDTSCP) | - 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW) + kvm_cpu_cap_init(CPUID_8000_0001_EDX, + ALIASED_1_EDX_F(FPU), + ALIASED_1_EDX_F(VME), + ALIASED_1_EDX_F(DE), + ALIASED_1_EDX_F(PSE), + ALIASED_1_EDX_F(TSC), + ALIASED_1_EDX_F(MSR), + ALIASED_1_EDX_F(PAE), + ALIASED_1_EDX_F(MCE), + ALIASED_1_EDX_F(CX8), + ALIASED_1_EDX_F(APIC), + /* Reserved */ + F(SYSCALL), + ALIASED_1_EDX_F(MTRR), + ALIASED_1_EDX_F(PGE), + ALIASED_1_EDX_F(MCA), + ALIASED_1_EDX_F(CMOV), + ALIASED_1_EDX_F(PAT), + ALIASED_1_EDX_F(PSE36), + /* Reserved */ + F(NX), + /* Reserved */ + F(MMXEXT), + ALIASED_1_EDX_F(MMX), + ALIASED_1_EDX_F(FXSR), + F(FXSR_OPT), + X86_64_F(GBPAGES), + F(RDTSCP), + /* Reserved */ + X86_64_F(LM), + F(3DNOWEXT), + F(3DNOW), ); if 
(!tdp_enabled && IS_ENABLED(CONFIG_X86_64)) kvm_cpu_cap_set(X86_FEATURE_GBPAGES); - kvm_cpu_cap_init_kvm_defined(CPUID_8000_0007_EDX, - SF(CONSTANT_TSC) + kvm_cpu_cap_init(CPUID_8000_0007_EDX, + SCATTERED_F(CONSTANT_TSC), ); - kvm_cpu_cap_mask(CPUID_8000_0008_EBX, - F(CLZERO) | F(XSAVEERPTR) | - F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) | - F(AMD_SSB_NO) | F(AMD_STIBP) | F(AMD_STIBP_ALWAYS_ON) | - F(AMD_PSFD) | F(AMD_IBPB_RET) + kvm_cpu_cap_init(CPUID_8000_0008_EBX, + F(CLZERO), + F(XSAVEERPTR), + F(WBNOINVD), + F(AMD_IBPB), + F(AMD_IBRS), + F(AMD_SSBD), + F(VIRT_SSBD), + F(AMD_SSB_NO), + F(AMD_STIBP), + F(AMD_STIBP_ALWAYS_ON), + F(AMD_PSFD), + F(AMD_IBPB_RET), ); /* @@ -808,50 +1134,73 @@ void kvm_set_cpu_caps(void) !boot_cpu_has(X86_FEATURE_AMD_SSBD)) kvm_cpu_cap_set(X86_FEATURE_VIRT_SSBD); - /* - * Hide all SVM features by default, SVM will set the cap bits for - * features it emulates and/or exposes for L1. - */ - kvm_cpu_cap_mask(CPUID_8000_000A_EDX, 0); - - kvm_cpu_cap_mask(CPUID_8000_001F_EAX, - 0 /* SME */ | 0 /* SEV */ | 0 /* VM_PAGE_FLUSH */ | 0 /* SEV_ES */ | - F(SME_COHERENT)); + /* All SVM features required additional vendor module enabling. */ + kvm_cpu_cap_init(CPUID_8000_000A_EDX, + VENDOR_F(NPT), + VENDOR_F(VMCBCLEAN), + VENDOR_F(FLUSHBYASID), + VENDOR_F(NRIPS), + VENDOR_F(TSCRATEMSR), + VENDOR_F(V_VMSAVE_VMLOAD), + VENDOR_F(LBRV), + VENDOR_F(PAUSEFILTER), + VENDOR_F(PFTHRESHOLD), + VENDOR_F(VGIF), + VENDOR_F(VNMI), + VENDOR_F(SVME_ADDR_CHK), + ); - kvm_cpu_cap_mask(CPUID_8000_0021_EAX, - F(NO_NESTED_DATA_BP) | F(LFENCE_RDTSC) | 0 /* SmmPgCfgLock */ | - F(NULL_SEL_CLR_BASE) | F(AUTOIBRS) | 0 /* PrefetchCtlMsr */ | - F(WRMSR_XX_BASE_NS) | F(SRSO_USER_KERNEL_NO) + kvm_cpu_cap_init(CPUID_8000_001F_EAX, + VENDOR_F(SME), + VENDOR_F(SEV), + /* VM_PAGE_FLUSH */ + VENDOR_F(SEV_ES), + F(SME_COHERENT), ); - kvm_cpu_cap_check_and_set(X86_FEATURE_SBPB); - kvm_cpu_cap_check_and_set(X86_FEATURE_IBPB_BRTYPE); - kvm_cpu_cap_check_and_set(X86_FEATURE_SRSO_NO); + kvm_cpu_cap_init(CPUID_8000_0021_EAX, + F(NO_NESTED_DATA_BP), + /* + * Synthesize "LFENCE is serializing" into the AMD-defined entry + * in KVM's supported CPUID, i.e. if the feature is reported as + * supported by the kernel. LFENCE_RDTSC was a Linux-defined + * synthetic feature long before AMD joined the bandwagon, e.g. + * LFENCE is serializing on most CPUs that support SSE2. On + * CPUs that don't support AMD's leaf, ANDing with the raw host + * CPUID will drop the flags, and reporting support in AMD's + * leaf can make it easier for userspace to detect the feature. + */ + SYNTHESIZED_F(LFENCE_RDTSC), + /* SmmPgCfgLock */ + F(NULL_SEL_CLR_BASE), + F(AUTOIBRS), + EMULATED_F(NO_SMM_CTL_MSR), + /* PrefetchCtlMsr */ + F(WRMSR_XX_BASE_NS), + SYNTHESIZED_F(SBPB), + SYNTHESIZED_F(IBPB_BRTYPE), + SYNTHESIZED_F(SRSO_NO), + SYNTHESIZED_F(SRSO_USER_KERNEL_NO), + ); - kvm_cpu_cap_init_kvm_defined(CPUID_8000_0022_EAX, - F(PERFMON_V2) + kvm_cpu_cap_init(CPUID_8000_0022_EAX, + F(PERFMON_V2), ); - /* - * Synthesize "LFENCE is serializing" into the AMD-defined entry in - * KVM's supported CPUID if the feature is reported as supported by the - * kernel. LFENCE_RDTSC was a Linux-defined synthetic feature long - * before AMD joined the bandwagon, e.g. LFENCE is serializing on most - * CPUs that support SSE2. 
On CPUs that don't support AMD's leaf, - * kvm_cpu_cap_mask() will unfortunately drop the flag due to ANDing - * the mask with the raw host CPUID, and reporting support in AMD's - * leaf can make it easier for userspace to detect the feature. - */ - if (cpu_feature_enabled(X86_FEATURE_LFENCE_RDTSC)) - kvm_cpu_cap_set(X86_FEATURE_LFENCE_RDTSC); if (!static_cpu_has_bug(X86_BUG_NULL_SEG)) kvm_cpu_cap_set(X86_FEATURE_NULL_SEL_CLR_BASE); - kvm_cpu_cap_set(X86_FEATURE_NO_SMM_CTL_MSR); - kvm_cpu_cap_mask(CPUID_C000_0001_EDX, - F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) | - F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) | - F(PMM) | F(PMM_EN) + kvm_cpu_cap_init(CPUID_C000_0001_EDX, + F(XSTORE), + F(XSTORE_EN), + F(XCRYPT), + F(XCRYPT_EN), + F(ACE2), + F(ACE2_EN), + F(PHE), + F(PHE_EN), + F(PMM), + F(PMM_EN), ); /* @@ -871,6 +1220,16 @@ void kvm_set_cpu_caps(void) } EXPORT_SYMBOL_GPL(kvm_set_cpu_caps); +#undef F +#undef SCATTERED_F +#undef X86_64_F +#undef EMULATED_F +#undef SYNTHESIZED_F +#undef PASSTHROUGH_F +#undef ALIASED_1_EDX_F +#undef VENDOR_F +#undef RUNTIME_F + struct kvm_cpuid_array { struct kvm_cpuid_entry2 *entries; int maxnent; @@ -928,14 +1287,11 @@ static struct kvm_cpuid_entry2 *do_host_cpuid(struct kvm_cpuid_array *array, return entry; } -static int __do_cpuid_func_emulated(struct kvm_cpuid_array *array, u32 func) +static int cpuid_func_emulated(struct kvm_cpuid_entry2 *entry, u32 func, + bool include_partially_emulated) { - struct kvm_cpuid_entry2 *entry; - - if (array->nent >= array->maxnent) - return -E2BIG; + memset(entry, 0, sizeof(*entry)); - entry = &array->entries[array->nent]; entry->function = func; entry->index = 0; entry->flags = 0; @@ -943,23 +1299,37 @@ static int __do_cpuid_func_emulated(struct kvm_cpuid_array *array, u32 func) switch (func) { case 0: entry->eax = 7; - ++array->nent; - break; + return 1; case 1: - entry->ecx = F(MOVBE); - ++array->nent; - break; + entry->ecx = feature_bit(MOVBE); + /* + * KVM allows userspace to enumerate MONITOR+MWAIT support to + * the guest, but the MWAIT feature flag is never advertised + * to userspace because MONITOR+MWAIT aren't virtualized by + * hardware, can't be faithfully emulated in software (KVM + * emulates them as NOPs), and allowing the guest to execute + * them natively requires enabling a per-VM capability. 
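+		 *
+		 * (For reference, an assumption based on the existing uAPI:
+		 * the per-VM capability in question is
+		 * KVM_CAP_X86_DISABLE_EXITS with KVM_X86_DISABLE_EXITS_MWAIT;
+		 * see kvm_mwait_in_guest().)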
+ */ + if (include_partially_emulated) + entry->ecx |= feature_bit(MWAIT); + return 1; case 7: entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; entry->eax = 0; if (kvm_cpu_cap_has(X86_FEATURE_RDTSCP)) - entry->ecx = F(RDPID); - ++array->nent; - break; + entry->ecx = feature_bit(RDPID); + return 1; default: - break; + return 0; } +} + +static int __do_cpuid_func_emulated(struct kvm_cpuid_array *array, u32 func) +{ + if (array->nent >= array->maxnent) + return -E2BIG; + array->nent += cpuid_func_emulated(&array->entries[array->nent], func, false); return 0; } @@ -1103,7 +1473,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) goto out; cpuid_entry_override(entry, CPUID_D_1_EAX); - if (entry->eax & (F(XSAVES)|F(XSAVEC))) + if (entry->eax & (feature_bit(XSAVES) | feature_bit(XSAVEC))) entry->ebx = xstate_required_size(permitted_xcr0 | permitted_xss, true); else { @@ -1540,22 +1910,6 @@ out_free: return r; } -struct kvm_cpuid_entry2 *kvm_find_cpuid_entry_index(struct kvm_vcpu *vcpu, - u32 function, u32 index) -{ - return cpuid_entry2_find(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent, - function, index); -} -EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry_index); - -struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, - u32 function) -{ - return cpuid_entry2_find(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent, - function, KVM_CPUID_INDEX_NOT_SIGNIFICANT); -} -EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry); - /* * Intel CPUID semantics treats any query for an out-of-range leaf as if the * highest basic leaf (i.e. CPUID.0H:EAX) were requested. AMD CPUID semantics @@ -1648,10 +2002,10 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u64 data; if (!__kvm_get_msr(vcpu, MSR_IA32_TSX_CTRL, &data, true) && (data & TSX_CTRL_CPUID_CLEAR)) - *ebx &= ~(F(RTM) | F(HLE)); + *ebx &= ~(feature_bit(RTM) | feature_bit(HLE)); } else if (function == 0x80000007) { if (kvm_hv_invtsc_suppressed(vcpu)) - *edx &= ~SF(CONSTANT_TSC); + *edx &= ~feature_bit(CONSTANT_TSC); } } else { *eax = *ebx = *ecx = *edx = 0; diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index f16a7b2c2adc..67d80aa72d50 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -10,8 +10,8 @@ extern u32 kvm_cpu_caps[NR_KVM_CPU_CAPS] __read_mostly; void kvm_set_cpu_caps(void); +void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu); void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu); -void kvm_update_pv_runtime(struct kvm_vcpu *vcpu); struct kvm_cpuid_entry2 *kvm_find_cpuid_entry_index(struct kvm_vcpu *vcpu, u32 function, u32 index); struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, @@ -67,41 +67,40 @@ static __always_inline void cpuid_entry_override(struct kvm_cpuid_entry2 *entry, *reg = kvm_cpu_caps[leaf]; } -static __always_inline u32 *guest_cpuid_get_register(struct kvm_vcpu *vcpu, - unsigned int x86_feature) +static __always_inline bool guest_cpuid_has(struct kvm_vcpu *vcpu, + unsigned int x86_feature) { const struct cpuid_reg cpuid = x86_feature_cpuid(x86_feature); struct kvm_cpuid_entry2 *entry; + u32 *reg; + + /* + * XSAVES is a special snowflake. Due to lack of a dedicated intercept + * on SVM, KVM must assume that XSAVES (and thus XRSTORS) is usable by + * the guest if the host supports XSAVES and *XSAVE* is exposed to the + * guest. Because the guest can execute XSAVES and XRSTORS, i.e. can + * indirectly consume XSS, KVM must ensure XSS is zeroed when running + * the guest, i.e. must set XSAVES in vCPU capabilities. 
But to reject + * direct XSS reads and writes (to minimize the virtualization hole and + * honor userspace's CPUID), KVM needs to check the raw guest CPUID, + * not KVM's view of guest capabilities. + * + * For all other features, guest capabilities are accurate. Expand + * this allowlist with extreme vigilance. + */ + BUILD_BUG_ON(x86_feature != X86_FEATURE_XSAVES); entry = kvm_find_cpuid_entry_index(vcpu, cpuid.function, cpuid.index); if (!entry) return NULL; - return __cpuid_entry_get_reg(entry, cpuid.reg); -} - -static __always_inline bool guest_cpuid_has(struct kvm_vcpu *vcpu, - unsigned int x86_feature) -{ - u32 *reg; - - reg = guest_cpuid_get_register(vcpu, x86_feature); + reg = __cpuid_entry_get_reg(entry, cpuid.reg); if (!reg) return false; return *reg & __feature_bit(x86_feature); } -static __always_inline void guest_cpuid_clear(struct kvm_vcpu *vcpu, - unsigned int x86_feature) -{ - u32 *reg; - - reg = guest_cpuid_get_register(vcpu, x86_feature); - if (reg) - *reg &= ~__feature_bit(x86_feature); -} - static inline bool guest_cpuid_is_amd_compatible(struct kvm_vcpu *vcpu) { return vcpu->arch.is_amd_compatible; @@ -150,21 +149,6 @@ static inline int guest_cpuid_stepping(struct kvm_vcpu *vcpu) return x86_stepping(best->eax); } -static inline bool guest_has_spec_ctrl_msr(struct kvm_vcpu *vcpu) -{ - return (guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) || - guest_cpuid_has(vcpu, X86_FEATURE_AMD_STIBP) || - guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) || - guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD)); -} - -static inline bool guest_has_pred_cmd_msr(struct kvm_vcpu *vcpu) -{ - return (guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) || - guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBPB) || - guest_cpuid_has(vcpu, X86_FEATURE_SBPB)); -} - static inline bool supports_cpuid_fault(struct kvm_vcpu *vcpu) { return vcpu->arch.msr_platform_info & MSR_PLATFORM_INFO_CPUID_FAULT; @@ -180,7 +164,6 @@ static __always_inline void kvm_cpu_cap_clear(unsigned int x86_feature) { unsigned int x86_leaf = __feature_leaf(x86_feature); - reverse_cpuid_check(x86_leaf); kvm_cpu_caps[x86_leaf] &= ~__feature_bit(x86_feature); } @@ -188,7 +171,6 @@ static __always_inline void kvm_cpu_cap_set(unsigned int x86_feature) { unsigned int x86_leaf = __feature_leaf(x86_feature); - reverse_cpuid_check(x86_leaf); kvm_cpu_caps[x86_leaf] |= __feature_bit(x86_feature); } @@ -196,7 +178,6 @@ static __always_inline u32 kvm_cpu_cap_get(unsigned int x86_feature) { unsigned int x86_leaf = __feature_leaf(x86_feature); - reverse_cpuid_check(x86_leaf); return kvm_cpu_caps[x86_leaf] & __feature_bit(x86_feature); } @@ -220,58 +201,61 @@ static __always_inline bool guest_pv_has(struct kvm_vcpu *vcpu, return vcpu->arch.pv_cpuid.features & (1u << kvm_feature); } -enum kvm_governed_features { -#define KVM_GOVERNED_FEATURE(x) KVM_GOVERNED_##x, -#include "governed_features.h" - KVM_NR_GOVERNED_FEATURES -}; - -static __always_inline int kvm_governed_feature_index(unsigned int x86_feature) +static __always_inline void guest_cpu_cap_set(struct kvm_vcpu *vcpu, + unsigned int x86_feature) { - switch (x86_feature) { -#define KVM_GOVERNED_FEATURE(x) case x: return KVM_GOVERNED_##x; -#include "governed_features.h" - default: - return -1; - } -} + unsigned int x86_leaf = __feature_leaf(x86_feature); -static __always_inline bool kvm_is_governed_feature(unsigned int x86_feature) -{ - return kvm_governed_feature_index(x86_feature) >= 0; + vcpu->arch.cpu_caps[x86_leaf] |= __feature_bit(x86_feature); } -static __always_inline void kvm_governed_feature_set(struct 
kvm_vcpu *vcpu, - unsigned int x86_feature) +static __always_inline void guest_cpu_cap_clear(struct kvm_vcpu *vcpu, + unsigned int x86_feature) { - BUILD_BUG_ON(!kvm_is_governed_feature(x86_feature)); + unsigned int x86_leaf = __feature_leaf(x86_feature); - __set_bit(kvm_governed_feature_index(x86_feature), - vcpu->arch.governed_features.enabled); + vcpu->arch.cpu_caps[x86_leaf] &= ~__feature_bit(x86_feature); } -static __always_inline void kvm_governed_feature_check_and_set(struct kvm_vcpu *vcpu, - unsigned int x86_feature) +static __always_inline void guest_cpu_cap_change(struct kvm_vcpu *vcpu, + unsigned int x86_feature, + bool guest_has_cap) { - if (kvm_cpu_cap_has(x86_feature) && guest_cpuid_has(vcpu, x86_feature)) - kvm_governed_feature_set(vcpu, x86_feature); + if (guest_has_cap) + guest_cpu_cap_set(vcpu, x86_feature); + else + guest_cpu_cap_clear(vcpu, x86_feature); } -static __always_inline bool guest_can_use(struct kvm_vcpu *vcpu, - unsigned int x86_feature) +static __always_inline bool guest_cpu_cap_has(struct kvm_vcpu *vcpu, + unsigned int x86_feature) { - BUILD_BUG_ON(!kvm_is_governed_feature(x86_feature)); + unsigned int x86_leaf = __feature_leaf(x86_feature); - return test_bit(kvm_governed_feature_index(x86_feature), - vcpu->arch.governed_features.enabled); + return vcpu->arch.cpu_caps[x86_leaf] & __feature_bit(x86_feature); } static inline bool kvm_vcpu_is_legal_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) { - if (guest_can_use(vcpu, X86_FEATURE_LAM)) + if (guest_cpu_cap_has(vcpu, X86_FEATURE_LAM)) cr3 &= ~(X86_CR3_LAM_U48 | X86_CR3_LAM_U57); return kvm_vcpu_is_legal_gpa(vcpu, cr3); } +static inline bool guest_has_spec_ctrl_msr(struct kvm_vcpu *vcpu) +{ + return (guest_cpu_cap_has(vcpu, X86_FEATURE_SPEC_CTRL) || + guest_cpu_cap_has(vcpu, X86_FEATURE_AMD_STIBP) || + guest_cpu_cap_has(vcpu, X86_FEATURE_AMD_IBRS) || + guest_cpu_cap_has(vcpu, X86_FEATURE_AMD_SSBD)); +} + +static inline bool guest_has_pred_cmd_msr(struct kvm_vcpu *vcpu) +{ + return (guest_cpu_cap_has(vcpu, X86_FEATURE_SPEC_CTRL) || + guest_cpu_cap_has(vcpu, X86_FEATURE_AMD_IBPB) || + guest_cpu_cap_has(vcpu, X86_FEATURE_SBPB)); +} + #endif diff --git a/arch/x86/kvm/governed_features.h b/arch/x86/kvm/governed_features.h deleted file mode 100644 index ad463b1ed4e4..000000000000 --- a/arch/x86/kvm/governed_features.h +++ /dev/null @@ -1,22 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#if !defined(KVM_GOVERNED_FEATURE) || defined(KVM_GOVERNED_X86_FEATURE) -BUILD_BUG() -#endif - -#define KVM_GOVERNED_X86_FEATURE(x) KVM_GOVERNED_FEATURE(X86_FEATURE_##x) - -KVM_GOVERNED_X86_FEATURE(GBPAGES) -KVM_GOVERNED_X86_FEATURE(XSAVES) -KVM_GOVERNED_X86_FEATURE(VMX) -KVM_GOVERNED_X86_FEATURE(NRIPS) -KVM_GOVERNED_X86_FEATURE(TSCRATEMSR) -KVM_GOVERNED_X86_FEATURE(V_VMSAVE_VMLOAD) -KVM_GOVERNED_X86_FEATURE(LBRV) -KVM_GOVERNED_X86_FEATURE(PAUSEFILTER) -KVM_GOVERNED_X86_FEATURE(PFTHRESHOLD) -KVM_GOVERNED_X86_FEATURE(VGIF) -KVM_GOVERNED_X86_FEATURE(VNMI) -KVM_GOVERNED_X86_FEATURE(LAM) - -#undef KVM_GOVERNED_X86_FEATURE -#undef KVM_GOVERNED_FEATURE diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 4f0a94346d00..6a6dd5a84f22 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1352,7 +1352,7 @@ static void __kvm_hv_xsaves_xsavec_maybe_warn(struct kvm_vcpu *vcpu) return; if (guest_cpuid_has(vcpu, X86_FEATURE_XSAVES) || - !guest_cpuid_has(vcpu, X86_FEATURE_XSAVEC)) + !guest_cpu_cap_has(vcpu, X86_FEATURE_XSAVEC)) return; pr_notice_ratelimited("Booting SMP Windows KVM VM with !XSAVES && XSAVEC. 
" diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h index 10495fffb890..73072585e164 100644 --- a/arch/x86/kvm/kvm_emulate.h +++ b/arch/x86/kvm/kvm_emulate.h @@ -88,6 +88,8 @@ struct x86_instruction_info { #define X86EMUL_CMPXCHG_FAILED 4 /* cmpxchg did not see expected value */ #define X86EMUL_IO_NEEDED 5 /* IO is needed to complete emulation */ #define X86EMUL_INTERCEPTED 6 /* Intercepted by nested VMCB/VMCS */ +/* Emulation during event vectoring is unhandleable. */ +#define X86EMUL_UNHANDLEABLE_VECTORING 7 /* x86-specific emulation flags */ #define X86EMUL_F_WRITE BIT(0) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 3c83951c619e..a009c94c26c2 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -598,7 +598,7 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu) * version first and level-triggered interrupts never get EOIed in * IOAPIC. */ - if (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) && + if (guest_cpu_cap_has(vcpu, X86_FEATURE_X2APIC) && !ioapic_in_kernel(vcpu->kvm)) v |= APIC_LVR_DIRECTED_EOI; kvm_lapic_set_reg(apic, APIC_LVR, v); @@ -734,10 +734,7 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic) static inline void apic_clear_irr(int vec, struct kvm_lapic *apic) { if (unlikely(apic->apicv_active)) { - /* need to update RVI */ kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR); - kvm_x86_call(hwapic_irr_update)(apic->vcpu, - apic_find_highest_irr(apic)); } else { apic->irr_pending = false; kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR); @@ -763,7 +760,7 @@ static inline void apic_set_isr(int vec, struct kvm_lapic *apic) * just set SVI. */ if (unlikely(apic->apicv_active)) - kvm_x86_call(hwapic_isr_update)(vec); + kvm_x86_call(hwapic_isr_update)(apic->vcpu, vec); else { ++apic->isr_count; BUG_ON(apic->isr_count > MAX_APIC_VECTOR); @@ -808,7 +805,7 @@ static inline void apic_clear_isr(int vec, struct kvm_lapic *apic) * and must be left alone. */ if (unlikely(apic->apicv_active)) - kvm_x86_call(hwapic_isr_update)(apic_find_highest_isr(apic)); + kvm_x86_call(hwapic_isr_update)(apic->vcpu, apic_find_highest_isr(apic)); else { --apic->isr_count; BUG_ON(apic->isr_count < 0); @@ -816,6 +813,17 @@ static inline void apic_clear_isr(int vec, struct kvm_lapic *apic) } } +void kvm_apic_update_hwapic_isr(struct kvm_vcpu *vcpu) +{ + struct kvm_lapic *apic = vcpu->arch.apic; + + if (WARN_ON_ONCE(!lapic_in_kernel(vcpu)) || !apic->apicv_active) + return; + + kvm_x86_call(hwapic_isr_update)(vcpu, apic_find_highest_isr(apic)); +} +EXPORT_SYMBOL_GPL(kvm_apic_update_hwapic_isr); + int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) { /* This may race with setting of irr in __apic_accept_irq() and @@ -2357,7 +2365,7 @@ static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) case APIC_LVTT: if (!kvm_apic_sw_enabled(apic)) val |= APIC_LVT_MASKED; - val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask); + val &= (apic_lvt_mask[LVT_TIMER] | apic->lapic_timer.timer_mode_mask); kvm_lapic_set_reg(apic, APIC_LVTT, val); apic_update_lvtt(apic); break; @@ -2634,7 +2642,7 @@ int kvm_apic_set_base(struct kvm_vcpu *vcpu, u64 value, bool host_initiated) return 0; u64 reserved_bits = kvm_vcpu_reserved_gpa_bits_raw(vcpu) | 0x2ff | - (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) ? 0 : X2APIC_ENABLE); + (guest_cpu_cap_has(vcpu, X86_FEATURE_X2APIC) ? 
0 : X2APIC_ENABLE); if ((value & reserved_bits) != 0 || new_mode == LAPIC_MODE_INVALID) return 1; @@ -2805,8 +2813,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) apic_update_ppr(apic); if (apic->apicv_active) { kvm_x86_call(apicv_post_state_restore)(vcpu); - kvm_x86_call(hwapic_irr_update)(vcpu, -1); - kvm_x86_call(hwapic_isr_update)(-1); + kvm_x86_call(hwapic_isr_update)(vcpu, -1); } vcpu->arch.apic_arb_prio = 0; @@ -3121,9 +3128,7 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) kvm_apic_update_apicv(vcpu); if (apic->apicv_active) { kvm_x86_call(apicv_post_state_restore)(vcpu); - kvm_x86_call(hwapic_irr_update)(vcpu, - apic_find_highest_irr(apic)); - kvm_x86_call(hwapic_isr_update)(apic_find_highest_isr(apic)); + kvm_x86_call(hwapic_isr_update)(vcpu, apic_find_highest_isr(apic)); } kvm_make_request(KVM_REQ_EVENT, vcpu); if (ioapic_in_kernel(vcpu->kvm)) diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 24add38beaf0..1a8553ebdb42 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -118,6 +118,7 @@ void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high); int kvm_apic_set_base(struct kvm_vcpu *vcpu, u64 value, bool host_initiated); int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s); int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s); +void kvm_apic_update_hwapic_isr(struct kvm_vcpu *vcpu); int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index e9322358678b..050a0e229a4d 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -104,6 +104,15 @@ void kvm_mmu_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu) { + /* + * Checking root.hpa is sufficient even when KVM has mirror root. 
+ * We can have either: + * (1) mirror_root_hpa = INVALID_PAGE, root.hpa = INVALID_PAGE + * (2) mirror_root_hpa = root, root.hpa = INVALID_PAGE + * (3) mirror_root_hpa = root1, root.hpa = root2 + * We don't ever have: + * mirror_root_hpa = INVALID_PAGE, root.hpa = root + */ if (likely(vcpu->arch.mmu->root.hpa != INVALID_PAGE)) return 0; @@ -126,7 +135,7 @@ static inline unsigned long kvm_get_active_pcid(struct kvm_vcpu *vcpu) static inline unsigned long kvm_get_active_cr3_lam_bits(struct kvm_vcpu *vcpu) { - if (!guest_can_use(vcpu, X86_FEATURE_LAM)) + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_LAM)) return 0; return kvm_read_cr3(vcpu) & (X86_CR3_LAM_U48 | X86_CR3_LAM_U57); @@ -287,4 +296,26 @@ static inline gpa_t kvm_translate_gpa(struct kvm_vcpu *vcpu, return gpa; return translate_nested_gpa(vcpu, gpa, access, exception); } + +static inline bool kvm_has_mirrored_tdp(const struct kvm *kvm) +{ + return kvm->arch.vm_type == KVM_X86_TDX_VM; +} + +static inline gfn_t kvm_gfn_direct_bits(const struct kvm *kvm) +{ + return kvm->arch.gfn_direct_bits; +} + +static inline bool kvm_is_addr_direct(struct kvm *kvm, gpa_t gpa) +{ + gpa_t gpa_direct_bits = gfn_to_gpa(kvm_gfn_direct_bits(kvm)); + + return !gpa_direct_bits || (gpa & gpa_direct_bits); +} + +static inline bool kvm_is_gfn_alias(struct kvm *kvm, gfn_t gfn) +{ + return gfn & kvm_gfn_direct_bits(kvm); +} #endif diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 2401606db260..a45ae60e84ab 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -599,6 +599,12 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu, bool maybe_indirect) 1 + PT64_ROOT_MAX_LEVEL + PTE_PREFETCH_NUM); if (r) return r; + if (kvm_has_mirrored_tdp(vcpu->kvm)) { + r = kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_external_spt_cache, + PT64_ROOT_MAX_LEVEL); + if (r) + return r; + } r = kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_shadow_page_cache, PT64_ROOT_MAX_LEVEL); if (r) @@ -618,6 +624,7 @@ static void mmu_free_memory_caches(struct kvm_vcpu *vcpu) kvm_mmu_free_memory_cache(&vcpu->arch.mmu_pte_list_desc_cache); kvm_mmu_free_memory_cache(&vcpu->arch.mmu_shadow_page_cache); kvm_mmu_free_memory_cache(&vcpu->arch.mmu_shadowed_info_cache); + kvm_mmu_free_memory_cache(&vcpu->arch.mmu_external_spt_cache); kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache); } @@ -3656,8 +3663,13 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) unsigned i; int r; - if (tdp_mmu_enabled) - return kvm_tdp_mmu_alloc_root(vcpu); + if (tdp_mmu_enabled) { + if (kvm_has_mirrored_tdp(vcpu->kvm) && + !VALID_PAGE(mmu->mirror_root_hpa)) + kvm_tdp_mmu_alloc_root(vcpu, true); + kvm_tdp_mmu_alloc_root(vcpu, false); + return 0; + } write_lock(&vcpu->kvm->mmu_lock); r = make_mmu_pages_available(vcpu); @@ -4379,8 +4391,12 @@ static int kvm_mmu_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, unsigned int access) { struct kvm_memory_slot *slot = fault->slot; + struct kvm *kvm = vcpu->kvm; int ret; + if (KVM_BUG_ON(kvm_is_gfn_alias(kvm, fault->gfn), kvm)) + return -EFAULT; + /* * Note that the mmu_invalidate_seq also serves to detect a concurrent * change in attributes. is_page_fault_stale() will detect an @@ -4394,7 +4410,7 @@ static int kvm_mmu_faultin_pfn(struct kvm_vcpu *vcpu, * Now that we have a snapshot of mmu_invalidate_seq we can check for a * private vs. shared mismatch. 
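	 *
	 * (Explanatory note, not part of this patch: on such a mismatch KVM
	 * bails with a KVM_EXIT_MEMORY_FAULT userspace exit, filled in by
	 * kvm_mmu_prepare_memory_fault_exit() just below.)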
*/ - if (fault->is_private != kvm_mem_is_private(vcpu->kvm, fault->gfn)) { + if (fault->is_private != kvm_mem_is_private(kvm, fault->gfn)) { kvm_mmu_prepare_memory_fault_exit(vcpu, fault); return -EFAULT; } @@ -4456,7 +4472,7 @@ static int kvm_mmu_faultin_pfn(struct kvm_vcpu *vcpu, * *guaranteed* to need to retry, i.e. waiting until mmu_lock is held * to detect retry guarantees the worst case latency for the vCPU. */ - if (mmu_invalidate_retry_gfn_unsafe(vcpu->kvm, fault->mmu_seq, fault->gfn)) + if (mmu_invalidate_retry_gfn_unsafe(kvm, fault->mmu_seq, fault->gfn)) return RET_PF_RETRY; ret = __kvm_mmu_faultin_pfn(vcpu, fault); @@ -4476,7 +4492,7 @@ static int kvm_mmu_faultin_pfn(struct kvm_vcpu *vcpu, * overall cost of failing to detect the invalidation until after * mmu_lock is acquired. */ - if (mmu_invalidate_retry_gfn_unsafe(vcpu->kvm, fault->mmu_seq, fault->gfn)) { + if (mmu_invalidate_retry_gfn_unsafe(kvm, fault->mmu_seq, fault->gfn)) { kvm_mmu_finish_page_fault(vcpu, fault, RET_PF_RETRY); return RET_PF_RETRY; } @@ -5022,7 +5038,7 @@ static void reset_guest_rsvds_bits_mask(struct kvm_vcpu *vcpu, __reset_rsvds_bits_mask(&context->guest_rsvd_check, vcpu->arch.reserved_gpa_bits, context->cpu_role.base.level, is_efer_nx(context), - guest_can_use(vcpu, X86_FEATURE_GBPAGES), + guest_cpu_cap_has(vcpu, X86_FEATURE_GBPAGES), is_cr4_pse(context), guest_cpuid_is_amd_compatible(vcpu)); } @@ -5099,7 +5115,7 @@ static void reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, __reset_rsvds_bits_mask(shadow_zero_check, reserved_hpa_bits(), context->root_role.level, context->root_role.efer_nx, - guest_can_use(vcpu, X86_FEATURE_GBPAGES), + guest_cpu_cap_has(vcpu, X86_FEATURE_GBPAGES), is_pse, is_amd); if (!shadow_me_mask) @@ -6095,8 +6111,16 @@ int noinline kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 err else if (r == RET_PF_SPURIOUS) vcpu->stat.pf_spurious++; + /* + * None of handle_mmio_page_fault(), kvm_mmu_do_page_fault(), or + * kvm_mmu_write_protect_fault() return RET_PF_CONTINUE. + * kvm_mmu_do_page_fault() only uses RET_PF_CONTINUE internally to + * indicate continuing the page fault handling until to the final + * page table mapping phase. + */ + WARN_ON_ONCE(r == RET_PF_CONTINUE); if (r != RET_PF_EMULATE) - return 1; + return r; emulate: return x86_emulate_instruction(vcpu, cr2_or_gpa, emulation_type, insn, @@ -6272,6 +6296,7 @@ static int __kvm_mmu_create(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu) mmu->root.hpa = INVALID_PAGE; mmu->root.pgd = 0; + mmu->mirror_root_hpa = INVALID_PAGE; for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) mmu->prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID; @@ -6441,8 +6466,13 @@ static void kvm_mmu_zap_all_fast(struct kvm *kvm) * write and in the same critical section as making the reload request, * e.g. before kvm_zap_obsolete_pages() could drop mmu_lock and yield. */ - if (tdp_mmu_enabled) - kvm_tdp_mmu_invalidate_all_roots(kvm); + if (tdp_mmu_enabled) { + /* + * External page tables don't support fast zapping, therefore + * their mirrors must be invalidated separately by the caller. + */ + kvm_tdp_mmu_invalidate_roots(kvm, KVM_DIRECT_ROOTS); + } /* * Notify all vcpus to reload its shadow page table and flush TLB. @@ -6467,7 +6497,7 @@ static void kvm_mmu_zap_all_fast(struct kvm *kvm) * lead to use-after-free. 
*/ if (tdp_mmu_enabled) - kvm_tdp_mmu_zap_invalidated_roots(kvm); + kvm_tdp_mmu_zap_invalidated_roots(kvm, true); } void kvm_mmu_init_vm(struct kvm *kvm) @@ -7220,6 +7250,12 @@ out: void kvm_mmu_destroy(struct kvm_vcpu *vcpu) { kvm_mmu_unload(vcpu); + if (tdp_mmu_enabled) { + read_lock(&vcpu->kvm->mmu_lock); + mmu_free_root_page(vcpu->kvm, &vcpu->arch.mmu->mirror_root_hpa, + NULL); + read_unlock(&vcpu->kvm->mmu_lock); + } free_mmu_pages(&vcpu->arch.root_mmu); free_mmu_pages(&vcpu->arch.guest_mmu); mmu_free_memory_caches(vcpu); @@ -7411,20 +7447,28 @@ static bool kvm_nx_huge_page_recovery_worker(void *data) return true; } -int kvm_mmu_post_init_vm(struct kvm *kvm) +static void kvm_mmu_start_lpage_recovery(struct once *once) { - if (nx_hugepage_mitigation_hard_disabled) - return 0; + struct kvm_arch *ka = container_of(once, struct kvm_arch, nx_once); + struct kvm *kvm = container_of(ka, struct kvm, arch); kvm->arch.nx_huge_page_last = get_jiffies_64(); kvm->arch.nx_huge_page_recovery_thread = vhost_task_create( kvm_nx_huge_page_recovery_worker, kvm_nx_huge_page_recovery_worker_kill, kvm, "kvm-nx-lpage-recovery"); + if (kvm->arch.nx_huge_page_recovery_thread) + vhost_task_start(kvm->arch.nx_huge_page_recovery_thread); +} + +int kvm_mmu_post_init_vm(struct kvm *kvm) +{ + if (nx_hugepage_mitigation_hard_disabled) + return 0; + + call_once(&kvm->arch.nx_once, kvm_mmu_start_lpage_recovery); if (!kvm->arch.nx_huge_page_recovery_thread) return -ENOMEM; - - vhost_task_start(kvm->arch.nx_huge_page_recovery_thread); return 0; } @@ -7452,6 +7496,12 @@ bool kvm_arch_pre_set_memory_attributes(struct kvm *kvm, if (WARN_ON_ONCE(!kvm_arch_has_private_mem(kvm))) return false; + /* Unmap the old attribute page. */ + if (range->arg.attributes & KVM_MEMORY_ATTRIBUTE_PRIVATE) + range->attr_filter = KVM_FILTER_SHARED; + else + range->attr_filter = KVM_FILTER_PRIVATE; + return kvm_unmap_gfn_range(kvm, range); } diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h index b00abbe3f6cf..75f00598289d 100644 --- a/arch/x86/kvm/mmu/mmu_internal.h +++ b/arch/x86/kvm/mmu/mmu_internal.h @@ -6,6 +6,8 @@ #include <linux/kvm_host.h> #include <asm/kvm_host.h> +#include "mmu.h" + #ifdef CONFIG_KVM_PROVE_MMU #define KVM_MMU_WARN_ON(x) WARN_ON_ONCE(x) #else @@ -101,7 +103,22 @@ struct kvm_mmu_page { int root_count; refcount_t tdp_mmu_root_count; }; - unsigned int unsync_children; + union { + /* These two members aren't used for TDP MMU */ + struct { + unsigned int unsync_children; + /* + * Number of writes since the last time traversal + * visited this page. + */ + atomic_t write_flooding_count; + }; + /* + * Page table page of external PT. + * Passed to TDX module, not accessed by KVM. + */ + void *external_spt; + }; union { struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */ tdp_ptep_t ptep; @@ -124,9 +141,6 @@ struct kvm_mmu_page { int clear_spte_count; #endif - /* Number of writes since the last time traversal visited this page. */ - atomic_t write_flooding_count; - #ifdef CONFIG_X86_64 /* Used for freeing the page asynchronously if it is a TDP MMU page. 
*/ struct rcu_head rcu_head; @@ -145,6 +159,34 @@ static inline int kvm_mmu_page_as_id(struct kvm_mmu_page *sp) return kvm_mmu_role_as_id(sp->role); } +static inline bool is_mirror_sp(const struct kvm_mmu_page *sp) +{ + return sp->role.is_mirror; +} + +static inline void kvm_mmu_alloc_external_spt(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) +{ + /* + * external_spt is allocated for the TDX module to hold private EPT + * mappings; the TDX module will initialize the page by itself. + * Therefore, KVM does not need to initialize or access external_spt. + * KVM only interacts with sp->spt for private EPT operations. + */ + sp->external_spt = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_external_spt_cache); +} + +static inline gfn_t kvm_gfn_root_bits(const struct kvm *kvm, const struct kvm_mmu_page *root) +{ + /* + * Since mirror SPs are used only for TDX, which maps private memory + * at its "natural" GFN, no mask needs to be applied to them - and, dually, + * we expect that the bits are only used for the shared PT. + */ + if (is_mirror_sp(root)) + return 0; + return kvm_gfn_direct_bits(kvm); +} + static inline bool kvm_mmu_page_ad_need_write_protect(struct kvm_mmu_page *sp) { /* @@ -229,7 +271,12 @@ struct kvm_page_fault { */ u8 goal_level; - /* Shifted addr, or result of guest page table walk if addr is a gva. */ + /* + * Shifted addr, or result of guest page table walk if addr is a gva. In + * the case of a VM where memslots can be mapped at multiple GPA aliases + * (i.e. TDX), the gfn field does not contain the bit that selects between + * the aliases (i.e. the shared bit for TDX). + */ gfn_t gfn; /* The memslot containing gfn. May be NULL. */ @@ -268,9 +315,7 @@ int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault); * tracepoints via TRACE_DEFINE_ENUM() in mmutrace.h * * Note, all values must be greater than or equal to zero so as not to encroach - * on -errno return values. Somewhat arbitrarily use '0' for CONTINUE, which - * will allow for efficient machine code when checking for CONTINUE, e.g. - * "TEST %rax, %rax, JNZ", as all "stop!" values are non-zero. + * on -errno return values. */ enum { RET_PF_CONTINUE = 0, @@ -282,6 +327,14 @@ enum { RET_PF_SPURIOUS, }; +/* + * Define RET_PF_CONTINUE as 0 to allow for + * - efficient machine code when checking for CONTINUE, e.g. + * "TEST %rax, %rax, JNZ", as all "stop!" values are non-zero, + * - kvm_mmu_do_page_fault() to return other RET_PF_* as a positive value. + */ +static_assert(RET_PF_CONTINUE == 0); + static inline void kvm_mmu_prepare_memory_fault_exit(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { @@ -317,10 +370,19 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, int r; if (vcpu->arch.mmu->root_role.direct) { - fault.gfn = fault.addr >> PAGE_SHIFT; + /* + * Things like memslots don't understand the concept of a shared + * bit. Strip it so that the GFN can be used like normal, and the + * fault.addr can be used when the shared bit is needed. + */ + fault.gfn = gpa_to_gfn(fault.addr) & ~kvm_gfn_direct_bits(vcpu->kvm); fault.slot = kvm_vcpu_gfn_to_memslot(vcpu, fault.gfn); } + /* + * With retpoline active, an indirect call is rather expensive, + * so do a direct call in the most common case. 
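
A worked example of the GFN stripping just above, with an assumed TDX-style shared bit at GPA bit 47 (GFN bit 35; the real position depends on the guest's GPAW, so treat these values as illustrative). Memslot lookups use the stripped GFN, while fault.addr keeps the alias-selecting bit:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT      12
#define SHARED_GPA_BIT  47ULL					/* assumption */
#define DIRECT_GFN_BITS (1ULL << (SHARED_GPA_BIT - PAGE_SHIFT))

int main(void)
{
	uint64_t gpa = (1ULL << SHARED_GPA_BIT) | 0x1234000ULL;	/* shared alias */
	uint64_t gfn = (gpa >> PAGE_SHIFT) & ~DIRECT_GFN_BITS;	/* strip the bit */

	/* Prints: gpa 0x800001234000 -> stripped gfn 0x1234 */
	printf("gpa 0x%llx -> stripped gfn 0x%llx\n",
	       (unsigned long long)gpa, (unsigned long long)gfn);
	return 0;
}
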
+ */ if (IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) && fault.is_tdp) r = kvm_tdp_page_fault(vcpu, &fault); else diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h index af10bc0380a3..59746854c0af 100644 --- a/arch/x86/kvm/mmu/spte.h +++ b/arch/x86/kvm/mmu/spte.h @@ -276,6 +276,11 @@ static inline struct kvm_mmu_page *root_to_sp(hpa_t root) return spte_to_child_sp(root); } +static inline bool is_mirror_sptep(tdp_ptep_t sptep) +{ + return is_mirror_sp(sptep_to_sp(rcu_dereference(sptep))); +} + static inline bool is_mmio_spte(struct kvm *kvm, u64 spte) { return (spte & shadow_mmio_mask) == kvm->arch.shadow_mmio_value && diff --git a/arch/x86/kvm/mmu/tdp_iter.c b/arch/x86/kvm/mmu/tdp_iter.c index 04c247bfe318..9e17bfa80901 100644 --- a/arch/x86/kvm/mmu/tdp_iter.c +++ b/arch/x86/kvm/mmu/tdp_iter.c @@ -12,7 +12,7 @@ static void tdp_iter_refresh_sptep(struct tdp_iter *iter) { iter->sptep = iter->pt_path[iter->level - 1] + - SPTE_INDEX(iter->gfn << PAGE_SHIFT, iter->level); + SPTE_INDEX((iter->gfn | iter->gfn_bits) << PAGE_SHIFT, iter->level); iter->old_spte = kvm_tdp_mmu_read_spte(iter->sptep); } @@ -37,15 +37,17 @@ void tdp_iter_restart(struct tdp_iter *iter) * rooted at root_pt, starting with the walk to translate next_last_level_gfn. */ void tdp_iter_start(struct tdp_iter *iter, struct kvm_mmu_page *root, - int min_level, gfn_t next_last_level_gfn) + int min_level, gfn_t next_last_level_gfn, gfn_t gfn_bits) { if (WARN_ON_ONCE(!root || (root->role.level < 1) || - (root->role.level > PT64_ROOT_MAX_LEVEL))) { + (root->role.level > PT64_ROOT_MAX_LEVEL) || + (gfn_bits && next_last_level_gfn >= gfn_bits))) { iter->valid = false; return; } iter->next_last_level_gfn = next_last_level_gfn; + iter->gfn_bits = gfn_bits; iter->root_level = root->role.level; iter->min_level = min_level; iter->pt_path[iter->root_level - 1] = (tdp_ptep_t)root->spt; @@ -113,7 +115,7 @@ static bool try_step_side(struct tdp_iter *iter) * Check if the iterator is already at the end of the current page * table. */ - if (SPTE_INDEX(iter->gfn << PAGE_SHIFT, iter->level) == + if (SPTE_INDEX((iter->gfn | iter->gfn_bits) << PAGE_SHIFT, iter->level) == (SPTE_ENT_PER_PAGE - 1)) return false; diff --git a/arch/x86/kvm/mmu/tdp_iter.h b/arch/x86/kvm/mmu/tdp_iter.h index 2880fd392e0c..047b78333653 100644 --- a/arch/x86/kvm/mmu/tdp_iter.h +++ b/arch/x86/kvm/mmu/tdp_iter.h @@ -93,8 +93,10 @@ struct tdp_iter { tdp_ptep_t pt_path[PT64_ROOT_MAX_LEVEL]; /* A pointer to the current SPTE */ tdp_ptep_t sptep; - /* The lowest GFN mapped by the current SPTE */ + /* The lowest GFN (mask bits excluded) mapped by the current SPTE */ gfn_t gfn; + /* Mask applied to convert the GFN to the mapping GPA */ + gfn_t gfn_bits; /* The level of the root page given to the iterator */ int root_level; /* The lowest level the iterator should traverse to */ @@ -122,18 +124,23 @@ struct tdp_iter { * Iterates over every SPTE mapping the GFN range [start, end) in a * preorder traversal. 
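
The effect of OR-ing gfn_bits back in when indexing, as done by tdp_iter_refresh_sptep() and try_step_side() above, is easiest to see with small numbers. Below is a standalone model of the SPTE_INDEX() computation (constants mirror x86's 512-entry, 9-bits-per-level layout; the shared-bit position is illustrative): the shared bit steers the walk into the upper half of the root page table while iter->gfn itself stays stripped.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT        12
#define PT64_LEVEL_BITS   9
#define SPTE_ENT_PER_PAGE (1ULL << PT64_LEVEL_BITS)

static unsigned int spte_index(uint64_t gpa, int level)
{
	int shift = PAGE_SHIFT + (level - 1) * PT64_LEVEL_BITS;

	return (gpa >> shift) & (SPTE_ENT_PER_PAGE - 1);
}

int main(void)
{
	uint64_t gfn = 0x1234, gfn_bits = 1ULL << 35;	/* assumed shared bit */
	int level = 4;					/* 4-level paging root */

	/* Prints 0, then 256: the shared bit selects the root's upper half. */
	printf("index w/o bits: %u\n", spte_index(gfn << PAGE_SHIFT, level));
	printf("index w/  bits: %u\n",
	       spte_index((gfn | gfn_bits) << PAGE_SHIFT, level));
	return 0;
}
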
*/ -#define for_each_tdp_pte_min_level(iter, root, min_level, start, end) \ - for (tdp_iter_start(&iter, root, min_level, start); \ - iter.valid && iter.gfn < end; \ +#define for_each_tdp_pte_min_level(iter, kvm, root, min_level, start, end) \ + for (tdp_iter_start(&iter, root, min_level, start, kvm_gfn_root_bits(kvm, root)); \ + iter.valid && iter.gfn < end; \ tdp_iter_next(&iter)) -#define for_each_tdp_pte(iter, root, start, end) \ - for_each_tdp_pte_min_level(iter, root, PG_LEVEL_4K, start, end) +#define for_each_tdp_pte_min_level_all(iter, root, min_level) \ + for (tdp_iter_start(&iter, root, min_level, 0, 0); \ + iter.valid && iter.gfn < tdp_mmu_max_gfn_exclusive(); \ + tdp_iter_next(&iter)) + +#define for_each_tdp_pte(iter, kvm, root, start, end) \ + for_each_tdp_pte_min_level(iter, kvm, root, PG_LEVEL_4K, start, end) tdp_ptep_t spte_to_child_pt(u64 pte, int level); void tdp_iter_start(struct tdp_iter *iter, struct kvm_mmu_page *root, - int min_level, gfn_t next_last_level_gfn); + int min_level, gfn_t next_last_level_gfn, gfn_t gfn_bits); void tdp_iter_next(struct tdp_iter *iter); void tdp_iter_restart(struct tdp_iter *iter); diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 2f15e0e33903..046b6ba31197 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -37,8 +37,8 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm) * for zapping and thus puts the TDP MMU's reference to each root, i.e. * ultimately frees all roots. */ - kvm_tdp_mmu_invalidate_all_roots(kvm); - kvm_tdp_mmu_zap_invalidated_roots(kvm); + kvm_tdp_mmu_invalidate_roots(kvm, KVM_VALID_ROOTS); + kvm_tdp_mmu_zap_invalidated_roots(kvm, false); WARN_ON(atomic64_read(&kvm->arch.tdp_mmu_pages)); WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots)); @@ -53,6 +53,7 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm) static void tdp_mmu_free_sp(struct kvm_mmu_page *sp) { + free_page((unsigned long)sp->external_spt); free_page((unsigned long)sp->spt); kmem_cache_free(mmu_page_header_cache, sp); } @@ -91,19 +92,33 @@ void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root) call_rcu(&root->rcu_head, tdp_mmu_free_sp_rcu_callback); } +static bool tdp_mmu_root_match(struct kvm_mmu_page *root, + enum kvm_tdp_mmu_root_types types) +{ + if (WARN_ON_ONCE(!(types & KVM_VALID_ROOTS))) + return false; + + if (root->role.invalid && !(types & KVM_INVALID_ROOTS)) + return false; + + if (likely(!is_mirror_sp(root))) + return types & KVM_DIRECT_ROOTS; + return types & KVM_MIRROR_ROOTS; +} + /* * Returns the next root after @prev_root (or the first root if @prev_root is - * NULL). A reference to the returned root is acquired, and the reference to - * @prev_root is released (the caller obviously must hold a reference to - * @prev_root if it's non-NULL). + * NULL) that matches @types. A reference to the returned root is + * acquired, and the reference to @prev_root is released (the caller obviously + * must hold a reference to @prev_root if it's non-NULL). * - * If @only_valid is true, invalid roots are skipped. + * Roots that don't match @types are skipped. * * Returns NULL if the end of tdp_mmu_roots was reached. 
*/ static struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm, struct kvm_mmu_page *prev_root, - bool only_valid) + enum kvm_tdp_mmu_root_types types) { struct kvm_mmu_page *next_root; @@ -124,7 +139,7 @@ static struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm, typeof(*next_root), link); while (next_root) { - if ((!only_valid || !next_root->role.invalid) && + if (tdp_mmu_root_match(next_root, types) && kvm_tdp_mmu_get_root(next_root)) break; @@ -149,20 +164,20 @@ static struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm, * If shared is set, this function is operating under the MMU lock in read * mode. */ -#define __for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _only_valid) \ - for (_root = tdp_mmu_next_root(_kvm, NULL, _only_valid); \ +#define __for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _types) \ + for (_root = tdp_mmu_next_root(_kvm, NULL, _types); \ ({ lockdep_assert_held(&(_kvm)->mmu_lock); }), _root; \ - _root = tdp_mmu_next_root(_kvm, _root, _only_valid)) \ + _root = tdp_mmu_next_root(_kvm, _root, _types)) \ if (_as_id >= 0 && kvm_mmu_page_as_id(_root) != _as_id) { \ } else #define for_each_valid_tdp_mmu_root_yield_safe(_kvm, _root, _as_id) \ - __for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, true) + __for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, KVM_VALID_ROOTS) #define for_each_tdp_mmu_root_yield_safe(_kvm, _root) \ - for (_root = tdp_mmu_next_root(_kvm, NULL, false); \ + for (_root = tdp_mmu_next_root(_kvm, NULL, KVM_ALL_ROOTS); \ ({ lockdep_assert_held(&(_kvm)->mmu_lock); }), _root; \ - _root = tdp_mmu_next_root(_kvm, _root, false)) + _root = tdp_mmu_next_root(_kvm, _root, KVM_ALL_ROOTS)) /* * Iterate over all TDP MMU roots. Requires that mmu_lock be held for write, @@ -171,18 +186,15 @@ static struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm, * Holding mmu_lock for write obviates the need for RCU protection as the list * is guaranteed to be stable. */ -#define __for_each_tdp_mmu_root(_kvm, _root, _as_id, _only_valid) \ +#define __for_each_tdp_mmu_root(_kvm, _root, _as_id, _types) \ list_for_each_entry(_root, &_kvm->arch.tdp_mmu_roots, link) \ if (kvm_lockdep_assert_mmu_lock_held(_kvm, false) && \ ((_as_id >= 0 && kvm_mmu_page_as_id(_root) != _as_id) || \ - ((_only_valid) && (_root)->role.invalid))) { \ + !tdp_mmu_root_match((_root), (_types)))) { \ } else -#define for_each_tdp_mmu_root(_kvm, _root, _as_id) \ - __for_each_tdp_mmu_root(_kvm, _root, _as_id, false) - #define for_each_valid_tdp_mmu_root(_kvm, _root, _as_id) \ - __for_each_tdp_mmu_root(_kvm, _root, _as_id, true) + __for_each_tdp_mmu_root(_kvm, _root, _as_id, KVM_VALID_ROOTS) static struct kvm_mmu_page *tdp_mmu_alloc_sp(struct kvm_vcpu *vcpu) { @@ -223,7 +235,7 @@ static void tdp_mmu_init_child_sp(struct kvm_mmu_page *child_sp, tdp_mmu_init_sp(child_sp, iter->sptep, iter->gfn, role); } -int kvm_tdp_mmu_alloc_root(struct kvm_vcpu *vcpu) +void kvm_tdp_mmu_alloc_root(struct kvm_vcpu *vcpu, bool mirror) { struct kvm_mmu *mmu = vcpu->arch.mmu; union kvm_mmu_page_role role = mmu->root_role; @@ -231,6 +243,9 @@ int kvm_tdp_mmu_alloc_root(struct kvm_vcpu *vcpu) struct kvm *kvm = vcpu->kvm; struct kvm_mmu_page *root; + if (mirror) + role.is_mirror = true; + /* * Check for an existing root before acquiring the pages lock to avoid * unnecessary serialization if multiple vCPUs are loading a new root. @@ -282,9 +297,12 @@ out_read_unlock: * and actually consuming the root if it's invalidated after dropping * mmu_lock, and the root can't be freed as this vCPU holds a reference. 
*/ - mmu->root.hpa = __pa(root->spt); - mmu->root.pgd = 0; - return 0; + if (mirror) { + mmu->mirror_root_hpa = __pa(root->spt); + } else { + mmu->root.hpa = __pa(root->spt); + mmu->root.pgd = 0; + } } static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn, @@ -322,6 +340,29 @@ static void tdp_mmu_unlink_sp(struct kvm *kvm, struct kvm_mmu_page *sp) spin_unlock(&kvm->arch.tdp_mmu_pages_lock); } +static void remove_external_spte(struct kvm *kvm, gfn_t gfn, u64 old_spte, + int level) +{ + kvm_pfn_t old_pfn = spte_to_pfn(old_spte); + int ret; + + /* + * External (TDX) SPTEs are limited to PG_LEVEL_4K, and external + * PTs are removed in a special order, involving free_external_spt(). + * But remove_external_spte() will be called on non-leaf PTEs via + * __tdp_mmu_zap_root(), so avoid the error the former would return + * in this case. + */ + if (!is_last_spte(old_spte, level)) + return; + + /* Zapping a leaf SPTE is allowed only when the write lock is held. */ + lockdep_assert_held_write(&kvm->mmu_lock); + /* Because the write lock is held, the operation should succeed. */ + ret = static_call(kvm_x86_remove_external_spte)(kvm, gfn, level, old_pfn); + KVM_BUG_ON(ret, kvm); +} + /** * handle_removed_pt() - handle a page table removed from the TDP structure * @@ -417,11 +458,81 @@ static void handle_removed_pt(struct kvm *kvm, tdp_ptep_t pt, bool shared) } handle_changed_spte(kvm, kvm_mmu_page_as_id(sp), gfn, old_spte, FROZEN_SPTE, level, shared); + + if (is_mirror_sp(sp)) { + KVM_BUG_ON(shared, kvm); + remove_external_spte(kvm, gfn, old_spte, level); + } + } + + if (is_mirror_sp(sp) && + WARN_ON(static_call(kvm_x86_free_external_spt)(kvm, base_gfn, sp->role.level, + sp->external_spt))) { + /* + * Failed to free the page table page of the mirror page + * table, and there is nothing further to do. + * Intentionally leak the page to prevent the kernel from + * accessing the encrypted page. + */ + sp->external_spt = NULL; } call_rcu(&sp->rcu_head, tdp_mmu_free_sp_rcu_callback); } +static void *get_external_spt(gfn_t gfn, u64 new_spte, int level) +{ + if (is_shadow_present_pte(new_spte) && !is_last_spte(new_spte, level)) { + struct kvm_mmu_page *sp = spte_to_child_sp(new_spte); + + WARN_ON_ONCE(sp->role.level + 1 != level); + WARN_ON_ONCE(sp->gfn != gfn); + return sp->external_spt; + } + + return NULL; +} + +static int __must_check set_external_spte_present(struct kvm *kvm, tdp_ptep_t sptep, + gfn_t gfn, u64 old_spte, + u64 new_spte, int level) +{ + bool was_present = is_shadow_present_pte(old_spte); + bool is_present = is_shadow_present_pte(new_spte); + bool is_leaf = is_present && is_last_spte(new_spte, level); + kvm_pfn_t new_pfn = spte_to_pfn(new_spte); + int ret = 0; + + KVM_BUG_ON(was_present, kvm); + + lockdep_assert_held(&kvm->mmu_lock); + /* + * We need to lock out other updates to the SPTE until the external + * page table has been modified. Use FROZEN_SPTE, similar to + * the zapping case. + */ + if (!try_cmpxchg64(rcu_dereference(sptep), &old_spte, FROZEN_SPTE)) + return -EBUSY; + + /* + * Use a different call to set up either a middle-level + * external page table or a leaf. 
+ */ + if (is_leaf) { + ret = static_call(kvm_x86_set_external_spte)(kvm, gfn, level, new_pfn); + } else { + void *external_spt = get_external_spt(gfn, new_spte, level); + + KVM_BUG_ON(!external_spt, kvm); + ret = static_call(kvm_x86_link_external_spt)(kvm, gfn, level, external_spt); + } + if (ret) + __kvm_tdp_mmu_write_spte(sptep, old_spte); + else + __kvm_tdp_mmu_write_spte(sptep, new_spte); + return ret; +} + /** * handle_changed_spte - handle bookkeeping associated with an SPTE change * @kvm: kvm instance @@ -522,11 +633,10 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn, handle_removed_pt(kvm, spte_to_child_pt(old_spte, level), shared); } -static inline int __must_check __tdp_mmu_set_spte_atomic(struct tdp_iter *iter, +static inline int __must_check __tdp_mmu_set_spte_atomic(struct kvm *kvm, + struct tdp_iter *iter, u64 new_spte) { - u64 *sptep = rcu_dereference(iter->sptep); - /* * The caller is responsible for ensuring the old SPTE is not a FROZEN * SPTE. KVM should never attempt to zap or manipulate a FROZEN SPTE, @@ -535,15 +645,34 @@ static inline int __must_check __tdp_mmu_set_spte_atomic(struct tdp_iter *iter, */ WARN_ON_ONCE(iter->yielded || is_frozen_spte(iter->old_spte)); - /* - * Note, fast_pf_fix_direct_spte() can also modify TDP MMU SPTEs and - * does not hold the mmu_lock. On failure, i.e. if a different logical - * CPU modified the SPTE, try_cmpxchg64() updates iter->old_spte with - * the current value, so the caller operates on fresh data, e.g. if it - * retries tdp_mmu_set_spte_atomic() - */ - if (!try_cmpxchg64(sptep, &iter->old_spte, new_spte)) - return -EBUSY; + if (is_mirror_sptep(iter->sptep) && !is_frozen_spte(new_spte)) { + int ret; + + /* + * Users of atomic zapping don't operate on mirror roots, + * so don't handle it and bug the VM if it's seen. + */ + if (KVM_BUG_ON(!is_shadow_present_pte(new_spte), kvm)) + return -EBUSY; + + ret = set_external_spte_present(kvm, iter->sptep, iter->gfn, + iter->old_spte, new_spte, iter->level); + if (ret) + return ret; + } else { + u64 *sptep = rcu_dereference(iter->sptep); + + /* + * Note, fast_pf_fix_direct_spte() can also modify TDP MMU SPTEs + * and does not hold the mmu_lock. On failure, i.e. if a + * different logical CPU modified the SPTE, try_cmpxchg64() + * updates iter->old_spte with the current value, so the caller + * operates on fresh data, e.g. if it retries + * tdp_mmu_set_spte_atomic() + */ + if (!try_cmpxchg64(sptep, &iter->old_spte, new_spte)) + return -EBUSY; + } return 0; } @@ -573,7 +702,7 @@ static inline int __must_check tdp_mmu_set_spte_atomic(struct kvm *kvm, lockdep_assert_held_read(&kvm->mmu_lock); - ret = __tdp_mmu_set_spte_atomic(iter, new_spte); + ret = __tdp_mmu_set_spte_atomic(kvm, iter, new_spte); if (ret) return ret; @@ -613,6 +742,16 @@ static u64 tdp_mmu_set_spte(struct kvm *kvm, int as_id, tdp_ptep_t sptep, old_spte = kvm_tdp_mmu_write_spte(sptep, old_spte, new_spte, level); handle_changed_spte(kvm, as_id, gfn, old_spte, new_spte, level, false); + + /* + * Users that do non-atomic setting of PTEs don't operate on mirror + * roots, so don't handle it and bug the VM if it's seen. 
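
The freeze-then-update sequence used by set_external_spte_present() above generalizes to a small lock-free protocol. Here is a userspace model of it (the sentinel value and helper names are assumed for illustration, not kernel definitions): a reserved value is cmpxchg'd into the slot so concurrent walkers back off while the external table is updated, then the final value is published.

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define FROZEN ((uint64_t)-2)	/* assumed sentinel, never a valid PTE */

/* Stand-in for the external (TDX-module) page table update. */
static int update_external_pt(uint64_t new_val)
{
	(void)new_val;
	return 0;	/* pretend the update succeeded */
}

static int set_present(_Atomic uint64_t *sptep, uint64_t old, uint64_t new)
{
	/* 1: freeze the slot; fail if another CPU changed it first. */
	if (!atomic_compare_exchange_strong(sptep, &old, FROZEN))
		return -1;	/* -EBUSY: the caller retries the fault */

	/* 2: mutate the external page table while walkers back off. */
	if (update_external_pt(new)) {
		atomic_store(sptep, old);	/* roll back on failure */
		return -2;
	}

	/* 3: publish the final value, unfreezing the slot. */
	atomic_store(sptep, new);
	return 0;
}

int main(void)
{
	_Atomic uint64_t spte = 0;
	int ret = set_present(&spte, 0, 0xabc001);

	printf("ret=%d spte=0x%llx\n", ret,
	       (unsigned long long)atomic_load(&spte));
	return 0;
}
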
+ */ + if (is_mirror_sptep(sptep)) { + KVM_BUG_ON(is_shadow_present_pte(new_spte), kvm); + remove_external_spte(kvm, gfn, old_spte, level); + } + return old_spte; } @@ -625,18 +764,18 @@ static inline void tdp_mmu_iter_set_spte(struct kvm *kvm, struct tdp_iter *iter, iter->gfn, iter->level); } -#define tdp_root_for_each_pte(_iter, _root, _start, _end) \ - for_each_tdp_pte(_iter, _root, _start, _end) +#define tdp_root_for_each_pte(_iter, _kvm, _root, _start, _end) \ + for_each_tdp_pte(_iter, _kvm, _root, _start, _end) -#define tdp_root_for_each_leaf_pte(_iter, _root, _start, _end) \ - tdp_root_for_each_pte(_iter, _root, _start, _end) \ +#define tdp_root_for_each_leaf_pte(_iter, _kvm, _root, _start, _end) \ + tdp_root_for_each_pte(_iter, _kvm, _root, _start, _end) \ if (!is_shadow_present_pte(_iter.old_spte) || \ !is_last_spte(_iter.old_spte, _iter.level)) \ continue; \ else -#define tdp_mmu_for_each_pte(_iter, _mmu, _start, _end) \ - for_each_tdp_pte(_iter, root_to_sp(_mmu->root.hpa), _start, _end) +#define tdp_mmu_for_each_pte(_iter, _kvm, _root, _start, _end) \ + for_each_tdp_pte(_iter, _kvm, _root, _start, _end) static inline bool __must_check tdp_mmu_iter_need_resched(struct kvm *kvm, struct tdp_iter *iter) @@ -705,10 +844,7 @@ static void __tdp_mmu_zap_root(struct kvm *kvm, struct kvm_mmu_page *root, { struct tdp_iter iter; - gfn_t end = tdp_mmu_max_gfn_exclusive(); - gfn_t start = 0; - - for_each_tdp_pte_min_level(iter, root, zap_level, start, end) { + for_each_tdp_pte_min_level_all(iter, root, zap_level) { retry: if (tdp_mmu_iter_cond_resched(kvm, &iter, false, shared)) continue; @@ -812,7 +948,7 @@ static bool tdp_mmu_zap_leafs(struct kvm *kvm, struct kvm_mmu_page *root, rcu_read_lock(); - for_each_tdp_pte_min_level(iter, root, PG_LEVEL_4K, start, end) { + for_each_tdp_pte_min_level(iter, kvm, root, PG_LEVEL_4K, start, end) { if (can_yield && tdp_mmu_iter_cond_resched(kvm, &iter, flush, false)) { flush = false; @@ -863,19 +999,21 @@ void kvm_tdp_mmu_zap_all(struct kvm *kvm) struct kvm_mmu_page *root; /* - * Zap all roots, including invalid roots, as all SPTEs must be dropped - * before returning to the caller. Zap directly even if the root is - * also being zapped by a worker. Walking zapped top-level SPTEs isn't - * all that expensive and mmu_lock is already held, which means the - * worker has yielded, i.e. flushing the work instead of zapping here - * isn't guaranteed to be any faster. + * Zap all direct roots, including invalid direct roots, as all direct + * SPTEs must be dropped before returning to the caller. For TDX, mirror + * roots don't need handling in response to the mmu notifier (the caller). + * + * Zap directly even if the root is also being zapped by a concurrent + * "fast zap". Walking zapped top-level SPTEs isn't all that expensive + * and mmu_lock is already held, which means the other thread has yielded. * * A TLB flush is unnecessary; KVM zaps everything if and only if the VM * is being destroyed or the userspace VMM has exited. In both cases, * KVM_RUN is unreachable, i.e. no vCPUs will ever service the request. */ lockdep_assert_held_write(&kvm->mmu_lock); - for_each_tdp_mmu_root_yield_safe(kvm, root) + __for_each_tdp_mmu_root_yield_safe(kvm, root, -1, + KVM_DIRECT_ROOTS | KVM_INVALID_ROOTS) tdp_mmu_zap_root(kvm, root, false); } @@ -883,11 +1021,14 @@ void kvm_tdp_mmu_zap_all(struct kvm *kvm) * Zap all invalidated roots to ensure all SPTEs are dropped before the "fast * zap" completes. 
*/ -void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm) +void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm, bool shared) { struct kvm_mmu_page *root; - read_lock(&kvm->mmu_lock); + if (shared) + read_lock(&kvm->mmu_lock); + else + write_lock(&kvm->mmu_lock); for_each_tdp_mmu_root_yield_safe(kvm, root) { if (!root->tdp_mmu_scheduled_root_to_zap) @@ -905,7 +1046,7 @@ void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm) * that may be zapped, as such entries are associated with the * ASID on both VMX and SVM. */ - tdp_mmu_zap_root(kvm, root, true); + tdp_mmu_zap_root(kvm, root, shared); /* * The reference needs to be put *after* zapping the root, as @@ -915,7 +1056,10 @@ void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm) kvm_tdp_mmu_put_root(kvm, root); } - read_unlock(&kvm->mmu_lock); + if (shared) + read_unlock(&kvm->mmu_lock); + else + write_unlock(&kvm->mmu_lock); } /* @@ -928,11 +1072,19 @@ void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm) * Note, kvm_tdp_mmu_zap_invalidated_roots() is gifted the TDP MMU's reference. * See kvm_tdp_mmu_alloc_root(). */ -void kvm_tdp_mmu_invalidate_all_roots(struct kvm *kvm) +void kvm_tdp_mmu_invalidate_roots(struct kvm *kvm, + enum kvm_tdp_mmu_root_types root_types) { struct kvm_mmu_page *root; /* + * Invalidating invalid roots doesn't make sense; prevent developers from + * having to think about it. + */ + if (WARN_ON_ONCE(root_types & KVM_INVALID_ROOTS)) + root_types &= ~KVM_INVALID_ROOTS; + + /* * mmu_lock must be held for write to ensure that a root doesn't become * invalid while there are active readers (invalidating a root while * there are active readers may or may not be problematic in practice, @@ -953,6 +1105,9 @@ void kvm_tdp_mmu_invalidate_all_roots(struct kvm *kvm) * or get/put references to roots. */ list_for_each_entry(root, &kvm->arch.tdp_mmu_roots, link) { + if (!tdp_mmu_root_match(root, root_types)) + continue; + /* * Note, invalid roots can outlive a memslot update! 
Invalid * roots must be *zapped* before the memslot update completes, @@ -1068,7 +1223,7 @@ static int tdp_mmu_split_huge_page(struct kvm *kvm, struct tdp_iter *iter, */ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { - struct kvm_mmu *mmu = vcpu->arch.mmu; + struct kvm_mmu_page *root = tdp_mmu_get_root_for_fault(vcpu, fault); struct kvm *kvm = vcpu->kvm; struct tdp_iter iter; struct kvm_mmu_page *sp; @@ -1080,7 +1235,7 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) rcu_read_lock(); - tdp_mmu_for_each_pte(iter, mmu, fault->gfn, fault->gfn + 1) { + tdp_mmu_for_each_pte(iter, kvm, root, fault->gfn, fault->gfn + 1) { int r; if (fault->nx_huge_page_workaround_enabled) @@ -1107,13 +1262,18 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) */ sp = tdp_mmu_alloc_sp(vcpu); tdp_mmu_init_child_sp(sp, &iter); + if (is_mirror_sp(sp)) + kvm_mmu_alloc_external_spt(vcpu, sp); sp->nx_huge_page_disallowed = fault->huge_page_disallowed; - if (is_shadow_present_pte(iter.old_spte)) + if (is_shadow_present_pte(iter.old_spte)) { + /* Don't support large page for mirrored roots (TDX) */ + KVM_BUG_ON(is_mirror_sptep(iter.sptep), vcpu->kvm); r = tdp_mmu_split_huge_page(kvm, &iter, sp, true); - else + } else { r = tdp_mmu_link_sp(kvm, &iter, sp, true); + } /* * Force the guest to retry if installing an upper level SPTE @@ -1148,12 +1308,16 @@ retry: return ret; } +/* Used by mmu notifier via kvm_unmap_gfn_range() */ bool kvm_tdp_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range, bool flush) { + enum kvm_tdp_mmu_root_types types; struct kvm_mmu_page *root; - __for_each_tdp_mmu_root_yield_safe(kvm, root, range->slot->as_id, false) + types = kvm_gfn_range_filter_to_root_types(kvm, range->attr_filter) | KVM_INVALID_ROOTS; + + __for_each_tdp_mmu_root_yield_safe(kvm, root, range->slot->as_id, types) flush = tdp_mmu_zap_leafs(kvm, root, range->start, range->end, range->may_block, flush); @@ -1193,20 +1357,24 @@ static bool __kvm_tdp_mmu_age_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range, bool test_only) { + enum kvm_tdp_mmu_root_types types; struct kvm_mmu_page *root; struct tdp_iter iter; bool ret = false; + types = kvm_gfn_range_filter_to_root_types(kvm, range->attr_filter); + /* * Don't support rescheduling, none of the MMU notifiers that funnel * into this helper allow blocking; it'd be dead, wasteful code. Note, * this helper must NOT be used to unmap GFNs, as it processes only * valid roots! */ - for_each_valid_tdp_mmu_root(kvm, root, range->slot->as_id) { + WARN_ON(types & ~KVM_VALID_ROOTS); + __for_each_tdp_mmu_root(kvm, root, range->slot->as_id, types) { guard(rcu)(); - tdp_root_for_each_leaf_pte(iter, root, range->start, range->end) { + tdp_root_for_each_leaf_pte(iter, kvm, root, range->start, range->end) { if (!is_accessed_spte(iter.old_spte)) continue; @@ -1247,7 +1415,7 @@ static bool wrprot_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, BUG_ON(min_level > KVM_MAX_HUGEPAGE_LEVEL); - for_each_tdp_pte_min_level(iter, root, min_level, start, end) { + for_each_tdp_pte_min_level(iter, kvm, root, min_level, start, end) { retry: if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true)) continue; @@ -1366,7 +1534,7 @@ static int tdp_mmu_split_huge_pages_root(struct kvm *kvm, * level above the target level (e.g. splitting a 1GB to 512 2MB pages, * and then splitting each of those to 512 4KB pages). 
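
The arithmetic behind the splitting comment above: fully splitting one 1GB mapping allocates one table for the 512 new 2MB entries, then one table per 2MB entry, for 1 + 512 table pages and 512 * 512 leaf entries. A quick check:

#include <stdio.h>

int main(void)
{
	long ptes_2m = 512;			/* 1GB -> 512 x 2MB */
	long ptes_4k = ptes_2m * 512;		/* each 2MB -> 512 x 4KB */

	/* Prints: 2MB entries: 512, 4KB entries: 262144, table pages: 513 */
	printf("2MB entries: %ld, 4KB entries: %ld, table pages: %ld\n",
	       ptes_2m, ptes_4k, 1L + ptes_2m);
	return 0;
}
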
*/ - for_each_tdp_pte_min_level(iter, root, target_level + 1, start, end) { + for_each_tdp_pte_min_level(iter, kvm, root, target_level + 1, start, end) { retry: if (tdp_mmu_iter_cond_resched(kvm, &iter, false, shared)) continue; @@ -1464,7 +1632,7 @@ static void clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, rcu_read_lock(); - tdp_root_for_each_pte(iter, root, start, end) { + tdp_root_for_each_pte(iter, kvm, root, start, end) { retry: if (!is_shadow_present_pte(iter.old_spte) || !is_last_spte(iter.old_spte, iter.level)) @@ -1512,7 +1680,7 @@ static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root, rcu_read_lock(); - tdp_root_for_each_leaf_pte(iter, root, gfn + __ffs(mask), + tdp_root_for_each_leaf_pte(iter, kvm, root, gfn + __ffs(mask), gfn + BITS_PER_LONG) { if (!mask) break; @@ -1566,7 +1734,7 @@ static int tdp_mmu_make_huge_spte(struct kvm *kvm, gfn_t end = start + KVM_PAGES_PER_HPAGE(parent->level); struct tdp_iter iter; - tdp_root_for_each_leaf_pte(iter, root, start, end) { + tdp_root_for_each_leaf_pte(iter, kvm, root, start, end) { /* * Use the parent iterator when checking for forward progress so * that KVM doesn't get stuck continuously trying to yield (i.e. @@ -1600,7 +1768,7 @@ static void recover_huge_pages_range(struct kvm *kvm, rcu_read_lock(); - for_each_tdp_pte_min_level(iter, root, PG_LEVEL_2M, start, end) { + for_each_tdp_pte_min_level(iter, kvm, root, PG_LEVEL_2M, start, end) { retry: if (tdp_mmu_iter_cond_resched(kvm, &iter, flush, true)) { flush = false; @@ -1681,7 +1849,7 @@ static bool write_protect_gfn(struct kvm *kvm, struct kvm_mmu_page *root, rcu_read_lock(); - for_each_tdp_pte_min_level(iter, root, min_level, gfn, gfn + 1) { + for_each_tdp_pte_min_level(iter, kvm, root, min_level, gfn, gfn + 1) { if (!is_shadow_present_pte(iter.old_spte) || !is_last_spte(iter.old_spte, iter.level)) continue; @@ -1729,14 +1897,14 @@ bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm, int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level) { + struct kvm_mmu_page *root = root_to_sp(vcpu->arch.mmu->root.hpa); struct tdp_iter iter; - struct kvm_mmu *mmu = vcpu->arch.mmu; gfn_t gfn = addr >> PAGE_SHIFT; int leaf = -1; *root_level = vcpu->arch.mmu->root_role.level; - tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) { + tdp_mmu_for_each_pte(iter, vcpu->kvm, root, gfn, gfn + 1) { leaf = iter.level; sptes[leaf] = iter.old_spte; } @@ -1758,11 +1926,12 @@ int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, u64 *kvm_tdp_mmu_fast_pf_get_last_sptep(struct kvm_vcpu *vcpu, gfn_t gfn, u64 *spte) { + /* Fast pf is not supported for mirrored roots */ + struct kvm_mmu_page *root = tdp_mmu_get_root(vcpu, KVM_DIRECT_ROOTS); struct tdp_iter iter; - struct kvm_mmu *mmu = vcpu->arch.mmu; tdp_ptep_t sptep = NULL; - tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) { + tdp_mmu_for_each_pte(iter, vcpu->kvm, root, gfn, gfn + 1) { *spte = iter.old_spte; sptep = iter.sptep; } diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h index f03ca0dd13d9..52acf99d40a0 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.h +++ b/arch/x86/kvm/mmu/tdp_mmu.h @@ -10,7 +10,7 @@ void kvm_mmu_init_tdp_mmu(struct kvm *kvm); void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm); -int kvm_tdp_mmu_alloc_root(struct kvm_vcpu *vcpu); +void kvm_tdp_mmu_alloc_root(struct kvm_vcpu *vcpu, bool private); __must_check static inline bool kvm_tdp_mmu_get_root(struct kvm_mmu_page *root) { @@ -19,11 +19,56 @@ __must_check static inline bool kvm_tdp_mmu_get_root(struct 
kvm_mmu_page *root) void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root); +enum kvm_tdp_mmu_root_types { + KVM_INVALID_ROOTS = BIT(0), + KVM_DIRECT_ROOTS = BIT(1), + KVM_MIRROR_ROOTS = BIT(2), + KVM_VALID_ROOTS = KVM_DIRECT_ROOTS | KVM_MIRROR_ROOTS, + KVM_ALL_ROOTS = KVM_VALID_ROOTS | KVM_INVALID_ROOTS, +}; + +static inline enum kvm_tdp_mmu_root_types kvm_gfn_range_filter_to_root_types(struct kvm *kvm, + enum kvm_gfn_range_filter process) +{ + enum kvm_tdp_mmu_root_types ret = 0; + + if (!kvm_has_mirrored_tdp(kvm)) + return KVM_DIRECT_ROOTS; + + if (process & KVM_FILTER_PRIVATE) + ret |= KVM_MIRROR_ROOTS; + if (process & KVM_FILTER_SHARED) + ret |= KVM_DIRECT_ROOTS; + + WARN_ON_ONCE(!ret); + + return ret; +} + +static inline struct kvm_mmu_page *tdp_mmu_get_root_for_fault(struct kvm_vcpu *vcpu, + struct kvm_page_fault *fault) +{ + if (unlikely(!kvm_is_addr_direct(vcpu->kvm, fault->addr))) + return root_to_sp(vcpu->arch.mmu->mirror_root_hpa); + + return root_to_sp(vcpu->arch.mmu->root.hpa); +} + +static inline struct kvm_mmu_page *tdp_mmu_get_root(struct kvm_vcpu *vcpu, + enum kvm_tdp_mmu_root_types type) +{ + if (unlikely(type == KVM_MIRROR_ROOTS)) + return root_to_sp(vcpu->arch.mmu->mirror_root_hpa); + + return root_to_sp(vcpu->arch.mmu->root.hpa); +} + bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, gfn_t start, gfn_t end, bool flush); bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp); void kvm_tdp_mmu_zap_all(struct kvm *kvm); -void kvm_tdp_mmu_invalidate_all_roots(struct kvm *kvm); -void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm); +void kvm_tdp_mmu_invalidate_roots(struct kvm *kvm, + enum kvm_tdp_mmu_root_types root_types); +void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm, bool shared); int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault); diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 47a46283c866..75e9cfc689f8 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -797,7 +797,6 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu) memset(pmu, 0, sizeof(*pmu)); kvm_pmu_call(init)(vcpu); - kvm_pmu_refresh(vcpu); } /* Release perf_events for vPMCs that have been unused for a full time slice. */ diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h index e46220ece83c..fde0ae986003 100644 --- a/arch/x86/kvm/reverse_cpuid.h +++ b/arch/x86/kvm/reverse_cpuid.h @@ -7,23 +7,6 @@ #include <asm/cpufeatures.h> /* - * Hardware-defined CPUID leafs that are either scattered by the kernel or are - * unknown to the kernel, but need to be directly used by KVM. Note, these - * word values conflict with the kernel's "bug" caps, but KVM doesn't use those. - */ -enum kvm_only_cpuid_leafs { - CPUID_12_EAX = NCAPINTS, - CPUID_7_1_EDX, - CPUID_8000_0007_EDX, - CPUID_8000_0022_EAX, - CPUID_7_2_EDX, - CPUID_24_0_EBX, - NR_KVM_CPU_CAPS, - - NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS, -}; - -/* * Define a KVM-only feature flag. 
* * For features that are scattered by cpufeatures.h, __feature_translate() also @@ -145,7 +128,10 @@ static __always_inline u32 __feature_translate(int x86_feature) static __always_inline u32 __feature_leaf(int x86_feature) { - return __feature_translate(x86_feature) / 32; + u32 x86_leaf = __feature_translate(x86_feature) / 32; + + reverse_cpuid_check(x86_leaf); + return x86_leaf; } /* @@ -168,7 +154,6 @@ static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned int x86_featu { unsigned int x86_leaf = __feature_leaf(x86_feature); - reverse_cpuid_check(x86_leaf); return reverse_cpuid[x86_leaf]; } diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c index 85241c0c7f56..e0ab7df27b66 100644 --- a/arch/x86/kvm/smm.c +++ b/arch/x86/kvm/smm.c @@ -283,7 +283,7 @@ void enter_smm(struct kvm_vcpu *vcpu) memset(smram.bytes, 0, sizeof(smram.bytes)); #ifdef CONFIG_X86_64 - if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) + if (guest_cpu_cap_has(vcpu, X86_FEATURE_LM)) enter_smm_save_state_64(vcpu, &smram.smram64); else #endif @@ -353,7 +353,7 @@ void enter_smm(struct kvm_vcpu *vcpu) kvm_set_segment(vcpu, &ds, VCPU_SREG_SS); #ifdef CONFIG_X86_64 - if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) + if (guest_cpu_cap_has(vcpu, X86_FEATURE_LM)) if (kvm_x86_call(set_efer)(vcpu, 0)) goto error; #endif @@ -586,7 +586,7 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt) * supports long mode. */ #ifdef CONFIG_X86_64 - if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) { + if (guest_cpu_cap_has(vcpu, X86_FEATURE_LM)) { struct kvm_segment cs_desc; unsigned long cr4; @@ -609,7 +609,7 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt) kvm_set_cr0(vcpu, cr0 & ~(X86_CR0_PG | X86_CR0_PE)); #ifdef CONFIG_X86_64 - if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) { + if (guest_cpu_cap_has(vcpu, X86_FEATURE_LM)) { unsigned long cr4, efer; /* Clear CR4.PAE before clearing EFER.LME. */ @@ -634,7 +634,7 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt) return X86EMUL_UNHANDLEABLE; #ifdef CONFIG_X86_64 - if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) + if (guest_cpu_cap_has(vcpu, X86_FEATURE_LM)) ret = rsm_load_state_64(ctxt, &smram.smram64); else #endif diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index b708bdf7eaff..d77b094d9a4d 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -111,7 +111,7 @@ static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu) static bool nested_vmcb_needs_vls_intercept(struct vcpu_svm *svm) { - if (!guest_can_use(&svm->vcpu, X86_FEATURE_V_VMSAVE_VMLOAD)) + if (!guest_cpu_cap_has(&svm->vcpu, X86_FEATURE_V_VMSAVE_VMLOAD)) return true; if (!nested_npt_enabled(svm)) @@ -594,7 +594,7 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12 vmcb_mark_dirty(vmcb02, VMCB_DR); } - if (unlikely(guest_can_use(vcpu, X86_FEATURE_LBRV) && + if (unlikely(guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) && (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) { /* * Reserved bits of DEBUGCTL are ignored. Be consistent with @@ -651,7 +651,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm, * exit_int_info, exit_int_info_err, next_rip, insn_len, insn_bytes. 
*/ - if (guest_can_use(vcpu, X86_FEATURE_VGIF) && + if (guest_cpu_cap_has(vcpu, X86_FEATURE_VGIF) && (svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK)) int_ctl_vmcb12_bits |= (V_GIF_MASK | V_GIF_ENABLE_MASK); else @@ -689,7 +689,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm, vmcb02->control.tsc_offset = vcpu->arch.tsc_offset; - if (guest_can_use(vcpu, X86_FEATURE_TSCRATEMSR) && + if (guest_cpu_cap_has(vcpu, X86_FEATURE_TSCRATEMSR) && svm->tsc_ratio_msr != kvm_caps.default_tsc_scaling_ratio) nested_svm_update_tsc_ratio_msr(vcpu); @@ -710,7 +710,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm, * what a nrips=0 CPU would do (L1 is responsible for advancing RIP * prior to injecting the event). */ - if (guest_can_use(vcpu, X86_FEATURE_NRIPS)) + if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS)) vmcb02->control.next_rip = svm->nested.ctl.next_rip; else if (boot_cpu_has(X86_FEATURE_NRIPS)) vmcb02->control.next_rip = vmcb12_rip; @@ -720,7 +720,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm, svm->soft_int_injected = true; svm->soft_int_csbase = vmcb12_csbase; svm->soft_int_old_rip = vmcb12_rip; - if (guest_can_use(vcpu, X86_FEATURE_NRIPS)) + if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS)) svm->soft_int_next_rip = svm->nested.ctl.next_rip; else svm->soft_int_next_rip = vmcb12_rip; @@ -728,18 +728,18 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm, vmcb02->control.virt_ext = vmcb01->control.virt_ext & LBR_CTL_ENABLE_MASK; - if (guest_can_use(vcpu, X86_FEATURE_LBRV)) + if (guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV)) vmcb02->control.virt_ext |= (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK); if (!nested_vmcb_needs_vls_intercept(svm)) vmcb02->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK; - if (guest_can_use(vcpu, X86_FEATURE_PAUSEFILTER)) + if (guest_cpu_cap_has(vcpu, X86_FEATURE_PAUSEFILTER)) pause_count12 = svm->nested.ctl.pause_filter_count; else pause_count12 = 0; - if (guest_can_use(vcpu, X86_FEATURE_PFTHRESHOLD)) + if (guest_cpu_cap_has(vcpu, X86_FEATURE_PFTHRESHOLD)) pause_thresh12 = svm->nested.ctl.pause_filter_thresh; else pause_thresh12 = 0; @@ -1026,7 +1026,7 @@ int nested_svm_vmexit(struct vcpu_svm *svm) if (vmcb12->control.exit_code != SVM_EXIT_ERR) nested_save_pending_event_to_vmcb12(svm, vmcb12); - if (guest_can_use(vcpu, X86_FEATURE_NRIPS)) + if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS)) vmcb12->control.next_rip = vmcb02->control.next_rip; vmcb12->control.int_ctl = svm->nested.ctl.int_ctl; @@ -1065,7 +1065,7 @@ int nested_svm_vmexit(struct vcpu_svm *svm) if (!nested_exit_on_intr(svm)) kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); - if (unlikely(guest_can_use(vcpu, X86_FEATURE_LBRV) && + if (unlikely(guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) && (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) { svm_copy_lbrs(vmcb12, vmcb02); svm_update_lbrv(vcpu); diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c index 22d5a65b410c..288f7f2a46f2 100644 --- a/arch/x86/kvm/svm/pmu.c +++ b/arch/x86/kvm/svm/pmu.c @@ -46,7 +46,7 @@ static inline struct kvm_pmc *get_gp_pmc_amd(struct kvm_pmu *pmu, u32 msr, switch (msr) { case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5: - if (!guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE)) + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_PERFCTR_CORE)) return NULL; /* * Each PMU counter has a pair of CTL and CTR MSRs. CTLn @@ -109,7 +109,7 @@ static bool amd_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) case MSR_K7_EVNTSEL0 ... 
MSR_K7_PERFCTR3: return pmu->version > 0; case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5: - return guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE); + return guest_cpu_cap_has(vcpu, X86_FEATURE_PERFCTR_CORE); case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS: case MSR_AMD64_PERF_CNTR_GLOBAL_CTL: case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR: @@ -179,7 +179,7 @@ static void amd_pmu_refresh(struct kvm_vcpu *vcpu) union cpuid_0x80000022_ebx ebx; pmu->version = 1; - if (guest_cpuid_has(vcpu, X86_FEATURE_PERFMON_V2)) { + if (guest_cpu_cap_has(vcpu, X86_FEATURE_PERFMON_V2)) { pmu->version = 2; /* * Note, PERFMON_V2 is also in 0x80000022.0x0, i.e. the guest @@ -189,7 +189,7 @@ static void amd_pmu_refresh(struct kvm_vcpu *vcpu) x86_feature_cpuid(X86_FEATURE_PERFMON_V2).index); ebx.full = kvm_find_cpuid_entry_index(vcpu, 0x80000022, 0)->ebx; pmu->nr_arch_gp_counters = ebx.split.num_core_pmc; - } else if (guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE)) { + } else if (guest_cpu_cap_has(vcpu, X86_FEATURE_PERFCTR_CORE)) { pmu->nr_arch_gp_counters = AMD64_NUM_COUNTERS_CORE; } else { pmu->nr_arch_gp_counters = AMD64_NUM_COUNTERS; } diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index fe6cc763fd51..a2a794c32050 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -3051,11 +3051,11 @@ out: min_sev_asid, max_sev_asid); if (boot_cpu_has(X86_FEATURE_SEV_ES)) pr_info("SEV-ES %s (ASIDs %u - %u)\n", - sev_es_supported ? "enabled" : "disabled", + str_enabled_disabled(sev_es_supported), min_sev_asid > 1 ? 1 : 0, min_sev_asid - 1); if (boot_cpu_has(X86_FEATURE_SEV_SNP)) pr_info("SEV-SNP %s (ASIDs %u - %u)\n", - sev_snp_supported ? "enabled" : "disabled", + str_enabled_disabled(sev_snp_supported), min_sev_asid > 1 ? 1 : 0, min_sev_asid - 1); sev_enabled = sev_supported; @@ -3627,13 +3627,20 @@ static int snp_begin_psc_msr(struct vcpu_svm *svm, u64 ghcb_msr) return 1; /* resume guest */ } - if (!(vcpu->kvm->arch.hypercall_exit_enabled & (1 << KVM_HC_MAP_GPA_RANGE))) { + if (!user_exit_on_hypercall(vcpu->kvm, KVM_HC_MAP_GPA_RANGE)) { set_ghcb_msr(svm, GHCB_MSR_PSC_RESP_ERROR); return 1; /* resume guest */ } vcpu->run->exit_reason = KVM_EXIT_HYPERCALL; vcpu->run->hypercall.nr = KVM_HC_MAP_GPA_RANGE; + /* + * In principle this should have been -KVM_ENOSYS, but userspace (QEMU <=9.2) + * assumed that vcpu->run->hypercall.ret is never changed by KVM and thus that + * it was always zero on KVM_EXIT_HYPERCALL. Since KVM now overwrites + * vcpu->run->hypercall.ret, ensure that it is zero so as not to break QEMU. + */ + vcpu->run->hypercall.ret = 0; vcpu->run->hypercall.args[0] = gpa; vcpu->run->hypercall.args[1] = 1; vcpu->run->hypercall.args[2] = (op == SNP_PAGE_STATE_PRIVATE) @@ -3710,7 +3717,7 @@ static int snp_begin_psc(struct vcpu_svm *svm, struct psc_buffer *psc) bool huge; u64 gfn; - if (!(vcpu->kvm->arch.hypercall_exit_enabled & (1 << KVM_HC_MAP_GPA_RANGE))) { + if (!user_exit_on_hypercall(vcpu->kvm, KVM_HC_MAP_GPA_RANGE)) { snp_complete_psc(svm, VMGEXIT_PSC_ERROR_GENERIC); return 1; } @@ -3797,6 +3804,13 @@ next_range: case VMGEXIT_PSC_OP_SHARED: vcpu->run->exit_reason = KVM_EXIT_HYPERCALL; vcpu->run->hypercall.nr = KVM_HC_MAP_GPA_RANGE; + /* + * In principle this should have been -KVM_ENOSYS, but userspace (QEMU <=9.2) + * assumed that vcpu->run->hypercall.ret is never changed by KVM and thus that + * it was always zero on KVM_EXIT_HYPERCALL. Since KVM now overwrites + * vcpu->run->hypercall.ret, ensure that it is zero so as not to break QEMU. 
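
For context, the VMM side of these KVM_EXIT_HYPERCALL exits looks roughly like the sketch below. Field names come from the uapi headers (<linux/kvm.h>, <linux/kvm_para.h>); the handler body and the surrounding vCPU run loop are illustrative only. It touches the same hypercall.ret field that the comment above is careful to zero on the KVM side.

#include <linux/kvm.h>
#include <linux/kvm_para.h>
#include <stdint.h>
#include <stdio.h>

/* Normally called with the mmap'ed kvm_run after KVM_RUN returns. */
static void handle_exit(struct kvm_run *run)
{
	if (run->exit_reason != KVM_EXIT_HYPERCALL ||
	    run->hypercall.nr != KVM_HC_MAP_GPA_RANGE)
		return;

	uint64_t gpa    = run->hypercall.args[0];
	uint64_t npages = run->hypercall.args[1];
	uint64_t attrs  = run->hypercall.args[2];

	/* ... flip the range private<->shared in the VMM's view ... */
	printf("map gpa 0x%llx, %llu pages, attrs 0x%llx\n",
	       (unsigned long long)gpa, (unsigned long long)npages,
	       (unsigned long long)attrs);

	run->hypercall.ret = 0;	/* report success back to the guest */
}

int main(void)
{
	/* Stack-allocated stand-in for the mmap'ed vCPU run structure. */
	struct kvm_run run = { .exit_reason = KVM_EXIT_HYPERCALL };

	run.hypercall.nr = KVM_HC_MAP_GPA_RANGE;
	run.hypercall.args[0] = 0x1234000;
	run.hypercall.args[1] = 1;
	handle_exit(&run);
	return 0;
}
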
+ */ + vcpu->run->hypercall.ret = 0; vcpu->run->hypercall.args[0] = gfn_to_gpa(gfn); vcpu->run->hypercall.args[1] = npages; vcpu->run->hypercall.args[2] = entry_start.operation == VMGEXIT_PSC_OP_PRIVATE @@ -4435,8 +4449,8 @@ static void sev_es_vcpu_after_set_cpuid(struct vcpu_svm *svm) struct kvm_vcpu *vcpu = &svm->vcpu; if (boot_cpu_has(X86_FEATURE_V_TSC_AUX)) { - bool v_tsc_aux = guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) || - guest_cpuid_has(vcpu, X86_FEATURE_RDPID); + bool v_tsc_aux = guest_cpu_cap_has(vcpu, X86_FEATURE_RDTSCP) || + guest_cpu_cap_has(vcpu, X86_FEATURE_RDPID); set_msr_interception(vcpu, svm->msrpm, MSR_TSC_AUX, v_tsc_aux, v_tsc_aux); } @@ -4445,16 +4459,15 @@ static void sev_es_vcpu_after_set_cpuid(struct vcpu_svm *svm) * For SEV-ES, accesses to MSR_IA32_XSS should not be intercepted if * the host/guest supports its use. * - * guest_can_use() checks a number of requirements on the host/guest to - * ensure that MSR_IA32_XSS is available, but it might report true even - * if X86_FEATURE_XSAVES isn't configured in the guest to ensure host - * MSR_IA32_XSS is always properly restored. For SEV-ES, it is better - * to further check that the guest CPUID actually supports - * X86_FEATURE_XSAVES so that accesses to MSR_IA32_XSS by misbehaved - * guests will still get intercepted and caught in the normal - * kvm_emulate_rdmsr()/kvm_emulated_wrmsr() paths. + * KVM treats the guest as being capable of using XSAVES even if XSAVES + * isn't enabled in guest CPUID as there is no intercept for XSAVES, + * i.e. the guest can use XSAVES/XRSTOR to read/write XSS if XSAVE is + * exposed to the guest and XSAVES is supported in hardware. Condition + * full XSS passthrough on the guest being able to use XSAVES *and* + * XSAVES being exposed to the guest so that KVM can at least honor + * guest CPUID for RDMSR and WRMSR. 
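
The distinction the XSS comment above leans on can be modeled with two bitmaps (the representation is assumed for illustration): guest_cpuid_has() reflects only what userspace put in guest CPUID, while guest_cpu_cap_has() tracks what KVM lets the guest actually use, which KVM may force on (as with XSAVES on SEV-ES here) or constrain below guest CPUID. That is why the passthrough decision checks both.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define F_XSAVES (1ULL << 0)	/* illustrative feature bit */

struct vcpu {
	uint64_t cpuid_bits;	/* raw guest CPUID, set by userspace */
	uint64_t cpu_caps;	/* what KVM lets the guest actually use */
};

static bool guest_cpuid_has(const struct vcpu *v, uint64_t f)
{
	return v->cpuid_bits & f;
}

static bool guest_cpu_cap_has(const struct vcpu *v, uint64_t f)
{
	return v->cpu_caps & f;
}

int main(void)
{
	/* SEV-ES-like case: cap forced on, but XSAVES absent from CPUID. */
	struct vcpu v = { .cpuid_bits = 0, .cpu_caps = F_XSAVES };
	bool passthrough = guest_cpu_cap_has(&v, F_XSAVES) &&
			   guest_cpuid_has(&v, F_XSAVES);

	/* No passthrough: XSS RDMSR/WRMSR keep honoring guest CPUID. */
	printf("XSS passthrough: %s\n", passthrough ? "yes" : "no");
	return 0;
}
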
*/ - if (guest_can_use(vcpu, X86_FEATURE_XSAVES) && + if (guest_cpu_cap_has(vcpu, X86_FEATURE_XSAVES) && guest_cpuid_has(vcpu, X86_FEATURE_XSAVES)) set_msr_interception(vcpu, svm->msrpm, MSR_IA32_XSS, 1, 1); else diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 21dacd312779..7640a84e554a 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -28,6 +28,7 @@ #include <linux/rwsem.h> #include <linux/cc_platform.h> #include <linux/smp.h> +#include <linux/string_choices.h> #include <asm/apic.h> #include <asm/perf_event.h> @@ -284,8 +285,6 @@ u32 svm_msrpm_offset(u32 msr) return MSR_INVALID; } -static void svm_flush_tlb_current(struct kvm_vcpu *vcpu); - static int get_npt_level(void) { #ifdef CONFIG_X86_64 @@ -1049,7 +1048,7 @@ void svm_update_lbrv(struct kvm_vcpu *vcpu) struct vcpu_svm *svm = to_svm(vcpu); bool current_enable_lbrv = svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK; bool enable_lbrv = (svm_get_lbr_vmcb(svm)->save.dbgctl & DEBUGCTLMSR_LBR) || - (is_guest_mode(vcpu) && guest_can_use(vcpu, X86_FEATURE_LBRV) && + (is_guest_mode(vcpu) && guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) && (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK)); if (enable_lbrv == current_enable_lbrv) @@ -1187,14 +1186,14 @@ static void svm_recalc_instruction_intercepts(struct kvm_vcpu *vcpu, */ if (kvm_cpu_cap_has(X86_FEATURE_INVPCID)) { if (!npt_enabled || - !guest_cpuid_has(&svm->vcpu, X86_FEATURE_INVPCID)) + !guest_cpu_cap_has(&svm->vcpu, X86_FEATURE_INVPCID)) svm_set_intercept(svm, INTERCEPT_INVPCID); else svm_clr_intercept(svm, INTERCEPT_INVPCID); } if (kvm_cpu_cap_has(X86_FEATURE_RDTSCP)) { - if (guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP)) + if (guest_cpu_cap_has(vcpu, X86_FEATURE_RDTSCP)) svm_clr_intercept(svm, INTERCEPT_RDTSCP); else svm_set_intercept(svm, INTERCEPT_RDTSCP); @@ -1921,9 +1920,6 @@ void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) unsigned long host_cr4_mce = cr4_read_shadow() & X86_CR4_MCE; unsigned long old_cr4 = vcpu->arch.cr4; - if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE)) - svm_flush_tlb_current(vcpu); - vcpu->arch.cr4 = cr4; if (!npt_enabled) { cr4 |= X86_CR4_PAE; @@ -2864,7 +2860,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) switch (msr_info->index) { case MSR_AMD64_TSC_RATIO: if (!msr_info->host_initiated && - !guest_can_use(vcpu, X86_FEATURE_TSCRATEMSR)) + !guest_cpu_cap_has(vcpu, X86_FEATURE_TSCRATEMSR)) return 1; msr_info->data = svm->tsc_ratio_msr; break; @@ -2940,7 +2936,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_AMD64_VIRT_SPEC_CTRL: if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_VIRT_SSBD)) + !guest_cpu_cap_has(vcpu, X86_FEATURE_VIRT_SSBD)) return 1; msr_info->data = svm->virt_spec_ctrl; @@ -3024,7 +3020,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) switch (ecx) { case MSR_AMD64_TSC_RATIO: - if (!guest_can_use(vcpu, X86_FEATURE_TSCRATEMSR)) { + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_TSCRATEMSR)) { if (!msr->host_initiated) return 1; @@ -3046,7 +3042,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) svm->tsc_ratio_msr = data; - if (guest_can_use(vcpu, X86_FEATURE_TSCRATEMSR) && + if (guest_cpu_cap_has(vcpu, X86_FEATURE_TSCRATEMSR) && is_guest_mode(vcpu)) nested_svm_update_tsc_ratio_msr(vcpu); @@ -3091,7 +3087,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) break; case MSR_AMD64_VIRT_SPEC_CTRL: if (!msr->host_initiated && - !guest_cpuid_has(vcpu, 
X86_FEATURE_VIRT_SSBD)) + !guest_cpu_cap_has(vcpu, X86_FEATURE_VIRT_SSBD)) return 1; if (data & ~SPEC_CTRL_SSBD) @@ -3263,7 +3259,7 @@ static int invpcid_interception(struct kvm_vcpu *vcpu) unsigned long type; gva_t gva; - if (!guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) { + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_INVPCID)) { kvm_queue_exception(vcpu, UD_VECTOR); return 1; } @@ -3533,6 +3529,21 @@ static void svm_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason, *error_code = 0; } +static void svm_get_entry_info(struct kvm_vcpu *vcpu, u32 *intr_info, + u32 *error_code) +{ + struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control; + + *intr_info = control->event_inj; + + if ((*intr_info & SVM_EXITINTINFO_VALID) && + (*intr_info & SVM_EXITINTINFO_VALID_ERR)) + *error_code = control->event_inj_err; + else + *error_code = 0; + +} + static int svm_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) { struct vcpu_svm *svm = to_svm(vcpu); @@ -4392,27 +4403,17 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) * XSS on VM-Enter/VM-Exit. Failure to do so would effectively give * the guest read/write access to the host's XSS. */ - if (boot_cpu_has(X86_FEATURE_XSAVE) && - boot_cpu_has(X86_FEATURE_XSAVES) && - guest_cpuid_has(vcpu, X86_FEATURE_XSAVE)) - kvm_governed_feature_set(vcpu, X86_FEATURE_XSAVES); - - kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_NRIPS); - kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_TSCRATEMSR); - kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_LBRV); + guest_cpu_cap_change(vcpu, X86_FEATURE_XSAVES, + boot_cpu_has(X86_FEATURE_XSAVES) && + guest_cpu_cap_has(vcpu, X86_FEATURE_XSAVE)); /* * Intercept VMLOAD if the vCPU model is Intel in order to emulate that * VMLOAD drops bits 63:32 of SYSENTER (ignoring the fact that exposing * SVM on Intel is bonkers and extremely unlikely to work). */ - if (!guest_cpuid_is_intel_compatible(vcpu)) - kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_V_VMSAVE_VMLOAD); - - kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_PAUSEFILTER); - kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_PFTHRESHOLD); - kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_VGIF); - kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_VNMI); + if (guest_cpuid_is_intel_compatible(vcpu)) + guest_cpu_cap_clear(vcpu, X86_FEATURE_V_VMSAVE_VMLOAD); svm_recalc_instruction_intercepts(vcpu, svm); @@ -4422,7 +4423,7 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) if (boot_cpu_has(X86_FEATURE_FLUSH_L1D)) set_msr_interception(vcpu, svm->msrpm, MSR_IA32_FLUSH_CMD, 0, - !!guest_cpuid_has(vcpu, X86_FEATURE_FLUSH_L1D)); + !!guest_cpu_cap_has(vcpu, X86_FEATURE_FLUSH_L1D)); if (sev_guest(vcpu->kvm)) sev_vcpu_after_set_cpuid(svm); @@ -4673,7 +4674,7 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram) * responsible for ensuring nested SVM and SMIs are mutually exclusive. */ - if (!guest_cpuid_has(vcpu, X86_FEATURE_LM)) + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_LM)) return 1; smram->smram64.svm_guest_flag = 1; @@ -4720,14 +4721,14 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram) const struct kvm_smram_state_64 *smram64 = &smram->smram64; - if (!guest_cpuid_has(vcpu, X86_FEATURE_LM)) + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_LM)) return 0; /* Non-zero if SMI arrived while vCPU was in guest mode. 
*/ if (!smram64->svm_guest_flag) return 0; - if (!guest_cpuid_has(vcpu, X86_FEATURE_SVM)) + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_SVM)) return 1; if (!(smram64->efer & EFER_SVME)) @@ -4790,9 +4791,15 @@ static void svm_enable_smi_window(struct kvm_vcpu *vcpu) static int svm_check_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type, void *insn, int insn_len) { + struct vcpu_svm *svm = to_svm(vcpu); bool smep, smap, is_user; u64 error_code; + /* Check that emulation is possible during event vectoring */ + if ((svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK) && + !kvm_can_emulate_event_vectoring(emul_type)) + return X86EMUL_UNHANDLEABLE_VECTORING; + /* Emulation is always possible when KVM has access to all guest state. */ if (!sev_guest(vcpu->kvm)) return X86EMUL_CONTINUE; @@ -4889,7 +4896,7 @@ static int svm_check_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type, * In addition, don't apply the erratum workaround if the #NPF occurred * while translating guest page tables (see below). */ - error_code = to_svm(vcpu)->vmcb->control.exit_info_1; + error_code = svm->vmcb->control.exit_info_1; if (error_code & (PFERR_GUEST_PAGE_MASK | PFERR_FETCH_MASK)) goto resume_guest; @@ -5077,6 +5084,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .required_apicv_inhibits = AVIC_REQUIRED_APICV_INHIBITS, .get_exit_info = svm_get_exit_info, + .get_entry_info = svm_get_entry_info, .vcpu_after_set_cpuid = svm_vcpu_after_set_cpuid, @@ -5328,7 +5336,7 @@ static __init int svm_hardware_setup(void) /* Force VM NPT level equal to the host's paging level */ kvm_configure_mmu(npt_enabled, get_npt_level(), get_npt_level(), PG_LEVEL_1G); - pr_info("Nested Paging %sabled\n", npt_enabled ? "en" : "dis"); + pr_info("Nested Paging %s\n", str_enabled_disabled(npt_enabled)); /* Setup shadow_me_value and shadow_me_mask */ kvm_mmu_set_me_spte_mask(sme_me_mask, sme_me_mask); diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 43fa6a16eb19..9d7cdb8fbf87 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -358,39 +358,32 @@ static __always_inline struct kvm_sev_info *to_kvm_sev_info(struct kvm *kvm) return &to_kvm_svm(kvm)->sev_info; } +#ifdef CONFIG_KVM_AMD_SEV static __always_inline bool sev_guest(struct kvm *kvm) { -#ifdef CONFIG_KVM_AMD_SEV struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; return sev->active; -#else - return false; -#endif } - static __always_inline bool sev_es_guest(struct kvm *kvm) { -#ifdef CONFIG_KVM_AMD_SEV struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; return sev->es_active && !WARN_ON_ONCE(!sev->active); -#else - return false; -#endif } static __always_inline bool sev_snp_guest(struct kvm *kvm) { -#ifdef CONFIG_KVM_AMD_SEV struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; return (sev->vmsa_features & SVM_SEV_FEAT_SNP_ACTIVE) && !WARN_ON_ONCE(!sev_es_guest(kvm)); +} #else - return false; +#define sev_guest(kvm) false +#define sev_es_guest(kvm) false +#define sev_snp_guest(kvm) false #endif -} static inline bool ghcb_gpa_is_registered(struct vcpu_svm *svm, u64 val) { @@ -502,7 +495,7 @@ static inline bool svm_is_intercept(struct vcpu_svm *svm, int bit) static inline bool nested_vgif_enabled(struct vcpu_svm *svm) { - return guest_can_use(&svm->vcpu, X86_FEATURE_VGIF) && + return guest_cpu_cap_has(&svm->vcpu, X86_FEATURE_VGIF) && (svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK); } @@ -554,7 +547,7 @@ static inline bool nested_npt_enabled(struct vcpu_svm *svm) static inline bool nested_vnmi_enabled(struct vcpu_svm *svm) { - 
return guest_can_use(&svm->vcpu, X86_FEATURE_VNMI) && + return guest_cpu_cap_has(&svm->vcpu, X86_FEATURE_VNMI) && (svm->nested.ctl.int_ctl & V_NMI_ENABLE_MASK); } diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index d3aeffd6ae75..0b844cb97978 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -22,15 +22,22 @@ TRACE_EVENT(kvm_entry, __field( unsigned int, vcpu_id ) __field( unsigned long, rip ) __field( bool, immediate_exit ) + __field( u32, intr_info ) + __field( u32, error_code ) ), TP_fast_assign( __entry->vcpu_id = vcpu->vcpu_id; __entry->rip = kvm_rip_read(vcpu); __entry->immediate_exit = force_immediate_exit; + + kvm_x86_call(get_entry_info)(vcpu, &__entry->intr_info, + &__entry->error_code); ), - TP_printk("vcpu %u, rip 0x%lx%s", __entry->vcpu_id, __entry->rip, + TP_printk("vcpu %u, rip 0x%lx intr_info 0x%08x error_code 0x%08x%s", + __entry->vcpu_id, __entry->rip, + __entry->intr_info, __entry->error_code, __entry->immediate_exit ? "[immediate exit]" : "") ); @@ -308,12 +315,14 @@ TRACE_EVENT(name, \ __field( u32, intr_info ) \ __field( u32, error_code ) \ __field( unsigned int, vcpu_id ) \ + __field( u64, requests ) \ ), \ \ TP_fast_assign( \ __entry->guest_rip = kvm_rip_read(vcpu); \ __entry->isa = isa; \ __entry->vcpu_id = vcpu->vcpu_id; \ + __entry->requests = READ_ONCE(vcpu->requests); \ kvm_x86_call(get_exit_info)(vcpu, \ &__entry->exit_reason, \ &__entry->info1, \ @@ -323,11 +332,13 @@ TRACE_EVENT(name, \ ), \ \ TP_printk("vcpu %u reason %s%s%s rip 0x%lx info1 0x%016llx " \ - "info2 0x%016llx intr_info 0x%08x error_code 0x%08x", \ + "info2 0x%016llx intr_info 0x%08x error_code 0x%08x " \ + "requests 0x%016llx", \ __entry->vcpu_id, \ kvm_print_exit_reason(__entry->exit_reason, __entry->isa), \ __entry->guest_rip, __entry->info1, __entry->info2, \ - __entry->intr_info, __entry->error_code) \ + __entry->intr_info, __entry->error_code, \ + __entry->requests) \ ) /* diff --git a/arch/x86/kvm/vmx/hyperv.h b/arch/x86/kvm/vmx/hyperv.h index a87407412615..11a339009781 100644 --- a/arch/x86/kvm/vmx/hyperv.h +++ b/arch/x86/kvm/vmx/hyperv.h @@ -42,7 +42,7 @@ static inline struct hv_enlightened_vmcs *nested_vmx_evmcs(struct vcpu_vmx *vmx) return vmx->nested.hv_evmcs; } -static inline bool guest_cpuid_has_evmcs(struct kvm_vcpu *vcpu) +static inline bool guest_cpu_cap_has_evmcs(struct kvm_vcpu *vcpu) { /* * eVMCS is exposed to the guest if Hyper-V is enabled in CPUID and diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c index 92d35cc6cd15..2427f918e763 100644 --- a/arch/x86/kvm/vmx/main.c +++ b/arch/x86/kvm/vmx/main.c @@ -100,7 +100,6 @@ struct kvm_x86_ops vt_x86_ops __initdata = { .load_eoi_exitmap = vmx_load_eoi_exitmap, .apicv_pre_state_restore = vmx_apicv_pre_state_restore, .required_apicv_inhibits = VMX_REQUIRED_APICV_INHIBITS, - .hwapic_irr_update = vmx_hwapic_irr_update, .hwapic_isr_update = vmx_hwapic_isr_update, .sync_pir_to_irr = vmx_sync_pir_to_irr, .deliver_interrupt = vmx_deliver_interrupt, @@ -111,6 +110,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = { .get_mt_mask = vmx_get_mt_mask, .get_exit_info = vmx_get_exit_info, + .get_entry_info = vmx_get_entry_info, .vcpu_after_set_cpuid = vmx_vcpu_after_set_cpuid, @@ -126,7 +126,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = { .check_intercept = vmx_check_intercept, .handle_exit_irqoff = vmx_handle_exit_irqoff, - .cpu_dirty_log_size = PML_ENTITY_NUM, + .cpu_dirty_log_size = PML_LOG_NR_ENTRIES, .update_cpu_dirty_logging = vmx_update_cpu_dirty_logging, .nested_ops = &vmx_nested_ops, diff --git 
a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index aa78b6f38dfe..8a7af02d466e 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -257,7 +257,7 @@ static bool nested_evmcs_handle_vmclear(struct kvm_vcpu *vcpu, gpa_t vmptr) * state. It is possible that the area will stay mapped as * vmx->nested.hv_evmcs but this shouldn't be a problem. */ - if (!guest_cpuid_has_evmcs(vcpu) || + if (!guest_cpu_cap_has_evmcs(vcpu) || !evmptr_is_valid(nested_get_evmptr(vcpu))) return false; @@ -2089,7 +2089,7 @@ static enum nested_evmptrld_status nested_vmx_handle_enlightened_vmptrld( bool evmcs_gpa_changed = false; u64 evmcs_gpa; - if (likely(!guest_cpuid_has_evmcs(vcpu))) + if (likely(!guest_cpu_cap_has_evmcs(vcpu))) return EVMPTRLD_DISABLED; evmcs_gpa = nested_get_evmptr(vcpu); @@ -2992,7 +2992,7 @@ static int nested_vmx_check_controls(struct kvm_vcpu *vcpu, return -EINVAL; #ifdef CONFIG_KVM_HYPERV - if (guest_cpuid_has_evmcs(vcpu)) + if (guest_cpu_cap_has_evmcs(vcpu)) return nested_evmcs_check_controls(vmcs12); #endif @@ -3287,7 +3287,7 @@ static bool nested_get_evmcs_page(struct kvm_vcpu *vcpu) * L2 was running), map it here to make sure vmcs12 changes are * properly reflected. */ - if (guest_cpuid_has_evmcs(vcpu) && + if (guest_cpu_cap_has_evmcs(vcpu) && vmx->nested.hv_evmcs_vmptr == EVMPTR_MAP_PENDING) { enum nested_evmptrld_status evmptrld_status = nested_vmx_handle_enlightened_vmptrld(vcpu, false); @@ -3442,7 +3442,7 @@ static int nested_vmx_write_pml_buffer(struct kvm_vcpu *vcpu, gpa_t gpa) if (!nested_cpu_has_pml(vmcs12)) return 0; - if (vmcs12->guest_pml_index >= PML_ENTITY_NUM) { + if (vmcs12->guest_pml_index >= PML_LOG_NR_ENTRIES) { vmx->nested.pml_full = true; return 1; } @@ -3481,14 +3481,6 @@ static int nested_vmx_check_permission(struct kvm_vcpu *vcpu) return 1; } -static u8 vmx_has_apicv_interrupt(struct kvm_vcpu *vcpu) -{ - u8 rvi = vmx_get_rvi(); - u8 vppr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_PROCPRI); - - return ((rvi & 0xf0) > (vppr & 0xf0)); -} - static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12); @@ -3508,7 +3500,6 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx = to_vmx(vcpu); struct vmcs12 *vmcs12 = get_vmcs12(vcpu); enum vm_entry_failure_code entry_failure_code; - bool evaluate_pending_interrupts; union vmx_exit_reason exit_reason = { .basic = EXIT_REASON_INVALID_STATE, .failed_vmentry = 1, @@ -3527,13 +3518,6 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, kvm_service_local_tlb_flush_requests(vcpu); - evaluate_pending_interrupts = exec_controls_get(vmx) & - (CPU_BASED_INTR_WINDOW_EXITING | CPU_BASED_NMI_WINDOW_EXITING); - if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu)) - evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu); - if (!evaluate_pending_interrupts) - evaluate_pending_interrupts |= kvm_apic_has_pending_init_or_sipi(vcpu); - if (!vmx->nested.nested_run_pending || !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) vmx->nested.pre_vmenter_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); @@ -3616,9 +3600,13 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, * Re-evaluate pending events if L1 had a pending IRQ/NMI/INIT/SIPI * when it executed VMLAUNCH/VMRESUME, as entering non-root mode can * effectively unblock various events, e.g. INIT/SIPI cause VM-Exit - * unconditionally. + * unconditionally. 
Take care to pull data from vmcs01 as appropriate, + * e.g. when checking for interrupt windows, as vmcs02 is now loaded. */ - if (unlikely(evaluate_pending_interrupts)) + if ((__exec_controls_get(&vmx->vmcs01) & (CPU_BASED_INTR_WINDOW_EXITING | + CPU_BASED_NMI_WINDOW_EXITING)) || + kvm_apic_has_pending_init_or_sipi(vcpu) || + kvm_apic_has_interrupt(vcpu)) kvm_make_request(KVM_REQ_EVENT, vcpu); /* @@ -3751,14 +3739,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) if (unlikely(status != NVMX_VMENTRY_SUCCESS)) goto vmentry_failed; - /* Emulate processing of posted interrupts on VM-Enter. */ - if (nested_cpu_has_posted_intr(vmcs12) && - kvm_apic_has_interrupt(vcpu) == vmx->nested.posted_intr_nv) { - vmx->nested.pi_pending = true; - kvm_make_request(KVM_REQ_EVENT, vcpu); - kvm_apic_clear_irr(vcpu, vmx->nested.posted_intr_nv); - } - /* Hide L1D cache contents from the nested guest. */ vmx->vcpu.arch.l1tf_flush_l1d = true; @@ -4220,13 +4200,25 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu) */ bool block_nested_exceptions = vmx->nested.nested_run_pending; /* - * New events (not exceptions) are only recognized at instruction + * Events that don't require injection, i.e. that are virtualized by + * hardware, aren't blocked by a pending VM-Enter as KVM doesn't need + * to regain control in order to deliver the event, and hardware will + * handle event ordering, e.g. with respect to injected exceptions. + * + * But, new events (not exceptions) are only recognized at instruction * boundaries. If an event needs reinjection, then KVM is handling a - * VM-Exit that occurred _during_ instruction execution; new events are - * blocked until the instruction completes. + * VM-Exit that occurred _during_ instruction execution; new events, + * irrespective of whether or not they're injected, are blocked until + * the instruction completes. + */ bool block_non_injected_events = kvm_event_needs_reinjection(vcpu); + /* + * Injected events are blocked by nested VM-Enter, as KVM is responsible + * for managing priority between concurrent events, i.e. KVM needs to + * wait until after VM-Enter completes to deliver injected events. */ bool block_nested_events = block_nested_exceptions || - kvm_event_needs_reinjection(vcpu); + block_non_injected_events; if (lapic_in_kernel(vcpu) && test_bit(KVM_APIC_INIT, &apic->pending_events)) { @@ -4338,18 +4330,26 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu) if (kvm_cpu_has_interrupt(vcpu) && !vmx_interrupt_blocked(vcpu)) { int irq; - if (block_nested_events) - return -EBUSY; - if (!nested_exit_on_intr(vcpu)) + if (!nested_exit_on_intr(vcpu)) { + if (block_nested_events) + return -EBUSY; + goto no_vmexit; + } if (!nested_exit_intr_ack_set(vcpu)) { + if (block_nested_events) + return -EBUSY; + nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0); return 0; } irq = kvm_cpu_get_extint(vcpu); if (irq != -1) { + if (block_nested_events) + return -EBUSY; + nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR | irq, 0); return 0; @@ -4368,11 +4368,22 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu) * and enabling posted interrupts requires ACK-on-exit. */ if (irq == vmx->nested.posted_intr_nv) { + /* + * Nested posted interrupts are delivered via RVI, i.e. + * aren't injected by KVM, and so can be queued even if + * manual event injection is disallowed.
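The reworked vmx_check_nested_events() above distinguishes injected events, which must wait out a pending nested VM-Enter, from hardware-virtualized events such as nested posted interrupts, which only honor the mid-instruction reinjection block. A condensed decision sketch, standalone C with names mirroring the diff; illustrative only:

#include <stdbool.h>
#include <stdio.h>

/* Condensed from vmx_check_nested_events() above; illustrative only. */
static const char *classify(bool nested_run_pending, bool needs_reinjection,
			    bool event_is_injected)
{
	bool block_nested_exceptions = nested_run_pending;
	bool block_non_injected_events = needs_reinjection;
	bool block_nested_events = block_nested_exceptions ||
				   block_non_injected_events;

	if (event_is_injected)
		return block_nested_events ? "-EBUSY (wait)" : "deliver";
	/* e.g. a nested posted interrupt, virtualized via RVI */
	return block_non_injected_events ? "-EBUSY (wait)" : "queue";
}

int main(void)
{
	/* A pending VM-Enter blocks injected events but not RVI delivery. */
	printf("injected, VM-Enter pending: %s\n", classify(true, false, true));
	printf("posted,   VM-Enter pending: %s\n", classify(true, false, false));
	/* Mid-instruction reinjection blocks both. */
	printf("posted,   reinjection:      %s\n", classify(false, true, false));
	return 0;
}
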
+ */ + if (block_non_injected_events) + return -EBUSY; + vmx->nested.pi_pending = true; kvm_apic_clear_irr(vcpu, irq); goto no_vmexit; } + if (block_nested_events) + return -EBUSY; + nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR | irq, 0); @@ -5015,7 +5026,7 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason, * doesn't isolate different VMCSs, i.e. in this case, doesn't provide * separate modes for L2 vs L1. */ - if (guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) + if (guest_cpu_cap_has(vcpu, X86_FEATURE_SPEC_CTRL)) indirect_branch_prediction_barrier(); /* Update any VMCS fields that might have changed while L2 ran */ @@ -5050,6 +5061,11 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason, kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu); } + if (vmx->nested.update_vmcs01_hwapic_isr) { + vmx->nested.update_vmcs01_hwapic_isr = false; + kvm_apic_update_hwapic_isr(vcpu); + } + if ((vm_exit_reason != -1) && (enable_shadow_vmcs || nested_vmx_is_evmptr12_valid(vmx))) vmx->nested.need_vmcs12_to_shadow_sync = true; @@ -6279,7 +6295,7 @@ static bool nested_vmx_exit_handled_encls(struct kvm_vcpu *vcpu, { u32 encls_leaf; - if (!guest_cpuid_has(vcpu, X86_FEATURE_SGX) || + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_SGX) || !nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENCLS_EXITING)) return false; @@ -6617,7 +6633,7 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu, vmx = to_vmx(vcpu); vmcs12 = get_vmcs12(vcpu); - if (guest_can_use(vcpu, X86_FEATURE_VMX) && + if (guest_cpu_cap_has(vcpu, X86_FEATURE_VMX) && (vmx->nested.vmxon || vmx->nested.smm.vmxon)) { kvm_state.hdr.vmx.vmxon_pa = vmx->nested.vmxon_ptr; kvm_state.hdr.vmx.vmcs12_pa = vmx->nested.current_vmptr; @@ -6758,7 +6774,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, if (kvm_state->flags & ~KVM_STATE_NESTED_EVMCS) return -EINVAL; } else { - if (!guest_can_use(vcpu, X86_FEATURE_VMX)) + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_VMX)) return -EINVAL; if (!page_address_valid(vcpu, kvm_state->hdr.vmx.vmxon_pa)) @@ -6792,7 +6808,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, return -EINVAL; if ((kvm_state->flags & KVM_STATE_NESTED_EVMCS) && - (!guest_can_use(vcpu, X86_FEATURE_VMX) || + (!guest_cpu_cap_has(vcpu, X86_FEATURE_VMX) || !vmx->nested.enlightened_vmcs_enabled)) return -EINVAL; diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 9c9d4a336166..77012b2eca0e 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -110,7 +110,7 @@ static struct kvm_pmc *intel_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu, static inline u64 vcpu_get_perf_capabilities(struct kvm_vcpu *vcpu) { - if (!guest_cpuid_has(vcpu, X86_FEATURE_PDCM)) + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_PDCM)) return 0; return vcpu->arch.perf_capabilities; @@ -160,7 +160,7 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) ret = vcpu_get_perf_capabilities(vcpu) & PERF_CAP_PEBS_FORMAT; break; case MSR_IA32_DS_AREA: - ret = guest_cpuid_has(vcpu, X86_FEATURE_DS); + ret = guest_cpu_cap_has(vcpu, X86_FEATURE_DS); break; case MSR_PEBS_DATA_CFG: perf_capabilities = vcpu_get_perf_capabilities(vcpu); diff --git a/arch/x86/kvm/vmx/sgx.c b/arch/x86/kvm/vmx/sgx.c index b352a3ba7354..9961e07cf071 100644 --- a/arch/x86/kvm/vmx/sgx.c +++ b/arch/x86/kvm/vmx/sgx.c @@ -122,7 +122,7 @@ static int sgx_inject_fault(struct kvm_vcpu *vcpu, gva_t gva, int trapnr) * likely than a bad userspace address. 
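The new update_vmcs01_hwapic_isr flag, consumed in the nested_vmx_vmexit() hunk above, follows the existing update_vmcs01_* pattern: an update arriving while vmcs02 is loaded is recorded and replayed on the next nested VM-Exit, once vmcs01 is current again. A minimal sketch of that record-and-replay pattern, with the SVI write reduced to a plain field store; illustrative, since the real code recomputes the value via kvm_apic_update_hwapic_isr():

#include <stdbool.h>
#include <stdio.h>

struct vcpu {
	bool guest_mode;               /* L2 (vmcs02) currently loaded */
	bool update_vmcs01_hwapic_isr; /* deferred-work flag */
	int  svi;                      /* stand-in for the vmcs01 SVI field */
};

static void write_svi(struct vcpu *v, int max_isr)
{
	v->svi = max_isr;
	printf("SVI <- %d\n", max_isr);
}

/* Mirrors vmx_hwapic_isr_update(): defer while L2 is active. */
static void hwapic_isr_update(struct vcpu *v, int max_isr)
{
	if (v->guest_mode) {
		v->update_vmcs01_hwapic_isr = true;
		return;
	}
	write_svi(v, max_isr);
}

/* Mirrors the nested_vmx_vmexit() hunk: replay once vmcs01 is loaded. */
static void nested_vmexit(struct vcpu *v, int pending_max_isr)
{
	v->guest_mode = false;
	if (v->update_vmcs01_hwapic_isr) {
		v->update_vmcs01_hwapic_isr = false;
		write_svi(v, pending_max_isr);
	}
}

int main(void)
{
	struct vcpu v = { .guest_mode = true };

	hwapic_isr_update(&v, 0x31); /* deferred: L2 owns the vAPIC now */
	nested_vmexit(&v, 0x31);     /* applied here, not earlier */
	return 0;
}
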
*/ if ((trapnr == PF_VECTOR || !boot_cpu_has(X86_FEATURE_SGX2)) && - guest_cpuid_has(vcpu, X86_FEATURE_SGX2)) { + guest_cpu_cap_has(vcpu, X86_FEATURE_SGX2)) { memset(&ex, 0, sizeof(ex)); ex.vector = PF_VECTOR; ex.error_code = PFERR_PRESENT_MASK | PFERR_WRITE_MASK | @@ -365,7 +365,7 @@ static inline bool encls_leaf_enabled_in_guest(struct kvm_vcpu *vcpu, u32 leaf) return true; if (leaf >= EAUG && leaf <= EMODT) - return guest_cpuid_has(vcpu, X86_FEATURE_SGX2); + return guest_cpu_cap_has(vcpu, X86_FEATURE_SGX2); return false; } @@ -381,8 +381,8 @@ int handle_encls(struct kvm_vcpu *vcpu) { u32 leaf = (u32)kvm_rax_read(vcpu); - if (!enable_sgx || !guest_cpuid_has(vcpu, X86_FEATURE_SGX) || - !guest_cpuid_has(vcpu, X86_FEATURE_SGX1)) { + if (!enable_sgx || !guest_cpu_cap_has(vcpu, X86_FEATURE_SGX) || + !guest_cpu_cap_has(vcpu, X86_FEATURE_SGX1)) { kvm_queue_exception(vcpu, UD_VECTOR); } else if (!encls_leaf_enabled_in_guest(vcpu, leaf) || !sgx_enabled_in_guest_bios(vcpu) || !is_paging(vcpu)) { @@ -479,15 +479,15 @@ void vmx_write_encls_bitmap(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) if (!cpu_has_vmx_encls_vmexit()) return; - if (guest_cpuid_has(vcpu, X86_FEATURE_SGX) && + if (guest_cpu_cap_has(vcpu, X86_FEATURE_SGX) && sgx_enabled_in_guest_bios(vcpu)) { - if (guest_cpuid_has(vcpu, X86_FEATURE_SGX1)) { + if (guest_cpu_cap_has(vcpu, X86_FEATURE_SGX1)) { bitmap &= ~GENMASK_ULL(ETRACK, ECREATE); if (sgx_intercept_encls_ecreate(vcpu)) bitmap |= (1 << ECREATE); } - if (guest_cpuid_has(vcpu, X86_FEATURE_SGX2)) + if (guest_cpu_cap_has(vcpu, X86_FEATURE_SGX2)) bitmap &= ~GENMASK_ULL(EMODT, EAUG); /* diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 893366e53732..f72835e85b6d 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1636,7 +1636,8 @@ static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data) * result in a #GP unless the same write also clears TraceEn. */ if ((vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) && - ((vmx->pt_desc.guest.ctl ^ data) & ~RTIT_CTL_TRACEEN)) + (data & RTIT_CTL_TRACEEN) && + data != vmx->pt_desc.guest.ctl) return 1; /* @@ -1705,6 +1706,12 @@ int vmx_check_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type, kvm_queue_exception(vcpu, UD_VECTOR); return X86EMUL_PROPAGATE_FAULT; } + + /* Check that emulation is possible during event vectoring */ + if ((to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) && + !kvm_can_emulate_event_vectoring(emul_type)) + return X86EMUL_UNHANDLEABLE_VECTORING; + return X86EMUL_CONTINUE; } @@ -1908,8 +1915,8 @@ static void vmx_setup_uret_msrs(struct vcpu_vmx *vmx) vmx_setup_uret_msr(vmx, MSR_EFER, update_transition_efer(vmx)); vmx_setup_uret_msr(vmx, MSR_TSC_AUX, - guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP) || - guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDPID)); + guest_cpu_cap_has(&vmx->vcpu, X86_FEATURE_RDTSCP) || + guest_cpu_cap_has(&vmx->vcpu, X86_FEATURE_RDPID)); /* * hle=0, rtm=0, tsx_ctrl=1 can be found with some combinations of new @@ -2062,7 +2069,7 @@ int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_BNDCFGS: if (!kvm_mpx_supported() || (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_MPX))) + !guest_cpu_cap_has(vcpu, X86_FEATURE_MPX))) return 1; msr_info->data = vmcs_read64(GUEST_BNDCFGS); break; @@ -2078,13 +2085,13 @@ int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_SGXLEPUBKEYHASH0 ... 
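Both vendor checks (the SVM hunk earlier and the vmx_check_emulate_instruction() hunk above) now refuse to start emulation when the exit occurred during event delivery, returning the new X86EMUL_UNHANDLEABLE_VECTORING code so common code can exit to userspace instead of looping. A compressed sketch of that control flow, with kvm_can_emulate_event_vectoring() modeled as a flag; illustrative only:

#include <stdbool.h>
#include <stdio.h>

enum { X86EMUL_CONTINUE, X86EMUL_UNHANDLEABLE_VECTORING };

/*
 * Stand-in for kvm_can_emulate_event_vectoring(): emulation during event
 * delivery is permitted only for special emulation types; modeled here as
 * a boolean supplied by the caller.
 */
static bool can_emulate_event_vectoring(bool emul_type_allows_it)
{
	return emul_type_allows_it;
}

static int check_emulate(bool vectoring_event_pending, bool emul_type_allows_it)
{
	/* Mirrors the VMX/SVM hunks: bail before attempting emulation. */
	if (vectoring_event_pending &&
	    !can_emulate_event_vectoring(emul_type_allows_it))
		return X86EMUL_UNHANDLEABLE_VECTORING;
	return X86EMUL_CONTINUE;
}

int main(void)
{
	printf("%d\n", check_emulate(true, false));  /* 1: punt to userspace */
	printf("%d\n", check_emulate(false, false)); /* 0: emulate as usual */
	return 0;
}
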
MSR_IA32_SGXLEPUBKEYHASH3: if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_SGX_LC)) + !guest_cpu_cap_has(vcpu, X86_FEATURE_SGX_LC)) return 1; msr_info->data = to_vmx(vcpu)->msr_ia32_sgxlepubkeyhash [msr_info->index - MSR_IA32_SGXLEPUBKEYHASH0]; break; case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR: - if (!guest_can_use(vcpu, X86_FEATURE_VMX)) + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_VMX)) return 1; if (vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index, &msr_info->data)) @@ -2097,7 +2104,7 @@ int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) * sanity checking and refuse to boot. Filter all unsupported * features out. */ - if (!msr_info->host_initiated && guest_cpuid_has_evmcs(vcpu)) + if (!msr_info->host_initiated && guest_cpu_cap_has_evmcs(vcpu)) nested_evmcs_filter_control_msr(vcpu, msr_info->index, &msr_info->data); #endif @@ -2167,7 +2174,7 @@ static u64 nested_vmx_truncate_sysenter_addr(struct kvm_vcpu *vcpu, u64 data) { #ifdef CONFIG_X86_64 - if (!guest_cpuid_has(vcpu, X86_FEATURE_LM)) + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_LM)) return (u32)data; #endif return (unsigned long)data; @@ -2178,7 +2185,7 @@ static u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated u64 debugctl = 0; if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT) && - (host_initiated || guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT))) + (host_initiated || guest_cpu_cap_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT))) debugctl |= DEBUGCTLMSR_BUS_LOCK_DETECT; if ((kvm_caps.supported_perf_cap & PMU_CAP_LBR_FMT) && @@ -2282,7 +2289,7 @@ int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_BNDCFGS: if (!kvm_mpx_supported() || (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_MPX))) + !guest_cpu_cap_has(vcpu, X86_FEATURE_MPX))) return 1; if (is_noncanonical_msr_address(data & PAGE_MASK, vcpu) || (data & MSR_IA32_BNDCFGS_RSVD)) @@ -2384,7 +2391,7 @@ int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) * behavior, but it's close enough. */ if (!msr_info->host_initiated && - (!guest_cpuid_has(vcpu, X86_FEATURE_SGX_LC) || + (!guest_cpu_cap_has(vcpu, X86_FEATURE_SGX_LC) || ((vmx->msr_ia32_feature_control & FEAT_CTL_LOCKED) && !(vmx->msr_ia32_feature_control & FEAT_CTL_SGX_LC_ENABLED)))) return 1; @@ -2394,7 +2401,7 @@ int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case KVM_FIRST_EMULATED_VMX_MSR ... 
KVM_LAST_EMULATED_VMX_MSR: if (!msr_info->host_initiated) return 1; /* they are read-only */ - if (!guest_can_use(vcpu, X86_FEATURE_VMX)) + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_VMX)) return 1; return vmx_set_vmx_msr(vcpu, msr_index, data); case MSR_IA32_RTIT_CTL: @@ -2468,9 +2475,9 @@ int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if ((data & PERF_CAP_PEBS_MASK) != (kvm_caps.supported_perf_cap & PERF_CAP_PEBS_MASK)) return 1; - if (!guest_cpuid_has(vcpu, X86_FEATURE_DS)) + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_DS)) return 1; - if (!guest_cpuid_has(vcpu, X86_FEATURE_DTES64)) + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_DTES64)) return 1; if (!cpuid_model_is_consistent(vcpu)) return 1; @@ -4590,10 +4597,7 @@ vmx_adjust_secondary_exec_control(struct vcpu_vmx *vmx, u32 *exec_control, bool __enabled; \ \ if (cpu_has_vmx_##name()) { \ - if (kvm_is_governed_feature(X86_FEATURE_##feat_name)) \ - __enabled = guest_can_use(__vcpu, X86_FEATURE_##feat_name); \ - else \ - __enabled = guest_cpuid_has(__vcpu, X86_FEATURE_##feat_name); \ + __enabled = guest_cpu_cap_has(__vcpu, X86_FEATURE_##feat_name); \ vmx_adjust_secondary_exec_control(vmx, exec_control, SECONDARY_EXEC_##ctrl_name,\ __enabled, exiting); \ } \ @@ -4669,8 +4673,8 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) */ if (cpu_has_vmx_rdtscp()) { bool rdpid_or_rdtscp_enabled = - guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) || - guest_cpuid_has(vcpu, X86_FEATURE_RDPID); + guest_cpu_cap_has(vcpu, X86_FEATURE_RDTSCP) || + guest_cpu_cap_has(vcpu, X86_FEATURE_RDPID); vmx_adjust_secondary_exec_control(vmx, &exec_control, SECONDARY_EXEC_ENABLE_RDTSCP, @@ -4820,7 +4824,7 @@ static void init_vmcs(struct vcpu_vmx *vmx) if (enable_pml) { vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg)); - vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1); + vmcs_write16(GUEST_PML_INDEX, PML_HEAD_INDEX); } vmx_write_encls_bitmap(&vmx->vcpu, NULL); @@ -5959,7 +5963,7 @@ static int handle_invpcid(struct kvm_vcpu *vcpu) } operand; int gpr_index; - if (!guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) { + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_INVPCID)) { kvm_queue_exception(vcpu, UD_VECTOR); return 1; } @@ -6049,7 +6053,7 @@ static int handle_preemption_timer(struct kvm_vcpu *vcpu) /* * When nested=0, all VMX instruction VM Exits filter here. The handlers - * are overwritten by nested_vmx_setup() when nested=1. + * are overwritten by nested_vmx_hardware_setup() when nested=1. */ static int handle_vmx_instruction(struct kvm_vcpu *vcpu) { @@ -6191,6 +6195,15 @@ void vmx_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason, } } +void vmx_get_entry_info(struct kvm_vcpu *vcpu, u32 *intr_info, u32 *error_code) +{ + *intr_info = vmcs_read32(VM_ENTRY_INTR_INFO_FIELD); + if (is_exception_with_error_code(*intr_info)) + *error_code = vmcs_read32(VM_ENTRY_EXCEPTION_ERROR_CODE); + else + *error_code = 0; +} + static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx) { if (vmx->pml_pg) { @@ -6202,32 +6215,40 @@ static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx) static void vmx_flush_pml_buffer(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); + u16 pml_idx, pml_tail_index; u64 *pml_buf; - u16 pml_idx; + int i; pml_idx = vmcs_read16(GUEST_PML_INDEX); /* Do nothing if PML buffer is empty */ - if (pml_idx == (PML_ENTITY_NUM - 1)) + if (pml_idx == PML_HEAD_INDEX) return; + /* + * PML index always points to the next available PML buffer entity + * unless PML log has just overflowed. + */ + pml_tail_index = (pml_idx >= PML_LOG_NR_ENTRIES) ? 
0 : pml_idx + 1; - /* PML index always points to next available PML buffer entity */ - if (pml_idx >= PML_ENTITY_NUM) - pml_idx = 0; - else - pml_idx++; - + /* + * PML log is written backwards: the CPU first writes entry 511, + * then entry 510, and so on. + * + * Read the entries in the same order they were written, to ensure that + * the dirty ring is filled in the same order the CPU wrote them. + */ pml_buf = page_address(vmx->pml_pg); - for (; pml_idx < PML_ENTITY_NUM; pml_idx++) { + + for (i = PML_HEAD_INDEX; i >= pml_tail_index; i--) { u64 gpa; - gpa = pml_buf[pml_idx]; + gpa = pml_buf[i]; WARN_ON(gpa & (PAGE_SIZE - 1)); kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT); } /* reset PML index */ - vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1); + vmcs_write16(GUEST_PML_INDEX, PML_HEAD_INDEX); } static void vmx_dump_sel(char *name, uint32_t sel) @@ -6543,33 +6564,15 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) return 0; } - /* - * Note: - * Do not try to fix EXIT_REASON_EPT_MISCONFIG if it caused by - * delivery event since it indicates guest is accessing MMIO. - * The vm-exit can be triggered again after return to guest that - * will cause infinite loop. - */ if ((vectoring_info & VECTORING_INFO_VALID_MASK) && (exit_reason.basic != EXIT_REASON_EXCEPTION_NMI && exit_reason.basic != EXIT_REASON_EPT_VIOLATION && exit_reason.basic != EXIT_REASON_PML_FULL && exit_reason.basic != EXIT_REASON_APIC_ACCESS && exit_reason.basic != EXIT_REASON_TASK_SWITCH && - exit_reason.basic != EXIT_REASON_NOTIFY)) { - int ndata = 3; - - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV; - vcpu->run->internal.data[0] = vectoring_info; - vcpu->run->internal.data[1] = exit_reason.full; - vcpu->run->internal.data[2] = vmx_get_exit_qual(vcpu); - if (exit_reason.basic == EXIT_REASON_EPT_MISCONFIG) { - vcpu->run->internal.data[ndata++] = - vmcs_read64(GUEST_PHYSICAL_ADDRESS); - } - vcpu->run->internal.data[ndata++] = vcpu->arch.last_vmentry_cpu; - vcpu->run->internal.ndata = ndata; + exit_reason.basic != EXIT_REASON_NOTIFY && + exit_reason.basic != EXIT_REASON_EPT_MISCONFIG)) { + kvm_prepare_event_vectoring_exit(vcpu, INVALID_GPA); return 0; } @@ -6862,11 +6865,32 @@ void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu) read_unlock(&vcpu->kvm->mmu_lock); } -void vmx_hwapic_isr_update(int max_isr) +void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr) { u16 status; u8 old; + /* + * If L2 is active, defer the SVI update until vmcs01 is loaded, as SVI + * is only relevant if and only if Virtual Interrupt Delivery is + * enabled in vmcs12, and if VID is enabled then L2 EOIs affect L2's + * vAPIC, not L1's vAPIC. KVM must update vmcs01 on the next nested + * VM-Exit, otherwise L1 will run with a stale SVI. + */ + if (is_guest_mode(vcpu)) { + /* + * KVM is supposed to forward intercepted L2 EOIs to L1 if VID + * is enabled in vmcs12; as above, the EOIs affect L2's vAPIC. + * Note, userspace can stuff state while L2 is active; assert + * that VID is disabled if and only if the vCPU is in KVM_RUN + * to avoid false positives if userspace is setting APIC state.
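The rewritten vmx_flush_pml_buffer() above treats the 512-entry PML page as a ring the CPU fills from index 511 downward, with GUEST_PML_INDEX naming the next free slot. A standalone sketch of the index math and the head-to-tail walk; the buffer contents are faked and the dirty-page callback is a printf:

#include <stdint.h>
#include <stdio.h>

#define PML_LOG_NR_ENTRIES 512
#define PML_HEAD_INDEX     (PML_LOG_NR_ENTRIES - 1)

static void flush_pml(const uint64_t *pml_buf, uint16_t pml_idx)
{
	uint16_t tail;
	int i;

	if (pml_idx == PML_HEAD_INDEX)
		return; /* empty: no entry has been written yet */

	/* pml_idx is the next free slot; >= 512 means the log overflowed. */
	tail = (pml_idx >= PML_LOG_NR_ENTRIES) ? 0 : pml_idx + 1;

	/* Walk head -> tail, i.e. in the order the CPU wrote the entries. */
	for (i = PML_HEAD_INDEX; i >= tail; i--)
		printf("dirty gfn %llu\n",
		       (unsigned long long)(pml_buf[i] >> 12));
}

int main(void)
{
	uint64_t buf[PML_LOG_NR_ENTRIES] = { 0 };

	buf[511] = 0x1000; /* first entry the CPU logged */
	buf[510] = 0x2000; /* second */
	flush_pml(buf, 509); /* 509 is the next free slot */
	return 0;
}
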
+ */ + WARN_ON_ONCE(vcpu->wants_to_run && + nested_cpu_has_vid(get_vmcs12(vcpu))); + to_vmx(vcpu)->nested.update_vmcs01_hwapic_isr = true; + return; + } + if (max_isr == -1) max_isr = 0; @@ -6896,20 +6920,6 @@ static void vmx_set_rvi(int vector) } } -void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) -{ - /* - * When running L2, updating RVI is only relevant when - * vmcs12 virtual-interrupt-delivery enabled. - * However, it can be enabled only when L1 also - * intercepts external-interrupts and in that case - * we should not update vmcs02 RVI but instead intercept - * interrupt. Therefore, do nothing when running L2. - */ - if (!is_guest_mode(vcpu)) - vmx_set_rvi(max_irr); -} - int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -7828,12 +7838,8 @@ void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) * to the guest. XSAVES depends on CR4.OSXSAVE, and CR4.OSXSAVE can be * set if and only if XSAVE is supported. */ - if (boot_cpu_has(X86_FEATURE_XSAVE) && - guest_cpuid_has(vcpu, X86_FEATURE_XSAVE)) - kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_XSAVES); - - kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_VMX); - kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_LAM); + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_XSAVE)) + guest_cpu_cap_clear(vcpu, X86_FEATURE_XSAVES); vmx_setup_uret_msrs(vmx); @@ -7841,7 +7847,7 @@ void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) vmcs_set_secondary_exec_control(vmx, vmx_secondary_exec_control(vmx)); - if (guest_can_use(vcpu, X86_FEATURE_VMX)) + if (guest_cpu_cap_has(vcpu, X86_FEATURE_VMX)) vmx->msr_ia32_feature_control_valid_bits |= FEAT_CTL_VMX_ENABLED_INSIDE_SMX | FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX; @@ -7850,25 +7856,25 @@ void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) ~(FEAT_CTL_VMX_ENABLED_INSIDE_SMX | FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX); - if (guest_can_use(vcpu, X86_FEATURE_VMX)) + if (guest_cpu_cap_has(vcpu, X86_FEATURE_VMX)) nested_vmx_cr_fixed1_bits_update(vcpu); if (boot_cpu_has(X86_FEATURE_INTEL_PT) && - guest_cpuid_has(vcpu, X86_FEATURE_INTEL_PT)) + guest_cpu_cap_has(vcpu, X86_FEATURE_INTEL_PT)) update_intel_pt_cfg(vcpu); if (boot_cpu_has(X86_FEATURE_RTM)) { struct vmx_uret_msr *msr; msr = vmx_find_uret_msr(vmx, MSR_IA32_TSX_CTRL); if (msr) { - bool enabled = guest_cpuid_has(vcpu, X86_FEATURE_RTM); + bool enabled = guest_cpu_cap_has(vcpu, X86_FEATURE_RTM); vmx_set_guest_uret_msr(vmx, msr, enabled ? 
0 : TSX_CTRL_RTM_DISABLE); } } if (kvm_cpu_cap_has(X86_FEATURE_XFD)) vmx_set_intercept_for_msr(vcpu, MSR_IA32_XFD_ERR, MSR_TYPE_R, - !guest_cpuid_has(vcpu, X86_FEATURE_XFD)); + !guest_cpu_cap_has(vcpu, X86_FEATURE_XFD)); if (boot_cpu_has(X86_FEATURE_IBPB)) vmx_set_intercept_for_msr(vcpu, MSR_IA32_PRED_CMD, MSR_TYPE_W, @@ -7876,17 +7882,17 @@ void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) if (boot_cpu_has(X86_FEATURE_FLUSH_L1D)) vmx_set_intercept_for_msr(vcpu, MSR_IA32_FLUSH_CMD, MSR_TYPE_W, - !guest_cpuid_has(vcpu, X86_FEATURE_FLUSH_L1D)); + !guest_cpu_cap_has(vcpu, X86_FEATURE_FLUSH_L1D)); set_cr4_guest_host_mask(vmx); vmx_write_encls_bitmap(vcpu, NULL); - if (guest_cpuid_has(vcpu, X86_FEATURE_SGX)) + if (guest_cpu_cap_has(vcpu, X86_FEATURE_SGX)) vmx->msr_ia32_feature_control_valid_bits |= FEAT_CTL_SGX_ENABLED; else vmx->msr_ia32_feature_control_valid_bits &= ~FEAT_CTL_SGX_ENABLED; - if (guest_cpuid_has(vcpu, X86_FEATURE_SGX_LC)) + if (guest_cpu_cap_has(vcpu, X86_FEATURE_SGX_LC)) vmx->msr_ia32_feature_control_valid_bits |= FEAT_CTL_SGX_LC_ENABLED; else @@ -8597,7 +8603,7 @@ static void __vmx_exit(void) vmx_cleanup_l1d_flush(); } -static void vmx_exit(void) +static void __exit vmx_exit(void) { kvm_exit(); __vmx_exit(); diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 43f573f6ca46..8b111ce1087c 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -176,6 +176,7 @@ struct nested_vmx { bool reload_vmcs01_apic_access_page; bool update_vmcs01_cpu_dirty_logging; bool update_vmcs01_apicv_status; + bool update_vmcs01_hwapic_isr; /* * Enlightened VMCS has been enabled. It does not mean that L1 has to @@ -330,7 +331,10 @@ struct vcpu_vmx { bool ple_window_dirty; /* Support for PML */ -#define PML_ENTITY_NUM 512 +#define PML_LOG_NR_ENTRIES 512 + /* PML is written backwards: this is the first entry written by the CPU */ +#define PML_HEAD_INDEX (PML_LOG_NR_ENTRIES-1) + struct page *pml_pg; /* apic deadline value in host tsc */ diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h index a55981c5216e..ce3295a67c04 100644 --- a/arch/x86/kvm/vmx/x86_ops.h +++ b/arch/x86/kvm/vmx/x86_ops.h @@ -47,8 +47,7 @@ bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu); void vmx_migrate_timers(struct kvm_vcpu *vcpu); void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu); void vmx_apicv_pre_state_restore(struct kvm_vcpu *vcpu); -void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr); -void vmx_hwapic_isr_update(int max_isr); +void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr); int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu); void vmx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode, int trig_mode, int vector); @@ -104,8 +103,11 @@ void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); int vmx_set_identity_map_addr(struct kvm *kvm, u64 ident_addr); u8 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); + void vmx_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason, u64 *info1, u64 *info2, u32 *intr_info, u32 *error_code); +void vmx_get_entry_info(struct kvm_vcpu *vcpu, u32 *intr_info, u32 *error_code); + u64 vmx_get_l2_tsc_offset(struct kvm_vcpu *vcpu); u64 vmx_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu); void vmx_write_tsc_offset(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c79a8cc57ba4..6d4a6734b2d6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -119,8 +119,6 @@ u64 __read_mostly efer_reserved_bits 
= ~((u64)(EFER_SCE | EFER_LME | EFER_LMA)); static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE); #endif -static u64 __read_mostly cr4_reserved_bits = CR4_RESERVED_BITS; - #define KVM_EXIT_HYPERCALL_VALID_MASK (1 << KVM_HC_MAP_GPA_RANGE) #define KVM_CAP_PMU_VALID_MASK KVM_PMU_CAP_DISABLE @@ -1179,7 +1177,7 @@ void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu) if (vcpu->arch.xcr0 != kvm_host.xcr0) xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0); - if (guest_can_use(vcpu, X86_FEATURE_XSAVES) && + if (guest_cpu_cap_has(vcpu, X86_FEATURE_XSAVES) && vcpu->arch.ia32_xss != kvm_host.xss) wrmsrl(MSR_IA32_XSS, vcpu->arch.ia32_xss); } @@ -1188,7 +1186,7 @@ void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu) vcpu->arch.pkru != vcpu->arch.host_pkru && ((vcpu->arch.xcr0 & XFEATURE_MASK_PKRU) || kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE))) - write_pkru(vcpu->arch.pkru); + wrpkru(vcpu->arch.pkru); } EXPORT_SYMBOL_GPL(kvm_load_guest_xsave_state); @@ -1202,7 +1200,7 @@ void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu) kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE))) { vcpu->arch.pkru = rdpkru(); if (vcpu->arch.pkru != vcpu->arch.host_pkru) - write_pkru(vcpu->arch.host_pkru); + wrpkru(vcpu->arch.host_pkru); } if (kvm_is_cr4_bit_set(vcpu, X86_CR4_OSXSAVE)) { @@ -1210,7 +1208,7 @@ void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu) if (vcpu->arch.xcr0 != kvm_host.xcr0) xsetbv(XCR_XFEATURE_ENABLED_MASK, kvm_host.xcr0); - if (guest_can_use(vcpu, X86_FEATURE_XSAVES) && + if (guest_cpu_cap_has(vcpu, X86_FEATURE_XSAVES) && vcpu->arch.ia32_xss != kvm_host.xss) wrmsrl(MSR_IA32_XSS, kvm_host.xss); } @@ -1283,18 +1281,6 @@ int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_emulate_xsetbv); -bool __kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) -{ - if (cr4 & cr4_reserved_bits) - return false; - - if (cr4 & vcpu->arch.cr4_guest_rsvd_bits) - return false; - - return true; -} -EXPORT_SYMBOL_GPL(__kvm_is_valid_cr4); - static bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { return __kvm_is_valid_cr4(vcpu, cr4) && @@ -1516,10 +1502,10 @@ static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu) { u64 fixed = DR6_FIXED_1; - if (!guest_cpuid_has(vcpu, X86_FEATURE_RTM)) + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_RTM)) fixed |= DR6_RTM; - if (!guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT)) + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT)) fixed |= DR6_BUS_LOCK; return fixed; } @@ -1695,20 +1681,20 @@ static int do_get_feature_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data) static bool __kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer) { - if (efer & EFER_AUTOIBRS && !guest_cpuid_has(vcpu, X86_FEATURE_AUTOIBRS)) + if (efer & EFER_AUTOIBRS && !guest_cpu_cap_has(vcpu, X86_FEATURE_AUTOIBRS)) return false; - if (efer & EFER_FFXSR && !guest_cpuid_has(vcpu, X86_FEATURE_FXSR_OPT)) + if (efer & EFER_FFXSR && !guest_cpu_cap_has(vcpu, X86_FEATURE_FXSR_OPT)) return false; - if (efer & EFER_SVME && !guest_cpuid_has(vcpu, X86_FEATURE_SVM)) + if (efer & EFER_SVME && !guest_cpu_cap_has(vcpu, X86_FEATURE_SVM)) return false; if (efer & (EFER_LME | EFER_LMA) && - !guest_cpuid_has(vcpu, X86_FEATURE_LM)) + !guest_cpu_cap_has(vcpu, X86_FEATURE_LM)) return false; - if (efer & EFER_NX && !guest_cpuid_has(vcpu, X86_FEATURE_NX)) + if (efer & EFER_NX && !guest_cpu_cap_has(vcpu, X86_FEATURE_NX)) return false; return true; @@ -1850,8 +1836,8 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data, return 1; if (!host_initiated && - !guest_cpuid_has(vcpu, 
X86_FEATURE_RDTSCP) && - !guest_cpuid_has(vcpu, X86_FEATURE_RDPID)) + !guest_cpu_cap_has(vcpu, X86_FEATURE_RDTSCP) && + !guest_cpu_cap_has(vcpu, X86_FEATURE_RDPID)) return 1; /* @@ -1908,8 +1894,8 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, return 1; if (!host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) && - !guest_cpuid_has(vcpu, X86_FEATURE_RDPID)) + !guest_cpu_cap_has(vcpu, X86_FEATURE_RDTSCP) && + !guest_cpu_cap_has(vcpu, X86_FEATURE_RDPID)) return 1; break; } @@ -2095,7 +2081,7 @@ EXPORT_SYMBOL_GPL(kvm_handle_invalid_op); static int kvm_emulate_monitor_mwait(struct kvm_vcpu *vcpu, const char *insn) { if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS) && - !guest_cpuid_has(vcpu, X86_FEATURE_MWAIT)) + !guest_cpu_cap_has(vcpu, X86_FEATURE_MWAIT)) return kvm_handle_invalid_op(vcpu); pr_warn_once("%s instruction emulated as NOP!\n", insn); @@ -3767,13 +3753,13 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_ARCH_CAPABILITIES: if (!msr_info->host_initiated || - !guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES)) + !guest_cpu_cap_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES)) return KVM_MSR_RET_UNSUPPORTED; vcpu->arch.arch_capabilities = data; break; case MSR_IA32_PERF_CAPABILITIES: if (!msr_info->host_initiated || - !guest_cpuid_has(vcpu, X86_FEATURE_PDCM)) + !guest_cpu_cap_has(vcpu, X86_FEATURE_PDCM)) return KVM_MSR_RET_UNSUPPORTED; if (data & ~kvm_caps.supported_perf_cap) @@ -3797,11 +3783,11 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if ((!guest_has_pred_cmd_msr(vcpu))) return 1; - if (!guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) && - !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBPB)) + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_SPEC_CTRL) && + !guest_cpu_cap_has(vcpu, X86_FEATURE_AMD_IBPB)) reserved_bits |= PRED_CMD_IBPB; - if (!guest_cpuid_has(vcpu, X86_FEATURE_SBPB)) + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_SBPB)) reserved_bits |= PRED_CMD_SBPB; } @@ -3822,7 +3808,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) } case MSR_IA32_FLUSH_CMD: if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_FLUSH_L1D)) + !guest_cpu_cap_has(vcpu, X86_FEATURE_FLUSH_L1D)) return 1; if (!boot_cpu_has(X86_FEATURE_FLUSH_L1D) || (data & ~L1D_FLUSH)) @@ -3873,7 +3859,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) kvm_set_lapic_tscdeadline_msr(vcpu, data); break; case MSR_IA32_TSC_ADJUST: - if (guest_cpuid_has(vcpu, X86_FEATURE_TSC_ADJUST)) { + if (guest_cpu_cap_has(vcpu, X86_FEATURE_TSC_ADJUST)) { if (!msr_info->host_initiated) { s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr; adjust_tsc_offset_guest(vcpu, adj); @@ -3900,7 +3886,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT) && ((old_val ^ data) & MSR_IA32_MISC_ENABLE_MWAIT)) { - if (!guest_cpuid_has(vcpu, X86_FEATURE_XMM3)) + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_XMM3)) return 1; vcpu->arch.ia32_misc_enable_msr = data; kvm_update_cpuid_runtime(vcpu); @@ -4077,12 +4063,12 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) kvm_pr_unimpl_wrmsr(vcpu, msr, data); break; case MSR_AMD64_OSVW_ID_LENGTH: - if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW)) + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_OSVW)) return 1; vcpu->arch.osvw.length = data; break; case MSR_AMD64_OSVW_STATUS: - if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW)) + if 
(!guest_cpu_cap_has(vcpu, X86_FEATURE_OSVW)) return 1; vcpu->arch.osvw.status = data; break; @@ -4101,7 +4087,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) #ifdef CONFIG_X86_64 case MSR_IA32_XFD: if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_XFD)) + !guest_cpu_cap_has(vcpu, X86_FEATURE_XFD)) return 1; if (data & ~kvm_guest_supported_xfd(vcpu)) @@ -4111,7 +4097,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_XFD_ERR: if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_XFD)) + !guest_cpu_cap_has(vcpu, X86_FEATURE_XFD)) return 1; if (data & ~kvm_guest_supported_xfd(vcpu)) @@ -4226,12 +4212,12 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = vcpu->arch.microcode_version; break; case MSR_IA32_ARCH_CAPABILITIES: - if (!guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES)) + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES)) return KVM_MSR_RET_UNSUPPORTED; msr_info->data = vcpu->arch.arch_capabilities; break; case MSR_IA32_PERF_CAPABILITIES: - if (!guest_cpuid_has(vcpu, X86_FEATURE_PDCM)) + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_PDCM)) return KVM_MSR_RET_UNSUPPORTED; msr_info->data = vcpu->arch.perf_capabilities; break; @@ -4432,12 +4418,12 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = 0xbe702111; break; case MSR_AMD64_OSVW_ID_LENGTH: - if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW)) + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_OSVW)) return 1; msr_info->data = vcpu->arch.osvw.length; break; case MSR_AMD64_OSVW_STATUS: - if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW)) + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_OSVW)) return 1; msr_info->data = vcpu->arch.osvw.status; break; @@ -4456,14 +4442,14 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) #ifdef CONFIG_X86_64 case MSR_IA32_XFD: if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_XFD)) + !guest_cpu_cap_has(vcpu, X86_FEATURE_XFD)) return 1; msr_info->data = vcpu->arch.guest_fpu.fpstate->xfd; break; case MSR_IA32_XFD_ERR: if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_XFD)) + !guest_cpu_cap_has(vcpu, X86_FEATURE_XFD)) return 1; msr_info->data = vcpu->arch.guest_fpu.xfd_err; @@ -4545,6 +4531,20 @@ static inline bool kvm_can_mwait_in_guest(void) boot_cpu_has(X86_FEATURE_ARAT); } +static u64 kvm_get_allowed_disable_exits(void) +{ + u64 r = KVM_X86_DISABLE_EXITS_PAUSE; + + if (!mitigate_smt_rsb) { + r |= KVM_X86_DISABLE_EXITS_HLT | + KVM_X86_DISABLE_EXITS_CSTATE; + + if (kvm_can_mwait_in_guest()) + r |= KVM_X86_DISABLE_EXITS_MWAIT; + } + return r; +} + #ifdef CONFIG_KVM_HYPERV static int kvm_ioctl_get_supported_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 __user *cpuid_arg) @@ -4687,15 +4687,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = KVM_CLOCK_VALID_FLAGS; break; case KVM_CAP_X86_DISABLE_EXITS: - r = KVM_X86_DISABLE_EXITS_PAUSE; - - if (!mitigate_smt_rsb) { - r |= KVM_X86_DISABLE_EXITS_HLT | - KVM_X86_DISABLE_EXITS_CSTATE; - - if (kvm_can_mwait_in_guest()) - r |= KVM_X86_DISABLE_EXITS_MWAIT; - } + r = kvm_get_allowed_disable_exits(); break; case KVM_CAP_X86_SMM: if (!IS_ENABLED(CONFIG_KVM_SMM)) @@ -5822,9 +5814,6 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, case KVM_CAP_ENFORCE_PV_FEATURE_CPUID: vcpu->arch.pv_cpuid.enforce = cap->args[0]; - if (vcpu->arch.pv_cpuid.enforce) - kvm_update_pv_runtime(vcpu); - return 0; 
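kvm_get_allowed_disable_exits() above gives KVM_CHECK_EXTENSION and KVM_ENABLE_CAP a single source of truth for which exits may be disabled; with mitigate_smt_rsb set, only PAUSE exiting can be turned off. A standalone sketch of the policy and the validation it backs; bit values are copied from the uapi header, the rest is illustrative:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define KVM_X86_DISABLE_EXITS_MWAIT  (1 << 0)
#define KVM_X86_DISABLE_EXITS_HLT    (1 << 1)
#define KVM_X86_DISABLE_EXITS_PAUSE  (1 << 2)
#define KVM_X86_DISABLE_EXITS_CSTATE (1 << 3)

static uint64_t allowed_disable_exits(bool mitigate_smt_rsb, bool can_mwait)
{
	uint64_t r = KVM_X86_DISABLE_EXITS_PAUSE;

	if (!mitigate_smt_rsb) {
		r |= KVM_X86_DISABLE_EXITS_HLT | KVM_X86_DISABLE_EXITS_CSTATE;
		if (can_mwait)
			r |= KVM_X86_DISABLE_EXITS_MWAIT;
	}
	return r;
}

int main(void)
{
	uint64_t args = KVM_X86_DISABLE_EXITS_HLT | KVM_X86_DISABLE_EXITS_MWAIT;

	/* KVM_ENABLE_CAP now rejects anything outside the allowed mask ... */
	if (args & ~allowed_disable_exits(true, true))
		puts("-EINVAL: HLT/MWAIT exits stay enabled with mitigations on");
	/* ... and, per the diff, only before the first vCPU is created. */
	return 0;
}
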
default: return -EINVAL; @@ -6542,30 +6531,32 @@ split_irqchip_unlock: break; case KVM_CAP_X86_DISABLE_EXITS: r = -EINVAL; - if (cap->args[0] & ~KVM_X86_DISABLE_VALID_EXITS) + if (cap->args[0] & ~kvm_get_allowed_disable_exits()) break; - if (cap->args[0] & KVM_X86_DISABLE_EXITS_PAUSE) - kvm->arch.pause_in_guest = true; + mutex_lock(&kvm->lock); + if (kvm->created_vcpus) + goto disable_exits_unlock; #define SMT_RSB_MSG "This processor is affected by the Cross-Thread Return Predictions vulnerability. " \ "KVM_CAP_X86_DISABLE_EXITS should only be used with SMT disabled or trusted guests." - if (!mitigate_smt_rsb) { - if (boot_cpu_has_bug(X86_BUG_SMT_RSB) && cpu_smt_possible() && - (cap->args[0] & ~KVM_X86_DISABLE_EXITS_PAUSE)) - pr_warn_once(SMT_RSB_MSG); - - if ((cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) && - kvm_can_mwait_in_guest()) - kvm->arch.mwait_in_guest = true; - if (cap->args[0] & KVM_X86_DISABLE_EXITS_HLT) - kvm->arch.hlt_in_guest = true; - if (cap->args[0] & KVM_X86_DISABLE_EXITS_CSTATE) - kvm->arch.cstate_in_guest = true; - } + if (!mitigate_smt_rsb && boot_cpu_has_bug(X86_BUG_SMT_RSB) && + cpu_smt_possible() && + (cap->args[0] & ~KVM_X86_DISABLE_EXITS_PAUSE)) + pr_warn_once(SMT_RSB_MSG); + if (cap->args[0] & KVM_X86_DISABLE_EXITS_PAUSE) + kvm->arch.pause_in_guest = true; + if (cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) + kvm->arch.mwait_in_guest = true; + if (cap->args[0] & KVM_X86_DISABLE_EXITS_HLT) + kvm->arch.hlt_in_guest = true; + if (cap->args[0] & KVM_X86_DISABLE_EXITS_CSTATE) + kvm->arch.cstate_in_guest = true; r = 0; +disable_exits_unlock: + mutex_unlock(&kvm->lock); break; case KVM_CAP_MSR_PLATFORM_INFO: kvm->arch.guest_can_read_msr_platform_info = cap->args[0]; @@ -8511,17 +8502,17 @@ static bool emulator_get_cpuid(struct x86_emulate_ctxt *ctxt, static bool emulator_guest_has_movbe(struct x86_emulate_ctxt *ctxt) { - return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_MOVBE); + return guest_cpu_cap_has(emul_to_vcpu(ctxt), X86_FEATURE_MOVBE); } static bool emulator_guest_has_fxsr(struct x86_emulate_ctxt *ctxt) { - return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_FXSR); + return guest_cpu_cap_has(emul_to_vcpu(ctxt), X86_FEATURE_FXSR); } static bool emulator_guest_has_rdpid(struct x86_emulate_ctxt *ctxt) { - return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_RDPID); + return guest_cpu_cap_has(emul_to_vcpu(ctxt), X86_FEATURE_RDPID); } static bool emulator_guest_cpuid_is_intel_compatible(struct x86_emulate_ctxt *ctxt) @@ -8813,6 +8804,28 @@ void kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_prepare_emulation_failure_exit); +void kvm_prepare_event_vectoring_exit(struct kvm_vcpu *vcpu, gpa_t gpa) +{ + u32 reason, intr_info, error_code; + struct kvm_run *run = vcpu->run; + u64 info1, info2; + int ndata = 0; + + kvm_x86_call(get_exit_info)(vcpu, &reason, &info1, &info2, + &intr_info, &error_code); + + run->internal.data[ndata++] = info2; + run->internal.data[ndata++] = reason; + run->internal.data[ndata++] = info1; + run->internal.data[ndata++] = gpa; + run->internal.data[ndata++] = vcpu->arch.last_vmentry_cpu; + + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV; + run->internal.ndata = ndata; +} +EXPORT_SYMBOL_GPL(kvm_prepare_event_vectoring_exit); + static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type) { struct kvm *kvm = vcpu->kvm; @@ -9085,6 +9098,15 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, if (r == 
X86EMUL_RETRY_INSTR || r == X86EMUL_PROPAGATE_FAULT) return 1; + if (kvm_unprotect_and_retry_on_failure(vcpu, cr2_or_gpa, + emulation_type)) + return 1; + + if (r == X86EMUL_UNHANDLEABLE_VECTORING) { + kvm_prepare_event_vectoring_exit(vcpu, cr2_or_gpa); + return 0; + } + WARN_ON_ONCE(r != X86EMUL_UNHANDLEABLE); return handle_emulation_failure(vcpu, emulation_type); } @@ -9773,10 +9795,6 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops) if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES)) kvm_caps.supported_xss = 0; -#define __kvm_cpu_cap_has(UNUSED_, f) kvm_cpu_cap_has(f) - cr4_reserved_bits = __cr4_reserved_bits(__kvm_cpu_cap_has, UNUSED_); -#undef __kvm_cpu_cap_has - if (kvm_caps.has_tsc_control) { /* * Make sure the user can only configure tsc_khz values that @@ -9979,17 +9997,19 @@ static int complete_hypercall_exit(struct kvm_vcpu *vcpu) if (!is_64_bit_hypercall(vcpu)) ret = (u32)ret; kvm_rax_write(vcpu, ret); - ++vcpu->stat.hypercalls; return kvm_skip_emulated_instruction(vcpu); } -unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr, - unsigned long a0, unsigned long a1, - unsigned long a2, unsigned long a3, - int op_64_bit, int cpl) +int ____kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr, + unsigned long a0, unsigned long a1, + unsigned long a2, unsigned long a3, + int op_64_bit, int cpl, + int (*complete_hypercall)(struct kvm_vcpu *)) { unsigned long ret; + ++vcpu->stat.hypercalls; + trace_kvm_hypercall(nr, a0, a1, a2, a3); if (!op_64_bit) { @@ -10041,7 +10061,7 @@ unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr, u64 gpa = a0, npages = a1, attrs = a2; ret = -KVM_ENOSYS; - if (!(vcpu->kvm->arch.hypercall_exit_enabled & (1 << KVM_HC_MAP_GPA_RANGE))) + if (!user_exit_on_hypercall(vcpu->kvm, KVM_HC_MAP_GPA_RANGE)) break; if (!PAGE_ALIGNED(gpa) || !npages || @@ -10052,6 +10072,13 @@ unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr, vcpu->run->exit_reason = KVM_EXIT_HYPERCALL; vcpu->run->hypercall.nr = KVM_HC_MAP_GPA_RANGE; + /* + * In principle this should have been -KVM_ENOSYS, but userspace (QEMU <=9.2) + * assumed that vcpu->run->hypercall.ret is never changed by KVM and thus that + * it was always zero on KVM_EXIT_HYPERCALL. Since KVM is now overwriting + * vcpu->run->hypercall.ret, ensure that it is zero to avoid breaking QEMU. + */ + vcpu->run->hypercall.ret = 0; vcpu->run->hypercall.args[0] = gpa; vcpu->run->hypercall.args[1] = npages; vcpu->run->hypercall.args[2] = attrs; @@ -10060,8 +10087,7 @@ unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr, vcpu->run->hypercall.flags |= KVM_EXIT_HYPERCALL_LONG_MODE; WARN_ON_ONCE(vcpu->run->hypercall.flags & KVM_EXIT_HYPERCALL_MBZ); - vcpu->arch.complete_userspace_io = complete_hypercall_exit; - /* stat is incremented on completion.
*/ + vcpu->arch.complete_userspace_io = complete_hypercall; return 0; } default: @@ -10070,41 +10096,23 @@ unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr, } out: - ++vcpu->stat.hypercalls; - return ret; + vcpu->run->hypercall.ret = ret; + return 1; } -EXPORT_SYMBOL_GPL(__kvm_emulate_hypercall); +EXPORT_SYMBOL_GPL(____kvm_emulate_hypercall); int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) { - unsigned long nr, a0, a1, a2, a3, ret; - int op_64_bit; - int cpl; - if (kvm_xen_hypercall_enabled(vcpu->kvm)) return kvm_xen_hypercall(vcpu); if (kvm_hv_hypercall_enabled(vcpu)) return kvm_hv_hypercall(vcpu); - nr = kvm_rax_read(vcpu); - a0 = kvm_rbx_read(vcpu); - a1 = kvm_rcx_read(vcpu); - a2 = kvm_rdx_read(vcpu); - a3 = kvm_rsi_read(vcpu); - op_64_bit = is_64_bit_hypercall(vcpu); - cpl = kvm_x86_call(get_cpl)(vcpu); - - ret = __kvm_emulate_hypercall(vcpu, nr, a0, a1, a2, a3, op_64_bit, cpl); - if (nr == KVM_HC_MAP_GPA_RANGE && !ret) - /* MAP_GPA tosses the request to the user space. */ - return 0; - - if (!op_64_bit) - ret = (u32)ret; - kvm_rax_write(vcpu, ret); - - return kvm_skip_emulated_instruction(vcpu); + return __kvm_emulate_hypercall(vcpu, rax, rbx, rcx, rdx, rsi, + is_64_bit_hypercall(vcpu), + kvm_x86_call(get_cpl)(vcpu), + complete_hypercall_exit); } EXPORT_SYMBOL_GPL(kvm_emulate_hypercall); @@ -11463,6 +11471,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) struct kvm_run *kvm_run = vcpu->run; int r; + r = kvm_mmu_post_init_vm(vcpu->kvm); + if (r) + return r; + vcpu_load(vcpu); kvm_sigset_activate(vcpu); kvm_run->flags = 0; @@ -12276,9 +12288,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) goto free_emulate_ctxt; } - vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); - vcpu->arch.reserved_gpa_bits = kvm_vcpu_reserved_gpa_bits_raw(vcpu); - kvm_async_pf_hash_reset(vcpu); if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_STUFF_FEATURE_MSRS)) { @@ -12301,6 +12310,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) kvm_xen_init_vcpu(vcpu); vcpu_load(vcpu); + kvm_vcpu_after_set_cpuid(vcpu); kvm_set_tsc_khz(vcpu, vcpu->kvm->arch.default_tsc_khz); kvm_vcpu_reset(vcpu, false); kvm_init_mmu(vcpu); @@ -12742,7 +12752,8 @@ out: int kvm_arch_post_init_vm(struct kvm *kvm) { - return kvm_mmu_post_init_vm(kvm); + once_init(&kvm->arch.nx_once); + return 0; } static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) @@ -12800,7 +12811,8 @@ void __user * __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, struct kvm_memslots *slots = kvm_memslots(kvm); struct kvm_memory_slot *slot; - /* Called with kvm->slots_lock held. */ + lockdep_assert_held(&kvm->slots_lock); + if (WARN_ON(id >= KVM_MEM_SLOTS_NUM)) return ERR_PTR_USR(-EINVAL); @@ -12833,7 +12845,7 @@ void __user * __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, m.guest_phys_addr = gpa; m.userspace_addr = hva; m.memory_size = size; - r = __kvm_set_memory_region(kvm, &m); + r = kvm_set_internal_memslot(kvm, &m); if (r < 0) return ERR_PTR_USR(r); } @@ -12934,7 +12946,7 @@ static int kvm_alloc_memslot_metadata(struct kvm *kvm, /* * Clear out the previous array pointers for the KVM_MR_MOVE case. The - * old arrays will be freed by __kvm_set_memory_region() if installing + * old arrays will be freed by kvm_set_memory_region() if installing * the new memslot is successful. 
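After this rework, kvm_emulate_hypercall() above is a thin wrapper: the __kvm_emulate_hypercall() macro added to x86.h further below reads the GPRs, calls ____kvm_emulate_hypercall(), and runs the completion callback only when the return value is positive; zero means the hypercall was handed to userspace and the callback is deferred to complete_userspace_io. A standalone sketch of that calling convention; the hypercall numbers and handling here are invented for illustration:

#include <stdio.h>

struct vcpu { unsigned long rax; int (*complete)(struct vcpu *); };

static int complete_hypercall(struct vcpu *v)
{
	printf("rax <- %lu, skip instruction\n", v->rax);
	return 1;
}

/* ____kvm_emulate_hypercall(): 0 = exited to userspace, >0 = done in kernel. */
static int do_hypercall(struct vcpu *v, unsigned long nr,
			int (*complete)(struct vcpu *))
{
	if (nr == 1) {                  /* say, a hypercall handled by userspace */
		v->complete = complete; /* runs on re-entry, like complete_userspace_io */
		return 0;
	}
	v->rax = 0;                     /* handled in kernel, success */
	return 1;
}

/* The __kvm_emulate_hypercall() wrapper pattern. */
static int emulate_hypercall(struct vcpu *v, unsigned long nr)
{
	int ret = do_hypercall(v, nr, complete_hypercall);

	if (ret > 0)
		ret = complete_hypercall(v);
	return ret;
}

int main(void)
{
	struct vcpu v = { 0 };

	emulate_hypercall(&v, 0); /* completed in kernel */
	emulate_hypercall(&v, 1); /* deferred: exits to userspace ... */
	v.complete(&v);           /* ... and completes on the way back in */
	return 0;
}
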
*/ memset(&slot->arch, 0, sizeof(slot->arch)); @@ -13027,6 +13039,9 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, if ((new->base_gfn + new->npages - 1) > kvm_mmu_max_gfn()) return -EINVAL; + if (kvm_is_gfn_alias(kvm, new->base_gfn + new->npages - 1)) + return -EINVAL; + return kvm_alloc_memslot_metadata(kvm, new); } diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index ec623d23d13d..91e50a513100 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -550,7 +550,6 @@ static inline void kvm_machine_check(void) void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu); void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu); int kvm_spec_ctrl_test_value(u64 value); -bool __kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r, struct x86_exception *e); int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva); @@ -577,6 +576,11 @@ enum kvm_msr_access { #define KVM_MSR_RET_UNSUPPORTED 2 #define KVM_MSR_RET_FILTERED 3 +static inline bool __kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) +{ + return !(cr4 & vcpu->arch.cr4_guest_rsvd_bits); +} + #define __cr4_reserved_bits(__cpu_has, __c) \ ({ \ u64 __reserved_bits = CR4_RESERVED_BITS; \ @@ -612,4 +616,32 @@ int kvm_sev_es_string_io(struct kvm_vcpu *vcpu, unsigned int size, unsigned int port, void *data, unsigned int count, int in); +static inline bool user_exit_on_hypercall(struct kvm *kvm, unsigned long hc_nr) +{ + return kvm->arch.hypercall_exit_enabled & BIT(hc_nr); +} + +int ____kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr, + unsigned long a0, unsigned long a1, + unsigned long a2, unsigned long a3, + int op_64_bit, int cpl, + int (*complete_hypercall)(struct kvm_vcpu *)); + +#define __kvm_emulate_hypercall(_vcpu, nr, a0, a1, a2, a3, op_64_bit, cpl, complete_hypercall) \ +({ \ + int __ret; \ + \ + __ret = ____kvm_emulate_hypercall(_vcpu, \ + kvm_##nr##_read(_vcpu), kvm_##a0##_read(_vcpu), \ + kvm_##a1##_read(_vcpu), kvm_##a2##_read(_vcpu), \ + kvm_##a3##_read(_vcpu), op_64_bit, cpl, \ + complete_hypercall); \ + \ + if (__ret > 0) \ + __ret = complete_hypercall(_vcpu); \ + __ret; \ +}) + +int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); + #endif diff --git a/include/linux/call_once.h b/include/linux/call_once.h new file mode 100644 index 000000000000..6261aa0b3fb0 --- /dev/null +++ b/include/linux/call_once.h @@ -0,0 +1,45 @@ +#ifndef _LINUX_CALL_ONCE_H +#define _LINUX_CALL_ONCE_H + +#include <linux/types.h> +#include <linux/mutex.h> + +#define ONCE_NOT_STARTED 0 +#define ONCE_RUNNING 1 +#define ONCE_COMPLETED 2 + +struct once { + atomic_t state; + struct mutex lock; +}; + +static inline void __once_init(struct once *once, const char *name, + struct lock_class_key *key) +{ + atomic_set(&once->state, ONCE_NOT_STARTED); + __mutex_init(&once->lock, name, key); +} + +#define once_init(once) \ +do { \ + static struct lock_class_key __key; \ + __once_init((once), #once, &__key); \ +} while (0) + +static inline void call_once(struct once *once, void (*cb)(struct once *)) +{ + /* Pairs with atomic_set_release() below. 
*/ + if (atomic_read_acquire(&once->state) == ONCE_COMPLETED) + return; + + guard(mutex)(&once->lock); + WARN_ON(atomic_read(&once->state) == ONCE_RUNNING); + if (atomic_read(&once->state) != ONCE_NOT_STARTED) + return; + + atomic_set(&once->state, ONCE_RUNNING); + cb(once); + atomic_set_release(&once->state, ONCE_COMPLETED); +} + +#endif /* _LINUX_CALL_ONCE_H */ diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 401439bb21e3..3cb9a32a6330 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -255,11 +255,17 @@ union kvm_mmu_notifier_arg { unsigned long attributes; }; +enum kvm_gfn_range_filter { + KVM_FILTER_SHARED = BIT(0), + KVM_FILTER_PRIVATE = BIT(1), +}; + struct kvm_gfn_range { struct kvm_memory_slot *slot; gfn_t start; gfn_t end; union kvm_mmu_notifier_arg arg; + enum kvm_gfn_range_filter attr_filter; bool may_block; }; bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range); @@ -596,7 +602,12 @@ struct kvm_memory_slot { #ifdef CONFIG_KVM_PRIVATE_MEM struct { - struct file __rcu *file; + /* + * Writes protected by kvm->slots_lock. Acquiring a + * reference via kvm_gmem_get_file() is protected by + * either kvm->slots_lock or kvm->srcu. + */ + struct file *file; pgoff_t pgoff; } gmem; #endif @@ -963,6 +974,15 @@ static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx) static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) { int num_vcpus = atomic_read(&kvm->online_vcpus); + + /* + * Explicitly verify the target vCPU is online, as the anti-speculation + * logic only limits the CPU's ability to speculate, e.g. given a "bad" + * index, clamping the index to 0 would return vCPU0, not NULL. + */ + if (i >= num_vcpus) + return NULL; + i = array_index_nospec(i, num_vcpus); /* Pairs with smp_wmb() in kvm_vm_ioctl_create_vcpu. 
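The call_once() helper completed above provides once-per-object initialization: a lock-free acquire-load fast path, a mutex-serialized slow path, and a release store that publishes the callback's side effects; x86 uses it via kvm->arch.nx_once to start the NX hugepage recovery thread on first KVM_RUN. A userspace translation using C11 atomics and pthreads, simplified in that the kernel version also WARNs if it ever observes ONCE_RUNNING:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

enum { ONCE_NOT_STARTED, ONCE_RUNNING, ONCE_COMPLETED };

struct once {
	atomic_int state;
	pthread_mutex_t lock;
};

static void call_once(struct once *once, void (*cb)(struct once *))
{
	/* Fast path: pairs with the release store below. */
	if (atomic_load_explicit(&once->state, memory_order_acquire) ==
	    ONCE_COMPLETED)
		return;

	pthread_mutex_lock(&once->lock);
	if (atomic_load_explicit(&once->state, memory_order_relaxed) ==
	    ONCE_NOT_STARTED) {
		atomic_store_explicit(&once->state, ONCE_RUNNING,
				      memory_order_relaxed);
		cb(once);
		atomic_store_explicit(&once->state, ONCE_COMPLETED,
				      memory_order_release);
	}
	pthread_mutex_unlock(&once->lock);
}

static void start_nx_thread(struct once *once)
{
	(void)once;
	puts("spawn NX hugepage recovery worker (once per VM)");
}

int main(void)
{
	struct once nx_once = { ONCE_NOT_STARTED, PTHREAD_MUTEX_INITIALIZER };

	call_once(&nx_once, start_nx_thread); /* runs the callback */
	call_once(&nx_once, start_nx_thread); /* no-op from now on */
	return 0;
}
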
*/ @@ -970,9 +990,10 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) return xa_load(&kvm->vcpu_array, i); } -#define kvm_for_each_vcpu(idx, vcpup, kvm) \ - xa_for_each_range(&kvm->vcpu_array, idx, vcpup, 0, \ - (atomic_read(&kvm->online_vcpus) - 1)) +#define kvm_for_each_vcpu(idx, vcpup, kvm) \ + if (atomic_read(&kvm->online_vcpus)) \ + xa_for_each_range(&kvm->vcpu_array, idx, vcpup, 0, \ + (atomic_read(&kvm->online_vcpus) - 1)) static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id) { @@ -1183,7 +1204,7 @@ struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn * -- just change its flags * * Since flags can be changed by some of these operations, the following - * differentiation is the best we can do for __kvm_set_memory_region(): + * differentiation is the best we can do for kvm_set_memory_region(): */ enum kvm_mr_change { KVM_MR_CREATE, @@ -1192,10 +1213,8 @@ enum kvm_mr_change { KVM_MR_FLAGS_ONLY, }; -int kvm_set_memory_region(struct kvm *kvm, - const struct kvm_userspace_memory_region2 *mem); -int __kvm_set_memory_region(struct kvm *kvm, - const struct kvm_userspace_memory_region2 *mem); +int kvm_set_internal_memslot(struct kvm *kvm, + const struct kvm_userspace_memory_region2 *mem); void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot); void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen); int kvm_arch_prepare_memory_region(struct kvm *kvm, diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 502ea63b5d2e..45e6d8fca9b9 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -617,10 +617,6 @@ struct kvm_ioeventfd { #define KVM_X86_DISABLE_EXITS_HLT (1 << 1) #define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2) #define KVM_X86_DISABLE_EXITS_CSTATE (1 << 3) -#define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT | \ - KVM_X86_DISABLE_EXITS_HLT | \ - KVM_X86_DISABLE_EXITS_PAUSE | \ - KVM_X86_DISABLE_EXITS_CSTATE) /* for KVM_ENABLE_CAP */ struct kvm_enable_cap { @@ -1070,6 +1066,10 @@ struct kvm_dirty_tlb { #define KVM_REG_SIZE_SHIFT 52 #define KVM_REG_SIZE_MASK 0x00f0000000000000ULL + +#define KVM_REG_SIZE(id) \ + (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) + #define KVM_REG_SIZE_U8 0x0000000000000000ULL #define KVM_REG_SIZE_U16 0x0010000000000000ULL #define KVM_REG_SIZE_U32 0x0020000000000000ULL diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 7f57abf936e7..1d41a046a7bf 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -9,3 +9,4 @@ !config !settings !Makefile +!Makefile.kvm
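A note on the call_once() helper added in include/linux/call_once.h above: it is a mutex-backed one-shot initializer. Concurrent callers serialize on once->lock, exactly one of them runs the callback, and every later call returns through the acquire-ordered fast path without touching the mutex. A minimal usage sketch, assuming kernel context; setup_once, do_expensive_setup() and some_entry_point() are hypothetical names for illustration only:

#include <linux/init.h>
#include <linux/call_once.h>

static struct once setup_once;

static void do_expensive_setup(struct once *once)
{
	/* Runs at most once per 'struct once', under once->lock. */
}

static int __init example_init(void)
{
	once_init(&setup_once);	/* binds a lockdep class to the mutex */
	return 0;
}

static void some_entry_point(void)
{
	/*
	 * Safe from any number of tasks, but not from atomic context:
	 * the slow path can sleep on once->lock.
	 */
	call_once(&setup_once, do_expensive_setup);
}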
\ No newline at end of file diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 41593d2e7de9..20af35a91d6f 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -1,347 +1,16 @@ # SPDX-License-Identifier: GPL-2.0-only -include ../../../build/Build.include - -all: - top_srcdir = ../../../.. include $(top_srcdir)/scripts/subarch.include ARCH ?= $(SUBARCH) -ifeq ($(ARCH),x86) - ARCH_DIR := x86_64 -else ifeq ($(ARCH),arm64) - ARCH_DIR := aarch64 -else ifeq ($(ARCH),s390) - ARCH_DIR := s390x -else - ARCH_DIR := $(ARCH) -endif - -LIBKVM += lib/assert.c -LIBKVM += lib/elf.c -LIBKVM += lib/guest_modes.c -LIBKVM += lib/io.c -LIBKVM += lib/kvm_util.c -LIBKVM += lib/memstress.c -LIBKVM += lib/guest_sprintf.c -LIBKVM += lib/rbtree.c -LIBKVM += lib/sparsebit.c -LIBKVM += lib/test_util.c -LIBKVM += lib/ucall_common.c -LIBKVM += lib/userfaultfd_util.c - -LIBKVM_STRING += lib/string_override.c - -LIBKVM_x86_64 += lib/x86_64/apic.c -LIBKVM_x86_64 += lib/x86_64/handlers.S -LIBKVM_x86_64 += lib/x86_64/hyperv.c -LIBKVM_x86_64 += lib/x86_64/memstress.c -LIBKVM_x86_64 += lib/x86_64/pmu.c -LIBKVM_x86_64 += lib/x86_64/processor.c -LIBKVM_x86_64 += lib/x86_64/sev.c -LIBKVM_x86_64 += lib/x86_64/svm.c -LIBKVM_x86_64 += lib/x86_64/ucall.c -LIBKVM_x86_64 += lib/x86_64/vmx.c - -LIBKVM_aarch64 += lib/aarch64/gic.c -LIBKVM_aarch64 += lib/aarch64/gic_v3.c -LIBKVM_aarch64 += lib/aarch64/gic_v3_its.c -LIBKVM_aarch64 += lib/aarch64/handlers.S -LIBKVM_aarch64 += lib/aarch64/processor.c -LIBKVM_aarch64 += lib/aarch64/spinlock.c -LIBKVM_aarch64 += lib/aarch64/ucall.c -LIBKVM_aarch64 += lib/aarch64/vgic.c - -LIBKVM_s390x += lib/s390x/diag318_test_handler.c -LIBKVM_s390x += lib/s390x/processor.c -LIBKVM_s390x += lib/s390x/ucall.c -LIBKVM_s390x += lib/s390x/facility.c - -LIBKVM_riscv += lib/riscv/handlers.S -LIBKVM_riscv += lib/riscv/processor.c -LIBKVM_riscv += lib/riscv/ucall.c - -# Non-compiled test targets -TEST_PROGS_x86_64 += x86_64/nx_huge_pages_test.sh - -# Compiled test targets -TEST_GEN_PROGS_x86_64 = x86_64/cpuid_test -TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test -TEST_GEN_PROGS_x86_64 += x86_64/dirty_log_page_splitting_test -TEST_GEN_PROGS_x86_64 += x86_64/feature_msrs_test -TEST_GEN_PROGS_x86_64 += x86_64/exit_on_emulation_failure_test -TEST_GEN_PROGS_x86_64 += x86_64/fix_hypercall_test -TEST_GEN_PROGS_x86_64 += x86_64/hwcr_msr_test -TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock -TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid -TEST_GEN_PROGS_x86_64 += x86_64/hyperv_evmcs -TEST_GEN_PROGS_x86_64 += x86_64/hyperv_extended_hypercalls -TEST_GEN_PROGS_x86_64 += x86_64/hyperv_features -TEST_GEN_PROGS_x86_64 += x86_64/hyperv_ipi -TEST_GEN_PROGS_x86_64 += x86_64/hyperv_svm_test -TEST_GEN_PROGS_x86_64 += x86_64/hyperv_tlb_flush -TEST_GEN_PROGS_x86_64 += x86_64/kvm_clock_test -TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test -TEST_GEN_PROGS_x86_64 += x86_64/monitor_mwait_test -TEST_GEN_PROGS_x86_64 += x86_64/nested_exceptions_test -TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test -TEST_GEN_PROGS_x86_64 += x86_64/pmu_counters_test -TEST_GEN_PROGS_x86_64 += x86_64/pmu_event_filter_test -TEST_GEN_PROGS_x86_64 += x86_64/private_mem_conversions_test -TEST_GEN_PROGS_x86_64 += x86_64/private_mem_kvm_exits_test -TEST_GEN_PROGS_x86_64 += x86_64/set_boot_cpu_id -TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test -TEST_GEN_PROGS_x86_64 += x86_64/smaller_maxphyaddr_emulation_test -TEST_GEN_PROGS_x86_64 += x86_64/smm_test -TEST_GEN_PROGS_x86_64 += 
x86_64/state_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test -TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test -TEST_GEN_PROGS_x86_64 += x86_64/svm_int_ctl_test -TEST_GEN_PROGS_x86_64 += x86_64/svm_nested_shutdown_test -TEST_GEN_PROGS_x86_64 += x86_64/svm_nested_soft_inject_test -TEST_GEN_PROGS_x86_64 += x86_64/tsc_scaling_sync -TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test -TEST_GEN_PROGS_x86_64 += x86_64/ucna_injection_test -TEST_GEN_PROGS_x86_64 += x86_64/userspace_io_test -TEST_GEN_PROGS_x86_64 += x86_64/userspace_msr_exit_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_exception_with_invalid_guest_state -TEST_GEN_PROGS_x86_64 += x86_64/vmx_msrs_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_invalid_nested_guest_state -TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_nested_tsc_scaling_test -TEST_GEN_PROGS_x86_64 += x86_64/apic_bus_clock_test -TEST_GEN_PROGS_x86_64 += x86_64/xapic_ipi_test -TEST_GEN_PROGS_x86_64 += x86_64/xapic_state_test -TEST_GEN_PROGS_x86_64 += x86_64/xcr0_cpuid_test -TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test -TEST_GEN_PROGS_x86_64 += x86_64/debug_regs -TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_pmu_caps_test -TEST_GEN_PROGS_x86_64 += x86_64/xen_shinfo_test -TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test -TEST_GEN_PROGS_x86_64 += x86_64/sev_init2_tests -TEST_GEN_PROGS_x86_64 += x86_64/sev_migrate_tests -TEST_GEN_PROGS_x86_64 += x86_64/sev_smoke_test -TEST_GEN_PROGS_x86_64 += x86_64/amx_test -TEST_GEN_PROGS_x86_64 += x86_64/max_vcpuid_cap_test -TEST_GEN_PROGS_x86_64 += x86_64/triple_fault_event_test -TEST_GEN_PROGS_x86_64 += x86_64/recalc_apic_map_test -TEST_GEN_PROGS_x86_64 += access_tracking_perf_test -TEST_GEN_PROGS_x86_64 += coalesced_io_test -TEST_GEN_PROGS_x86_64 += demand_paging_test -TEST_GEN_PROGS_x86_64 += dirty_log_test -TEST_GEN_PROGS_x86_64 += dirty_log_perf_test -TEST_GEN_PROGS_x86_64 += guest_memfd_test -TEST_GEN_PROGS_x86_64 += guest_print_test -TEST_GEN_PROGS_x86_64 += hardware_disable_test -TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus -TEST_GEN_PROGS_x86_64 += kvm_page_table_test -TEST_GEN_PROGS_x86_64 += max_guest_memory_test -TEST_GEN_PROGS_x86_64 += memslot_modification_stress_test -TEST_GEN_PROGS_x86_64 += memslot_perf_test -TEST_GEN_PROGS_x86_64 += rseq_test -TEST_GEN_PROGS_x86_64 += set_memory_region_test -TEST_GEN_PROGS_x86_64 += steal_time -TEST_GEN_PROGS_x86_64 += kvm_binary_stats_test -TEST_GEN_PROGS_x86_64 += system_counter_offset_test -TEST_GEN_PROGS_x86_64 += pre_fault_memory_test - -# Compiled outputs used by test targets -TEST_GEN_PROGS_EXTENDED_x86_64 += x86_64/nx_huge_pages_test - -TEST_GEN_PROGS_aarch64 += aarch64/aarch32_id_regs -TEST_GEN_PROGS_aarch64 += aarch64/arch_timer_edge_cases -TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions -TEST_GEN_PROGS_aarch64 += aarch64/hypercalls -TEST_GEN_PROGS_aarch64 += aarch64/mmio_abort -TEST_GEN_PROGS_aarch64 += aarch64/page_fault_test -TEST_GEN_PROGS_aarch64 += aarch64/psci_test -TEST_GEN_PROGS_aarch64 += aarch64/set_id_regs -TEST_GEN_PROGS_aarch64 += aarch64/smccc_filter -TEST_GEN_PROGS_aarch64 += aarch64/vcpu_width_config -TEST_GEN_PROGS_aarch64 += aarch64/vgic_init -TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq -TEST_GEN_PROGS_aarch64 += aarch64/vgic_lpi_stress 
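For orientation: every per-arch program list being deleted in this hunk reappears, near verbatim, in the new Makefile.kvm further down; all that stays in the top-level Makefile is the ARCH handling shown at the start of this diff. That dispatch, sketched standalone with editorial comments:

ARCH ?= $(SUBARCH)

ifeq ($(ARCH),$(filter $(ARCH),arm64 s390 riscv x86 x86_64))
# The top-level selftests Makefile allows ARCH=x86_64, so fold it into
# x86 here; this also retires the old x86 -> x86_64, arm64 -> aarch64,
# s390 -> s390x ARCH_DIR indirection deleted above.
ifeq ($(ARCH),x86_64)
  ARCH := x86
endif
include Makefile.kvm
else
# Empty targets so unsupported architectures build and clean cleanly.
all:
clean:
endif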
-TEST_GEN_PROGS_aarch64 += aarch64/vpmu_counter_access -TEST_GEN_PROGS_aarch64 += aarch64/no-vgic-v3 -TEST_GEN_PROGS_aarch64 += access_tracking_perf_test -TEST_GEN_PROGS_aarch64 += arch_timer -TEST_GEN_PROGS_aarch64 += coalesced_io_test -TEST_GEN_PROGS_aarch64 += demand_paging_test -TEST_GEN_PROGS_aarch64 += dirty_log_test -TEST_GEN_PROGS_aarch64 += dirty_log_perf_test -TEST_GEN_PROGS_aarch64 += guest_print_test -TEST_GEN_PROGS_aarch64 += get-reg-list -TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus -TEST_GEN_PROGS_aarch64 += kvm_page_table_test -TEST_GEN_PROGS_aarch64 += memslot_modification_stress_test -TEST_GEN_PROGS_aarch64 += memslot_perf_test -TEST_GEN_PROGS_aarch64 += rseq_test -TEST_GEN_PROGS_aarch64 += set_memory_region_test -TEST_GEN_PROGS_aarch64 += steal_time -TEST_GEN_PROGS_aarch64 += kvm_binary_stats_test - -TEST_GEN_PROGS_s390x = s390x/memop -TEST_GEN_PROGS_s390x += s390x/resets -TEST_GEN_PROGS_s390x += s390x/sync_regs_test -TEST_GEN_PROGS_s390x += s390x/tprot -TEST_GEN_PROGS_s390x += s390x/cmma_test -TEST_GEN_PROGS_s390x += s390x/debug_test -TEST_GEN_PROGS_s390x += s390x/cpumodel_subfuncs_test -TEST_GEN_PROGS_s390x += s390x/shared_zeropage_test -TEST_GEN_PROGS_s390x += s390x/ucontrol_test -TEST_GEN_PROGS_s390x += demand_paging_test -TEST_GEN_PROGS_s390x += dirty_log_test -TEST_GEN_PROGS_s390x += guest_print_test -TEST_GEN_PROGS_s390x += kvm_create_max_vcpus -TEST_GEN_PROGS_s390x += kvm_page_table_test -TEST_GEN_PROGS_s390x += rseq_test -TEST_GEN_PROGS_s390x += set_memory_region_test -TEST_GEN_PROGS_s390x += kvm_binary_stats_test - -TEST_GEN_PROGS_riscv += riscv/sbi_pmu_test -TEST_GEN_PROGS_riscv += riscv/ebreak_test -TEST_GEN_PROGS_riscv += arch_timer -TEST_GEN_PROGS_riscv += coalesced_io_test -TEST_GEN_PROGS_riscv += demand_paging_test -TEST_GEN_PROGS_riscv += dirty_log_test -TEST_GEN_PROGS_riscv += get-reg-list -TEST_GEN_PROGS_riscv += guest_print_test -TEST_GEN_PROGS_riscv += kvm_binary_stats_test -TEST_GEN_PROGS_riscv += kvm_create_max_vcpus -TEST_GEN_PROGS_riscv += kvm_page_table_test -TEST_GEN_PROGS_riscv += set_memory_region_test -TEST_GEN_PROGS_riscv += steal_time - -SPLIT_TESTS += arch_timer -SPLIT_TESTS += get-reg-list - -TEST_PROGS += $(TEST_PROGS_$(ARCH_DIR)) -TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(ARCH_DIR)) -TEST_GEN_PROGS_EXTENDED += $(TEST_GEN_PROGS_EXTENDED_$(ARCH_DIR)) -LIBKVM += $(LIBKVM_$(ARCH_DIR)) - -OVERRIDE_TARGETS = 1 - -# lib.mak defines $(OUTPUT), prepends $(OUTPUT)/ to $(TEST_GEN_PROGS), and most -# importantly defines, i.e. overwrites, $(CC) (unless `make -e` or `make CC=`, -# which causes the environment variable to override the makefile). -include ../lib.mk - -INSTALL_HDR_PATH = $(top_srcdir)/usr -LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/ -LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include +ifeq ($(ARCH),$(filter $(ARCH),arm64 s390 riscv x86 x86_64)) +# Top-level selftests allows ARCH=x86_64 :-( ifeq ($(ARCH),x86_64) -LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/x86/include -else -LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include + ARCH := x86 endif -CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ - -Wno-gnu-variable-sized-type-not-at-end -MD -MP -DCONFIG_64BIT \ - -fno-builtin-memcmp -fno-builtin-memcpy \ - -fno-builtin-memset -fno-builtin-strnlen \ - -fno-stack-protector -fno-PIE -fno-strict-aliasing \ - -I$(LINUX_TOOL_INCLUDE) -I$(LINUX_TOOL_ARCH_INCLUDE) \ - -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -Iinclude/$(ARCH_DIR) \ - -I ../rseq -I.. 
$(EXTRA_CFLAGS) $(KHDR_INCLUDES) -ifeq ($(ARCH),s390) - CFLAGS += -march=z10 -endif -ifeq ($(ARCH),x86) -ifeq ($(shell echo "void foo(void) { }" | $(CC) -march=x86-64-v2 -x c - -c -o /dev/null 2>/dev/null; echo "$$?"),0) - CFLAGS += -march=x86-64-v2 -endif -endif -ifeq ($(ARCH),arm64) -tools_dir := $(top_srcdir)/tools -arm64_tools_dir := $(tools_dir)/arch/arm64/tools/ - -ifneq ($(abs_objdir),) -arm64_hdr_outdir := $(abs_objdir)/tools/ +include Makefile.kvm else -arm64_hdr_outdir := $(tools_dir)/ -endif - -GEN_HDRS := $(arm64_hdr_outdir)arch/arm64/include/generated/ -CFLAGS += -I$(GEN_HDRS) - -$(GEN_HDRS): $(wildcard $(arm64_tools_dir)/*) - $(MAKE) -C $(arm64_tools_dir) OUTPUT=$(arm64_hdr_outdir) +# Empty targets for unsupported architectures +all: +clean: endif - -no-pie-option := $(call try-run, echo 'int main(void) { return 0; }' | \ - $(CC) -Werror $(CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie) - -# On s390, build the testcases KVM-enabled -pgste-option = $(call try-run, echo 'int main(void) { return 0; }' | \ - $(CC) -Werror -Wl$(comma)--s390-pgste -x c - -o "$$TMP",-Wl$(comma)--s390-pgste) - -LDLIBS += -ldl -LDFLAGS += -pthread $(no-pie-option) $(pgste-option) - -LIBKVM_C := $(filter %.c,$(LIBKVM)) -LIBKVM_S := $(filter %.S,$(LIBKVM)) -LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C)) -LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S)) -LIBKVM_STRING_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_STRING)) -LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(LIBKVM_STRING_OBJ) -SPLIT_TEST_GEN_PROGS := $(patsubst %, $(OUTPUT)/%, $(SPLIT_TESTS)) -SPLIT_TEST_GEN_OBJ := $(patsubst %, $(OUTPUT)/$(ARCH_DIR)/%.o, $(SPLIT_TESTS)) - -TEST_GEN_OBJ = $(patsubst %, %.o, $(TEST_GEN_PROGS)) -TEST_GEN_OBJ += $(patsubst %, %.o, $(TEST_GEN_PROGS_EXTENDED)) -TEST_DEP_FILES = $(patsubst %.o, %.d, $(TEST_GEN_OBJ)) -TEST_DEP_FILES += $(patsubst %.o, %.d, $(LIBKVM_OBJS)) -TEST_DEP_FILES += $(patsubst %.o, %.d, $(SPLIT_TEST_GEN_OBJ)) --include $(TEST_DEP_FILES) - -$(shell mkdir -p $(sort $(OUTPUT)/$(ARCH_DIR) $(dir $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ)))) - -$(filter-out $(SPLIT_TEST_GEN_PROGS), $(TEST_GEN_PROGS)) \ -$(TEST_GEN_PROGS_EXTENDED): %: %.o - $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) $< $(LIBKVM_OBJS) $(LDLIBS) -o $@ -$(TEST_GEN_OBJ): $(OUTPUT)/%.o: %.c - $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ - -$(SPLIT_TEST_GEN_PROGS): $(OUTPUT)/%: $(OUTPUT)/%.o $(OUTPUT)/$(ARCH_DIR)/%.o - $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) $^ $(LDLIBS) -o $@ -$(SPLIT_TEST_GEN_OBJ): $(OUTPUT)/$(ARCH_DIR)/%.o: $(ARCH_DIR)/%.c - $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ - -EXTRA_CLEAN += $(GEN_HDRS) \ - $(LIBKVM_OBJS) \ - $(SPLIT_TEST_GEN_OBJ) \ - $(TEST_DEP_FILES) \ - $(TEST_GEN_OBJ) \ - cscope.* - -$(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c $(GEN_HDRS) - $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ - -$(LIBKVM_S_OBJ): $(OUTPUT)/%.o: %.S $(GEN_HDRS) - $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ - -# Compile the string overrides as freestanding to prevent the compiler from -# generating self-referential code, e.g. without "freestanding" the compiler may -# "optimize" memcmp() by invoking memcmp(), thus causing infinite recursion. 
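The freestanding comment above deserves a concrete illustration: lib/string_override.c supplies its own memcmp()/memcpy()/memset(), and at -O2 the compiler is entitled to recognize a byte-comparison loop as the memcmp() idiom and emit a call to the library routine, which here would be the function itself. A sketch of the hazard; the loop below is illustrative, not the selftests' actual implementation:

#include <stddef.h>

int memcmp(const void *a, const void *b, size_t n)
{
	const unsigned char *pa = a, *pb = b;
	size_t i;

	/*
	 * Without -ffreestanding, the optimizer may replace this loop
	 * with a call to memcmp() -- i.e. to this very function --
	 * recursing until the stack overflows.
	 */
	for (i = 0; i < n; i++) {
		if (pa[i] != pb[i])
			return pa[i] < pb[i] ? -1 : 1;
	}
	return 0;
}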
-$(LIBKVM_STRING_OBJ): $(OUTPUT)/%.o: %.c - $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c -ffreestanding $< -o $@ - -$(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS)))) -$(SPLIT_TEST_GEN_OBJ): $(GEN_HDRS) -$(TEST_GEN_PROGS): $(LIBKVM_OBJS) -$(TEST_GEN_PROGS_EXTENDED): $(LIBKVM_OBJS) -$(TEST_GEN_OBJ): $(GEN_HDRS) - -cscope: include_paths = $(LINUX_TOOL_INCLUDE) $(LINUX_HDR_PATH) include lib .. -cscope: - $(RM) cscope.* - (find $(include_paths) -name '*.h' \ - -exec realpath --relative-base=$(PWD) {} \;; \ - find . -name '*.c' \ - -exec realpath --relative-base=$(PWD) {} \;) | sort -u > cscope.files - cscope -b diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm new file mode 100644 index 000000000000..4277b983cace --- /dev/null +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -0,0 +1,330 @@ +# SPDX-License-Identifier: GPL-2.0-only +include ../../../build/Build.include + +all: + +LIBKVM += lib/assert.c +LIBKVM += lib/elf.c +LIBKVM += lib/guest_modes.c +LIBKVM += lib/io.c +LIBKVM += lib/kvm_util.c +LIBKVM += lib/memstress.c +LIBKVM += lib/guest_sprintf.c +LIBKVM += lib/rbtree.c +LIBKVM += lib/sparsebit.c +LIBKVM += lib/test_util.c +LIBKVM += lib/ucall_common.c +LIBKVM += lib/userfaultfd_util.c + +LIBKVM_STRING += lib/string_override.c + +LIBKVM_x86 += lib/x86/apic.c +LIBKVM_x86 += lib/x86/handlers.S +LIBKVM_x86 += lib/x86/hyperv.c +LIBKVM_x86 += lib/x86/memstress.c +LIBKVM_x86 += lib/x86/pmu.c +LIBKVM_x86 += lib/x86/processor.c +LIBKVM_x86 += lib/x86/sev.c +LIBKVM_x86 += lib/x86/svm.c +LIBKVM_x86 += lib/x86/ucall.c +LIBKVM_x86 += lib/x86/vmx.c + +LIBKVM_arm64 += lib/arm64/gic.c +LIBKVM_arm64 += lib/arm64/gic_v3.c +LIBKVM_arm64 += lib/arm64/gic_v3_its.c +LIBKVM_arm64 += lib/arm64/handlers.S +LIBKVM_arm64 += lib/arm64/processor.c +LIBKVM_arm64 += lib/arm64/spinlock.c +LIBKVM_arm64 += lib/arm64/ucall.c +LIBKVM_arm64 += lib/arm64/vgic.c + +LIBKVM_s390 += lib/s390/diag318_test_handler.c +LIBKVM_s390 += lib/s390/processor.c +LIBKVM_s390 += lib/s390/ucall.c +LIBKVM_s390 += lib/s390/facility.c + +LIBKVM_riscv += lib/riscv/handlers.S +LIBKVM_riscv += lib/riscv/processor.c +LIBKVM_riscv += lib/riscv/ucall.c + +# Non-compiled test targets +TEST_PROGS_x86 += x86/nx_huge_pages_test.sh + +# Compiled test targets +TEST_GEN_PROGS_x86 = x86/cpuid_test +TEST_GEN_PROGS_x86 += x86/cr4_cpuid_sync_test +TEST_GEN_PROGS_x86 += x86/dirty_log_page_splitting_test +TEST_GEN_PROGS_x86 += x86/feature_msrs_test +TEST_GEN_PROGS_x86 += x86/exit_on_emulation_failure_test +TEST_GEN_PROGS_x86 += x86/fix_hypercall_test +TEST_GEN_PROGS_x86 += x86/hwcr_msr_test +TEST_GEN_PROGS_x86 += x86/hyperv_clock +TEST_GEN_PROGS_x86 += x86/hyperv_cpuid +TEST_GEN_PROGS_x86 += x86/hyperv_evmcs +TEST_GEN_PROGS_x86 += x86/hyperv_extended_hypercalls +TEST_GEN_PROGS_x86 += x86/hyperv_features +TEST_GEN_PROGS_x86 += x86/hyperv_ipi +TEST_GEN_PROGS_x86 += x86/hyperv_svm_test +TEST_GEN_PROGS_x86 += x86/hyperv_tlb_flush +TEST_GEN_PROGS_x86 += x86/kvm_clock_test +TEST_GEN_PROGS_x86 += x86/kvm_pv_test +TEST_GEN_PROGS_x86 += x86/monitor_mwait_test +TEST_GEN_PROGS_x86 += x86/nested_exceptions_test +TEST_GEN_PROGS_x86 += x86/platform_info_test +TEST_GEN_PROGS_x86 += x86/pmu_counters_test +TEST_GEN_PROGS_x86 += x86/pmu_event_filter_test +TEST_GEN_PROGS_x86 += x86/private_mem_conversions_test +TEST_GEN_PROGS_x86 += x86/private_mem_kvm_exits_test +TEST_GEN_PROGS_x86 += x86/set_boot_cpu_id +TEST_GEN_PROGS_x86 += x86/set_sregs_test +TEST_GEN_PROGS_x86 += x86/smaller_maxphyaddr_emulation_test +TEST_GEN_PROGS_x86 += 
x86/smm_test +TEST_GEN_PROGS_x86 += x86/state_test +TEST_GEN_PROGS_x86 += x86/vmx_preemption_timer_test +TEST_GEN_PROGS_x86 += x86/svm_vmcall_test +TEST_GEN_PROGS_x86 += x86/svm_int_ctl_test +TEST_GEN_PROGS_x86 += x86/svm_nested_shutdown_test +TEST_GEN_PROGS_x86 += x86/svm_nested_soft_inject_test +TEST_GEN_PROGS_x86 += x86/tsc_scaling_sync +TEST_GEN_PROGS_x86 += x86/sync_regs_test +TEST_GEN_PROGS_x86 += x86/ucna_injection_test +TEST_GEN_PROGS_x86 += x86/userspace_io_test +TEST_GEN_PROGS_x86 += x86/userspace_msr_exit_test +TEST_GEN_PROGS_x86 += x86/vmx_apic_access_test +TEST_GEN_PROGS_x86 += x86/vmx_close_while_nested_test +TEST_GEN_PROGS_x86 += x86/vmx_dirty_log_test +TEST_GEN_PROGS_x86 += x86/vmx_exception_with_invalid_guest_state +TEST_GEN_PROGS_x86 += x86/vmx_msrs_test +TEST_GEN_PROGS_x86 += x86/vmx_invalid_nested_guest_state +TEST_GEN_PROGS_x86 += x86/vmx_set_nested_state_test +TEST_GEN_PROGS_x86 += x86/vmx_tsc_adjust_test +TEST_GEN_PROGS_x86 += x86/vmx_nested_tsc_scaling_test +TEST_GEN_PROGS_x86 += x86/apic_bus_clock_test +TEST_GEN_PROGS_x86 += x86/xapic_ipi_test +TEST_GEN_PROGS_x86 += x86/xapic_state_test +TEST_GEN_PROGS_x86 += x86/xcr0_cpuid_test +TEST_GEN_PROGS_x86 += x86/xss_msr_test +TEST_GEN_PROGS_x86 += x86/debug_regs +TEST_GEN_PROGS_x86 += x86/tsc_msrs_test +TEST_GEN_PROGS_x86 += x86/vmx_pmu_caps_test +TEST_GEN_PROGS_x86 += x86/xen_shinfo_test +TEST_GEN_PROGS_x86 += x86/xen_vmcall_test +TEST_GEN_PROGS_x86 += x86/sev_init2_tests +TEST_GEN_PROGS_x86 += x86/sev_migrate_tests +TEST_GEN_PROGS_x86 += x86/sev_smoke_test +TEST_GEN_PROGS_x86 += x86/amx_test +TEST_GEN_PROGS_x86 += x86/max_vcpuid_cap_test +TEST_GEN_PROGS_x86 += x86/triple_fault_event_test +TEST_GEN_PROGS_x86 += x86/recalc_apic_map_test +TEST_GEN_PROGS_x86 += access_tracking_perf_test +TEST_GEN_PROGS_x86 += coalesced_io_test +TEST_GEN_PROGS_x86 += demand_paging_test +TEST_GEN_PROGS_x86 += dirty_log_test +TEST_GEN_PROGS_x86 += dirty_log_perf_test +TEST_GEN_PROGS_x86 += guest_memfd_test +TEST_GEN_PROGS_x86 += guest_print_test +TEST_GEN_PROGS_x86 += hardware_disable_test +TEST_GEN_PROGS_x86 += kvm_create_max_vcpus +TEST_GEN_PROGS_x86 += kvm_page_table_test +TEST_GEN_PROGS_x86 += memslot_modification_stress_test +TEST_GEN_PROGS_x86 += memslot_perf_test +TEST_GEN_PROGS_x86 += mmu_stress_test +TEST_GEN_PROGS_x86 += rseq_test +TEST_GEN_PROGS_x86 += set_memory_region_test +TEST_GEN_PROGS_x86 += steal_time +TEST_GEN_PROGS_x86 += kvm_binary_stats_test +TEST_GEN_PROGS_x86 += system_counter_offset_test +TEST_GEN_PROGS_x86 += pre_fault_memory_test + +# Compiled outputs used by test targets +TEST_GEN_PROGS_EXTENDED_x86 += x86/nx_huge_pages_test + +TEST_GEN_PROGS_arm64 += arm64/aarch32_id_regs +TEST_GEN_PROGS_arm64 += arm64/arch_timer_edge_cases +TEST_GEN_PROGS_arm64 += arm64/debug-exceptions +TEST_GEN_PROGS_arm64 += arm64/hypercalls +TEST_GEN_PROGS_arm64 += arm64/mmio_abort +TEST_GEN_PROGS_arm64 += arm64/page_fault_test +TEST_GEN_PROGS_arm64 += arm64/psci_test +TEST_GEN_PROGS_arm64 += arm64/set_id_regs +TEST_GEN_PROGS_arm64 += arm64/smccc_filter +TEST_GEN_PROGS_arm64 += arm64/vcpu_width_config +TEST_GEN_PROGS_arm64 += arm64/vgic_init +TEST_GEN_PROGS_arm64 += arm64/vgic_irq +TEST_GEN_PROGS_arm64 += arm64/vgic_lpi_stress +TEST_GEN_PROGS_arm64 += arm64/vpmu_counter_access +TEST_GEN_PROGS_arm64 += arm64/no-vgic-v3 +TEST_GEN_PROGS_arm64 += access_tracking_perf_test +TEST_GEN_PROGS_arm64 += arch_timer +TEST_GEN_PROGS_arm64 += coalesced_io_test +TEST_GEN_PROGS_arm64 += demand_paging_test +TEST_GEN_PROGS_arm64 += dirty_log_test 
+TEST_GEN_PROGS_arm64 += dirty_log_perf_test +TEST_GEN_PROGS_arm64 += guest_print_test +TEST_GEN_PROGS_arm64 += get-reg-list +TEST_GEN_PROGS_arm64 += kvm_create_max_vcpus +TEST_GEN_PROGS_arm64 += kvm_page_table_test +TEST_GEN_PROGS_arm64 += memslot_modification_stress_test +TEST_GEN_PROGS_arm64 += memslot_perf_test +TEST_GEN_PROGS_arm64 += mmu_stress_test +TEST_GEN_PROGS_arm64 += rseq_test +TEST_GEN_PROGS_arm64 += set_memory_region_test +TEST_GEN_PROGS_arm64 += steal_time +TEST_GEN_PROGS_arm64 += kvm_binary_stats_test + +TEST_GEN_PROGS_s390 = s390/memop +TEST_GEN_PROGS_s390 += s390/resets +TEST_GEN_PROGS_s390 += s390/sync_regs_test +TEST_GEN_PROGS_s390 += s390/tprot +TEST_GEN_PROGS_s390 += s390/cmma_test +TEST_GEN_PROGS_s390 += s390/debug_test +TEST_GEN_PROGS_s390 += s390/cpumodel_subfuncs_test +TEST_GEN_PROGS_s390 += s390/shared_zeropage_test +TEST_GEN_PROGS_s390 += s390/ucontrol_test +TEST_GEN_PROGS_s390 += demand_paging_test +TEST_GEN_PROGS_s390 += dirty_log_test +TEST_GEN_PROGS_s390 += guest_print_test +TEST_GEN_PROGS_s390 += kvm_create_max_vcpus +TEST_GEN_PROGS_s390 += kvm_page_table_test +TEST_GEN_PROGS_s390 += rseq_test +TEST_GEN_PROGS_s390 += set_memory_region_test +TEST_GEN_PROGS_s390 += kvm_binary_stats_test + +TEST_GEN_PROGS_riscv += riscv/sbi_pmu_test +TEST_GEN_PROGS_riscv += riscv/ebreak_test +TEST_GEN_PROGS_riscv += arch_timer +TEST_GEN_PROGS_riscv += coalesced_io_test +TEST_GEN_PROGS_riscv += demand_paging_test +TEST_GEN_PROGS_riscv += dirty_log_test +TEST_GEN_PROGS_riscv += get-reg-list +TEST_GEN_PROGS_riscv += guest_print_test +TEST_GEN_PROGS_riscv += kvm_binary_stats_test +TEST_GEN_PROGS_riscv += kvm_create_max_vcpus +TEST_GEN_PROGS_riscv += kvm_page_table_test +TEST_GEN_PROGS_riscv += set_memory_region_test +TEST_GEN_PROGS_riscv += steal_time + +SPLIT_TESTS += arch_timer +SPLIT_TESTS += get-reg-list + +TEST_PROGS += $(TEST_PROGS_$(ARCH)) +TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(ARCH)) +TEST_GEN_PROGS_EXTENDED += $(TEST_GEN_PROGS_EXTENDED_$(ARCH)) +LIBKVM += $(LIBKVM_$(ARCH)) + +OVERRIDE_TARGETS = 1 + +# lib.mak defines $(OUTPUT), prepends $(OUTPUT)/ to $(TEST_GEN_PROGS), and most +# importantly defines, i.e. overwrites, $(CC) (unless `make -e` or `make CC=`, +# which causes the environment variable to override the makefile). +include ../lib.mk + +INSTALL_HDR_PATH = $(top_srcdir)/usr +LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/ +LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include +LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include +CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ + -Wno-gnu-variable-sized-type-not-at-end -MD -MP -DCONFIG_64BIT \ + -fno-builtin-memcmp -fno-builtin-memcpy \ + -fno-builtin-memset -fno-builtin-strnlen \ + -fno-stack-protector -fno-PIE -fno-strict-aliasing \ + -I$(LINUX_TOOL_INCLUDE) -I$(LINUX_TOOL_ARCH_INCLUDE) \ + -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -Iinclude/$(ARCH) \ + -I ../rseq -I.. 
$(EXTRA_CFLAGS) $(KHDR_INCLUDES) +ifeq ($(ARCH),s390) + CFLAGS += -march=z10 +endif +ifeq ($(ARCH),x86) +ifeq ($(shell echo "void foo(void) { }" | $(CC) -march=x86-64-v2 -x c - -c -o /dev/null 2>/dev/null; echo "$$?"),0) + CFLAGS += -march=x86-64-v2 +endif +endif +ifeq ($(ARCH),arm64) +tools_dir := $(top_srcdir)/tools +arm64_tools_dir := $(tools_dir)/arch/arm64/tools/ + +ifneq ($(abs_objdir),) +arm64_hdr_outdir := $(abs_objdir)/tools/ +else +arm64_hdr_outdir := $(tools_dir)/ +endif + +GEN_HDRS := $(arm64_hdr_outdir)arch/arm64/include/generated/ +CFLAGS += -I$(GEN_HDRS) + +$(GEN_HDRS): $(wildcard $(arm64_tools_dir)/*) + $(MAKE) -C $(arm64_tools_dir) OUTPUT=$(arm64_hdr_outdir) +endif + +no-pie-option := $(call try-run, echo 'int main(void) { return 0; }' | \ + $(CC) -Werror $(CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie) + +# On s390, build the testcases KVM-enabled +pgste-option = $(call try-run, echo 'int main(void) { return 0; }' | \ + $(CC) -Werror -Wl$(comma)--s390-pgste -x c - -o "$$TMP",-Wl$(comma)--s390-pgste) + +LDLIBS += -ldl +LDFLAGS += -pthread $(no-pie-option) $(pgste-option) + +LIBKVM_C := $(filter %.c,$(LIBKVM)) +LIBKVM_S := $(filter %.S,$(LIBKVM)) +LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C)) +LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S)) +LIBKVM_STRING_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_STRING)) +LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(LIBKVM_STRING_OBJ) +SPLIT_TEST_GEN_PROGS := $(patsubst %, $(OUTPUT)/%, $(SPLIT_TESTS)) +SPLIT_TEST_GEN_OBJ := $(patsubst %, $(OUTPUT)/$(ARCH)/%.o, $(SPLIT_TESTS)) + +TEST_GEN_OBJ = $(patsubst %, %.o, $(TEST_GEN_PROGS)) +TEST_GEN_OBJ += $(patsubst %, %.o, $(TEST_GEN_PROGS_EXTENDED)) +TEST_DEP_FILES = $(patsubst %.o, %.d, $(TEST_GEN_OBJ)) +TEST_DEP_FILES += $(patsubst %.o, %.d, $(LIBKVM_OBJS)) +TEST_DEP_FILES += $(patsubst %.o, %.d, $(SPLIT_TEST_GEN_OBJ)) +-include $(TEST_DEP_FILES) + +$(shell mkdir -p $(sort $(OUTPUT)/$(ARCH) $(dir $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ)))) + +$(filter-out $(SPLIT_TEST_GEN_PROGS), $(TEST_GEN_PROGS)) \ +$(TEST_GEN_PROGS_EXTENDED): %: %.o + $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) $< $(LIBKVM_OBJS) $(LDLIBS) -o $@ +$(TEST_GEN_OBJ): $(OUTPUT)/%.o: %.c + $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ + +$(SPLIT_TEST_GEN_PROGS): $(OUTPUT)/%: $(OUTPUT)/%.o $(OUTPUT)/$(ARCH)/%.o + $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) $^ $(LDLIBS) -o $@ +$(SPLIT_TEST_GEN_OBJ): $(OUTPUT)/$(ARCH)/%.o: $(ARCH)/%.c + $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ + +EXTRA_CLEAN += $(GEN_HDRS) \ + $(LIBKVM_OBJS) \ + $(SPLIT_TEST_GEN_OBJ) \ + $(TEST_DEP_FILES) \ + $(TEST_GEN_OBJ) \ + cscope.* + +$(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c $(GEN_HDRS) + $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ + +$(LIBKVM_S_OBJ): $(OUTPUT)/%.o: %.S $(GEN_HDRS) + $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ + +# Compile the string overrides as freestanding to prevent the compiler from +# generating self-referential code, e.g. without "freestanding" the compiler may +# "optimize" memcmp() by invoking memcmp(), thus causing infinite recursion. +$(LIBKVM_STRING_OBJ): $(OUTPUT)/%.o: %.c + $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c -ffreestanding $< -o $@ + +$(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS)))) +$(SPLIT_TEST_GEN_OBJ): $(GEN_HDRS) +$(TEST_GEN_PROGS): $(LIBKVM_OBJS) +$(TEST_GEN_PROGS_EXTENDED): $(LIBKVM_OBJS) +$(TEST_GEN_OBJ): $(GEN_HDRS) + +cscope: include_paths = $(LINUX_TOOL_INCLUDE) $(LINUX_HDR_PATH) include lib .. 
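One pattern worth calling out from the rules carried into Makefile.kvm above: no-pie-option and pgste-option are compiler probes built on try-run from tools/build/Build.include, which is pulled in at the top of this file. try-run executes its first argument as a shell command and expands to the second argument only when that command succeeds, so a flag is added only when the toolchain accepts it. Reduced to a hypothetical flag for illustration:

# try-run(<shell command>, <value if it succeeds>[, <value if it fails>]);
# "$$TMP" is the scratch output file that try-run supplies and cleans up.
maybe-flag := $(call try-run, echo 'int main(void) { return 0; }' | \
              $(CC) -Werror -fhypothetical-flag -x c - -o "$$TMP", \
              -fhypothetical-flag)
CFLAGS += $(maybe-flag)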
+cscope: + $(RM) cscope.* + (find $(include_paths) -name '*.h' \ + -exec realpath --relative-base=$(PWD) {} \;; \ + find . -name '*.c' \ + -exec realpath --relative-base=$(PWD) {} \;) | sort -u > cscope.files + cscope -b diff --git a/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c b/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c index 8e5bd07a3727..447d61cae4db 100644 --- a/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c +++ b/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c @@ -97,7 +97,7 @@ static void test_user_raz_wi(struct kvm_vcpu *vcpu) uint64_t reg_id = raz_wi_reg_ids[i]; uint64_t val; - vcpu_get_reg(vcpu, reg_id, &val); + val = vcpu_get_reg(vcpu, reg_id); TEST_ASSERT_EQ(val, 0); /* @@ -106,7 +106,7 @@ static void test_user_raz_wi(struct kvm_vcpu *vcpu) */ vcpu_set_reg(vcpu, reg_id, BAD_ID_REG_VAL); - vcpu_get_reg(vcpu, reg_id, &val); + val = vcpu_get_reg(vcpu, reg_id); TEST_ASSERT_EQ(val, 0); } } @@ -126,14 +126,14 @@ static void test_user_raz_invariant(struct kvm_vcpu *vcpu) uint64_t reg_id = raz_invariant_reg_ids[i]; uint64_t val; - vcpu_get_reg(vcpu, reg_id, &val); + val = vcpu_get_reg(vcpu, reg_id); TEST_ASSERT_EQ(val, 0); r = __vcpu_set_reg(vcpu, reg_id, BAD_ID_REG_VAL); TEST_ASSERT(r < 0 && errno == EINVAL, "unexpected KVM_SET_ONE_REG error: r=%d, errno=%d", r, errno); - vcpu_get_reg(vcpu, reg_id, &val); + val = vcpu_get_reg(vcpu, reg_id); TEST_ASSERT_EQ(val, 0); } } @@ -144,7 +144,7 @@ static bool vcpu_aarch64_only(struct kvm_vcpu *vcpu) { uint64_t val, el0; - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), &val); + val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val); return el0 == ID_AA64PFR0_EL1_ELx_64BIT_ONLY; diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/arm64/arch_timer.c index eeba1cc87ff8..eeba1cc87ff8 100644 --- a/tools/testing/selftests/kvm/aarch64/arch_timer.c +++ b/tools/testing/selftests/kvm/arm64/arch_timer.c diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer_edge_cases.c b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c index a36a7e2db434..a36a7e2db434 100644 --- a/tools/testing/selftests/kvm/aarch64/arch_timer_edge_cases.c +++ b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/arm64/debug-exceptions.c index ff7a949fc96a..c7fb55c9135b 100644 --- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c +++ b/tools/testing/selftests/kvm/arm64/debug-exceptions.c @@ -501,7 +501,7 @@ void test_single_step_from_userspace(int test_cnt) TEST_ASSERT(ss_enable, "Unexpected KVM_EXIT_DEBUG"); /* Check if the current pc is expected. 
*/ - vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc), &pc); + pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc)); TEST_ASSERT(!test_pc || pc == test_pc, "Unexpected pc 0x%lx (expected 0x%lx)", pc, test_pc); @@ -583,7 +583,7 @@ int main(int argc, char *argv[]) uint64_t aa64dfr0; vm = vm_create_with_one_vcpu(&vcpu, guest_code); - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1), &aa64dfr0); + aa64dfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1)); __TEST_REQUIRE(debug_version(aa64dfr0) >= 6, "Armv8 debug architecture not supported."); kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/arm64/get-reg-list.c index d43fb3f49050..d43fb3f49050 100644 --- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c +++ b/tools/testing/selftests/kvm/arm64/get-reg-list.c diff --git a/tools/testing/selftests/kvm/aarch64/hypercalls.c b/tools/testing/selftests/kvm/arm64/hypercalls.c index 9d192ce0078d..ec54ec7726e9 100644 --- a/tools/testing/selftests/kvm/aarch64/hypercalls.c +++ b/tools/testing/selftests/kvm/arm64/hypercalls.c @@ -173,7 +173,7 @@ static void test_fw_regs_before_vm_start(struct kvm_vcpu *vcpu) const struct kvm_fw_reg_info *reg_info = &fw_reg_info[i]; /* First 'read' should be an upper limit of the features supported */ - vcpu_get_reg(vcpu, reg_info->reg, &val); + val = vcpu_get_reg(vcpu, reg_info->reg); TEST_ASSERT(val == FW_REG_ULIMIT_VAL(reg_info->max_feat_bit), "Expected all the features to be set for reg: 0x%lx; expected: 0x%lx; read: 0x%lx", reg_info->reg, FW_REG_ULIMIT_VAL(reg_info->max_feat_bit), val); @@ -184,7 +184,7 @@ static void test_fw_regs_before_vm_start(struct kvm_vcpu *vcpu) "Failed to clear all the features of reg: 0x%lx; ret: %d", reg_info->reg, errno); - vcpu_get_reg(vcpu, reg_info->reg, &val); + val = vcpu_get_reg(vcpu, reg_info->reg); TEST_ASSERT(val == 0, "Expected all the features to be cleared for reg: 0x%lx", reg_info->reg); @@ -214,7 +214,7 @@ static void test_fw_regs_after_vm_start(struct kvm_vcpu *vcpu) * Before starting the VM, the test clears all the bits. * Check if that's still the case. 
*/ - vcpu_get_reg(vcpu, reg_info->reg, &val); + val = vcpu_get_reg(vcpu, reg_info->reg); TEST_ASSERT(val == 0, "Expected all the features to be cleared for reg: 0x%lx", reg_info->reg); diff --git a/tools/testing/selftests/kvm/aarch64/mmio_abort.c b/tools/testing/selftests/kvm/arm64/mmio_abort.c index 8b7a80a51b1c..8b7a80a51b1c 100644 --- a/tools/testing/selftests/kvm/aarch64/mmio_abort.c +++ b/tools/testing/selftests/kvm/arm64/mmio_abort.c diff --git a/tools/testing/selftests/kvm/aarch64/no-vgic-v3.c b/tools/testing/selftests/kvm/arm64/no-vgic-v3.c index 58304bbc2036..ebd70430c89d 100644 --- a/tools/testing/selftests/kvm/aarch64/no-vgic-v3.c +++ b/tools/testing/selftests/kvm/arm64/no-vgic-v3.c @@ -164,7 +164,7 @@ int main(int argc, char *argv[]) uint64_t pfr0; vm = vm_create_with_one_vcpu(&vcpu, NULL); - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), &pfr0); + pfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); __TEST_REQUIRE(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC), pfr0), "GICv3 not supported."); kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/arm64/page_fault_test.c index ec33a8f9c908..ec33a8f9c908 100644 --- a/tools/testing/selftests/kvm/aarch64/page_fault_test.c +++ b/tools/testing/selftests/kvm/arm64/page_fault_test.c diff --git a/tools/testing/selftests/kvm/aarch64/psci_test.c b/tools/testing/selftests/kvm/arm64/psci_test.c index eaa7655fefc1..ab491ee9e5f7 100644 --- a/tools/testing/selftests/kvm/aarch64/psci_test.c +++ b/tools/testing/selftests/kvm/arm64/psci_test.c @@ -111,8 +111,8 @@ static void assert_vcpu_reset(struct kvm_vcpu *vcpu) { uint64_t obs_pc, obs_x0; - vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc), &obs_pc); - vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.regs[0]), &obs_x0); + obs_pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc)); + obs_x0 = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.regs[0])); TEST_ASSERT(obs_pc == CPU_ON_ENTRY_ADDR, "unexpected target cpu pc: %lx (expected: %lx)", @@ -152,7 +152,7 @@ static void host_test_cpu_on(void) */ vcpu_power_off(target); - vcpu_get_reg(target, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), &target_mpidr); + target_mpidr = vcpu_get_reg(target, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1)); vcpu_args_set(source, 1, target_mpidr & MPIDR_HWID_BITMASK); enter_guest(source); @@ -244,7 +244,7 @@ static void host_test_system_off2(void) setup_vm(guest_test_system_off2, &source, &target); - vcpu_get_reg(target, KVM_REG_ARM_PSCI_VERSION, &psci_version); + psci_version = vcpu_get_reg(target, KVM_REG_ARM_PSCI_VERSION); TEST_ASSERT(psci_version >= PSCI_VERSION(1, 3), "Unexpected PSCI version %lu.%lu", diff --git a/tools/testing/selftests/kvm/aarch64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c index 3a97c160b5fe..3dd85ce8551c 100644 --- a/tools/testing/selftests/kvm/aarch64/set_id_regs.c +++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c @@ -345,7 +345,7 @@ static uint64_t test_reg_set_success(struct kvm_vcpu *vcpu, uint64_t reg, uint64_t mask = ftr_bits->mask; uint64_t val, new_val, ftr; - vcpu_get_reg(vcpu, reg, &val); + val = vcpu_get_reg(vcpu, reg); ftr = (val & mask) >> shift; ftr = get_safe_value(ftr_bits, ftr); @@ -355,7 +355,7 @@ static uint64_t test_reg_set_success(struct kvm_vcpu *vcpu, uint64_t reg, val |= ftr; vcpu_set_reg(vcpu, reg, val); - vcpu_get_reg(vcpu, reg, &new_val); + new_val = vcpu_get_reg(vcpu, reg); TEST_ASSERT_EQ(new_val, val); return new_val; @@ -369,7 +369,7 @@ static void test_reg_set_fail(struct kvm_vcpu *vcpu, uint64_t reg, 
uint64_t val, old_val, ftr; int r; - vcpu_get_reg(vcpu, reg, &val); + val = vcpu_get_reg(vcpu, reg); ftr = (val & mask) >> shift; ftr = get_invalid_value(ftr_bits, ftr); @@ -383,7 +383,7 @@ static void test_reg_set_fail(struct kvm_vcpu *vcpu, uint64_t reg, TEST_ASSERT(r < 0 && errno == EINVAL, "Unexpected KVM_SET_ONE_REG error: r=%d, errno=%d", r, errno); - vcpu_get_reg(vcpu, reg, &val); + val = vcpu_get_reg(vcpu, reg); TEST_ASSERT_EQ(val, old_val); } @@ -470,7 +470,7 @@ static void test_user_set_mpam_reg(struct kvm_vcpu *vcpu) } /* Get the id register value */ - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), &val); + val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); /* Try to set MPAM=0. This should always be possible. */ val &= ~ID_AA64PFR0_EL1_MPAM_MASK; @@ -507,7 +507,7 @@ static void test_user_set_mpam_reg(struct kvm_vcpu *vcpu) } /* Get the id register value */ - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), &val); + val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1)); /* Try to set MPAM_frac=0. This should always be possible. */ val &= ~ID_AA64PFR1_EL1_MPAM_frac_MASK; @@ -575,7 +575,7 @@ static void test_clidr(struct kvm_vcpu *vcpu) uint64_t clidr; int level; - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CLIDR_EL1), &clidr); + clidr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CLIDR_EL1)); /* find the first empty level in the cache hierarchy */ for (level = 1; level < 7; level++) { @@ -600,7 +600,7 @@ static void test_ctr(struct kvm_vcpu *vcpu) { u64 ctr; - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CTR_EL0), &ctr); + ctr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CTR_EL0)); ctr &= ~CTR_EL0_DIC_MASK; if (ctr & CTR_EL0_IminLine_MASK) ctr--; @@ -616,7 +616,7 @@ static void test_vcpu_ftr_id_regs(struct kvm_vcpu *vcpu) test_clidr(vcpu); test_ctr(vcpu); - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), &val); + val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1)); val++; vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), val); @@ -629,7 +629,7 @@ static void test_assert_id_reg_unchanged(struct kvm_vcpu *vcpu, uint32_t encodin size_t idx = encoding_to_range_idx(encoding); uint64_t observed; - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(encoding), &observed); + observed = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(encoding)); TEST_ASSERT_EQ(test_reg_vals[idx], observed); } @@ -664,7 +664,7 @@ int main(void) vm = vm_create_with_one_vcpu(&vcpu, guest_code); /* Check for AARCH64 only system */ - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), &val); + val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val); aarch64_only = (el0 == ID_AA64PFR0_EL1_ELx_64BIT_ONLY); diff --git a/tools/testing/selftests/kvm/aarch64/smccc_filter.c b/tools/testing/selftests/kvm/arm64/smccc_filter.c index 2d189f3da228..2d189f3da228 100644 --- a/tools/testing/selftests/kvm/aarch64/smccc_filter.c +++ b/tools/testing/selftests/kvm/arm64/smccc_filter.c diff --git a/tools/testing/selftests/kvm/aarch64/vcpu_width_config.c b/tools/testing/selftests/kvm/arm64/vcpu_width_config.c index 80b74c6f152b..80b74c6f152b 100644 --- a/tools/testing/selftests/kvm/aarch64/vcpu_width_config.c +++ b/tools/testing/selftests/kvm/arm64/vcpu_width_config.c diff --git a/tools/testing/selftests/kvm/aarch64/vgic_init.c b/tools/testing/selftests/kvm/arm64/vgic_init.c index b3b5fb0ff0a9..b3b5fb0ff0a9 100644 --- a/tools/testing/selftests/kvm/aarch64/vgic_init.c +++ b/tools/testing/selftests/kvm/arm64/vgic_init.c diff 
--git a/tools/testing/selftests/kvm/aarch64/vgic_irq.c b/tools/testing/selftests/kvm/arm64/vgic_irq.c index f4ac28d53747..f4ac28d53747 100644 --- a/tools/testing/selftests/kvm/aarch64/vgic_irq.c +++ b/tools/testing/selftests/kvm/arm64/vgic_irq.c diff --git a/tools/testing/selftests/kvm/aarch64/vgic_lpi_stress.c b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c index fc4fe52fb6f8..fc4fe52fb6f8 100644 --- a/tools/testing/selftests/kvm/aarch64/vgic_lpi_stress.c +++ b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c diff --git a/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c b/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c index f9c0c86d7e85..f16b3b27e32e 100644 --- a/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c +++ b/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c @@ -440,8 +440,7 @@ static void create_vpmu_vm(void *guest_code) "Failed to create vgic-v3, skipping"); /* Make sure that PMUv3 support is indicated in the ID register */ - vcpu_get_reg(vpmu_vm.vcpu, - KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1), &dfr0); + dfr0 = vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1)); pmuver = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), dfr0); TEST_ASSERT(pmuver != ID_AA64DFR0_EL1_PMUVer_IMP_DEF && pmuver >= ID_AA64DFR0_EL1_PMUVer_IMP, @@ -484,7 +483,7 @@ static void test_create_vpmu_vm_with_pmcr_n(uint64_t pmcr_n, bool expect_fail) create_vpmu_vm(guest_code); vcpu = vpmu_vm.vcpu; - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), &pmcr_orig); + pmcr_orig = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0)); pmcr = pmcr_orig; /* @@ -493,7 +492,7 @@ static void test_create_vpmu_vm_with_pmcr_n(uint64_t pmcr_n, bool expect_fail) */ set_pmcr_n(&pmcr, pmcr_n); vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), pmcr); - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), &pmcr); + pmcr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0)); if (expect_fail) TEST_ASSERT(pmcr_orig == pmcr, @@ -521,7 +520,7 @@ static void run_access_test(uint64_t pmcr_n) vcpu = vpmu_vm.vcpu; /* Save the initial sp to restore them later to run the guest again */ - vcpu_get_reg(vcpu, ARM64_CORE_REG(sp_el1), &sp); + sp = vcpu_get_reg(vcpu, ARM64_CORE_REG(sp_el1)); run_vcpu(vcpu, pmcr_n); @@ -572,12 +571,12 @@ static void run_pmregs_validity_test(uint64_t pmcr_n) * Test if the 'set' and 'clr' variants of the registers * are initialized based on the number of valid counters. 
*/ - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id), ®_val); + reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id)); TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0, "Initial read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx", KVM_ARM64_SYS_REG(set_reg_id), reg_val); - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id), ®_val); + reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id)); TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0, "Initial read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx", KVM_ARM64_SYS_REG(clr_reg_id), reg_val); @@ -589,12 +588,12 @@ static void run_pmregs_validity_test(uint64_t pmcr_n) */ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id), max_counters_mask); - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id), ®_val); + reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id)); TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0, "Read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx", KVM_ARM64_SYS_REG(set_reg_id), reg_val); - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id), ®_val); + reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id)); TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0, "Read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx", KVM_ARM64_SYS_REG(clr_reg_id), reg_val); @@ -625,7 +624,7 @@ static uint64_t get_pmcr_n_limit(void) uint64_t pmcr; create_vpmu_vm(guest_code); - vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), &pmcr); + pmcr = vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0)); destroy_vpmu_vm(); return get_pmcr_n(pmcr); } diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index 9f24303acb8c..e79817bd0e29 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -21,7 +21,7 @@ #include "ucall_common.h" #ifdef __aarch64__ -#include "aarch64/vgic.h" +#include "arm64/vgic.h" static int gic_fd; diff --git a/tools/testing/selftests/kvm/include/aarch64/arch_timer.h b/tools/testing/selftests/kvm/include/arm64/arch_timer.h index bf461de34785..bf461de34785 100644 --- a/tools/testing/selftests/kvm/include/aarch64/arch_timer.h +++ b/tools/testing/selftests/kvm/include/arm64/arch_timer.h diff --git a/tools/testing/selftests/kvm/include/aarch64/delay.h b/tools/testing/selftests/kvm/include/arm64/delay.h index 329e4f5079ea..329e4f5079ea 100644 --- a/tools/testing/selftests/kvm/include/aarch64/delay.h +++ b/tools/testing/selftests/kvm/include/arm64/delay.h diff --git a/tools/testing/selftests/kvm/include/aarch64/gic.h b/tools/testing/selftests/kvm/include/arm64/gic.h index baeb3c859389..baeb3c859389 100644 --- a/tools/testing/selftests/kvm/include/aarch64/gic.h +++ b/tools/testing/selftests/kvm/include/arm64/gic.h diff --git a/tools/testing/selftests/kvm/include/aarch64/gic_v3.h b/tools/testing/selftests/kvm/include/arm64/gic_v3.h index a76615fa39a1..a76615fa39a1 100644 --- a/tools/testing/selftests/kvm/include/aarch64/gic_v3.h +++ b/tools/testing/selftests/kvm/include/arm64/gic_v3.h diff --git a/tools/testing/selftests/kvm/include/aarch64/gic_v3_its.h b/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h index 3722ed9c8f96..3722ed9c8f96 100644 --- a/tools/testing/selftests/kvm/include/aarch64/gic_v3_its.h +++ b/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h diff --git a/tools/testing/selftests/kvm/include/aarch64/kvm_util_arch.h b/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h index 
e43a57d99b56..e43a57d99b56 100644 --- a/tools/testing/selftests/kvm/include/aarch64/kvm_util_arch.h +++ b/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/arm64/processor.h index 1e8d0d531fbd..1e8d0d531fbd 100644 --- a/tools/testing/selftests/kvm/include/aarch64/processor.h +++ b/tools/testing/selftests/kvm/include/arm64/processor.h diff --git a/tools/testing/selftests/kvm/include/aarch64/spinlock.h b/tools/testing/selftests/kvm/include/arm64/spinlock.h index cf0984106d14..cf0984106d14 100644 --- a/tools/testing/selftests/kvm/include/aarch64/spinlock.h +++ b/tools/testing/selftests/kvm/include/arm64/spinlock.h diff --git a/tools/testing/selftests/kvm/include/aarch64/ucall.h b/tools/testing/selftests/kvm/include/arm64/ucall.h index 4ec801f37f00..4ec801f37f00 100644 --- a/tools/testing/selftests/kvm/include/aarch64/ucall.h +++ b/tools/testing/selftests/kvm/include/arm64/ucall.h diff --git a/tools/testing/selftests/kvm/include/aarch64/vgic.h b/tools/testing/selftests/kvm/include/arm64/vgic.h index c481d0c00a5d..c481d0c00a5d 100644 --- a/tools/testing/selftests/kvm/include/aarch64/vgic.h +++ b/tools/testing/selftests/kvm/include/arm64/vgic.h diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index bc7c242480d6..4c4e5a847f67 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -702,16 +702,22 @@ static inline int __vcpu_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t va return __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, ®); } -static inline void vcpu_get_reg(struct kvm_vcpu *vcpu, uint64_t id, void *addr) +static inline uint64_t vcpu_get_reg(struct kvm_vcpu *vcpu, uint64_t id) { - struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)addr }; + uint64_t val; + struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)&val }; + + TEST_ASSERT(KVM_REG_SIZE(id) <= sizeof(val), "Reg %lx too big", id); vcpu_ioctl(vcpu, KVM_GET_ONE_REG, ®); + return val; } static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val) { struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)&val }; + TEST_ASSERT(KVM_REG_SIZE(id) <= sizeof(val), "Reg %lx too big", id); + vcpu_ioctl(vcpu, KVM_SET_ONE_REG, ®); } diff --git a/tools/testing/selftests/kvm/include/s390x/debug_print.h b/tools/testing/selftests/kvm/include/s390/debug_print.h index 1bf275631cc6..1bf275631cc6 100644 --- a/tools/testing/selftests/kvm/include/s390x/debug_print.h +++ b/tools/testing/selftests/kvm/include/s390/debug_print.h diff --git a/tools/testing/selftests/kvm/include/s390x/diag318_test_handler.h b/tools/testing/selftests/kvm/include/s390/diag318_test_handler.h index b0ed71302722..b0ed71302722 100644 --- a/tools/testing/selftests/kvm/include/s390x/diag318_test_handler.h +++ b/tools/testing/selftests/kvm/include/s390/diag318_test_handler.h diff --git a/tools/testing/selftests/kvm/include/s390x/facility.h b/tools/testing/selftests/kvm/include/s390/facility.h index 00a1ced6538b..00a1ced6538b 100644 --- a/tools/testing/selftests/kvm/include/s390x/facility.h +++ b/tools/testing/selftests/kvm/include/s390/facility.h diff --git a/tools/testing/selftests/kvm/include/s390x/kvm_util_arch.h b/tools/testing/selftests/kvm/include/s390/kvm_util_arch.h index e43a57d99b56..e43a57d99b56 100644 --- a/tools/testing/selftests/kvm/include/s390x/kvm_util_arch.h +++ 
b/tools/testing/selftests/kvm/include/s390/kvm_util_arch.h diff --git a/tools/testing/selftests/kvm/include/s390x/processor.h b/tools/testing/selftests/kvm/include/s390/processor.h index 33fef6fd9617..33fef6fd9617 100644 --- a/tools/testing/selftests/kvm/include/s390x/processor.h +++ b/tools/testing/selftests/kvm/include/s390/processor.h diff --git a/tools/testing/selftests/kvm/include/s390x/sie.h b/tools/testing/selftests/kvm/include/s390/sie.h index 160acd4a1db9..160acd4a1db9 100644 --- a/tools/testing/selftests/kvm/include/s390x/sie.h +++ b/tools/testing/selftests/kvm/include/s390/sie.h diff --git a/tools/testing/selftests/kvm/include/s390x/ucall.h b/tools/testing/selftests/kvm/include/s390/ucall.h index 8035a872a351..8035a872a351 100644 --- a/tools/testing/selftests/kvm/include/s390x/ucall.h +++ b/tools/testing/selftests/kvm/include/s390/ucall.h diff --git a/tools/testing/selftests/kvm/include/x86_64/apic.h b/tools/testing/selftests/kvm/include/x86/apic.h index 51990094effd..80fe9f69b38d 100644 --- a/tools/testing/selftests/kvm/include/x86_64/apic.h +++ b/tools/testing/selftests/kvm/include/x86/apic.h @@ -1,7 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * tools/testing/selftests/kvm/include/x86_64/apic.h - * * Copyright (C) 2021, Google LLC. */ diff --git a/tools/testing/selftests/kvm/include/x86_64/evmcs.h b/tools/testing/selftests/kvm/include/x86/evmcs.h index 901caf0e0939..5a74bb30e2f8 100644 --- a/tools/testing/selftests/kvm/include/x86_64/evmcs.h +++ b/tools/testing/selftests/kvm/include/x86/evmcs.h @@ -1,9 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * tools/testing/selftests/kvm/include/x86_64/evmcs.h - * * Copyright (C) 2018, Red Hat, Inc. - * */ #ifndef SELFTEST_KVM_EVMCS_H diff --git a/tools/testing/selftests/kvm/include/x86_64/hyperv.h b/tools/testing/selftests/kvm/include/x86/hyperv.h index 6849e2552f1b..f13e532be240 100644 --- a/tools/testing/selftests/kvm/include/x86_64/hyperv.h +++ b/tools/testing/selftests/kvm/include/x86/hyperv.h @@ -1,9 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * tools/testing/selftests/kvm/include/x86_64/hyperv.h - * * Copyright (C) 2021, Red Hat, Inc. - * */ #ifndef SELFTEST_KVM_HYPERV_H diff --git a/tools/testing/selftests/kvm/include/x86_64/kvm_util_arch.h b/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h index 972bb1c4ab4c..972bb1c4ab4c 100644 --- a/tools/testing/selftests/kvm/include/x86_64/kvm_util_arch.h +++ b/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h diff --git a/tools/testing/selftests/kvm/include/x86_64/mce.h b/tools/testing/selftests/kvm/include/x86/mce.h index 6119321f3f5d..295f2d554754 100644 --- a/tools/testing/selftests/kvm/include/x86_64/mce.h +++ b/tools/testing/selftests/kvm/include/x86/mce.h @@ -1,7 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * tools/testing/selftests/kvm/include/x86_64/mce.h - * * Copyright (C) 2022, Google LLC. 
*/ diff --git a/tools/testing/selftests/kvm/include/x86_64/pmu.h b/tools/testing/selftests/kvm/include/x86/pmu.h index 3c10c4dc0ae8..3c10c4dc0ae8 100644 --- a/tools/testing/selftests/kvm/include/x86_64/pmu.h +++ b/tools/testing/selftests/kvm/include/x86/pmu.h diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h index 645200e95f89..d60da8966772 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86/processor.h @@ -1,7 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * tools/testing/selftests/kvm/include/x86_64/processor.h - * * Copyright (C) 2018, Google LLC. */ @@ -29,6 +27,8 @@ extern uint64_t guest_tsc_khz; #define MAX_NR_CPUID_ENTRIES 100 #endif +#define NONCANONICAL 0xaaaaaaaaaaaaaaaaull + /* Forced emulation prefix, used to invoke the emulator unconditionally. */ #define KVM_FEP "ud2; .byte 'k', 'v', 'm';" @@ -571,6 +571,11 @@ static inline void set_cr4(uint64_t val) __asm__ __volatile__("mov %0, %%cr4" : : "r" (val) : "memory"); } +static inline void set_idt(const struct desc_ptr *idt_desc) +{ + __asm__ __volatile__("lidt %0"::"m"(*idt_desc)); +} + static inline u64 xgetbv(u32 index) { u32 eax, edx; @@ -1012,10 +1017,19 @@ static inline struct kvm_cpuid2 *allocate_kvm_cpuid2(int nr_entries) void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid); +static inline void vcpu_get_cpuid(struct kvm_vcpu *vcpu) +{ + vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid); +} + static inline struct kvm_cpuid_entry2 *__vcpu_get_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function, uint32_t index) { + TEST_ASSERT(vcpu->cpuid, "Must do vcpu_init_cpuid() first (or equivalent)"); + + vcpu_get_cpuid(vcpu); + return (struct kvm_cpuid_entry2 *)get_cpuid_entry(vcpu->cpuid, function, index); } @@ -1036,7 +1050,7 @@ static inline int __vcpu_set_cpuid(struct kvm_vcpu *vcpu) return r; /* On success, refresh the cache to pick up adjustments made by KVM. */ - vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid); + vcpu_get_cpuid(vcpu); return 0; } @@ -1046,12 +1060,7 @@ static inline void vcpu_set_cpuid(struct kvm_vcpu *vcpu) vcpu_ioctl(vcpu, KVM_SET_CPUID2, vcpu->cpuid); /* Refresh the cache to pick up adjustments made by KVM. 
*/ - vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid); -} - -static inline void vcpu_get_cpuid(struct kvm_vcpu *vcpu) -{ - vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid); + vcpu_get_cpuid(vcpu); } void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu, diff --git a/tools/testing/selftests/kvm/include/x86_64/sev.h b/tools/testing/selftests/kvm/include/x86/sev.h index 82c11c81a956..82c11c81a956 100644 --- a/tools/testing/selftests/kvm/include/x86_64/sev.h +++ b/tools/testing/selftests/kvm/include/x86/sev.h diff --git a/tools/testing/selftests/kvm/include/x86_64/svm.h b/tools/testing/selftests/kvm/include/x86/svm.h index 4803e1056055..29cffd0a9181 100644 --- a/tools/testing/selftests/kvm/include/x86_64/svm.h +++ b/tools/testing/selftests/kvm/include/x86/svm.h @@ -1,10 +1,4 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* - * tools/testing/selftests/kvm/include/x86_64/svm.h - * This is a copy of arch/x86/include/asm/svm.h - * - */ - #ifndef SELFTEST_KVM_SVM_H #define SELFTEST_KVM_SVM_H diff --git a/tools/testing/selftests/kvm/include/x86_64/svm_util.h b/tools/testing/selftests/kvm/include/x86/svm_util.h index 044f0f872ba9..b74c6dcddcbd 100644 --- a/tools/testing/selftests/kvm/include/x86_64/svm_util.h +++ b/tools/testing/selftests/kvm/include/x86/svm_util.h @@ -1,8 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * tools/testing/selftests/kvm/include/x86_64/svm_utils.h - * Header for nested SVM testing - * * Copyright (C) 2020, Red Hat, Inc. */ diff --git a/tools/testing/selftests/kvm/include/x86_64/ucall.h b/tools/testing/selftests/kvm/include/x86/ucall.h index d3825dcc3cd9..d3825dcc3cd9 100644 --- a/tools/testing/selftests/kvm/include/x86_64/ucall.h +++ b/tools/testing/selftests/kvm/include/x86/ucall.h diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86/vmx.h index 5f0c0a29c556..edb3c391b982 100644 --- a/tools/testing/selftests/kvm/include/x86_64/vmx.h +++ b/tools/testing/selftests/kvm/include/x86/vmx.h @@ -1,7 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * tools/testing/selftests/kvm/include/x86_64/vmx.h - * * Copyright (C) 2018, Google LLC. 
*/ diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic.c b/tools/testing/selftests/kvm/lib/arm64/gic.c index 7abbf8866512..7abbf8866512 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/gic.c +++ b/tools/testing/selftests/kvm/lib/arm64/gic.c diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_private.h b/tools/testing/selftests/kvm/lib/arm64/gic_private.h index d24e9ecc96c6..d24e9ecc96c6 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/gic_private.h +++ b/tools/testing/selftests/kvm/lib/arm64/gic_private.h diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c b/tools/testing/selftests/kvm/lib/arm64/gic_v3.c index 66d05506f78b..66d05506f78b 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c +++ b/tools/testing/selftests/kvm/lib/arm64/gic_v3.c diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_v3_its.c b/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c index 09f270545646..09f270545646 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/gic_v3_its.c +++ b/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c diff --git a/tools/testing/selftests/kvm/lib/aarch64/handlers.S b/tools/testing/selftests/kvm/lib/arm64/handlers.S index 0e443eadfac6..0e443eadfac6 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/handlers.S +++ b/tools/testing/selftests/kvm/lib/arm64/handlers.S diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c index 698e34f39241..7ba3aa3755f3 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/arm64/processor.c @@ -281,8 +281,8 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init) */ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CPACR_EL1), 3 << 20); - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), &sctlr_el1); - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1), &tcr_el1); + sctlr_el1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1)); + tcr_el1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1)); /* Configure base granule size */ switch (vm->mode) { @@ -360,8 +360,8 @@ void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) { uint64_t pstate, pc; - vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pstate), &pstate); - vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc), &pc); + pstate = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pstate)); + pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc)); fprintf(stream, "%*spstate: 0x%.16lx pc: 0x%.16lx\n", indent, "", pstate, pc); diff --git a/tools/testing/selftests/kvm/lib/aarch64/spinlock.c b/tools/testing/selftests/kvm/lib/arm64/spinlock.c index a076e780be5d..a076e780be5d 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/spinlock.c +++ b/tools/testing/selftests/kvm/lib/arm64/spinlock.c diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/arm64/ucall.c index ddab0ce89d4d..ddab0ce89d4d 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c +++ b/tools/testing/selftests/kvm/lib/arm64/ucall.c diff --git a/tools/testing/selftests/kvm/lib/aarch64/vgic.c b/tools/testing/selftests/kvm/lib/arm64/vgic.c index 4427f43f73ea..4427f43f73ea 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/vgic.c +++ b/tools/testing/selftests/kvm/lib/arm64/vgic.c diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 480e3a40d197..33fefeb3ca44 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -1648,7 +1648,8 @@ int _vcpu_run(struct 
kvm_vcpu *vcpu) rc = __vcpu_run(vcpu); } while (rc == -1 && errno == EINTR); - assert_on_unhandled_exception(vcpu); + if (!rc) + assert_on_unhandled_exception(vcpu); return rc; } diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c index 6ae47b3d6b25..dd663bcf0cc0 100644 --- a/tools/testing/selftests/kvm/lib/riscv/processor.c +++ b/tools/testing/selftests/kvm/lib/riscv/processor.c @@ -221,39 +221,39 @@ void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) { struct kvm_riscv_core core; - vcpu_get_reg(vcpu, RISCV_CORE_REG(mode), &core.mode); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.pc), &core.regs.pc); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.ra), &core.regs.ra); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.sp), &core.regs.sp); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.gp), &core.regs.gp); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.tp), &core.regs.tp); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t0), &core.regs.t0); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t1), &core.regs.t1); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t2), &core.regs.t2); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s0), &core.regs.s0); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s1), &core.regs.s1); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a0), &core.regs.a0); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a1), &core.regs.a1); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a2), &core.regs.a2); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a3), &core.regs.a3); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a4), &core.regs.a4); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a5), &core.regs.a5); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a6), &core.regs.a6); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a7), &core.regs.a7); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s2), &core.regs.s2); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s3), &core.regs.s3); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s4), &core.regs.s4); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s5), &core.regs.s5); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s6), &core.regs.s6); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s7), &core.regs.s7); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s8), &core.regs.s8); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s9), &core.regs.s9); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s10), &core.regs.s10); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s11), &core.regs.s11); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t3), &core.regs.t3); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t4), &core.regs.t4); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t5), &core.regs.t5); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t6), &core.regs.t6); + core.mode = vcpu_get_reg(vcpu, RISCV_CORE_REG(mode)); + core.regs.pc = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.pc)); + core.regs.ra = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.ra)); + core.regs.sp = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.sp)); + core.regs.gp = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.gp)); + core.regs.tp = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.tp)); + core.regs.t0 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t0)); + core.regs.t1 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t1)); + core.regs.t2 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t2)); + core.regs.s0 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s0)); + core.regs.s1 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s1)); + core.regs.a0 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a0)); + core.regs.a1 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a1)); + core.regs.a2 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a2)); + core.regs.a3 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a3)); + core.regs.a4 
= vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a4)); + core.regs.a5 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a5)); + core.regs.a6 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a6)); + core.regs.a7 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a7)); + core.regs.s2 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s2)); + core.regs.s3 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s3)); + core.regs.s4 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s4)); + core.regs.s5 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s5)); + core.regs.s6 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s6)); + core.regs.s7 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s7)); + core.regs.s8 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s8)); + core.regs.s9 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s9)); + core.regs.s10 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s10)); + core.regs.s11 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s11)); + core.regs.t3 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t3)); + core.regs.t4 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t4)); + core.regs.t5 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t5)); + core.regs.t6 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t6)); fprintf(stream, " MODE: 0x%lx\n", core.mode); diff --git a/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c b/tools/testing/selftests/kvm/lib/s390/diag318_test_handler.c index 2c432fa164f1..2c432fa164f1 100644 --- a/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c +++ b/tools/testing/selftests/kvm/lib/s390/diag318_test_handler.c diff --git a/tools/testing/selftests/kvm/lib/s390x/facility.c b/tools/testing/selftests/kvm/lib/s390/facility.c index d540812d911a..d540812d911a 100644 --- a/tools/testing/selftests/kvm/lib/s390x/facility.c +++ b/tools/testing/selftests/kvm/lib/s390/facility.c diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390/processor.c index 20cfe970e3e3..20cfe970e3e3 100644 --- a/tools/testing/selftests/kvm/lib/s390x/processor.c +++ b/tools/testing/selftests/kvm/lib/s390/processor.c diff --git a/tools/testing/selftests/kvm/lib/s390x/ucall.c b/tools/testing/selftests/kvm/lib/s390/ucall.c index cca98734653d..cca98734653d 100644 --- a/tools/testing/selftests/kvm/lib/s390x/ucall.c +++ b/tools/testing/selftests/kvm/lib/s390/ucall.c diff --git a/tools/testing/selftests/kvm/lib/x86_64/apic.c b/tools/testing/selftests/kvm/lib/x86/apic.c index 89153a333e83..89153a333e83 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/apic.c +++ b/tools/testing/selftests/kvm/lib/x86/apic.c diff --git a/tools/testing/selftests/kvm/lib/x86_64/handlers.S b/tools/testing/selftests/kvm/lib/x86/handlers.S index 7629819734af..7629819734af 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/handlers.S +++ b/tools/testing/selftests/kvm/lib/x86/handlers.S diff --git a/tools/testing/selftests/kvm/lib/x86_64/hyperv.c b/tools/testing/selftests/kvm/lib/x86/hyperv.c index 15bc8cd583aa..15bc8cd583aa 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/hyperv.c +++ b/tools/testing/selftests/kvm/lib/x86/hyperv.c diff --git a/tools/testing/selftests/kvm/lib/x86_64/memstress.c b/tools/testing/selftests/kvm/lib/x86/memstress.c index d61e623afc8c..7f5d62a65c68 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/memstress.c +++ b/tools/testing/selftests/kvm/lib/x86/memstress.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * x86_64-specific extensions to memstress.c. + * x86-specific extensions to memstress.c. * * Copyright (C) 2022, Google, Inc. 
*/ diff --git a/tools/testing/selftests/kvm/lib/x86_64/pmu.c b/tools/testing/selftests/kvm/lib/x86/pmu.c index f31f0427c17c..f31f0427c17c 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/pmu.c +++ b/tools/testing/selftests/kvm/lib/x86/pmu.c diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c index 636b29ba8985..bd5a802fa7a5 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86/processor.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * tools/testing/selftests/kvm/lib/x86_64/processor.c - * * Copyright (C) 2018, Google LLC. */ diff --git a/tools/testing/selftests/kvm/lib/x86_64/sev.c b/tools/testing/selftests/kvm/lib/x86/sev.c index e9535ee20b7f..e9535ee20b7f 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/sev.c +++ b/tools/testing/selftests/kvm/lib/x86/sev.c diff --git a/tools/testing/selftests/kvm/lib/x86_64/svm.c b/tools/testing/selftests/kvm/lib/x86/svm.c index 5495a92dfd5a..d239c2097391 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/svm.c +++ b/tools/testing/selftests/kvm/lib/x86/svm.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * tools/testing/selftests/kvm/lib/x86_64/svm.c * Helpers used for nested SVM testing * Largely inspired from KVM unit test svm.c * diff --git a/tools/testing/selftests/kvm/lib/x86_64/ucall.c b/tools/testing/selftests/kvm/lib/x86/ucall.c index 1265cecc7dd1..1265cecc7dd1 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/ucall.c +++ b/tools/testing/selftests/kvm/lib/x86/ucall.c diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86/vmx.c index d7ac122820bf..d4d1208dd023 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86/vmx.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * tools/testing/selftests/kvm/lib/x86_64/vmx.c - * * Copyright (C) 2018, Google LLC. */ diff --git a/tools/testing/selftests/kvm/max_guest_memory_test.c b/tools/testing/selftests/kvm/mmu_stress_test.c index 0b9678858b6d..d9c76b4c0d88 100644 --- a/tools/testing/selftests/kvm/max_guest_memory_test.c +++ b/tools/testing/selftests/kvm/mmu_stress_test.c @@ -15,16 +15,62 @@ #include "test_util.h" #include "guest_modes.h" #include "processor.h" +#include "ucall_common.h" + +static bool mprotect_ro_done; static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride) { uint64_t gpa; + int i; - for (;;) { + for (i = 0; i < 2; i++) { for (gpa = start_gpa; gpa < end_gpa; gpa += stride) - *((volatile uint64_t *)gpa) = gpa; - GUEST_SYNC(0); + vcpu_arch_put_guest(*((volatile uint64_t *)gpa), gpa); + GUEST_SYNC(i); } + + for (gpa = start_gpa; gpa < end_gpa; gpa += stride) + *((volatile uint64_t *)gpa); + GUEST_SYNC(2); + + /* + * Write to the region while mprotect(PROT_READ) is underway. Keep + * looping until the memory is guaranteed to be read-only, otherwise + * vCPUs may complete their writes and advance to the next stage + * prematurely. + * + * For architectures that support skipping the faulting instruction, + * generate the store via inline assembly to ensure the exact length + * of the instruction is known and stable (vcpu_arch_put_guest() on + * fixed-length architectures should work, but the cost of paranoia + * is low in this case). For x86, hand-code the exact opcode so that + * there is no room for variability in the generated instruction. 
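+ * + * For reference, the hand-coded x86 bytes 0x48,0x89,0x00 below decode + * to "mov %rax, (%rax)" (REX.W prefix, opcode 0x89, ModRM 0x00), i.e. a + * fixed 3-byte store, which is what lets the host skip the faulting + * instruction by advancing RIP by exactly 3.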
+ */ + do { + for (gpa = start_gpa; gpa < end_gpa; gpa += stride) +#ifdef __x86_64__ + asm volatile(".byte 0x48,0x89,0x00" :: "a"(gpa) : "memory"); /* mov %rax, (%rax) */ +#elif defined(__aarch64__) + asm volatile("str %0, [%0]" :: "r" (gpa) : "memory"); +#else + vcpu_arch_put_guest(*((volatile uint64_t *)gpa), gpa); +#endif + } while (!READ_ONCE(mprotect_ro_done)); + + /* + * Only architectures that write the entire range can explicitly sync, + * as other architectures will be stuck on the write fault. + */ +#if defined(__x86_64__) || defined(__aarch64__) + GUEST_SYNC(3); +#endif + + for (gpa = start_gpa; gpa < end_gpa; gpa += stride) + vcpu_arch_put_guest(*((volatile uint64_t *)gpa), gpa); + GUEST_SYNC(4); + + GUEST_ASSERT(0); } struct vcpu_info { @@ -51,34 +97,100 @@ static void rendezvous_with_boss(void) } } -static void run_vcpu(struct kvm_vcpu *vcpu) +static void assert_sync_stage(struct kvm_vcpu *vcpu, int stage) +{ + struct ucall uc; + + TEST_ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC); + TEST_ASSERT_EQ(uc.args[1], stage); +} + +static void run_vcpu(struct kvm_vcpu *vcpu, int stage) { vcpu_run(vcpu); - TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_SYNC); + assert_sync_stage(vcpu, stage); } static void *vcpu_worker(void *data) { + struct kvm_sregs __maybe_unused sregs; struct vcpu_info *info = data; struct kvm_vcpu *vcpu = info->vcpu; struct kvm_vm *vm = vcpu->vm; - struct kvm_sregs sregs; + int r; vcpu_args_set(vcpu, 3, info->start_gpa, info->end_gpa, vm->page_size); rendezvous_with_boss(); - run_vcpu(vcpu); + /* Stage 0, write all of guest memory. */ + run_vcpu(vcpu, 0); rendezvous_with_boss(); - vcpu_sregs_get(vcpu, &sregs); #ifdef __x86_64__ + vcpu_sregs_get(vcpu, &sregs); /* Toggle CR0.WP to trigger an MMU context reset. */ sregs.cr0 ^= X86_CR0_WP; -#endif vcpu_sregs_set(vcpu, &sregs); +#endif rendezvous_with_boss(); - run_vcpu(vcpu); + /* Stage 1, re-write all of guest memory. */ + run_vcpu(vcpu, 1); + rendezvous_with_boss(); + + /* Stage 2, read all of guest memory, which is now read-only. */ + run_vcpu(vcpu, 2); + + /* + * Stage 3, write guest memory and verify KVM returns -EFAULT once the + * mprotect(PROT_READ) lands. Only architectures that can skip the + * faulting instruction validate *all* of guest memory and sync for + * this stage; vCPUs on other architectures get stuck on the faulting + * instruction. Go to stage 3 without a rendezvous. + */ + do { + r = _vcpu_run(vcpu); + } while (!r); + TEST_ASSERT(r == -1 && errno == EFAULT, + "Expected EFAULT on write to RO memory, got r = %d, errno = %d", r, errno); + +#if defined(__x86_64__) || defined(__aarch64__) + /* + * Verify *all* writes from the guest hit EFAULT due to the VMA now + * being read-only. x86 and arm64 only at this time as skipping the + * instruction that hits the EFAULT requires advancing the program + * counter, which is arch-specific and relies on inline assembly. + */ +#ifdef __x86_64__ + vcpu->run->kvm_valid_regs = KVM_SYNC_X86_REGS; +#endif + for (;;) { + r = _vcpu_run(vcpu); + if (!r) + break; + TEST_ASSERT_EQ(errno, EFAULT); +#if defined(__x86_64__) + WRITE_ONCE(vcpu->run->kvm_dirty_regs, KVM_SYNC_X86_REGS); + vcpu->run->s.regs.regs.rip += 3; +#elif defined(__aarch64__) + vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), + vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc)) + 4); +#endif + } + assert_sync_stage(vcpu, 3); +#endif /* __x86_64__ || __aarch64__ */ + rendezvous_with_boss(); + + /* + * Stage 4. Run to completion, waiting for mprotect(PROT_WRITE) to + * make the memory writable again.
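+ * + * Each write attempt fails with -EFAULT until the host's + * mprotect(PROT_READ | PROT_WRITE) completes, hence the loop below + * retries _vcpu_run() for as long as errno == EFAULT.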
+ */ + do { + r = _vcpu_run(vcpu); + } while (r && errno == EFAULT); + TEST_ASSERT_EQ(r, 0); + assert_sync_stage(vcpu, 4); rendezvous_with_boss(); return NULL; @@ -161,7 +273,7 @@ int main(int argc, char *argv[]) const uint64_t start_gpa = SZ_4G; const int first_slot = 1; - struct timespec time_start, time_run1, time_reset, time_run2; + struct timespec time_start, time_run1, time_reset, time_run2, time_ro, time_rw; uint64_t max_gpa, gpa, slot_size, max_mem, i; int max_slots, slot, opt, fd; bool hugepages = false; @@ -209,7 +321,13 @@ int main(int argc, char *argv[]) vcpus = malloc(nr_vcpus * sizeof(*vcpus)); TEST_ASSERT(vcpus, "Failed to allocate vCPU array"); - vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus); + vm = __vm_create_with_vcpus(VM_SHAPE_DEFAULT, nr_vcpus, +#ifdef __x86_64__ + max_mem / SZ_1G, +#else + max_mem / vm_guest_mode_params[VM_MODE_DEFAULT].page_size, +#endif + guest_code, vcpus); max_gpa = vm->max_gfn << vm->page_shift; TEST_ASSERT(max_gpa > (4 * slot_size), "MAXPHYADDR <4gb "); @@ -259,14 +377,28 @@ int main(int argc, char *argv[]) rendezvous_with_vcpus(&time_reset, "reset"); rendezvous_with_vcpus(&time_run2, "run 2"); + mprotect(mem, slot_size, PROT_READ); + usleep(10); + mprotect_ro_done = true; + sync_global_to_guest(vm, mprotect_ro_done); + + rendezvous_with_vcpus(&time_ro, "mprotect RO"); + mprotect(mem, slot_size, PROT_READ | PROT_WRITE); + rendezvous_with_vcpus(&time_rw, "mprotect RW"); + + time_rw = timespec_sub(time_rw, time_ro); + time_ro = timespec_sub(time_ro, time_run2); time_run2 = timespec_sub(time_run2, time_reset); - time_reset = timespec_sub(time_reset, time_run1); + time_reset = timespec_sub(time_reset, time_run1); time_run1 = timespec_sub(time_run1, time_start); - pr_info("run1 = %ld.%.9lds, reset = %ld.%.9lds, run2 = %ld.%.9lds\n", + pr_info("run1 = %ld.%.9lds, reset = %ld.%.9lds, run2 = %ld.%.9lds, " + "ro = %ld.%.9lds, rw = %ld.%.9lds\n", time_run1.tv_sec, time_run1.tv_nsec, time_reset.tv_sec, time_reset.tv_nsec, - time_run2.tv_sec, time_run2.tv_nsec); + time_run2.tv_sec, time_run2.tv_nsec, + time_ro.tv_sec, time_ro.tv_nsec, + time_rw.tv_sec, time_rw.tv_nsec); /* * Delete even numbered slots (arbitrary) and unmap the first half of diff --git a/tools/testing/selftests/kvm/riscv/arch_timer.c b/tools/testing/selftests/kvm/riscv/arch_timer.c index 2c792228ac0b..9e370800a6a2 100644 --- a/tools/testing/selftests/kvm/riscv/arch_timer.c +++ b/tools/testing/selftests/kvm/riscv/arch_timer.c @@ -93,7 +93,7 @@ struct kvm_vm *test_vm_create(void) vcpu_init_vector_tables(vcpus[i]); /* Initialize guest timer frequency. 
*/ - vcpu_get_reg(vcpus[0], RISCV_TIMER_REG(frequency), &timer_freq); + timer_freq = vcpu_get_reg(vcpus[0], RISCV_TIMER_REG(frequency)); sync_global_to_guest(vm, timer_freq); pr_debug("timer_freq: %lu\n", timer_freq); diff --git a/tools/testing/selftests/kvm/riscv/ebreak_test.c b/tools/testing/selftests/kvm/riscv/ebreak_test.c index 0e0712854953..cfed6c727bfc 100644 --- a/tools/testing/selftests/kvm/riscv/ebreak_test.c +++ b/tools/testing/selftests/kvm/riscv/ebreak_test.c @@ -60,7 +60,7 @@ int main(void) TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.pc), &pc); + pc = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.pc)); TEST_ASSERT_EQ(pc, LABEL_ADDRESS(sw_bp_1)); /* skip sw_bp_1 */ diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c index 4bc1051848e5..8515921dfdbf 100644 --- a/tools/testing/selftests/kvm/riscv/get-reg-list.c +++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c @@ -52,6 +52,8 @@ bool filter_reg(__u64 reg) case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVINVAL: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVNAPOT: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVPBMT: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVVPTC: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZABHA: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZACAS: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZAWRS: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBA: @@ -71,6 +73,7 @@ bool filter_reg(__u64 reg) case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFHMIN: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOM: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOZ: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICCRSE: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICNTR: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICOND: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICSR: @@ -112,6 +115,7 @@ bool filter_reg(__u64 reg) case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_HSM: case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_PMU: case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_DBCN: + case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_SUSP: case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_STA: case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_EXPERIMENTAL: case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_VENDOR: @@ -429,6 +433,8 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off) KVM_ISA_EXT_ARR(SVINVAL), KVM_ISA_EXT_ARR(SVNAPOT), KVM_ISA_EXT_ARR(SVPBMT), + KVM_ISA_EXT_ARR(SVVPTC), + KVM_ISA_EXT_ARR(ZABHA), KVM_ISA_EXT_ARR(ZACAS), KVM_ISA_EXT_ARR(ZAWRS), KVM_ISA_EXT_ARR(ZBA), @@ -448,6 +454,7 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off) KVM_ISA_EXT_ARR(ZFHMIN), KVM_ISA_EXT_ARR(ZICBOM), KVM_ISA_EXT_ARR(ZICBOZ), + KVM_ISA_EXT_ARR(ZICCRSE), KVM_ISA_EXT_ARR(ZICNTR), KVM_ISA_EXT_ARR(ZICOND), KVM_ISA_EXT_ARR(ZICSR), @@ -535,10 +542,11 @@ static const char *sbi_ext_single_id_to_str(__u64 reg_off) 
KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_SRST), KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_HSM), KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_PMU), + KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_DBCN), + KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_SUSP), KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_STA), KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_EXPERIMENTAL), KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_VENDOR), - KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_DBCN), }; if (reg_off >= ARRAY_SIZE(kvm_sbi_ext_reg_name)) @@ -949,6 +957,7 @@ KVM_SBI_EXT_SUBLIST_CONFIG(base, BASE); KVM_SBI_EXT_SUBLIST_CONFIG(sta, STA); KVM_SBI_EXT_SIMPLE_CONFIG(pmu, PMU); KVM_SBI_EXT_SIMPLE_CONFIG(dbcn, DBCN); +KVM_SBI_EXT_SIMPLE_CONFIG(susp, SUSP); KVM_ISA_EXT_SUBLIST_CONFIG(aia, AIA); KVM_ISA_EXT_SUBLIST_CONFIG(fp_f, FP_F); @@ -964,6 +973,8 @@ KVM_ISA_EXT_SIMPLE_CONFIG(svadu, SVADU); KVM_ISA_EXT_SIMPLE_CONFIG(svinval, SVINVAL); KVM_ISA_EXT_SIMPLE_CONFIG(svnapot, SVNAPOT); KVM_ISA_EXT_SIMPLE_CONFIG(svpbmt, SVPBMT); +KVM_ISA_EXT_SIMPLE_CONFIG(svvptc, SVVPTC); +KVM_ISA_EXT_SIMPLE_CONFIG(zabha, ZABHA); KVM_ISA_EXT_SIMPLE_CONFIG(zacas, ZACAS); KVM_ISA_EXT_SIMPLE_CONFIG(zawrs, ZAWRS); KVM_ISA_EXT_SIMPLE_CONFIG(zba, ZBA); @@ -983,6 +994,7 @@ KVM_ISA_EXT_SIMPLE_CONFIG(zfh, ZFH); KVM_ISA_EXT_SIMPLE_CONFIG(zfhmin, ZFHMIN); KVM_ISA_EXT_SUBLIST_CONFIG(zicbom, ZICBOM); KVM_ISA_EXT_SUBLIST_CONFIG(zicboz, ZICBOZ); +KVM_ISA_EXT_SIMPLE_CONFIG(ziccrse, ZICCRSE); KVM_ISA_EXT_SIMPLE_CONFIG(zicntr, ZICNTR); KVM_ISA_EXT_SIMPLE_CONFIG(zicond, ZICOND); KVM_ISA_EXT_SIMPLE_CONFIG(zicsr, ZICSR); @@ -1017,6 +1029,7 @@ struct vcpu_reg_list *vcpu_configs[] = { &config_sbi_sta, &config_sbi_pmu, &config_sbi_dbcn, + &config_sbi_susp, &config_aia, &config_fp_f, &config_fp_d, @@ -1031,6 +1044,8 @@ struct vcpu_reg_list *vcpu_configs[] = { &config_svinval, &config_svnapot, &config_svpbmt, + &config_svvptc, + &config_zabha, &config_zacas, &config_zawrs, &config_zba, @@ -1050,6 +1065,7 @@ struct vcpu_reg_list *vcpu_configs[] = { &config_zfhmin, &config_zicbom, &config_zicboz, + &config_ziccrse, &config_zicntr, &config_zicond, &config_zicsr, diff --git a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c index f299cbfd23ca..f45c0ecc902d 100644 --- a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c +++ b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c @@ -608,7 +608,7 @@ static void test_vm_events_overflow(void *guest_code) vcpu_init_vector_tables(vcpu); /* Initialize guest timer frequency. 
*/ - vcpu_get_reg(vcpu, RISCV_TIMER_REG(frequency), &timer_freq); + timer_freq = vcpu_get_reg(vcpu, RISCV_TIMER_REG(frequency)); sync_global_to_guest(vm, timer_freq); run_vcpu(vcpu); diff --git a/tools/testing/selftests/kvm/s390x/cmma_test.c b/tools/testing/selftests/kvm/s390/cmma_test.c index e32dd59703a0..e32dd59703a0 100644 --- a/tools/testing/selftests/kvm/s390x/cmma_test.c +++ b/tools/testing/selftests/kvm/s390/cmma_test.c diff --git a/tools/testing/selftests/kvm/s390x/config b/tools/testing/selftests/kvm/s390/config index 23270f2d679f..23270f2d679f 100644 --- a/tools/testing/selftests/kvm/s390x/config +++ b/tools/testing/selftests/kvm/s390/config diff --git a/tools/testing/selftests/kvm/s390x/cpumodel_subfuncs_test.c b/tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c index 27255880dabd..27255880dabd 100644 --- a/tools/testing/selftests/kvm/s390x/cpumodel_subfuncs_test.c +++ b/tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c diff --git a/tools/testing/selftests/kvm/s390x/debug_test.c b/tools/testing/selftests/kvm/s390/debug_test.c index ad8095968601..ad8095968601 100644 --- a/tools/testing/selftests/kvm/s390x/debug_test.c +++ b/tools/testing/selftests/kvm/s390/debug_test.c diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390/memop.c index 4374b4cd2a80..4374b4cd2a80 100644 --- a/tools/testing/selftests/kvm/s390x/memop.c +++ b/tools/testing/selftests/kvm/s390/memop.c diff --git a/tools/testing/selftests/kvm/s390x/resets.c b/tools/testing/selftests/kvm/s390/resets.c index 357943f2bea8..b58f75b381e5 100644 --- a/tools/testing/selftests/kvm/s390x/resets.c +++ b/tools/testing/selftests/kvm/s390/resets.c @@ -61,7 +61,7 @@ static void test_one_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t value) { uint64_t eval_reg; - vcpu_get_reg(vcpu, id, &eval_reg); + eval_reg = vcpu_get_reg(vcpu, id); TEST_ASSERT(eval_reg == value, "value == 0x%lx", value); } diff --git a/tools/testing/selftests/kvm/s390x/shared_zeropage_test.c b/tools/testing/selftests/kvm/s390/shared_zeropage_test.c index bba0d9a6dcc8..bba0d9a6dcc8 100644 --- a/tools/testing/selftests/kvm/s390x/shared_zeropage_test.c +++ b/tools/testing/selftests/kvm/s390/shared_zeropage_test.c diff --git a/tools/testing/selftests/kvm/s390x/sync_regs_test.c b/tools/testing/selftests/kvm/s390/sync_regs_test.c index 53def355ccba..53def355ccba 100644 --- a/tools/testing/selftests/kvm/s390x/sync_regs_test.c +++ b/tools/testing/selftests/kvm/s390/sync_regs_test.c diff --git a/tools/testing/selftests/kvm/s390x/tprot.c b/tools/testing/selftests/kvm/s390/tprot.c index 12d5e1cb62e3..12d5e1cb62e3 100644 --- a/tools/testing/selftests/kvm/s390x/tprot.c +++ b/tools/testing/selftests/kvm/s390/tprot.c diff --git a/tools/testing/selftests/kvm/s390x/ucontrol_test.c b/tools/testing/selftests/kvm/s390/ucontrol_test.c index 135ee22856cf..135ee22856cf 100644 --- a/tools/testing/selftests/kvm/s390x/ucontrol_test.c +++ b/tools/testing/selftests/kvm/s390/ucontrol_test.c diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index a8267628e9ed..bc440d5aba57 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -17,9 +17,9 @@ #include <processor.h> /* - * s390x needs at least 1MB alignment, and the x86_64 MOVE/DELETE tests need a - * 2MB sized and aligned region so that the initial region corresponds to - * exactly one large page. 
+ s390 needs at least 1MB alignment, and the x86 MOVE/DELETE tests need a 2MB + sized and aligned region so that the initial region corresponds to exactly + one large page. */ #define MEM_REGION_SIZE 0x200000 @@ -235,7 +235,7 @@ static void guest_code_delete_memory_region(void) * in the guest will never succeed, and so isn't an option. */ memset(&idt, 0, sizeof(idt)); - __asm__ __volatile__("lidt %0" :: "m"(idt)); + set_idt(&idt); GUEST_SYNC(0); @@ -553,6 +553,56 @@ static void test_add_overlapping_private_memory_regions(void) close(memfd); kvm_vm_free(vm); } + +static void guest_code_mmio_during_vectoring(void) +{ + const struct desc_ptr idt_desc = { + .address = MEM_REGION_GPA, + .size = 0xFFF, + }; + + set_idt(&idt_desc); + + /* Generate a #GP by dereferencing a non-canonical address */ + *((uint8_t *)NONCANONICAL) = 0x1; + + GUEST_ASSERT(0); +} + +/* + * This test points the IDT descriptor base to an MMIO address. It should cause + * a KVM internal error when an event is delivered in the guest, as KVM cannot + * emulate event delivery (vectoring) through MMIO. + */ +static void test_mmio_during_vectoring(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_run *run; + struct kvm_vm *vm; + u64 expected_gpa; + + pr_info("Testing MMIO during vectoring error handling\n"); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code_mmio_during_vectoring); + virt_map(vm, MEM_REGION_GPA, MEM_REGION_GPA, 1); + + run = vcpu->run; + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR); + TEST_ASSERT(run->internal.suberror == KVM_INTERNAL_ERROR_DELIVERY_EV, + "Unexpected suberror = %d", vcpu->run->internal.suberror); + /* ndata == 4 would mean KVM didn't report the faulting GPA in data[3]. */ + TEST_ASSERT(run->internal.ndata != 4, "Unexpected internal error data array size = %d", + run->internal.ndata); + + /* The reported GPA should be IDT base + offset of the GP vector */ + expected_gpa = MEM_REGION_GPA + GP_VECTOR * sizeof(struct idt_entry); + + TEST_ASSERT(run->internal.data[3] == expected_gpa, + "Unexpected GPA = %llx (expected %llx)", + vcpu->run->internal.data[3], expected_gpa); + + kvm_vm_free(vm); +} #endif int main(int argc, char *argv[]) @@ -568,6 +618,7 @@ int main(int argc, char *argv[]) * KVM_RUN fails with ENOEXEC or EFAULT.
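+ * + * The MMIO-during-vectoring test is likewise x86-only: it relies on the + * IDT and on KVM's x86-specific event-delivery internal error exit.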
*/ test_zero_memory_regions(); + test_mmio_during_vectoring(); #endif test_invalid_memory_region_flags(); diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c index a8d3afa0b86b..cce2520af720 100644 --- a/tools/testing/selftests/kvm/steal_time.c +++ b/tools/testing/selftests/kvm/steal_time.c @@ -269,9 +269,8 @@ static void guest_code(int cpu) static bool is_steal_time_supported(struct kvm_vcpu *vcpu) { uint64_t id = RISCV_SBI_EXT_REG(KVM_RISCV_SBI_EXT_STA); - unsigned long enabled; + unsigned long enabled = vcpu_get_reg(vcpu, id); - vcpu_get_reg(vcpu, id, &enabled); TEST_ASSERT(enabled == 0 || enabled == 1, "Expected boolean result"); return enabled; diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86/amx_test.c index f4ce5a185a7d..f4ce5a185a7d 100644 --- a/tools/testing/selftests/kvm/x86_64/amx_test.c +++ b/tools/testing/selftests/kvm/x86/amx_test.c diff --git a/tools/testing/selftests/kvm/x86_64/apic_bus_clock_test.c b/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c index f8916bb34405..f8916bb34405 100644 --- a/tools/testing/selftests/kvm/x86_64/apic_bus_clock_test.c +++ b/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c diff --git a/tools/testing/selftests/kvm/x86_64/cpuid_test.c b/tools/testing/selftests/kvm/x86/cpuid_test.c index 7b3fda6842bc..7b3fda6842bc 100644 --- a/tools/testing/selftests/kvm/x86_64/cpuid_test.c +++ b/tools/testing/selftests/kvm/x86/cpuid_test.c diff --git a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86/cr4_cpuid_sync_test.c index 28cc66454601..28cc66454601 100644 --- a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c +++ b/tools/testing/selftests/kvm/x86/cr4_cpuid_sync_test.c diff --git a/tools/testing/selftests/kvm/x86_64/debug_regs.c b/tools/testing/selftests/kvm/x86/debug_regs.c index 2d814c1d1dc4..2d814c1d1dc4 100644 --- a/tools/testing/selftests/kvm/x86_64/debug_regs.c +++ b/tools/testing/selftests/kvm/x86/debug_regs.c diff --git a/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c b/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c index 2929c067c207..2929c067c207 100644 --- a/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c +++ b/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c diff --git a/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c b/tools/testing/selftests/kvm/x86/exit_on_emulation_failure_test.c index 81055476d394..81055476d394 100644 --- a/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c +++ b/tools/testing/selftests/kvm/x86/exit_on_emulation_failure_test.c diff --git a/tools/testing/selftests/kvm/x86_64/feature_msrs_test.c b/tools/testing/selftests/kvm/x86/feature_msrs_test.c index a72f13ae2edb..a72f13ae2edb 100644 --- a/tools/testing/selftests/kvm/x86_64/feature_msrs_test.c +++ b/tools/testing/selftests/kvm/x86/feature_msrs_test.c diff --git a/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c b/tools/testing/selftests/kvm/x86/fix_hypercall_test.c index 762628f7d4ba..762628f7d4ba 100644 --- a/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c +++ b/tools/testing/selftests/kvm/x86/fix_hypercall_test.c diff --git a/tools/testing/selftests/kvm/x86_64/flds_emulation.h b/tools/testing/selftests/kvm/x86/flds_emulation.h index 37b1a9f52864..37b1a9f52864 100644 --- a/tools/testing/selftests/kvm/x86_64/flds_emulation.h +++ b/tools/testing/selftests/kvm/x86/flds_emulation.h diff --git 
a/tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c b/tools/testing/selftests/kvm/x86/hwcr_msr_test.c index 10b1b0ba374e..10b1b0ba374e 100644 --- a/tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c +++ b/tools/testing/selftests/kvm/x86/hwcr_msr_test.c diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c b/tools/testing/selftests/kvm/x86/hyperv_clock.c index e058bc676cd6..e058bc676cd6 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c +++ b/tools/testing/selftests/kvm/x86/hyperv_clock.c diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86/hyperv_cpuid.c index 4f5881d4ef66..4f5881d4ef66 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c +++ b/tools/testing/selftests/kvm/x86/hyperv_cpuid.c diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c b/tools/testing/selftests/kvm/x86/hyperv_evmcs.c index 74cf19661309..74cf19661309 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c +++ b/tools/testing/selftests/kvm/x86/hyperv_evmcs.c diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c b/tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c index 949e08e98f31..949e08e98f31 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c +++ b/tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86/hyperv_features.c index 068e9c69710d..068e9c69710d 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c +++ b/tools/testing/selftests/kvm/x86/hyperv_features.c diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c b/tools/testing/selftests/kvm/x86/hyperv_ipi.c index 22c0c124582f..22c0c124582f 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c +++ b/tools/testing/selftests/kvm/x86/hyperv_ipi.c diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86/hyperv_svm_test.c index 0ddb63229bcb..0ddb63229bcb 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c +++ b/tools/testing/selftests/kvm/x86/hyperv_svm_test.c diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c b/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c index 077cd0ec3040..077cd0ec3040 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c +++ b/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c diff --git a/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c b/tools/testing/selftests/kvm/x86/kvm_clock_test.c index 5bc12222d87a..5bc12222d87a 100644 --- a/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c +++ b/tools/testing/selftests/kvm/x86/kvm_clock_test.c diff --git a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c b/tools/testing/selftests/kvm/x86/kvm_pv_test.c index 78878b3a2725..1b805cbdb47b 100644 --- a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c +++ b/tools/testing/selftests/kvm/x86/kvm_pv_test.c @@ -139,10 +139,12 @@ static void test_pv_unhalt(void) struct kvm_vm *vm; struct kvm_cpuid_entry2 *ent; u32 kvm_sig_old; + int r; - pr_info("testing KVM_FEATURE_PV_UNHALT\n"); + if (!(kvm_check_cap(KVM_CAP_X86_DISABLE_EXITS) & KVM_X86_DISABLE_EXITS_HLT)) + return; - TEST_REQUIRE(KVM_CAP_X86_DISABLE_EXITS); + pr_info("testing KVM_FEATURE_PV_UNHALT\n"); /* KVM_PV_UNHALT test */ vm = vm_create_with_one_vcpu(&vcpu, guest_main); @@ -151,19 +153,45 @@ static void test_pv_unhalt(void) TEST_ASSERT(vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT), "Enabling X86_FEATURE_KVM_PV_UNHALT had no effect"); - 
/* Make sure KVM clears vcpu->arch.kvm_cpuid */ + /* Verify KVM disallows disabling exits after vCPU creation. */ + r = __vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, KVM_X86_DISABLE_EXITS_HLT); + TEST_ASSERT(r && errno == EINVAL, + "Disabling exits after vCPU creation didn't fail as expected"); + + kvm_vm_free(vm); + + /* Verify that KVM clears PV_UNHALT from guest CPUID. */ + vm = vm_create(1); + vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, KVM_X86_DISABLE_EXITS_HLT); + + vcpu = vm_vcpu_add(vm, 0, NULL); + TEST_ASSERT(!vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT), + "vCPU created with PV_UNHALT set by default"); + + vcpu_set_cpuid_feature(vcpu, X86_FEATURE_KVM_PV_UNHALT); + TEST_ASSERT(!vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT), + "PV_UNHALT set in guest CPUID when HLT-exiting is disabled"); + + /* + * Clobber the KVM PV signature and verify KVM does NOT clear PV_UNHALT + * when KVM PV is not present, and DOES clear PV_UNHALT when switching + * back to the correct signature. + */ ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE); kvm_sig_old = ent->ebx; ent->ebx = 0xdeadbeef; vcpu_set_cpuid(vcpu); - vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, KVM_X86_DISABLE_EXITS_HLT); + vcpu_set_cpuid_feature(vcpu, X86_FEATURE_KVM_PV_UNHALT); + TEST_ASSERT(vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT), + "PV_UNHALT cleared when using bogus KVM PV signature"); + ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE); ent->ebx = kvm_sig_old; vcpu_set_cpuid(vcpu); TEST_ASSERT(!vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT), - "KVM_FEATURE_PV_UNHALT is set with KVM_CAP_X86_DISABLE_EXITS"); + "PV_UNHALT set in guest CPUID when HLT-exiting is disabled"); /* FIXME: actually test KVM_FEATURE_PV_UNHALT feature */ diff --git a/tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c b/tools/testing/selftests/kvm/x86/max_vcpuid_cap_test.c index 7e2bfb3c3f3b..7e2bfb3c3f3b 100644 --- a/tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c +++ b/tools/testing/selftests/kvm/x86/max_vcpuid_cap_test.c diff --git a/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c index 2b550eff35f1..2b550eff35f1 100644 --- a/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c +++ b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c diff --git a/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c b/tools/testing/selftests/kvm/x86/nested_exceptions_test.c index 3eb0313ffa39..3eb0313ffa39 100644 --- a/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c +++ b/tools/testing/selftests/kvm/x86/nested_exceptions_test.c diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c index e7efb2b35f8b..e7efb2b35f8b 100644 --- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c +++ b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.sh index caad084b8bfd..caad084b8bfd 100755 --- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh +++ b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.sh diff --git a/tools/testing/selftests/kvm/x86_64/platform_info_test.c b/tools/testing/selftests/kvm/x86/platform_info_test.c index 9cbf283ebc55..9cbf283ebc55 100644 --- a/tools/testing/selftests/kvm/x86_64/platform_info_test.c +++ b/tools/testing/selftests/kvm/x86/platform_info_test.c diff --git a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
b/tools/testing/selftests/kvm/x86/pmu_counters_test.c index 698cb36989db..698cb36989db 100644 --- a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c +++ b/tools/testing/selftests/kvm/x86/pmu_counters_test.c diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c index c15513cd74d1..c15513cd74d1 100644 --- a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c +++ b/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c diff --git a/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c b/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c index 82a8d88b5338..82a8d88b5338 100644 --- a/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c +++ b/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c diff --git a/tools/testing/selftests/kvm/x86_64/private_mem_kvm_exits_test.c b/tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c index 13e72fcec8dd..13e72fcec8dd 100644 --- a/tools/testing/selftests/kvm/x86_64/private_mem_kvm_exits_test.c +++ b/tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c diff --git a/tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c b/tools/testing/selftests/kvm/x86/recalc_apic_map_test.c index cbc92a862ea9..cbc92a862ea9 100644 --- a/tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c +++ b/tools/testing/selftests/kvm/x86/recalc_apic_map_test.c diff --git a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c b/tools/testing/selftests/kvm/x86/set_boot_cpu_id.c index 49913784bc82..49913784bc82 100644 --- a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c +++ b/tools/testing/selftests/kvm/x86/set_boot_cpu_id.c diff --git a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c b/tools/testing/selftests/kvm/x86/set_sregs_test.c index c021c0795a96..f4095a3d1278 100644 --- a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c +++ b/tools/testing/selftests/kvm/x86/set_sregs_test.c @@ -41,13 +41,15 @@ do { \ TEST_ASSERT(!memcmp(&new, &orig, sizeof(new)), "KVM modified sregs"); \ } while (0) +#define KVM_ALWAYS_ALLOWED_CR4 (X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | \ + X86_CR4_DE | X86_CR4_PSE | X86_CR4_PAE | \ + X86_CR4_MCE | X86_CR4_PGE | X86_CR4_PCE | \ + X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT) + static uint64_t calc_supported_cr4_feature_bits(void) { - uint64_t cr4; + uint64_t cr4 = KVM_ALWAYS_ALLOWED_CR4; - cr4 = X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE | - X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE | X86_CR4_PGE | - X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT; if (kvm_cpu_has(X86_FEATURE_UMIP)) cr4 |= X86_CR4_UMIP; if (kvm_cpu_has(X86_FEATURE_LA57)) @@ -72,36 +74,31 @@ static uint64_t calc_supported_cr4_feature_bits(void) return cr4; } -int main(int argc, char *argv[]) +static void test_cr_bits(struct kvm_vcpu *vcpu, uint64_t cr4) { struct kvm_sregs sregs; - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - uint64_t cr4; int rc, i; - /* - * Create a dummy VM, specifically to avoid doing KVM_SET_CPUID2, and - * use it to verify all supported CR4 bits can be set prior to defining - * the vCPU model, i.e. without doing KVM_SET_CPUID2. 
- */ - vm = vm_create_barebones(); - vcpu = __vm_vcpu_add(vm, 0); - vcpu_sregs_get(vcpu, &sregs); - - sregs.cr0 = 0; - sregs.cr4 |= calc_supported_cr4_feature_bits(); - cr4 = sregs.cr4; - + sregs.cr0 &= ~(X86_CR0_CD | X86_CR0_NW); + sregs.cr4 |= cr4; rc = _vcpu_sregs_set(vcpu, &sregs); TEST_ASSERT(!rc, "Failed to set supported CR4 bits (0x%lx)", cr4); + TEST_ASSERT(!!(sregs.cr4 & X86_CR4_OSXSAVE) == + (vcpu->cpuid && vcpu_cpuid_has(vcpu, X86_FEATURE_OSXSAVE)), + "KVM didn't %s OSXSAVE in CPUID as expected", + (sregs.cr4 & X86_CR4_OSXSAVE) ? "set" : "clear"); + + TEST_ASSERT(!!(sregs.cr4 & X86_CR4_PKE) == + (vcpu->cpuid && vcpu_cpuid_has(vcpu, X86_FEATURE_OSPKE)), + "KVM didn't %s OSPKE in CPUID as expected", + (sregs.cr4 & X86_CR4_PKE) ? "set" : "clear"); + vcpu_sregs_get(vcpu, &sregs); TEST_ASSERT(sregs.cr4 == cr4, "sregs.CR4 (0x%llx) != CR4 (0x%lx)", sregs.cr4, cr4); - /* Verify all unsupported features are rejected by KVM. */ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_UMIP); TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_LA57); TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_VMXE); @@ -119,10 +116,28 @@ int main(int argc, char *argv[]) /* NW without CD is illegal, as is PG without PE. */ TEST_INVALID_CR_BIT(vcpu, cr0, sregs, X86_CR0_NW); TEST_INVALID_CR_BIT(vcpu, cr0, sregs, X86_CR0_PG); +} + +int main(int argc, char *argv[]) +{ + struct kvm_sregs sregs; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + int rc; + + /* + * Create a dummy VM, specifically to avoid doing KVM_SET_CPUID2, and + * use it to verify KVM enforces guest CPUID even if *userspace* never + * sets CPUID. + */ + vm = vm_create_barebones(); + vcpu = __vm_vcpu_add(vm, 0); + test_cr_bits(vcpu, KVM_ALWAYS_ALLOWED_CR4); kvm_vm_free(vm); - /* Create a "real" VM and verify APIC_BASE can be set. */ + /* + * Create a "real" VM with a fully populated guest CPUID and verify + * APIC_BASE and all supported CR4 bits can be set. + */ vm = vm_create_with_one_vcpu(&vcpu, NULL); vcpu_sregs_get(vcpu, &sregs); @@ -135,6 +150,8 @@ int main(int argc, char *argv[]) TEST_ASSERT(!rc, "Couldn't set IA32_APIC_BASE to %llx (valid)", sregs.apic_base); + test_cr_bits(vcpu, calc_supported_cr4_feature_bits()); + kvm_vm_free(vm); return 0; diff --git a/tools/testing/selftests/kvm/x86_64/sev_init2_tests.c b/tools/testing/selftests/kvm/x86/sev_init2_tests.c index 3fb967f40c6a..3fb967f40c6a 100644 --- a/tools/testing/selftests/kvm/x86_64/sev_init2_tests.c +++ b/tools/testing/selftests/kvm/x86/sev_init2_tests.c diff --git a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c b/tools/testing/selftests/kvm/x86/sev_migrate_tests.c index 0a6dfba3905b..0a6dfba3905b 100644 --- a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c +++ b/tools/testing/selftests/kvm/x86/sev_migrate_tests.c diff --git a/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c b/tools/testing/selftests/kvm/x86/sev_smoke_test.c index ae77698e6e97..a1a688e75266 100644 --- a/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c +++ b/tools/testing/selftests/kvm/x86/sev_smoke_test.c @@ -155,7 +155,7 @@ static void guest_shutdown_code(void) /* Clobber the IDT so that #UD is guaranteed to trigger SHUTDOWN.
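+ * With a zeroed IDT, delivering the #UD faults again, escalating + * through #GP and #DF to a triple fault, which is reported as SHUTDOWN.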
*/ memset(&idt, 0, sizeof(idt)); - __asm__ __volatile__("lidt %0" :: "m"(idt)); + set_idt(&idt); __asm__ __volatile__("ud2"); } diff --git a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c b/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c index fabeeaddfb3a..fabeeaddfb3a 100644 --- a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c +++ b/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86/smm_test.c index 55c88d664a94..55c88d664a94 100644 --- a/tools/testing/selftests/kvm/x86_64/smm_test.c +++ b/tools/testing/selftests/kvm/x86/smm_test.c diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86/state_test.c index 141b7fc0c965..141b7fc0c965 100644 --- a/tools/testing/selftests/kvm/x86_64/state_test.c +++ b/tools/testing/selftests/kvm/x86/state_test.c diff --git a/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c b/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c index 916e04248fbb..916e04248fbb 100644 --- a/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c +++ b/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c diff --git a/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c b/tools/testing/selftests/kvm/x86/svm_nested_shutdown_test.c index 00135cbba35e..00135cbba35e 100644 --- a/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c +++ b/tools/testing/selftests/kvm/x86/svm_nested_shutdown_test.c diff --git a/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c b/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c index 7b6481d6c0d3..7b6481d6c0d3 100644 --- a/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c +++ b/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c diff --git a/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c b/tools/testing/selftests/kvm/x86/svm_vmcall_test.c index 8a62cca28cfb..8a62cca28cfb 100644 --- a/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c +++ b/tools/testing/selftests/kvm/x86/svm_vmcall_test.c diff --git a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c b/tools/testing/selftests/kvm/x86/sync_regs_test.c index 8fa3948b0170..8fa3948b0170 100644 --- a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c +++ b/tools/testing/selftests/kvm/x86/sync_regs_test.c diff --git a/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c b/tools/testing/selftests/kvm/x86/triple_fault_event_test.c index 56306a19144a..56306a19144a 100644 --- a/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c +++ b/tools/testing/selftests/kvm/x86/triple_fault_event_test.c diff --git a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c b/tools/testing/selftests/kvm/x86/tsc_msrs_test.c index 12b0964f4f13..12b0964f4f13 100644 --- a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c +++ b/tools/testing/selftests/kvm/x86/tsc_msrs_test.c diff --git a/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c b/tools/testing/selftests/kvm/x86/tsc_scaling_sync.c index 59c7304f805e..59c7304f805e 100644 --- a/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c +++ b/tools/testing/selftests/kvm/x86/tsc_scaling_sync.c diff --git a/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c b/tools/testing/selftests/kvm/x86/ucna_injection_test.c index 57f157c06b39..57f157c06b39 100644 --- a/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c +++ 
b/tools/testing/selftests/kvm/x86/ucna_injection_test.c diff --git a/tools/testing/selftests/kvm/x86_64/userspace_io_test.c b/tools/testing/selftests/kvm/x86/userspace_io_test.c index 9481cbcf284f..9481cbcf284f 100644 --- a/tools/testing/selftests/kvm/x86_64/userspace_io_test.c +++ b/tools/testing/selftests/kvm/x86/userspace_io_test.c diff --git a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c index 32b2794b78fe..32b2794b78fe 100644 --- a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c +++ b/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c diff --git a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c b/tools/testing/selftests/kvm/x86/vmx_apic_access_test.c index a81a24761aac..a81a24761aac 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_apic_access_test.c diff --git a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c b/tools/testing/selftests/kvm/x86/vmx_close_while_nested_test.c index dad988351493..dad988351493 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_close_while_nested_test.c diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c index fa512d033205..fa512d033205 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c diff --git a/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c b/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c index 3fd6eceab46f..3fd6eceab46f 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c +++ b/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c diff --git a/tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c b/tools/testing/selftests/kvm/x86/vmx_invalid_nested_guest_state.c index a100ee5f0009..a100ee5f0009 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c +++ b/tools/testing/selftests/kvm/x86/vmx_invalid_nested_guest_state.c diff --git a/tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c b/tools/testing/selftests/kvm/x86/vmx_msrs_test.c index 90720b6205f4..90720b6205f4 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_msrs_test.c diff --git a/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c b/tools/testing/selftests/kvm/x86/vmx_nested_tsc_scaling_test.c index 1759fa5cb3f2..1759fa5cb3f2 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_nested_tsc_scaling_test.c diff --git a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c b/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c index a1f5ff45d518..a1f5ff45d518 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c diff --git a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c b/tools/testing/selftests/kvm/x86/vmx_preemption_timer_test.c index 00dd2ac07a61..00dd2ac07a61 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_preemption_timer_test.c diff --git a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c 
b/tools/testing/selftests/kvm/x86/vmx_set_nested_state_test.c index 67a62a5a8895..67a62a5a8895 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_set_nested_state_test.c diff --git a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c index 2ceb5c78c442..2ceb5c78c442 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c diff --git a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c index a76078a08ff8..a76078a08ff8 100644 --- a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c +++ b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c diff --git a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c b/tools/testing/selftests/kvm/x86/xapic_state_test.c index 88bcca188799..88bcca188799 100644 --- a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c +++ b/tools/testing/selftests/kvm/x86/xapic_state_test.c diff --git a/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c b/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c index c8a5c5e51661..c8a5c5e51661 100644 --- a/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c +++ b/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c index a59b3c799bb2..a59b3c799bb2 100644 --- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c +++ b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c diff --git a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c b/tools/testing/selftests/kvm/x86/xen_vmcall_test.c index 2585087cdf5c..2585087cdf5c 100644 --- a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c +++ b/tools/testing/selftests/kvm/x86/xen_vmcall_test.c diff --git a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c b/tools/testing/selftests/kvm/x86/xss_msr_test.c index f331a4e9bae3..f331a4e9bae3 100644 --- a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c +++ b/tools/testing/selftests/kvm/x86/xss_msr_test.c diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c index 47a9f68f7b24..b2aa6bf24d3a 100644 --- a/virt/kvm/guest_memfd.c +++ b/virt/kvm/guest_memfd.c @@ -118,6 +118,8 @@ static void kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start, .end = slot->base_gfn + min(pgoff + slot->npages, end) - pgoff, .slot = slot, .may_block = true, + /* guest memfd is relevant to only private mappings. */ + .attr_filter = KVM_FILTER_PRIVATE, }; if (!found_memslot) { @@ -259,15 +261,19 @@ static int kvm_gmem_release(struct inode *inode, struct file *file) * dereferencing the slot for existing bindings needs to be protected * against memslot updates, specifically so that unbind doesn't race * and free the memslot (kvm_gmem_get_file() will return NULL). + * + * Since .release is called only when the reference count is zero, + * after which file_ref_get() and get_file_active() fail, + * kvm_gmem_get_pfn() cannot be using the file concurrently. + * file_ref_put() provides a full barrier, and get_file_active() the + * matching acquire barrier. */ mutex_lock(&kvm->slots_lock); filemap_invalidate_lock(inode->i_mapping); xa_for_each(&gmem->bindings, index, slot) - rcu_assign_pointer(slot->gmem.file, NULL); - - synchronize_rcu(); + WRITE_ONCE(slot->gmem.file, NULL); /* * All in-flight operations are gone and new bindings can be created. 
@@ -296,8 +302,7 @@ static inline struct file *kvm_gmem_get_file(struct kvm_memory_slot *slot)
 	/*
	 * Do not return slot->gmem.file if it has already been closed;
	 * there might be some time between the last fput() and when
-	 * kvm_gmem_release() clears slot->gmem.file, and you do not
-	 * want to spin in the meanwhile.
+	 * kvm_gmem_release() clears slot->gmem.file.
	 */
	return get_file_active(&slot->gmem.file);
 }
@@ -508,11 +513,11 @@ int kvm_gmem_bind(struct kvm *kvm, struct kvm_memory_slot *slot,
	}

	/*
-	 * No synchronize_rcu() needed, any in-flight readers are guaranteed to
-	 * be see either a NULL file or this new file, no need for them to go
-	 * away.
+	 * memslots of flag KVM_MEM_GUEST_MEMFD are immutable to change, so
+	 * kvm_gmem_bind() must occur on a new memslot.  Because the memslot
+	 * is not visible yet, kvm_gmem_get_pfn() is guaranteed to see the file.
	 */
-	rcu_assign_pointer(slot->gmem.file, file);
+	WRITE_ONCE(slot->gmem.file, file);
	slot->gmem.pgoff = start;

	xa_store_range(&gmem->bindings, start, end - 1, slot, GFP_KERNEL);
@@ -548,8 +553,12 @@ void kvm_gmem_unbind(struct kvm_memory_slot *slot)

	filemap_invalidate_lock(file->f_mapping);
	xa_store_range(&gmem->bindings, start, end - 1, NULL, GFP_KERNEL);
-	rcu_assign_pointer(slot->gmem.file, NULL);
-	synchronize_rcu();
+
+	/*
+	 * synchronize_srcu(&kvm->srcu) ensured that kvm_gmem_get_pfn()
+	 * cannot see this memslot.
+	 */
+	WRITE_ONCE(slot->gmem.file, NULL);
	filemap_invalidate_unlock(file->f_mapping);

	fput(file);
@@ -561,11 +570,12 @@ static struct folio *__kvm_gmem_get_pfn(struct file *file,
					struct kvm_memory_slot *slot,
					pgoff_t index, kvm_pfn_t *pfn,
					bool *is_prepared, int *max_order)
 {
+	struct file *gmem_file = READ_ONCE(slot->gmem.file);
	struct kvm_gmem *gmem = file->private_data;
	struct folio *folio;

-	if (file != slot->gmem.file) {
-		WARN_ON_ONCE(slot->gmem.file);
+	if (file != gmem_file) {
+		WARN_ON_ONCE(gmem_file);
		return ERR_PTR(-EFAULT);
	}

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index de2c11dae231..faf10671eed2 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -594,6 +594,11 @@ static __always_inline kvm_mn_ret_t __kvm_handle_hva_range(struct kvm *kvm,
	 */
	gfn_range.arg = range->arg;
	gfn_range.may_block = range->may_block;
+	/*
+	 * HVA-based notifications aren't relevant to private
+	 * mappings as they don't have a userspace mapping.
+	 */
+	gfn_range.attr_filter = KVM_FILTER_SHARED;

	/*
	 * {gfn(page) | page intersects with [hva_start, hva_end)} =
@@ -1926,16 +1931,8 @@ static bool kvm_check_memslot_overlap(struct kvm_memslots *slots, int id,
	return false;
 }

-/*
- * Allocate some memory and give it an address in the guest physical address
- * space.
- *
- * Discontiguous memory is allowed, mostly for framebuffers.
- *
- * Must be called holding kvm->slots_lock for write.
- */
-int __kvm_set_memory_region(struct kvm *kvm,
-			    const struct kvm_userspace_memory_region2 *mem)
+static int kvm_set_memory_region(struct kvm *kvm,
+				 const struct kvm_userspace_memory_region2 *mem)
 {
	struct kvm_memory_slot *old, *new;
	struct kvm_memslots *slots;
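The __kvm_handle_hva_range() hunk above tags HVA-based invalidations with KVM_FILTER_SHARED so consumers can skip mappings the event cannot touch. A sketch of how such a filter check might look, using illustrative user-space types (range_is_relevant() and the FILTER_* names are hypothetical, not KVM's internal helpers):

#include <stdbool.h>
#include <stdint.h>

/* Mirrors the idea of KVM_FILTER_{SHARED,PRIVATE}: each event names the
 * classes of mappings it can possibly affect. */
enum range_filter {
	FILTER_SHARED  = 1u << 0,
	FILTER_PRIVATE = 1u << 1,
};

struct gfn_range {
	uint64_t start, end;
	unsigned int attr_filter;	/* which mapping classes are targeted */
};

/* A handler for private-only state (e.g. guest_memfd-backed memory) can
 * ignore shared-only invalidations outright, and vice versa. */
static bool range_is_relevant(const struct gfn_range *r,
			      bool mapping_is_private)
{
	return r->attr_filter & (mapping_is_private ? FILTER_PRIVATE
						    : FILTER_SHARED);
}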
@@ -1945,6 +1942,8 @@ int __kvm_set_memory_region(struct kvm *kvm,
	int as_id, id;
	int r;

+	lockdep_assert_held(&kvm->slots_lock);
+
	r = check_memory_region_flags(kvm, mem);
	if (r)
		return r;
@@ -2056,19 +2055,19 @@ out:
	kfree(new);
	return r;
 }
-EXPORT_SYMBOL_GPL(__kvm_set_memory_region);

-int kvm_set_memory_region(struct kvm *kvm,
-			  const struct kvm_userspace_memory_region2 *mem)
+int kvm_set_internal_memslot(struct kvm *kvm,
+			     const struct kvm_userspace_memory_region2 *mem)
 {
-	int r;
+	if (WARN_ON_ONCE(mem->slot < KVM_USER_MEM_SLOTS))
+		return -EINVAL;

-	mutex_lock(&kvm->slots_lock);
-	r = __kvm_set_memory_region(kvm, mem);
-	mutex_unlock(&kvm->slots_lock);
-	return r;
+	if (WARN_ON_ONCE(mem->flags))
+		return -EINVAL;
+
+	return kvm_set_memory_region(kvm, mem);
 }
-EXPORT_SYMBOL_GPL(kvm_set_memory_region);
+EXPORT_SYMBOL_GPL(kvm_set_internal_memslot);

 static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
					   struct kvm_userspace_memory_region2 *mem)
@@ -2076,6 +2075,7 @@ static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
	if ((u16)mem->slot >= KVM_USER_MEM_SLOTS)
		return -EINVAL;

+	guard(mutex)(&kvm->slots_lock);
	return kvm_set_memory_region(kvm, mem);
 }

@@ -2408,6 +2408,14 @@ static __always_inline void kvm_handle_gfn_range(struct kvm *kvm,
	gfn_range.arg = range->arg;
	gfn_range.may_block = range->may_block;

+	/*
+	 * If/when KVM supports more attributes beyond private vs. shared, this
+	 * _could_ set KVM_FILTER_{SHARED,PRIVATE} appropriately if the entire target
+	 * range already has the desired private vs. shared state (it's unclear
+	 * if that is a net win).  For now, KVM reaches this point if and only
+	 * if the private flag is being toggled, i.e. all mappings are in play.
+	 */
+
	for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) {
		slots = __kvm_memslots(kvm, i);
@@ -2464,6 +2472,7 @@ static int kvm_vm_set_mem_attributes(struct kvm *kvm, gfn_t start, gfn_t end,
	struct kvm_mmu_notifier_range pre_set_range = {
		.start = start,
		.end = end,
+		.arg.attributes = attributes,
		.handler = kvm_pre_set_memory_attributes,
		.on_lock = kvm_mmu_invalidate_begin,
		.flush_on_ret = true,
@@ -4116,32 +4125,30 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id)

	mutex_lock(&kvm->lock);

-#ifdef CONFIG_LOCKDEP
-	/* Ensure that lockdep knows vcpu->mutex is taken *inside* kvm->lock */
-	mutex_lock(&vcpu->mutex);
-	mutex_unlock(&vcpu->mutex);
-#endif
-
	if (kvm_get_vcpu_by_id(kvm, id)) {
		r = -EEXIST;
		goto unlock_vcpu_destroy;
	}

	vcpu->vcpu_idx = atomic_read(&kvm->online_vcpus);
-	r = xa_reserve(&kvm->vcpu_array, vcpu->vcpu_idx, GFP_KERNEL_ACCOUNT);
+	r = xa_insert(&kvm->vcpu_array, vcpu->vcpu_idx, vcpu, GFP_KERNEL_ACCOUNT);
+	WARN_ON_ONCE(r == -EBUSY);
	if (r)
		goto unlock_vcpu_destroy;

-	/* Now it's all set up, let userspace reach it */
+	/*
+	 * Now it's all set up, let userspace reach it.  Grab the vCPU's mutex
+	 * so that userspace can't invoke vCPU ioctl()s until the vCPU is fully
+	 * visible (per online_vcpus), e.g. so that KVM doesn't get tricked
+	 * into a NULL-pointer dereference because KVM thinks the _current_
+	 * vCPU doesn't exist.  As a bonus, taking vcpu->mutex ensures lockdep
+	 * knows it's taken *inside* kvm->lock.
+	 */
+	mutex_lock(&vcpu->mutex);
	kvm_get_kvm(kvm);
	r = create_vcpu_fd(vcpu);
	if (r < 0)
-		goto kvm_put_xa_release;
-
-	if (KVM_BUG_ON(xa_store(&kvm->vcpu_array, vcpu->vcpu_idx, vcpu, 0), kvm)) {
-		r = -EINVAL;
-		goto kvm_put_xa_release;
-	}
+		goto kvm_put_xa_erase;

	/*
	 * Pairs with smp_rmb() in kvm_get_vcpu.  Store the vcpu
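The create-vCPU hunk above inserts the vCPU into vcpu_array early but holds vcpu->mutex until it is fully online, so a racing ioctl blocks rather than observing a half-built vCPU. A user-space analogue of that publish-then-unlock pattern, assuming pthreads and C11 atomics (all uvm_*/uvcpu_* names are hypothetical, and the mutexes are assumed already initialized):

#include <pthread.h>
#include <stdatomic.h>

struct uvcpu {
	pthread_mutex_t lock;	/* plays the role of vcpu->mutex */
	int fd;			/* valid only once the vCPU is online */
};

struct uvm {
	pthread_mutex_t lock;	/* plays the role of kvm->lock */
	struct uvcpu *vcpus[64];
	atomic_int online;	/* plays the role of online_vcpus */
};

static void uvm_create_vcpu(struct uvm *vm, struct uvcpu *v, int idx, int fd)
{
	pthread_mutex_lock(&vm->lock);

	vm->vcpus[idx] = v;	/* reachable early, like xa_insert() */

	/*
	 * Hold the vCPU's own lock across the rest of setup: anyone who
	 * already found the vCPU blocks here instead of racing with us.
	 */
	pthread_mutex_lock(&v->lock);
	v->fd = fd;

	/* Release store pairs with the readers' acquire load of 'online'. */
	atomic_fetch_add_explicit(&vm->online, 1, memory_order_release);
	pthread_mutex_unlock(&v->lock);

	pthread_mutex_unlock(&vm->lock);
}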
@@ -4149,15 +4156,17 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id)
	 */
	smp_wmb();
	atomic_inc(&kvm->online_vcpus);
+	mutex_unlock(&vcpu->mutex);

	mutex_unlock(&kvm->lock);
	kvm_arch_vcpu_postcreate(vcpu);
	kvm_create_vcpu_debugfs(vcpu);
	return r;

-kvm_put_xa_release:
+kvm_put_xa_erase:
+	mutex_unlock(&vcpu->mutex);
	kvm_put_kvm_no_destroy(kvm);
-	xa_release(&kvm->vcpu_array, vcpu->vcpu_idx);
+	xa_erase(&kvm->vcpu_array, vcpu->vcpu_idx);
 unlock_vcpu_destroy:
	mutex_unlock(&kvm->lock);
	kvm_dirty_ring_free(&vcpu->dirty_ring);
@@ -4282,6 +4291,33 @@ static int kvm_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu,
 }
 #endif

+static int kvm_wait_for_vcpu_online(struct kvm_vcpu *vcpu)
+{
+	struct kvm *kvm = vcpu->kvm;
+
+	/*
+	 * In practice, this happy path will always be taken, as a well-behaved
+	 * VMM will never invoke a vCPU ioctl() before KVM_CREATE_VCPU returns.
+	 */
+	if (likely(vcpu->vcpu_idx < atomic_read(&kvm->online_vcpus)))
+		return 0;
+
+	/*
+	 * Acquire and release the vCPU's mutex to wait for vCPU creation to
+	 * complete (kvm_vm_ioctl_create_vcpu() holds the mutex until the vCPU
+	 * is fully online).
+	 */
+	if (mutex_lock_killable(&vcpu->mutex))
+		return -EINTR;
+
+	mutex_unlock(&vcpu->mutex);
+
+	if (WARN_ON_ONCE(!kvm_get_vcpu(kvm, vcpu->vcpu_idx)))
+		return -EIO;
+
+	return 0;
+}
+
 static long kvm_vcpu_ioctl(struct file *filp,
			   unsigned int ioctl, unsigned long arg)
 {
@@ -4298,6 +4334,15 @@ static long kvm_vcpu_ioctl(struct file *filp,
		return -EINVAL;

	/*
+	 * Wait for the vCPU to be online before handling the ioctl(), as KVM
+	 * assumes the vCPU is reachable via vcpu_array, i.e. may dereference
+	 * a NULL pointer if userspace invokes an ioctl() before KVM is ready.
+	 */
+	r = kvm_wait_for_vcpu_online(vcpu);
+	if (r)
+		return r;
+
+	/*
	 * Some architectures have vcpu ioctls that are asynchronous to vcpu
	 * execution; mutex_lock() would break them.
	 */
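Continuing the earlier user-space sketch, the reader side mirrors kvm_wait_for_vcpu_online(): a fast path on the published online count, and a slow path that simply takes and drops the per-vCPU lock to wait out creation (again, hypothetical names building on the uvm/uvcpu types above):

static int uvm_vcpu_ready(struct uvm *vm, struct uvcpu *v, int idx)
{
	/* Fast path: creation already completed and was published. */
	if (idx < atomic_load_explicit(&vm->online, memory_order_acquire))
		return 0;

	/*
	 * Slow path: the creator holds v->lock until the vCPU is online,
	 * so lock+unlock acts as an implicit wait-for-creation.
	 */
	pthread_mutex_lock(&v->lock);
	pthread_mutex_unlock(&v->lock);

	return vm->vcpus[idx] == v ? 0 : -1;
}

The design point in both the sketch and the kernel change is that the waiter never needs a new synchronization primitive: the mutex the creator already holds doubles as the completion signal.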