summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/networking/netdevices.rst10
-rw-r--r--Documentation/virt/kvm/api.rst723
-rw-r--r--MAINTAINERS2
-rw-r--r--Makefile7
-rw-r--r--arch/arm/configs/at91_dt_defconfig1
-rw-r--r--arch/arm/configs/collie_defconfig1
-rw-r--r--arch/arm/configs/davinci_all_defconfig1
-rw-r--r--arch/arm/configs/dove_defconfig1
-rw-r--r--arch/arm/configs/exynos_defconfig1
-rw-r--r--arch/arm/configs/imx_v6_v7_defconfig2
-rw-r--r--arch/arm/configs/lpc18xx_defconfig1
-rw-r--r--arch/arm/configs/lpc32xx_defconfig1
-rw-r--r--arch/arm/configs/milbeaut_m10v_defconfig2
-rw-r--r--arch/arm/configs/mmp2_defconfig1
-rw-r--r--arch/arm/configs/multi_v4t_defconfig1
-rw-r--r--arch/arm/configs/multi_v5_defconfig1
-rw-r--r--arch/arm/configs/mvebu_v5_defconfig1
-rw-r--r--arch/arm/configs/mxs_defconfig1
-rw-r--r--arch/arm/configs/omap2plus_defconfig3
-rw-r--r--arch/arm/configs/orion5x_defconfig1
-rw-r--r--arch/arm/configs/pxa168_defconfig1
-rw-r--r--arch/arm/configs/pxa910_defconfig1
-rw-r--r--arch/arm/configs/pxa_defconfig2
-rw-r--r--arch/arm/configs/s5pv210_defconfig1
-rw-r--r--arch/arm/configs/sama7_defconfig2
-rw-r--r--arch/arm/configs/spitz_defconfig1
-rw-r--r--arch/arm/configs/stm32_defconfig1
-rw-r--r--arch/arm/configs/wpcm450_defconfig2
-rw-r--r--arch/arm64/include/asm/esr.h44
-rw-r--r--arch/arm64/include/asm/kvm_emulate.h7
-rw-r--r--arch/arm64/include/asm/kvm_ras.h2
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/fault.h70
-rw-r--r--arch/arm64/kvm/hyp/nvhe/ffa.c9
-rw-r--r--arch/arm64/kvm/hyp/nvhe/mem_protect.c9
-rw-r--r--arch/arm64/kvm/mmu.c31
-rw-r--r--arch/arm64/tools/sysreg7
-rw-r--r--arch/hexagon/configs/comet_defconfig3
-rw-r--r--arch/m68k/configs/amcore_defconfig1
-rw-r--r--arch/mips/configs/ath79_defconfig1
-rw-r--r--arch/mips/configs/bigsur_defconfig1
-rw-r--r--arch/mips/configs/fuloong2e_defconfig1
-rw-r--r--arch/mips/configs/ip22_defconfig1
-rw-r--r--arch/mips/configs/ip27_defconfig1
-rw-r--r--arch/mips/configs/ip30_defconfig1
-rw-r--r--arch/mips/configs/ip32_defconfig1
-rw-r--r--arch/mips/configs/omega2p_defconfig1
-rw-r--r--arch/mips/configs/rb532_defconfig1
-rw-r--r--arch/mips/configs/rt305x_defconfig1
-rw-r--r--arch/mips/configs/sb1250_swarm_defconfig1
-rw-r--r--arch/mips/configs/vocore2_defconfig1
-rw-r--r--arch/mips/configs/xway_defconfig1
-rw-r--r--arch/parisc/configs/generic-32bit_defconfig2
-rw-r--r--arch/parisc/configs/generic-64bit_defconfig1
-rw-r--r--arch/powerpc/configs/44x/sam440ep_defconfig1
-rw-r--r--arch/powerpc/configs/44x/warp_defconfig2
-rw-r--r--arch/powerpc/configs/83xx/mpc832x_rdb_defconfig1
-rw-r--r--arch/powerpc/configs/83xx/mpc834x_itx_defconfig1
-rw-r--r--arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig1
-rw-r--r--arch/powerpc/configs/83xx/mpc837x_rdb_defconfig1
-rw-r--r--arch/powerpc/configs/85xx/ge_imp3a_defconfig2
-rw-r--r--arch/powerpc/configs/85xx/stx_gp3_defconfig2
-rw-r--r--arch/powerpc/configs/85xx/xes_mpc85xx_defconfig1
-rw-r--r--arch/powerpc/configs/86xx-hw.config1
-rw-r--r--arch/powerpc/configs/amigaone_defconfig1
-rw-r--r--arch/powerpc/configs/chrp32_defconfig1
-rw-r--r--arch/powerpc/configs/fsl-emb-nonhw.config1
-rw-r--r--arch/powerpc/configs/g5_defconfig1
-rw-r--r--arch/powerpc/configs/gamecube_defconfig1
-rw-r--r--arch/powerpc/configs/linkstation_defconfig2
-rw-r--r--arch/powerpc/configs/mpc83xx_defconfig1
-rw-r--r--arch/powerpc/configs/mpc866_ads_defconfig1
-rw-r--r--arch/powerpc/configs/mvme5100_defconfig2
-rw-r--r--arch/powerpc/configs/pasemi_defconfig1
-rw-r--r--arch/powerpc/configs/pmac32_defconfig1
-rw-r--r--arch/powerpc/configs/ppc44x_defconfig1
-rw-r--r--arch/powerpc/configs/ppc64e_defconfig1
-rw-r--r--arch/powerpc/configs/ps3_defconfig2
-rw-r--r--arch/powerpc/configs/skiroot_defconfig2
-rw-r--r--arch/powerpc/configs/storcenter_defconfig1
-rw-r--r--arch/powerpc/configs/wii_defconfig1
-rw-r--r--arch/s390/kvm/intercept.c2
-rw-r--r--arch/s390/kvm/interrupt.c8
-rw-r--r--arch/s390/kvm/kvm-s390.c10
-rw-r--r--arch/s390/kvm/trace-s390.h4
-rw-r--r--arch/sh/configs/ap325rxa_defconfig1
-rw-r--r--arch/sh/configs/ecovec24_defconfig1
-rw-r--r--arch/sh/configs/edosk7705_defconfig1
-rw-r--r--arch/sh/configs/espt_defconfig1
-rw-r--r--arch/sh/configs/hp6xx_defconfig2
-rw-r--r--arch/sh/configs/kfr2r09-romimage_defconfig1
-rw-r--r--arch/sh/configs/landisk_defconfig1
-rw-r--r--arch/sh/configs/lboxre2_defconfig1
-rw-r--r--arch/sh/configs/magicpanelr2_defconfig2
-rw-r--r--arch/sh/configs/migor_defconfig1
-rw-r--r--arch/sh/configs/r7780mp_defconfig1
-rw-r--r--arch/sh/configs/r7785rp_defconfig1
-rw-r--r--arch/sh/configs/rts7751r2d1_defconfig1
-rw-r--r--arch/sh/configs/rts7751r2dplus_defconfig1
-rw-r--r--arch/sh/configs/sdk7780_defconfig1
-rw-r--r--arch/sh/configs/se7206_defconfig3
-rw-r--r--arch/sh/configs/se7712_defconfig1
-rw-r--r--arch/sh/configs/se7721_defconfig1
-rw-r--r--arch/sh/configs/se7724_defconfig1
-rw-r--r--arch/sh/configs/sh03_defconfig1
-rw-r--r--arch/sh/configs/sh2007_defconfig2
-rw-r--r--arch/sh/configs/sh7724_generic_defconfig1
-rw-r--r--arch/sh/configs/sh7763rdp_defconfig1
-rw-r--r--arch/sh/configs/sh7770_generic_defconfig1
-rw-r--r--arch/sh/configs/titan_defconfig1
-rw-r--r--arch/sparc/configs/sparc64_defconfig1
-rw-r--r--arch/x86/include/asm/kvm_host.h7
-rw-r--r--arch/x86/include/asm/smap.h12
-rw-r--r--arch/x86/kvm/cpuid.c8
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.c8
-rw-r--r--arch/x86/kvm/vmx/posted_intr.c37
-rw-r--r--arch/x86/kvm/x86.c4
-rw-r--r--arch/x86/xen/enlighten.c10
-rw-r--r--arch/x86/xen/enlighten_pvh.c19
-rw-r--r--arch/x86/xen/setup.c3
-rw-r--r--arch/x86/xen/xen-asm.S4
-rw-r--r--drivers/block/Kconfig2
-rw-r--r--drivers/block/drbd/Kconfig2
-rw-r--r--drivers/block/ublk_drv.c85
-rw-r--r--drivers/firmware/smccc/kvm_guest.c4
-rw-r--r--drivers/gpio/TODO34
-rw-r--r--drivers/gpio/gpio-mpc8xxx.c4
-rw-r--r--drivers/gpio/gpio-tegra186.c27
-rw-r--r--drivers/gpio/gpio-zynq.c1
-rw-r--r--drivers/gpio/gpiolib-devres.c6
-rw-r--r--drivers/gpio/gpiolib-of.c8
-rw-r--r--drivers/md/Kconfig2
-rw-r--r--drivers/md/persistent-data/Kconfig2
-rw-r--r--drivers/mtd/inftlcore.c9
-rw-r--r--drivers/mtd/nand/Makefile3
-rw-r--r--drivers/mtd/nand/raw/r852.c3
-rw-r--r--drivers/net/ethernet/broadcom/Kconfig4
-rw-r--r--drivers/net/ethernet/cavium/Kconfig2
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/qos.c5
-rw-r--r--drivers/net/ethernet/wangxun/libwx/wx_lib.c6
-rw-r--r--drivers/net/ethernet/wangxun/libwx/wx_type.h3
-rw-r--r--drivers/net/phy/phy_device.c57
-rw-r--r--drivers/net/ppp/ppp_synctty.c5
-rw-r--r--drivers/xen/Kconfig2
-rw-r--r--drivers/xen/balloon.c34
-rw-r--r--drivers/xen/xenbus/xenbus_probe_frontend.c1
-rw-r--r--fs/bcachefs/Kconfig2
-rw-r--r--fs/btrfs/Kconfig2
-rw-r--r--fs/ceph/Kconfig2
-rw-r--r--fs/erofs/Kconfig2
-rw-r--r--fs/gfs2/Kconfig1
-rw-r--r--fs/xfs/Kconfig2
-rw-r--r--include/kunit/test.h2
-rw-r--r--include/linux/cgroup-defs.h1
-rw-r--r--include/linux/cgroup.h2
-rw-r--r--include/linux/gpio/consumer.h1
-rw-r--r--include/linux/kvm_host.h2
-rw-r--r--include/linux/mtd/spinand.h2
-rw-r--r--include/linux/netdevice.h2
-rw-r--r--include/linux/rtnetlink.h2
-rw-r--r--include/net/sctp/structs.h3
-rw-r--r--include/net/sock.h40
-rw-r--r--io_uring/kbuf.c2
-rw-r--r--io_uring/rsrc.c17
-rw-r--r--io_uring/zcrx.c19
-rw-r--r--io_uring/zcrx.h5
-rw-r--r--kernel/cgroup/cgroup.c6
-rw-r--r--kernel/cgroup/cpuset-internal.h1
-rw-r--r--kernel/cgroup/cpuset.c401
-rw-r--r--kernel/cgroup/rstat.c3
-rw-r--r--kernel/trace/fprobe.c170
-rw-r--r--kernel/trace/trace_fprobe.c26
-rw-r--r--lib/Kconfig57
-rw-r--r--net/batman-adv/Kconfig2
-rw-r--r--net/ceph/Kconfig2
-rw-r--r--net/core/dev.c11
-rw-r--r--net/core/dev_api.c16
-rw-r--r--net/core/link_watch.c28
-rw-r--r--net/core/lock_debug.c2
-rw-r--r--net/core/rtnetlink.c17
-rw-r--r--net/core/sock.c5
-rw-r--r--net/ethtool/cmis.h1
-rw-r--r--net/ethtool/cmis_cdb.c18
-rw-r--r--net/ethtool/common.c1
-rw-r--r--net/ethtool/ioctl.c2
-rw-r--r--net/ethtool/netlink.c8
-rw-r--r--net/hsr/hsr_device.c6
-rw-r--r--net/ipv6/addrconf.c9
-rw-r--r--net/ipv6/route.c8
-rw-r--r--net/mptcp/subflow.c8
-rw-r--r--net/netfilter/Kconfig4
-rw-r--r--net/netfilter/ipvs/Kconfig2
-rw-r--r--net/netfilter/nft_set_pipapo_avx2.c3
-rw-r--r--net/openvswitch/Kconfig2
-rw-r--r--net/sched/Kconfig2
-rw-r--r--net/sched/cls_api.c66
-rw-r--r--net/sched/sch_codel.c5
-rw-r--r--net/sched/sch_drr.c7
-rw-r--r--net/sched/sch_ets.c8
-rw-r--r--net/sched/sch_fq_codel.c6
-rw-r--r--net/sched/sch_hfsc.c8
-rw-r--r--net/sched/sch_htb.c2
-rw-r--r--net/sched/sch_qfq.c7
-rw-r--r--net/sched/sch_sfq.c66
-rw-r--r--net/sctp/Kconfig2
-rw-r--r--net/sctp/socket.c22
-rw-r--r--net/sctp/transport.c2
-rw-r--r--net/tipc/link.c1
-rw-r--r--net/tls/tls_main.c6
-rw-r--r--sound/soc/sof/intel/Kconfig1
-rw-r--r--tools/include/uapi/linux/types.h3
-rw-r--r--tools/objtool/arch/x86/decode.c18
-rw-r--r--tools/objtool/arch/x86/special.c2
-rw-r--r--tools/objtool/check.c59
-rw-r--r--tools/objtool/include/objtool/arch.h3
-rw-r--r--tools/power/x86/turbostat/turbostat.816
-rw-r--r--tools/power/x86/turbostat/turbostat.c162
-rw-r--r--tools/testing/kunit/kunit_parser.py4
-rwxr-xr-xtools/testing/kunit/kunit_tool_test.py4
-rw-r--r--tools/testing/selftests/.gitignore1
-rw-r--r--tools/testing/selftests/bpf/config.x86_641
-rwxr-xr-xtools/testing/selftests/cgroup/test_cpuset_prs.sh617
-rwxr-xr-xtools/testing/selftests/drivers/net/hds.py33
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait_wouldblock.c2
-rw-r--r--tools/testing/selftests/hid/config.common1
-rw-r--r--tools/testing/selftests/kvm/Makefile.kvm45
-rw-r--r--tools/testing/selftests/kvm/arm64/page_fault_test.c2
-rw-r--r--tools/testing/selftests/kvm/include/arm64/processor.h67
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/processor.c60
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c5
-rw-r--r--tools/testing/selftests/kvm/rseq_test.c31
-rw-r--r--tools/testing/selftests/kvm/x86/monitor_mwait_test.c108
-rw-r--r--tools/testing/selftests/mincore/mincore_selftest.c3
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh18
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_concat_range.sh39
-rw-r--r--tools/testing/selftests/net/tls.c36
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json155
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json36
-rw-r--r--tools/testing/selftests/tpm2/.gitignore3
-rwxr-xr-xtools/testing/selftests/tpm2/test_smoke.sh2
-rwxr-xr-xtools/testing/selftests/ublk/test_stripe_04.sh24
-rw-r--r--virt/kvm/Kconfig2
-rw-r--r--virt/kvm/eventfd.c10
242 files changed, 2725 insertions, 1472 deletions
diff --git a/Documentation/networking/netdevices.rst b/Documentation/networking/netdevices.rst
index 6c2d8945f597..eab601ab2db0 100644
--- a/Documentation/networking/netdevices.rst
+++ b/Documentation/networking/netdevices.rst
@@ -338,10 +338,11 @@ operations directly under the netdev instance lock.
Devices drivers are encouraged to rely on the instance lock where possible.
For the (mostly software) drivers that need to interact with the core stack,
-there are two sets of interfaces: ``dev_xxx`` and ``netif_xxx`` (e.g.,
-``dev_set_mtu`` and ``netif_set_mtu``). The ``dev_xxx`` functions handle
-acquiring the instance lock themselves, while the ``netif_xxx`` functions
-assume that the driver has already acquired the instance lock.
+there are two sets of interfaces: ``dev_xxx``/``netdev_xxx`` and ``netif_xxx``
+(e.g., ``dev_set_mtu`` and ``netif_set_mtu``). The ``dev_xxx``/``netdev_xxx``
+functions handle acquiring the instance lock themselves, while the
+``netif_xxx`` functions assume that the driver has already acquired
+the instance lock.
Notifiers and netdev instance lock
==================================
@@ -354,6 +355,7 @@ For devices with locked ops, currently only the following notifiers are
running under the lock:
* ``NETDEV_REGISTER``
* ``NETDEV_UP``
+* ``NETDEV_CHANGE``
The following notifiers are running without the lock:
* ``NETDEV_UNREGISTER``
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 1f8625b7646a..47c7c3f92314 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -7447,6 +7447,75 @@ Unused bitfields in the bitarrays must be set to zero.
This capability connects the vcpu to an in-kernel XIVE device.
+6.76 KVM_CAP_HYPERV_SYNIC
+-------------------------
+
+:Architectures: x86
+:Target: vcpu
+
+This capability, if KVM_CHECK_EXTENSION indicates that it is
+available, means that the kernel has an implementation of the
+Hyper-V Synthetic interrupt controller(SynIC). Hyper-V SynIC is
+used to support Windows Hyper-V based guest paravirt drivers(VMBus).
+
+In order to use SynIC, it has to be activated by setting this
+capability via KVM_ENABLE_CAP ioctl on the vcpu fd. Note that this
+will disable the use of APIC hardware virtualization even if supported
+by the CPU, as it's incompatible with SynIC auto-EOI behavior.
+
+6.77 KVM_CAP_HYPERV_SYNIC2
+--------------------------
+
+:Architectures: x86
+:Target: vcpu
+
+This capability enables a newer version of Hyper-V Synthetic interrupt
+controller (SynIC). The only difference with KVM_CAP_HYPERV_SYNIC is that KVM
+doesn't clear SynIC message and event flags pages when they are enabled by
+writing to the respective MSRs.
+
+6.78 KVM_CAP_HYPERV_DIRECT_TLBFLUSH
+-----------------------------------
+
+:Architectures: x86
+:Target: vcpu
+
+This capability indicates that KVM running on top of Hyper-V hypervisor
+enables Direct TLB flush for its guests meaning that TLB flush
+hypercalls are handled by Level 0 hypervisor (Hyper-V) bypassing KVM.
+Due to the different ABI for hypercall parameters between Hyper-V and
+KVM, enabling this capability effectively disables all hypercall
+handling by KVM (as some KVM hypercall may be mistakenly treated as TLB
+flush hypercalls by Hyper-V) so userspace should disable KVM identification
+in CPUID and only exposes Hyper-V identification. In this case, guest
+thinks it's running on Hyper-V and only use Hyper-V hypercalls.
+
+6.79 KVM_CAP_HYPERV_ENFORCE_CPUID
+---------------------------------
+
+:Architectures: x86
+:Target: vcpu
+
+When enabled, KVM will disable emulated Hyper-V features provided to the
+guest according to the bits Hyper-V CPUID feature leaves. Otherwise, all
+currently implemented Hyper-V features are provided unconditionally when
+Hyper-V identification is set in the HYPERV_CPUID_INTERFACE (0x40000001)
+leaf.
+
+6.80 KVM_CAP_ENFORCE_PV_FEATURE_CPUID
+-------------------------------------
+
+:Architectures: x86
+:Target: vcpu
+
+When enabled, KVM will disable paravirtual features provided to the
+guest according to the bits in the KVM_CPUID_FEATURES CPUID leaf
+(0x40000001). Otherwise, a guest may use the paravirtual features
+regardless of what has actually been exposed through the CPUID leaf.
+
+.. _KVM_CAP_DIRTY_LOG_RING:
+
+
.. _cap_enable_vm:
7. Capabilities that can be enabled on VMs
@@ -7927,10 +7996,10 @@ by POWER10 processor.
7.24 KVM_CAP_VM_COPY_ENC_CONTEXT_FROM
-------------------------------------
-Architectures: x86 SEV enabled
-Type: vm
-Parameters: args[0] is the fd of the source vm
-Returns: 0 on success; ENOTTY on error
+:Architectures: x86 SEV enabled
+:Type: vm
+:Parameters: args[0] is the fd of the source vm
+:Returns: 0 on success; ENOTTY on error
This capability enables userspace to copy encryption context from the vm
indicated by the fd to the vm this is called on.
@@ -7963,24 +8032,6 @@ default.
See Documentation/arch/x86/sgx.rst for more details.
-7.26 KVM_CAP_PPC_RPT_INVALIDATE
--------------------------------
-
-:Capability: KVM_CAP_PPC_RPT_INVALIDATE
-:Architectures: ppc
-:Type: vm
-
-This capability indicates that the kernel is capable of handling
-H_RPT_INVALIDATE hcall.
-
-In order to enable the use of H_RPT_INVALIDATE in the guest,
-user space might have to advertise it for the guest. For example,
-IBM pSeries (sPAPR) guest starts using it if "hcall-rpt-invalidate" is
-present in the "ibm,hypertas-functions" device-tree property.
-
-This capability is enabled for hypervisors on platforms like POWER9
-that support radix MMU.
-
7.27 KVM_CAP_EXIT_ON_EMULATION_FAILURE
--------------------------------------
@@ -8038,24 +8089,9 @@ indicated by the fd to the VM this is called on.
This is intended to support intra-host migration of VMs between userspace VMMs,
upgrading the VMM process without interrupting the guest.
-7.30 KVM_CAP_PPC_AIL_MODE_3
--------------------------------
-
-:Capability: KVM_CAP_PPC_AIL_MODE_3
-:Architectures: ppc
-:Type: vm
-
-This capability indicates that the kernel supports the mode 3 setting for the
-"Address Translation Mode on Interrupt" aka "Alternate Interrupt Location"
-resource that is controlled with the H_SET_MODE hypercall.
-
-This capability allows a guest kernel to use a better-performance mode for
-handling interrupts and system calls.
-
7.31 KVM_CAP_DISABLE_QUIRKS2
----------------------------
-:Capability: KVM_CAP_DISABLE_QUIRKS2
:Parameters: args[0] - set of KVM quirks to disable
:Architectures: x86
:Type: vm
@@ -8210,27 +8246,6 @@ This capability is aimed to mitigate the threat that malicious VMs can
cause CPU stuck (due to event windows don't open up) and make the CPU
unavailable to host or other VMs.
-7.34 KVM_CAP_MEMORY_FAULT_INFO
-------------------------------
-
-:Architectures: x86
-:Returns: Informational only, -EINVAL on direct KVM_ENABLE_CAP.
-
-The presence of this capability indicates that KVM_RUN will fill
-kvm_run.memory_fault if KVM cannot resolve a guest page fault VM-Exit, e.g. if
-there is a valid memslot but no backing VMA for the corresponding host virtual
-address.
-
-The information in kvm_run.memory_fault is valid if and only if KVM_RUN returns
-an error with errno=EFAULT or errno=EHWPOISON *and* kvm_run.exit_reason is set
-to KVM_EXIT_MEMORY_FAULT.
-
-Note: Userspaces which attempt to resolve memory faults so that they can retry
-KVM_RUN are encouraged to guard against repeatedly receiving the same
-error/annotated fault.
-
-See KVM_EXIT_MEMORY_FAULT for more information.
-
7.35 KVM_CAP_X86_APIC_BUS_CYCLES_NS
-----------------------------------
@@ -8248,19 +8263,220 @@ by KVM_CHECK_EXTENSION.
Note: Userspace is responsible for correctly configuring CPUID 0x15, a.k.a. the
core crystal clock frequency, if a non-zero CPUID 0x15 is exposed to the guest.
-7.36 KVM_CAP_X86_GUEST_MODE
-------------------------------
+7.36 KVM_CAP_DIRTY_LOG_RING/KVM_CAP_DIRTY_LOG_RING_ACQ_REL
+----------------------------------------------------------
+
+:Architectures: x86, arm64
+:Type: vm
+:Parameters: args[0] - size of the dirty log ring
+
+KVM is capable of tracking dirty memory using ring buffers that are
+mmapped into userspace; there is one dirty ring per vcpu.
+
+The dirty ring is available to userspace as an array of
+``struct kvm_dirty_gfn``. Each dirty entry is defined as::
+
+ struct kvm_dirty_gfn {
+ __u32 flags;
+ __u32 slot; /* as_id | slot_id */
+ __u64 offset;
+ };
+
+The following values are defined for the flags field to define the
+current state of the entry::
+
+ #define KVM_DIRTY_GFN_F_DIRTY BIT(0)
+ #define KVM_DIRTY_GFN_F_RESET BIT(1)
+ #define KVM_DIRTY_GFN_F_MASK 0x3
+
+Userspace should call KVM_ENABLE_CAP ioctl right after KVM_CREATE_VM
+ioctl to enable this capability for the new guest and set the size of
+the rings. Enabling the capability is only allowed before creating any
+vCPU, and the size of the ring must be a power of two. The larger the
+ring buffer, the less likely the ring is full and the VM is forced to
+exit to userspace. The optimal size depends on the workload, but it is
+recommended that it be at least 64 KiB (4096 entries).
+
+Just like for dirty page bitmaps, the buffer tracks writes to
+all user memory regions for which the KVM_MEM_LOG_DIRTY_PAGES flag was
+set in KVM_SET_USER_MEMORY_REGION. Once a memory region is registered
+with the flag set, userspace can start harvesting dirty pages from the
+ring buffer.
+
+An entry in the ring buffer can be unused (flag bits ``00``),
+dirty (flag bits ``01``) or harvested (flag bits ``1X``). The
+state machine for the entry is as follows::
+
+ dirtied harvested reset
+ 00 -----------> 01 -------------> 1X -------+
+ ^ |
+ | |
+ +------------------------------------------+
+
+To harvest the dirty pages, userspace accesses the mmapped ring buffer
+to read the dirty GFNs. If the flags has the DIRTY bit set (at this stage
+the RESET bit must be cleared), then it means this GFN is a dirty GFN.
+The userspace should harvest this GFN and mark the flags from state
+``01b`` to ``1Xb`` (bit 0 will be ignored by KVM, but bit 1 must be set
+to show that this GFN is harvested and waiting for a reset), and move
+on to the next GFN. The userspace should continue to do this until the
+flags of a GFN have the DIRTY bit cleared, meaning that it has harvested
+all the dirty GFNs that were available.
+
+Note that on weakly ordered architectures, userspace accesses to the
+ring buffer (and more specifically the 'flags' field) must be ordered,
+using load-acquire/store-release accessors when available, or any
+other memory barrier that will ensure this ordering.
+
+It's not necessary for userspace to harvest the all dirty GFNs at once.
+However it must collect the dirty GFNs in sequence, i.e., the userspace
+program cannot skip one dirty GFN to collect the one next to it.
+
+After processing one or more entries in the ring buffer, userspace
+calls the VM ioctl KVM_RESET_DIRTY_RINGS to notify the kernel about
+it, so that the kernel will reprotect those collected GFNs.
+Therefore, the ioctl must be called *before* reading the content of
+the dirty pages.
+
+The dirty ring can get full. When it happens, the KVM_RUN of the
+vcpu will return with exit reason KVM_EXIT_DIRTY_LOG_FULL.
+
+The dirty ring interface has a major difference comparing to the
+KVM_GET_DIRTY_LOG interface in that, when reading the dirty ring from
+userspace, it's still possible that the kernel has not yet flushed the
+processor's dirty page buffers into the kernel buffer (with dirty bitmaps, the
+flushing is done by the KVM_GET_DIRTY_LOG ioctl). To achieve that, one
+needs to kick the vcpu out of KVM_RUN using a signal. The resulting
+vmexit ensures that all dirty GFNs are flushed to the dirty rings.
+
+NOTE: KVM_CAP_DIRTY_LOG_RING_ACQ_REL is the only capability that
+should be exposed by weakly ordered architecture, in order to indicate
+the additional memory ordering requirements imposed on userspace when
+reading the state of an entry and mutating it from DIRTY to HARVESTED.
+Architecture with TSO-like ordering (such as x86) are allowed to
+expose both KVM_CAP_DIRTY_LOG_RING and KVM_CAP_DIRTY_LOG_RING_ACQ_REL
+to userspace.
+
+After enabling the dirty rings, the userspace needs to detect the
+capability of KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP to see whether the
+ring structures can be backed by per-slot bitmaps. With this capability
+advertised, it means the architecture can dirty guest pages without
+vcpu/ring context, so that some of the dirty information will still be
+maintained in the bitmap structure. KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP
+can't be enabled if the capability of KVM_CAP_DIRTY_LOG_RING_ACQ_REL
+hasn't been enabled, or any memslot has been existing.
+
+Note that the bitmap here is only a backup of the ring structure. The
+use of the ring and bitmap combination is only beneficial if there is
+only a very small amount of memory that is dirtied out of vcpu/ring
+context. Otherwise, the stand-alone per-slot bitmap mechanism needs to
+be considered.
+
+To collect dirty bits in the backup bitmap, userspace can use the same
+KVM_GET_DIRTY_LOG ioctl. KVM_CLEAR_DIRTY_LOG isn't needed as long as all
+the generation of the dirty bits is done in a single pass. Collecting
+the dirty bitmap should be the very last thing that the VMM does before
+considering the state as complete. VMM needs to ensure that the dirty
+state is final and avoid missing dirty pages from another ioctl ordered
+after the bitmap collection.
+
+NOTE: Multiple examples of using the backup bitmap: (1) save vgic/its
+tables through command KVM_DEV_ARM_{VGIC_GRP_CTRL, ITS_SAVE_TABLES} on
+KVM device "kvm-arm-vgic-its". (2) restore vgic/its tables through
+command KVM_DEV_ARM_{VGIC_GRP_CTRL, ITS_RESTORE_TABLES} on KVM device
+"kvm-arm-vgic-its". VGICv3 LPI pending status is restored. (3) save
+vgic3 pending table through KVM_DEV_ARM_VGIC_{GRP_CTRL, SAVE_PENDING_TABLES}
+command on KVM device "kvm-arm-vgic-v3".
+
+7.37 KVM_CAP_PMU_CAPABILITY
+---------------------------
:Architectures: x86
-:Returns: Informational only, -EINVAL on direct KVM_ENABLE_CAP.
+:Type: vm
+:Parameters: arg[0] is bitmask of PMU virtualization capabilities.
+:Returns: 0 on success, -EINVAL when arg[0] contains invalid bits
-The presence of this capability indicates that KVM_RUN will update the
-KVM_RUN_X86_GUEST_MODE bit in kvm_run.flags to indicate whether the
-vCPU was executing nested guest code when it exited.
+This capability alters PMU virtualization in KVM.
-KVM exits with the register state of either the L1 or L2 guest
-depending on which executed at the time of an exit. Userspace must
-take care to differentiate between these cases.
+Calling KVM_CHECK_EXTENSION for this capability returns a bitmask of
+PMU virtualization capabilities that can be adjusted on a VM.
+
+The argument to KVM_ENABLE_CAP is also a bitmask and selects specific
+PMU virtualization capabilities to be applied to the VM. This can
+only be invoked on a VM prior to the creation of VCPUs.
+
+At this time, KVM_PMU_CAP_DISABLE is the only capability. Setting
+this capability will disable PMU virtualization for that VM. Usermode
+should adjust CPUID leaf 0xA to reflect that the PMU is disabled.
+
+7.38 KVM_CAP_VM_DISABLE_NX_HUGE_PAGES
+-------------------------------------
+
+:Architectures: x86
+:Type: vm
+:Parameters: arg[0] must be 0.
+:Returns: 0 on success, -EPERM if the userspace process does not
+ have CAP_SYS_BOOT, -EINVAL if args[0] is not 0 or any vCPUs have been
+ created.
+
+This capability disables the NX huge pages mitigation for iTLB MULTIHIT.
+
+The capability has no effect if the nx_huge_pages module parameter is not set.
+
+This capability may only be set before any vCPUs are created.
+
+7.39 KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE
+---------------------------------------
+
+:Architectures: arm64
+:Type: vm
+:Parameters: arg[0] is the new split chunk size.
+:Returns: 0 on success, -EINVAL if any memslot was already created.
+
+This capability sets the chunk size used in Eager Page Splitting.
+
+Eager Page Splitting improves the performance of dirty-logging (used
+in live migrations) when guest memory is backed by huge-pages. It
+avoids splitting huge-pages (into PAGE_SIZE pages) on fault, by doing
+it eagerly when enabling dirty logging (with the
+KVM_MEM_LOG_DIRTY_PAGES flag for a memory region), or when using
+KVM_CLEAR_DIRTY_LOG.
+
+The chunk size specifies how many pages to break at a time, using a
+single allocation for each chunk. Bigger the chunk size, more pages
+need to be allocated ahead of time.
+
+The chunk size needs to be a valid block size. The list of acceptable
+block sizes is exposed in KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES as a
+64-bit bitmap (each bit describing a block size). The default value is
+0, to disable the eager page splitting.
+
+7.40 KVM_CAP_EXIT_HYPERCALL
+---------------------------
+
+:Architectures: x86
+:Type: vm
+
+This capability, if enabled, will cause KVM to exit to userspace
+with KVM_EXIT_HYPERCALL exit reason to process some hypercalls.
+
+Calling KVM_CHECK_EXTENSION for this capability will return a bitmask
+of hypercalls that can be configured to exit to userspace.
+Right now, the only such hypercall is KVM_HC_MAP_GPA_RANGE.
+
+The argument to KVM_ENABLE_CAP is also a bitmask, and must be a subset
+of the result of KVM_CHECK_EXTENSION. KVM will forward to userspace
+the hypercalls whose corresponding bit is in the argument, and return
+ENOSYS for the others.
+
+7.41 KVM_CAP_ARM_SYSTEM_SUSPEND
+-------------------------------
+
+:Architectures: arm64
+:Type: vm
+
+When enabled, KVM will exit to userspace with KVM_EXIT_SYSTEM_EVENT of
+type KVM_SYSTEM_EVENT_SUSPEND to process the guest suspend request.
7.37 KVM_CAP_ARM_WRITABLE_IMP_ID_REGS
-------------------------------------
@@ -8297,21 +8513,6 @@ H_RANDOM hypercall backed by a hardware random-number generator.
If present, the kernel H_RANDOM handler can be enabled for guest use
with the KVM_CAP_PPC_ENABLE_HCALL capability.
-8.2 KVM_CAP_HYPERV_SYNIC
-------------------------
-
-:Architectures: x86
-
-This capability, if KVM_CHECK_EXTENSION indicates that it is
-available, means that the kernel has an implementation of the
-Hyper-V Synthetic interrupt controller(SynIC). Hyper-V SynIC is
-used to support Windows Hyper-V based guest paravirt drivers(VMBus).
-
-In order to use SynIC, it has to be activated by setting this
-capability via KVM_ENABLE_CAP ioctl on the vcpu fd. Note that this
-will disable the use of APIC hardware virtualization even if supported
-by the CPU, as it's incompatible with SynIC auto-EOI behavior.
-
8.3 KVM_CAP_PPC_MMU_RADIX
-------------------------
@@ -8362,20 +8563,6 @@ may be incompatible with the MIPS VZ ASE.
virtualization, including standard guest virtual memory segments.
== ==========================================================================
-8.6 KVM_CAP_MIPS_TE
--------------------
-
-:Architectures: mips
-
-This capability, if KVM_CHECK_EXTENSION on the main kvm handle indicates that
-it is available, means that the trap & emulate implementation is available to
-run guest code in user mode, even if KVM_CAP_MIPS_VZ indicates that hardware
-assisted virtualisation is also available. KVM_VM_MIPS_TE (0) must be passed
-to KVM_CREATE_VM to create a VM which utilises it.
-
-If KVM_CHECK_EXTENSION on a kvm VM handle indicates that this capability is
-available, it means that the VM is using trap & emulate.
-
8.7 KVM_CAP_MIPS_64BIT
----------------------
@@ -8457,16 +8644,6 @@ virtual SMT modes that can be set using KVM_CAP_PPC_SMT. If bit N
(counting from the right) is set, then a virtual SMT mode of 2^N is
available.
-8.11 KVM_CAP_HYPERV_SYNIC2
---------------------------
-
-:Architectures: x86
-
-This capability enables a newer version of Hyper-V Synthetic interrupt
-controller (SynIC). The only difference with KVM_CAP_HYPERV_SYNIC is that KVM
-doesn't clear SynIC message and event flags pages when they are enabled by
-writing to the respective MSRs.
-
8.12 KVM_CAP_HYPERV_VP_INDEX
----------------------------
@@ -8481,7 +8658,6 @@ capability is absent, userspace can still query this msr's value.
-------------------------------
:Architectures: s390
-:Parameters: none
This capability indicates if the flic device will be able to get/set the
AIS states for migration via the KVM_DEV_FLIC_AISM_ALL attribute and allows
@@ -8555,21 +8731,6 @@ This capability indicates that KVM supports paravirtualized Hyper-V IPI send
hypercalls:
HvCallSendSyntheticClusterIpi, HvCallSendSyntheticClusterIpiEx.
-8.21 KVM_CAP_HYPERV_DIRECT_TLBFLUSH
------------------------------------
-
-:Architectures: x86
-
-This capability indicates that KVM running on top of Hyper-V hypervisor
-enables Direct TLB flush for its guests meaning that TLB flush
-hypercalls are handled by Level 0 hypervisor (Hyper-V) bypassing KVM.
-Due to the different ABI for hypercall parameters between Hyper-V and
-KVM, enabling this capability effectively disables all hypercall
-handling by KVM (as some KVM hypercall may be mistakenly treated as TLB
-flush hypercalls by Hyper-V) so userspace should disable KVM identification
-in CPUID and only exposes Hyper-V identification. In this case, guest
-thinks it's running on Hyper-V and only use Hyper-V hypercalls.
-
8.22 KVM_CAP_S390_VCPU_RESETS
-----------------------------
@@ -8647,142 +8808,6 @@ In combination with KVM_CAP_X86_USER_SPACE_MSR, this allows user space to
trap and emulate MSRs that are outside of the scope of KVM as well as
limit the attack surface on KVM's MSR emulation code.
-8.28 KVM_CAP_ENFORCE_PV_FEATURE_CPUID
--------------------------------------
-
-Architectures: x86
-
-When enabled, KVM will disable paravirtual features provided to the
-guest according to the bits in the KVM_CPUID_FEATURES CPUID leaf
-(0x40000001). Otherwise, a guest may use the paravirtual features
-regardless of what has actually been exposed through the CPUID leaf.
-
-.. _KVM_CAP_DIRTY_LOG_RING:
-
-8.29 KVM_CAP_DIRTY_LOG_RING/KVM_CAP_DIRTY_LOG_RING_ACQ_REL
-----------------------------------------------------------
-
-:Architectures: x86, arm64
-:Parameters: args[0] - size of the dirty log ring
-
-KVM is capable of tracking dirty memory using ring buffers that are
-mmapped into userspace; there is one dirty ring per vcpu.
-
-The dirty ring is available to userspace as an array of
-``struct kvm_dirty_gfn``. Each dirty entry is defined as::
-
- struct kvm_dirty_gfn {
- __u32 flags;
- __u32 slot; /* as_id | slot_id */
- __u64 offset;
- };
-
-The following values are defined for the flags field to define the
-current state of the entry::
-
- #define KVM_DIRTY_GFN_F_DIRTY BIT(0)
- #define KVM_DIRTY_GFN_F_RESET BIT(1)
- #define KVM_DIRTY_GFN_F_MASK 0x3
-
-Userspace should call KVM_ENABLE_CAP ioctl right after KVM_CREATE_VM
-ioctl to enable this capability for the new guest and set the size of
-the rings. Enabling the capability is only allowed before creating any
-vCPU, and the size of the ring must be a power of two. The larger the
-ring buffer, the less likely the ring is full and the VM is forced to
-exit to userspace. The optimal size depends on the workload, but it is
-recommended that it be at least 64 KiB (4096 entries).
-
-Just like for dirty page bitmaps, the buffer tracks writes to
-all user memory regions for which the KVM_MEM_LOG_DIRTY_PAGES flag was
-set in KVM_SET_USER_MEMORY_REGION. Once a memory region is registered
-with the flag set, userspace can start harvesting dirty pages from the
-ring buffer.
-
-An entry in the ring buffer can be unused (flag bits ``00``),
-dirty (flag bits ``01``) or harvested (flag bits ``1X``). The
-state machine for the entry is as follows::
-
- dirtied harvested reset
- 00 -----------> 01 -------------> 1X -------+
- ^ |
- | |
- +------------------------------------------+
-
-To harvest the dirty pages, userspace accesses the mmapped ring buffer
-to read the dirty GFNs. If the flags has the DIRTY bit set (at this stage
-the RESET bit must be cleared), then it means this GFN is a dirty GFN.
-The userspace should harvest this GFN and mark the flags from state
-``01b`` to ``1Xb`` (bit 0 will be ignored by KVM, but bit 1 must be set
-to show that this GFN is harvested and waiting for a reset), and move
-on to the next GFN. The userspace should continue to do this until the
-flags of a GFN have the DIRTY bit cleared, meaning that it has harvested
-all the dirty GFNs that were available.
-
-Note that on weakly ordered architectures, userspace accesses to the
-ring buffer (and more specifically the 'flags' field) must be ordered,
-using load-acquire/store-release accessors when available, or any
-other memory barrier that will ensure this ordering.
-
-It's not necessary for userspace to harvest the all dirty GFNs at once.
-However it must collect the dirty GFNs in sequence, i.e., the userspace
-program cannot skip one dirty GFN to collect the one next to it.
-
-After processing one or more entries in the ring buffer, userspace
-calls the VM ioctl KVM_RESET_DIRTY_RINGS to notify the kernel about
-it, so that the kernel will reprotect those collected GFNs.
-Therefore, the ioctl must be called *before* reading the content of
-the dirty pages.
-
-The dirty ring can get full. When it happens, the KVM_RUN of the
-vcpu will return with exit reason KVM_EXIT_DIRTY_LOG_FULL.
-
-The dirty ring interface has a major difference comparing to the
-KVM_GET_DIRTY_LOG interface in that, when reading the dirty ring from
-userspace, it's still possible that the kernel has not yet flushed the
-processor's dirty page buffers into the kernel buffer (with dirty bitmaps, the
-flushing is done by the KVM_GET_DIRTY_LOG ioctl). To achieve that, one
-needs to kick the vcpu out of KVM_RUN using a signal. The resulting
-vmexit ensures that all dirty GFNs are flushed to the dirty rings.
-
-NOTE: KVM_CAP_DIRTY_LOG_RING_ACQ_REL is the only capability that
-should be exposed by weakly ordered architecture, in order to indicate
-the additional memory ordering requirements imposed on userspace when
-reading the state of an entry and mutating it from DIRTY to HARVESTED.
-Architecture with TSO-like ordering (such as x86) are allowed to
-expose both KVM_CAP_DIRTY_LOG_RING and KVM_CAP_DIRTY_LOG_RING_ACQ_REL
-to userspace.
-
-After enabling the dirty rings, the userspace needs to detect the
-capability of KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP to see whether the
-ring structures can be backed by per-slot bitmaps. With this capability
-advertised, it means the architecture can dirty guest pages without
-vcpu/ring context, so that some of the dirty information will still be
-maintained in the bitmap structure. KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP
-can't be enabled if the capability of KVM_CAP_DIRTY_LOG_RING_ACQ_REL
-hasn't been enabled, or any memslot has been existing.
-
-Note that the bitmap here is only a backup of the ring structure. The
-use of the ring and bitmap combination is only beneficial if there is
-only a very small amount of memory that is dirtied out of vcpu/ring
-context. Otherwise, the stand-alone per-slot bitmap mechanism needs to
-be considered.
-
-To collect dirty bits in the backup bitmap, userspace can use the same
-KVM_GET_DIRTY_LOG ioctl. KVM_CLEAR_DIRTY_LOG isn't needed as long as all
-the generation of the dirty bits is done in a single pass. Collecting
-the dirty bitmap should be the very last thing that the VMM does before
-considering the state as complete. VMM needs to ensure that the dirty
-state is final and avoid missing dirty pages from another ioctl ordered
-after the bitmap collection.
-
-NOTE: Multiple examples of using the backup bitmap: (1) save vgic/its
-tables through command KVM_DEV_ARM_{VGIC_GRP_CTRL, ITS_SAVE_TABLES} on
-KVM device "kvm-arm-vgic-its". (2) restore vgic/its tables through
-command KVM_DEV_ARM_{VGIC_GRP_CTRL, ITS_RESTORE_TABLES} on KVM device
-"kvm-arm-vgic-its". VGICv3 LPI pending status is restored. (3) save
-vgic3 pending table through KVM_DEV_ARM_VGIC_{GRP_CTRL, SAVE_PENDING_TABLES}
-command on KVM device "kvm-arm-vgic-v3".
-
8.30 KVM_CAP_XEN_HVM
--------------------
@@ -8847,10 +8872,9 @@ clearing the PVCLOCK_TSC_STABLE_BIT flag in Xen pvclock sources. This will be
done when the KVM_CAP_XEN_HVM ioctl sets the
KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE flag.
-8.31 KVM_CAP_PPC_MULTITCE
--------------------------
+8.31 KVM_CAP_SPAPR_MULTITCE
+---------------------------
-:Capability: KVM_CAP_PPC_MULTITCE
:Architectures: ppc
:Type: vm
@@ -8882,72 +8906,9 @@ This capability indicates that the KVM virtual PTP service is
supported in the host. A VMM can check whether the service is
available to the guest on migration.
-8.33 KVM_CAP_HYPERV_ENFORCE_CPUID
----------------------------------
-
-Architectures: x86
-
-When enabled, KVM will disable emulated Hyper-V features provided to the
-guest according to the bits Hyper-V CPUID feature leaves. Otherwise, all
-currently implemented Hyper-V features are provided unconditionally when
-Hyper-V identification is set in the HYPERV_CPUID_INTERFACE (0x40000001)
-leaf.
-
-8.34 KVM_CAP_EXIT_HYPERCALL
----------------------------
-
-:Capability: KVM_CAP_EXIT_HYPERCALL
-:Architectures: x86
-:Type: vm
-
-This capability, if enabled, will cause KVM to exit to userspace
-with KVM_EXIT_HYPERCALL exit reason to process some hypercalls.
-
-Calling KVM_CHECK_EXTENSION for this capability will return a bitmask
-of hypercalls that can be configured to exit to userspace.
-Right now, the only such hypercall is KVM_HC_MAP_GPA_RANGE.
-
-The argument to KVM_ENABLE_CAP is also a bitmask, and must be a subset
-of the result of KVM_CHECK_EXTENSION. KVM will forward to userspace
-the hypercalls whose corresponding bit is in the argument, and return
-ENOSYS for the others.
-
-8.35 KVM_CAP_PMU_CAPABILITY
----------------------------
-
-:Capability: KVM_CAP_PMU_CAPABILITY
-:Architectures: x86
-:Type: vm
-:Parameters: arg[0] is bitmask of PMU virtualization capabilities.
-:Returns: 0 on success, -EINVAL when arg[0] contains invalid bits
-
-This capability alters PMU virtualization in KVM.
-
-Calling KVM_CHECK_EXTENSION for this capability returns a bitmask of
-PMU virtualization capabilities that can be adjusted on a VM.
-
-The argument to KVM_ENABLE_CAP is also a bitmask and selects specific
-PMU virtualization capabilities to be applied to the VM. This can
-only be invoked on a VM prior to the creation of VCPUs.
-
-At this time, KVM_PMU_CAP_DISABLE is the only capability. Setting
-this capability will disable PMU virtualization for that VM. Usermode
-should adjust CPUID leaf 0xA to reflect that the PMU is disabled.
-
-8.36 KVM_CAP_ARM_SYSTEM_SUSPEND
--------------------------------
-
-:Capability: KVM_CAP_ARM_SYSTEM_SUSPEND
-:Architectures: arm64
-:Type: vm
-
-When enabled, KVM will exit to userspace with KVM_EXIT_SYSTEM_EVENT of
-type KVM_SYSTEM_EVENT_SUSPEND to process the guest suspend request.
-
8.37 KVM_CAP_S390_PROTECTED_DUMP
--------------------------------
-:Capability: KVM_CAP_S390_PROTECTED_DUMP
:Architectures: s390
:Type: vm
@@ -8957,27 +8918,9 @@ PV guests. The `KVM_PV_DUMP` command is available for the
dump related UV data. Also the vcpu ioctl `KVM_S390_PV_CPU_COMMAND` is
available and supports the `KVM_PV_DUMP_CPU` subcommand.
-8.38 KVM_CAP_VM_DISABLE_NX_HUGE_PAGES
--------------------------------------
-
-:Capability: KVM_CAP_VM_DISABLE_NX_HUGE_PAGES
-:Architectures: x86
-:Type: vm
-:Parameters: arg[0] must be 0.
-:Returns: 0 on success, -EPERM if the userspace process does not
- have CAP_SYS_BOOT, -EINVAL if args[0] is not 0 or any vCPUs have been
- created.
-
-This capability disables the NX huge pages mitigation for iTLB MULTIHIT.
-
-The capability has no effect if the nx_huge_pages module parameter is not set.
-
-This capability may only be set before any vCPUs are created.
-
8.39 KVM_CAP_S390_CPU_TOPOLOGY
------------------------------
-:Capability: KVM_CAP_S390_CPU_TOPOLOGY
:Architectures: s390
:Type: vm
@@ -8999,37 +8942,9 @@ structure.
When getting the Modified Change Topology Report value, the attr->addr
must point to a byte where the value will be stored or retrieved from.
-8.40 KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE
----------------------------------------
-
-:Capability: KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE
-:Architectures: arm64
-:Type: vm
-:Parameters: arg[0] is the new split chunk size.
-:Returns: 0 on success, -EINVAL if any memslot was already created.
-
-This capability sets the chunk size used in Eager Page Splitting.
-
-Eager Page Splitting improves the performance of dirty-logging (used
-in live migrations) when guest memory is backed by huge-pages. It
-avoids splitting huge-pages (into PAGE_SIZE pages) on fault, by doing
-it eagerly when enabling dirty logging (with the
-KVM_MEM_LOG_DIRTY_PAGES flag for a memory region), or when using
-KVM_CLEAR_DIRTY_LOG.
-
-The chunk size specifies how many pages to break at a time, using a
-single allocation for each chunk. Bigger the chunk size, more pages
-need to be allocated ahead of time.
-
-The chunk size needs to be a valid block size. The list of acceptable
-block sizes is exposed in KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES as a
-64-bit bitmap (each bit describing a block size). The default value is
-0, to disable the eager page splitting.
-
8.41 KVM_CAP_VM_TYPES
---------------------
-:Capability: KVM_CAP_MEMORY_ATTRIBUTES
:Architectures: x86
:Type: system ioctl
@@ -9046,6 +8961,67 @@ Do not use KVM_X86_SW_PROTECTED_VM for "real" VMs, and especially not in
production. The behavior and effective ABI for software-protected VMs is
unstable.
+8.42 KVM_CAP_PPC_RPT_INVALIDATE
+-------------------------------
+
+:Architectures: ppc
+
+This capability indicates that the kernel is capable of handling
+H_RPT_INVALIDATE hcall.
+
+In order to enable the use of H_RPT_INVALIDATE in the guest,
+user space might have to advertise it for the guest. For example,
+IBM pSeries (sPAPR) guest starts using it if "hcall-rpt-invalidate" is
+present in the "ibm,hypertas-functions" device-tree property.
+
+This capability is enabled for hypervisors on platforms like POWER9
+that support radix MMU.
+
+8.43 KVM_CAP_PPC_AIL_MODE_3
+---------------------------
+
+:Architectures: ppc
+
+This capability indicates that the kernel supports the mode 3 setting for the
+"Address Translation Mode on Interrupt" aka "Alternate Interrupt Location"
+resource that is controlled with the H_SET_MODE hypercall.
+
+This capability allows a guest kernel to use a better-performance mode for
+handling interrupts and system calls.
+
+8.44 KVM_CAP_MEMORY_FAULT_INFO
+------------------------------
+
+:Architectures: x86
+
+The presence of this capability indicates that KVM_RUN will fill
+kvm_run.memory_fault if KVM cannot resolve a guest page fault VM-Exit, e.g. if
+there is a valid memslot but no backing VMA for the corresponding host virtual
+address.
+
+The information in kvm_run.memory_fault is valid if and only if KVM_RUN returns
+an error with errno=EFAULT or errno=EHWPOISON *and* kvm_run.exit_reason is set
+to KVM_EXIT_MEMORY_FAULT.
+
+Note: Userspaces which attempt to resolve memory faults so that they can retry
+KVM_RUN are encouraged to guard against repeatedly receiving the same
+error/annotated fault.
+
+See KVM_EXIT_MEMORY_FAULT for more information.
+
+8.45 KVM_CAP_X86_GUEST_MODE
+---------------------------
+
+:Architectures: x86
+
+The presence of this capability indicates that KVM_RUN will update the
+KVM_RUN_X86_GUEST_MODE bit in kvm_run.flags to indicate whether the
+vCPU was executing nested guest code when it exited.
+
+KVM exits with the register state of either the L1 or L2 guest
+depending on which executed at the time of an exit. Userspace must
+take care to differentiate between these cases.
+
9. Known KVM API problems
=========================
@@ -9076,9 +9052,10 @@ the local APIC.
The same is true for the ``KVM_FEATURE_PV_UNHALT`` paravirtualized feature.
-CPU[EAX=1]:ECX[24] (TSC_DEADLINE) is not reported by ``KVM_GET_SUPPORTED_CPUID``.
-It can be enabled if ``KVM_CAP_TSC_DEADLINE_TIMER`` is present and the kernel
-has enabled in-kernel emulation of the local APIC.
+On older versions of Linux, CPU[EAX=1]:ECX[24] (TSC_DEADLINE) is not reported by
+``KVM_GET_SUPPORTED_CPUID``, but it can be enabled if ``KVM_CAP_TSC_DEADLINE_TIMER``
+is present and the kernel has enabled in-kernel emulation of the local APIC.
+On newer versions, ``KVM_GET_SUPPORTED_CPUID`` does report the bit as available.
CPU topology
~~~~~~~~~~~~
diff --git a/MAINTAINERS b/MAINTAINERS
index 96b827049501..c59316109e3f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10151,6 +10151,8 @@ F: include/linux/gpio.h
F: include/linux/gpio/
F: include/linux/of_gpio.h
K: (devm_)?gpio_(request|free|direction|get|set)
+K: GPIOD_FLAGS_BIT_NONEXCLUSIVE
+K: devm_gpiod_unhinge
GPIO UAPI
M: Bartosz Golaszewski <brgl@bgdev.pl>
diff --git a/Makefile b/Makefile
index e55726a71d95..f42418556507 100644
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,8 @@
# SPDX-License-Identifier: GPL-2.0
VERSION = 6
-PATCHLEVEL = 14
+PATCHLEVEL = 15
SUBLEVEL = 0
-EXTRAVERSION =
+EXTRAVERSION = -rc1
NAME = Baby Opossum Posse
# *DOCUMENTATION*
@@ -1068,6 +1068,9 @@ ifdef CONFIG_CC_IS_GCC
KBUILD_CFLAGS += -fconserve-stack
endif
+# Ensure compilers do not transform certain loops into calls to wcslen()
+KBUILD_CFLAGS += -fno-builtin-wcslen
+
# change __FILE__ to the relative path to the source directory
ifdef building_out_of_srctree
KBUILD_CPPFLAGS += $(call cc-option,-ffile-prefix-map=$(srcroot)/=)
diff --git a/arch/arm/configs/at91_dt_defconfig b/arch/arm/configs/at91_dt_defconfig
index f2596a1b2f7d..ff13e1ecf4bb 100644
--- a/arch/arm/configs/at91_dt_defconfig
+++ b/arch/arm/configs/at91_dt_defconfig
@@ -232,7 +232,6 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_DEV_ATMEL_AES=y
CONFIG_CRYPTO_DEV_ATMEL_TDES=y
CONFIG_CRYPTO_DEV_ATMEL_SHA=y
-CONFIG_CRC_CCITT=y
CONFIG_FONTS=y
CONFIG_FONT_8x8=y
CONFIG_FONT_ACORN_8x8=y
diff --git a/arch/arm/configs/collie_defconfig b/arch/arm/configs/collie_defconfig
index 42cb1c854118..578c6a4af620 100644
--- a/arch/arm/configs/collie_defconfig
+++ b/arch/arm/configs/collie_defconfig
@@ -78,7 +78,6 @@ CONFIG_ROMFS_FS=y
CONFIG_NLS_DEFAULT="cp437"
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ISO8859_1=y
-CONFIG_CRC_CCITT=y
CONFIG_FONTS=y
CONFIG_FONT_MINI_4x6=y
# CONFIG_DEBUG_BUGVERBOSE is not set
diff --git a/arch/arm/configs/davinci_all_defconfig b/arch/arm/configs/davinci_all_defconfig
index 3474e475373a..70b8c78386f4 100644
--- a/arch/arm/configs/davinci_all_defconfig
+++ b/arch/arm/configs/davinci_all_defconfig
@@ -249,7 +249,6 @@ CONFIG_NLS_ASCII=m
CONFIG_NLS_ISO8859_1=y
CONFIG_NLS_UTF8=m
# CONFIG_CRYPTO_HW is not set
-CONFIG_CRC_T10DIF=m
CONFIG_DMA_CMA=y
CONFIG_DEBUG_FS=y
CONFIG_DEBUG_RT_MUTEXES=y
diff --git a/arch/arm/configs/dove_defconfig b/arch/arm/configs/dove_defconfig
index b382a2e175fb..d76eb12d29a7 100644
--- a/arch/arm/configs/dove_defconfig
+++ b/arch/arm/configs/dove_defconfig
@@ -128,7 +128,6 @@ CONFIG_CRYPTO_DEFLATE=y
CONFIG_CRYPTO_LZO=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
CONFIG_CRYPTO_DEV_MARVELL_CESA=y
-CONFIG_CRC_CCITT=y
CONFIG_PRINTK_TIME=y
# CONFIG_DEBUG_BUGVERBOSE is not set
CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig
index 7ad48fdda1da..e81a5d6c1c20 100644
--- a/arch/arm/configs/exynos_defconfig
+++ b/arch/arm/configs/exynos_defconfig
@@ -370,7 +370,6 @@ CONFIG_CRYPTO_AES_ARM_BS=m
CONFIG_CRYPTO_CHACHA20_NEON=m
CONFIG_CRYPTO_DEV_EXYNOS_RNG=y
CONFIG_CRYPTO_DEV_S5P=y
-CONFIG_CRC_CCITT=y
CONFIG_DMA_CMA=y
CONFIG_CMA_SIZE_MBYTES=96
CONFIG_FONTS=y
diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig
index 297c6a7b978a..062c1eb8dd60 100644
--- a/arch/arm/configs/imx_v6_v7_defconfig
+++ b/arch/arm/configs/imx_v6_v7_defconfig
@@ -481,8 +481,6 @@ CONFIG_SECURITYFS=y
CONFIG_CRYPTO_DEV_FSL_CAAM=y
CONFIG_CRYPTO_DEV_SAHARA=y
CONFIG_CRYPTO_DEV_MXS_DCP=y
-CONFIG_CRC_CCITT=m
-CONFIG_CRC_T10DIF=y
CONFIG_CMA_SIZE_MBYTES=64
CONFIG_FONTS=y
CONFIG_FONT_8x8=y
diff --git a/arch/arm/configs/lpc18xx_defconfig b/arch/arm/configs/lpc18xx_defconfig
index 2aa2ac8c6507..2d489186e945 100644
--- a/arch/arm/configs/lpc18xx_defconfig
+++ b/arch/arm/configs/lpc18xx_defconfig
@@ -147,7 +147,6 @@ CONFIG_EXT2_FS=y
# CONFIG_INOTIFY_USER is not set
CONFIG_JFFS2_FS=y
# CONFIG_NETWORK_FILESYSTEMS is not set
-CONFIG_CRC_ITU_T=y
CONFIG_PRINTK_TIME=y
# CONFIG_ENABLE_MUST_CHECK is not set
# CONFIG_DEBUG_BUGVERBOSE is not set
diff --git a/arch/arm/configs/lpc32xx_defconfig b/arch/arm/configs/lpc32xx_defconfig
index 98e267213b21..9afccd76446b 100644
--- a/arch/arm/configs/lpc32xx_defconfig
+++ b/arch/arm/configs/lpc32xx_defconfig
@@ -179,7 +179,6 @@ CONFIG_NLS_ISO8859_1=y
CONFIG_NLS_UTF8=y
CONFIG_CRYPTO_ANSI_CPRNG=y
# CONFIG_CRYPTO_HW is not set
-CONFIG_CRC_CCITT=y
CONFIG_PRINTK_TIME=y
CONFIG_DYNAMIC_DEBUG=y
CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
diff --git a/arch/arm/configs/milbeaut_m10v_defconfig b/arch/arm/configs/milbeaut_m10v_defconfig
index acd16204f8d7..275ddf7a3a14 100644
--- a/arch/arm/configs/milbeaut_m10v_defconfig
+++ b/arch/arm/configs/milbeaut_m10v_defconfig
@@ -108,8 +108,6 @@ CONFIG_CRYPTO_AES_ARM_BS=m
CONFIG_CRYPTO_AES_ARM_CE=m
CONFIG_CRYPTO_CHACHA20_NEON=m
# CONFIG_CRYPTO_HW is not set
-CONFIG_CRC_CCITT=m
-CONFIG_CRC_ITU_T=m
CONFIG_DMA_CMA=y
CONFIG_CMA_SIZE_MBYTES=64
CONFIG_PRINTK_TIME=y
diff --git a/arch/arm/configs/mmp2_defconfig b/arch/arm/configs/mmp2_defconfig
index f6f9e135353e..842a989baa27 100644
--- a/arch/arm/configs/mmp2_defconfig
+++ b/arch/arm/configs/mmp2_defconfig
@@ -67,7 +67,6 @@ CONFIG_NFS_V3=y
CONFIG_NFS_V3_ACL=y
CONFIG_NFS_V4=y
CONFIG_ROOT_NFS=y
-CONFIG_CRC_CCITT=y
CONFIG_PRINTK_TIME=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
diff --git a/arch/arm/configs/multi_v4t_defconfig b/arch/arm/configs/multi_v4t_defconfig
index 27d650635d9b..1a86dc305523 100644
--- a/arch/arm/configs/multi_v4t_defconfig
+++ b/arch/arm/configs/multi_v4t_defconfig
@@ -91,6 +91,5 @@ CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_CRAMFS=y
CONFIG_MINIX_FS=y
-CONFIG_CRC_CCITT=y
# CONFIG_FTRACE is not set
CONFIG_DEBUG_USER=y
diff --git a/arch/arm/configs/multi_v5_defconfig b/arch/arm/configs/multi_v5_defconfig
index db81862bdb93..cf6180b4296e 100644
--- a/arch/arm/configs/multi_v5_defconfig
+++ b/arch/arm/configs/multi_v5_defconfig
@@ -289,7 +289,6 @@ CONFIG_NLS_UTF8=y
CONFIG_CRYPTO_CBC=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_DEV_MARVELL_CESA=y
-CONFIG_CRC_CCITT=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
CONFIG_MAGIC_SYSRQ=y
diff --git a/arch/arm/configs/mvebu_v5_defconfig b/arch/arm/configs/mvebu_v5_defconfig
index a518d4a2581e..23dbb80fcc2e 100644
--- a/arch/arm/configs/mvebu_v5_defconfig
+++ b/arch/arm/configs/mvebu_v5_defconfig
@@ -187,7 +187,6 @@ CONFIG_NLS_UTF8=y
CONFIG_CRYPTO_CBC=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_DEV_MARVELL_CESA=y
-CONFIG_CRC_CCITT=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
CONFIG_MAGIC_SYSRQ=y
diff --git a/arch/arm/configs/mxs_defconfig b/arch/arm/configs/mxs_defconfig
index d8a6e43c401e..c76d66135abb 100644
--- a/arch/arm/configs/mxs_defconfig
+++ b/arch/arm/configs/mxs_defconfig
@@ -160,7 +160,6 @@ CONFIG_NLS_CODEPAGE_850=y
CONFIG_NLS_ISO8859_1=y
CONFIG_NLS_ISO8859_15=y
CONFIG_CRYPTO_DEV_MXS_DCP=y
-CONFIG_CRC_ITU_T=m
CONFIG_FONTS=y
CONFIG_PRINTK_TIME=y
CONFIG_DEBUG_KERNEL=y
diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig
index 113d6dfe5243..75b326bc7830 100644
--- a/arch/arm/configs/omap2plus_defconfig
+++ b/arch/arm/configs/omap2plus_defconfig
@@ -706,9 +706,6 @@ CONFIG_CRYPTO_DEV_OMAP=m
CONFIG_CRYPTO_DEV_OMAP_SHAM=m
CONFIG_CRYPTO_DEV_OMAP_AES=m
CONFIG_CRYPTO_DEV_OMAP_DES=m
-CONFIG_CRC_CCITT=y
-CONFIG_CRC_T10DIF=y
-CONFIG_CRC_ITU_T=y
CONFIG_DMA_CMA=y
CONFIG_FONTS=y
CONFIG_FONT_8x8=y
diff --git a/arch/arm/configs/orion5x_defconfig b/arch/arm/configs/orion5x_defconfig
index 0629b088a584..62b9c6102789 100644
--- a/arch/arm/configs/orion5x_defconfig
+++ b/arch/arm/configs/orion5x_defconfig
@@ -136,7 +136,6 @@ CONFIG_CRYPTO_CBC=m
CONFIG_CRYPTO_ECB=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_DEV_MARVELL_CESA=y
-CONFIG_CRC_T10DIF=y
# CONFIG_DEBUG_BUGVERBOSE is not set
CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
CONFIG_MAGIC_SYSRQ=y
diff --git a/arch/arm/configs/pxa168_defconfig b/arch/arm/configs/pxa168_defconfig
index ce10fe2104bf..4748c7d33cb8 100644
--- a/arch/arm/configs/pxa168_defconfig
+++ b/arch/arm/configs/pxa168_defconfig
@@ -41,7 +41,6 @@ CONFIG_NFS_V3=y
CONFIG_NFS_V3_ACL=y
CONFIG_NFS_V4=y
CONFIG_ROOT_NFS=y
-CONFIG_CRC_CCITT=y
CONFIG_PRINTK_TIME=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
diff --git a/arch/arm/configs/pxa910_defconfig b/arch/arm/configs/pxa910_defconfig
index 1f28aea86014..49b59c600ae1 100644
--- a/arch/arm/configs/pxa910_defconfig
+++ b/arch/arm/configs/pxa910_defconfig
@@ -50,7 +50,6 @@ CONFIG_NFS_V3=y
CONFIG_NFS_V3_ACL=y
CONFIG_NFS_V4=y
CONFIG_ROOT_NFS=y
-CONFIG_CRC_CCITT=y
CONFIG_PRINTK_TIME=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
diff --git a/arch/arm/configs/pxa_defconfig b/arch/arm/configs/pxa_defconfig
index de0ac8f521d7..24fca8608554 100644
--- a/arch/arm/configs/pxa_defconfig
+++ b/arch/arm/configs/pxa_defconfig
@@ -663,8 +663,6 @@ CONFIG_CRYPTO_SHA1_ARM=m
CONFIG_CRYPTO_SHA256_ARM=m
CONFIG_CRYPTO_SHA512_ARM=m
CONFIG_CRYPTO_AES_ARM=m
-CONFIG_CRC_CCITT=y
-CONFIG_CRC_T10DIF=m
CONFIG_FONTS=y
CONFIG_FONT_8x8=y
CONFIG_FONT_8x16=y
diff --git a/arch/arm/configs/s5pv210_defconfig b/arch/arm/configs/s5pv210_defconfig
index 5dbe85c263de..02121eec3658 100644
--- a/arch/arm/configs/s5pv210_defconfig
+++ b/arch/arm/configs/s5pv210_defconfig
@@ -113,7 +113,6 @@ CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ASCII=y
CONFIG_NLS_ISO8859_1=y
CONFIG_NLS_UTF8=y
-CONFIG_CRC_CCITT=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
CONFIG_MAGIC_SYSRQ=y
diff --git a/arch/arm/configs/sama7_defconfig b/arch/arm/configs/sama7_defconfig
index ea7ddf640ba7..e14720a9a5ac 100644
--- a/arch/arm/configs/sama7_defconfig
+++ b/arch/arm/configs/sama7_defconfig
@@ -227,8 +227,6 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_DEV_ATMEL_AES=y
CONFIG_CRYPTO_DEV_ATMEL_TDES=y
CONFIG_CRYPTO_DEV_ATMEL_SHA=y
-CONFIG_CRC_CCITT=y
-CONFIG_CRC_ITU_T=y
CONFIG_DMA_CMA=y
CONFIG_CMA_SIZE_MBYTES=32
CONFIG_CMA_ALIGNMENT=9
diff --git a/arch/arm/configs/spitz_defconfig b/arch/arm/configs/spitz_defconfig
index ac5b7a5aaff6..ffec59e3f49c 100644
--- a/arch/arm/configs/spitz_defconfig
+++ b/arch/arm/configs/spitz_defconfig
@@ -234,7 +234,6 @@ CONFIG_CRYPTO_MD4=m
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_SHA512=m
CONFIG_CRYPTO_WP512=m
-CONFIG_CRC_CCITT=y
CONFIG_FONTS=y
CONFIG_FONT_8x8=y
CONFIG_FONT_8x16=y
diff --git a/arch/arm/configs/stm32_defconfig b/arch/arm/configs/stm32_defconfig
index 423bb41c4225..dcd9c316072e 100644
--- a/arch/arm/configs/stm32_defconfig
+++ b/arch/arm/configs/stm32_defconfig
@@ -74,7 +74,6 @@ CONFIG_EXT3_FS=y
# CONFIG_DNOTIFY is not set
# CONFIG_INOTIFY_USER is not set
CONFIG_NLS=y
-CONFIG_CRC_ITU_T=y
CONFIG_PRINTK_TIME=y
# CONFIG_DEBUG_BUGVERBOSE is not set
CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
diff --git a/arch/arm/configs/wpcm450_defconfig b/arch/arm/configs/wpcm450_defconfig
index 5e4397f7f828..cd4b3e70ff68 100644
--- a/arch/arm/configs/wpcm450_defconfig
+++ b/arch/arm/configs/wpcm450_defconfig
@@ -191,8 +191,6 @@ CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y
CONFIG_X509_CERTIFICATE_PARSER=y
CONFIG_PKCS7_MESSAGE_PARSER=y
CONFIG_SYSTEM_TRUSTED_KEYRING=y
-CONFIG_CRC_CCITT=y
-CONFIG_CRC_ITU_T=m
CONFIG_PRINTK_TIME=y
CONFIG_DEBUG_KERNEL=y
CONFIG_MAGIC_SYSRQ=y
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index d1b1a33f9a8b..e4f77757937e 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -121,6 +121,15 @@
#define ESR_ELx_FSC_SEA_TTW(n) (0x14 + (n))
#define ESR_ELx_FSC_SECC (0x18)
#define ESR_ELx_FSC_SECC_TTW(n) (0x1c + (n))
+#define ESR_ELx_FSC_ADDRSZ (0x00)
+
+/*
+ * Annoyingly, the negative levels for Address size faults aren't laid out
+ * contiguously (or in the desired order)
+ */
+#define ESR_ELx_FSC_ADDRSZ_nL(n) ((n) == -1 ? 0x25 : 0x2C)
+#define ESR_ELx_FSC_ADDRSZ_L(n) ((n) < 0 ? ESR_ELx_FSC_ADDRSZ_nL(n) : \
+ (ESR_ELx_FSC_ADDRSZ + (n)))
/* Status codes for individual page table levels */
#define ESR_ELx_FSC_ACCESS_L(n) (ESR_ELx_FSC_ACCESS + (n))
@@ -161,8 +170,6 @@
#define ESR_ELx_Xs_MASK (GENMASK_ULL(4, 0))
/* ISS field definitions for exceptions taken in to Hyp */
-#define ESR_ELx_FSC_ADDRSZ (0x00)
-#define ESR_ELx_FSC_ADDRSZ_L(n) (ESR_ELx_FSC_ADDRSZ + (n))
#define ESR_ELx_CV (UL(1) << 24)
#define ESR_ELx_COND_SHIFT (20)
#define ESR_ELx_COND_MASK (UL(0xF) << ESR_ELx_COND_SHIFT)
@@ -464,6 +471,39 @@ static inline bool esr_fsc_is_access_flag_fault(unsigned long esr)
(esr == ESR_ELx_FSC_ACCESS_L(0));
}
+static inline bool esr_fsc_is_addr_sz_fault(unsigned long esr)
+{
+ esr &= ESR_ELx_FSC;
+
+ return (esr == ESR_ELx_FSC_ADDRSZ_L(3)) ||
+ (esr == ESR_ELx_FSC_ADDRSZ_L(2)) ||
+ (esr == ESR_ELx_FSC_ADDRSZ_L(1)) ||
+ (esr == ESR_ELx_FSC_ADDRSZ_L(0)) ||
+ (esr == ESR_ELx_FSC_ADDRSZ_L(-1));
+}
+
+static inline bool esr_fsc_is_sea_ttw(unsigned long esr)
+{
+ esr = esr & ESR_ELx_FSC;
+
+ return (esr == ESR_ELx_FSC_SEA_TTW(3)) ||
+ (esr == ESR_ELx_FSC_SEA_TTW(2)) ||
+ (esr == ESR_ELx_FSC_SEA_TTW(1)) ||
+ (esr == ESR_ELx_FSC_SEA_TTW(0)) ||
+ (esr == ESR_ELx_FSC_SEA_TTW(-1));
+}
+
+static inline bool esr_fsc_is_secc_ttw(unsigned long esr)
+{
+ esr = esr & ESR_ELx_FSC;
+
+ return (esr == ESR_ELx_FSC_SECC_TTW(3)) ||
+ (esr == ESR_ELx_FSC_SECC_TTW(2)) ||
+ (esr == ESR_ELx_FSC_SECC_TTW(1)) ||
+ (esr == ESR_ELx_FSC_SECC_TTW(0)) ||
+ (esr == ESR_ELx_FSC_SECC_TTW(-1));
+}
+
/* Indicate whether ESR.EC==0x1A is for an ERETAx instruction */
static inline bool esr_iss_is_eretax(unsigned long esr)
{
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index d7cf66573aca..bd020fc28aa9 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -305,7 +305,12 @@ static __always_inline unsigned long kvm_vcpu_get_hfar(const struct kvm_vcpu *vc
static __always_inline phys_addr_t kvm_vcpu_get_fault_ipa(const struct kvm_vcpu *vcpu)
{
- return ((phys_addr_t)vcpu->arch.fault.hpfar_el2 & HPFAR_MASK) << 8;
+ u64 hpfar = vcpu->arch.fault.hpfar_el2;
+
+ if (unlikely(!(hpfar & HPFAR_EL2_NS)))
+ return INVALID_GPA;
+
+ return FIELD_GET(HPFAR_EL2_FIPA, hpfar) << 12;
}
static inline u64 kvm_vcpu_get_disr(const struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/include/asm/kvm_ras.h b/arch/arm64/include/asm/kvm_ras.h
index 87e10d9a635b..9398ade632aa 100644
--- a/arch/arm64/include/asm/kvm_ras.h
+++ b/arch/arm64/include/asm/kvm_ras.h
@@ -14,7 +14,7 @@
* Was this synchronous external abort a RAS notification?
* Returns '0' for errors handled by some RAS subsystem, or -ENOENT.
*/
-static inline int kvm_handle_guest_sea(phys_addr_t addr, u64 esr)
+static inline int kvm_handle_guest_sea(void)
{
/* apei_claim_sea(NULL) expects to mask interrupts itself */
lockdep_assert_irqs_enabled();
diff --git a/arch/arm64/kvm/hyp/include/hyp/fault.h b/arch/arm64/kvm/hyp/include/hyp/fault.h
index 17df94570f03..fc573fc767b0 100644
--- a/arch/arm64/kvm/hyp/include/hyp/fault.h
+++ b/arch/arm64/kvm/hyp/include/hyp/fault.h
@@ -12,6 +12,16 @@
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
+static inline bool __fault_safe_to_translate(u64 esr)
+{
+ u64 fsc = esr & ESR_ELx_FSC;
+
+ if (esr_fsc_is_sea_ttw(esr) || esr_fsc_is_secc_ttw(esr))
+ return false;
+
+ return !(fsc == ESR_ELx_FSC_EXTABT && (esr & ESR_ELx_FnV));
+}
+
static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar)
{
int ret;
@@ -44,34 +54,50 @@ static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar)
return true;
}
-static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault)
+/*
+ * Checks for the conditions when HPFAR_EL2 is written, per ARM ARM R_FKLWR.
+ */
+static inline bool __hpfar_valid(u64 esr)
{
- u64 hpfar, far;
-
- far = read_sysreg_el2(SYS_FAR);
-
/*
- * The HPFAR can be invalid if the stage 2 fault did not
- * happen during a stage 1 page table walk (the ESR_EL2.S1PTW
- * bit is clear) and one of the two following cases are true:
- * 1. The fault was due to a permission fault
- * 2. The processor carries errata 834220
+ * CPUs affected by ARM erratum #834220 may incorrectly report a
+ * stage-2 translation fault when a stage-1 permission fault occurs.
*
- * Therefore, for all non S1PTW faults where we either have a
- * permission fault or the errata workaround is enabled, we
- * resolve the IPA using the AT instruction.
+ * Re-walk the page tables to determine if a stage-1 fault actually
+ * occurred.
*/
- if (!(esr & ESR_ELx_S1PTW) &&
- (cpus_have_final_cap(ARM64_WORKAROUND_834220) ||
- esr_fsc_is_permission_fault(esr))) {
- if (!__translate_far_to_hpfar(far, &hpfar))
- return false;
- } else {
+ if (cpus_have_final_cap(ARM64_WORKAROUND_834220) &&
+ esr_fsc_is_translation_fault(esr))
+ return false;
+
+ if (esr_fsc_is_translation_fault(esr) || esr_fsc_is_access_flag_fault(esr))
+ return true;
+
+ if ((esr & ESR_ELx_S1PTW) && esr_fsc_is_permission_fault(esr))
+ return true;
+
+ return esr_fsc_is_addr_sz_fault(esr);
+}
+
+static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault)
+{
+ u64 hpfar;
+
+ fault->far_el2 = read_sysreg_el2(SYS_FAR);
+ fault->hpfar_el2 = 0;
+
+ if (__hpfar_valid(esr))
hpfar = read_sysreg(hpfar_el2);
- }
+ else if (unlikely(!__fault_safe_to_translate(esr)))
+ return true;
+ else if (!__translate_far_to_hpfar(fault->far_el2, &hpfar))
+ return false;
- fault->far_el2 = far;
- fault->hpfar_el2 = hpfar;
+ /*
+ * Hijack HPFAR_EL2.NS (RES0 in Non-secure) to indicate a valid
+ * HPFAR value.
+ */
+ fault->hpfar_el2 = hpfar | HPFAR_EL2_NS;
return true;
}
diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c
index e433dfab882a..3369dd0c4009 100644
--- a/arch/arm64/kvm/hyp/nvhe/ffa.c
+++ b/arch/arm64/kvm/hyp/nvhe/ffa.c
@@ -730,10 +730,10 @@ static void do_ffa_version(struct arm_smccc_res *res,
hyp_ffa_version = ffa_req_version;
}
- if (hyp_ffa_post_init())
+ if (hyp_ffa_post_init()) {
res->a0 = FFA_RET_NOT_SUPPORTED;
- else {
- has_version_negotiated = true;
+ } else {
+ smp_store_release(&has_version_negotiated, true);
res->a0 = hyp_ffa_version;
}
unlock:
@@ -809,7 +809,8 @@ bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt, u32 func_id)
if (!is_ffa_call(func_id))
return false;
- if (!has_version_negotiated && func_id != FFA_VERSION) {
+ if (func_id != FFA_VERSION &&
+ !smp_load_acquire(&has_version_negotiated)) {
ffa_to_smccc_error(&res, FFA_RET_INVALID_PARAMETERS);
goto out_handled;
}
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index f34f11c720d7..2a5284f749b4 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -578,7 +578,14 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
return;
}
- addr = (fault.hpfar_el2 & HPFAR_MASK) << 8;
+
+ /*
+ * Yikes, we couldn't resolve the fault IPA. This should reinject an
+ * abort into the host when we figure out how to do that.
+ */
+ BUG_ON(!(fault.hpfar_el2 & HPFAR_EL2_NS));
+ addr = FIELD_GET(HPFAR_EL2_FIPA, fault.hpfar_el2) << 12;
+
ret = host_stage2_idmap(addr);
BUG_ON(ret && ret != -EAGAIN);
}
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 2feb6c6b63af..754f2fe0cc67 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1794,9 +1794,28 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
gfn_t gfn;
int ret, idx;
+ /* Synchronous External Abort? */
+ if (kvm_vcpu_abt_issea(vcpu)) {
+ /*
+ * For RAS the host kernel may handle this abort.
+ * There is no need to pass the error into the guest.
+ */
+ if (kvm_handle_guest_sea())
+ kvm_inject_vabt(vcpu);
+
+ return 1;
+ }
+
esr = kvm_vcpu_get_esr(vcpu);
+ /*
+ * The fault IPA should be reliable at this point as we're not dealing
+ * with an SEA.
+ */
ipa = fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
+ if (KVM_BUG_ON(ipa == INVALID_GPA, vcpu->kvm))
+ return -EFAULT;
+
is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
if (esr_fsc_is_translation_fault(esr)) {
@@ -1818,18 +1837,6 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
}
}
- /* Synchronous External Abort? */
- if (kvm_vcpu_abt_issea(vcpu)) {
- /*
- * For RAS the host kernel may handle this abort.
- * There is no need to pass the error into the guest.
- */
- if (kvm_handle_guest_sea(fault_ipa, kvm_vcpu_get_esr(vcpu)))
- kvm_inject_vabt(vcpu);
-
- return 1;
- }
-
trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_esr(vcpu),
kvm_vcpu_get_hfar(vcpu), fault_ipa);
diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg
index f9476848a2ed..bdf044c5d11b 100644
--- a/arch/arm64/tools/sysreg
+++ b/arch/arm64/tools/sysreg
@@ -3536,3 +3536,10 @@ Field 5 F
Field 4 P
Field 3:0 Align
EndSysreg
+
+Sysreg HPFAR_EL2 3 4 6 0 4
+Field 63 NS
+Res0 62:48
+Field 47:4 FIPA
+Res0 3:0
+EndSysreg
diff --git a/arch/hexagon/configs/comet_defconfig b/arch/hexagon/configs/comet_defconfig
index 469c025297c6..c6108f000288 100644
--- a/arch/hexagon/configs/comet_defconfig
+++ b/arch/hexagon/configs/comet_defconfig
@@ -72,9 +72,6 @@ CONFIG_INET=y
CONFIG_CRYPTO_MD5=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
# CONFIG_CRYPTO_HW is not set
-CONFIG_CRC_CCITT=y
-CONFIG_CRC16=y
-CONFIG_CRC_T10DIF=y
CONFIG_FRAME_WARN=0
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_FS=y
diff --git a/arch/m68k/configs/amcore_defconfig b/arch/m68k/configs/amcore_defconfig
index 67a0d157122d..110279a64aa4 100644
--- a/arch/m68k/configs/amcore_defconfig
+++ b/arch/m68k/configs/amcore_defconfig
@@ -89,4 +89,3 @@ CONFIG_PANIC_ON_OOPS=y
# CONFIG_CRYPTO_ECHAINIV is not set
CONFIG_CRYPTO_ANSI_CPRNG=y
# CONFIG_CRYPTO_HW is not set
-CONFIG_CRC16=y
diff --git a/arch/mips/configs/ath79_defconfig b/arch/mips/configs/ath79_defconfig
index 8caa03a41327..cba0b85c6707 100644
--- a/arch/mips/configs/ath79_defconfig
+++ b/arch/mips/configs/ath79_defconfig
@@ -82,7 +82,6 @@ CONFIG_LEDS_GPIO=y
# CONFIG_IOMMU_SUPPORT is not set
# CONFIG_DNOTIFY is not set
# CONFIG_PROC_PAGE_MONITOR is not set
-CONFIG_CRC_ITU_T=m
CONFIG_STRIP_ASM_SYMS=y
CONFIG_DEBUG_FS=y
# CONFIG_SCHED_DEBUG is not set
diff --git a/arch/mips/configs/bigsur_defconfig b/arch/mips/configs/bigsur_defconfig
index fe282630b51c..8f7c36868204 100644
--- a/arch/mips/configs/bigsur_defconfig
+++ b/arch/mips/configs/bigsur_defconfig
@@ -238,7 +238,6 @@ CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
-CONFIG_CRC_T10DIF=m
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_DETECT_HUNG_TASK=y
diff --git a/arch/mips/configs/fuloong2e_defconfig b/arch/mips/configs/fuloong2e_defconfig
index 5ab149cd3178..114fcd67898d 100644
--- a/arch/mips/configs/fuloong2e_defconfig
+++ b/arch/mips/configs/fuloong2e_defconfig
@@ -218,4 +218,3 @@ CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_DEFLATE=m
CONFIG_CRYPTO_LZO=m
# CONFIG_CRYPTO_HW is not set
-CONFIG_CRC_CCITT=y
diff --git a/arch/mips/configs/ip22_defconfig b/arch/mips/configs/ip22_defconfig
index 31ca93d3acc5..f1a8ccf2c459 100644
--- a/arch/mips/configs/ip22_defconfig
+++ b/arch/mips/configs/ip22_defconfig
@@ -326,5 +326,4 @@ CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
# CONFIG_CRYPTO_HW is not set
-CONFIG_CRC_T10DIF=m
CONFIG_DEBUG_MEMORY_INIT=y
diff --git a/arch/mips/configs/ip27_defconfig b/arch/mips/configs/ip27_defconfig
index b8907b3d7a33..5d079941fd20 100644
--- a/arch/mips/configs/ip27_defconfig
+++ b/arch/mips/configs/ip27_defconfig
@@ -317,4 +317,3 @@ CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
-CONFIG_CRC_T10DIF=m
diff --git a/arch/mips/configs/ip30_defconfig b/arch/mips/configs/ip30_defconfig
index 270181a7320a..a4524e785469 100644
--- a/arch/mips/configs/ip30_defconfig
+++ b/arch/mips/configs/ip30_defconfig
@@ -179,4 +179,3 @@ CONFIG_CRYPTO_RMD160=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_XCBC=m
CONFIG_CRYPTO_LZO=m
-CONFIG_CRC_T10DIF=m
diff --git a/arch/mips/configs/ip32_defconfig b/arch/mips/configs/ip32_defconfig
index 121e7e48fa77..d8ac11427f69 100644
--- a/arch/mips/configs/ip32_defconfig
+++ b/arch/mips/configs/ip32_defconfig
@@ -177,7 +177,6 @@ CONFIG_CRYPTO_SERPENT=y
CONFIG_CRYPTO_TEA=y
CONFIG_CRYPTO_TWOFISH=y
CONFIG_CRYPTO_DEFLATE=y
-CONFIG_CRC_T10DIF=y
CONFIG_FONTS=y
CONFIG_FONT_8x8=y
CONFIG_FONT_8x16=y
diff --git a/arch/mips/configs/omega2p_defconfig b/arch/mips/configs/omega2p_defconfig
index 128f9abab7fc..e2bcdfd290a1 100644
--- a/arch/mips/configs/omega2p_defconfig
+++ b/arch/mips/configs/omega2p_defconfig
@@ -111,7 +111,6 @@ CONFIG_NLS_KOI8_U=y
CONFIG_NLS_UTF8=y
CONFIG_CRYPTO_DEFLATE=y
CONFIG_CRYPTO_LZO=y
-CONFIG_CRC16=y
CONFIG_XZ_DEC=y
CONFIG_PRINTK_TIME=y
CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
diff --git a/arch/mips/configs/rb532_defconfig b/arch/mips/configs/rb532_defconfig
index 0261969a6e45..42b161d587c7 100644
--- a/arch/mips/configs/rb532_defconfig
+++ b/arch/mips/configs/rb532_defconfig
@@ -155,5 +155,4 @@ CONFIG_JFFS2_COMPRESSION_OPTIONS=y
CONFIG_SQUASHFS=y
CONFIG_CRYPTO_TEST=m
# CONFIG_CRYPTO_HW is not set
-CONFIG_CRC16=m
CONFIG_STRIP_ASM_SYMS=y
diff --git a/arch/mips/configs/rt305x_defconfig b/arch/mips/configs/rt305x_defconfig
index 8404e0a9d8b2..8f9701efef19 100644
--- a/arch/mips/configs/rt305x_defconfig
+++ b/arch/mips/configs/rt305x_defconfig
@@ -128,7 +128,6 @@ CONFIG_SQUASHFS=y
# CONFIG_SQUASHFS_ZLIB is not set
CONFIG_SQUASHFS_XZ=y
CONFIG_CRYPTO_ARC4=m
-CONFIG_CRC_ITU_T=m
# CONFIG_XZ_DEC_X86 is not set
# CONFIG_XZ_DEC_POWERPC is not set
# CONFIG_XZ_DEC_IA64 is not set
diff --git a/arch/mips/configs/sb1250_swarm_defconfig b/arch/mips/configs/sb1250_swarm_defconfig
index ce855b644bb0..ae2afff00e01 100644
--- a/arch/mips/configs/sb1250_swarm_defconfig
+++ b/arch/mips/configs/sb1250_swarm_defconfig
@@ -99,4 +99,3 @@ CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_DEFLATE=m
CONFIG_CRYPTO_LZO=m
# CONFIG_CRYPTO_HW is not set
-CONFIG_CRC16=m
diff --git a/arch/mips/configs/vocore2_defconfig b/arch/mips/configs/vocore2_defconfig
index 917967fed45f..2a9a9b12847d 100644
--- a/arch/mips/configs/vocore2_defconfig
+++ b/arch/mips/configs/vocore2_defconfig
@@ -111,7 +111,6 @@ CONFIG_NLS_KOI8_U=y
CONFIG_NLS_UTF8=y
CONFIG_CRYPTO_DEFLATE=y
CONFIG_CRYPTO_LZO=y
-CONFIG_CRC16=y
CONFIG_XZ_DEC=y
CONFIG_PRINTK_TIME=y
CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
diff --git a/arch/mips/configs/xway_defconfig b/arch/mips/configs/xway_defconfig
index 7b91edfe3e07..aae8497b6872 100644
--- a/arch/mips/configs/xway_defconfig
+++ b/arch/mips/configs/xway_defconfig
@@ -140,7 +140,6 @@ CONFIG_SQUASHFS=y
# CONFIG_SQUASHFS_ZLIB is not set
CONFIG_SQUASHFS_XZ=y
CONFIG_CRYPTO_ARC4=m
-CONFIG_CRC_ITU_T=m
CONFIG_PRINTK_TIME=y
CONFIG_STRIP_ASM_SYMS=y
CONFIG_DEBUG_FS=y
diff --git a/arch/parisc/configs/generic-32bit_defconfig b/arch/parisc/configs/generic-32bit_defconfig
index f5fffc24c3bc..5b65c9859613 100644
--- a/arch/parisc/configs/generic-32bit_defconfig
+++ b/arch/parisc/configs/generic-32bit_defconfig
@@ -264,8 +264,6 @@ CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_SHA1=y
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_DEFLATE=y
-CONFIG_CRC_CCITT=m
-CONFIG_CRC_T10DIF=y
CONFIG_FONTS=y
CONFIG_PRINTK_TIME=y
CONFIG_MAGIC_SYSRQ=y
diff --git a/arch/parisc/configs/generic-64bit_defconfig b/arch/parisc/configs/generic-64bit_defconfig
index 2487765b7be3..ecc9ffcc11cd 100644
--- a/arch/parisc/configs/generic-64bit_defconfig
+++ b/arch/parisc/configs/generic-64bit_defconfig
@@ -292,7 +292,6 @@ CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_DEFLATE=m
# CONFIG_CRYPTO_HW is not set
-CONFIG_CRC_CCITT=m
CONFIG_PRINTK_TIME=y
CONFIG_DEBUG_KERNEL=y
CONFIG_STRIP_ASM_SYMS=y
diff --git a/arch/powerpc/configs/44x/sam440ep_defconfig b/arch/powerpc/configs/44x/sam440ep_defconfig
index 2479ab62d12f..98221bda380d 100644
--- a/arch/powerpc/configs/44x/sam440ep_defconfig
+++ b/arch/powerpc/configs/44x/sam440ep_defconfig
@@ -91,5 +91,4 @@ CONFIG_AFFS_FS=m
# CONFIG_NETWORK_FILESYSTEMS is not set
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ISO8859_1=y
-CONFIG_CRC_T10DIF=y
CONFIG_MAGIC_SYSRQ=y
diff --git a/arch/powerpc/configs/44x/warp_defconfig b/arch/powerpc/configs/44x/warp_defconfig
index 20891c413149..5757625469c4 100644
--- a/arch/powerpc/configs/44x/warp_defconfig
+++ b/arch/powerpc/configs/44x/warp_defconfig
@@ -85,8 +85,6 @@ CONFIG_NLS_ASCII=y
CONFIG_NLS_ISO8859_1=y
CONFIG_NLS_ISO8859_15=y
CONFIG_NLS_UTF8=y
-CONFIG_CRC_CCITT=y
-CONFIG_CRC_T10DIF=y
CONFIG_PRINTK_TIME=y
CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
CONFIG_DEBUG_FS=y
diff --git a/arch/powerpc/configs/83xx/mpc832x_rdb_defconfig b/arch/powerpc/configs/83xx/mpc832x_rdb_defconfig
index 1715ff547442..b99caba8724a 100644
--- a/arch/powerpc/configs/83xx/mpc832x_rdb_defconfig
+++ b/arch/powerpc/configs/83xx/mpc832x_rdb_defconfig
@@ -73,6 +73,5 @@ CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_CODEPAGE_932=y
CONFIG_NLS_ISO8859_8=y
CONFIG_NLS_ISO8859_1=y
-CONFIG_CRC_T10DIF=y
CONFIG_CRYPTO_ECB=m
CONFIG_CRYPTO_PCBC=m
diff --git a/arch/powerpc/configs/83xx/mpc834x_itx_defconfig b/arch/powerpc/configs/83xx/mpc834x_itx_defconfig
index e65c0057147f..11163052fdba 100644
--- a/arch/powerpc/configs/83xx/mpc834x_itx_defconfig
+++ b/arch/powerpc/configs/83xx/mpc834x_itx_defconfig
@@ -80,5 +80,4 @@ CONFIG_TMPFS=y
CONFIG_NFS_FS=y
CONFIG_NFS_V4=y
CONFIG_ROOT_NFS=y
-CONFIG_CRC_T10DIF=y
CONFIG_CRYPTO_PCBC=m
diff --git a/arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig b/arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig
index 17714bf0ed40..312d39e4242c 100644
--- a/arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig
+++ b/arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig
@@ -72,5 +72,4 @@ CONFIG_TMPFS=y
CONFIG_NFS_FS=y
CONFIG_NFS_V4=y
CONFIG_ROOT_NFS=y
-CONFIG_CRC_T10DIF=y
CONFIG_CRYPTO_PCBC=m
diff --git a/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig b/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig
index 58fae5131fa7..ac27f99faab8 100644
--- a/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig
+++ b/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig
@@ -75,6 +75,5 @@ CONFIG_TMPFS=y
CONFIG_NFS_FS=y
CONFIG_NFS_V4=y
CONFIG_ROOT_NFS=y
-CONFIG_CRC_T10DIF=y
CONFIG_CRYPTO_ECB=m
CONFIG_CRYPTO_PCBC=m
diff --git a/arch/powerpc/configs/85xx/ge_imp3a_defconfig b/arch/powerpc/configs/85xx/ge_imp3a_defconfig
index 6f58ee1edf1f..7beb36a41d45 100644
--- a/arch/powerpc/configs/85xx/ge_imp3a_defconfig
+++ b/arch/powerpc/configs/85xx/ge_imp3a_defconfig
@@ -221,8 +221,6 @@ CONFIG_NLS_ISO8859_15=y
CONFIG_NLS_KOI8_R=m
CONFIG_NLS_KOI8_U=m
CONFIG_NLS_UTF8=y
-CONFIG_CRC_CCITT=y
-CONFIG_CRC_T10DIF=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_CRYPTO_CBC=y
CONFIG_CRYPTO_MD5=y
diff --git a/arch/powerpc/configs/85xx/stx_gp3_defconfig b/arch/powerpc/configs/85xx/stx_gp3_defconfig
index e7080497048d..0a42072fa23c 100644
--- a/arch/powerpc/configs/85xx/stx_gp3_defconfig
+++ b/arch/powerpc/configs/85xx/stx_gp3_defconfig
@@ -60,8 +60,6 @@ CONFIG_CRAMFS=m
CONFIG_NFS_FS=y
CONFIG_ROOT_NFS=y
CONFIG_NLS=y
-CONFIG_CRC_CCITT=y
-CONFIG_CRC_T10DIF=m
CONFIG_DETECT_HUNG_TASK=y
# CONFIG_DEBUG_BUGVERBOSE is not set
CONFIG_BDI_SWITCH=y
diff --git a/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig b/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig
index 3a6381aa9fdc..488d03ae6d6c 100644
--- a/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig
+++ b/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig
@@ -132,7 +132,6 @@ CONFIG_ROOT_NFS=y
CONFIG_NFSD=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ISO8859_1=y
-CONFIG_CRC_T10DIF=y
CONFIG_DETECT_HUNG_TASK=y
# CONFIG_DEBUG_BUGVERBOSE is not set
CONFIG_CRYPTO_HMAC=y
diff --git a/arch/powerpc/configs/86xx-hw.config b/arch/powerpc/configs/86xx-hw.config
index 0cb24b33c88e..e7bd265fae5a 100644
--- a/arch/powerpc/configs/86xx-hw.config
+++ b/arch/powerpc/configs/86xx-hw.config
@@ -5,7 +5,6 @@ CONFIG_BROADCOM_PHY=y
# CONFIG_CARDBUS is not set
CONFIG_CHR_DEV_SG=y
CONFIG_CHR_DEV_ST=y
-CONFIG_CRC_T10DIF=y
CONFIG_CRYPTO_HMAC=y
CONFIG_DS1682=y
CONFIG_EEPROM_LEGACY=y
diff --git a/arch/powerpc/configs/amigaone_defconfig b/arch/powerpc/configs/amigaone_defconfig
index 200bb1ecb560..69ef3dc31c4b 100644
--- a/arch/powerpc/configs/amigaone_defconfig
+++ b/arch/powerpc/configs/amigaone_defconfig
@@ -106,7 +106,6 @@ CONFIG_TMPFS=y
CONFIG_AFFS_FS=m
CONFIG_NLS_ASCII=y
CONFIG_NLS_ISO8859_1=m
-CONFIG_CRC_T10DIF=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_MUTEXES=y
diff --git a/arch/powerpc/configs/chrp32_defconfig b/arch/powerpc/configs/chrp32_defconfig
index fb314f75ad4b..b799c95480ae 100644
--- a/arch/powerpc/configs/chrp32_defconfig
+++ b/arch/powerpc/configs/chrp32_defconfig
@@ -110,7 +110,6 @@ CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_NLS_ASCII=y
CONFIG_NLS_ISO8859_1=m
-CONFIG_CRC_T10DIF=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_MUTEXES=y
diff --git a/arch/powerpc/configs/fsl-emb-nonhw.config b/arch/powerpc/configs/fsl-emb-nonhw.config
index d6d2a458847b..2f81bc2d819e 100644
--- a/arch/powerpc/configs/fsl-emb-nonhw.config
+++ b/arch/powerpc/configs/fsl-emb-nonhw.config
@@ -15,7 +15,6 @@ CONFIG_CGROUP_CPUACCT=y
CONFIG_CGROUP_SCHED=y
CONFIG_CGROUPS=y
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_CRC_T10DIF=y
CONFIG_CPUSETS=y
CONFIG_CRAMFS=y
CONFIG_CRYPTO_MD4=y
diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig
index 9215bed53291..7e58f3e6c987 100644
--- a/arch/powerpc/configs/g5_defconfig
+++ b/arch/powerpc/configs/g5_defconfig
@@ -231,7 +231,6 @@ CONFIG_NLS_ASCII=y
CONFIG_NLS_ISO8859_1=y
CONFIG_NLS_ISO8859_15=y
CONFIG_NLS_UTF8=y
-CONFIG_CRC_T10DIF=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_MUTEXES=y
diff --git a/arch/powerpc/configs/gamecube_defconfig b/arch/powerpc/configs/gamecube_defconfig
index d77eeb525366..cdd99657b71b 100644
--- a/arch/powerpc/configs/gamecube_defconfig
+++ b/arch/powerpc/configs/gamecube_defconfig
@@ -82,7 +82,6 @@ CONFIG_ROOT_NFS=y
CONFIG_CIFS=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ISO8859_1=y
-CONFIG_CRC_CCITT=y
CONFIG_PRINTK_TIME=y
CONFIG_DEBUG_SPINLOCK=y
CONFIG_DEBUG_MUTEXES=y
diff --git a/arch/powerpc/configs/linkstation_defconfig b/arch/powerpc/configs/linkstation_defconfig
index fa707de761be..b564f9e33a0d 100644
--- a/arch/powerpc/configs/linkstation_defconfig
+++ b/arch/powerpc/configs/linkstation_defconfig
@@ -125,8 +125,6 @@ CONFIG_NLS_CODEPAGE_437=m
CONFIG_NLS_CODEPAGE_932=m
CONFIG_NLS_ISO8859_1=m
CONFIG_NLS_UTF8=m
-CONFIG_CRC_CCITT=m
-CONFIG_CRC_T10DIF=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DETECT_HUNG_TASK=y
diff --git a/arch/powerpc/configs/mpc83xx_defconfig b/arch/powerpc/configs/mpc83xx_defconfig
index 83c4710017e9..a815d9e5e3e8 100644
--- a/arch/powerpc/configs/mpc83xx_defconfig
+++ b/arch/powerpc/configs/mpc83xx_defconfig
@@ -97,7 +97,6 @@ CONFIG_TMPFS=y
CONFIG_NFS_FS=y
CONFIG_NFS_V4=y
CONFIG_ROOT_NFS=y
-CONFIG_CRC_T10DIF=y
CONFIG_CRYPTO_ECB=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_SHA512=y
diff --git a/arch/powerpc/configs/mpc866_ads_defconfig b/arch/powerpc/configs/mpc866_ads_defconfig
index a0d27c59ea78..dfbdd5e8e108 100644
--- a/arch/powerpc/configs/mpc866_ads_defconfig
+++ b/arch/powerpc/configs/mpc866_ads_defconfig
@@ -38,4 +38,3 @@ CONFIG_TMPFS=y
CONFIG_CRAMFS=y
CONFIG_NFS_FS=y
CONFIG_ROOT_NFS=y
-CONFIG_CRC_CCITT=y
diff --git a/arch/powerpc/configs/mvme5100_defconfig b/arch/powerpc/configs/mvme5100_defconfig
index d1c7fd5bf34b..fa2b3b9c5945 100644
--- a/arch/powerpc/configs/mvme5100_defconfig
+++ b/arch/powerpc/configs/mvme5100_defconfig
@@ -107,8 +107,6 @@ CONFIG_NLS_CODEPAGE_437=m
CONFIG_NLS_CODEPAGE_932=m
CONFIG_NLS_ISO8859_1=m
CONFIG_NLS_UTF8=m
-CONFIG_CRC_CCITT=m
-CONFIG_CRC_T10DIF=y
CONFIG_XZ_DEC=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_KERNEL=y
diff --git a/arch/powerpc/configs/pasemi_defconfig b/arch/powerpc/configs/pasemi_defconfig
index 61993944db40..8bbf51b38480 100644
--- a/arch/powerpc/configs/pasemi_defconfig
+++ b/arch/powerpc/configs/pasemi_defconfig
@@ -159,7 +159,6 @@ CONFIG_NFSD=y
CONFIG_NFSD_V4=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ISO8859_1=y
-CONFIG_CRC_CCITT=y
CONFIG_PRINTK_TIME=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_KERNEL=y
diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig
index e8b3f67bf3f5..1bc3466bc909 100644
--- a/arch/powerpc/configs/pmac32_defconfig
+++ b/arch/powerpc/configs/pmac32_defconfig
@@ -276,7 +276,6 @@ CONFIG_NFSD_V3_ACL=y
CONFIG_NFSD_V4=y
CONFIG_NLS_CODEPAGE_437=m
CONFIG_NLS_ISO8859_1=m
-CONFIG_CRC_T10DIF=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DETECT_HUNG_TASK=y
diff --git a/arch/powerpc/configs/ppc44x_defconfig b/arch/powerpc/configs/ppc44x_defconfig
index 8b595f67068c..41c930f74ed4 100644
--- a/arch/powerpc/configs/ppc44x_defconfig
+++ b/arch/powerpc/configs/ppc44x_defconfig
@@ -90,7 +90,6 @@ CONFIG_NFS_FS=y
CONFIG_ROOT_NFS=y
CONFIG_NLS_CODEPAGE_437=m
CONFIG_NLS_ISO8859_1=m
-CONFIG_CRC_T10DIF=m
CONFIG_MAGIC_SYSRQ=y
CONFIG_DETECT_HUNG_TASK=y
CONFIG_CRYPTO_ECB=y
diff --git a/arch/powerpc/configs/ppc64e_defconfig b/arch/powerpc/configs/ppc64e_defconfig
index 4c05f4e4d505..d2e659a2d8cb 100644
--- a/arch/powerpc/configs/ppc64e_defconfig
+++ b/arch/powerpc/configs/ppc64e_defconfig
@@ -207,7 +207,6 @@ CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ASCII=y
CONFIG_NLS_ISO8859_1=y
CONFIG_NLS_UTF8=y
-CONFIG_CRC_T10DIF=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_STACK_USAGE=y
diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig
index 2b175ddf82f0..0b48d2b776c4 100644
--- a/arch/powerpc/configs/ps3_defconfig
+++ b/arch/powerpc/configs/ps3_defconfig
@@ -148,8 +148,6 @@ CONFIG_NLS_ISO8859_1=y
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_LZO=m
-CONFIG_CRC_CCITT=m
-CONFIG_CRC_T10DIF=y
CONFIG_PRINTK_TIME=y
CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
CONFIG_MAGIC_SYSRQ=y
diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig
index 3086c4a12d6d..2b71a6dc399e 100644
--- a/arch/powerpc/configs/skiroot_defconfig
+++ b/arch/powerpc/configs/skiroot_defconfig
@@ -278,8 +278,6 @@ CONFIG_LOCK_DOWN_KERNEL_FORCE_INTEGRITY=y
# CONFIG_INTEGRITY is not set
CONFIG_LSM="yama,loadpin,safesetid,integrity"
# CONFIG_CRYPTO_HW is not set
-CONFIG_CRC16=y
-CONFIG_CRC_ITU_T=y
# CONFIG_XZ_DEC_X86 is not set
# CONFIG_XZ_DEC_IA64 is not set
# CONFIG_XZ_DEC_ARM is not set
diff --git a/arch/powerpc/configs/storcenter_defconfig b/arch/powerpc/configs/storcenter_defconfig
index 7a978d396991..e415222bd839 100644
--- a/arch/powerpc/configs/storcenter_defconfig
+++ b/arch/powerpc/configs/storcenter_defconfig
@@ -75,4 +75,3 @@ CONFIG_NLS_DEFAULT="utf8"
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ISO8859_1=y
CONFIG_NLS_UTF8=y
-CONFIG_CRC_T10DIF=y
diff --git a/arch/powerpc/configs/wii_defconfig b/arch/powerpc/configs/wii_defconfig
index 5017a697b67b..7c714a19221e 100644
--- a/arch/powerpc/configs/wii_defconfig
+++ b/arch/powerpc/configs/wii_defconfig
@@ -114,7 +114,6 @@ CONFIG_ROOT_NFS=y
CONFIG_CIFS=m
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ISO8859_1=y
-CONFIG_CRC_CCITT=y
CONFIG_PRINTK_TIME=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_SPINLOCK=y
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 610dd44a948b..a06a000f196c 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -95,7 +95,7 @@ static int handle_validity(struct kvm_vcpu *vcpu)
vcpu->stat.exit_validity++;
trace_kvm_s390_intercept_validity(vcpu, viwhy);
- KVM_EVENT(3, "validity intercept 0x%x for pid %u (kvm 0x%pK)", viwhy,
+ KVM_EVENT(3, "validity intercept 0x%x for pid %u (kvm 0x%p)", viwhy,
current->pid, vcpu->kvm);
/* do not warn on invalid runtime instrumentation mode */
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 2811a6c093b8..60c360c18690 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -3161,7 +3161,7 @@ void kvm_s390_gisa_clear(struct kvm *kvm)
if (!gi->origin)
return;
gisa_clear_ipm(gi->origin);
- VM_EVENT(kvm, 3, "gisa 0x%pK cleared", gi->origin);
+ VM_EVENT(kvm, 3, "gisa 0x%p cleared", gi->origin);
}
void kvm_s390_gisa_init(struct kvm *kvm)
@@ -3177,7 +3177,7 @@ void kvm_s390_gisa_init(struct kvm *kvm)
hrtimer_setup(&gi->timer, gisa_vcpu_kicker, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
memset(gi->origin, 0, sizeof(struct kvm_s390_gisa));
gi->origin->next_alert = (u32)virt_to_phys(gi->origin);
- VM_EVENT(kvm, 3, "gisa 0x%pK initialized", gi->origin);
+ VM_EVENT(kvm, 3, "gisa 0x%p initialized", gi->origin);
}
void kvm_s390_gisa_enable(struct kvm *kvm)
@@ -3218,7 +3218,7 @@ void kvm_s390_gisa_destroy(struct kvm *kvm)
process_gib_alert_list();
hrtimer_cancel(&gi->timer);
gi->origin = NULL;
- VM_EVENT(kvm, 3, "gisa 0x%pK destroyed", gisa);
+ VM_EVENT(kvm, 3, "gisa 0x%p destroyed", gisa);
}
void kvm_s390_gisa_disable(struct kvm *kvm)
@@ -3467,7 +3467,7 @@ int __init kvm_s390_gib_init(u8 nisc)
}
}
- KVM_EVENT(3, "gib 0x%pK (nisc=%d) initialized", gib, gib->nisc);
+ KVM_EVENT(3, "gib 0x%p (nisc=%d) initialized", gib, gib->nisc);
goto out;
out_unreg_gal:
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index fff863734975..3f3175193fd7 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1022,7 +1022,7 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att
}
mutex_unlock(&kvm->lock);
VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
- VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
+ VM_EVENT(kvm, 3, "New guest asce: 0x%p",
(void *) kvm->arch.gmap->asce);
break;
}
@@ -3466,7 +3466,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm_s390_gisa_init(kvm);
INIT_LIST_HEAD(&kvm->arch.pv.need_cleanup);
kvm->arch.pv.set_aside = NULL;
- KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
+ KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid);
return 0;
out_err:
@@ -3529,7 +3529,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm_s390_destroy_adapters(kvm);
kvm_s390_clear_float_irqs(kvm);
kvm_s390_vsie_destroy(kvm);
- KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
+ KVM_EVENT(3, "vm 0x%p destroyed", kvm);
}
/* Section: vcpu related */
@@ -3650,7 +3650,7 @@ static int sca_switch_to_extended(struct kvm *kvm)
free_page((unsigned long)old_sca);
- VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
+ VM_EVENT(kvm, 2, "Switched to ESCA (0x%p -> 0x%p)",
old_sca, kvm->arch.sca);
return 0;
}
@@ -4027,7 +4027,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
goto out_free_sie_block;
}
- VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
+ VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%p, sie block at 0x%p",
vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h
index 9ac92dbf680d..9e28f165c114 100644
--- a/arch/s390/kvm/trace-s390.h
+++ b/arch/s390/kvm/trace-s390.h
@@ -56,7 +56,7 @@ TRACE_EVENT(kvm_s390_create_vcpu,
__entry->sie_block = sie_block;
),
- TP_printk("create cpu %d at 0x%pK, sie block at 0x%pK",
+ TP_printk("create cpu %d at 0x%p, sie block at 0x%p",
__entry->id, __entry->vcpu, __entry->sie_block)
);
@@ -255,7 +255,7 @@ TRACE_EVENT(kvm_s390_enable_css,
__entry->kvm = kvm;
),
- TP_printk("enabling channel I/O support (kvm @ %pK)\n",
+ TP_printk("enabling channel I/O support (kvm @ %p)\n",
__entry->kvm)
);
diff --git a/arch/sh/configs/ap325rxa_defconfig b/arch/sh/configs/ap325rxa_defconfig
index 4464a2ad42ed..b6f36c938f1d 100644
--- a/arch/sh/configs/ap325rxa_defconfig
+++ b/arch/sh/configs/ap325rxa_defconfig
@@ -99,4 +99,3 @@ CONFIG_NLS_ISO8859_1=y
CONFIG_CRYPTO=y
CONFIG_CRYPTO_CBC=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRC_T10DIF=y
diff --git a/arch/sh/configs/ecovec24_defconfig b/arch/sh/configs/ecovec24_defconfig
index ee1b36682155..e76694aace25 100644
--- a/arch/sh/configs/ecovec24_defconfig
+++ b/arch/sh/configs/ecovec24_defconfig
@@ -128,4 +128,3 @@ CONFIG_DEBUG_FS=y
CONFIG_CRYPTO=y
CONFIG_CRYPTO_CBC=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRC_T10DIF=y
diff --git a/arch/sh/configs/edosk7705_defconfig b/arch/sh/configs/edosk7705_defconfig
index 296ed768cbbb..ee3f6db7d8da 100644
--- a/arch/sh/configs/edosk7705_defconfig
+++ b/arch/sh/configs/edosk7705_defconfig
@@ -33,4 +33,3 @@ CONFIG_CMDLINE_FROM_BOOTLOADER=y
# CONFIG_PROC_FS is not set
# CONFIG_SYSFS is not set
# CONFIG_ENABLE_MUST_CHECK is not set
-# CONFIG_CRC32 is not set
diff --git a/arch/sh/configs/espt_defconfig b/arch/sh/configs/espt_defconfig
index 67716a44463e..da176f100e00 100644
--- a/arch/sh/configs/espt_defconfig
+++ b/arch/sh/configs/espt_defconfig
@@ -110,4 +110,3 @@ CONFIG_NLS_UTF8=y
# CONFIG_ENABLE_MUST_CHECK is not set
CONFIG_DEBUG_FS=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRC_T10DIF=y
diff --git a/arch/sh/configs/hp6xx_defconfig b/arch/sh/configs/hp6xx_defconfig
index 77e3185f63e4..3582af15ad86 100644
--- a/arch/sh/configs/hp6xx_defconfig
+++ b/arch/sh/configs/hp6xx_defconfig
@@ -56,5 +56,3 @@ CONFIG_CRYPTO_PCBC=y
CONFIG_CRYPTO_MD5=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
# CONFIG_CRYPTO_HW is not set
-CONFIG_CRC16=y
-CONFIG_CRC_T10DIF=y
diff --git a/arch/sh/configs/kfr2r09-romimage_defconfig b/arch/sh/configs/kfr2r09-romimage_defconfig
index 42bf34181a3e..88fbb65cb9f9 100644
--- a/arch/sh/configs/kfr2r09-romimage_defconfig
+++ b/arch/sh/configs/kfr2r09-romimage_defconfig
@@ -49,4 +49,3 @@ CONFIG_TMPFS=y
# CONFIG_NETWORK_FILESYSTEMS is not set
# CONFIG_ENABLE_MUST_CHECK is not set
CONFIG_DEBUG_FS=y
-# CONFIG_CRC32 is not set
diff --git a/arch/sh/configs/landisk_defconfig b/arch/sh/configs/landisk_defconfig
index d871623955c5..924bb3233b0b 100644
--- a/arch/sh/configs/landisk_defconfig
+++ b/arch/sh/configs/landisk_defconfig
@@ -111,4 +111,3 @@ CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_CODEPAGE_932=y
CONFIG_SH_STANDARD_BIOS=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRC_T10DIF=y
diff --git a/arch/sh/configs/lboxre2_defconfig b/arch/sh/configs/lboxre2_defconfig
index 6a234761bfd7..0307bb2be79f 100644
--- a/arch/sh/configs/lboxre2_defconfig
+++ b/arch/sh/configs/lboxre2_defconfig
@@ -58,4 +58,3 @@ CONFIG_ROMFS_FS=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_SH_STANDARD_BIOS=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRC_T10DIF=y
diff --git a/arch/sh/configs/magicpanelr2_defconfig b/arch/sh/configs/magicpanelr2_defconfig
index 8d443749550e..93b9aa32dc7c 100644
--- a/arch/sh/configs/magicpanelr2_defconfig
+++ b/arch/sh/configs/magicpanelr2_defconfig
@@ -86,5 +86,3 @@ CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_KOBJECT=y
CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
CONFIG_FRAME_POINTER=y
-CONFIG_CRC_CCITT=m
-CONFIG_CRC16=m
diff --git a/arch/sh/configs/migor_defconfig b/arch/sh/configs/migor_defconfig
index 2d1e65cad239..fc2010c241fb 100644
--- a/arch/sh/configs/migor_defconfig
+++ b/arch/sh/configs/migor_defconfig
@@ -90,4 +90,3 @@ CONFIG_DEBUG_FS=y
CONFIG_CRYPTO_MANAGER=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
# CONFIG_CRYPTO_HW is not set
-CONFIG_CRC_T10DIF=y
diff --git a/arch/sh/configs/r7780mp_defconfig b/arch/sh/configs/r7780mp_defconfig
index 6bd6c0ae85d7..f28b8c4181c2 100644
--- a/arch/sh/configs/r7780mp_defconfig
+++ b/arch/sh/configs/r7780mp_defconfig
@@ -105,4 +105,3 @@ CONFIG_CRYPTO_ECB=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_HMAC=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRC_T10DIF=y
diff --git a/arch/sh/configs/r7785rp_defconfig b/arch/sh/configs/r7785rp_defconfig
index cde668569cc1..3a4239f20ff1 100644
--- a/arch/sh/configs/r7785rp_defconfig
+++ b/arch/sh/configs/r7785rp_defconfig
@@ -103,4 +103,3 @@ CONFIG_CRYPTO_ECB=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_HMAC=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRC_T10DIF=y
diff --git a/arch/sh/configs/rts7751r2d1_defconfig b/arch/sh/configs/rts7751r2d1_defconfig
index c863a11c7592..69568cc40396 100644
--- a/arch/sh/configs/rts7751r2d1_defconfig
+++ b/arch/sh/configs/rts7751r2d1_defconfig
@@ -87,4 +87,3 @@ CONFIG_MINIX_FS=y
CONFIG_NLS_CODEPAGE_932=y
CONFIG_DEBUG_FS=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRC_T10DIF=y
diff --git a/arch/sh/configs/rts7751r2dplus_defconfig b/arch/sh/configs/rts7751r2dplus_defconfig
index 7e4f710d46c7..ecb4bdb5bb58 100644
--- a/arch/sh/configs/rts7751r2dplus_defconfig
+++ b/arch/sh/configs/rts7751r2dplus_defconfig
@@ -92,4 +92,3 @@ CONFIG_MINIX_FS=y
CONFIG_NLS_CODEPAGE_932=y
CONFIG_DEBUG_FS=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRC_T10DIF=y
diff --git a/arch/sh/configs/sdk7780_defconfig b/arch/sh/configs/sdk7780_defconfig
index cd24cf08210e..9870d16d9711 100644
--- a/arch/sh/configs/sdk7780_defconfig
+++ b/arch/sh/configs/sdk7780_defconfig
@@ -136,4 +136,3 @@ CONFIG_SH_STANDARD_BIOS=y
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_DES=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRC_T10DIF=y
diff --git a/arch/sh/configs/se7206_defconfig b/arch/sh/configs/se7206_defconfig
index 472fdf365cad..64f9308ee586 100644
--- a/arch/sh/configs/se7206_defconfig
+++ b/arch/sh/configs/se7206_defconfig
@@ -101,6 +101,3 @@ CONFIG_CRYPTO_DEFLATE=y
CONFIG_CRYPTO_LZO=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
# CONFIG_CRYPTO_HW is not set
-CONFIG_CRC_CCITT=y
-CONFIG_CRC16=y
-CONFIG_CRC_ITU_T=y
diff --git a/arch/sh/configs/se7712_defconfig b/arch/sh/configs/se7712_defconfig
index 49a4961889de..8770a72e6a63 100644
--- a/arch/sh/configs/se7712_defconfig
+++ b/arch/sh/configs/se7712_defconfig
@@ -96,4 +96,3 @@ CONFIG_FRAME_POINTER=y
CONFIG_CRYPTO_ECB=m
CONFIG_CRYPTO_PCBC=m
# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRC_CCITT=y
diff --git a/arch/sh/configs/se7721_defconfig b/arch/sh/configs/se7721_defconfig
index de293792db84..b15c6406a0e8 100644
--- a/arch/sh/configs/se7721_defconfig
+++ b/arch/sh/configs/se7721_defconfig
@@ -122,4 +122,3 @@ CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
CONFIG_FRAME_POINTER=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRC_CCITT=y
diff --git a/arch/sh/configs/se7724_defconfig b/arch/sh/configs/se7724_defconfig
index 96521271758c..9501e69eb886 100644
--- a/arch/sh/configs/se7724_defconfig
+++ b/arch/sh/configs/se7724_defconfig
@@ -128,4 +128,3 @@ CONFIG_NLS_ISO8859_1=y
CONFIG_CRYPTO=y
CONFIG_CRYPTO_CBC=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRC_T10DIF=y
diff --git a/arch/sh/configs/sh03_defconfig b/arch/sh/configs/sh03_defconfig
index 48f38ec236b6..4d75c92cac10 100644
--- a/arch/sh/configs/sh03_defconfig
+++ b/arch/sh/configs/sh03_defconfig
@@ -120,6 +120,5 @@ CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_SHA1=y
CONFIG_CRYPTO_DEFLATE=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRC_CCITT=y
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_GENERIC=y
diff --git a/arch/sh/configs/sh2007_defconfig b/arch/sh/configs/sh2007_defconfig
index 1b1174a07e36..cc6292b3235a 100644
--- a/arch/sh/configs/sh2007_defconfig
+++ b/arch/sh/configs/sh2007_defconfig
@@ -193,5 +193,3 @@ CONFIG_CRYPTO_DEFLATE=y
CONFIG_CRYPTO_LZO=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
# CONFIG_CRYPTO_HW is not set
-CONFIG_CRC_CCITT=y
-CONFIG_CRC16=y
diff --git a/arch/sh/configs/sh7724_generic_defconfig b/arch/sh/configs/sh7724_generic_defconfig
index 5440bd0ca4ed..e6298f22623a 100644
--- a/arch/sh/configs/sh7724_generic_defconfig
+++ b/arch/sh/configs/sh7724_generic_defconfig
@@ -39,4 +39,3 @@ CONFIG_UIO_PDRV_GENIRQ=y
# CONFIG_SYSFS is not set
# CONFIG_MISC_FILESYSTEMS is not set
# CONFIG_ENABLE_MUST_CHECK is not set
-# CONFIG_CRC32 is not set
diff --git a/arch/sh/configs/sh7763rdp_defconfig b/arch/sh/configs/sh7763rdp_defconfig
index 57923c3296cc..b77b3313157e 100644
--- a/arch/sh/configs/sh7763rdp_defconfig
+++ b/arch/sh/configs/sh7763rdp_defconfig
@@ -112,4 +112,3 @@ CONFIG_NLS_UTF8=y
# CONFIG_ENABLE_MUST_CHECK is not set
CONFIG_DEBUG_FS=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRC_T10DIF=y
diff --git a/arch/sh/configs/sh7770_generic_defconfig b/arch/sh/configs/sh7770_generic_defconfig
index 4338af8d02d0..2e2b46980b58 100644
--- a/arch/sh/configs/sh7770_generic_defconfig
+++ b/arch/sh/configs/sh7770_generic_defconfig
@@ -41,4 +41,3 @@ CONFIG_UIO_PDRV_GENIRQ=y
# CONFIG_SYSFS is not set
# CONFIG_MISC_FILESYSTEMS is not set
# CONFIG_ENABLE_MUST_CHECK is not set
-# CONFIG_CRC32 is not set
diff --git a/arch/sh/configs/titan_defconfig b/arch/sh/configs/titan_defconfig
index 8e85f205d8f5..f022ada363b5 100644
--- a/arch/sh/configs/titan_defconfig
+++ b/arch/sh/configs/titan_defconfig
@@ -264,4 +264,3 @@ CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRC16=m
diff --git a/arch/sparc/configs/sparc64_defconfig b/arch/sparc/configs/sparc64_defconfig
index 01b2bdfbf9a8..f1ba0fefe1f9 100644
--- a/arch/sparc/configs/sparc64_defconfig
+++ b/arch/sparc/configs/sparc64_defconfig
@@ -229,7 +229,6 @@ CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRC16=m
CONFIG_VCC=m
CONFIG_PATA_CMD64X=y
CONFIG_IP_PNP=y
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index a884ab544335..3bdae454a959 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1472,8 +1472,13 @@ struct kvm_arch {
struct once nx_once;
#ifdef CONFIG_X86_64
- /* The number of TDP MMU pages across all roots. */
+#ifdef CONFIG_KVM_PROVE_MMU
+ /*
+ * The number of TDP MMU pages across all roots. Used only to sanity
+ * check that KVM isn't leaking TDP MMU pages.
+ */
atomic64_t tdp_mmu_pages;
+#endif
/*
* List of struct kvm_mmu_pages being used as roots.
diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h
index 55a5e656e4b9..4f84d421d1cf 100644
--- a/arch/x86/include/asm/smap.h
+++ b/arch/x86/include/asm/smap.h
@@ -16,23 +16,23 @@
#ifdef __ASSEMBLER__
#define ASM_CLAC \
- ALTERNATIVE __stringify(ANNOTATE_IGNORE_ALTERNATIVE), "clac", X86_FEATURE_SMAP
+ ALTERNATIVE "", "clac", X86_FEATURE_SMAP
#define ASM_STAC \
- ALTERNATIVE __stringify(ANNOTATE_IGNORE_ALTERNATIVE), "stac", X86_FEATURE_SMAP
+ ALTERNATIVE "", "stac", X86_FEATURE_SMAP
#else /* __ASSEMBLER__ */
static __always_inline void clac(void)
{
/* Note: a barrier is implicit in alternative() */
- alternative(ANNOTATE_IGNORE_ALTERNATIVE "", "clac", X86_FEATURE_SMAP);
+ alternative("", "clac", X86_FEATURE_SMAP);
}
static __always_inline void stac(void)
{
/* Note: a barrier is implicit in alternative() */
- alternative(ANNOTATE_IGNORE_ALTERNATIVE "", "stac", X86_FEATURE_SMAP);
+ alternative("", "stac", X86_FEATURE_SMAP);
}
static __always_inline unsigned long smap_save(void)
@@ -59,9 +59,9 @@ static __always_inline void smap_restore(unsigned long flags)
/* These macros can be used in asm() statements */
#define ASM_CLAC \
- ALTERNATIVE(ANNOTATE_IGNORE_ALTERNATIVE "", "clac", X86_FEATURE_SMAP)
+ ALTERNATIVE("", "clac", X86_FEATURE_SMAP)
#define ASM_STAC \
- ALTERNATIVE(ANNOTATE_IGNORE_ALTERNATIVE "", "stac", X86_FEATURE_SMAP)
+ ALTERNATIVE("", "stac", X86_FEATURE_SMAP)
#define ASM_CLAC_UNSAFE \
ALTERNATIVE("", ANNOTATE_IGNORE_ALTERNATIVE "clac", X86_FEATURE_SMAP)
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 5e4d4934c0d3..571c906ffcbf 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -1427,8 +1427,8 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
}
break;
case 0xa: { /* Architectural Performance Monitoring */
- union cpuid10_eax eax;
- union cpuid10_edx edx;
+ union cpuid10_eax eax = { };
+ union cpuid10_edx edx = { };
if (!enable_pmu || !static_cpu_has(X86_FEATURE_ARCH_PERFMON)) {
entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
@@ -1444,8 +1444,6 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
if (kvm_pmu_cap.version)
edx.split.anythread_deprecated = 1;
- edx.split.reserved1 = 0;
- edx.split.reserved2 = 0;
entry->eax = eax.full;
entry->ebx = kvm_pmu_cap.events_mask;
@@ -1763,7 +1761,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
break;
/* AMD Extended Performance Monitoring and Debug */
case 0x80000022: {
- union cpuid_0x80000022_ebx ebx;
+ union cpuid_0x80000022_ebx ebx = { };
entry->ecx = entry->edx = 0;
if (!enable_pmu || !kvm_cpu_cap_has(X86_FEATURE_PERFMON_V2)) {
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 7cc0564f5f97..21a3b8166242 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -40,7 +40,9 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm)
kvm_tdp_mmu_invalidate_roots(kvm, KVM_VALID_ROOTS);
kvm_tdp_mmu_zap_invalidated_roots(kvm, false);
- WARN_ON(atomic64_read(&kvm->arch.tdp_mmu_pages));
+#ifdef CONFIG_KVM_PROVE_MMU
+ KVM_MMU_WARN_ON(atomic64_read(&kvm->arch.tdp_mmu_pages));
+#endif
WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots));
/*
@@ -325,13 +327,17 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
static void tdp_account_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp)
{
kvm_account_pgtable_pages((void *)sp->spt, +1);
+#ifdef CONFIG_KVM_PROVE_MMU
atomic64_inc(&kvm->arch.tdp_mmu_pages);
+#endif
}
static void tdp_unaccount_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp)
{
kvm_account_pgtable_pages((void *)sp->spt, -1);
+#ifdef CONFIG_KVM_PROVE_MMU
atomic64_dec(&kvm->arch.tdp_mmu_pages);
+#endif
}
/**
diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c
index ec08fa3caf43..51116fe69a50 100644
--- a/arch/x86/kvm/vmx/posted_intr.c
+++ b/arch/x86/kvm/vmx/posted_intr.c
@@ -31,6 +31,8 @@ static DEFINE_PER_CPU(struct list_head, wakeup_vcpus_on_cpu);
*/
static DEFINE_PER_CPU(raw_spinlock_t, wakeup_vcpus_on_cpu_lock);
+#define PI_LOCK_SCHED_OUT SINGLE_DEPTH_NESTING
+
static inline struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
{
return &(to_vmx(vcpu)->pi_desc);
@@ -89,9 +91,20 @@ void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
* current pCPU if the task was migrated.
*/
if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR) {
- raw_spin_lock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
+ raw_spinlock_t *spinlock = &per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu);
+
+ /*
+ * In addition to taking the wakeup lock for the regular/IRQ
+ * context, tell lockdep it is being taken for the "sched out"
+ * context as well. vCPU loads happens in task context, and
+ * this is taking the lock of the *previous* CPU, i.e. can race
+ * with both the scheduler and the wakeup handler.
+ */
+ raw_spin_lock(spinlock);
+ spin_acquire(&spinlock->dep_map, PI_LOCK_SCHED_OUT, 0, _RET_IP_);
list_del(&vmx->pi_wakeup_list);
- raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
+ spin_release(&spinlock->dep_map, _RET_IP_);
+ raw_spin_unlock(spinlock);
}
dest = cpu_physical_id(cpu);
@@ -148,11 +161,23 @@ static void pi_enable_wakeup_handler(struct kvm_vcpu *vcpu)
struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct pi_desc old, new;
- unsigned long flags;
- local_irq_save(flags);
+ lockdep_assert_irqs_disabled();
- raw_spin_lock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
+ /*
+ * Acquire the wakeup lock using the "sched out" context to workaround
+ * a lockdep false positive. When this is called, schedule() holds
+ * various per-CPU scheduler locks. When the wakeup handler runs, it
+ * holds this CPU's wakeup lock while calling try_to_wake_up(), which
+ * can eventually take the aforementioned scheduler locks, which causes
+ * lockdep to assume there is deadlock.
+ *
+ * Deadlock can't actually occur because IRQs are disabled for the
+ * entirety of the sched_out critical section, i.e. the wakeup handler
+ * can't run while the scheduler locks are held.
+ */
+ raw_spin_lock_nested(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu),
+ PI_LOCK_SCHED_OUT);
list_add_tail(&vmx->pi_wakeup_list,
&per_cpu(wakeup_vcpus_on_cpu, vcpu->cpu));
raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
@@ -176,8 +201,6 @@ static void pi_enable_wakeup_handler(struct kvm_vcpu *vcpu)
*/
if (pi_test_on(&new))
__apic_send_IPI_self(POSTED_INTR_WAKEUP_VECTOR);
-
- local_irq_restore(flags);
}
static bool vmx_needs_pi_wakeup(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c841817a914a..3712dde0bf9d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -11786,6 +11786,8 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
if (kvm_mpx_supported())
kvm_load_guest_fpu(vcpu);
+ kvm_vcpu_srcu_read_lock(vcpu);
+
r = kvm_apic_accept_events(vcpu);
if (r < 0)
goto out;
@@ -11799,6 +11801,8 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
mp_state->mp_state = vcpu->arch.mp_state;
out:
+ kvm_vcpu_srcu_read_unlock(vcpu);
+
if (kvm_mpx_supported())
kvm_put_guest_fpu(vcpu);
vcpu_put(vcpu);
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 43dcd8c7badc..1b7710bd0d05 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -70,6 +70,9 @@ EXPORT_SYMBOL(xen_start_flags);
*/
struct shared_info *HYPERVISOR_shared_info = &xen_dummy_shared_info;
+/* Number of pages released from the initial allocation. */
+unsigned long xen_released_pages;
+
static __ref void xen_get_vendor(void)
{
init_cpu_devs();
@@ -466,6 +469,13 @@ int __init arch_xen_unpopulated_init(struct resource **res)
xen_free_unpopulated_pages(1, &pg);
}
+ /*
+ * Account for the region being in the physmap but unpopulated.
+ * The value in xen_released_pages is used by the balloon
+ * driver to know how much of the physmap is unpopulated and
+ * set an accurate initial memory target.
+ */
+ xen_released_pages += xen_extra_mem[i].n_pfns;
/* Zero so region is not also added to the balloon driver. */
xen_extra_mem[i].n_pfns = 0;
}
diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c
index 0e3d930bcb89..9d25d9373945 100644
--- a/arch/x86/xen/enlighten_pvh.c
+++ b/arch/x86/xen/enlighten_pvh.c
@@ -1,5 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/acpi.h>
+#include <linux/cpufreq.h>
+#include <linux/cpuidle.h>
#include <linux/export.h>
#include <linux/mm.h>
@@ -123,8 +125,23 @@ static void __init pvh_arch_setup(void)
{
pvh_reserve_extra_memory();
- if (xen_initial_domain())
+ if (xen_initial_domain()) {
xen_add_preferred_consoles();
+
+ /*
+ * Disable usage of CPU idle and frequency drivers: when
+ * running as hardware domain the exposed native ACPI tables
+ * causes idle and/or frequency drivers to attach and
+ * malfunction. It's Xen the entity that controls the idle and
+ * frequency states.
+ *
+ * For unprivileged domains the exposed ACPI tables are
+ * fabricated and don't contain such data.
+ */
+ disable_cpuidle();
+ disable_cpufreq();
+ WARN_ON(xen_set_default_idle());
+ }
}
void __init xen_pvh_init(struct boot_params *boot_params)
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index c3db71d96c43..3823e52aef52 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -37,9 +37,6 @@
#define GB(x) ((uint64_t)(x) * 1024 * 1024 * 1024)
-/* Number of pages released from the initial allocation. */
-unsigned long xen_released_pages;
-
/* Memory map would allow PCI passthrough. */
bool xen_pv_pci_possible;
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index 109af12f7647..461bb1526502 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -226,9 +226,7 @@ SYM_CODE_END(xen_early_idt_handler_array)
push %rax
mov $__HYPERVISOR_iret, %eax
syscall /* Do the IRET. */
-#ifdef CONFIG_MITIGATION_SLS
- int3
-#endif
+ ud2 /* The SYSCALL should never return. */
.endm
SYM_CODE_START(xen_iret)
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index a97f2c40c640..2551ebf88dda 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -367,7 +367,7 @@ config BLK_DEV_RBD
tristate "Rados block device (RBD)"
depends on INET && BLOCK
select CEPH_LIB
- select LIBCRC32C
+ select CRC32
select CRYPTO_AES
select CRYPTO
help
diff --git a/drivers/block/drbd/Kconfig b/drivers/block/drbd/Kconfig
index 6fb4e38fca88..495a72da04c6 100644
--- a/drivers/block/drbd/Kconfig
+++ b/drivers/block/drbd/Kconfig
@@ -10,7 +10,7 @@ config BLK_DEV_DRBD
tristate "DRBD Distributed Replicated Block Device support"
depends on PROC_FS && INET
select LRU_CACHE
- select LIBCRC32C
+ select CRC32
help
NOTE: In order to authenticate connections you have to select
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 2fd05c1bd30b..cdb1543fa4a9 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -1140,6 +1140,25 @@ static void ublk_complete_rq(struct kref *ref)
__ublk_complete_rq(req);
}
+static void ublk_do_fail_rq(struct request *req)
+{
+ struct ublk_queue *ubq = req->mq_hctx->driver_data;
+
+ if (ublk_nosrv_should_reissue_outstanding(ubq->dev))
+ blk_mq_requeue_request(req, false);
+ else
+ __ublk_complete_rq(req);
+}
+
+static void ublk_fail_rq_fn(struct kref *ref)
+{
+ struct ublk_rq_data *data = container_of(ref, struct ublk_rq_data,
+ ref);
+ struct request *req = blk_mq_rq_from_pdu(data);
+
+ ublk_do_fail_rq(req);
+}
+
/*
* Since ublk_rq_task_work_cb always fails requests immediately during
* exiting, __ublk_fail_req() is only called from abort context during
@@ -1153,10 +1172,13 @@ static void __ublk_fail_req(struct ublk_queue *ubq, struct ublk_io *io,
{
WARN_ON_ONCE(io->flags & UBLK_IO_FLAG_ACTIVE);
- if (ublk_nosrv_should_reissue_outstanding(ubq->dev))
- blk_mq_requeue_request(req, false);
- else
- ublk_put_req_ref(ubq, req);
+ if (ublk_need_req_ref(ubq)) {
+ struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
+
+ kref_put(&data->ref, ublk_fail_rq_fn);
+ } else {
+ ublk_do_fail_rq(req);
+ }
}
static void ubq_complete_io_cmd(struct ublk_io *io, int res,
@@ -1349,7 +1371,8 @@ static enum blk_eh_timer_return ublk_timeout(struct request *rq)
return BLK_EH_RESET_TIMER;
}
-static blk_status_t ublk_prep_req(struct ublk_queue *ubq, struct request *rq)
+static blk_status_t ublk_prep_req(struct ublk_queue *ubq, struct request *rq,
+ bool check_cancel)
{
blk_status_t res;
@@ -1368,7 +1391,7 @@ static blk_status_t ublk_prep_req(struct ublk_queue *ubq, struct request *rq)
if (ublk_nosrv_should_queue_io(ubq) && unlikely(ubq->force_abort))
return BLK_STS_IOERR;
- if (unlikely(ubq->canceling))
+ if (check_cancel && unlikely(ubq->canceling))
return BLK_STS_IOERR;
/* fill iod to slot in io cmd buffer */
@@ -1387,7 +1410,7 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
struct request *rq = bd->rq;
blk_status_t res;
- res = ublk_prep_req(ubq, rq);
+ res = ublk_prep_req(ubq, rq, false);
if (res != BLK_STS_OK)
return res;
@@ -1419,7 +1442,7 @@ static void ublk_queue_rqs(struct rq_list *rqlist)
ublk_queue_cmd_list(ubq, &submit_list);
ubq = this_q;
- if (ublk_prep_req(ubq, req) == BLK_STS_OK)
+ if (ublk_prep_req(ubq, req, true) == BLK_STS_OK)
rq_list_add_tail(&submit_list, req);
else
rq_list_add_tail(&requeue_list, req);
@@ -2413,9 +2436,9 @@ static struct ublk_device *ublk_get_device_from_id(int idx)
return ub;
}
-static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd)
+static int ublk_ctrl_start_dev(struct ublk_device *ub,
+ const struct ublksrv_ctrl_cmd *header)
{
- const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
const struct ublk_param_basic *p = &ub->params.basic;
int ublksrv_pid = (int)header->data[0];
struct queue_limits lim = {
@@ -2534,9 +2557,8 @@ out_unlock:
}
static int ublk_ctrl_get_queue_affinity(struct ublk_device *ub,
- struct io_uring_cmd *cmd)
+ const struct ublksrv_ctrl_cmd *header)
{
- const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
void __user *argp = (void __user *)(unsigned long)header->addr;
cpumask_var_t cpumask;
unsigned long queue;
@@ -2585,9 +2607,8 @@ static inline void ublk_dump_dev_info(struct ublksrv_ctrl_dev_info *info)
info->nr_hw_queues, info->queue_depth);
}
-static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
+static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header)
{
- const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
void __user *argp = (void __user *)(unsigned long)header->addr;
struct ublksrv_ctrl_dev_info info;
struct ublk_device *ub;
@@ -2812,9 +2833,8 @@ static int ublk_ctrl_stop_dev(struct ublk_device *ub)
}
static int ublk_ctrl_get_dev_info(struct ublk_device *ub,
- struct io_uring_cmd *cmd)
+ const struct ublksrv_ctrl_cmd *header)
{
- const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
void __user *argp = (void __user *)(unsigned long)header->addr;
if (header->len < sizeof(struct ublksrv_ctrl_dev_info) || !header->addr)
@@ -2843,9 +2863,8 @@ static void ublk_ctrl_fill_params_devt(struct ublk_device *ub)
}
static int ublk_ctrl_get_params(struct ublk_device *ub,
- struct io_uring_cmd *cmd)
+ const struct ublksrv_ctrl_cmd *header)
{
- const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
void __user *argp = (void __user *)(unsigned long)header->addr;
struct ublk_params_header ph;
int ret;
@@ -2874,9 +2893,8 @@ static int ublk_ctrl_get_params(struct ublk_device *ub,
}
static int ublk_ctrl_set_params(struct ublk_device *ub,
- struct io_uring_cmd *cmd)
+ const struct ublksrv_ctrl_cmd *header)
{
- const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
void __user *argp = (void __user *)(unsigned long)header->addr;
struct ublk_params_header ph;
int ret = -EFAULT;
@@ -2940,9 +2958,8 @@ static void ublk_queue_reinit(struct ublk_device *ub, struct ublk_queue *ubq)
}
static int ublk_ctrl_start_recovery(struct ublk_device *ub,
- struct io_uring_cmd *cmd)
+ const struct ublksrv_ctrl_cmd *header)
{
- const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
int ret = -EINVAL;
int i;
@@ -2988,9 +3005,8 @@ static int ublk_ctrl_start_recovery(struct ublk_device *ub,
}
static int ublk_ctrl_end_recovery(struct ublk_device *ub,
- struct io_uring_cmd *cmd)
+ const struct ublksrv_ctrl_cmd *header)
{
- const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
int ublksrv_pid = (int)header->data[0];
int ret = -EINVAL;
int i;
@@ -3037,9 +3053,8 @@ static int ublk_ctrl_end_recovery(struct ublk_device *ub,
return ret;
}
-static int ublk_ctrl_get_features(struct io_uring_cmd *cmd)
+static int ublk_ctrl_get_features(const struct ublksrv_ctrl_cmd *header)
{
- const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
void __user *argp = (void __user *)(unsigned long)header->addr;
u64 features = UBLK_F_ALL;
@@ -3178,7 +3193,7 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd,
goto out;
if (cmd_op == UBLK_U_CMD_GET_FEATURES) {
- ret = ublk_ctrl_get_features(cmd);
+ ret = ublk_ctrl_get_features(header);
goto out;
}
@@ -3195,17 +3210,17 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd,
switch (_IOC_NR(cmd_op)) {
case UBLK_CMD_START_DEV:
- ret = ublk_ctrl_start_dev(ub, cmd);
+ ret = ublk_ctrl_start_dev(ub, header);
break;
case UBLK_CMD_STOP_DEV:
ret = ublk_ctrl_stop_dev(ub);
break;
case UBLK_CMD_GET_DEV_INFO:
case UBLK_CMD_GET_DEV_INFO2:
- ret = ublk_ctrl_get_dev_info(ub, cmd);
+ ret = ublk_ctrl_get_dev_info(ub, header);
break;
case UBLK_CMD_ADD_DEV:
- ret = ublk_ctrl_add_dev(cmd);
+ ret = ublk_ctrl_add_dev(header);
break;
case UBLK_CMD_DEL_DEV:
ret = ublk_ctrl_del_dev(&ub, true);
@@ -3214,19 +3229,19 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd,
ret = ublk_ctrl_del_dev(&ub, false);
break;
case UBLK_CMD_GET_QUEUE_AFFINITY:
- ret = ublk_ctrl_get_queue_affinity(ub, cmd);
+ ret = ublk_ctrl_get_queue_affinity(ub, header);
break;
case UBLK_CMD_GET_PARAMS:
- ret = ublk_ctrl_get_params(ub, cmd);
+ ret = ublk_ctrl_get_params(ub, header);
break;
case UBLK_CMD_SET_PARAMS:
- ret = ublk_ctrl_set_params(ub, cmd);
+ ret = ublk_ctrl_set_params(ub, header);
break;
case UBLK_CMD_START_USER_RECOVERY:
- ret = ublk_ctrl_start_recovery(ub, cmd);
+ ret = ublk_ctrl_start_recovery(ub, header);
break;
case UBLK_CMD_END_USER_RECOVERY:
- ret = ublk_ctrl_end_recovery(ub, cmd);
+ ret = ublk_ctrl_end_recovery(ub, header);
break;
default:
ret = -EOPNOTSUPP;
diff --git a/drivers/firmware/smccc/kvm_guest.c b/drivers/firmware/smccc/kvm_guest.c
index 5767aed25cdc..a123c05cbc9e 100644
--- a/drivers/firmware/smccc/kvm_guest.c
+++ b/drivers/firmware/smccc/kvm_guest.c
@@ -95,7 +95,7 @@ void __init kvm_arm_target_impl_cpu_init(void)
for (i = 0; i < max_cpus; i++) {
arm_smccc_1_1_invoke(ARM_SMCCC_VENDOR_HYP_KVM_DISCOVER_IMPL_CPUS_FUNC_ID,
- i, &res);
+ i, 0, 0, &res);
if (res.a0 != SMCCC_RET_SUCCESS) {
pr_warn("Discovering target implementation CPUs failed\n");
goto mem_free;
@@ -103,7 +103,7 @@ void __init kvm_arm_target_impl_cpu_init(void)
target[i].midr = res.a1;
target[i].revidr = res.a2;
target[i].aidr = res.a3;
- };
+ }
if (!cpu_errata_set_target_impl(max_cpus, target)) {
pr_warn("Failed to set target implementation CPUs\n");
diff --git a/drivers/gpio/TODO b/drivers/gpio/TODO
index b5f0a7a2e1bf..4b70cbaa1caa 100644
--- a/drivers/gpio/TODO
+++ b/drivers/gpio/TODO
@@ -186,3 +186,37 @@ their hardware offsets within the chip.
Encourage users to switch to using them and eventually remove the existing
global export/unexport attribues.
+
+-------------------------------------------------------------------------------
+
+Remove GPIOD_FLAGS_BIT_NONEXCLUSIVE
+
+GPIOs in the linux kernel are meant to be an exclusive resource. This means
+that the GPIO descriptors (the software representation of the hardware concept)
+are not reference counted and - in general - only one user at a time can
+request a GPIO line and control its settings. The consumer API is designed
+around full control of the line's state as evidenced by the fact that, for
+instance, gpiod_set_value() does indeed drive the line as requested, instead
+of bumping an enable counter of some sort.
+
+A problematic use-case for GPIOs is when two consumers want to use the same
+descriptor independently. An example of such a user is the regulator subsystem
+which may instantiate several struct regulator_dev instances containing
+a struct device but using the same enable GPIO line.
+
+A workaround was introduced in the form of the GPIOD_FLAGS_BIT_NONEXCLUSIVE
+flag but its implementation is problematic: it does not provide any
+synchronization of usage nor did it introduce any enable count meaning the
+non-exclusive users of the same descriptor will in fact "fight" for the
+control over it. This flag should be removed and replaced with a better
+solution, possibly based on the new power sequencing subsystem.
+
+-------------------------------------------------------------------------------
+
+Remove devm_gpiod_unhinge()
+
+devm_gpiod_unhinge() is provided as a way to transfer the ownership of managed
+enable GPIOs to the regulator core. Rather than doing that however, we should
+make it possible for the regulator subsystem to deal with GPIO resources the
+lifetime of which it doesn't control as logically, a GPIO obtained by a caller
+should also be freed by it.
diff --git a/drivers/gpio/gpio-mpc8xxx.c b/drivers/gpio/gpio-mpc8xxx.c
index 0cd4c36ae8aa..541517536489 100644
--- a/drivers/gpio/gpio-mpc8xxx.c
+++ b/drivers/gpio/gpio-mpc8xxx.c
@@ -410,7 +410,9 @@ static int mpc8xxx_probe(struct platform_device *pdev)
goto err;
}
- device_init_wakeup(dev, true);
+ ret = devm_device_init_wakeup(dev);
+ if (ret)
+ return dev_err_probe(dev, ret, "Failed to init wakeup\n");
return 0;
err:
diff --git a/drivers/gpio/gpio-tegra186.c b/drivers/gpio/gpio-tegra186.c
index 6895b65c86af..d27bfac6c9f5 100644
--- a/drivers/gpio/gpio-tegra186.c
+++ b/drivers/gpio/gpio-tegra186.c
@@ -823,6 +823,7 @@ static int tegra186_gpio_probe(struct platform_device *pdev)
struct gpio_irq_chip *irq;
struct tegra_gpio *gpio;
struct device_node *np;
+ struct resource *res;
char **names;
int err;
@@ -842,19 +843,19 @@ static int tegra186_gpio_probe(struct platform_device *pdev)
gpio->num_banks++;
/* get register apertures */
- gpio->secure = devm_platform_ioremap_resource_byname(pdev, "security");
- if (IS_ERR(gpio->secure)) {
- gpio->secure = devm_platform_ioremap_resource(pdev, 0);
- if (IS_ERR(gpio->secure))
- return PTR_ERR(gpio->secure);
- }
-
- gpio->base = devm_platform_ioremap_resource_byname(pdev, "gpio");
- if (IS_ERR(gpio->base)) {
- gpio->base = devm_platform_ioremap_resource(pdev, 1);
- if (IS_ERR(gpio->base))
- return PTR_ERR(gpio->base);
- }
+ res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "security");
+ if (!res)
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ gpio->secure = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(gpio->secure))
+ return PTR_ERR(gpio->secure);
+
+ res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "gpio");
+ if (!res)
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+ gpio->base = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(gpio->base))
+ return PTR_ERR(gpio->base);
err = platform_irq_count(pdev);
if (err < 0)
diff --git a/drivers/gpio/gpio-zynq.c b/drivers/gpio/gpio-zynq.c
index be81fa2b17ab..3dae63f3ea21 100644
--- a/drivers/gpio/gpio-zynq.c
+++ b/drivers/gpio/gpio-zynq.c
@@ -1011,6 +1011,7 @@ static void zynq_gpio_remove(struct platform_device *pdev)
ret = pm_runtime_get_sync(&pdev->dev);
if (ret < 0)
dev_warn(&pdev->dev, "pm_runtime_get_sync() Failed\n");
+ device_init_wakeup(&pdev->dev, 0);
gpiochip_remove(&gpio->chip);
device_set_wakeup_capable(&pdev->dev, 0);
pm_runtime_disable(&pdev->dev);
diff --git a/drivers/gpio/gpiolib-devres.c b/drivers/gpio/gpiolib-devres.c
index 08205f355ceb..120d1ec5af3b 100644
--- a/drivers/gpio/gpiolib-devres.c
+++ b/drivers/gpio/gpiolib-devres.c
@@ -317,11 +317,15 @@ EXPORT_SYMBOL_GPL(devm_gpiod_put);
* @dev: GPIO consumer
* @desc: GPIO descriptor to remove resource management from
*
+ * *DEPRECATED*
+ * This function should not be used. It's been provided as a workaround for
+ * resource ownership issues in the regulator framework and should be replaced
+ * with a better solution.
+ *
* Remove resource management from a GPIO descriptor. This is needed when
* you want to hand over lifecycle management of a descriptor to another
* mechanism.
*/
-
void devm_gpiod_unhinge(struct device *dev, struct gpio_desc *desc)
{
int ret;
diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index eb667f8f1ead..65f6a7177b78 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -193,6 +193,8 @@ static void of_gpio_try_fixup_polarity(const struct device_node *np,
*/
{ "himax,hx8357", "gpios-reset", false },
{ "himax,hx8369", "gpios-reset", false },
+#endif
+#if IS_ENABLED(CONFIG_MTD_NAND_JZ4780)
/*
* The rb-gpios semantics was undocumented and qi,lb60 (along with
* the ingenic driver) got it wrong. The active state encodes the
@@ -266,6 +268,9 @@ static void of_gpio_set_polarity_by_property(const struct device_node *np,
{ "fsl,imx8qm-fec", "phy-reset-gpios", "phy-reset-active-high" },
{ "fsl,s32v234-fec", "phy-reset-gpios", "phy-reset-active-high" },
#endif
+#if IS_ENABLED(CONFIG_MMC_ATMELMCI)
+ { "atmel,hsmci", "cd-gpios", "cd-inverted" },
+#endif
#if IS_ENABLED(CONFIG_PCI_IMX6)
{ "fsl,imx6q-pcie", "reset-gpio", "reset-gpio-active-high" },
{ "fsl,imx6sx-pcie", "reset-gpio", "reset-gpio-active-high" },
@@ -292,9 +297,6 @@ static void of_gpio_set_polarity_by_property(const struct device_node *np,
{ "regulator-gpio", "enable-gpio", "enable-active-high" },
{ "regulator-gpio", "enable-gpios", "enable-active-high" },
#endif
-#if IS_ENABLED(CONFIG_MMC_ATMELMCI)
- { "atmel,hsmci", "cd-gpios", "cd-inverted" },
-#endif
};
unsigned int i;
bool active_high;
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 06f809e70f15..ddb37f6670de 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -139,7 +139,7 @@ config MD_RAID456
tristate "RAID-4/RAID-5/RAID-6 mode"
depends on BLK_DEV_MD
select RAID6_PQ
- select LIBCRC32C
+ select CRC32
select ASYNC_MEMCPY
select ASYNC_XOR
select ASYNC_PQ
diff --git a/drivers/md/persistent-data/Kconfig b/drivers/md/persistent-data/Kconfig
index f4f948b0e173..dbb97a7233ab 100644
--- a/drivers/md/persistent-data/Kconfig
+++ b/drivers/md/persistent-data/Kconfig
@@ -2,7 +2,7 @@
config DM_PERSISTENT_DATA
tristate
depends on BLK_DEV_DM
- select LIBCRC32C
+ select CRC32
select DM_BUFIO
help
Library providing immutable on-disk data structure support for
diff --git a/drivers/mtd/inftlcore.c b/drivers/mtd/inftlcore.c
index 9739387cff8c..58c6e1743f5c 100644
--- a/drivers/mtd/inftlcore.c
+++ b/drivers/mtd/inftlcore.c
@@ -482,10 +482,11 @@ static inline u16 INFTL_findwriteunit(struct INFTLrecord *inftl, unsigned block)
silly = MAX_LOOPS;
while (thisEUN <= inftl->lastEUN) {
- inftl_read_oob(mtd, (thisEUN * inftl->EraseSize) +
- blockofs, 8, &retlen, (char *)&bci);
-
- status = bci.Status | bci.Status1;
+ if (inftl_read_oob(mtd, (thisEUN * inftl->EraseSize) +
+ blockofs, 8, &retlen, (char *)&bci) < 0)
+ status = SECTOR_IGNORE;
+ else
+ status = bci.Status | bci.Status1;
pr_debug("INFTL: status of block %d in EUN %d is %x\n",
block , writeEUN, status);
diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile
index db516a45f0c5..44913ff1bf12 100644
--- a/drivers/mtd/nand/Makefile
+++ b/drivers/mtd/nand/Makefile
@@ -3,11 +3,8 @@
nandcore-objs := core.o bbt.o
obj-$(CONFIG_MTD_NAND_CORE) += nandcore.o
obj-$(CONFIG_MTD_NAND_ECC_MEDIATEK) += ecc-mtk.o
-ifeq ($(CONFIG_SPI_QPIC_SNAND),y)
obj-$(CONFIG_SPI_QPIC_SNAND) += qpic_common.o
-else
obj-$(CONFIG_MTD_NAND_QCOM) += qpic_common.o
-endif
obj-y += onenand/
obj-y += raw/
obj-y += spi/
diff --git a/drivers/mtd/nand/raw/r852.c b/drivers/mtd/nand/raw/r852.c
index b07c2f8b4035..918974d088cf 100644
--- a/drivers/mtd/nand/raw/r852.c
+++ b/drivers/mtd/nand/raw/r852.c
@@ -387,6 +387,9 @@ static int r852_wait(struct nand_chip *chip)
static int r852_ready(struct nand_chip *chip)
{
struct r852_device *dev = r852_get_dev(nand_to_mtd(chip));
+ if (dev->card_unstable)
+ return 0;
+
return !(r852_read_reg(dev, R852_CARD_STA) & R852_CARD_STA_BUSY);
}
diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig
index eeec8bf17cf4..1bd4313215d7 100644
--- a/drivers/net/ethernet/broadcom/Kconfig
+++ b/drivers/net/ethernet/broadcom/Kconfig
@@ -143,7 +143,7 @@ config BNX2X
depends on PTP_1588_CLOCK_OPTIONAL
select FW_LOADER
select ZLIB_INFLATE
- select LIBCRC32C
+ select CRC32
select MDIO
help
This driver supports Broadcom NetXtremeII 10 gigabit Ethernet cards.
@@ -207,7 +207,7 @@ config BNXT
depends on PCI
depends on PTP_1588_CLOCK_OPTIONAL
select FW_LOADER
- select LIBCRC32C
+ select CRC32
select NET_DEVLINK
select PAGE_POOL
select DIMLIB
diff --git a/drivers/net/ethernet/cavium/Kconfig b/drivers/net/ethernet/cavium/Kconfig
index ca742cc146d7..7dae5aad3689 100644
--- a/drivers/net/ethernet/cavium/Kconfig
+++ b/drivers/net/ethernet/cavium/Kconfig
@@ -70,8 +70,8 @@ config LIQUIDIO
depends on 64BIT && PCI
depends on PCI
depends on PTP_1588_CLOCK_OPTIONAL
+ select CRC32
select FW_LOADER
- select LIBCRC32C
select LIQUIDIO_CORE
select NET_DEVLINK
help
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/qos.c b/drivers/net/ethernet/marvell/octeontx2/nic/qos.c
index 0f844c14485a..35acc07bd964 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/qos.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/qos.c
@@ -165,6 +165,11 @@ static void __otx2_qos_txschq_cfg(struct otx2_nic *pfvf,
otx2_config_sched_shaping(pfvf, node, cfg, &num_regs);
} else if (level == NIX_TXSCH_LVL_TL2) {
+ /* configure parent txschq */
+ cfg->reg[num_regs] = NIX_AF_TL2X_PARENT(node->schq);
+ cfg->regval[num_regs] = (u64)hw->tx_link << 16;
+ num_regs++;
+
/* configure link cfg */
if (level == pfvf->qos.link_cfg_lvl) {
cfg->reg[num_regs] = NIX_AF_TL3_TL2X_LINKX_CFG(node->schq, hw->tx_link);
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c
index 00b0b318df27..e69eaa65e0de 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c
+++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c
@@ -310,7 +310,8 @@ static bool wx_alloc_mapped_page(struct wx_ring *rx_ring,
return true;
page = page_pool_dev_alloc_pages(rx_ring->page_pool);
- WARN_ON(!page);
+ if (unlikely(!page))
+ return false;
dma = page_pool_get_dma_addr(page);
bi->page_dma = dma;
@@ -546,7 +547,8 @@ static void wx_rx_checksum(struct wx_ring *ring,
return;
/* Hardware can't guarantee csum if IPv6 Dest Header found */
- if (dptype.prot != WX_DEC_PTYPE_PROT_SCTP && WX_RXD_IPV6EX(rx_desc))
+ if (dptype.prot != WX_DEC_PTYPE_PROT_SCTP &&
+ wx_test_staterr(rx_desc, WX_RXD_STAT_IPV6EX))
return;
/* if L4 checksum error */
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h
index 5b230ecbbabb..4c545b2aa997 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_type.h
+++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h
@@ -513,6 +513,7 @@ enum WX_MSCA_CMD_value {
#define WX_RXD_STAT_L4CS BIT(7) /* L4 xsum calculated */
#define WX_RXD_STAT_IPCS BIT(8) /* IP xsum calculated */
#define WX_RXD_STAT_OUTERIPCS BIT(10) /* Cloud IP xsum calculated*/
+#define WX_RXD_STAT_IPV6EX BIT(12) /* IPv6 Dest Header */
#define WX_RXD_STAT_TS BIT(14) /* IEEE1588 Time Stamp */
#define WX_RXD_ERR_OUTERIPER BIT(26) /* CRC IP Header error */
@@ -589,8 +590,6 @@ enum wx_l2_ptypes {
#define WX_RXD_PKTTYPE(_rxd) \
((le32_to_cpu((_rxd)->wb.lower.lo_dword.data) >> 9) & 0xFF)
-#define WX_RXD_IPV6EX(_rxd) \
- ((le32_to_cpu((_rxd)->wb.lower.lo_dword.data) >> 6) & 0x1)
/*********************** Transmit Descriptor Config Masks ****************/
#define WX_TXD_STAT_DD BIT(0) /* Descriptor Done */
#define WX_TXD_DTYP_DATA 0 /* Adv Data Descriptor */
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 675fbd225378..cc1bfd22fb81 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -244,6 +244,46 @@ static bool phy_drv_wol_enabled(struct phy_device *phydev)
return wol.wolopts != 0;
}
+static void phy_link_change(struct phy_device *phydev, bool up)
+{
+ struct net_device *netdev = phydev->attached_dev;
+
+ if (up)
+ netif_carrier_on(netdev);
+ else
+ netif_carrier_off(netdev);
+ phydev->adjust_link(netdev);
+ if (phydev->mii_ts && phydev->mii_ts->link_state)
+ phydev->mii_ts->link_state(phydev->mii_ts, phydev);
+}
+
+/**
+ * phy_uses_state_machine - test whether consumer driver uses PAL state machine
+ * @phydev: the target PHY device structure
+ *
+ * Ultimately, this aims to indirectly determine whether the PHY is attached
+ * to a consumer which uses the state machine by calling phy_start() and
+ * phy_stop().
+ *
+ * When the PHY driver consumer uses phylib, it must have previously called
+ * phy_connect_direct() or one of its derivatives, so that phy_prepare_link()
+ * has set up a hook for monitoring state changes.
+ *
+ * When the PHY driver is used by the MAC driver consumer through phylink (the
+ * only other provider of a phy_link_change() method), using the PHY state
+ * machine is not optional.
+ *
+ * Return: true if consumer calls phy_start() and phy_stop(), false otherwise.
+ */
+static bool phy_uses_state_machine(struct phy_device *phydev)
+{
+ if (phydev->phy_link_change == phy_link_change)
+ return phydev->attached_dev && phydev->adjust_link;
+
+ /* phydev->phy_link_change is implicitly phylink_phy_change() */
+ return true;
+}
+
static bool mdio_bus_phy_may_suspend(struct phy_device *phydev)
{
struct device_driver *drv = phydev->mdio.dev.driver;
@@ -310,7 +350,7 @@ static __maybe_unused int mdio_bus_phy_suspend(struct device *dev)
* may call phy routines that try to grab the same lock, and that may
* lead to a deadlock.
*/
- if (phydev->attached_dev && phydev->adjust_link)
+ if (phy_uses_state_machine(phydev))
phy_stop_machine(phydev);
if (!mdio_bus_phy_may_suspend(phydev))
@@ -364,7 +404,7 @@ no_resume:
}
}
- if (phydev->attached_dev && phydev->adjust_link)
+ if (phy_uses_state_machine(phydev))
phy_start_machine(phydev);
return 0;
@@ -1055,19 +1095,6 @@ struct phy_device *phy_find_first(struct mii_bus *bus)
}
EXPORT_SYMBOL(phy_find_first);
-static void phy_link_change(struct phy_device *phydev, bool up)
-{
- struct net_device *netdev = phydev->attached_dev;
-
- if (up)
- netif_carrier_on(netdev);
- else
- netif_carrier_off(netdev);
- phydev->adjust_link(netdev);
- if (phydev->mii_ts && phydev->mii_ts->link_state)
- phydev->mii_ts->link_state(phydev->mii_ts, phydev);
-}
-
/**
* phy_prepare_link - prepares the PHY layer to monitor link status
* @phydev: target phy_device struct
diff --git a/drivers/net/ppp/ppp_synctty.c b/drivers/net/ppp/ppp_synctty.c
index 644e99fc3623..9c4932198931 100644
--- a/drivers/net/ppp/ppp_synctty.c
+++ b/drivers/net/ppp/ppp_synctty.c
@@ -506,6 +506,11 @@ ppp_sync_txmunge(struct syncppp *ap, struct sk_buff *skb)
unsigned char *data;
int islcp;
+ /* Ensure we can safely access protocol field and LCP code */
+ if (!pskb_may_pull(skb, 3)) {
+ kfree_skb(skb);
+ return NULL;
+ }
data = skb->data;
proto = get_unaligned_be16(data);
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index f7d6f47971fd..24f485827e03 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -278,7 +278,7 @@ config XEN_PRIVCMD_EVENTFD
config XEN_ACPI_PROCESSOR
tristate "Xen ACPI processor"
- depends on XEN && XEN_PV_DOM0 && X86 && ACPI_PROCESSOR && CPU_FREQ
+ depends on XEN && XEN_DOM0 && X86 && ACPI_PROCESSOR && CPU_FREQ
default m
help
This ACPI processor uploads Power Management information to the Xen
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 65d4e7fa1eb8..8c852807ba1c 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -679,7 +679,7 @@ void xen_free_ballooned_pages(unsigned int nr_pages, struct page **pages)
}
EXPORT_SYMBOL(xen_free_ballooned_pages);
-static void __init balloon_add_regions(void)
+static int __init balloon_add_regions(void)
{
unsigned long start_pfn, pages;
unsigned long pfn, extra_pfn_end;
@@ -702,26 +702,38 @@ static void __init balloon_add_regions(void)
for (pfn = start_pfn; pfn < extra_pfn_end; pfn++)
balloon_append(pfn_to_page(pfn));
- balloon_stats.total_pages += extra_pfn_end - start_pfn;
+ /*
+ * Extra regions are accounted for in the physmap, but need
+ * decreasing from current_pages to balloon down the initial
+ * allocation, because they are already accounted for in
+ * total_pages.
+ */
+ if (extra_pfn_end - start_pfn >= balloon_stats.current_pages) {
+ WARN(1, "Extra pages underflow current target");
+ return -ERANGE;
+ }
+ balloon_stats.current_pages -= extra_pfn_end - start_pfn;
}
+
+ return 0;
}
static int __init balloon_init(void)
{
struct task_struct *task;
+ int rc;
if (!xen_domain())
return -ENODEV;
pr_info("Initialising balloon driver\n");
-#ifdef CONFIG_XEN_PV
- balloon_stats.current_pages = xen_pv_domain()
- ? min(xen_start_info->nr_pages - xen_released_pages, max_pfn)
- : get_num_physpages();
-#else
- balloon_stats.current_pages = get_num_physpages();
-#endif
+ if (xen_released_pages >= get_num_physpages()) {
+ WARN(1, "Released pages underflow current target");
+ return -ERANGE;
+ }
+
+ balloon_stats.current_pages = get_num_physpages() - xen_released_pages;
balloon_stats.target_pages = balloon_stats.current_pages;
balloon_stats.balloon_low = 0;
balloon_stats.balloon_high = 0;
@@ -738,7 +750,9 @@ static int __init balloon_init(void)
register_sysctl_init("xen/balloon", balloon_table);
#endif
- balloon_add_regions();
+ rc = balloon_add_regions();
+ if (rc)
+ return rc;
task = kthread_run(balloon_thread, NULL, "xen-balloon");
if (IS_ERR(task)) {
diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c
index fcb335bb7b18..6d1819269cbe 100644
--- a/drivers/xen/xenbus/xenbus_probe_frontend.c
+++ b/drivers/xen/xenbus/xenbus_probe_frontend.c
@@ -513,4 +513,5 @@ static int __init boot_wait_for_devices(void)
late_initcall(boot_wait_for_devices);
#endif
+MODULE_DESCRIPTION("Xen PV-device frontend support");
MODULE_LICENSE("GPL");
diff --git a/fs/bcachefs/Kconfig b/fs/bcachefs/Kconfig
index bf1c94e51dd0..adcf425a9b88 100644
--- a/fs/bcachefs/Kconfig
+++ b/fs/bcachefs/Kconfig
@@ -4,7 +4,7 @@ config BCACHEFS_FS
depends on BLOCK
select EXPORTFS
select CLOSURES
- select LIBCRC32C
+ select CRC32
select CRC64
select FS_POSIX_ACL
select LZ4_COMPRESS
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index fa8515598341..73a2dfb854c5 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -3,9 +3,9 @@
config BTRFS_FS
tristate "Btrfs filesystem support"
select BLK_CGROUP_PUNT_BIO
+ select CRC32
select CRYPTO
select CRYPTO_CRC32C
- select LIBCRC32C
select CRYPTO_XXHASH
select CRYPTO_SHA256
select CRYPTO_BLAKE2B
diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig
index 7249d70e1a43..3e7def3d31c1 100644
--- a/fs/ceph/Kconfig
+++ b/fs/ceph/Kconfig
@@ -3,7 +3,7 @@ config CEPH_FS
tristate "Ceph distributed file system"
depends on INET
select CEPH_LIB
- select LIBCRC32C
+ select CRC32
select CRYPTO_AES
select CRYPTO
select NETFS_SUPPORT
diff --git a/fs/erofs/Kconfig b/fs/erofs/Kconfig
index 331e49cd1b8d..8f68ec49ad89 100644
--- a/fs/erofs/Kconfig
+++ b/fs/erofs/Kconfig
@@ -3,8 +3,8 @@
config EROFS_FS
tristate "EROFS filesystem support"
depends on BLOCK
+ select CRC32
select FS_IOMAP
- select LIBCRC32C
help
EROFS (Enhanced Read-Only File System) is a lightweight read-only
file system with modern designs (e.g. no buffer heads, inline
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
index be7f87a8e11a..7bd231d16d4a 100644
--- a/fs/gfs2/Kconfig
+++ b/fs/gfs2/Kconfig
@@ -4,7 +4,6 @@ config GFS2_FS
select BUFFER_HEAD
select FS_POSIX_ACL
select CRC32
- select LIBCRC32C
select QUOTACTL
select FS_IOMAP
help
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index fffd6fffdce0..ae0ca6858496 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -3,7 +3,7 @@ config XFS_FS
tristate "XFS filesystem support"
depends on BLOCK
select EXPORTFS
- select LIBCRC32C
+ select CRC32
select FS_IOMAP
help
XFS is a high performance journaling filesystem which originated
diff --git a/include/kunit/test.h b/include/kunit/test.h
index 0ffb97c78566..39c768f87dc9 100644
--- a/include/kunit/test.h
+++ b/include/kunit/test.h
@@ -67,7 +67,7 @@ enum kunit_status {
/*
* Speed Attribute is stored as an enum and separated into categories of
- * speed: very_slowm, slow, and normal. These speeds are relative to
+ * speed: very_slow, slow, and normal. These speeds are relative to
* other KUnit tests.
*
* Note: unset speed attribute acts as default of KUNIT_SPEED_NORMAL.
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 485b651869d9..5bc8f55c8cca 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -710,6 +710,7 @@ struct cgroup_subsys {
void (*css_released)(struct cgroup_subsys_state *css);
void (*css_free)(struct cgroup_subsys_state *css);
void (*css_reset)(struct cgroup_subsys_state *css);
+ void (*css_killed)(struct cgroup_subsys_state *css);
void (*css_rstat_flush)(struct cgroup_subsys_state *css, int cpu);
int (*css_extra_stat_show)(struct seq_file *seq,
struct cgroup_subsys_state *css);
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 28e999f2c642..e7da3c3b098b 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -344,7 +344,7 @@ static inline u64 cgroup_id(const struct cgroup *cgrp)
*/
static inline bool css_is_dying(struct cgroup_subsys_state *css)
{
- return !(css->flags & CSS_NO_REF) && percpu_ref_is_dying(&css->refcnt);
+ return css->flags & CSS_DYING;
}
static inline void cgroup_get(struct cgroup *cgrp)
diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h
index 45b651c05b9c..8adc8e9cb4a7 100644
--- a/include/linux/gpio/consumer.h
+++ b/include/linux/gpio/consumer.h
@@ -31,6 +31,7 @@ struct gpio_descs {
#define GPIOD_FLAGS_BIT_DIR_OUT BIT(1)
#define GPIOD_FLAGS_BIT_DIR_VAL BIT(2)
#define GPIOD_FLAGS_BIT_OPEN_DRAIN BIT(3)
+/* GPIOD_FLAGS_BIT_NONEXCLUSIVE is DEPRECATED, don't use in new code. */
#define GPIOD_FLAGS_BIT_NONEXCLUSIVE BIT(4)
/**
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 5438a1b446a6..291d49b9bf05 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -2382,7 +2382,7 @@ static inline bool kvm_is_visible_memslot(struct kvm_memory_slot *memslot)
struct kvm_vcpu *kvm_get_running_vcpu(void);
struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
-#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
+#if IS_ENABLED(CONFIG_HAVE_KVM_IRQ_BYPASS)
bool kvm_arch_has_irq_bypass(void);
int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *,
struct irq_bypass_producer *);
diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h
index 1e748958dad4..311f145eb4e8 100644
--- a/include/linux/mtd/spinand.h
+++ b/include/linux/mtd/spinand.h
@@ -67,7 +67,7 @@
SPI_MEM_OP_ADDR(2, addr, 1), \
SPI_MEM_OP_DUMMY(ndummy, 1), \
SPI_MEM_OP_DATA_IN(len, buf, 1), \
- __VA_OPT__(SPI_MEM_OP_MAX_FREQ(__VA_ARGS__)))
+ SPI_MEM_OP_MAX_FREQ(__VA_ARGS__ + 0))
#define SPINAND_PAGE_READ_FROM_CACHE_FAST_OP(addr, ndummy, buf, len) \
SPI_MEM_OP(SPI_MEM_OP_CMD(0x0b, 1), \
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index cf3b6445817b..2d11d013cabe 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4429,6 +4429,7 @@ void linkwatch_fire_event(struct net_device *dev);
* pending work list (if queued).
*/
void linkwatch_sync_dev(struct net_device *dev);
+void __linkwatch_sync_dev(struct net_device *dev);
/**
* netif_carrier_ok - test if carrier present
@@ -4974,6 +4975,7 @@ void dev_set_rx_mode(struct net_device *dev);
int dev_set_promiscuity(struct net_device *dev, int inc);
int netif_set_allmulti(struct net_device *dev, int inc, bool notify);
int dev_set_allmulti(struct net_device *dev, int inc);
+void netif_state_change(struct net_device *dev);
void netdev_state_change(struct net_device *dev);
void __netdev_notify_peers(struct net_device *dev);
void netdev_notify_peers(struct net_device *dev);
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index ccaaf4c7d5f6..ea39dd23a197 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -240,6 +240,6 @@ rtnl_notify_needed(const struct net *net, u16 nlflags, u32 group)
return (nlflags & NLM_F_ECHO) || rtnl_has_listeners(net, group);
}
-void netdev_set_operstate(struct net_device *dev, int newstate);
+void netif_set_operstate(struct net_device *dev, int newstate);
#endif /* __LINUX_RTNETLINK_H */
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 31248cfdfb23..dcd288fa1bb6 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -775,6 +775,7 @@ struct sctp_transport {
/* Reference counting. */
refcount_t refcnt;
+ __u32 dead:1,
/* RTO-Pending : A flag used to track if one of the DATA
* chunks sent to this address is currently being
* used to compute a RTT. If this flag is 0,
@@ -784,7 +785,7 @@ struct sctp_transport {
* calculation completes (i.e. the DATA chunk
* is SACK'd) clear this flag.
*/
- __u32 rto_pending:1,
+ rto_pending:1,
/*
* hb_sent : a flag that signals that we have a pending
diff --git a/include/net/sock.h b/include/net/sock.h
index 8daf1b3b12c6..694f954258d4 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -339,6 +339,8 @@ struct sk_filter;
* @sk_txtime_unused: unused txtime flags
* @ns_tracker: tracker for netns reference
* @sk_user_frags: xarray of pages the user is holding a reference on.
+ * @sk_owner: reference to the real owner of the socket that calls
+ * sock_lock_init_class_and_name().
*/
struct sock {
/*
@@ -547,6 +549,10 @@ struct sock {
struct rcu_head sk_rcu;
netns_tracker ns_tracker;
struct xarray sk_user_frags;
+
+#if IS_ENABLED(CONFIG_PROVE_LOCKING) && IS_ENABLED(CONFIG_MODULES)
+ struct module *sk_owner;
+#endif
};
struct sock_bh_locked {
@@ -1583,6 +1589,35 @@ static inline void sk_mem_uncharge(struct sock *sk, int size)
sk_mem_reclaim(sk);
}
+#if IS_ENABLED(CONFIG_PROVE_LOCKING) && IS_ENABLED(CONFIG_MODULES)
+static inline void sk_owner_set(struct sock *sk, struct module *owner)
+{
+ __module_get(owner);
+ sk->sk_owner = owner;
+}
+
+static inline void sk_owner_clear(struct sock *sk)
+{
+ sk->sk_owner = NULL;
+}
+
+static inline void sk_owner_put(struct sock *sk)
+{
+ module_put(sk->sk_owner);
+}
+#else
+static inline void sk_owner_set(struct sock *sk, struct module *owner)
+{
+}
+
+static inline void sk_owner_clear(struct sock *sk)
+{
+}
+
+static inline void sk_owner_put(struct sock *sk)
+{
+}
+#endif
/*
* Macro so as to not evaluate some arguments when
* lockdep is not enabled.
@@ -1592,13 +1627,14 @@ static inline void sk_mem_uncharge(struct sock *sk, int size)
*/
#define sock_lock_init_class_and_name(sk, sname, skey, name, key) \
do { \
+ sk_owner_set(sk, THIS_MODULE); \
sk->sk_lock.owned = 0; \
init_waitqueue_head(&sk->sk_lock.wq); \
spin_lock_init(&(sk)->sk_lock.slock); \
debug_check_no_locks_freed((void *)&(sk)->sk_lock, \
- sizeof((sk)->sk_lock)); \
+ sizeof((sk)->sk_lock)); \
lockdep_set_class_and_name(&(sk)->sk_lock.slock, \
- (skey), (sname)); \
+ (skey), (sname)); \
lockdep_init_map(&(sk)->sk_lock.dep_map, (name), (key), 0); \
} while (0)
diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c
index 098109259671..953d5e742569 100644
--- a/io_uring/kbuf.c
+++ b/io_uring/kbuf.c
@@ -504,6 +504,8 @@ int io_provide_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
p->nbufs = tmp;
p->addr = READ_ONCE(sqe->addr);
p->len = READ_ONCE(sqe->len);
+ if (!p->len)
+ return -EINVAL;
if (check_mul_overflow((unsigned long)p->len, (unsigned long)p->nbufs,
&size))
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index 5e64a8bb30a4..b36c8825550e 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -175,6 +175,18 @@ void io_rsrc_cache_free(struct io_ring_ctx *ctx)
io_alloc_cache_free(&ctx->imu_cache, kfree);
}
+static void io_clear_table_tags(struct io_rsrc_data *data)
+{
+ int i;
+
+ for (i = 0; i < data->nr; i++) {
+ struct io_rsrc_node *node = data->nodes[i];
+
+ if (node)
+ node->tag = 0;
+ }
+}
+
__cold void io_rsrc_data_free(struct io_ring_ctx *ctx,
struct io_rsrc_data *data)
{
@@ -583,6 +595,7 @@ int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
io_file_table_set_alloc_range(ctx, 0, ctx->file_table.data.nr);
return 0;
fail:
+ io_clear_table_tags(&ctx->file_table.data);
io_sqe_files_unregister(ctx);
return ret;
}
@@ -902,8 +915,10 @@ int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
}
ctx->buf_table = data;
- if (ret)
+ if (ret) {
+ io_clear_table_tags(&ctx->buf_table);
io_sqe_buffers_unregister(ctx);
+ }
return ret;
}
diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 80d4a6f71d29..0f46e0404c04 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -181,7 +181,7 @@ static void io_zcrx_free_area(struct io_zcrx_area *area)
kvfree(area->nia.niovs);
kvfree(area->user_refs);
if (area->pages) {
- unpin_user_pages(area->pages, area->nia.num_niovs);
+ unpin_user_pages(area->pages, area->nr_folios);
kvfree(area->pages);
}
kfree(area);
@@ -192,7 +192,7 @@ static int io_zcrx_create_area(struct io_zcrx_ifq *ifq,
struct io_uring_zcrx_area_reg *area_reg)
{
struct io_zcrx_area *area;
- int i, ret, nr_pages;
+ int i, ret, nr_pages, nr_iovs;
struct iovec iov;
if (area_reg->flags || area_reg->rq_area_token)
@@ -220,27 +220,28 @@ static int io_zcrx_create_area(struct io_zcrx_ifq *ifq,
area->pages = NULL;
goto err;
}
- area->nia.num_niovs = nr_pages;
+ area->nr_folios = nr_iovs = nr_pages;
+ area->nia.num_niovs = nr_iovs;
- area->nia.niovs = kvmalloc_array(nr_pages, sizeof(area->nia.niovs[0]),
+ area->nia.niovs = kvmalloc_array(nr_iovs, sizeof(area->nia.niovs[0]),
GFP_KERNEL | __GFP_ZERO);
if (!area->nia.niovs)
goto err;
- area->freelist = kvmalloc_array(nr_pages, sizeof(area->freelist[0]),
+ area->freelist = kvmalloc_array(nr_iovs, sizeof(area->freelist[0]),
GFP_KERNEL | __GFP_ZERO);
if (!area->freelist)
goto err;
- for (i = 0; i < nr_pages; i++)
+ for (i = 0; i < nr_iovs; i++)
area->freelist[i] = i;
- area->user_refs = kvmalloc_array(nr_pages, sizeof(area->user_refs[0]),
+ area->user_refs = kvmalloc_array(nr_iovs, sizeof(area->user_refs[0]),
GFP_KERNEL | __GFP_ZERO);
if (!area->user_refs)
goto err;
- for (i = 0; i < nr_pages; i++) {
+ for (i = 0; i < nr_iovs; i++) {
struct net_iov *niov = &area->nia.niovs[i];
niov->owner = &area->nia;
@@ -248,7 +249,7 @@ static int io_zcrx_create_area(struct io_zcrx_ifq *ifq,
atomic_set(&area->user_refs[i], 0);
}
- area->free_count = nr_pages;
+ area->free_count = nr_iovs;
area->ifq = ifq;
/* we're only supporting one area per ifq for now */
area->area_id = 0;
diff --git a/io_uring/zcrx.h b/io_uring/zcrx.h
index 706cc7300780..47f1c0e8c197 100644
--- a/io_uring/zcrx.h
+++ b/io_uring/zcrx.h
@@ -15,6 +15,7 @@ struct io_zcrx_area {
bool is_mapped;
u16 area_id;
struct page **pages;
+ unsigned long nr_folios;
/* freelist */
spinlock_t freelist_lock ____cacheline_aligned_in_smp;
@@ -26,11 +27,11 @@ struct io_zcrx_ifq {
struct io_ring_ctx *ctx;
struct io_zcrx_area *area;
+ spinlock_t rq_lock ____cacheline_aligned_in_smp;
struct io_uring *rq_ring;
struct io_uring_zcrx_rqe *rqes;
- u32 rq_entries;
u32 cached_rq_head;
- spinlock_t rq_lock;
+ u32 rq_entries;
u32 if_rxq;
struct device *dev;
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 27f08aa17b56..3caf2cd86e65 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -5923,6 +5923,12 @@ static void kill_css(struct cgroup_subsys_state *css)
if (css->flags & CSS_DYING)
return;
+ /*
+ * Call css_killed(), if defined, before setting the CSS_DYING flag
+ */
+ if (css->ss->css_killed)
+ css->ss->css_killed(css);
+
css->flags |= CSS_DYING;
/*
diff --git a/kernel/cgroup/cpuset-internal.h b/kernel/cgroup/cpuset-internal.h
index 976a8bc3ff60..383963e28ac6 100644
--- a/kernel/cgroup/cpuset-internal.h
+++ b/kernel/cgroup/cpuset-internal.h
@@ -33,6 +33,7 @@ enum prs_errcode {
PERR_CPUSEMPTY,
PERR_HKEEPING,
PERR_ACCESS,
+ PERR_REMOTE,
};
/* bits in struct cpuset flags field */
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 39c1fc643d77..306b60430091 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -61,10 +61,17 @@ static const char * const perr_strings[] = {
[PERR_CPUSEMPTY] = "cpuset.cpus and cpuset.cpus.exclusive are empty",
[PERR_HKEEPING] = "partition config conflicts with housekeeping setup",
[PERR_ACCESS] = "Enable partition not permitted",
+ [PERR_REMOTE] = "Have remote partition underneath",
};
/*
- * Exclusive CPUs distributed out to sub-partitions of top_cpuset
+ * For local partitions, update to subpartitions_cpus & isolated_cpus is done
+ * in update_parent_effective_cpumask(). For remote partitions, it is done in
+ * the remote_partition_*() and remote_cpus_update() helpers.
+ */
+/*
+ * Exclusive CPUs distributed out to local or remote sub-partitions of
+ * top_cpuset
*/
static cpumask_var_t subpartitions_cpus;
@@ -86,7 +93,6 @@ static struct list_head remote_children;
* A flag to force sched domain rebuild at the end of an operation.
* It can be set in
* - update_partition_sd_lb()
- * - remote_partition_check()
* - update_cpumasks_hier()
* - cpuset_update_flag()
* - cpuset_hotplug_update_tasks()
@@ -1089,9 +1095,14 @@ void cpuset_reset_sched_domains(void)
*
* Iterate through each task of @cs updating its cpus_allowed to the
* effective cpuset's. As this function is called with cpuset_mutex held,
- * cpuset membership stays stable. For top_cpuset, task_cpu_possible_mask()
- * is used instead of effective_cpus to make sure all offline CPUs are also
- * included as hotplug code won't update cpumasks for tasks in top_cpuset.
+ * cpuset membership stays stable.
+ *
+ * For top_cpuset, task_cpu_possible_mask() is used instead of effective_cpus
+ * to make sure all offline CPUs are also included as hotplug code won't
+ * update cpumasks for tasks in top_cpuset.
+ *
+ * As task_cpu_possible_mask() can be task dependent in arm64, we have to
+ * do cpu masking per task instead of doing it once for all.
*/
void cpuset_update_tasks_cpumask(struct cpuset *cs, struct cpumask *new_cpus)
{
@@ -1151,7 +1162,7 @@ static void update_sibling_cpumasks(struct cpuset *parent, struct cpuset *cs,
*
* Return: 0 if successful, an error code otherwise
*/
-static int update_partition_exclusive(struct cpuset *cs, int new_prs)
+static int update_partition_exclusive_flag(struct cpuset *cs, int new_prs)
{
bool exclusive = (new_prs > PRS_MEMBER);
@@ -1234,12 +1245,12 @@ static void reset_partition_data(struct cpuset *cs)
}
/*
- * partition_xcpus_newstate - Exclusive CPUs state change
+ * isolated_cpus_update - Update the isolated_cpus mask
* @old_prs: old partition_root_state
* @new_prs: new partition_root_state
* @xcpus: exclusive CPUs with state change
*/
-static void partition_xcpus_newstate(int old_prs, int new_prs, struct cpumask *xcpus)
+static void isolated_cpus_update(int old_prs, int new_prs, struct cpumask *xcpus)
{
WARN_ON_ONCE(old_prs == new_prs);
if (new_prs == PRS_ISOLATED)
@@ -1273,8 +1284,8 @@ static bool partition_xcpus_add(int new_prs, struct cpuset *parent,
isolcpus_updated = (new_prs != parent->partition_root_state);
if (isolcpus_updated)
- partition_xcpus_newstate(parent->partition_root_state, new_prs,
- xcpus);
+ isolated_cpus_update(parent->partition_root_state, new_prs,
+ xcpus);
cpumask_andnot(parent->effective_cpus, parent->effective_cpus, xcpus);
return isolcpus_updated;
@@ -1304,8 +1315,8 @@ static bool partition_xcpus_del(int old_prs, struct cpuset *parent,
isolcpus_updated = (old_prs != parent->partition_root_state);
if (isolcpus_updated)
- partition_xcpus_newstate(old_prs, parent->partition_root_state,
- xcpus);
+ isolated_cpus_update(old_prs, parent->partition_root_state,
+ xcpus);
cpumask_and(xcpus, xcpus, cpu_active_mask);
cpumask_or(parent->effective_cpus, parent->effective_cpus, xcpus);
@@ -1340,20 +1351,57 @@ EXPORT_SYMBOL_GPL(cpuset_cpu_is_isolated);
* compute_effective_exclusive_cpumask - compute effective exclusive CPUs
* @cs: cpuset
* @xcpus: effective exclusive CPUs value to be set
- * Return: true if xcpus is not empty, false otherwise.
+ * @real_cs: the real cpuset (can be NULL)
+ * Return: 0 if there is no sibling conflict, > 0 otherwise
*
- * Starting with exclusive_cpus (cpus_allowed if exclusive_cpus is not set),
- * it must be a subset of parent's effective_xcpus.
+ * If exclusive_cpus isn't explicitly set or a real_cs is provided, we have to
+ * scan the sibling cpusets and exclude their exclusive_cpus or effective_xcpus
+ * as well. The provision of real_cs means that a cpumask is being changed and
+ * the given cs is a trial one.
*/
-static bool compute_effective_exclusive_cpumask(struct cpuset *cs,
- struct cpumask *xcpus)
+static int compute_effective_exclusive_cpumask(struct cpuset *cs,
+ struct cpumask *xcpus,
+ struct cpuset *real_cs)
{
+ struct cgroup_subsys_state *css;
struct cpuset *parent = parent_cs(cs);
+ struct cpuset *sibling;
+ int retval = 0;
if (!xcpus)
xcpus = cs->effective_xcpus;
- return cpumask_and(xcpus, user_xcpus(cs), parent->effective_xcpus);
+ cpumask_and(xcpus, user_xcpus(cs), parent->effective_xcpus);
+
+ if (!real_cs) {
+ if (!cpumask_empty(cs->exclusive_cpus))
+ return 0;
+ } else {
+ cs = real_cs;
+ }
+
+ /*
+ * Exclude exclusive CPUs from siblings
+ */
+ rcu_read_lock();
+ cpuset_for_each_child(sibling, css, parent) {
+ if (sibling == cs)
+ continue;
+
+ if (!cpumask_empty(sibling->exclusive_cpus) &&
+ cpumask_intersects(xcpus, sibling->exclusive_cpus)) {
+ cpumask_andnot(xcpus, xcpus, sibling->exclusive_cpus);
+ retval++;
+ continue;
+ }
+ if (!cpumask_empty(sibling->effective_xcpus) &&
+ cpumask_intersects(xcpus, sibling->effective_xcpus)) {
+ cpumask_andnot(xcpus, xcpus, sibling->effective_xcpus);
+ retval++;
+ }
+ }
+ rcu_read_unlock();
+ return retval;
}
static inline bool is_remote_partition(struct cpuset *cs)
@@ -1395,7 +1443,7 @@ static int remote_partition_enable(struct cpuset *cs, int new_prs,
* remote partition root underneath it, its exclusive_cpus must
* have overlapped with subpartitions_cpus.
*/
- compute_effective_exclusive_cpumask(cs, tmp->new_cpus);
+ compute_effective_exclusive_cpumask(cs, tmp->new_cpus, NULL);
if (cpumask_empty(tmp->new_cpus) ||
cpumask_intersects(tmp->new_cpus, subpartitions_cpus) ||
cpumask_subset(top_cpuset.effective_cpus, tmp->new_cpus))
@@ -1404,8 +1452,11 @@ static int remote_partition_enable(struct cpuset *cs, int new_prs,
spin_lock_irq(&callback_lock);
isolcpus_updated = partition_xcpus_add(new_prs, NULL, tmp->new_cpus);
list_add(&cs->remote_sibling, &remote_children);
+ cpumask_copy(cs->effective_xcpus, tmp->new_cpus);
spin_unlock_irq(&callback_lock);
update_unbound_workqueue_cpumask(isolcpus_updated);
+ cpuset_force_rebuild();
+ cs->prs_err = 0;
/*
* Propagate changes in top_cpuset's effective_cpus down the hierarchy.
@@ -1428,20 +1479,24 @@ static void remote_partition_disable(struct cpuset *cs, struct tmpmasks *tmp)
{
bool isolcpus_updated;
- compute_effective_exclusive_cpumask(cs, tmp->new_cpus);
WARN_ON_ONCE(!is_remote_partition(cs));
- WARN_ON_ONCE(!cpumask_subset(tmp->new_cpus, subpartitions_cpus));
+ WARN_ON_ONCE(!cpumask_subset(cs->effective_xcpus, subpartitions_cpus));
spin_lock_irq(&callback_lock);
list_del_init(&cs->remote_sibling);
isolcpus_updated = partition_xcpus_del(cs->partition_root_state,
- NULL, tmp->new_cpus);
- cs->partition_root_state = -cs->partition_root_state;
- if (!cs->prs_err)
- cs->prs_err = PERR_INVCPUS;
+ NULL, cs->effective_xcpus);
+ if (cs->prs_err)
+ cs->partition_root_state = -cs->partition_root_state;
+ else
+ cs->partition_root_state = PRS_MEMBER;
+
+ /* effective_xcpus may need to be changed */
+ compute_effective_exclusive_cpumask(cs, NULL, NULL);
reset_partition_data(cs);
spin_unlock_irq(&callback_lock);
update_unbound_workqueue_cpumask(isolcpus_updated);
+ cpuset_force_rebuild();
/*
* Propagate changes in top_cpuset's effective_cpus down the hierarchy.
@@ -1453,14 +1508,15 @@ static void remote_partition_disable(struct cpuset *cs, struct tmpmasks *tmp)
/*
* remote_cpus_update - cpus_exclusive change of remote partition
* @cs: the cpuset to be updated
- * @newmask: the new effective_xcpus mask
+ * @xcpus: the new exclusive_cpus mask, if non-NULL
+ * @excpus: the new effective_xcpus mask
* @tmp: temporary masks
*
* top_cpuset and subpartitions_cpus will be updated or partition can be
* invalidated.
*/
-static void remote_cpus_update(struct cpuset *cs, struct cpumask *newmask,
- struct tmpmasks *tmp)
+static void remote_cpus_update(struct cpuset *cs, struct cpumask *xcpus,
+ struct cpumask *excpus, struct tmpmasks *tmp)
{
bool adding, deleting;
int prs = cs->partition_root_state;
@@ -1471,29 +1527,45 @@ static void remote_cpus_update(struct cpuset *cs, struct cpumask *newmask,
WARN_ON_ONCE(!cpumask_subset(cs->effective_xcpus, subpartitions_cpus));
- if (cpumask_empty(newmask))
+ if (cpumask_empty(excpus)) {
+ cs->prs_err = PERR_CPUSEMPTY;
goto invalidate;
+ }
- adding = cpumask_andnot(tmp->addmask, newmask, cs->effective_xcpus);
- deleting = cpumask_andnot(tmp->delmask, cs->effective_xcpus, newmask);
+ adding = cpumask_andnot(tmp->addmask, excpus, cs->effective_xcpus);
+ deleting = cpumask_andnot(tmp->delmask, cs->effective_xcpus, excpus);
/*
* Additions of remote CPUs is only allowed if those CPUs are
* not allocated to other partitions and there are effective_cpus
* left in the top cpuset.
*/
- if (adding && (!capable(CAP_SYS_ADMIN) ||
- cpumask_intersects(tmp->addmask, subpartitions_cpus) ||
- cpumask_subset(top_cpuset.effective_cpus, tmp->addmask)))
- goto invalidate;
+ if (adding) {
+ if (!capable(CAP_SYS_ADMIN))
+ cs->prs_err = PERR_ACCESS;
+ else if (cpumask_intersects(tmp->addmask, subpartitions_cpus) ||
+ cpumask_subset(top_cpuset.effective_cpus, tmp->addmask))
+ cs->prs_err = PERR_NOCPUS;
+ if (cs->prs_err)
+ goto invalidate;
+ }
spin_lock_irq(&callback_lock);
if (adding)
isolcpus_updated += partition_xcpus_add(prs, NULL, tmp->addmask);
if (deleting)
isolcpus_updated += partition_xcpus_del(prs, NULL, tmp->delmask);
+ /*
+ * Need to update effective_xcpus and exclusive_cpus now as
+ * update_sibling_cpumasks() below may iterate back to the same cs.
+ */
+ cpumask_copy(cs->effective_xcpus, excpus);
+ if (xcpus)
+ cpumask_copy(cs->exclusive_cpus, xcpus);
spin_unlock_irq(&callback_lock);
update_unbound_workqueue_cpumask(isolcpus_updated);
+ if (adding || deleting)
+ cpuset_force_rebuild();
/*
* Propagate changes in top_cpuset's effective_cpus down the hierarchy.
@@ -1507,47 +1579,6 @@ invalidate:
}
/*
- * remote_partition_check - check if a child remote partition needs update
- * @cs: the cpuset to be updated
- * @newmask: the new effective_xcpus mask
- * @delmask: temporary mask for deletion (not in tmp)
- * @tmp: temporary masks
- *
- * This should be called before the given cs has updated its cpus_allowed
- * and/or effective_xcpus.
- */
-static void remote_partition_check(struct cpuset *cs, struct cpumask *newmask,
- struct cpumask *delmask, struct tmpmasks *tmp)
-{
- struct cpuset *child, *next;
- int disable_cnt = 0;
-
- /*
- * Compute the effective exclusive CPUs that will be deleted.
- */
- if (!cpumask_andnot(delmask, cs->effective_xcpus, newmask) ||
- !cpumask_intersects(delmask, subpartitions_cpus))
- return; /* No deletion of exclusive CPUs in partitions */
-
- /*
- * Searching the remote children list to look for those that will
- * be impacted by the deletion of exclusive CPUs.
- *
- * Since a cpuset must be removed from the remote children list
- * before it can go offline and holding cpuset_mutex will prevent
- * any change in cpuset status. RCU read lock isn't needed.
- */
- lockdep_assert_held(&cpuset_mutex);
- list_for_each_entry_safe(child, next, &remote_children, remote_sibling)
- if (cpumask_intersects(child->effective_cpus, delmask)) {
- remote_partition_disable(child, tmp);
- disable_cnt++;
- }
- if (disable_cnt)
- cpuset_force_rebuild();
-}
-
-/*
* prstate_housekeeping_conflict - check for partition & housekeeping conflicts
* @prstate: partition root state to be checked
* @new_cpus: cpu mask
@@ -1601,7 +1632,7 @@ static bool prstate_housekeeping_conflict(int prstate, struct cpumask *new_cpus)
* The partcmd_update command is used by update_cpumasks_hier() with newmask
* NULL and update_cpumask() with newmask set. The partcmd_invalidate is used
* by update_cpumask() with NULL newmask. In both cases, the callers won't
- * check for error and so partition_root_state and prs_error will be updated
+ * check for error and so partition_root_state and prs_err will be updated
* directly.
*/
static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
@@ -1614,11 +1645,12 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
int old_prs, new_prs;
int part_error = PERR_NONE; /* Partition error? */
int subparts_delta = 0;
- struct cpumask *xcpus; /* cs effective_xcpus */
int isolcpus_updated = 0;
+ struct cpumask *xcpus = user_xcpus(cs);
bool nocpu;
lockdep_assert_held(&cpuset_mutex);
+ WARN_ON_ONCE(is_remote_partition(cs));
/*
* new_prs will only be changed for the partcmd_update and
@@ -1626,7 +1658,6 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
*/
adding = deleting = false;
old_prs = new_prs = cs->partition_root_state;
- xcpus = user_xcpus(cs);
if (cmd == partcmd_invalidate) {
if (is_prs_invalid(old_prs))
@@ -1661,12 +1692,19 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
if ((cmd == partcmd_enable) || (cmd == partcmd_enablei)) {
/*
+ * Need to call compute_effective_exclusive_cpumask() in case
+ * exclusive_cpus not set. Sibling conflict should only happen
+ * if exclusive_cpus isn't set.
+ */
+ xcpus = tmp->new_cpus;
+ if (compute_effective_exclusive_cpumask(cs, xcpus, NULL))
+ WARN_ON_ONCE(!cpumask_empty(cs->exclusive_cpus));
+
+ /*
* Enabling partition root is not allowed if its
- * effective_xcpus is empty or doesn't overlap with
- * parent's effective_xcpus.
+ * effective_xcpus is empty.
*/
- if (cpumask_empty(xcpus) ||
- !cpumask_intersects(xcpus, parent->effective_xcpus))
+ if (cpumask_empty(xcpus))
return PERR_INVCPUS;
if (prstate_housekeeping_conflict(new_prs, xcpus))
@@ -1679,19 +1717,22 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
if (nocpu)
return PERR_NOCPUS;
- cpumask_copy(tmp->delmask, xcpus);
- deleting = true;
- subparts_delta++;
+ deleting = cpumask_and(tmp->delmask, xcpus, parent->effective_xcpus);
+ if (deleting)
+ subparts_delta++;
new_prs = (cmd == partcmd_enable) ? PRS_ROOT : PRS_ISOLATED;
} else if (cmd == partcmd_disable) {
/*
- * May need to add cpus to parent's effective_cpus for
- * valid partition root.
+ * May need to add cpus back to parent's effective_cpus
+ * (and maybe removed from subpartitions_cpus/isolated_cpus)
+ * for valid partition root. xcpus may contain CPUs that
+ * shouldn't be removed from the two global cpumasks.
*/
- adding = !is_prs_invalid(old_prs) &&
- cpumask_and(tmp->addmask, xcpus, parent->effective_xcpus);
- if (adding)
+ if (is_partition_valid(cs)) {
+ cpumask_copy(tmp->addmask, cs->effective_xcpus);
+ adding = true;
subparts_delta--;
+ }
new_prs = PRS_MEMBER;
} else if (newmask) {
/*
@@ -1701,6 +1742,7 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
part_error = PERR_CPUSEMPTY;
goto write_error;
}
+
/* Check newmask again, whether cpus are available for parent/cs */
nocpu |= tasks_nocpu_error(parent, cs, newmask);
@@ -1829,7 +1871,7 @@ write_error:
* CPU lists in cs haven't been updated yet. So defer it to later.
*/
if ((old_prs != new_prs) && (cmd != partcmd_update)) {
- int err = update_partition_exclusive(cs, new_prs);
+ int err = update_partition_exclusive_flag(cs, new_prs);
if (err)
return err;
@@ -1867,7 +1909,7 @@ write_error:
update_unbound_workqueue_cpumask(isolcpus_updated);
if ((old_prs != new_prs) && (cmd == partcmd_update))
- update_partition_exclusive(cs, new_prs);
+ update_partition_exclusive_flag(cs, new_prs);
if (adding || deleting) {
cpuset_update_tasks_cpumask(parent, tmp->addmask);
@@ -1917,7 +1959,7 @@ static void compute_partition_effective_cpumask(struct cpuset *cs,
* 2) All the effective_cpus will be used up and cp
* has tasks
*/
- compute_effective_exclusive_cpumask(cs, new_ecpus);
+ compute_effective_exclusive_cpumask(cs, new_ecpus, NULL);
cpumask_and(new_ecpus, new_ecpus, cpu_active_mask);
rcu_read_lock();
@@ -1925,6 +1967,11 @@ static void compute_partition_effective_cpumask(struct cpuset *cs,
if (!is_partition_valid(child))
continue;
+ /*
+ * There shouldn't be a remote partition underneath another
+ * partition root.
+ */
+ WARN_ON_ONCE(is_remote_partition(child));
child->prs_err = 0;
if (!cpumask_subset(child->effective_xcpus,
cs->effective_xcpus))
@@ -1980,32 +2027,39 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp,
bool remote = is_remote_partition(cp);
bool update_parent = false;
+ old_prs = new_prs = cp->partition_root_state;
+
/*
- * Skip descendent remote partition that acquires CPUs
- * directly from top cpuset unless it is cs.
+ * For child remote partition root (!= cs), we need to call
+ * remote_cpus_update() if effective_xcpus will be changed.
+ * Otherwise, we can skip the whole subtree.
+ *
+ * remote_cpus_update() will reuse tmp->new_cpus only after
+ * its value is being processed.
*/
if (remote && (cp != cs)) {
- pos_css = css_rightmost_descendant(pos_css);
- continue;
- }
+ compute_effective_exclusive_cpumask(cp, tmp->new_cpus, NULL);
+ if (cpumask_equal(cp->effective_xcpus, tmp->new_cpus)) {
+ pos_css = css_rightmost_descendant(pos_css);
+ continue;
+ }
+ rcu_read_unlock();
+ remote_cpus_update(cp, NULL, tmp->new_cpus, tmp);
+ rcu_read_lock();
- /*
- * Update effective_xcpus if exclusive_cpus set.
- * The case when exclusive_cpus isn't set is handled later.
- */
- if (!cpumask_empty(cp->exclusive_cpus) && (cp != cs)) {
- spin_lock_irq(&callback_lock);
- compute_effective_exclusive_cpumask(cp, NULL);
- spin_unlock_irq(&callback_lock);
+ /* Remote partition may be invalidated */
+ new_prs = cp->partition_root_state;
+ remote = (new_prs == old_prs);
}
- old_prs = new_prs = cp->partition_root_state;
- if (remote || (is_partition_valid(parent) &&
- is_partition_valid(cp)))
+ if (remote || (is_partition_valid(parent) && is_partition_valid(cp)))
compute_partition_effective_cpumask(cp, tmp->new_cpus);
else
compute_effective_cpumask(tmp->new_cpus, cp, parent);
+ if (remote)
+ goto get_css; /* Ready to update cpuset data */
+
/*
* A partition with no effective_cpus is allowed as long as
* there is no task associated with it. Call
@@ -2025,9 +2079,6 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp,
if (is_in_v2_mode() && !remote && cpumask_empty(tmp->new_cpus))
cpumask_copy(tmp->new_cpus, parent->effective_cpus);
- if (remote)
- goto get_css;
-
/*
* Skip the whole subtree if
* 1) the cpumask remains the same,
@@ -2088,6 +2139,9 @@ get_css:
spin_lock_irq(&callback_lock);
cpumask_copy(cp->effective_cpus, tmp->new_cpus);
cp->partition_root_state = new_prs;
+ if (!cpumask_empty(cp->exclusive_cpus) && (cp != cs))
+ compute_effective_exclusive_cpumask(cp, NULL, NULL);
+
/*
* Make sure effective_xcpus is properly set for a valid
* partition root.
@@ -2174,7 +2228,14 @@ static void update_sibling_cpumasks(struct cpuset *parent, struct cpuset *cs,
parent);
if (cpumask_equal(tmp->new_cpus, sibling->effective_cpus))
continue;
+ } else if (is_remote_partition(sibling)) {
+ /*
+ * Change in a sibling cpuset won't affect a remote
+ * partition root.
+ */
+ continue;
}
+
if (!css_tryget_online(&sibling->css))
continue;
@@ -2231,8 +2292,9 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
* trialcs->effective_xcpus is used as a temporary cpumask
* for checking validity of the partition root.
*/
+ trialcs->partition_root_state = PRS_MEMBER;
if (!cpumask_empty(trialcs->exclusive_cpus) || is_partition_valid(cs))
- compute_effective_exclusive_cpumask(trialcs, NULL);
+ compute_effective_exclusive_cpumask(trialcs, NULL, cs);
}
/* Nothing to do if the cpus didn't change */
@@ -2305,19 +2367,13 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
* Call remote_cpus_update() to handle valid remote partition
*/
if (is_remote_partition(cs))
- remote_cpus_update(cs, xcpus, &tmp);
+ remote_cpus_update(cs, NULL, xcpus, &tmp);
else if (invalidate)
update_parent_effective_cpumask(cs, partcmd_invalidate,
NULL, &tmp);
else
update_parent_effective_cpumask(cs, partcmd_update,
xcpus, &tmp);
- } else if (!cpumask_empty(cs->exclusive_cpus)) {
- /*
- * Use trialcs->effective_cpus as a temp cpumask
- */
- remote_partition_check(cs, trialcs->effective_xcpus,
- trialcs->effective_cpus, &tmp);
}
spin_lock_irq(&callback_lock);
@@ -2369,8 +2425,15 @@ static int update_exclusive_cpumask(struct cpuset *cs, struct cpuset *trialcs,
if (cpumask_equal(cs->exclusive_cpus, trialcs->exclusive_cpus))
return 0;
- if (*buf)
- compute_effective_exclusive_cpumask(trialcs, NULL);
+ if (*buf) {
+ trialcs->partition_root_state = PRS_MEMBER;
+ /*
+ * Reject the change if there is exclusive CPUs conflict with
+ * the siblings.
+ */
+ if (compute_effective_exclusive_cpumask(trialcs, NULL, cs))
+ return -EINVAL;
+ }
/*
* Check all the descendants in update_cpumasks_hier() if
@@ -2401,8 +2464,8 @@ static int update_exclusive_cpumask(struct cpuset *cs, struct cpuset *trialcs,
if (invalidate)
remote_partition_disable(cs, &tmp);
else
- remote_cpus_update(cs, trialcs->effective_xcpus,
- &tmp);
+ remote_cpus_update(cs, trialcs->exclusive_cpus,
+ trialcs->effective_xcpus, &tmp);
} else if (invalidate) {
update_parent_effective_cpumask(cs, partcmd_invalidate,
NULL, &tmp);
@@ -2410,12 +2473,6 @@ static int update_exclusive_cpumask(struct cpuset *cs, struct cpuset *trialcs,
update_parent_effective_cpumask(cs, partcmd_update,
trialcs->effective_xcpus, &tmp);
}
- } else if (!cpumask_empty(trialcs->exclusive_cpus)) {
- /*
- * Use trialcs->effective_cpus as a temp cpumask
- */
- remote_partition_check(cs, trialcs->effective_xcpus,
- trialcs->effective_cpus, &tmp);
}
spin_lock_irq(&callback_lock);
cpumask_copy(cs->exclusive_cpus, trialcs->exclusive_cpus);
@@ -2782,7 +2839,7 @@ static int update_prstate(struct cpuset *cs, int new_prs)
int err = PERR_NONE, old_prs = cs->partition_root_state;
struct cpuset *parent = parent_cs(cs);
struct tmpmasks tmpmask;
- bool new_xcpus_state = false;
+ bool isolcpus_updated = false;
if (old_prs == new_prs)
return 0;
@@ -2796,18 +2853,7 @@ static int update_prstate(struct cpuset *cs, int new_prs)
if (alloc_cpumasks(NULL, &tmpmask))
return -ENOMEM;
- /*
- * Setup effective_xcpus if not properly set yet, it will be cleared
- * later if partition becomes invalid.
- */
- if ((new_prs > 0) && cpumask_empty(cs->exclusive_cpus)) {
- spin_lock_irq(&callback_lock);
- cpumask_and(cs->effective_xcpus,
- cs->cpus_allowed, parent->effective_xcpus);
- spin_unlock_irq(&callback_lock);
- }
-
- err = update_partition_exclusive(cs, new_prs);
+ err = update_partition_exclusive_flag(cs, new_prs);
if (err)
goto out;
@@ -2821,6 +2867,19 @@ static int update_prstate(struct cpuset *cs, int new_prs)
}
/*
+ * We don't support the creation of a new local partition with
+ * a remote partition underneath it. This unsupported
+ * setting can happen only if parent is the top_cpuset because
+ * a remote partition cannot be created underneath an existing
+ * local or remote partition.
+ */
+ if ((parent == &top_cpuset) &&
+ cpumask_intersects(cs->exclusive_cpus, subpartitions_cpus)) {
+ err = PERR_REMOTE;
+ goto out;
+ }
+
+ /*
* If parent is valid partition, enable local partiion.
* Otherwise, enable a remote partition.
*/
@@ -2835,8 +2894,9 @@ static int update_prstate(struct cpuset *cs, int new_prs)
} else if (old_prs && new_prs) {
/*
* A change in load balance state only, no change in cpumasks.
+ * Need to update isolated_cpus.
*/
- new_xcpus_state = true;
+ isolcpus_updated = true;
} else {
/*
* Switching back to member is always allowed even if it
@@ -2860,7 +2920,7 @@ out:
*/
if (err) {
new_prs = -new_prs;
- update_partition_exclusive(cs, new_prs);
+ update_partition_exclusive_flag(cs, new_prs);
}
spin_lock_irq(&callback_lock);
@@ -2868,14 +2928,18 @@ out:
WRITE_ONCE(cs->prs_err, err);
if (!is_partition_valid(cs))
reset_partition_data(cs);
- else if (new_xcpus_state)
- partition_xcpus_newstate(old_prs, new_prs, cs->effective_xcpus);
+ else if (isolcpus_updated)
+ isolated_cpus_update(old_prs, new_prs, cs->effective_xcpus);
spin_unlock_irq(&callback_lock);
- update_unbound_workqueue_cpumask(new_xcpus_state);
+ update_unbound_workqueue_cpumask(isolcpus_updated);
- /* Force update if switching back to member */
+ /* Force update if switching back to member & update effective_xcpus */
update_cpumasks_hier(cs, &tmpmask, !new_prs);
+ /* A newly created partition must have effective_xcpus set */
+ WARN_ON_ONCE(!old_prs && (new_prs > 0)
+ && cpumask_empty(cs->effective_xcpus));
+
/* Update sched domains and load balance flag */
update_partition_sd_lb(cs, old_prs);
@@ -3208,7 +3272,7 @@ int cpuset_common_seq_show(struct seq_file *sf, void *v)
return ret;
}
-static int sched_partition_show(struct seq_file *seq, void *v)
+static int cpuset_partition_show(struct seq_file *seq, void *v)
{
struct cpuset *cs = css_cs(seq_css(seq));
const char *err, *type = NULL;
@@ -3239,7 +3303,7 @@ static int sched_partition_show(struct seq_file *seq, void *v)
return 0;
}
-static ssize_t sched_partition_write(struct kernfs_open_file *of, char *buf,
+static ssize_t cpuset_partition_write(struct kernfs_open_file *of, char *buf,
size_t nbytes, loff_t off)
{
struct cpuset *cs = css_cs(of_css(of));
@@ -3260,11 +3324,8 @@ static ssize_t sched_partition_write(struct kernfs_open_file *of, char *buf,
css_get(&cs->css);
cpus_read_lock();
mutex_lock(&cpuset_mutex);
- if (!is_cpuset_online(cs))
- goto out_unlock;
-
- retval = update_prstate(cs, val);
-out_unlock:
+ if (is_cpuset_online(cs))
+ retval = update_prstate(cs, val);
mutex_unlock(&cpuset_mutex);
cpus_read_unlock();
css_put(&cs->css);
@@ -3308,8 +3369,8 @@ static struct cftype dfl_files[] = {
{
.name = "cpus.partition",
- .seq_show = sched_partition_show,
- .write = sched_partition_write,
+ .seq_show = cpuset_partition_show,
+ .write = cpuset_partition_write,
.private = FILE_PARTITION_ROOT,
.flags = CFTYPE_NOT_ON_ROOT,
.file_offset = offsetof(struct cpuset, partition_file),
@@ -3475,9 +3536,6 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css)
cpus_read_lock();
mutex_lock(&cpuset_mutex);
- if (is_partition_valid(cs))
- update_prstate(cs, 0);
-
if (!cpuset_v2() && is_sched_load_balance(cs))
cpuset_update_flag(CS_SCHED_LOAD_BALANCE, cs, 0);
@@ -3488,6 +3546,22 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css)
cpus_read_unlock();
}
+static void cpuset_css_killed(struct cgroup_subsys_state *css)
+{
+ struct cpuset *cs = css_cs(css);
+
+ cpus_read_lock();
+ mutex_lock(&cpuset_mutex);
+
+ /* Reset valid partition back to member */
+ if (is_partition_valid(cs))
+ update_prstate(cs, PRS_MEMBER);
+
+ mutex_unlock(&cpuset_mutex);
+ cpus_read_unlock();
+
+}
+
static void cpuset_css_free(struct cgroup_subsys_state *css)
{
struct cpuset *cs = css_cs(css);
@@ -3609,6 +3683,7 @@ struct cgroup_subsys cpuset_cgrp_subsys = {
.css_alloc = cpuset_css_alloc,
.css_online = cpuset_css_online,
.css_offline = cpuset_css_offline,
+ .css_killed = cpuset_css_killed,
.css_free = cpuset_css_free,
.can_attach = cpuset_can_attach,
.cancel_attach = cpuset_cancel_attach,
@@ -3739,10 +3814,10 @@ retry:
if (remote && cpumask_empty(&new_cpus) &&
partition_is_populated(cs, NULL)) {
+ cs->prs_err = PERR_HOTPLUG;
remote_partition_disable(cs, tmp);
compute_effective_cpumask(&new_cpus, cs, parent);
remote = false;
- cpuset_force_rebuild();
}
/*
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
index 4bb587d5d34f..b2239156b7de 100644
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -318,10 +318,11 @@ __bpf_kfunc void cgroup_rstat_flush(struct cgroup *cgrp)
might_sleep();
for_each_possible_cpu(cpu) {
- struct cgroup *pos = cgroup_rstat_updated_list(cgrp, cpu);
+ struct cgroup *pos;
/* Reacquire for each CPU to avoid disabling IRQs too long */
__cgroup_rstat_lock(cgrp, cpu);
+ pos = cgroup_rstat_updated_list(cgrp, cpu);
for (; pos; pos = pos->rstat_flush_next) {
struct cgroup_subsys_state *css;
diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
index 33082c4e8154..95c6e3473a76 100644
--- a/kernel/trace/fprobe.c
+++ b/kernel/trace/fprobe.c
@@ -89,8 +89,11 @@ static bool delete_fprobe_node(struct fprobe_hlist_node *node)
{
lockdep_assert_held(&fprobe_mutex);
- WRITE_ONCE(node->fp, NULL);
- hlist_del_rcu(&node->hlist);
+ /* Avoid double deleting */
+ if (READ_ONCE(node->fp) != NULL) {
+ WRITE_ONCE(node->fp, NULL);
+ hlist_del_rcu(&node->hlist);
+ }
return !!find_first_fprobe_node(node->addr);
}
@@ -411,6 +414,102 @@ static void fprobe_graph_remove_ips(unsigned long *addrs, int num)
ftrace_set_filter_ips(&fprobe_graph_ops.ops, addrs, num, 1, 0);
}
+#ifdef CONFIG_MODULES
+
+#define FPROBE_IPS_BATCH_INIT 8
+/* instruction pointer address list */
+struct fprobe_addr_list {
+ int index;
+ int size;
+ unsigned long *addrs;
+};
+
+static int fprobe_addr_list_add(struct fprobe_addr_list *alist, unsigned long addr)
+{
+ unsigned long *addrs;
+
+ if (alist->index >= alist->size)
+ return -ENOMEM;
+
+ alist->addrs[alist->index++] = addr;
+ if (alist->index < alist->size)
+ return 0;
+
+ /* Expand the address list */
+ addrs = kcalloc(alist->size * 2, sizeof(*addrs), GFP_KERNEL);
+ if (!addrs)
+ return -ENOMEM;
+
+ memcpy(addrs, alist->addrs, alist->size * sizeof(*addrs));
+ alist->size *= 2;
+ kfree(alist->addrs);
+ alist->addrs = addrs;
+
+ return 0;
+}
+
+static void fprobe_remove_node_in_module(struct module *mod, struct hlist_head *head,
+ struct fprobe_addr_list *alist)
+{
+ struct fprobe_hlist_node *node;
+ int ret = 0;
+
+ hlist_for_each_entry_rcu(node, head, hlist) {
+ if (!within_module(node->addr, mod))
+ continue;
+ if (delete_fprobe_node(node))
+ continue;
+ /*
+ * If failed to update alist, just continue to update hlist.
+ * Therefore, at list user handler will not hit anymore.
+ */
+ if (!ret)
+ ret = fprobe_addr_list_add(alist, node->addr);
+ }
+}
+
+/* Handle module unloading to manage fprobe_ip_table. */
+static int fprobe_module_callback(struct notifier_block *nb,
+ unsigned long val, void *data)
+{
+ struct fprobe_addr_list alist = {.size = FPROBE_IPS_BATCH_INIT};
+ struct module *mod = data;
+ int i;
+
+ if (val != MODULE_STATE_GOING)
+ return NOTIFY_DONE;
+
+ alist.addrs = kcalloc(alist.size, sizeof(*alist.addrs), GFP_KERNEL);
+ /* If failed to alloc memory, we can not remove ips from hash. */
+ if (!alist.addrs)
+ return NOTIFY_DONE;
+
+ mutex_lock(&fprobe_mutex);
+ for (i = 0; i < FPROBE_IP_TABLE_SIZE; i++)
+ fprobe_remove_node_in_module(mod, &fprobe_ip_table[i], &alist);
+
+ if (alist.index < alist.size && alist.index > 0)
+ ftrace_set_filter_ips(&fprobe_graph_ops.ops,
+ alist.addrs, alist.index, 1, 0);
+ mutex_unlock(&fprobe_mutex);
+
+ kfree(alist.addrs);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block fprobe_module_nb = {
+ .notifier_call = fprobe_module_callback,
+ .priority = 0,
+};
+
+static int __init init_fprobe_module(void)
+{
+ return register_module_notifier(&fprobe_module_nb);
+}
+early_initcall(init_fprobe_module);
+#endif
+
static int symbols_cmp(const void *a, const void *b)
{
const char **str_a = (const char **) a;
@@ -445,6 +544,7 @@ struct filter_match_data {
size_t index;
size_t size;
unsigned long *addrs;
+ struct module **mods;
};
static int filter_match_callback(void *data, const char *name, unsigned long addr)
@@ -458,30 +558,47 @@ static int filter_match_callback(void *data, const char *name, unsigned long add
if (!ftrace_location(addr))
return 0;
- if (match->addrs)
- match->addrs[match->index] = addr;
+ if (match->addrs) {
+ struct module *mod = __module_text_address(addr);
+
+ if (mod && !try_module_get(mod))
+ return 0;
+ match->mods[match->index] = mod;
+ match->addrs[match->index] = addr;
+ }
match->index++;
return match->index == match->size;
}
/*
* Make IP list from the filter/no-filter glob patterns.
- * Return the number of matched symbols, or -ENOENT.
+ * Return the number of matched symbols, or errno.
+ * If @addrs == NULL, this just counts the number of matched symbols. If @addrs
+ * is passed with an array, we need to pass the an @mods array of the same size
+ * to increment the module refcount for each symbol.
+ * This means we also need to call `module_put` for each element of @mods after
+ * using the @addrs.
*/
-static int ip_list_from_filter(const char *filter, const char *notfilter,
- unsigned long *addrs, size_t size)
+static int get_ips_from_filter(const char *filter, const char *notfilter,
+ unsigned long *addrs, struct module **mods,
+ size_t size)
{
struct filter_match_data match = { .filter = filter, .notfilter = notfilter,
- .index = 0, .size = size, .addrs = addrs};
+ .index = 0, .size = size, .addrs = addrs, .mods = mods};
int ret;
+ if (addrs && !mods)
+ return -EINVAL;
+
ret = kallsyms_on_each_symbol(filter_match_callback, &match);
if (ret < 0)
return ret;
- ret = module_kallsyms_on_each_symbol(NULL, filter_match_callback, &match);
- if (ret < 0)
- return ret;
+ if (IS_ENABLED(CONFIG_MODULES)) {
+ ret = module_kallsyms_on_each_symbol(NULL, filter_match_callback, &match);
+ if (ret < 0)
+ return ret;
+ }
return match.index ?: -ENOENT;
}
@@ -543,24 +660,35 @@ static int fprobe_init(struct fprobe *fp, unsigned long *addrs, int num)
*/
int register_fprobe(struct fprobe *fp, const char *filter, const char *notfilter)
{
- unsigned long *addrs;
- int ret;
+ unsigned long *addrs __free(kfree) = NULL;
+ struct module **mods __free(kfree) = NULL;
+ int ret, num;
if (!fp || !filter)
return -EINVAL;
- ret = ip_list_from_filter(filter, notfilter, NULL, FPROBE_IPS_MAX);
- if (ret < 0)
- return ret;
+ num = get_ips_from_filter(filter, notfilter, NULL, NULL, FPROBE_IPS_MAX);
+ if (num < 0)
+ return num;
- addrs = kcalloc(ret, sizeof(unsigned long), GFP_KERNEL);
+ addrs = kcalloc(num, sizeof(*addrs), GFP_KERNEL);
if (!addrs)
return -ENOMEM;
- ret = ip_list_from_filter(filter, notfilter, addrs, ret);
- if (ret > 0)
- ret = register_fprobe_ips(fp, addrs, ret);
- kfree(addrs);
+ mods = kcalloc(num, sizeof(*mods), GFP_KERNEL);
+ if (!mods)
+ return -ENOMEM;
+
+ ret = get_ips_from_filter(filter, notfilter, addrs, mods, num);
+ if (ret < 0)
+ return ret;
+
+ ret = register_fprobe_ips(fp, addrs, ret);
+
+ for (int i = 0; i < num; i++) {
+ if (mods[i])
+ module_put(mods[i]);
+ }
return ret;
}
EXPORT_SYMBOL_GPL(register_fprobe);
diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c
index 5d7ca80173ea..b40fa59159ac 100644
--- a/kernel/trace/trace_fprobe.c
+++ b/kernel/trace/trace_fprobe.c
@@ -919,9 +919,15 @@ static void __find_tracepoint_module_cb(struct tracepoint *tp, struct module *mo
struct __find_tracepoint_cb_data *data = priv;
if (!data->tpoint && !strcmp(data->tp_name, tp->name)) {
- data->tpoint = tp;
- if (!data->mod)
+ /* If module is not specified, try getting module refcount. */
+ if (!data->mod && mod) {
+ /* If failed to get refcount, ignore this tracepoint. */
+ if (!try_module_get(mod))
+ return;
+
data->mod = mod;
+ }
+ data->tpoint = tp;
}
}
@@ -933,7 +939,11 @@ static void __find_tracepoint_cb(struct tracepoint *tp, void *priv)
data->tpoint = tp;
}
-/* Find a tracepoint from kernel and module. */
+/*
+ * Find a tracepoint from kernel and module. If the tracepoint is on the module,
+ * the module's refcount is incremented and returned as *@tp_mod. Thus, if it is
+ * not NULL, caller must call module_put(*tp_mod) after used the tracepoint.
+ */
static struct tracepoint *find_tracepoint(const char *tp_name,
struct module **tp_mod)
{
@@ -962,7 +972,10 @@ static void reenable_trace_fprobe(struct trace_fprobe *tf)
}
}
-/* Find a tracepoint from specified module. */
+/*
+ * Find a tracepoint from specified module. In this case, this does not get the
+ * module's refcount. The caller must ensure the module is not freed.
+ */
static struct tracepoint *find_tracepoint_in_module(struct module *mod,
const char *tp_name)
{
@@ -1169,11 +1182,6 @@ static int trace_fprobe_create_internal(int argc, const char *argv[],
if (is_tracepoint) {
ctx->flags |= TPARG_FL_TPOINT;
tpoint = find_tracepoint(symbol, &tp_mod);
- /* lock module until register this tprobe. */
- if (tp_mod && !try_module_get(tp_mod)) {
- tpoint = NULL;
- tp_mod = NULL;
- }
if (tpoint) {
ctx->funcname = kallsyms_lookup(
(unsigned long)tpoint->probestub,
diff --git a/lib/Kconfig b/lib/Kconfig
index 61cce0686b53..6c1b8f184267 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -139,27 +139,22 @@ config TRACE_MMIO_ACCESS
source "lib/crypto/Kconfig"
config CRC_CCITT
- tristate "CRC-CCITT functions"
+ tristate
help
- This option is provided for the case where no in-kernel-tree
- modules require CRC-CCITT functions, but a module built outside
- the kernel tree does. Such modules that use library CRC-CCITT
- functions require M here.
+ The CRC-CCITT library functions. Select this if your module uses any
+ of the functions from <linux/crc-ccitt.h>.
config CRC16
- tristate "CRC16 functions"
+ tristate
help
- This option is provided for the case where no in-kernel-tree
- modules require CRC16 functions, but a module built outside
- the kernel tree does. Such modules that use library CRC16
- functions require M here.
+ The CRC16 library functions. Select this if your module uses any of
+ the functions from <linux/crc16.h>.
config CRC_T10DIF
- tristate "CRC calculation for the T10 Data Integrity Field"
+ tristate
help
- This option is only needed if a module that's not in the
- kernel tree needs to calculate CRC checks for use with the
- SCSI data integrity subsystem.
+ The CRC-T10DIF library functions. Select this if your module uses
+ any of the functions from <linux/crc-t10dif.h>.
config ARCH_HAS_CRC_T10DIF
bool
@@ -169,22 +164,17 @@ config CRC_T10DIF_ARCH
default CRC_T10DIF if ARCH_HAS_CRC_T10DIF && CRC_OPTIMIZATIONS
config CRC_ITU_T
- tristate "CRC ITU-T V.41 functions"
+ tristate
help
- This option is provided for the case where no in-kernel-tree
- modules require CRC ITU-T V.41 functions, but a module built outside
- the kernel tree does. Such modules that use library CRC ITU-T V.41
- functions require M here.
+ The CRC-ITU-T library functions. Select this if your module uses
+ any of the functions from <linux/crc-itu-t.h>.
config CRC32
- tristate "CRC32/CRC32c functions"
- default y
+ tristate
select BITREVERSE
help
- This option is provided for the case where no in-kernel-tree
- modules require CRC32/CRC32c functions, but a module built outside
- the kernel tree does. Such modules that use library CRC32/CRC32c
- functions require M here.
+ The CRC32 library functions. Select this if your module uses any of
+ the functions from <linux/crc32.h> or <linux/crc32c.h>.
config ARCH_HAS_CRC32
bool
@@ -195,6 +185,9 @@ config CRC32_ARCH
config CRC64
tristate
+ help
+ The CRC64 library functions. Select this if your module uses any of
+ the functions from <linux/crc64.h>.
config ARCH_HAS_CRC64
bool
@@ -205,19 +198,21 @@ config CRC64_ARCH
config CRC4
tristate
+ help
+ The CRC4 library functions. Select this if your module uses any of
+ the functions from <linux/crc4.h>.
config CRC7
tristate
-
-config LIBCRC32C
- tristate
- select CRC32
help
- This option just selects CRC32 and is provided for compatibility
- purposes until the users are updated to select CRC32 directly.
+ The CRC7 library functions. Select this if your module uses any of
+ the functions from <linux/crc7.h>.
config CRC8
tristate
+ help
+ The CRC8 library functions. Select this if your module uses any of
+ the functions from <linux/crc8.h>.
config CRC_OPTIMIZATIONS
bool "Enable optimized CRC implementations" if EXPERT
diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig
index 860a0786bc1e..20b316207f9a 100644
--- a/net/batman-adv/Kconfig
+++ b/net/batman-adv/Kconfig
@@ -9,7 +9,7 @@
config BATMAN_ADV
tristate "B.A.T.M.A.N. Advanced Meshing Protocol"
- select LIBCRC32C
+ select CRC32
help
B.A.T.M.A.N. (better approach to mobile ad-hoc networking) is
a routing protocol for multi-hop ad-hoc mesh networks. The
diff --git a/net/ceph/Kconfig b/net/ceph/Kconfig
index c5c4eef3a9ff..0aa21fcbf6ec 100644
--- a/net/ceph/Kconfig
+++ b/net/ceph/Kconfig
@@ -2,7 +2,7 @@
config CEPH_LIB
tristate "Ceph core library"
depends on INET
- select LIBCRC32C
+ select CRC32
select CRYPTO_AES
select CRYPTO_CBC
select CRYPTO_GCM
diff --git a/net/core/dev.c b/net/core/dev.c
index 0608605cfc24..75e104322ad5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1518,15 +1518,7 @@ void netdev_features_change(struct net_device *dev)
}
EXPORT_SYMBOL(netdev_features_change);
-/**
- * netdev_state_change - device changes state
- * @dev: device to cause notification
- *
- * Called to indicate a device has changed state. This function calls
- * the notifier chains for netdev_chain and sends a NEWLINK message
- * to the routing socket.
- */
-void netdev_state_change(struct net_device *dev)
+void netif_state_change(struct net_device *dev)
{
if (dev->flags & IFF_UP) {
struct netdev_notifier_change_info change_info = {
@@ -1538,7 +1530,6 @@ void netdev_state_change(struct net_device *dev)
rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL, 0, NULL);
}
}
-EXPORT_SYMBOL(netdev_state_change);
/**
* __netdev_notify_peers - notify network peers about existence of @dev,
diff --git a/net/core/dev_api.c b/net/core/dev_api.c
index 90bafb0b1b8c..90898cd540ce 100644
--- a/net/core/dev_api.c
+++ b/net/core/dev_api.c
@@ -327,3 +327,19 @@ int dev_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf)
return ret;
}
EXPORT_SYMBOL_GPL(dev_xdp_propagate);
+
+/**
+ * netdev_state_change() - device changes state
+ * @dev: device to cause notification
+ *
+ * Called to indicate a device has changed state. This function calls
+ * the notifier chains for netdev_chain and sends a NEWLINK message
+ * to the routing socket.
+ */
+void netdev_state_change(struct net_device *dev)
+{
+ netdev_lock_ops(dev);
+ netif_state_change(dev);
+ netdev_unlock_ops(dev);
+}
+EXPORT_SYMBOL(netdev_state_change);
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index cb04ef2b9807..864f3bbc3a4c 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -183,7 +183,7 @@ static void linkwatch_do_dev(struct net_device *dev)
else
dev_deactivate(dev);
- netdev_state_change(dev);
+ netif_state_change(dev);
}
/* Note: our callers are responsible for calling netdev_tracker_free().
* This is the reason we use __dev_put() instead of dev_put().
@@ -240,7 +240,9 @@ static void __linkwatch_run_queue(int urgent_only)
*/
netdev_tracker_free(dev, &dev->linkwatch_dev_tracker);
spin_unlock_irq(&lweventlist_lock);
+ netdev_lock_ops(dev);
linkwatch_do_dev(dev);
+ netdev_unlock_ops(dev);
do_dev--;
spin_lock_irq(&lweventlist_lock);
}
@@ -253,25 +255,41 @@ static void __linkwatch_run_queue(int urgent_only)
spin_unlock_irq(&lweventlist_lock);
}
-void linkwatch_sync_dev(struct net_device *dev)
+static bool linkwatch_clean_dev(struct net_device *dev)
{
unsigned long flags;
- int clean = 0;
+ bool clean = false;
spin_lock_irqsave(&lweventlist_lock, flags);
if (!list_empty(&dev->link_watch_list)) {
list_del_init(&dev->link_watch_list);
- clean = 1;
+ clean = true;
/* We must release netdev tracker under
* the spinlock protection.
*/
netdev_tracker_free(dev, &dev->linkwatch_dev_tracker);
}
spin_unlock_irqrestore(&lweventlist_lock, flags);
- if (clean)
+
+ return clean;
+}
+
+void __linkwatch_sync_dev(struct net_device *dev)
+{
+ netdev_ops_assert_locked(dev);
+
+ if (linkwatch_clean_dev(dev))
linkwatch_do_dev(dev);
}
+void linkwatch_sync_dev(struct net_device *dev)
+{
+ if (linkwatch_clean_dev(dev)) {
+ netdev_lock_ops(dev);
+ linkwatch_do_dev(dev);
+ netdev_unlock_ops(dev);
+ }
+}
/* Must be called with the rtnl semaphore held */
void linkwatch_run_queue(void)
diff --git a/net/core/lock_debug.c b/net/core/lock_debug.c
index b7f22dc92a6f..941e26c1343d 100644
--- a/net/core/lock_debug.c
+++ b/net/core/lock_debug.c
@@ -20,11 +20,11 @@ int netdev_debug_event(struct notifier_block *nb, unsigned long event,
switch (cmd) {
case NETDEV_REGISTER:
case NETDEV_UP:
+ case NETDEV_CHANGE:
netdev_ops_assert_locked(dev);
fallthrough;
case NETDEV_DOWN:
case NETDEV_REBOOT:
- case NETDEV_CHANGE:
case NETDEV_UNREGISTER:
case NETDEV_CHANGEMTU:
case NETDEV_CHANGEADDR:
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index c23852835050..39a5b72e861f 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1043,7 +1043,7 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id,
}
EXPORT_SYMBOL_GPL(rtnl_put_cacheinfo);
-void netdev_set_operstate(struct net_device *dev, int newstate)
+void netif_set_operstate(struct net_device *dev, int newstate)
{
unsigned int old = READ_ONCE(dev->operstate);
@@ -1052,9 +1052,9 @@ void netdev_set_operstate(struct net_device *dev, int newstate)
return;
} while (!try_cmpxchg(&dev->operstate, &old, newstate));
- netdev_state_change(dev);
+ netif_state_change(dev);
}
-EXPORT_SYMBOL(netdev_set_operstate);
+EXPORT_SYMBOL(netif_set_operstate);
static void set_operstate(struct net_device *dev, unsigned char transition)
{
@@ -1080,7 +1080,7 @@ static void set_operstate(struct net_device *dev, unsigned char transition)
break;
}
- netdev_set_operstate(dev, operstate);
+ netif_set_operstate(dev, operstate);
}
static unsigned int rtnl_dev_get_flags(const struct net_device *dev)
@@ -3027,7 +3027,7 @@ static int do_setlink(const struct sk_buff *skb, struct net_device *dev,
err = validate_linkmsg(dev, tb, extack);
if (err < 0)
- goto errout;
+ return err;
if (tb[IFLA_IFNAME])
nla_strscpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
@@ -3396,7 +3396,7 @@ static int do_setlink(const struct sk_buff *skb, struct net_device *dev,
errout:
if (status & DO_SETLINK_MODIFIED) {
if ((status & DO_SETLINK_NOTIFY) == DO_SETLINK_NOTIFY)
- netdev_state_change(dev);
+ netif_state_change(dev);
if (err < 0)
net_warn_ratelimited("A link change request failed with some changes committed already. Interface %s may have been left with an inconsistent configuration, please check.\n",
@@ -3676,8 +3676,11 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname,
nla_len(tb[IFLA_BROADCAST]));
if (tb[IFLA_TXQLEN])
dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]);
- if (tb[IFLA_OPERSTATE])
+ if (tb[IFLA_OPERSTATE]) {
+ netdev_lock_ops(dev);
set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]));
+ netdev_unlock_ops(dev);
+ }
if (tb[IFLA_LINKMODE])
dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]);
if (tb[IFLA_GROUP])
diff --git a/net/core/sock.c b/net/core/sock.c
index f67a3c5b0988..e54449c9ab0b 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2130,6 +2130,8 @@ lenout:
*/
static inline void sock_lock_init(struct sock *sk)
{
+ sk_owner_clear(sk);
+
if (sk->sk_kern_sock)
sock_lock_init_class_and_name(
sk,
@@ -2226,6 +2228,9 @@ static void sk_prot_free(struct proto *prot, struct sock *sk)
cgroup_sk_free(&sk->sk_cgrp_data);
mem_cgroup_sk_free(sk);
security_sk_free(sk);
+
+ sk_owner_put(sk);
+
if (slab != NULL)
kmem_cache_free(slab, sk);
else
diff --git a/net/ethtool/cmis.h b/net/ethtool/cmis.h
index 1e790413db0e..4a9a946cabf0 100644
--- a/net/ethtool/cmis.h
+++ b/net/ethtool/cmis.h
@@ -101,7 +101,6 @@ struct ethtool_cmis_cdb_rpl {
};
u32 ethtool_cmis_get_max_lpl_size(u8 num_of_byte_octs);
-u32 ethtool_cmis_get_max_epl_size(u8 num_of_byte_octs);
void ethtool_cmis_cdb_compose_args(struct ethtool_cmis_cdb_cmd_args *args,
enum ethtool_cmis_cdb_cmd_id cmd, u8 *lpl,
diff --git a/net/ethtool/cmis_cdb.c b/net/ethtool/cmis_cdb.c
index d159dc121bde..0e2691ccb0df 100644
--- a/net/ethtool/cmis_cdb.c
+++ b/net/ethtool/cmis_cdb.c
@@ -16,15 +16,6 @@ u32 ethtool_cmis_get_max_lpl_size(u8 num_of_byte_octs)
return 8 * (1 + min_t(u8, num_of_byte_octs, 15));
}
-/* For accessing the EPL field on page 9Fh, the allowable length extension is
- * min(i, 255) byte octets where i specifies the allowable additional number of
- * byte octets in a READ or a WRITE.
- */
-u32 ethtool_cmis_get_max_epl_size(u8 num_of_byte_octs)
-{
- return 8 * (1 + min_t(u8, num_of_byte_octs, 255));
-}
-
void ethtool_cmis_cdb_compose_args(struct ethtool_cmis_cdb_cmd_args *args,
enum ethtool_cmis_cdb_cmd_id cmd, u8 *lpl,
u8 lpl_len, u8 *epl, u16 epl_len,
@@ -33,19 +24,16 @@ void ethtool_cmis_cdb_compose_args(struct ethtool_cmis_cdb_cmd_args *args,
{
args->req.id = cpu_to_be16(cmd);
args->req.lpl_len = lpl_len;
- if (lpl) {
+ if (lpl)
memcpy(args->req.payload, lpl, args->req.lpl_len);
- args->read_write_len_ext =
- ethtool_cmis_get_max_lpl_size(read_write_len_ext);
- }
if (epl) {
args->req.epl_len = cpu_to_be16(epl_len);
args->req.epl = epl;
- args->read_write_len_ext =
- ethtool_cmis_get_max_epl_size(read_write_len_ext);
}
args->max_duration = max_duration;
+ args->read_write_len_ext =
+ ethtool_cmis_get_max_lpl_size(read_write_len_ext);
args->msleep_pre_rpl = msleep_pre_rpl;
args->rpl_exp_len = rpl_exp_len;
args->flags = flags;
diff --git a/net/ethtool/common.c b/net/ethtool/common.c
index 0cb6da1f692a..49bea6b45bd5 100644
--- a/net/ethtool/common.c
+++ b/net/ethtool/common.c
@@ -830,6 +830,7 @@ void ethtool_ringparam_get_cfg(struct net_device *dev,
/* Driver gives us current state, we want to return current config */
kparam->tcp_data_split = dev->cfg->hds_config;
+ kparam->hds_thresh = dev->cfg->hds_thresh;
}
static void ethtool_init_tsinfo(struct kernel_ethtool_ts_info *info)
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index 221639407c72..8262cc10f98d 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -60,7 +60,7 @@ static struct devlink *netdev_to_devlink_get(struct net_device *dev)
u32 ethtool_op_get_link(struct net_device *dev)
{
/* Synchronize carrier state with link watch, see also rtnl_getlink() */
- linkwatch_sync_dev(dev);
+ __linkwatch_sync_dev(dev);
return netif_carrier_ok(dev) ? 1 : 0;
}
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index a163d40c6431..977beeaaa2f9 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -500,7 +500,7 @@ static int ethnl_default_doit(struct sk_buff *skb, struct genl_info *info)
netdev_unlock_ops(req_info->dev);
rtnl_unlock();
if (ret < 0)
- goto err_cleanup;
+ goto err_dev;
ret = ops->reply_size(req_info, reply_data);
if (ret < 0)
goto err_cleanup;
@@ -560,7 +560,7 @@ static int ethnl_default_dump_one(struct sk_buff *skb, struct net_device *dev,
netdev_unlock_ops(dev);
rtnl_unlock();
if (ret < 0)
- goto out;
+ goto out_cancel;
ret = ethnl_fill_reply_header(skb, dev, ctx->ops->hdr_attr);
if (ret < 0)
goto out;
@@ -569,6 +569,7 @@ static int ethnl_default_dump_one(struct sk_buff *skb, struct net_device *dev,
out:
if (ctx->ops->cleanup_data)
ctx->ops->cleanup_data(ctx->reply_data);
+out_cancel:
ctx->reply_data->dev = NULL;
if (ret < 0)
genlmsg_cancel(skb, ehdr);
@@ -793,7 +794,7 @@ static void ethnl_default_notify(struct net_device *dev, unsigned int cmd,
ethnl_init_reply_data(reply_data, ops, dev);
ret = ops->prepare_data(req_info, reply_data, &info);
if (ret < 0)
- goto err_cleanup;
+ goto err_rep;
ret = ops->reply_size(req_info, reply_data);
if (ret < 0)
goto err_cleanup;
@@ -828,6 +829,7 @@ err_skb:
err_cleanup:
if (ops->cleanup_data)
ops->cleanup_data(reply_data);
+err_rep:
kfree(reply_data);
kfree(req_info);
return;
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index 439cfb7ad5d1..1b1b700ec05e 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -33,14 +33,14 @@ static void hsr_set_operstate(struct hsr_port *master, bool has_carrier)
struct net_device *dev = master->dev;
if (!is_admin_up(dev)) {
- netdev_set_operstate(dev, IF_OPER_DOWN);
+ netif_set_operstate(dev, IF_OPER_DOWN);
return;
}
if (has_carrier)
- netdev_set_operstate(dev, IF_OPER_UP);
+ netif_set_operstate(dev, IF_OPER_UP);
else
- netdev_set_operstate(dev, IF_OPER_LOWERLAYERDOWN);
+ netif_set_operstate(dev, IF_OPER_LOWERLAYERDOWN);
}
static bool hsr_check_carrier(struct hsr_port *master)
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 2cffb8f4a2bc..9ba83f0c9928 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3154,12 +3154,13 @@ int addrconf_add_ifaddr(struct net *net, void __user *arg)
rtnl_net_lock(net);
dev = __dev_get_by_index(net, ireq.ifr6_ifindex);
- netdev_lock_ops(dev);
- if (dev)
+ if (dev) {
+ netdev_lock_ops(dev);
err = inet6_addr_add(net, dev, &cfg, 0, 0, NULL);
- else
+ netdev_unlock_ops(dev);
+ } else {
err = -ENODEV;
- netdev_unlock_ops(dev);
+ }
rtnl_net_unlock(net);
return err;
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index ab12b816ab94..210b84cecc24 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -470,10 +470,10 @@ void fib6_select_path(const struct net *net, struct fib6_result *res,
goto out;
hash = fl6->mp_hash;
- if (hash <= atomic_read(&first->fib6_nh->fib_nh_upper_bound) &&
- rt6_score_route(first->fib6_nh, first->fib6_flags, oif,
- strict) >= 0) {
- match = first;
+ if (hash <= atomic_read(&first->fib6_nh->fib_nh_upper_bound)) {
+ if (rt6_score_route(first->fib6_nh, first->fib6_flags, oif,
+ strict) >= 0)
+ match = first;
goto out;
}
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 409bd415ef1d..24c2de1891bd 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -899,13 +899,17 @@ create_child:
goto dispose_child;
}
- if (!subflow_hmac_valid(req, &mp_opt) ||
- !mptcp_can_accept_new_subflow(subflow_req->msk)) {
+ if (!subflow_hmac_valid(req, &mp_opt)) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT);
goto dispose_child;
}
+ if (!mptcp_can_accept_new_subflow(owner)) {
+ subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT);
+ goto dispose_child;
+ }
+
/* move the msk reference ownership to the subflow */
subflow_req->msk = NULL;
ctx->conn = (struct sock *)owner;
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index df2dc21304ef..047ba81865ed 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -212,7 +212,7 @@ config NF_CT_PROTO_SCTP
bool 'SCTP protocol connection tracking support'
depends on NETFILTER_ADVANCED
default y
- select LIBCRC32C
+ select CRC32
help
With this option enabled, the layer 3 independent connection
tracking code will be able to do state tracking on SCTP connections.
@@ -475,7 +475,7 @@ endif # NF_CONNTRACK
config NF_TABLES
select NETFILTER_NETLINK
- select LIBCRC32C
+ select CRC32
tristate "Netfilter nf_tables support"
help
nftables is the new packet classification framework that intends to
diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
index 2a3017b9c001..8c5b1fe12d07 100644
--- a/net/netfilter/ipvs/Kconfig
+++ b/net/netfilter/ipvs/Kconfig
@@ -105,7 +105,7 @@ config IP_VS_PROTO_AH
config IP_VS_PROTO_SCTP
bool "SCTP load balancing support"
- select LIBCRC32C
+ select CRC32
help
This option enables support for load balancing SCTP transport
protocol. Say Y if unsure.
diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c
index b8d3c3213efe..c15db28c5ebc 100644
--- a/net/netfilter/nft_set_pipapo_avx2.c
+++ b/net/netfilter/nft_set_pipapo_avx2.c
@@ -994,8 +994,9 @@ static int nft_pipapo_avx2_lookup_8b_16(unsigned long *map, unsigned long *fill,
NFT_PIPAPO_AVX2_BUCKET_LOAD8(5, lt, 8, pkt[8], bsize);
NFT_PIPAPO_AVX2_AND(6, 2, 3);
+ NFT_PIPAPO_AVX2_AND(3, 4, 7);
NFT_PIPAPO_AVX2_BUCKET_LOAD8(7, lt, 9, pkt[9], bsize);
- NFT_PIPAPO_AVX2_AND(0, 4, 5);
+ NFT_PIPAPO_AVX2_AND(0, 3, 5);
NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt, 10, pkt[10], bsize);
NFT_PIPAPO_AVX2_AND(2, 6, 7);
NFT_PIPAPO_AVX2_BUCKET_LOAD8(3, lt, 11, pkt[11], bsize);
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig
index 2535f3f9f462..5481bd561eb4 100644
--- a/net/openvswitch/Kconfig
+++ b/net/openvswitch/Kconfig
@@ -11,7 +11,7 @@ config OPENVSWITCH
(!NF_NAT || NF_NAT) && \
(!NETFILTER_CONNCOUNT || NETFILTER_CONNCOUNT)))
depends on PSAMPLE || !PSAMPLE
- select LIBCRC32C
+ select CRC32
select MPLS
select NET_MPLS_GSO
select DST_CACHE
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 8180d0c12fce..a800127effcd 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -784,7 +784,7 @@ config NET_ACT_SKBEDIT
config NET_ACT_CSUM
tristate "Checksum Updating"
depends on NET_CLS_ACT && INET
- select LIBCRC32C
+ select CRC32
help
Say Y here to update some common checksum after some direct
packet alterations.
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 4f648af8cfaa..ecec0a1e1c1a 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -2057,6 +2057,7 @@ static int tcf_fill_node(struct net *net, struct sk_buff *skb,
struct tcmsg *tcm;
struct nlmsghdr *nlh;
unsigned char *b = skb_tail_pointer(skb);
+ int ret = -EMSGSIZE;
nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
if (!nlh)
@@ -2101,11 +2102,45 @@ static int tcf_fill_node(struct net *net, struct sk_buff *skb,
return skb->len;
+cls_op_not_supp:
+ ret = -EOPNOTSUPP;
out_nlmsg_trim:
nla_put_failure:
-cls_op_not_supp:
nlmsg_trim(skb, b);
- return -1;
+ return ret;
+}
+
+static struct sk_buff *tfilter_notify_prep(struct net *net,
+ struct sk_buff *oskb,
+ struct nlmsghdr *n,
+ struct tcf_proto *tp,
+ struct tcf_block *block,
+ struct Qdisc *q, u32 parent,
+ void *fh, int event,
+ u32 portid, bool rtnl_held,
+ struct netlink_ext_ack *extack)
+{
+ unsigned int size = oskb ? max(NLMSG_GOODSIZE, oskb->len) : NLMSG_GOODSIZE;
+ struct sk_buff *skb;
+ int ret;
+
+retry:
+ skb = alloc_skb(size, GFP_KERNEL);
+ if (!skb)
+ return ERR_PTR(-ENOBUFS);
+
+ ret = tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
+ n->nlmsg_seq, n->nlmsg_flags, event, false,
+ rtnl_held, extack);
+ if (ret <= 0) {
+ kfree_skb(skb);
+ if (ret == -EMSGSIZE) {
+ size += NLMSG_GOODSIZE;
+ goto retry;
+ }
+ return ERR_PTR(-EINVAL);
+ }
+ return skb;
}
static int tfilter_notify(struct net *net, struct sk_buff *oskb,
@@ -2121,16 +2156,10 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
if (!unicast && !rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC))
return 0;
- skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
- if (!skb)
- return -ENOBUFS;
-
- if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
- n->nlmsg_seq, n->nlmsg_flags, event,
- false, rtnl_held, extack) <= 0) {
- kfree_skb(skb);
- return -EINVAL;
- }
+ skb = tfilter_notify_prep(net, oskb, n, tp, block, q, parent, fh, event,
+ portid, rtnl_held, extack);
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
if (unicast)
err = rtnl_unicast(skb, net, portid);
@@ -2153,16 +2182,11 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC))
return tp->ops->delete(tp, fh, last, rtnl_held, extack);
- skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
- if (!skb)
- return -ENOBUFS;
-
- if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
- n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER,
- false, rtnl_held, extack) <= 0) {
+ skb = tfilter_notify_prep(net, oskb, n, tp, block, q, parent, fh,
+ RTM_DELTFILTER, portid, rtnl_held, extack);
+ if (IS_ERR(skb)) {
NL_SET_ERR_MSG(extack, "Failed to build del event notification");
- kfree_skb(skb);
- return -EINVAL;
+ return PTR_ERR(skb);
}
err = tp->ops->delete(tp, fh, last, rtnl_held, extack);
diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
index 81189d02fee7..12dd71139da3 100644
--- a/net/sched/sch_codel.c
+++ b/net/sched/sch_codel.c
@@ -65,10 +65,7 @@ static struct sk_buff *codel_qdisc_dequeue(struct Qdisc *sch)
&q->stats, qdisc_pkt_len, codel_get_enqueue_time,
drop_func, dequeue_func);
- /* We cant call qdisc_tree_reduce_backlog() if our qlen is 0,
- * or HTB crashes. Defer it for next round.
- */
- if (q->stats.drop_count && sch->q.qlen) {
+ if (q->stats.drop_count) {
qdisc_tree_reduce_backlog(sch, q->stats.drop_count, q->stats.drop_len);
q->stats.drop_count = 0;
q->stats.drop_len = 0;
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index c69b999fae17..e0a81d313aa7 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -105,6 +105,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
return -ENOBUFS;
gnet_stats_basic_sync_init(&cl->bstats);
+ INIT_LIST_HEAD(&cl->alist);
cl->common.classid = classid;
cl->quantum = quantum;
cl->qdisc = qdisc_create_dflt(sch->dev_queue,
@@ -229,7 +230,7 @@ static void drr_qlen_notify(struct Qdisc *csh, unsigned long arg)
{
struct drr_class *cl = (struct drr_class *)arg;
- list_del(&cl->alist);
+ list_del_init(&cl->alist);
}
static int drr_dump_class(struct Qdisc *sch, unsigned long arg,
@@ -390,7 +391,7 @@ static struct sk_buff *drr_dequeue(struct Qdisc *sch)
if (unlikely(skb == NULL))
goto out;
if (cl->qdisc->q.qlen == 0)
- list_del(&cl->alist);
+ list_del_init(&cl->alist);
bstats_update(&cl->bstats, skb);
qdisc_bstats_update(sch, skb);
@@ -431,7 +432,7 @@ static void drr_reset_qdisc(struct Qdisc *sch)
for (i = 0; i < q->clhash.hashsize; i++) {
hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
if (cl->qdisc->q.qlen)
- list_del(&cl->alist);
+ list_del_init(&cl->alist);
qdisc_reset(cl->qdisc);
}
}
diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c
index 516038a44163..c3bdeb14185b 100644
--- a/net/sched/sch_ets.c
+++ b/net/sched/sch_ets.c
@@ -293,7 +293,7 @@ static void ets_class_qlen_notify(struct Qdisc *sch, unsigned long arg)
* to remove them.
*/
if (!ets_class_is_strict(q, cl) && sch->q.qlen)
- list_del(&cl->alist);
+ list_del_init(&cl->alist);
}
static int ets_class_dump(struct Qdisc *sch, unsigned long arg,
@@ -488,7 +488,7 @@ static struct sk_buff *ets_qdisc_dequeue(struct Qdisc *sch)
if (unlikely(!skb))
goto out;
if (cl->qdisc->q.qlen == 0)
- list_del(&cl->alist);
+ list_del_init(&cl->alist);
return ets_qdisc_dequeue_skb(sch, skb);
}
@@ -657,7 +657,7 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt,
}
for (i = q->nbands; i < oldbands; i++) {
if (i >= q->nstrict && q->classes[i].qdisc->q.qlen)
- list_del(&q->classes[i].alist);
+ list_del_init(&q->classes[i].alist);
qdisc_tree_flush_backlog(q->classes[i].qdisc);
}
WRITE_ONCE(q->nstrict, nstrict);
@@ -713,7 +713,7 @@ static void ets_qdisc_reset(struct Qdisc *sch)
for (band = q->nstrict; band < q->nbands; band++) {
if (q->classes[band].qdisc->q.qlen)
- list_del(&q->classes[band].alist);
+ list_del_init(&q->classes[band].alist);
}
for (band = 0; band < q->nbands; band++)
qdisc_reset(q->classes[band].qdisc);
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 799f5397ad4c..6c9029f71e88 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -315,10 +315,8 @@ begin:
}
qdisc_bstats_update(sch, skb);
flow->deficit -= qdisc_pkt_len(skb);
- /* We cant call qdisc_tree_reduce_backlog() if our qlen is 0,
- * or HTB crashes. Defer it for next round.
- */
- if (q->cstats.drop_count && sch->q.qlen) {
+
+ if (q->cstats.drop_count) {
qdisc_tree_reduce_backlog(sch, q->cstats.drop_count,
q->cstats.drop_len);
q->cstats.drop_count = 0;
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index c287bf8423b4..ce5045eea065 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -203,7 +203,10 @@ eltree_insert(struct hfsc_class *cl)
static inline void
eltree_remove(struct hfsc_class *cl)
{
- rb_erase(&cl->el_node, &cl->sched->eligible);
+ if (!RB_EMPTY_NODE(&cl->el_node)) {
+ rb_erase(&cl->el_node, &cl->sched->eligible);
+ RB_CLEAR_NODE(&cl->el_node);
+ }
}
static inline void
@@ -1220,7 +1223,8 @@ hfsc_qlen_notify(struct Qdisc *sch, unsigned long arg)
/* vttree is now handled in update_vf() so that update_vf(cl, 0, 0)
* needs to be called explicitly to remove a class from vttree.
*/
- update_vf(cl, 0, 0);
+ if (cl->cl_nactive)
+ update_vf(cl, 0, 0);
if (cl->cl_flags & HFSC_RSC)
eltree_remove(cl);
}
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index c31bc5489bdd..4b9a639b642e 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1485,6 +1485,8 @@ static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
{
struct htb_class *cl = (struct htb_class *)arg;
+ if (!cl->prio_activity)
+ return;
htb_deactivate(qdisc_priv(sch), cl);
}
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 2cfbc977fe6d..687a932eb9b2 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -347,7 +347,7 @@ static void qfq_deactivate_class(struct qfq_sched *q, struct qfq_class *cl)
struct qfq_aggregate *agg = cl->agg;
- list_del(&cl->alist); /* remove from RR queue of the aggregate */
+ list_del_init(&cl->alist); /* remove from RR queue of the aggregate */
if (list_empty(&agg->active)) /* agg is now inactive */
qfq_deactivate_agg(q, agg);
}
@@ -474,6 +474,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
gnet_stats_basic_sync_init(&cl->bstats);
cl->common.classid = classid;
cl->deficit = lmax;
+ INIT_LIST_HEAD(&cl->alist);
cl->qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
classid, NULL);
@@ -982,7 +983,7 @@ static struct sk_buff *agg_dequeue(struct qfq_aggregate *agg,
cl->deficit -= (int) len;
if (cl->qdisc->q.qlen == 0) /* no more packets, remove from list */
- list_del(&cl->alist);
+ list_del_init(&cl->alist);
else if (cl->deficit < qdisc_pkt_len(cl->qdisc->ops->peek(cl->qdisc))) {
cl->deficit += agg->lmax;
list_move_tail(&cl->alist, &agg->active);
@@ -1415,6 +1416,8 @@ static void qfq_qlen_notify(struct Qdisc *sch, unsigned long arg)
struct qfq_sched *q = qdisc_priv(sch);
struct qfq_class *cl = (struct qfq_class *)arg;
+ if (list_empty(&cl->alist))
+ return;
qfq_deactivate_class(q, cl);
}
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 9ed197e96396..b912ad99aa15 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -631,6 +631,15 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt,
struct red_parms *p = NULL;
struct sk_buff *to_free = NULL;
struct sk_buff *tail = NULL;
+ unsigned int maxflows;
+ unsigned int quantum;
+ unsigned int divisor;
+ int perturb_period;
+ u8 headdrop;
+ u8 maxdepth;
+ int limit;
+ u8 flags;
+
if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
return -EINVAL;
@@ -652,39 +661,64 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt,
if (!p)
return -ENOMEM;
}
- if (ctl->limit == 1) {
- NL_SET_ERR_MSG_MOD(extack, "invalid limit");
- return -EINVAL;
- }
+
sch_tree_lock(sch);
+
+ limit = q->limit;
+ divisor = q->divisor;
+ headdrop = q->headdrop;
+ maxdepth = q->maxdepth;
+ maxflows = q->maxflows;
+ perturb_period = q->perturb_period;
+ quantum = q->quantum;
+ flags = q->flags;
+
+ /* update and validate configuration */
if (ctl->quantum)
- q->quantum = ctl->quantum;
- WRITE_ONCE(q->perturb_period, ctl->perturb_period * HZ);
+ quantum = ctl->quantum;
+ perturb_period = ctl->perturb_period * HZ;
if (ctl->flows)
- q->maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS);
+ maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS);
if (ctl->divisor) {
- q->divisor = ctl->divisor;
- q->maxflows = min_t(u32, q->maxflows, q->divisor);
+ divisor = ctl->divisor;
+ maxflows = min_t(u32, maxflows, divisor);
}
if (ctl_v1) {
if (ctl_v1->depth)
- q->maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH);
+ maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH);
if (p) {
- swap(q->red_parms, p);
- red_set_parms(q->red_parms,
+ red_set_parms(p,
ctl_v1->qth_min, ctl_v1->qth_max,
ctl_v1->Wlog,
ctl_v1->Plog, ctl_v1->Scell_log,
NULL,
ctl_v1->max_P);
}
- q->flags = ctl_v1->flags;
- q->headdrop = ctl_v1->headdrop;
+ flags = ctl_v1->flags;
+ headdrop = ctl_v1->headdrop;
}
if (ctl->limit) {
- q->limit = min_t(u32, ctl->limit, q->maxdepth * q->maxflows);
- q->maxflows = min_t(u32, q->maxflows, q->limit);
+ limit = min_t(u32, ctl->limit, maxdepth * maxflows);
+ maxflows = min_t(u32, maxflows, limit);
}
+ if (limit == 1) {
+ sch_tree_unlock(sch);
+ kfree(p);
+ NL_SET_ERR_MSG_MOD(extack, "invalid limit");
+ return -EINVAL;
+ }
+
+ /* commit configuration */
+ q->limit = limit;
+ q->divisor = divisor;
+ q->headdrop = headdrop;
+ q->maxdepth = maxdepth;
+ q->maxflows = maxflows;
+ WRITE_ONCE(q->perturb_period, perturb_period);
+ q->quantum = quantum;
+ q->flags = flags;
+ if (p)
+ swap(q->red_parms, p);
qlen = sch->q.qlen;
while (sch->q.qlen > q->limit) {
diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig
index 5da599ff84a9..d18a72df3654 100644
--- a/net/sctp/Kconfig
+++ b/net/sctp/Kconfig
@@ -7,10 +7,10 @@ menuconfig IP_SCTP
tristate "The SCTP Protocol"
depends on INET
depends on IPV6 || IPV6=n
+ select CRC32
select CRYPTO
select CRYPTO_HMAC
select CRYPTO_SHA1
- select LIBCRC32C
select NET_UDP_TUNNEL
help
Stream Control Transmission Protocol
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 36ee34f483d7..53725ee7ba06 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -72,8 +72,9 @@
/* Forward declarations for internal helper functions. */
static bool sctp_writeable(const struct sock *sk);
static void sctp_wfree(struct sk_buff *skb);
-static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
- size_t msg_len);
+static int sctp_wait_for_sndbuf(struct sctp_association *asoc,
+ struct sctp_transport *transport,
+ long *timeo_p, size_t msg_len);
static int sctp_wait_for_packet(struct sock *sk, int *err, long *timeo_p);
static int sctp_wait_for_connect(struct sctp_association *, long *timeo_p);
static int sctp_wait_for_accept(struct sock *sk, long timeo);
@@ -1828,7 +1829,7 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc,
if (sctp_wspace(asoc) <= 0 || !sk_wmem_schedule(sk, msg_len)) {
timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
- err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
+ err = sctp_wait_for_sndbuf(asoc, transport, &timeo, msg_len);
if (err)
goto err;
if (unlikely(sinfo->sinfo_stream >= asoc->stream.outcnt)) {
@@ -9214,8 +9215,9 @@ void sctp_sock_rfree(struct sk_buff *skb)
/* Helper function to wait for space in the sndbuf. */
-static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
- size_t msg_len)
+static int sctp_wait_for_sndbuf(struct sctp_association *asoc,
+ struct sctp_transport *transport,
+ long *timeo_p, size_t msg_len)
{
struct sock *sk = asoc->base.sk;
long current_timeo = *timeo_p;
@@ -9225,7 +9227,9 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
pr_debug("%s: asoc:%p, timeo:%ld, msg_len:%zu\n", __func__, asoc,
*timeo_p, msg_len);
- /* Increment the association's refcnt. */
+ /* Increment the transport and association's refcnt. */
+ if (transport)
+ sctp_transport_hold(transport);
sctp_association_hold(asoc);
/* Wait on the association specific sndbuf space. */
@@ -9234,7 +9238,7 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
TASK_INTERRUPTIBLE);
if (asoc->base.dead)
goto do_dead;
- if (!*timeo_p)
+ if ((!*timeo_p) || (transport && transport->dead))
goto do_nonblock;
if (sk->sk_err || asoc->state >= SCTP_STATE_SHUTDOWN_PENDING)
goto do_error;
@@ -9259,7 +9263,9 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
out:
finish_wait(&asoc->wait, &wait);
- /* Release the association's refcnt. */
+ /* Release the transport and association's refcnt. */
+ if (transport)
+ sctp_transport_put(transport);
sctp_association_put(asoc);
return err;
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 59675f6d9e7d..6946c1462793 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -117,6 +117,8 @@ fail:
*/
void sctp_transport_free(struct sctp_transport *transport)
{
+ transport->dead = 1;
+
/* Try to delete the heartbeat timer. */
if (timer_delete(&transport->hb_timer))
sctp_transport_put(transport);
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 50c2e0846ea4..18be6ff4c3db 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1046,6 +1046,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
if (unlikely(l->backlog[imp].len >= l->backlog[imp].limit)) {
if (imp == TIPC_SYSTEM_IMPORTANCE) {
pr_warn("%s<%s>, link overflow", link_rst_msg, l->name);
+ __skb_queue_purge(list);
return -ENOBUFS;
}
rc = link_schedule_user(l, hdr);
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index cb86b0bf9a53..a3ccb3135e51 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -852,6 +852,11 @@ static int tls_setsockopt(struct sock *sk, int level, int optname,
return do_tls_setsockopt(sk, optname, optval, optlen);
}
+static int tls_disconnect(struct sock *sk, int flags)
+{
+ return -EOPNOTSUPP;
+}
+
struct tls_context *tls_ctx_create(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
@@ -947,6 +952,7 @@ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG],
prot[TLS_BASE][TLS_BASE] = *base;
prot[TLS_BASE][TLS_BASE].setsockopt = tls_setsockopt;
prot[TLS_BASE][TLS_BASE].getsockopt = tls_getsockopt;
+ prot[TLS_BASE][TLS_BASE].disconnect = tls_disconnect;
prot[TLS_BASE][TLS_BASE].close = tls_sk_proto_close;
prot[TLS_SW][TLS_BASE] = prot[TLS_BASE][TLS_BASE];
diff --git a/sound/soc/sof/intel/Kconfig b/sound/soc/sof/intel/Kconfig
index fae3598fd601..dc1d21de4ab7 100644
--- a/sound/soc/sof/intel/Kconfig
+++ b/sound/soc/sof/intel/Kconfig
@@ -345,6 +345,7 @@ endif ## SND_SOC_SOF_HDA_GENERIC
config SND_SOF_SOF_HDA_SDW_BPT
tristate
+ select SND_HDA_EXT_CORE
help
This option is not user-selectable but automagically handled by
'select' statements at a higher level.
diff --git a/tools/include/uapi/linux/types.h b/tools/include/uapi/linux/types.h
index 91fa51a9c31d..85aa327245c6 100644
--- a/tools/include/uapi/linux/types.h
+++ b/tools/include/uapi/linux/types.h
@@ -4,6 +4,8 @@
#include <asm-generic/int-ll64.h>
+#ifndef __ASSEMBLER__
+
/* copied from linux:include/uapi/linux/types.h */
#define __bitwise
typedef __u16 __bitwise __le16;
@@ -20,4 +22,5 @@ typedef __u32 __bitwise __wsum;
#define __aligned_be64 __be64 __attribute__((aligned(8)))
#define __aligned_le64 __le64 __attribute__((aligned(8)))
+#endif /* __ASSEMBLER__ */
#endif /* _UAPI_LINUX_TYPES_H */
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 33d861c04ebd..3ce7b54003c2 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -522,7 +522,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
case INAT_PFX_REPNE:
if (modrm == 0xca)
/* eretu/erets */
- insn->type = INSN_CONTEXT_SWITCH;
+ insn->type = INSN_SYSRET;
break;
default:
if (modrm == 0xca)
@@ -535,11 +535,15 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
insn->type = INSN_JUMP_CONDITIONAL;
- } else if (op2 == 0x05 || op2 == 0x07 || op2 == 0x34 ||
- op2 == 0x35) {
+ } else if (op2 == 0x05 || op2 == 0x34) {
- /* sysenter, sysret */
- insn->type = INSN_CONTEXT_SWITCH;
+ /* syscall, sysenter */
+ insn->type = INSN_SYSCALL;
+
+ } else if (op2 == 0x07 || op2 == 0x35) {
+
+ /* sysret, sysexit */
+ insn->type = INSN_SYSRET;
} else if (op2 == 0x0b || op2 == 0xb9) {
@@ -676,7 +680,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
case 0xca: /* retf */
case 0xcb: /* retf */
- insn->type = INSN_CONTEXT_SWITCH;
+ insn->type = INSN_SYSRET;
break;
case 0xe0: /* loopne */
@@ -721,7 +725,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
} else if (modrm_reg == 5) {
/* jmpf */
- insn->type = INSN_CONTEXT_SWITCH;
+ insn->type = INSN_SYSRET;
} else if (modrm_reg == 6) {
diff --git a/tools/objtool/arch/x86/special.c b/tools/objtool/arch/x86/special.c
index 403e587676f1..06ca4a2659a4 100644
--- a/tools/objtool/arch/x86/special.c
+++ b/tools/objtool/arch/x86/special.c
@@ -126,7 +126,7 @@ struct reloc *arch_find_switch_table(struct objtool_file *file,
* indicates a rare GCC quirk/bug which can leave dead
* code behind.
*/
- if (reloc_type(text_reloc) == R_X86_64_PC32) {
+ if (!file->ignore_unreachables && reloc_type(text_reloc) == R_X86_64_PC32) {
WARN_INSN(insn, "ignoring unreachables due to jump table quirk");
file->ignore_unreachables = true;
}
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 4a1f6c3169b3..b649049b6a11 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -3505,6 +3505,34 @@ next_orig:
return next_insn_same_sec(file, alt_group->orig_group->last_insn);
}
+static bool skip_alt_group(struct instruction *insn)
+{
+ struct instruction *alt_insn = insn->alts ? insn->alts->insn : NULL;
+
+ /* ANNOTATE_IGNORE_ALTERNATIVE */
+ if (insn->alt_group && insn->alt_group->ignore)
+ return true;
+
+ /*
+ * For NOP patched with CLAC/STAC, only follow the latter to avoid
+ * impossible code paths combining patched CLAC with unpatched STAC
+ * or vice versa.
+ *
+ * ANNOTATE_IGNORE_ALTERNATIVE could have been used here, but Linus
+ * requested not to do that to avoid hurting .s file readability
+ * around CLAC/STAC alternative sites.
+ */
+
+ if (!alt_insn)
+ return false;
+
+ /* Don't override ASM_{CLAC,STAC}_UNSAFE */
+ if (alt_insn->alt_group && alt_insn->alt_group->ignore)
+ return false;
+
+ return alt_insn->type == INSN_CLAC || alt_insn->type == INSN_STAC;
+}
+
/*
* Follow the branch starting at the given instruction, and recursively follow
* any other branches (jumps). Meanwhile, track the frame pointer state at
@@ -3625,7 +3653,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
}
}
- if (insn->alt_group && insn->alt_group->ignore)
+ if (skip_alt_group(insn))
return 0;
if (handle_insn_ops(insn, next_insn, &state))
@@ -3684,14 +3712,20 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
break;
- case INSN_CONTEXT_SWITCH:
- if (func) {
- if (!next_insn || !next_insn->hint) {
- WARN_INSN(insn, "unsupported instruction in callable function");
- return 1;
- }
- break;
+ case INSN_SYSCALL:
+ if (func && (!next_insn || !next_insn->hint)) {
+ WARN_INSN(insn, "unsupported instruction in callable function");
+ return 1;
+ }
+
+ break;
+
+ case INSN_SYSRET:
+ if (func && (!next_insn || !next_insn->hint)) {
+ WARN_INSN(insn, "unsupported instruction in callable function");
+ return 1;
}
+
return 0;
case INSN_STAC:
@@ -3886,6 +3920,12 @@ static int validate_unret(struct objtool_file *file, struct instruction *insn)
WARN_INSN(insn, "RET before UNTRAIN");
return 1;
+ case INSN_SYSCALL:
+ break;
+
+ case INSN_SYSRET:
+ return 0;
+
case INSN_NOP:
if (insn->retpoline_safe)
return 0;
@@ -3895,6 +3935,9 @@ static int validate_unret(struct objtool_file *file, struct instruction *insn)
break;
}
+ if (insn->dead_end)
+ return 0;
+
if (!next) {
WARN_INSN(insn, "teh end!");
return 1;
diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h
index 089a1acc48a8..01ef6f415adf 100644
--- a/tools/objtool/include/objtool/arch.h
+++ b/tools/objtool/include/objtool/arch.h
@@ -19,7 +19,8 @@ enum insn_type {
INSN_CALL,
INSN_CALL_DYNAMIC,
INSN_RETURN,
- INSN_CONTEXT_SWITCH,
+ INSN_SYSCALL,
+ INSN_SYSRET,
INSN_BUG,
INSN_NOP,
INSN_STAC,
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
index 99bf905ade81..b74ed916057e 100644
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -100,7 +100,7 @@ The column name "all" can be used to enable all disabled-by-default built-in cou
.PP
\fB--show column\fP show only the specified built-in columns. May be invoked multiple times, or with a comma-separated list of column names.
.PP
-\fB--show CATEGORY --hide CATEGORY\fP Show and hide also accept a single CATEGORY of columns: "all", "topology", "idle", "frequency", "power", "sysfs", "other".
+\fB--show CATEGORY --hide CATEGORY\fP Show and hide also accept a single CATEGORY of columns: "all", "topology", "idle", "frequency", "power", "cpuidle", "hwidle", "swidle", "other". "idle" (enabled by default), includes "hwidle" and "idle_pct". "cpuidle" (default disabled) includes cpuidle software invocation counters. "swidle" includes "cpuidle" plus "idle_pct". "hwidle" includes only hardware based idle residency counters. Older versions of turbostat used the term "sysfs" for what is now "swidle".
.PP
\fB--Dump\fP displays the raw counter values.
.PP
@@ -158,16 +158,22 @@ The system configuration dump (if --quiet is not used) is followed by statistics
.PP
\fBSMI\fP The number of System Management Interrupts serviced CPU during the measurement interval. While this counter is actually per-CPU, SMI are triggered on all processors, so the number should be the same for all CPUs.
.PP
-\fBC1, C2, C3...\fP The number times Linux requested the C1, C2, C3 idle state during the measurement interval. The system summary line shows the sum for all CPUs. These are C-state names as exported in /sys/devices/system/cpu/cpu*/cpuidle/state*/name. While their names are generic, their attributes are processor specific. They the system description section of output shows what MWAIT sub-states they are mapped to on each system.
+\fBC1, C2, C3...\fP The number times Linux requested the C1, C2, C3 idle state during the measurement interval. The system summary line shows the sum for all CPUs. These are C-state names as exported in /sys/devices/system/cpu/cpu*/cpuidle/state*/name. While their names are generic, their attributes are processor specific. They the system description section of output shows what MWAIT sub-states they are mapped to on each system. These counters are in the "cpuidle" group, which is disabled, by default.
.PP
-\fBC1%, C2%, C3%\fP The residency percentage that Linux requested C1, C2, C3.... The system summary is the average of all CPUs in the system. Note that these are software, reflecting what was requested. The hardware counters reflect what was actually achieved.
+\fBC1+, C2+, C3+...\fP The idle governor idle state misprediction statistics. Inidcates the number times Linux requested the C1, C2, C3 idle state during the measurement interval, but should have requested a deeper idle state (if it exists and enabled). These statistics come from the /sys/devices/system/cpu/cpu*/cpuidle/state*/below file. These counters are in the "cpuidle" group, which is disabled, by default.
.PP
-\fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states. These numbers are from hardware residency counters.
+\fBC1-, C2-, C3-...\fP The idle governor idle state misprediction statistics. Inidcates the number times Linux requested the C1, C2, C3 idle state during the measurement interval, but should have requested a shallower idle state (if it exists and enabled). These statistics come from the /sys/devices/system/cpu/cpu*/cpuidle/state*/above file. These counters are in the "cpuidle" group, which is disabled, by default.
+.PP
+\fBC1%, C2%, C3%\fP The residency percentage that Linux requested C1, C2, C3.... The system summary is the average of all CPUs in the system. Note that these are software, reflecting what was requested. The hardware counters reflect what was actually achieved. These counters are in the "pct_idle" group, which is enabled by default.
+.PP
+\fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states. These numbers are from hardware residency counters and are in the "hwidle" group, which is enabled, by default.
.PP
\fBCoreTmp\fP Degrees Celsius reported by the per-core Digital Thermal Sensor.
.PP
\fBPkgTmp\fP Degrees Celsius reported by the per-package Package Thermal Monitor.
.PP
+\fBCoreThr\fP Core Thermal Throttling events during the measurement interval. Note that events since boot can be find in /sys/devices/system/cpu/cpu*/thermal_throttle/*
+.PP
\fBGFX%rc6\fP The percentage of time the GPU is in the "render C6" state, rc6, during the measurement interval. From /sys/class/drm/card0/power/rc6_residency_ms or /sys/class/drm/card0/gt/gt0/rc6_residency_ms or /sys/class/drm/card0/device/tile0/gtN/gtidle/idle_residency_ms depending on the graphics driver being used.
.PP
\fBGFXMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz or /sys/class/drm/card0/gt_cur_freq_mhz or /sys/class/drm/card0/gt/gt0/rps_cur_freq_mhz or /sys/class/drm/card0/device/tile0/gtN/freq0/cur_freq depending on the graphics driver being used.
@@ -199,6 +205,8 @@ The system configuration dump (if --quiet is not used) is followed by statistics
\fBUncMHz\fP per-package uncore MHz, instantaneous sample.
.PP
\fBUMHz1.0\fP per-package uncore MHz for domain=1 and fabric_cluster=0, instantaneous sample. System summary is the average of all packages.
+Intel Granite Rapids systems use domains 0-2 for CPUs, and 3-4 for IO, with cluster always 0.
+For the "--show" and "--hide" options, use "UncMHz" to operate on all UMHz*.* as a group.
.SH TOO MUCH INFORMATION EXAMPLE
By default, turbostat dumps all possible information -- a system configuration header, followed by columns for all counters.
This is ideal for remote debugging, use the "--out" option to save everything to a text file, and get that file to the expert helping you debug.
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 26057af6b5a1..0170d3cc6819 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -153,7 +153,7 @@ struct msr_counter bic[] = {
{ 0x0, "TSC_MHz", NULL, 0, 0, 0, NULL, 0 },
{ 0x0, "IRQ", NULL, 0, 0, 0, NULL, 0 },
{ 0x0, "SMI", NULL, 32, 0, FORMAT_DELTA, NULL, 0 },
- { 0x0, "sysfs", NULL, 0, 0, 0, NULL, 0 },
+ { 0x0, "cpuidle", NULL, 0, 0, 0, NULL, 0 },
{ 0x0, "CPU%c1", NULL, 0, 0, 0, NULL, 0 },
{ 0x0, "CPU%c3", NULL, 0, 0, 0, NULL, 0 },
{ 0x0, "CPU%c6", NULL, 0, 0, 0, NULL, 0 },
@@ -206,6 +206,7 @@ struct msr_counter bic[] = {
{ 0x0, "Sys_J", NULL, 0, 0, 0, NULL, 0 },
{ 0x0, "NMI", NULL, 0, 0, 0, NULL, 0 },
{ 0x0, "CPU%c1e", NULL, 0, 0, 0, NULL, 0 },
+ { 0x0, "pct_idle", NULL, 0, 0, 0, NULL, 0 },
};
#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
@@ -219,7 +220,7 @@ struct msr_counter bic[] = {
#define BIC_TSC_MHz (1ULL << 7)
#define BIC_IRQ (1ULL << 8)
#define BIC_SMI (1ULL << 9)
-#define BIC_sysfs (1ULL << 10)
+#define BIC_cpuidle (1ULL << 10)
#define BIC_CPU_c1 (1ULL << 11)
#define BIC_CPU_c3 (1ULL << 12)
#define BIC_CPU_c6 (1ULL << 13)
@@ -272,17 +273,20 @@ struct msr_counter bic[] = {
#define BIC_Sys_J (1ULL << 60)
#define BIC_NMI (1ULL << 61)
#define BIC_CPU_c1e (1ULL << 62)
-
-#define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die)
-#define BIC_THERMAL_PWR (BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__ | BIC_SysWatt)
-#define BIC_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ)
-#define BIC_IDLE (BIC_Busy | BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6)
+#define BIC_pct_idle (1ULL << 63)
+
+#define BIC_GROUP_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die)
+#define BIC_GROUP_THERMAL_PWR (BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__ | BIC_SysWatt)
+#define BIC_GROUP_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ)
+#define BIC_GROUP_HW_IDLE (BIC_Busy | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6)
+#define BIC_GROUP_SW_IDLE (BIC_Busy | BIC_cpuidle | BIC_pct_idle )
+#define BIC_GROUP_IDLE (BIC_GROUP_HW_IDLE | BIC_pct_idle)
#define BIC_OTHER (BIC_IRQ | BIC_NMI | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC)
-#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC)
+#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC | BIC_cpuidle)
unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT);
-unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC;
+unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_cpuidle | BIC_pct_idle | BIC_APIC | BIC_X2APIC;
#define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME)
#define DO_BIC_READ(COUNTER_NAME) (bic_present & COUNTER_NAME)
@@ -1121,7 +1125,7 @@ end:
int backwards_count;
char *progname;
-#define CPU_SUBSET_MAXCPUS 1024 /* need to use before probe... */
+#define CPU_SUBSET_MAXCPUS 8192 /* need to use before probe... */
cpu_set_t *cpu_present_set, *cpu_possible_set, *cpu_effective_set, *cpu_allowed_set, *cpu_affinity_set, *cpu_subset;
size_t cpu_present_setsize, cpu_possible_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affinity_setsize, cpu_subset_size;
#define MAX_ADDED_THREAD_COUNTERS 24
@@ -2211,7 +2215,7 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr)
return 0;
}
-int probe_msr(int cpu, off_t offset)
+int probe_rapl_msr(int cpu, off_t offset, int index)
{
ssize_t retval;
unsigned long long value;
@@ -2220,13 +2224,22 @@ int probe_msr(int cpu, off_t offset)
retval = pread(get_msr_fd(cpu), &value, sizeof(value), offset);
- /*
- * Expect MSRs to accumulate some non-zero value since the system was powered on.
- * Treat zero as a read failure.
- */
- if (retval != sizeof(value) || value == 0)
+ /* if the read failed, the probe fails */
+ if (retval != sizeof(value))
return 1;
+ /* If an Energy Status Counter MSR returns 0, the probe fails */
+ switch (index) {
+ case RAPL_RCI_INDEX_ENERGY_PKG:
+ case RAPL_RCI_INDEX_ENERGY_CORES:
+ case RAPL_RCI_INDEX_DRAM:
+ case RAPL_RCI_INDEX_GFX:
+ case RAPL_RCI_INDEX_ENERGY_PLATFORM:
+ if (value == 0)
+ return 1;
+ }
+
+ /* PKG,DRAM_PERF_STATUS MSRs, can return any value */
return 0;
}
@@ -2345,16 +2358,25 @@ unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode)
retval |= ~0;
break;
} else if (!strcmp(name_list, "topology")) {
- retval |= BIC_TOPOLOGY;
+ retval |= BIC_GROUP_TOPOLOGY;
break;
} else if (!strcmp(name_list, "power")) {
- retval |= BIC_THERMAL_PWR;
+ retval |= BIC_GROUP_THERMAL_PWR;
break;
} else if (!strcmp(name_list, "idle")) {
- retval |= BIC_IDLE;
+ retval |= BIC_GROUP_IDLE;
+ break;
+ } else if (!strcmp(name_list, "swidle")) {
+ retval |= BIC_GROUP_SW_IDLE;
+ break;
+ } else if (!strcmp(name_list, "sysfs")) { /* legacy compatibility */
+ retval |= BIC_GROUP_SW_IDLE;
+ break;
+ } else if (!strcmp(name_list, "hwidle")) {
+ retval |= BIC_GROUP_HW_IDLE;
break;
} else if (!strcmp(name_list, "frequency")) {
- retval |= BIC_FREQUENCY;
+ retval |= BIC_GROUP_FREQUENCY;
break;
} else if (!strcmp(name_list, "other")) {
retval |= BIC_OTHER;
@@ -2363,6 +2385,7 @@ unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode)
}
if (i == MAX_BIC) {
+ fprintf(stderr, "deferred %s\n", name_list);
if (mode == SHOW_LIST) {
deferred_add_names[deferred_add_index++] = name_list;
if (deferred_add_index >= MAX_DEFERRED) {
@@ -3476,7 +3499,7 @@ void delta_core(struct core_data *new, struct core_data *old)
old->c6 = new->c6 - old->c6;
old->c7 = new->c7 - old->c7;
old->core_temp_c = new->core_temp_c;
- old->core_throt_cnt = new->core_throt_cnt;
+ old->core_throt_cnt = new->core_throt_cnt - old->core_throt_cnt;
old->mc6_us = new->mc6_us - old->mc6_us;
DELTA_WRAP32(new->core_energy.raw_value, old->core_energy.raw_value);
@@ -6030,6 +6053,7 @@ int snapshot_graphics(int idx)
int retval;
rewind(gfx_info[idx].fp);
+ fflush(gfx_info[idx].fp);
switch (idx) {
case GFX_rc6:
@@ -6703,7 +6727,18 @@ static void probe_intel_uncore_frequency_cluster(void)
sprintf(path, "%s/current_freq_khz", path_base);
sprintf(name_buf, "UMHz%d.%d", domain_id, cluster_id);
- add_counter(0, path, name_buf, 0, SCOPE_PACKAGE, COUNTER_K2M, FORMAT_AVERAGE, 0, package_id);
+ /*
+ * Once add_couter() is called, that counter is always read
+ * and reported -- So it is effectively (enabled & present).
+ * Only call add_counter() here if legacy BIC_UNCORE_MHZ (UncMHz)
+ * is (enabled). Since we are in this routine, we
+ * know we will not probe and set (present) the legacy counter.
+ *
+ * This allows "--show/--hide UncMHz" to be effective for
+ * the clustered MHz counters, as a group.
+ */
+ if BIC_IS_ENABLED(BIC_UNCORE_MHZ)
+ add_counter(0, path, name_buf, 0, SCOPE_PACKAGE, COUNTER_K2M, FORMAT_AVERAGE, 0, package_id);
if (quiet)
continue;
@@ -7896,7 +7931,7 @@ void rapl_perf_init(void)
rci->flags[cai->rci_index] = cai->flags;
/* Use MSR for this counter */
- } else if (!no_msr && cai->msr && probe_msr(cpu, cai->msr) == 0) {
+ } else if (!no_msr && cai->msr && probe_rapl_msr(cpu, cai->msr, cai->rci_index) == 0) {
rci->source[cai->rci_index] = COUNTER_SOURCE_MSR;
rci->msr[cai->rci_index] = cai->msr;
rci->msr_mask[cai->rci_index] = cai->msr_mask;
@@ -8034,7 +8069,7 @@ void msr_perf_init_(void)
cai->present = true;
/* User MSR for this counter */
- } else if (!no_msr && cai->msr && probe_msr(cpu, cai->msr) == 0) {
+ } else if (!no_msr && cai->msr && probe_rapl_msr(cpu, cai->msr, cai->rci_index) == 0) {
cci->source[cai->rci_index] = COUNTER_SOURCE_MSR;
cci->msr[cai->rci_index] = cai->msr;
cci->msr_mask[cai->rci_index] = cai->msr_mask;
@@ -8148,7 +8183,7 @@ void cstate_perf_init_(bool soft_c1)
/* User MSR for this counter */
} else if (!no_msr && cai->msr && pkg_cstate_limit >= cai->pkg_cstate_limit
- && probe_msr(cpu, cai->msr) == 0) {
+ && probe_rapl_msr(cpu, cai->msr, cai->rci_index) == 0) {
cci->source[cai->rci_index] = COUNTER_SOURCE_MSR;
cci->msr[cai->rci_index] = cai->msr;
}
@@ -9559,7 +9594,7 @@ int get_and_dump_counters(void)
void print_version()
{
- fprintf(outf, "turbostat version 2025.02.02 - Len Brown <lenb@kernel.org>\n");
+ fprintf(outf, "turbostat version 2025.04.06 - Len Brown <lenb@kernel.org>\n");
}
#define COMMAND_LINE_SIZE 2048
@@ -9592,7 +9627,7 @@ struct msr_counter *find_msrp_by_name(struct msr_counter *head, char *name)
for (mp = head; mp; mp = mp->next) {
if (debug)
fprintf(stderr, "%s: %s %s\n", __func__, name, mp->name);
- if (!strncmp(name, mp->name, strlen(mp->name)))
+ if (!strcmp(name, mp->name))
return mp;
}
return NULL;
@@ -10239,14 +10274,18 @@ int is_deferred_skip(char *name)
return 0;
}
-void probe_sysfs(void)
+void probe_cpuidle_residency(void)
{
char path[64];
char name_buf[16];
FILE *input;
int state;
+ int min_state = 1024, max_state = 0;
char *sp;
+ if (!DO_BIC(BIC_pct_idle))
+ return;
+
for (state = 10; state >= 0; --state) {
sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state);
@@ -10269,14 +10308,32 @@ void probe_sysfs(void)
sprintf(path, "cpuidle/state%d/time", state);
- if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf))
+ if (!DO_BIC(BIC_pct_idle) && !is_deferred_add(name_buf))
continue;
if (is_deferred_skip(name_buf))
continue;
add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_USEC, FORMAT_PERCENT, SYSFS_PERCPU, 0);
+
+ if (state > max_state)
+ max_state = state;
+ if (state < min_state)
+ min_state = state;
}
+}
+
+void probe_cpuidle_counts(void)
+{
+ char path[64];
+ char name_buf[16];
+ FILE *input;
+ int state;
+ int min_state = 1024, max_state = 0;
+ char *sp;
+
+ if (!DO_BIC(BIC_cpuidle))
+ return;
for (state = 10; state >= 0; --state) {
@@ -10286,26 +10343,52 @@ void probe_sysfs(void)
continue;
if (!fgets(name_buf, sizeof(name_buf), input))
err(1, "%s: failed to read file", path);
- /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
- sp = strchr(name_buf, '-');
- if (!sp)
- sp = strchrnul(name_buf, '\n');
- *sp = '\0';
fclose(input);
remove_underbar(name_buf);
- sprintf(path, "cpuidle/state%d/usage", state);
-
- if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf))
+ if (!DO_BIC(BIC_cpuidle) && !is_deferred_add(name_buf))
continue;
if (is_deferred_skip(name_buf))
continue;
+ /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
+ sp = strchr(name_buf, '-');
+ if (!sp)
+ sp = strchrnul(name_buf, '\n');
+
+ /*
+ * The 'below' sysfs file always contains 0 for the deepest state (largest index),
+ * do not add it.
+ */
+ if (state != max_state) {
+ /*
+ * Add 'C1+' for C1, and so on. The 'below' sysfs file always contains 0 for
+ * the last state, so do not add it.
+ */
+
+ *sp = '+';
+ *(sp + 1) = '\0';
+ sprintf(path, "cpuidle/state%d/below", state);
+ add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU, 0);
+ }
+
+ *sp = '\0';
+ sprintf(path, "cpuidle/state%d/usage", state);
add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU, 0);
- }
+ /*
+ * The 'above' sysfs file always contains 0 for the shallowest state (smallest
+ * index), do not add it.
+ */
+ if (state != min_state) {
+ *sp = '-';
+ *(sp + 1) = '\0';
+ sprintf(path, "cpuidle/state%d/above", state);
+ add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU, 0);
+ }
+ }
}
/*
@@ -10549,7 +10632,8 @@ skip_cgroup_setting:
print_bootcmd();
}
- probe_sysfs();
+ probe_cpuidle_residency();
+ probe_cpuidle_counts();
if (!getuid())
set_rlimit();
diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py
index da53a709773a..c176487356e6 100644
--- a/tools/testing/kunit/kunit_parser.py
+++ b/tools/testing/kunit/kunit_parser.py
@@ -809,6 +809,10 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str], is_subtest:
test.log.extend(parse_diagnostic(lines))
if test.name != "" and not peek_test_name_match(lines, test):
test.add_error(printer, 'missing subtest result line!')
+ elif not lines:
+ print_log(test.log, printer)
+ test.status = TestStatus.NO_TESTS
+ test.add_error(printer, 'No more test results!')
else:
parse_test_result(lines, test, expected_num, printer)
diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py
index 5ff4f6ffd873..bbba921e0eac 100755
--- a/tools/testing/kunit/kunit_tool_test.py
+++ b/tools/testing/kunit/kunit_tool_test.py
@@ -371,8 +371,8 @@ class KUnitParserTest(unittest.TestCase):
"""
result = kunit_parser.parse_run_tests(output.splitlines(), stdout)
# Missing test results after test plan should alert a suspected test crash.
- self.assertEqual(kunit_parser.TestStatus.TEST_CRASHED, result.status)
- self.assertEqual(result.counts, kunit_parser.TestCounts(passed=1, crashed=1, errors=1))
+ self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status)
+ self.assertEqual(result.counts, kunit_parser.TestCounts(passed=1, errors=2))
def line_stream_from_strs(strs: Iterable[str]) -> kunit_parser.LineStream:
return kunit_parser.LineStream(enumerate(strs, start=1))
diff --git a/tools/testing/selftests/.gitignore b/tools/testing/selftests/.gitignore
index cb24124ac5b9..674aaa02e396 100644
--- a/tools/testing/selftests/.gitignore
+++ b/tools/testing/selftests/.gitignore
@@ -4,7 +4,6 @@ gpiogpio-hammer
gpioinclude/
gpiolsgpio
kselftest_install/
-tpm2/SpaceTest.log
# Python bytecode and cache
__pycache__/
diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64
index 5680befae8c6..5e713ef7caa3 100644
--- a/tools/testing/selftests/bpf/config.x86_64
+++ b/tools/testing/selftests/bpf/config.x86_64
@@ -39,7 +39,6 @@ CONFIG_CPU_FREQ_GOV_USERSPACE=y
CONFIG_CPU_FREQ_STAT=y
CONFIG_CPU_IDLE_GOV_LADDER=y
CONFIG_CPUSETS=y
-CONFIG_CRC_T10DIF=y
CONFIG_CRYPTO_BLAKE2B=y
CONFIG_CRYPTO_SEQIV=y
CONFIG_CRYPTO_XXHASH=y
diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh
index 400a696a0d21..a17256d9f88a 100755
--- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh
+++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh
@@ -88,22 +88,32 @@ echo "" > test/cpuset.cpus
# If isolated CPUs have been reserved at boot time (as shown in
# cpuset.cpus.isolated), these isolated CPUs should be outside of CPUs 0-8
# that will be used by this script for testing purpose. If not, some of
-# the tests may fail incorrectly. These pre-isolated CPUs should stay in
-# an isolated state throughout the testing process for now.
+# the tests may fail incorrectly. Wait a bit and retry again just in case
+# these isolated CPUs are leftover from previous run and have just been
+# cleaned up earlier in this script.
+#
+# These pre-isolated CPUs should stay in an isolated state throughout the
+# testing process for now.
#
BOOT_ISOLCPUS=$(cat $CGROUP2/cpuset.cpus.isolated)
+[[ -n "$BOOT_ISOLCPUS" ]] && {
+ sleep 0.5
+ BOOT_ISOLCPUS=$(cat $CGROUP2/cpuset.cpus.isolated)
+}
if [[ -n "$BOOT_ISOLCPUS" ]]
then
[[ $(echo $BOOT_ISOLCPUS | sed -e "s/[,-].*//") -le 8 ]] &&
skip_test "Pre-isolated CPUs ($BOOT_ISOLCPUS) overlap CPUs to be tested"
echo "Pre-isolated CPUs: $BOOT_ISOLCPUS"
fi
+
cleanup()
{
online_cpus
cd $CGROUP2
- rmdir A1/A2/A3 A1/A2 A1 B1 > /dev/null 2>&1
- rmdir test > /dev/null 2>&1
+ rmdir A1/A2/A3 A1/A2 A1 B1 test/A1 test/B1 test > /dev/null 2>&1
+ rmdir rtest/p1/c11 rtest/p1/c12 rtest/p2/c21 \
+ rtest/p2/c22 rtest/p1 rtest/p2 rtest > /dev/null 2>&1
[[ -n "$SCHED_DEBUG" ]] &&
echo "$SCHED_DEBUG" > /sys/kernel/debug/sched/verbose
}
@@ -173,14 +183,22 @@ test_add_proc()
#
# Cgroup test hierarchy
#
-# root -- A1 -- A2 -- A3
-# +- B1
+# root
+# |
+# +------+------+
+# | |
+# A1 B1
+# |
+# A2
+# |
+# A3
#
# P<v> = set cpus.partition (0:member, 1:root, 2:isolated)
# C<l> = add cpu-list to cpuset.cpus
# X<l> = add cpu-list to cpuset.cpus.exclusive
# S<p> = use prefix in subtree_control
# T = put a task into cgroup
+# CX<l> = add cpu-list to both cpuset.cpus and cpuset.cpus.exclusive
# O<c>=<v> = Write <v> to CPU online file of <c>
#
# ECPUs - effective CPUs of cpusets
@@ -207,130 +225,129 @@ TEST_MATRIX=(
" C0-1:P1 . . C2-3 S+:C4-5 . . . 0 A1:4-5"
" C0-1 . . C2-3:P1 . . . C2 0 "
" C0-1 . . C2-3:P1 . . . C4-5 0 B1:4-5"
- "C0-3:P1:S+ C2-3:P1 . . . . . . 0 A1:0-1,A2:2-3"
- "C0-3:P1:S+ C2-3:P1 . . C1-3 . . . 0 A1:1,A2:2-3"
- "C2-3:P1:S+ C3:P1 . . C3 . . . 0 A1:,A2:3 A1:P1,A2:P1"
- "C2-3:P1:S+ C3:P1 . . C3 P0 . . 0 A1:3,A2:3 A1:P1,A2:P0"
- "C2-3:P1:S+ C2:P1 . . C2-4 . . . 0 A1:3-4,A2:2"
- "C2-3:P1:S+ C3:P1 . . C3 . . C0-2 0 A1:,B1:0-2 A1:P1,A2:P1"
- "$SETUP_A123_PARTITIONS . C2-3 . . . 0 A1:,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
+ "C0-3:P1:S+ C2-3:P1 . . . . . . 0 A1:0-1|A2:2-3|XA2:2-3"
+ "C0-3:P1:S+ C2-3:P1 . . C1-3 . . . 0 A1:1|A2:2-3|XA2:2-3"
+ "C2-3:P1:S+ C3:P1 . . C3 . . . 0 A1:|A2:3|XA2:3 A1:P1|A2:P1"
+ "C2-3:P1:S+ C3:P1 . . C3 P0 . . 0 A1:3|A2:3 A1:P1|A2:P0"
+ "C2-3:P1:S+ C2:P1 . . C2-4 . . . 0 A1:3-4|A2:2"
+ "C2-3:P1:S+ C3:P1 . . C3 . . C0-2 0 A1:|B1:0-2 A1:P1|A2:P1"
+ "$SETUP_A123_PARTITIONS . C2-3 . . . 0 A1:|A2:2|A3:3 A1:P1|A2:P1|A3:P1"
# CPU offlining cases:
- " C0-1 . . C2-3 S+ C4-5 . O2=0 0 A1:0-1,B1:3"
- "C0-3:P1:S+ C2-3:P1 . . O2=0 . . . 0 A1:0-1,A2:3"
- "C0-3:P1:S+ C2-3:P1 . . O2=0 O2=1 . . 0 A1:0-1,A2:2-3"
- "C0-3:P1:S+ C2-3:P1 . . O1=0 . . . 0 A1:0,A2:2-3"
- "C0-3:P1:S+ C2-3:P1 . . O1=0 O1=1 . . 0 A1:0-1,A2:2-3"
- "C2-3:P1:S+ C3:P1 . . O3=0 O3=1 . . 0 A1:2,A2:3 A1:P1,A2:P1"
- "C2-3:P1:S+ C3:P2 . . O3=0 O3=1 . . 0 A1:2,A2:3 A1:P1,A2:P2"
- "C2-3:P1:S+ C3:P1 . . O2=0 O2=1 . . 0 A1:2,A2:3 A1:P1,A2:P1"
- "C2-3:P1:S+ C3:P2 . . O2=0 O2=1 . . 0 A1:2,A2:3 A1:P1,A2:P2"
- "C2-3:P1:S+ C3:P1 . . O2=0 . . . 0 A1:,A2:3 A1:P1,A2:P1"
- "C2-3:P1:S+ C3:P1 . . O3=0 . . . 0 A1:2,A2: A1:P1,A2:P1"
- "C2-3:P1:S+ C3:P1 . . T:O2=0 . . . 0 A1:3,A2:3 A1:P1,A2:P-1"
- "C2-3:P1:S+ C3:P1 . . . T:O3=0 . . 0 A1:2,A2:2 A1:P1,A2:P-1"
- "$SETUP_A123_PARTITIONS . O1=0 . . . 0 A1:,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
- "$SETUP_A123_PARTITIONS . O2=0 . . . 0 A1:1,A2:,A3:3 A1:P1,A2:P1,A3:P1"
- "$SETUP_A123_PARTITIONS . O3=0 . . . 0 A1:1,A2:2,A3: A1:P1,A2:P1,A3:P1"
- "$SETUP_A123_PARTITIONS . T:O1=0 . . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1"
- "$SETUP_A123_PARTITIONS . . T:O2=0 . . 0 A1:1,A2:3,A3:3 A1:P1,A2:P1,A3:P-1"
- "$SETUP_A123_PARTITIONS . . . T:O3=0 . 0 A1:1,A2:2,A3:2 A1:P1,A2:P1,A3:P-1"
- "$SETUP_A123_PARTITIONS . T:O1=0 O1=1 . . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
- "$SETUP_A123_PARTITIONS . . T:O2=0 O2=1 . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
- "$SETUP_A123_PARTITIONS . . . T:O3=0 O3=1 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
- "$SETUP_A123_PARTITIONS . T:O1=0 O2=0 O1=1 . 0 A1:1,A2:,A3:3 A1:P1,A2:P1,A3:P1"
- "$SETUP_A123_PARTITIONS . T:O1=0 O2=0 O2=1 . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1"
+ " C0-1 . . C2-3 S+ C4-5 . O2=0 0 A1:0-1|B1:3"
+ "C0-3:P1:S+ C2-3:P1 . . O2=0 . . . 0 A1:0-1|A2:3"
+ "C0-3:P1:S+ C2-3:P1 . . O2=0 O2=1 . . 0 A1:0-1|A2:2-3"
+ "C0-3:P1:S+ C2-3:P1 . . O1=0 . . . 0 A1:0|A2:2-3"
+ "C0-3:P1:S+ C2-3:P1 . . O1=0 O1=1 . . 0 A1:0-1|A2:2-3"
+ "C2-3:P1:S+ C3:P1 . . O3=0 O3=1 . . 0 A1:2|A2:3 A1:P1|A2:P1"
+ "C2-3:P1:S+ C3:P2 . . O3=0 O3=1 . . 0 A1:2|A2:3 A1:P1|A2:P2"
+ "C2-3:P1:S+ C3:P1 . . O2=0 O2=1 . . 0 A1:2|A2:3 A1:P1|A2:P1"
+ "C2-3:P1:S+ C3:P2 . . O2=0 O2=1 . . 0 A1:2|A2:3 A1:P1|A2:P2"
+ "C2-3:P1:S+ C3:P1 . . O2=0 . . . 0 A1:|A2:3 A1:P1|A2:P1"
+ "C2-3:P1:S+ C3:P1 . . O3=0 . . . 0 A1:2|A2: A1:P1|A2:P1"
+ "C2-3:P1:S+ C3:P1 . . T:O2=0 . . . 0 A1:3|A2:3 A1:P1|A2:P-1"
+ "C2-3:P1:S+ C3:P1 . . . T:O3=0 . . 0 A1:2|A2:2 A1:P1|A2:P-1"
+ "$SETUP_A123_PARTITIONS . O1=0 . . . 0 A1:|A2:2|A3:3 A1:P1|A2:P1|A3:P1"
+ "$SETUP_A123_PARTITIONS . O2=0 . . . 0 A1:1|A2:|A3:3 A1:P1|A2:P1|A3:P1"
+ "$SETUP_A123_PARTITIONS . O3=0 . . . 0 A1:1|A2:2|A3: A1:P1|A2:P1|A3:P1"
+ "$SETUP_A123_PARTITIONS . T:O1=0 . . . 0 A1:2-3|A2:2-3|A3:3 A1:P1|A2:P-1|A3:P-1"
+ "$SETUP_A123_PARTITIONS . . T:O2=0 . . 0 A1:1|A2:3|A3:3 A1:P1|A2:P1|A3:P-1"
+ "$SETUP_A123_PARTITIONS . . . T:O3=0 . 0 A1:1|A2:2|A3:2 A1:P1|A2:P1|A3:P-1"
+ "$SETUP_A123_PARTITIONS . T:O1=0 O1=1 . . 0 A1:1|A2:2|A3:3 A1:P1|A2:P1|A3:P1"
+ "$SETUP_A123_PARTITIONS . . T:O2=0 O2=1 . 0 A1:1|A2:2|A3:3 A1:P1|A2:P1|A3:P1"
+ "$SETUP_A123_PARTITIONS . . . T:O3=0 O3=1 0 A1:1|A2:2|A3:3 A1:P1|A2:P1|A3:P1"
+ "$SETUP_A123_PARTITIONS . T:O1=0 O2=0 O1=1 . 0 A1:1|A2:|A3:3 A1:P1|A2:P1|A3:P1"
+ "$SETUP_A123_PARTITIONS . T:O1=0 O2=0 O2=1 . 0 A1:2-3|A2:2-3|A3:3 A1:P1|A2:P-1|A3:P-1"
# old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS
# ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ --------
#
# Remote partition and cpuset.cpus.exclusive tests
#
- " C0-3:S+ C1-3:S+ C2-3 . X2-3 . . . 0 A1:0-3,A2:1-3,A3:2-3,XA1:2-3"
- " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3:P2 . . 0 A1:0-1,A2:2-3,A3:2-3 A1:P0,A2:P2 2-3"
- " C0-3:S+ C1-3:S+ C2-3 . X2-3 X3:P2 . . 0 A1:0-2,A2:3,A3:3 A1:P0,A2:P2 3"
- " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2 . 0 A1:0-1,A2:1,A3:2-3 A1:P0,A3:P2 2-3"
- " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:C3 . 0 A1:0-1,A2:1,A3:2-3 A1:P0,A3:P2 2-3"
- " C0-3:S+ C1-3:S+ C2-3 C2-3 . . . P2 0 A1:0-3,A2:1-3,A3:2-3,B1:2-3 A1:P0,A3:P0,B1:P-2"
+ " C0-3:S+ C1-3:S+ C2-3 . X2-3 . . . 0 A1:0-3|A2:1-3|A3:2-3|XA1:2-3"
+ " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3:P2 . . 0 A1:0-1|A2:2-3|A3:2-3 A1:P0|A2:P2 2-3"
+ " C0-3:S+ C1-3:S+ C2-3 . X2-3 X3:P2 . . 0 A1:0-2|A2:3|A3:3 A1:P0|A2:P2 3"
+ " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2 . 0 A1:0-1|A2:1|A3:2-3 A1:P0|A3:P2 2-3"
+ " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:C3 . 0 A1:0-1|A2:1|A3:2-3 A1:P0|A3:P2 2-3"
+ " C0-3:S+ C1-3:S+ C2-3 C2-3 . . . P2 0 A1:0-3|A2:1-3|A3:2-3|B1:2-3 A1:P0|A3:P0|B1:P-2"
" C0-3:S+ C1-3:S+ C2-3 C4-5 . . . P2 0 B1:4-5 B1:P2 4-5"
- " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2 0 A3:2-3,B1:4 A3:P2,B1:P2 2-4"
- " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2:C1-3 P2 0 A3:2-3,B1:4 A3:P2,B1:P2 2-4"
- " C0-3:S+ C1-3:S+ C2-3 C4 X1-3 X1-3:P2 P2 . 0 A2:1,A3:2-3 A2:P2,A3:P2 1-3"
- " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2:C4-5 0 A3:2-3,B1:4-5 A3:P2,B1:P2 2-5"
- " C4:X0-3:S+ X1-3:S+ X2-3 . . P2 . . 0 A1:4,A2:1-3,A3:1-3 A2:P2 1-3"
- " C4:X0-3:S+ X1-3:S+ X2-3 . . . P2 . 0 A1:4,A2:4,A3:2-3 A3:P2 2-3"
+ " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2 0 A3:2-3|B1:4 A3:P2|B1:P2 2-4"
+ " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2:C1-3 P2 0 A3:2-3|B1:4 A3:P2|B1:P2 2-4"
+ " C0-3:S+ C1-3:S+ C2-3 C4 X1-3 X1-3:P2 P2 . 0 A2:1|A3:2-3 A2:P2|A3:P2 1-3"
+ " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2:C4-5 0 A3:2-3|B1:4-5 A3:P2|B1:P2 2-5"
+ " C4:X0-3:S+ X1-3:S+ X2-3 . . P2 . . 0 A1:4|A2:1-3|A3:1-3 A2:P2 1-3"
+ " C4:X0-3:S+ X1-3:S+ X2-3 . . . P2 . 0 A1:4|A2:4|A3:2-3 A3:P2 2-3"
# Nested remote/local partition tests
- " C0-3:S+ C1-3:S+ C2-3 C4-5 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:,A3:2-3,B1:4-5 \
- A1:P0,A2:P1,A3:P2,B1:P1 2-3"
- " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:,A3:2-3,B1:4 \
- A1:P0,A2:P1,A3:P2,B1:P1 2-4,2-3"
- " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 . P1 0 A1:0-1,A2:2-3,A3:2-3,B1:4 \
- A1:P0,A2:P1,A3:P0,B1:P1"
- " C0-3:S+ C1-3:S+ C3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:2,A3:3,B1:4 \
- A1:P0,A2:P1,A3:P2,B1:P1 2-4,3"
- " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X4:P1 . 0 A1:0-1,A2:2-3,A3:4 \
- A1:P0,A2:P2,A3:P1 2-4,2-3"
- " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X3-4:P1 . 0 A1:0-1,A2:2,A3:3-4 \
- A1:P0,A2:P2,A3:P1 2"
+ " C0-3:S+ C1-3:S+ C2-3 C4-5 X2-3 X2-3:P1 P2 P1 0 A1:0-1|A2:|A3:2-3|B1:4-5 \
+ A1:P0|A2:P1|A3:P2|B1:P1 2-3"
+ " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1|A2:|A3:2-3|B1:4 \
+ A1:P0|A2:P1|A3:P2|B1:P1 2-4|2-3"
+ " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 . P1 0 A1:0-1|A2:2-3|A3:2-3|B1:4 \
+ A1:P0|A2:P1|A3:P0|B1:P1"
+ " C0-3:S+ C1-3:S+ C3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1|A2:2|A3:3|B1:4 \
+ A1:P0|A2:P1|A3:P2|B1:P1 2-4|3"
+ " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X4:P1 . 0 A1:0-1|A2:2-3|A3:4 \
+ A1:P0|A2:P2|A3:P1 2-4|2-3"
+ " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X3-4:P1 . 0 A1:0-1|A2:2|A3:3-4 \
+ A1:P0|A2:P2|A3:P1 2"
" C0-4:X2-4:S+ C1-4:X2-4:S+:P2 C2-4:X4:P1 \
- . . X5 . . 0 A1:0-4,A2:1-4,A3:2-4 \
- A1:P0,A2:P-2,A3:P-1"
+ . . X5 . . 0 A1:0-4|A2:1-4|A3:2-4 \
+ A1:P0|A2:P-2|A3:P-1 ."
" C0-4:X2-4:S+ C1-4:X2-4:S+:P2 C2-4:X4:P1 \
- . . . X1 . 0 A1:0-1,A2:2-4,A3:2-4 \
- A1:P0,A2:P2,A3:P-1 2-4"
+ . . . X1 . 0 A1:0-1|A2:2-4|A3:2-4 \
+ A1:P0|A2:P2|A3:P-1 2-4"
# Remote partition offline tests
- " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 . 0 A1:0-1,A2:1,A3:3 A1:P0,A3:P2 2-3"
- " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 O2=1 0 A1:0-1,A2:1,A3:2-3 A1:P0,A3:P2 2-3"
- " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 P2:O3=0 . 0 A1:0-2,A2:1-2,A3: A1:P0,A3:P2 3"
- " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 T:P2:O3=0 . 0 A1:0-2,A2:1-2,A3:1-2 A1:P0,A3:P-2 3,"
+ " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 . 0 A1:0-1|A2:1|A3:3 A1:P0|A3:P2 2-3"
+ " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 O2=1 0 A1:0-1|A2:1|A3:2-3 A1:P0|A3:P2 2-3"
+ " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 P2:O3=0 . 0 A1:0-2|A2:1-2|A3: A1:P0|A3:P2 3"
+ " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 T:P2:O3=0 . 0 A1:0-2|A2:1-2|A3:1-2 A1:P0|A3:P-2 3|"
# An invalidated remote partition cannot self-recover from hotplug
- " C0-3:S+ C1-3:S+ C2 . X2-3 X2-3 T:P2:O2=0 O2=1 0 A1:0-3,A2:1-3,A3:2 A1:P0,A3:P-2"
+ " C0-3:S+ C1-3:S+ C2 . X2-3 X2-3 T:P2:O2=0 O2=1 0 A1:0-3|A2:1-3|A3:2 A1:P0|A3:P-2 ."
# cpus.exclusive.effective clearing test
- " C0-3:S+ C1-3:S+ C2 . X2-3:X . . . 0 A1:0-3,A2:1-3,A3:2,XA1:"
+ " C0-3:S+ C1-3:S+ C2 . X2-3:X . . . 0 A1:0-3|A2:1-3|A3:2|XA1:"
# Invalid to valid remote partition transition test
- " C0-3:S+ C1-3 . . . X3:P2 . . 0 A1:0-3,A2:1-3,XA2: A2:P-2"
+ " C0-3:S+ C1-3 . . . X3:P2 . . 0 A1:0-3|A2:1-3|XA2: A2:P-2 ."
" C0-3:S+ C1-3:X3:P2
- . . X2-3 P2 . . 0 A1:0-2,A2:3,XA2:3 A2:P2 3"
+ . . X2-3 P2 . . 0 A1:0-2|A2:3|XA2:3 A2:P2 3"
# Invalid to valid local partition direct transition tests
- " C1-3:S+:P2 X4:P2 . . . . . . 0 A1:1-3,XA1:1-3,A2:1-3:XA2: A1:P2,A2:P-2 1-3"
- " C1-3:S+:P2 X4:P2 . . . X3:P2 . . 0 A1:1-2,XA1:1-3,A2:3:XA2:3 A1:P2,A2:P2 1-3"
- " C0-3:P2 . . C4-6 C0-4 . . . 0 A1:0-4,B1:4-6 A1:P-2,B1:P0"
- " C0-3:P2 . . C4-6 C0-4:C0-3 . . . 0 A1:0-3,B1:4-6 A1:P2,B1:P0 0-3"
- " C0-3:P2 . . C3-5:C4-5 . . . . 0 A1:0-3,B1:4-5 A1:P2,B1:P0 0-3"
+ " C1-3:S+:P2 X4:P2 . . . . . . 0 A1:1-3|XA1:1-3|A2:1-3:XA2: A1:P2|A2:P-2 1-3"
+ " C1-3:S+:P2 X4:P2 . . . X3:P2 . . 0 A1:1-2|XA1:1-3|A2:3:XA2:3 A1:P2|A2:P2 1-3"
+ " C0-3:P2 . . C4-6 C0-4 . . . 0 A1:0-4|B1:4-6 A1:P-2|B1:P0"
+ " C0-3:P2 . . C4-6 C0-4:C0-3 . . . 0 A1:0-3|B1:4-6 A1:P2|B1:P0 0-3"
# Local partition invalidation tests
" C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \
- . . . . . 0 A1:1,A2:2,A3:3 A1:P2,A2:P2,A3:P2 1-3"
+ . . . . . 0 A1:1|A2:2|A3:3 A1:P2|A2:P2|A3:P2 1-3"
" C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \
- . . X4 . . 0 A1:1-3,A2:1-3,A3:2-3,XA2:,XA3: A1:P2,A2:P-2,A3:P-2 1-3"
+ . . X4 . . 0 A1:1-3|A2:1-3|A3:2-3|XA2:|XA3: A1:P2|A2:P-2|A3:P-2 1-3"
" C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \
- . . C4:X . . 0 A1:1-3,A2:1-3,A3:2-3,XA2:,XA3: A1:P2,A2:P-2,A3:P-2 1-3"
+ . . C4:X . . 0 A1:1-3|A2:1-3|A3:2-3|XA2:|XA3: A1:P2|A2:P-2|A3:P-2 1-3"
# Local partition CPU change tests
- " C0-5:S+:P2 C4-5:S+:P1 . . . C3-5 . . 0 A1:0-2,A2:3-5 A1:P2,A2:P1 0-2"
- " C0-5:S+:P2 C4-5:S+:P1 . . C1-5 . . . 0 A1:1-3,A2:4-5 A1:P2,A2:P1 1-3"
+ " C0-5:S+:P2 C4-5:S+:P1 . . . C3-5 . . 0 A1:0-2|A2:3-5 A1:P2|A2:P1 0-2"
+ " C0-5:S+:P2 C4-5:S+:P1 . . C1-5 . . . 0 A1:1-3|A2:4-5 A1:P2|A2:P1 1-3"
# cpus_allowed/exclusive_cpus update tests
" C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \
- . X:C4 . P2 . 0 A1:4,A2:4,XA2:,XA3:,A3:4 \
- A1:P0,A3:P-2"
+ . X:C4 . P2 . 0 A1:4|A2:4|XA2:|XA3:|A3:4 \
+ A1:P0|A3:P-2 ."
" C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \
- . X1 . P2 . 0 A1:0-3,A2:1-3,XA1:1,XA2:,XA3:,A3:2-3 \
- A1:P0,A3:P-2"
+ . X1 . P2 . 0 A1:0-3|A2:1-3|XA1:1|XA2:|XA3:|A3:2-3 \
+ A1:P0|A3:P-2 ."
" C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \
- . . X3 P2 . 0 A1:0-2,A2:1-2,XA2:3,XA3:3,A3:3 \
- A1:P0,A3:P2 3"
+ . . X3 P2 . 0 A1:0-2|A2:1-2|XA2:3|XA3:3|A3:3 \
+ A1:P0|A3:P2 3"
" C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \
- . . X3 . . 0 A1:0-3,A2:1-3,XA2:3,XA3:3,A3:2-3 \
- A1:P0,A3:P-2"
+ . . X3 . . 0 A1:0-2|A2:1-2|XA2:3|XA3:3|A3:3|XA3:3 \
+ A1:P0|A3:P2 3"
" C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \
- . X4 . . . 0 A1:0-3,A2:1-3,A3:2-3,XA1:4,XA2:,XA3 \
- A1:P0,A3:P-2"
+ . X4 . . . 0 A1:0-3|A2:1-3|A3:2-3|XA1:4|XA2:|XA3 \
+ A1:P0|A3:P-2"
# old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS
# ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ --------
@@ -339,68 +356,127 @@ TEST_MATRIX=(
#
# Adding CPUs to partition root that are not in parent's
# cpuset.cpus is allowed, but those extra CPUs are ignored.
- "C2-3:P1:S+ C3:P1 . . . C2-4 . . 0 A1:,A2:2-3 A1:P1,A2:P1"
+ "C2-3:P1:S+ C3:P1 . . . C2-4 . . 0 A1:|A2:2-3 A1:P1|A2:P1"
# Taking away all CPUs from parent or itself if there are tasks
# will make the partition invalid.
- "C2-3:P1:S+ C3:P1 . . T C2-3 . . 0 A1:2-3,A2:2-3 A1:P1,A2:P-1"
- " C3:P1:S+ C3 . . T P1 . . 0 A1:3,A2:3 A1:P1,A2:P-1"
- "$SETUP_A123_PARTITIONS . T:C2-3 . . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1"
- "$SETUP_A123_PARTITIONS . T:C2-3:C1-3 . . . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
+ "C2-3:P1:S+ C3:P1 . . T C2-3 . . 0 A1:2-3|A2:2-3 A1:P1|A2:P-1"
+ " C3:P1:S+ C3 . . T P1 . . 0 A1:3|A2:3 A1:P1|A2:P-1"
+ "$SETUP_A123_PARTITIONS . T:C2-3 . . . 0 A1:2-3|A2:2-3|A3:3 A1:P1|A2:P-1|A3:P-1"
+ "$SETUP_A123_PARTITIONS . T:C2-3:C1-3 . . . 0 A1:1|A2:2|A3:3 A1:P1|A2:P1|A3:P1"
# Changing a partition root to member makes child partitions invalid
- "C2-3:P1:S+ C3:P1 . . P0 . . . 0 A1:2-3,A2:3 A1:P0,A2:P-1"
- "$SETUP_A123_PARTITIONS . C2-3 P0 . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P0,A3:P-1"
+ "C2-3:P1:S+ C3:P1 . . P0 . . . 0 A1:2-3|A2:3 A1:P0|A2:P-1"
+ "$SETUP_A123_PARTITIONS . C2-3 P0 . . 0 A1:2-3|A2:2-3|A3:3 A1:P1|A2:P0|A3:P-1"
# cpuset.cpus can contains cpus not in parent's cpuset.cpus as long
# as they overlap.
- "C2-3:P1:S+ . . . . C3-4:P1 . . 0 A1:2,A2:3 A1:P1,A2:P1"
+ "C2-3:P1:S+ . . . . C3-4:P1 . . 0 A1:2|A2:3 A1:P1|A2:P1"
# Deletion of CPUs distributed to child cgroup is allowed.
- "C0-1:P1:S+ C1 . C2-3 C4-5 . . . 0 A1:4-5,A2:4-5"
+ "C0-1:P1:S+ C1 . C2-3 C4-5 . . . 0 A1:4-5|A2:4-5"
# To become a valid partition root, cpuset.cpus must overlap parent's
# cpuset.cpus.
- " C0-1:P1 . . C2-3 S+ C4-5:P1 . . 0 A1:0-1,A2:0-1 A1:P1,A2:P-1"
+ " C0-1:P1 . . C2-3 S+ C4-5:P1 . . 0 A1:0-1|A2:0-1 A1:P1|A2:P-1"
# Enabling partition with child cpusets is allowed
- " C0-1:S+ C1 . C2-3 P1 . . . 0 A1:0-1,A2:1 A1:P1"
+ " C0-1:S+ C1 . C2-3 P1 . . . 0 A1:0-1|A2:1 A1:P1"
- # A partition root with non-partition root parent is invalid, but it
+ # A partition root with non-partition root parent is invalid| but it
# can be made valid if its parent becomes a partition root too.
- " C0-1:S+ C1 . C2-3 . P2 . . 0 A1:0-1,A2:1 A1:P0,A2:P-2"
- " C0-1:S+ C1:P2 . C2-3 P1 . . . 0 A1:0,A2:1 A1:P1,A2:P2"
+ " C0-1:S+ C1 . C2-3 . P2 . . 0 A1:0-1|A2:1 A1:P0|A2:P-2"
+ " C0-1:S+ C1:P2 . C2-3 P1 . . . 0 A1:0|A2:1 A1:P1|A2:P2 0-1|1"
# A non-exclusive cpuset.cpus change will invalidate partition and its siblings
- " C0-1:P1 . . C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P-1,B1:P0"
- " C0-1:P1 . . P1:C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P-1,B1:P-1"
- " C0-1 . . P1:C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P0,B1:P-1"
+ " C0-1:P1 . . C2-3 C0-2 . . . 0 A1:0-2|B1:2-3 A1:P-1|B1:P0"
+ " C0-1:P1 . . P1:C2-3 C0-2 . . . 0 A1:0-2|B1:2-3 A1:P-1|B1:P-1"
+ " C0-1 . . P1:C2-3 C0-2 . . . 0 A1:0-2|B1:2-3 A1:P0|B1:P-1"
# cpuset.cpus can overlap with sibling cpuset.cpus.exclusive but not subsumed by it
- " C0-3 . . C4-5 X5 . . . 0 A1:0-3,B1:4-5"
+ " C0-3 . . C4-5 X5 . . . 0 A1:0-3|B1:4-5"
# Child partition root that try to take all CPUs from parent partition
# with tasks will remain invalid.
- " C1-4:P1:S+ P1 . . . . . . 0 A1:1-4,A2:1-4 A1:P1,A2:P-1"
- " C1-4:P1:S+ P1 . . . C1-4 . . 0 A1,A2:1-4 A1:P1,A2:P1"
- " C1-4:P1:S+ P1 . . T C1-4 . . 0 A1:1-4,A2:1-4 A1:P1,A2:P-1"
+ " C1-4:P1:S+ P1 . . . . . . 0 A1:1-4|A2:1-4 A1:P1|A2:P-1"
+ " C1-4:P1:S+ P1 . . . C1-4 . . 0 A1|A2:1-4 A1:P1|A2:P1"
+ " C1-4:P1:S+ P1 . . T C1-4 . . 0 A1:1-4|A2:1-4 A1:P1|A2:P-1"
# Clearing of cpuset.cpus with a preset cpuset.cpus.exclusive shouldn't
# affect cpuset.cpus.exclusive.effective.
- " C1-4:X3:S+ C1:X3 . . . C . . 0 A2:1-4,XA2:3"
+ " C1-4:X3:S+ C1:X3 . . . C . . 0 A2:1-4|XA2:3"
+
+ # cpuset.cpus can contain CPUs that overlap a sibling cpuset with cpus.exclusive
+ # but creating a local partition out of it is not allowed. Similarly and change
+ # in cpuset.cpus of a local partition that overlaps sibling exclusive CPUs will
+ # invalidate it.
+ " CX1-4:S+ CX2-4:P2 . C5-6 . . . P1 0 A1:1|A2:2-4|B1:5-6|XB1:5-6 \
+ A1:P0|A2:P2:B1:P1 2-4"
+ " CX1-4:S+ CX2-4:P2 . C3-6 . . . P1 0 A1:1|A2:2-4|B1:5-6 \
+ A1:P0|A2:P2:B1:P-1 2-4"
+ " CX1-4:S+ CX2-4:P2 . C5-6 . . . P1:C3-6 0 A1:1|A2:2-4|B1:5-6 \
+ A1:P0|A2:P2:B1:P-1 2-4"
# old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS
# ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ --------
# Failure cases:
# A task cannot be added to a partition with no cpu
- "C2-3:P1:S+ C3:P1 . . O2=0:T . . . 1 A1:,A2:3 A1:P1,A2:P1"
+ "C2-3:P1:S+ C3:P1 . . O2=0:T . . . 1 A1:|A2:3 A1:P1|A2:P1"
# Changes to cpuset.cpus.exclusive that violate exclusivity rule is rejected
- " C0-3 . . C4-5 X0-3 . . X3-5 1 A1:0-3,B1:4-5"
+ " C0-3 . . C4-5 X0-3 . . X3-5 1 A1:0-3|B1:4-5"
# cpuset.cpus cannot be a subset of sibling cpuset.cpus.exclusive
- " C0-3 . . C4-5 X3-5 . . . 1 A1:0-3,B1:4-5"
+ " C0-3 . . C4-5 X3-5 . . . 1 A1:0-3|B1:4-5"
+)
+
+#
+# Cpuset controller remote partition test matrix.
+#
+# Cgroup test hierarchy
+#
+# root
+# |
+# rtest (cpuset.cpus.exclusive=1-7)
+# |
+# +------+------+
+# | |
+# p1 p2
+# +--+--+ +--+--+
+# | | | |
+# c11 c12 c21 c22
+#
+# REMOTE_TEST_MATRIX uses the same notational convention as TEST_MATRIX.
+# Only CPUs 1-7 should be used.
+#
+REMOTE_TEST_MATRIX=(
+ # old-p1 old-p2 old-c11 old-c12 old-c21 old-c22
+ # new-p1 new-p2 new-c11 new-c12 new-c21 new-c22 ECPUs Pstate ISOLCPUS
+ # ------ ------ ------- ------- ------- ------- ----- ------ --------
+ " X1-3:S+ X4-6:S+ X1-2 X3 X4-5 X6 \
+ . . P2 P2 P2 P2 c11:1-2|c12:3|c21:4-5|c22:6 \
+ c11:P2|c12:P2|c21:P2|c22:P2 1-6"
+ " CX1-4:S+ . X1-2:P2 C3 . . \
+ . . . C3-4 . . p1:3-4|c11:1-2|c12:3-4 \
+ p1:P0|c11:P2|c12:P0 1-2"
+ " CX1-4:S+ . X1-2:P2 . . . \
+ X2-4 . . . . . p1:1,3-4|c11:2 \
+ p1:P0|c11:P2 2"
+ " CX1-5:S+ . X1-2:P2 X3-5:P1 . . \
+ X2-4 . . . . . p1:1,5|c11:2|c12:3-4 \
+ p1:P0|c11:P2|c12:P1 2"
+ " CX1-4:S+ . X1-2:P2 X3-4:P1 . . \
+ . . X2 . . . p1:1|c11:2|c12:3-4 \
+ p1:P0|c11:P2|c12:P1 2"
+ # p1 as member, will get its effective CPUs from its parent rtest
+ " CX1-4:S+ . X1-2:P2 X3-4:P1 . . \
+ . . X1 CX2-4 . . p1:5-7|c11:1|c12:2-4 \
+ p1:P0|c11:P2|c12:P1 1"
+ " CX1-4:S+ X5-6:P1:S+ . . . . \
+ . . X1-2:P2 X4-5:P1 . X1-7:P2 p1:3|c11:1-2|c12:4:c22:5-6 \
+ p1:P0|p2:P1|c11:P2|c12:P1|c22:P2 \
+ 1-2,4-6|1-2,5-6"
)
#
@@ -453,25 +529,26 @@ set_ctrl_state()
PFILE=$CGRP/cpuset.cpus.partition
CFILE=$CGRP/cpuset.cpus
XFILE=$CGRP/cpuset.cpus.exclusive
- S=$(expr substr $CMD 1 1)
- if [[ $S = S ]]
- then
- PREFIX=${CMD#?}
+ case $CMD in
+ S*) PREFIX=${CMD#?}
COMM="echo ${PREFIX}${CTRL} > $SFILE"
eval $COMM $REDIRECT
- elif [[ $S = X ]]
- then
+ ;;
+ X*)
CPUS=${CMD#?}
COMM="echo $CPUS > $XFILE"
eval $COMM $REDIRECT
- elif [[ $S = C ]]
- then
- CPUS=${CMD#?}
+ ;;
+ CX*)
+ CPUS=${CMD#??}
+ COMM="echo $CPUS > $CFILE; echo $CPUS > $XFILE"
+ eval $COMM $REDIRECT
+ ;;
+ C*) CPUS=${CMD#?}
COMM="echo $CPUS > $CFILE"
eval $COMM $REDIRECT
- elif [[ $S = P ]]
- then
- VAL=${CMD#?}
+ ;;
+ P*) VAL=${CMD#?}
case $VAL in
0) VAL=member
;;
@@ -486,15 +563,17 @@ set_ctrl_state()
esac
COMM="echo $VAL > $PFILE"
eval $COMM $REDIRECT
- elif [[ $S = O ]]
- then
- VAL=${CMD#?}
+ ;;
+ O*) VAL=${CMD#?}
write_cpu_online $VAL
- elif [[ $S = T ]]
- then
- COMM="echo 0 > $TFILE"
+ ;;
+ T*) COMM="echo 0 > $TFILE"
eval $COMM $REDIRECT
- fi
+ ;;
+ *) echo "Unknown command: $CMD"
+ exit 1
+ ;;
+ esac
RET=$?
[[ $RET -ne 0 ]] && {
[[ -n "$SHOWERR" ]] && {
@@ -532,21 +611,18 @@ online_cpus()
}
#
-# Return 1 if the list of effective cpus isn't the same as the initial list.
+# Remove all the test cgroup directories
#
reset_cgroup_states()
{
echo 0 > $CGROUP2/cgroup.procs
online_cpus
- rmdir A1/A2/A3 A1/A2 A1 B1 > /dev/null 2>&1
- pause 0.02
- set_ctrl_state . R-
- pause 0.01
+ rmdir $RESET_LIST > /dev/null 2>&1
}
dump_states()
{
- for DIR in . A1 A1/A2 A1/A2/A3 B1
+ for DIR in $CGROUP_LIST
do
CPUS=$DIR/cpuset.cpus
ECPUS=$DIR/cpuset.cpus.effective
@@ -566,17 +642,33 @@ dump_states()
}
#
+# Set the actual cgroup directory into $CGRP_DIR
+# $1 - cgroup name
+#
+set_cgroup_dir()
+{
+ CGRP_DIR=$1
+ [[ $CGRP_DIR = A2 ]] && CGRP_DIR=A1/A2
+ [[ $CGRP_DIR = A3 ]] && CGRP_DIR=A1/A2/A3
+ [[ $CGRP_DIR = c11 ]] && CGRP_DIR=p1/c11
+ [[ $CGRP_DIR = c12 ]] && CGRP_DIR=p1/c12
+ [[ $CGRP_DIR = c21 ]] && CGRP_DIR=p2/c21
+ [[ $CGRP_DIR = c22 ]] && CGRP_DIR=p2/c22
+}
+
+#
# Check effective cpus
-# $1 - check string, format: <cgroup>:<cpu-list>[,<cgroup>:<cpu-list>]*
+# $1 - check string, format: <cgroup>:<cpu-list>[|<cgroup>:<cpu-list>]*
#
check_effective_cpus()
{
CHK_STR=$1
- for CHK in $(echo $CHK_STR | sed -e "s/,/ /g")
+ for CHK in $(echo $CHK_STR | sed -e "s/|/ /g")
do
set -- $(echo $CHK | sed -e "s/:/ /g")
CGRP=$1
- CPUS=$2
+ EXPECTED_CPUS=$2
+ ACTUAL_CPUS=
if [[ $CGRP = X* ]]
then
CGRP=${CGRP#X}
@@ -584,41 +676,39 @@ check_effective_cpus()
else
FILE=cpuset.cpus.effective
fi
- [[ $CGRP = A2 ]] && CGRP=A1/A2
- [[ $CGRP = A3 ]] && CGRP=A1/A2/A3
- [[ -e $CGRP/$FILE ]] || return 1
- [[ $CPUS = $(cat $CGRP/$FILE) ]] || return 1
+ set_cgroup_dir $CGRP
+ [[ -e $CGRP_DIR/$FILE ]] || return 1
+ ACTUAL_CPUS=$(cat $CGRP_DIR/$FILE)
+ [[ $EXPECTED_CPUS = $ACTUAL_CPUS ]] || return 1
done
}
#
# Check cgroup states
-# $1 - check string, format: <cgroup>:<state>[,<cgroup>:<state>]*
+# $1 - check string, format: <cgroup>:<state>[|<cgroup>:<state>]*
#
check_cgroup_states()
{
CHK_STR=$1
- for CHK in $(echo $CHK_STR | sed -e "s/,/ /g")
+ for CHK in $(echo $CHK_STR | sed -e "s/|/ /g")
do
set -- $(echo $CHK | sed -e "s/:/ /g")
CGRP=$1
- CGRP_DIR=$CGRP
- STATE=$2
+ EXPECTED_STATE=$2
FILE=
- EVAL=$(expr substr $STATE 2 2)
- [[ $CGRP = A2 ]] && CGRP_DIR=A1/A2
- [[ $CGRP = A3 ]] && CGRP_DIR=A1/A2/A3
+ EVAL=$(expr substr $EXPECTED_STATE 2 2)
- case $STATE in
+ set_cgroup_dir $CGRP
+ case $EXPECTED_STATE in
P*) FILE=$CGRP_DIR/cpuset.cpus.partition
;;
- *) echo "Unknown state: $STATE!"
+ *) echo "Unknown state: $EXPECTED_STATE!"
exit 1
;;
esac
- VAL=$(cat $FILE)
+ ACTUAL_STATE=$(cat $FILE)
- case "$VAL" in
+ case "$ACTUAL_STATE" in
member) VAL=0
;;
root) VAL=1
@@ -642,7 +732,7 @@ check_cgroup_states()
[[ $VAL -eq 1 && $VERBOSE -gt 0 ]] && {
DOMS=$(cat $CGRP_DIR/cpuset.cpus.effective)
[[ -n "$DOMS" ]] &&
- echo " [$CGRP] sched-domain: $DOMS" > $CONSOLE
+ echo " [$CGRP_DIR] sched-domain: $DOMS" > $CONSOLE
}
done
return 0
@@ -665,22 +755,22 @@ check_cgroup_states()
#
check_isolcpus()
{
- EXPECT_VAL=$1
- ISOLCPUS=
+ EXPECTED_ISOLCPUS=$1
+ ISCPUS=${CGROUP2}/cpuset.cpus.isolated
+ ISOLCPUS=$(cat $ISCPUS)
LASTISOLCPU=
SCHED_DOMAINS=/sys/kernel/debug/sched/domains
- ISCPUS=${CGROUP2}/cpuset.cpus.isolated
- if [[ $EXPECT_VAL = . ]]
+ if [[ $EXPECTED_ISOLCPUS = . ]]
then
- EXPECT_VAL=
- EXPECT_VAL2=
- elif [[ $(expr $EXPECT_VAL : ".*,.*") > 0 ]]
+ EXPECTED_ISOLCPUS=
+ EXPECTED_SDOMAIN=
+ elif [[ $(expr $EXPECTED_ISOLCPUS : ".*|.*") > 0 ]]
then
- set -- $(echo $EXPECT_VAL | sed -e "s/,/ /g")
- EXPECT_VAL=$1
- EXPECT_VAL2=$2
+ set -- $(echo $EXPECTED_ISOLCPUS | sed -e "s/|/ /g")
+ EXPECTED_ISOLCPUS=$2
+ EXPECTED_SDOMAIN=$1
else
- EXPECT_VAL2=$EXPECT_VAL
+ EXPECTED_SDOMAIN=$EXPECTED_ISOLCPUS
fi
#
@@ -689,20 +779,21 @@ check_isolcpus()
# to make appending those CPUs easier.
#
[[ -n "$BOOT_ISOLCPUS" ]] && {
- EXPECT_VAL=${EXPECT_VAL:+${EXPECT_VAL},}${BOOT_ISOLCPUS}
- EXPECT_VAL2=${EXPECT_VAL2:+${EXPECT_VAL2},}${BOOT_ISOLCPUS}
+ EXPECTED_ISOLCPUS=${EXPECTED_ISOLCPUS:+${EXPECTED_ISOLCPUS},}${BOOT_ISOLCPUS}
+ EXPECTED_SDOMAIN=${EXPECTED_SDOMAIN:+${EXPECTED_SDOMAIN},}${BOOT_ISOLCPUS}
}
#
# Check cpuset.cpus.isolated cpumask
#
- [[ "$EXPECT_VAL2" != "$ISOLCPUS" ]] && {
+ [[ "$EXPECTED_ISOLCPUS" != "$ISOLCPUS" ]] && {
# Take a 50ms pause and try again
pause 0.05
ISOLCPUS=$(cat $ISCPUS)
}
- [[ "$EXPECT_VAL2" != "$ISOLCPUS" ]] && return 1
+ [[ "$EXPECTED_ISOLCPUS" != "$ISOLCPUS" ]] && return 1
ISOLCPUS=
+ EXPECTED_ISOLCPUS=$EXPECTED_SDOMAIN
#
# Use the sched domain in debugfs to check isolated CPUs, if available
@@ -736,7 +827,7 @@ check_isolcpus()
done
[[ "$ISOLCPUS" = *- ]] && ISOLCPUS=${ISOLCPUS}$LASTISOLCPU
- [[ "$EXPECT_VAL" = "$ISOLCPUS" ]]
+ [[ "$EXPECTED_SDOMAIN" = "$ISOLCPUS" ]]
}
test_fail()
@@ -774,6 +865,63 @@ null_isolcpus_check()
}
#
+# Check state transition test result
+# $1 - Test number
+# $2 - Expected effective CPU values
+# $3 - Expected partition states
+# $4 - Expected isolated CPUs
+#
+check_test_results()
+{
+ _NR=$1
+ _ECPUS="$2"
+ _PSTATES="$3"
+ _ISOLCPUS="$4"
+
+ [[ -n "$_ECPUS" && "$_ECPUS" != . ]] && {
+ check_effective_cpus $_ECPUS
+ [[ $? -ne 0 ]] && test_fail $_NR "effective CPU" \
+ "Cgroup $CGRP: expected $EXPECTED_CPUS, got $ACTUAL_CPUS"
+ }
+
+ [[ -n "$_PSTATES" && "$_PSTATES" != . ]] && {
+ check_cgroup_states $_PSTATES
+ [[ $? -ne 0 ]] && test_fail $_NR states \
+ "Cgroup $CGRP: expected $EXPECTED_STATE, got $ACTUAL_STATE"
+ }
+
+ # Compare the expected isolated CPUs with the actual ones,
+ # if available
+ [[ -n "$_ISOLCPUS" ]] && {
+ check_isolcpus $_ISOLCPUS
+ [[ $? -ne 0 ]] && {
+ [[ -n "$BOOT_ISOLCPUS" ]] && _ISOLCPUS=${_ISOLCPUS},${BOOT_ISOLCPUS}
+ test_fail $_NR "isolated CPU" \
+ "Expect $_ISOLCPUS, get $ISOLCPUS instead"
+ }
+ }
+ reset_cgroup_states
+ #
+ # Check to see if effective cpu list changes
+ #
+ _NEWLIST=$(cat $CGROUP2/cpuset.cpus.effective)
+ RETRY=0
+ while [[ $_NEWLIST != $CPULIST && $RETRY -lt 8 ]]
+ do
+ # Wait a bit longer & recheck a few times
+ pause 0.02
+ ((RETRY++))
+ _NEWLIST=$(cat $CGROUP2/cpuset.cpus.effective)
+ done
+ [[ $_NEWLIST != $CPULIST ]] && {
+ echo "Effective cpus changed to $_NEWLIST after test $_NR!"
+ exit 1
+ }
+ null_isolcpus_check
+ [[ $VERBOSE -gt 0 ]] && echo "Test $I done."
+}
+
+#
# Run cpuset state transition test
# $1 - test matrix name
#
@@ -785,6 +933,8 @@ run_state_test()
{
TEST=$1
CONTROLLER=cpuset
+ CGROUP_LIST=". A1 A1/A2 A1/A2/A3 B1"
+ RESET_LIST="A1/A2/A3 A1/A2 A1 B1"
I=0
eval CNT="\${#$TEST[@]}"
@@ -812,10 +962,11 @@ run_state_test()
STATES=${11}
ICPUS=${12}
- set_ctrl_state_noerr B1 $OLD_B1
set_ctrl_state_noerr A1 $OLD_A1
set_ctrl_state_noerr A1/A2 $OLD_A2
set_ctrl_state_noerr A1/A2/A3 $OLD_A3
+ set_ctrl_state_noerr B1 $OLD_B1
+
RETVAL=0
set_ctrl_state A1 $NEW_A1; ((RETVAL += $?))
set_ctrl_state A1/A2 $NEW_A2; ((RETVAL += $?))
@@ -824,47 +975,79 @@ run_state_test()
[[ $RETVAL -ne $RESULT ]] && test_fail $I result
- [[ -n "$ECPUS" && "$ECPUS" != . ]] && {
- check_effective_cpus $ECPUS
- [[ $? -ne 0 ]] && test_fail $I "effective CPU"
- }
+ check_test_results $I "$ECPUS" "$STATES" "$ICPUS"
+ ((I++))
+ done
+ echo "All $I tests of $TEST PASSED."
+}
- [[ -n "$STATES" && "$STATES" != . ]] && {
- check_cgroup_states $STATES
- [[ $? -ne 0 ]] && test_fail $I states
- }
+#
+# Run cpuset remote partition state transition test
+# $1 - test matrix name
+#
+run_remote_state_test()
+{
+ TEST=$1
+ CONTROLLER=cpuset
+ [[ -d rtest ]] || mkdir rtest
+ cd rtest
+ echo +cpuset > cgroup.subtree_control
+ echo "1-7" > cpuset.cpus
+ echo "1-7" > cpuset.cpus.exclusive
+ CGROUP_LIST=".. . p1 p2 p1/c11 p1/c12 p2/c21 p2/c22"
+ RESET_LIST="p1/c11 p1/c12 p2/c21 p2/c22 p1 p2"
+ I=0
+ eval CNT="\${#$TEST[@]}"
- # Compare the expected isolated CPUs with the actual ones,
- # if available
- [[ -n "$ICPUS" ]] && {
- check_isolcpus $ICPUS
- [[ $? -ne 0 ]] && {
- [[ -n "$BOOT_ISOLCPUS" ]] && ICPUS=${ICPUS},${BOOT_ISOLCPUS}
- test_fail $I "isolated CPU" \
- "Expect $ICPUS, get $ISOLCPUS instead"
- }
- }
- reset_cgroup_states
- #
- # Check to see if effective cpu list changes
- #
- NEWLIST=$(cat cpuset.cpus.effective)
- RETRY=0
- while [[ $NEWLIST != $CPULIST && $RETRY -lt 8 ]]
- do
- # Wait a bit longer & recheck a few times
- pause 0.02
- ((RETRY++))
- NEWLIST=$(cat cpuset.cpus.effective)
- done
- [[ $NEWLIST != $CPULIST ]] && {
- echo "Effective cpus changed to $NEWLIST after test $I!"
- exit 1
+ reset_cgroup_states
+ console_msg "Running remote partition state transition test ..."
+
+ while [[ $I -lt $CNT ]]
+ do
+ echo "Running test $I ..." > $CONSOLE
+ [[ $VERBOSE -gt 1 ]] && {
+ echo ""
+ eval echo \${$TEST[$I]}
}
- null_isolcpus_check
- [[ $VERBOSE -gt 0 ]] && echo "Test $I done."
+ eval set -- "\${$TEST[$I]}"
+ OLD_p1=$1
+ OLD_p2=$2
+ OLD_c11=$3
+ OLD_c12=$4
+ OLD_c21=$5
+ OLD_c22=$6
+ NEW_p1=$7
+ NEW_p2=$8
+ NEW_c11=$9
+ NEW_c12=${10}
+ NEW_c21=${11}
+ NEW_c22=${12}
+ ECPUS=${13}
+ STATES=${14}
+ ICPUS=${15}
+
+ set_ctrl_state_noerr p1 $OLD_p1
+ set_ctrl_state_noerr p2 $OLD_p2
+ set_ctrl_state_noerr p1/c11 $OLD_c11
+ set_ctrl_state_noerr p1/c12 $OLD_c12
+ set_ctrl_state_noerr p2/c21 $OLD_c21
+ set_ctrl_state_noerr p2/c22 $OLD_c22
+
+ RETVAL=0
+ set_ctrl_state p1 $NEW_p1 ; ((RETVAL += $?))
+ set_ctrl_state p2 $NEW_p2 ; ((RETVAL += $?))
+ set_ctrl_state p1/c11 $NEW_c11; ((RETVAL += $?))
+ set_ctrl_state p1/c12 $NEW_c12; ((RETVAL += $?))
+ set_ctrl_state p2/c21 $NEW_c21; ((RETVAL += $?))
+ set_ctrl_state p2/c22 $NEW_c22; ((RETVAL += $?))
+
+ [[ $RETVAL -ne 0 ]] && test_fail $I result
+
+ check_test_results $I "$ECPUS" "$STATES" "$ICPUS"
((I++))
done
+ cd ..
+ rmdir rtest
echo "All $I tests of $TEST PASSED."
}
@@ -932,6 +1115,7 @@ test_isolated()
echo $$ > $CGROUP2/cgroup.procs
[[ -d A1 ]] && rmdir A1
null_isolcpus_check
+ pause 0.05
}
#
@@ -997,10 +1181,13 @@ test_inotify()
else
echo "Inotify test PASSED"
fi
+ echo member > cpuset.cpus.partition
+ echo "" > cpuset.cpus
}
trap cleanup 0 2 3 6
run_state_test TEST_MATRIX
+run_remote_state_test REMOTE_TEST_MATRIX
test_isolated
test_inotify
echo "All tests PASSED."
diff --git a/tools/testing/selftests/drivers/net/hds.py b/tools/testing/selftests/drivers/net/hds.py
index 8b7f6acad15f..7c90a040ce45 100755
--- a/tools/testing/selftests/drivers/net/hds.py
+++ b/tools/testing/selftests/drivers/net/hds.py
@@ -6,7 +6,7 @@ import os
from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_raises, KsftSkipEx
from lib.py import CmdExitFailure, EthtoolFamily, NlError
from lib.py import NetDrvEnv
-from lib.py import defer, ethtool, ip
+from lib.py import defer, ethtool, ip, random
def _get_hds_mode(cfg, netnl) -> str:
@@ -109,6 +109,36 @@ def set_hds_thresh_zero(cfg, netnl) -> None:
ksft_eq(0, rings['hds-thresh'])
+def set_hds_thresh_random(cfg, netnl) -> None:
+ try:
+ rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+ except NlError as e:
+ raise KsftSkipEx('ring-get not supported by device')
+ if 'hds-thresh' not in rings:
+ raise KsftSkipEx('hds-thresh not supported by device')
+ if 'hds-thresh-max' not in rings:
+ raise KsftSkipEx('hds-thresh-max not defined by device')
+
+ if rings['hds-thresh-max'] < 2:
+ raise KsftSkipEx('hds-thresh-max is too small')
+ elif rings['hds-thresh-max'] == 2:
+ hds_thresh = 1
+ else:
+ while True:
+ hds_thresh = random.randint(1, rings['hds-thresh-max'] - 1)
+ if hds_thresh != rings['hds-thresh']:
+ break
+
+ try:
+ netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': hds_thresh})
+ except NlError as e:
+ if e.error == errno.EINVAL:
+ raise KsftSkipEx("hds-thresh-set not supported by the device")
+ elif e.error == errno.EOPNOTSUPP:
+ raise KsftSkipEx("ring-set not supported by the device")
+ rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+ ksft_eq(hds_thresh, rings['hds-thresh'])
+
def set_hds_thresh_max(cfg, netnl) -> None:
try:
rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
@@ -243,6 +273,7 @@ def main() -> None:
get_hds_thresh,
set_hds_disable,
set_hds_enable,
+ set_hds_thresh_random,
set_hds_thresh_zero,
set_hds_thresh_max,
set_hds_thresh_gt,
diff --git a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
index 7d7a6a06cdb7..2d8230da9064 100644
--- a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
+++ b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
@@ -98,7 +98,7 @@ int main(int argc, char *argv[])
info("Calling futex_waitv on f1: %u @ %p with val=%u\n", f1, &f1, f1+1);
res = futex_waitv(&waitv, 1, 0, &to, CLOCK_MONOTONIC);
if (!res || errno != EWOULDBLOCK) {
- ksft_test_result_pass("futex_waitv returned: %d %s\n",
+ ksft_test_result_fail("futex_waitv returned: %d %s\n",
res ? errno : res,
res ? strerror(errno) : "");
ret = RET_FAIL;
diff --git a/tools/testing/selftests/hid/config.common b/tools/testing/selftests/hid/config.common
index 45b5570441ce..b1f40857307d 100644
--- a/tools/testing/selftests/hid/config.common
+++ b/tools/testing/selftests/hid/config.common
@@ -39,7 +39,6 @@ CONFIG_CPU_FREQ_GOV_USERSPACE=y
CONFIG_CPU_FREQ_STAT=y
CONFIG_CPU_IDLE_GOV_LADDER=y
CONFIG_CPUSETS=y
-CONFIG_CRC_T10DIF=y
CONFIG_CRYPTO_BLAKE2B=y
CONFIG_CRYPTO_DEV_VIRTIO=y
CONFIG_CRYPTO_SEQIV=y
diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm
index f773f8f99249..f62b0a5aba35 100644
--- a/tools/testing/selftests/kvm/Makefile.kvm
+++ b/tools/testing/selftests/kvm/Makefile.kvm
@@ -50,8 +50,18 @@ LIBKVM_riscv += lib/riscv/ucall.c
# Non-compiled test targets
TEST_PROGS_x86 += x86/nx_huge_pages_test.sh
+# Compiled test targets valid on all architectures with libkvm support
+TEST_GEN_PROGS_COMMON = demand_paging_test
+TEST_GEN_PROGS_COMMON += dirty_log_test
+TEST_GEN_PROGS_COMMON += guest_print_test
+TEST_GEN_PROGS_COMMON += kvm_binary_stats_test
+TEST_GEN_PROGS_COMMON += kvm_create_max_vcpus
+TEST_GEN_PROGS_COMMON += kvm_page_table_test
+TEST_GEN_PROGS_COMMON += set_memory_region_test
+
# Compiled test targets
-TEST_GEN_PROGS_x86 = x86/cpuid_test
+TEST_GEN_PROGS_x86 = $(TEST_GEN_PROGS_COMMON)
+TEST_GEN_PROGS_x86 += x86/cpuid_test
TEST_GEN_PROGS_x86 += x86/cr4_cpuid_sync_test
TEST_GEN_PROGS_x86 += x86/dirty_log_page_splitting_test
TEST_GEN_PROGS_x86 += x86/feature_msrs_test
@@ -119,27 +129,21 @@ TEST_GEN_PROGS_x86 += x86/triple_fault_event_test
TEST_GEN_PROGS_x86 += x86/recalc_apic_map_test
TEST_GEN_PROGS_x86 += access_tracking_perf_test
TEST_GEN_PROGS_x86 += coalesced_io_test
-TEST_GEN_PROGS_x86 += demand_paging_test
-TEST_GEN_PROGS_x86 += dirty_log_test
TEST_GEN_PROGS_x86 += dirty_log_perf_test
TEST_GEN_PROGS_x86 += guest_memfd_test
-TEST_GEN_PROGS_x86 += guest_print_test
TEST_GEN_PROGS_x86 += hardware_disable_test
-TEST_GEN_PROGS_x86 += kvm_create_max_vcpus
-TEST_GEN_PROGS_x86 += kvm_page_table_test
TEST_GEN_PROGS_x86 += memslot_modification_stress_test
TEST_GEN_PROGS_x86 += memslot_perf_test
TEST_GEN_PROGS_x86 += mmu_stress_test
TEST_GEN_PROGS_x86 += rseq_test
-TEST_GEN_PROGS_x86 += set_memory_region_test
TEST_GEN_PROGS_x86 += steal_time
-TEST_GEN_PROGS_x86 += kvm_binary_stats_test
TEST_GEN_PROGS_x86 += system_counter_offset_test
TEST_GEN_PROGS_x86 += pre_fault_memory_test
# Compiled outputs used by test targets
TEST_GEN_PROGS_EXTENDED_x86 += x86/nx_huge_pages_test
+TEST_GEN_PROGS_arm64 = $(TEST_GEN_PROGS_COMMON)
TEST_GEN_PROGS_arm64 += arm64/aarch32_id_regs
TEST_GEN_PROGS_arm64 += arm64/arch_timer_edge_cases
TEST_GEN_PROGS_arm64 += arm64/debug-exceptions
@@ -158,22 +162,16 @@ TEST_GEN_PROGS_arm64 += arm64/no-vgic-v3
TEST_GEN_PROGS_arm64 += access_tracking_perf_test
TEST_GEN_PROGS_arm64 += arch_timer
TEST_GEN_PROGS_arm64 += coalesced_io_test
-TEST_GEN_PROGS_arm64 += demand_paging_test
-TEST_GEN_PROGS_arm64 += dirty_log_test
TEST_GEN_PROGS_arm64 += dirty_log_perf_test
-TEST_GEN_PROGS_arm64 += guest_print_test
TEST_GEN_PROGS_arm64 += get-reg-list
-TEST_GEN_PROGS_arm64 += kvm_create_max_vcpus
-TEST_GEN_PROGS_arm64 += kvm_page_table_test
TEST_GEN_PROGS_arm64 += memslot_modification_stress_test
TEST_GEN_PROGS_arm64 += memslot_perf_test
TEST_GEN_PROGS_arm64 += mmu_stress_test
TEST_GEN_PROGS_arm64 += rseq_test
-TEST_GEN_PROGS_arm64 += set_memory_region_test
TEST_GEN_PROGS_arm64 += steal_time
-TEST_GEN_PROGS_arm64 += kvm_binary_stats_test
-TEST_GEN_PROGS_s390 = s390/memop
+TEST_GEN_PROGS_s390 = $(TEST_GEN_PROGS_COMMON)
+TEST_GEN_PROGS_s390 += s390/memop
TEST_GEN_PROGS_s390 += s390/resets
TEST_GEN_PROGS_s390 += s390/sync_regs_test
TEST_GEN_PROGS_s390 += s390/tprot
@@ -182,27 +180,14 @@ TEST_GEN_PROGS_s390 += s390/debug_test
TEST_GEN_PROGS_s390 += s390/cpumodel_subfuncs_test
TEST_GEN_PROGS_s390 += s390/shared_zeropage_test
TEST_GEN_PROGS_s390 += s390/ucontrol_test
-TEST_GEN_PROGS_s390 += demand_paging_test
-TEST_GEN_PROGS_s390 += dirty_log_test
-TEST_GEN_PROGS_s390 += guest_print_test
-TEST_GEN_PROGS_s390 += kvm_create_max_vcpus
-TEST_GEN_PROGS_s390 += kvm_page_table_test
TEST_GEN_PROGS_s390 += rseq_test
-TEST_GEN_PROGS_s390 += set_memory_region_test
-TEST_GEN_PROGS_s390 += kvm_binary_stats_test
+TEST_GEN_PROGS_riscv = $(TEST_GEN_PROGS_COMMON)
TEST_GEN_PROGS_riscv += riscv/sbi_pmu_test
TEST_GEN_PROGS_riscv += riscv/ebreak_test
TEST_GEN_PROGS_riscv += arch_timer
TEST_GEN_PROGS_riscv += coalesced_io_test
-TEST_GEN_PROGS_riscv += demand_paging_test
-TEST_GEN_PROGS_riscv += dirty_log_test
TEST_GEN_PROGS_riscv += get-reg-list
-TEST_GEN_PROGS_riscv += guest_print_test
-TEST_GEN_PROGS_riscv += kvm_binary_stats_test
-TEST_GEN_PROGS_riscv += kvm_create_max_vcpus
-TEST_GEN_PROGS_riscv += kvm_page_table_test
-TEST_GEN_PROGS_riscv += set_memory_region_test
TEST_GEN_PROGS_riscv += steal_time
SPLIT_TESTS += arch_timer
diff --git a/tools/testing/selftests/kvm/arm64/page_fault_test.c b/tools/testing/selftests/kvm/arm64/page_fault_test.c
index ec33a8f9c908..dc6559dad9d8 100644
--- a/tools/testing/selftests/kvm/arm64/page_fault_test.c
+++ b/tools/testing/selftests/kvm/arm64/page_fault_test.c
@@ -199,7 +199,7 @@ static bool guest_set_ha(void)
if (hadbs == 0)
return false;
- tcr = read_sysreg(tcr_el1) | TCR_EL1_HA;
+ tcr = read_sysreg(tcr_el1) | TCR_HA;
write_sysreg(tcr, tcr_el1);
isb();
diff --git a/tools/testing/selftests/kvm/include/arm64/processor.h b/tools/testing/selftests/kvm/include/arm64/processor.h
index 1e8d0d531fbd..b0fc0f945766 100644
--- a/tools/testing/selftests/kvm/include/arm64/processor.h
+++ b/tools/testing/selftests/kvm/include/arm64/processor.h
@@ -62,6 +62,67 @@
MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL) | \
MAIR_ATTRIDX(MAIR_ATTR_NORMAL_WT, MT_NORMAL_WT))
+/* TCR_EL1 specific flags */
+#define TCR_T0SZ_OFFSET 0
+#define TCR_T0SZ(x) ((UL(64) - (x)) << TCR_T0SZ_OFFSET)
+
+#define TCR_IRGN0_SHIFT 8
+#define TCR_IRGN0_MASK (UL(3) << TCR_IRGN0_SHIFT)
+#define TCR_IRGN0_NC (UL(0) << TCR_IRGN0_SHIFT)
+#define TCR_IRGN0_WBWA (UL(1) << TCR_IRGN0_SHIFT)
+#define TCR_IRGN0_WT (UL(2) << TCR_IRGN0_SHIFT)
+#define TCR_IRGN0_WBnWA (UL(3) << TCR_IRGN0_SHIFT)
+
+#define TCR_ORGN0_SHIFT 10
+#define TCR_ORGN0_MASK (UL(3) << TCR_ORGN0_SHIFT)
+#define TCR_ORGN0_NC (UL(0) << TCR_ORGN0_SHIFT)
+#define TCR_ORGN0_WBWA (UL(1) << TCR_ORGN0_SHIFT)
+#define TCR_ORGN0_WT (UL(2) << TCR_ORGN0_SHIFT)
+#define TCR_ORGN0_WBnWA (UL(3) << TCR_ORGN0_SHIFT)
+
+#define TCR_SH0_SHIFT 12
+#define TCR_SH0_MASK (UL(3) << TCR_SH0_SHIFT)
+#define TCR_SH0_INNER (UL(3) << TCR_SH0_SHIFT)
+
+#define TCR_TG0_SHIFT 14
+#define TCR_TG0_MASK (UL(3) << TCR_TG0_SHIFT)
+#define TCR_TG0_4K (UL(0) << TCR_TG0_SHIFT)
+#define TCR_TG0_64K (UL(1) << TCR_TG0_SHIFT)
+#define TCR_TG0_16K (UL(2) << TCR_TG0_SHIFT)
+
+#define TCR_IPS_SHIFT 32
+#define TCR_IPS_MASK (UL(7) << TCR_IPS_SHIFT)
+#define TCR_IPS_52_BITS (UL(6) << TCR_IPS_SHIFT)
+#define TCR_IPS_48_BITS (UL(5) << TCR_IPS_SHIFT)
+#define TCR_IPS_40_BITS (UL(2) << TCR_IPS_SHIFT)
+#define TCR_IPS_36_BITS (UL(1) << TCR_IPS_SHIFT)
+
+#define TCR_HA (UL(1) << 39)
+#define TCR_DS (UL(1) << 59)
+
+/*
+ * AttrIndx[2:0] encoding (mapping attributes defined in the MAIR* registers).
+ */
+#define PTE_ATTRINDX(t) ((t) << 2)
+#define PTE_ATTRINDX_MASK GENMASK(4, 2)
+#define PTE_ATTRINDX_SHIFT 2
+
+#define PTE_VALID BIT(0)
+#define PGD_TYPE_TABLE BIT(1)
+#define PUD_TYPE_TABLE BIT(1)
+#define PMD_TYPE_TABLE BIT(1)
+#define PTE_TYPE_PAGE BIT(1)
+
+#define PTE_SHARED (UL(3) << 8) /* SH[1:0], inner shareable */
+#define PTE_AF BIT(10)
+
+#define PTE_ADDR_MASK(page_shift) GENMASK(47, (page_shift))
+#define PTE_ADDR_51_48 GENMASK(15, 12)
+#define PTE_ADDR_51_48_SHIFT 12
+#define PTE_ADDR_MASK_LPA2(page_shift) GENMASK(49, (page_shift))
+#define PTE_ADDR_51_50_LPA2 GENMASK(9, 8)
+#define PTE_ADDR_51_50_LPA2_SHIFT 8
+
void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init);
struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
struct kvm_vcpu_init *init, void *guest_code);
@@ -102,12 +163,6 @@ enum {
(v) == VECTOR_SYNC_LOWER_64 || \
(v) == VECTOR_SYNC_LOWER_32)
-/* Access flag */
-#define PTE_AF (1ULL << 10)
-
-/* Access flag update enable/disable */
-#define TCR_EL1_HA (1ULL << 39)
-
void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k,
uint32_t *ipa16k, uint32_t *ipa64k);
diff --git a/tools/testing/selftests/kvm/lib/arm64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c
index 7ba3aa3755f3..9d69904cb608 100644
--- a/tools/testing/selftests/kvm/lib/arm64/processor.c
+++ b/tools/testing/selftests/kvm/lib/arm64/processor.c
@@ -72,13 +72,13 @@ static uint64_t addr_pte(struct kvm_vm *vm, uint64_t pa, uint64_t attrs)
uint64_t pte;
if (use_lpa2_pte_format(vm)) {
- pte = pa & GENMASK(49, vm->page_shift);
- pte |= FIELD_GET(GENMASK(51, 50), pa) << 8;
- attrs &= ~GENMASK(9, 8);
+ pte = pa & PTE_ADDR_MASK_LPA2(vm->page_shift);
+ pte |= FIELD_GET(GENMASK(51, 50), pa) << PTE_ADDR_51_50_LPA2_SHIFT;
+ attrs &= ~PTE_ADDR_51_50_LPA2;
} else {
- pte = pa & GENMASK(47, vm->page_shift);
+ pte = pa & PTE_ADDR_MASK(vm->page_shift);
if (vm->page_shift == 16)
- pte |= FIELD_GET(GENMASK(51, 48), pa) << 12;
+ pte |= FIELD_GET(GENMASK(51, 48), pa) << PTE_ADDR_51_48_SHIFT;
}
pte |= attrs;
@@ -90,12 +90,12 @@ static uint64_t pte_addr(struct kvm_vm *vm, uint64_t pte)
uint64_t pa;
if (use_lpa2_pte_format(vm)) {
- pa = pte & GENMASK(49, vm->page_shift);
- pa |= FIELD_GET(GENMASK(9, 8), pte) << 50;
+ pa = pte & PTE_ADDR_MASK_LPA2(vm->page_shift);
+ pa |= FIELD_GET(PTE_ADDR_51_50_LPA2, pte) << 50;
} else {
- pa = pte & GENMASK(47, vm->page_shift);
+ pa = pte & PTE_ADDR_MASK(vm->page_shift);
if (vm->page_shift == 16)
- pa |= FIELD_GET(GENMASK(15, 12), pte) << 48;
+ pa |= FIELD_GET(PTE_ADDR_51_48, pte) << 48;
}
return pa;
@@ -128,7 +128,8 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm)
static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
uint64_t flags)
{
- uint8_t attr_idx = flags & 7;
+ uint8_t attr_idx = flags & (PTE_ATTRINDX_MASK >> PTE_ATTRINDX_SHIFT);
+ uint64_t pg_attr;
uint64_t *ptep;
TEST_ASSERT((vaddr % vm->page_size) == 0,
@@ -147,18 +148,21 @@ static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, vaddr) * 8;
if (!*ptep)
- *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3);
+ *ptep = addr_pte(vm, vm_alloc_page_table(vm),
+ PGD_TYPE_TABLE | PTE_VALID);
switch (vm->pgtable_levels) {
case 4:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, vaddr) * 8;
if (!*ptep)
- *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3);
+ *ptep = addr_pte(vm, vm_alloc_page_table(vm),
+ PUD_TYPE_TABLE | PTE_VALID);
/* fall through */
case 3:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, vaddr) * 8;
if (!*ptep)
- *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3);
+ *ptep = addr_pte(vm, vm_alloc_page_table(vm),
+ PMD_TYPE_TABLE | PTE_VALID);
/* fall through */
case 2:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, vaddr) * 8;
@@ -167,7 +171,11 @@ static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
TEST_FAIL("Page table levels must be 2, 3, or 4");
}
- *ptep = addr_pte(vm, paddr, (attr_idx << 2) | (1 << 10) | 3); /* AF */
+ pg_attr = PTE_AF | PTE_ATTRINDX(attr_idx) | PTE_TYPE_PAGE | PTE_VALID;
+ if (!use_lpa2_pte_format(vm))
+ pg_attr |= PTE_SHARED;
+
+ *ptep = addr_pte(vm, paddr, pg_attr);
}
void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
@@ -293,20 +301,20 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init)
case VM_MODE_P48V48_64K:
case VM_MODE_P40V48_64K:
case VM_MODE_P36V48_64K:
- tcr_el1 |= 1ul << 14; /* TG0 = 64KB */
+ tcr_el1 |= TCR_TG0_64K;
break;
case VM_MODE_P52V48_16K:
case VM_MODE_P48V48_16K:
case VM_MODE_P40V48_16K:
case VM_MODE_P36V48_16K:
case VM_MODE_P36V47_16K:
- tcr_el1 |= 2ul << 14; /* TG0 = 16KB */
+ tcr_el1 |= TCR_TG0_16K;
break;
case VM_MODE_P52V48_4K:
case VM_MODE_P48V48_4K:
case VM_MODE_P40V48_4K:
case VM_MODE_P36V48_4K:
- tcr_el1 |= 0ul << 14; /* TG0 = 4KB */
+ tcr_el1 |= TCR_TG0_4K;
break;
default:
TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
@@ -319,35 +327,35 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init)
case VM_MODE_P52V48_4K:
case VM_MODE_P52V48_16K:
case VM_MODE_P52V48_64K:
- tcr_el1 |= 6ul << 32; /* IPS = 52 bits */
+ tcr_el1 |= TCR_IPS_52_BITS;
ttbr0_el1 |= FIELD_GET(GENMASK(51, 48), vm->pgd) << 2;
break;
case VM_MODE_P48V48_4K:
case VM_MODE_P48V48_16K:
case VM_MODE_P48V48_64K:
- tcr_el1 |= 5ul << 32; /* IPS = 48 bits */
+ tcr_el1 |= TCR_IPS_48_BITS;
break;
case VM_MODE_P40V48_4K:
case VM_MODE_P40V48_16K:
case VM_MODE_P40V48_64K:
- tcr_el1 |= 2ul << 32; /* IPS = 40 bits */
+ tcr_el1 |= TCR_IPS_40_BITS;
break;
case VM_MODE_P36V48_4K:
case VM_MODE_P36V48_16K:
case VM_MODE_P36V48_64K:
case VM_MODE_P36V47_16K:
- tcr_el1 |= 1ul << 32; /* IPS = 36 bits */
+ tcr_el1 |= TCR_IPS_36_BITS;
break;
default:
TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
}
- sctlr_el1 |= (1 << 0) | (1 << 2) | (1 << 12) /* M | C | I */;
- /* TCR_EL1 |= IRGN0:WBWA | ORGN0:WBWA | SH0:Inner-Shareable */;
- tcr_el1 |= (1 << 8) | (1 << 10) | (3 << 12);
- tcr_el1 |= (64 - vm->va_bits) /* T0SZ */;
+ sctlr_el1 |= SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_I;
+
+ tcr_el1 |= TCR_IRGN0_WBWA | TCR_ORGN0_WBWA | TCR_SH0_INNER;
+ tcr_el1 |= TCR_T0SZ(vm->va_bits);
if (use_lpa2_pte_format(vm))
- tcr_el1 |= (1ul << 59) /* DS */;
+ tcr_el1 |= TCR_DS;
vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), sctlr_el1);
vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1), tcr_el1);
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 279ad8946040..815bc45dd8dc 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -2019,9 +2019,8 @@ static struct exit_reason {
KVM_EXIT_STRING(RISCV_SBI),
KVM_EXIT_STRING(RISCV_CSR),
KVM_EXIT_STRING(NOTIFY),
-#ifdef KVM_EXIT_MEMORY_NOT_PRESENT
- KVM_EXIT_STRING(MEMORY_NOT_PRESENT),
-#endif
+ KVM_EXIT_STRING(LOONGARCH_IOCSR),
+ KVM_EXIT_STRING(MEMORY_FAULT),
};
/*
diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c
index e5898678bfab..1375fca80bcd 100644
--- a/tools/testing/selftests/kvm/rseq_test.c
+++ b/tools/testing/selftests/kvm/rseq_test.c
@@ -196,25 +196,27 @@ static void calc_min_max_cpu(void)
static void help(const char *name)
{
puts("");
- printf("usage: %s [-h] [-u]\n", name);
+ printf("usage: %s [-h] [-u] [-l latency]\n", name);
printf(" -u: Don't sanity check the number of successful KVM_RUNs\n");
+ printf(" -l: Set /dev/cpu_dma_latency to suppress deep sleep states\n");
puts("");
exit(0);
}
int main(int argc, char *argv[])
{
+ int r, i, snapshot, opt, fd = -1, latency = -1;
bool skip_sanity_check = false;
- int r, i, snapshot;
struct kvm_vm *vm;
struct kvm_vcpu *vcpu;
u32 cpu, rseq_cpu;
- int opt;
- while ((opt = getopt(argc, argv, "hu")) != -1) {
+ while ((opt = getopt(argc, argv, "hl:u")) != -1) {
switch (opt) {
case 'u':
skip_sanity_check = true;
+ case 'l':
+ latency = atoi_paranoid(optarg);
break;
case 'h':
default:
@@ -243,6 +245,20 @@ int main(int argc, char *argv[])
pthread_create(&migration_thread, NULL, migration_worker,
(void *)(unsigned long)syscall(SYS_gettid));
+ if (latency >= 0) {
+ /*
+ * Writes to cpu_dma_latency persist only while the file is
+ * open, i.e. it allows userspace to provide guaranteed latency
+ * while running a workload. Keep the file open until the test
+ * completes, otherwise writing cpu_dma_latency is meaningless.
+ */
+ fd = open("/dev/cpu_dma_latency", O_RDWR);
+ TEST_ASSERT(fd >= 0, __KVM_SYSCALL_ERROR("open() /dev/cpu_dma_latency", fd));
+
+ r = write(fd, &latency, 4);
+ TEST_ASSERT(r >= 1, "Error setting /dev/cpu_dma_latency");
+ }
+
for (i = 0; !done; i++) {
vcpu_run(vcpu);
TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC,
@@ -278,6 +294,9 @@ int main(int argc, char *argv[])
"rseq CPU = %d, sched CPU = %d", rseq_cpu, cpu);
}
+ if (fd > 0)
+ close(fd);
+
/*
* Sanity check that the test was able to enter the guest a reasonable
* number of times, e.g. didn't get stalled too often/long waiting for
@@ -293,8 +312,8 @@ int main(int argc, char *argv[])
TEST_ASSERT(skip_sanity_check || i > (NR_TASK_MIGRATIONS / 2),
"Only performed %d KVM_RUNs, task stalled too much?\n\n"
" Try disabling deep sleep states to reduce CPU wakeup latency,\n"
- " e.g. via cpuidle.off=1 or setting /dev/cpu_dma_latency to '0',\n"
- " or run with -u to disable this sanity check.", i);
+ " e.g. via cpuidle.off=1 or via -l <latency>, or run with -u to\n"
+ " disable this sanity check.", i);
pthread_join(migration_thread, NULL);
diff --git a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c
index 2b550eff35f1..390ae2d87493 100644
--- a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c
+++ b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c
@@ -7,6 +7,7 @@
#include "kvm_util.h"
#include "processor.h"
+#include "kselftest.h"
#define CPUID_MWAIT (1u << 3)
@@ -14,6 +15,8 @@ enum monitor_mwait_testcases {
MWAIT_QUIRK_DISABLED = BIT(0),
MISC_ENABLES_QUIRK_DISABLED = BIT(1),
MWAIT_DISABLED = BIT(2),
+ CPUID_DISABLED = BIT(3),
+ TEST_MAX = CPUID_DISABLED * 2 - 1,
};
/*
@@ -35,11 +38,19 @@ do { \
testcase, vector); \
} while (0)
-static void guest_monitor_wait(int testcase)
+static void guest_monitor_wait(void *arg)
{
+ int testcase = (int) (long) arg;
u8 vector;
- GUEST_SYNC(testcase);
+ u64 val = rdmsr(MSR_IA32_MISC_ENABLE) & ~MSR_IA32_MISC_ENABLE_MWAIT;
+ if (!(testcase & MWAIT_DISABLED))
+ val |= MSR_IA32_MISC_ENABLE_MWAIT;
+ wrmsr(MSR_IA32_MISC_ENABLE, val);
+
+ __GUEST_ASSERT(this_cpu_has(X86_FEATURE_MWAIT) == !(testcase & MWAIT_DISABLED),
+ "Expected CPUID.MWAIT %s\n",
+ (testcase & MWAIT_DISABLED) ? "cleared" : "set");
/*
* Arbitrarily MONITOR this function, SVM performs fault checks before
@@ -50,19 +61,6 @@ static void guest_monitor_wait(int testcase)
vector = kvm_asm_safe("mwait", "a"(guest_monitor_wait), "c"(0), "d"(0));
GUEST_ASSERT_MONITOR_MWAIT("MWAIT", testcase, vector);
-}
-
-static void guest_code(void)
-{
- guest_monitor_wait(MWAIT_DISABLED);
-
- guest_monitor_wait(MWAIT_QUIRK_DISABLED | MWAIT_DISABLED);
-
- guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_DISABLED);
- guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED);
-
- guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED | MWAIT_DISABLED);
- guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED);
GUEST_DONE();
}
@@ -74,56 +72,64 @@ int main(int argc, char *argv[])
struct kvm_vm *vm;
struct ucall uc;
int testcase;
+ char test[80];
- TEST_REQUIRE(this_cpu_has(X86_FEATURE_MWAIT));
TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2));
- vm = vm_create_with_one_vcpu(&vcpu, guest_code);
- vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_MWAIT);
+ ksft_print_header();
+ ksft_set_plan(12);
+ for (testcase = 0; testcase <= TEST_MAX; testcase++) {
+ vm = vm_create_with_one_vcpu(&vcpu, guest_monitor_wait);
+ vcpu_args_set(vcpu, 1, (void *)(long)testcase);
+
+ disabled_quirks = 0;
+ if (testcase & MWAIT_QUIRK_DISABLED) {
+ disabled_quirks |= KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS;
+ strcpy(test, "MWAIT can fault");
+ } else {
+ strcpy(test, "MWAIT never faults");
+ }
+ if (testcase & MISC_ENABLES_QUIRK_DISABLED) {
+ disabled_quirks |= KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT;
+ strcat(test, ", MISC_ENABLE updates CPUID");
+ } else {
+ strcat(test, ", no CPUID updates");
+ }
+
+ vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, disabled_quirks);
+
+ if (!(testcase & MISC_ENABLES_QUIRK_DISABLED) &&
+ (!!(testcase & CPUID_DISABLED) ^ !!(testcase & MWAIT_DISABLED)))
+ continue;
+
+ if (testcase & CPUID_DISABLED) {
+ strcat(test, ", CPUID clear");
+ vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_MWAIT);
+ } else {
+ strcat(test, ", CPUID set");
+ vcpu_set_cpuid_feature(vcpu, X86_FEATURE_MWAIT);
+ }
+
+ if (testcase & MWAIT_DISABLED)
+ strcat(test, ", MWAIT disabled");
- while (1) {
vcpu_run(vcpu);
TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
switch (get_ucall(vcpu, &uc)) {
- case UCALL_SYNC:
- testcase = uc.args[1];
- break;
case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- goto done;
+ /* Detected in vcpu_run */
+ break;
case UCALL_DONE:
- goto done;
+ ksft_test_result_pass("%s\n", test);
+ break;
default:
TEST_FAIL("Unknown ucall %lu", uc.cmd);
- goto done;
- }
-
- disabled_quirks = 0;
- if (testcase & MWAIT_QUIRK_DISABLED)
- disabled_quirks |= KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS;
- if (testcase & MISC_ENABLES_QUIRK_DISABLED)
- disabled_quirks |= KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT;
- vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, disabled_quirks);
-
- /*
- * If the MISC_ENABLES quirk (KVM neglects to update CPUID to
- * enable/disable MWAIT) is disabled, toggle the ENABLE_MWAIT
- * bit in MISC_ENABLES accordingly. If the quirk is enabled,
- * the only valid configuration is MWAIT disabled, as CPUID
- * can't be manually changed after running the vCPU.
- */
- if (!(testcase & MISC_ENABLES_QUIRK_DISABLED)) {
- TEST_ASSERT(testcase & MWAIT_DISABLED,
- "Can't toggle CPUID features after running vCPU");
- continue;
+ break;
}
-
- vcpu_set_msr(vcpu, MSR_IA32_MISC_ENABLE,
- (testcase & MWAIT_DISABLED) ? 0 : MSR_IA32_MISC_ENABLE_MWAIT);
+ kvm_vm_free(vm);
}
+ ksft_finished();
-done:
- kvm_vm_free(vm);
return 0;
}
diff --git a/tools/testing/selftests/mincore/mincore_selftest.c b/tools/testing/selftests/mincore/mincore_selftest.c
index e949a43a6145..efabfcbe0b49 100644
--- a/tools/testing/selftests/mincore/mincore_selftest.c
+++ b/tools/testing/selftests/mincore/mincore_selftest.c
@@ -261,9 +261,6 @@ TEST(check_file_mmap)
TH_LOG("No read-ahead pages found in memory");
}
- EXPECT_LT(i, vec_size) {
- TH_LOG("Read-ahead pages reached the end of the file");
- }
/*
* End of the readahead window. The rest of the pages shouldn't
* be in memory.
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 13a3b68181ee..befa66f5a366 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -1441,6 +1441,15 @@ chk_join_nr()
fi
fi
+ count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynAckHMacFailure")
+ if [ -z "$count" ]; then
+ rc=${KSFT_SKIP}
+ elif [ "$count" != "0" ]; then
+ rc=${KSFT_FAIL}
+ print_check "synack HMAC"
+ fail_test "got $count JOIN[s] synack HMAC failure expected 0"
+ fi
+
count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckRx")
if [ -z "$count" ]; then
rc=${KSFT_SKIP}
@@ -1450,6 +1459,15 @@ chk_join_nr()
fail_test "got $count JOIN[s] ack rx expected $ack_nr"
fi
+ count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckHMacFailure")
+ if [ -z "$count" ]; then
+ rc=${KSFT_SKIP}
+ elif [ "$count" != "0" ]; then
+ rc=${KSFT_FAIL}
+ print_check "ack HMAC"
+ fail_test "got $count JOIN[s] ack HMAC failure expected 0"
+ fi
+
print_results "join Rx" ${rc}
join_syn_tx="${join_syn_tx:-${syn_nr}}" \
diff --git a/tools/testing/selftests/net/netfilter/nft_concat_range.sh b/tools/testing/selftests/net/netfilter/nft_concat_range.sh
index 47088b005390..1f5979c1510c 100755
--- a/tools/testing/selftests/net/netfilter/nft_concat_range.sh
+++ b/tools/testing/selftests/net/netfilter/nft_concat_range.sh
@@ -27,7 +27,7 @@ TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto
net6_port_net6_port net_port_mac_proto_net"
# Reported bugs, also described by TYPE_ variables below
-BUGS="flush_remove_add reload net_port_proto_match"
+BUGS="flush_remove_add reload net_port_proto_match avx2_mismatch"
# List of possible paths to pktgen script from kernel tree for performance tests
PKTGEN_SCRIPT_PATHS="
@@ -387,6 +387,25 @@ race_repeat 0
perf_duration 0
"
+
+TYPE_avx2_mismatch="
+display avx2 false match
+type_spec inet_proto . ipv6_addr
+chain_spec meta l4proto . ip6 daddr
+dst proto addr6
+src
+start 1
+count 1
+src_delta 1
+tools ping
+proto icmp6
+
+race_repeat 0
+
+perf_duration 0
+"
+
+
# Set template for all tests, types and rules are filled in depending on test
set_template='
flush ruleset
@@ -1629,6 +1648,24 @@ test_bug_net_port_proto_match() {
nft flush ruleset
}
+test_bug_avx2_mismatch()
+{
+ setup veth send_"${proto}" set || return ${ksft_skip}
+
+ local a1="fe80:dead:01ff:0a02:0b03:6007:8009:a001"
+ local a2="fe80:dead:01fe:0a02:0b03:6007:8009:a001"
+
+ nft "add element inet filter test { icmpv6 . $a1 }"
+
+ dst_addr6="$a2"
+ send_icmp6
+
+ if [ "$(count_packets)" -gt "0" ]; then
+ err "False match for $a2"
+ return 1
+ fi
+}
+
test_reported_issues() {
eval test_bug_"${subtest}"
}
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index 9a85f93c33d8..5ded3b3a7538 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -1753,6 +1753,42 @@ TEST_F(tls_basic, rekey_tx)
EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
}
+TEST_F(tls_basic, disconnect)
+{
+ char const *test_str = "test_message";
+ int send_len = strlen(test_str) + 1;
+ struct tls_crypto_info_keys key;
+ struct sockaddr_in addr;
+ char buf[20];
+ int ret;
+
+ if (self->notls)
+ return;
+
+ tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128,
+ &key, 0);
+
+ ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &key, key.len);
+ ASSERT_EQ(ret, 0);
+
+ /* Pre-queue the data so that setsockopt parses it but doesn't
+ * dequeue it from the TCP socket. recvmsg would dequeue.
+ */
+ EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+
+ ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &key, key.len);
+ ASSERT_EQ(ret, 0);
+
+ addr.sin_family = AF_UNSPEC;
+ addr.sin_addr.s_addr = htonl(INADDR_ANY);
+ addr.sin_port = 0;
+ ret = connect(self->cfd, &addr, sizeof(addr));
+ EXPECT_EQ(ret, -1);
+ EXPECT_EQ(errno, EOPNOTSUPP);
+
+ EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len);
+}
+
TEST_F(tls, rekey)
{
char const *test_str_1 = "test_message_before_rekey";
diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
index 25454fd95537..d4ea9cd845a3 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
@@ -158,5 +158,160 @@
"$TC qdisc del dev $DUMMY handle 1: root",
"$IP addr del 10.10.10.10/24 dev $DUMMY || true"
]
+ },
+ {
+ "id": "a4bb",
+ "name": "Test FQ_CODEL with HTB parent - force packet drop with empty queue",
+ "category": [
+ "qdisc",
+ "fq_codel",
+ "htb"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link set dev $DUMMY up || true",
+ "$IP addr add 10.10.10.10/24 dev $DUMMY || true",
+ "$TC qdisc add dev $DUMMY handle 1: root htb default 10",
+ "$TC class add dev $DUMMY parent 1: classid 1:10 htb rate 1kbit",
+ "$TC qdisc add dev $DUMMY parent 1:10 handle 10: fq_codel memory_limit 1 flows 1 target 0.1ms interval 1ms",
+ "$TC filter add dev $DUMMY parent 1: protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:10",
+ "ping -c 5 -f -I $DUMMY 10.10.10.1 > /dev/null || true",
+ "sleep 0.1"
+ ],
+ "cmdUnderTest": "$TC -s qdisc show dev $DUMMY",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -s qdisc show dev $DUMMY | grep -A 5 'qdisc fq_codel'",
+ "matchPattern": "dropped [1-9][0-9]*",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP addr del 10.10.10.10/24 dev $DUMMY || true"
+ ]
+ },
+ {
+ "id": "a4be",
+ "name": "Test FQ_CODEL with QFQ parent - force packet drop with empty queue",
+ "category": [
+ "qdisc",
+ "fq_codel",
+ "qfq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link set dev $DUMMY up || true",
+ "$IP addr add 10.10.10.10/24 dev $DUMMY || true",
+ "$TC qdisc add dev $DUMMY handle 1: root qfq",
+ "$TC class add dev $DUMMY parent 1: classid 1:10 qfq weight 1 maxpkt 1000",
+ "$TC qdisc add dev $DUMMY parent 1:10 handle 10: fq_codel memory_limit 1 flows 1 target 0.1ms interval 1ms",
+ "$TC filter add dev $DUMMY parent 1: protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:10",
+ "ping -c 10 -s 1000 -f -I $DUMMY 10.10.10.1 > /dev/null || true",
+ "sleep 0.1"
+ ],
+ "cmdUnderTest": "$TC -s qdisc show dev $DUMMY",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -s qdisc show dev $DUMMY | grep -A 5 'qdisc fq_codel'",
+ "matchPattern": "dropped [1-9][0-9]*",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP addr del 10.10.10.10/24 dev $DUMMY || true"
+ ]
+ },
+ {
+ "id": "a4bf",
+ "name": "Test FQ_CODEL with HFSC parent - force packet drop with empty queue",
+ "category": [
+ "qdisc",
+ "fq_codel",
+ "hfsc"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link set dev $DUMMY up || true",
+ "$IP addr add 10.10.10.10/24 dev $DUMMY || true",
+ "$TC qdisc add dev $DUMMY handle 1: root hfsc default 10",
+ "$TC class add dev $DUMMY parent 1: classid 1:10 hfsc sc rate 1kbit ul rate 1kbit",
+ "$TC qdisc add dev $DUMMY parent 1:10 handle 10: fq_codel memory_limit 1 flows 1 target 0.1ms interval 1ms",
+ "$TC filter add dev $DUMMY parent 1: protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:10",
+ "ping -c 5 -f -I $DUMMY 10.10.10.1 > /dev/null || true",
+ "sleep 0.1"
+ ],
+ "cmdUnderTest": "$TC -s qdisc show dev $DUMMY",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -s qdisc show dev $DUMMY | grep -A 5 'qdisc fq_codel'",
+ "matchPattern": "dropped [1-9][0-9]*",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP addr del 10.10.10.10/24 dev $DUMMY || true"
+ ]
+ },
+ {
+ "id": "a4c0",
+ "name": "Test FQ_CODEL with DRR parent - force packet drop with empty queue",
+ "category": [
+ "qdisc",
+ "fq_codel",
+ "drr"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link set dev $DUMMY up || true",
+ "$IP addr add 10.10.10.10/24 dev $DUMMY || true",
+ "$TC qdisc add dev $DUMMY handle 1: root drr",
+ "$TC class add dev $DUMMY parent 1: classid 1:10 drr quantum 1500",
+ "$TC qdisc add dev $DUMMY parent 1:10 handle 10: fq_codel memory_limit 1 flows 1 target 0.1ms interval 1ms",
+ "$TC filter add dev $DUMMY parent 1: protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:10",
+ "ping -c 5 -f -I $DUMMY 10.10.10.1 > /dev/null || true",
+ "sleep 0.1"
+ ],
+ "cmdUnderTest": "$TC -s qdisc show dev $DUMMY",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -s qdisc show dev $DUMMY | grep -A 5 'qdisc fq_codel'",
+ "matchPattern": "dropped [1-9][0-9]*",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP addr del 10.10.10.10/24 dev $DUMMY || true"
+ ]
+ },
+ {
+ "id": "a4c1",
+ "name": "Test FQ_CODEL with ETS parent - force packet drop with empty queue",
+ "category": [
+ "qdisc",
+ "fq_codel",
+ "ets"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link set dev $DUMMY up || true",
+ "$IP addr add 10.10.10.10/24 dev $DUMMY || true",
+ "$TC qdisc add dev $DUMMY handle 1: root ets bands 2 strict 1",
+ "$TC class change dev $DUMMY parent 1: classid 1:1 ets",
+ "$TC qdisc add dev $DUMMY parent 1:1 handle 10: fq_codel memory_limit 1 flows 1 target 0.1ms interval 1ms",
+ "$TC filter add dev $DUMMY parent 1: protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:1",
+ "ping -c 5 -f -I $DUMMY 10.10.10.1 > /dev/null || true",
+ "sleep 0.1"
+ ],
+ "cmdUnderTest": "$TC -s qdisc show dev $DUMMY",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -s qdisc show dev $DUMMY | grep -A 5 'qdisc fq_codel'",
+ "matchPattern": "dropped [1-9][0-9]*",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP addr del 10.10.10.10/24 dev $DUMMY || true"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json
index 50e8d72781cb..28c6ce6da7db 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json
@@ -228,5 +228,41 @@
"matchCount": "0",
"teardown": [
]
+ },
+ {
+ "id": "7f8f",
+ "name": "Check that a derived limit of 1 is rejected (limit 2 depth 1 flows 1)",
+ "category": [
+ "qdisc",
+ "sfq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfq limit 2 depth 1 flows 1",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "sfq",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "5168",
+ "name": "Check that a derived limit of 1 is rejected (limit 2 depth 1 divisor 1)",
+ "category": [
+ "qdisc",
+ "sfq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfq limit 2 depth 1 divisor 1",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "sfq",
+ "matchCount": "0",
+ "teardown": []
}
]
diff --git a/tools/testing/selftests/tpm2/.gitignore b/tools/testing/selftests/tpm2/.gitignore
new file mode 100644
index 000000000000..6d6165c5e35d
--- /dev/null
+++ b/tools/testing/selftests/tpm2/.gitignore
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+AsyncTest.log
+SpaceTest.log
diff --git a/tools/testing/selftests/tpm2/test_smoke.sh b/tools/testing/selftests/tpm2/test_smoke.sh
index 168f4b166234..3a60e6c6f5c9 100755
--- a/tools/testing/selftests/tpm2/test_smoke.sh
+++ b/tools/testing/selftests/tpm2/test_smoke.sh
@@ -6,6 +6,6 @@ ksft_skip=4
[ -e /dev/tpm0 ] || exit $ksft_skip
read tpm_version < /sys/class/tpm/tpm0/tpm_version_major
-[ "$tpm_version" == 2 ] || exit $ksft_skip
+[ "$tpm_version" = 2 ] || exit $ksft_skip
python3 -m unittest -v tpm2_tests.SmokeTest 2>&1
diff --git a/tools/testing/selftests/ublk/test_stripe_04.sh b/tools/testing/selftests/ublk/test_stripe_04.sh
new file mode 100755
index 000000000000..1f2b642381d1
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_stripe_04.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+TID="stripe_04"
+ERR_CODE=0
+
+_prep_test "stripe" "mkfs & mount & umount on zero copy"
+
+backfile_0=$(_create_backfile 256M)
+backfile_1=$(_create_backfile 256M)
+dev_id=$(_add_ublk_dev -t stripe -z -q 2 "$backfile_0" "$backfile_1")
+_check_add_dev $TID $? "$backfile_0" "$backfile_1"
+
+_mkfs_mount_test /dev/ublkb"${dev_id}"
+ERR_CODE=$?
+
+_cleanup_test "stripe"
+
+_remove_backfile "$backfile_0"
+_remove_backfile "$backfile_1"
+
+_show_result $TID $ERR_CODE
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 746e1f466aa6..727b542074e7 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -75,7 +75,7 @@ config KVM_COMPAT
depends on KVM && COMPAT && !(S390 || ARM64 || RISCV)
config HAVE_KVM_IRQ_BYPASS
- bool
+ tristate
select IRQ_BYPASS_MANAGER
config HAVE_KVM_VCPU_ASYNC_IOCTL
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 249ba5b72e9b..11e5d1e3f12e 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -149,7 +149,7 @@ irqfd_shutdown(struct work_struct *work)
/*
* It is now safe to release the object's resources
*/
-#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
+#if IS_ENABLED(CONFIG_HAVE_KVM_IRQ_BYPASS)
irq_bypass_unregister_consumer(&irqfd->consumer);
#endif
eventfd_ctx_put(irqfd->eventfd);
@@ -274,7 +274,7 @@ static void irqfd_update(struct kvm *kvm, struct kvm_kernel_irqfd *irqfd)
write_seqcount_end(&irqfd->irq_entry_sc);
}
-#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
+#if IS_ENABLED(CONFIG_HAVE_KVM_IRQ_BYPASS)
void __attribute__((weak)) kvm_arch_irq_bypass_stop(
struct irq_bypass_consumer *cons)
{
@@ -424,7 +424,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
if (events & EPOLLIN)
schedule_work(&irqfd->inject);
-#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
+#if IS_ENABLED(CONFIG_HAVE_KVM_IRQ_BYPASS)
if (kvm_arch_has_irq_bypass()) {
irqfd->consumer.token = (void *)irqfd->eventfd;
irqfd->consumer.add_producer = kvm_arch_irq_bypass_add_producer;
@@ -609,14 +609,14 @@ void kvm_irq_routing_update(struct kvm *kvm)
spin_lock_irq(&kvm->irqfds.lock);
list_for_each_entry(irqfd, &kvm->irqfds.items, list) {
-#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
+#if IS_ENABLED(CONFIG_HAVE_KVM_IRQ_BYPASS)
/* Under irqfds.lock, so can read irq_entry safely */
struct kvm_kernel_irq_routing_entry old = irqfd->irq_entry;
#endif
irqfd_update(kvm, irqfd);
-#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
+#if IS_ENABLED(CONFIG_HAVE_KVM_IRQ_BYPASS)
if (irqfd->producer &&
kvm_arch_irqfd_route_changed(&old, &irqfd->irq_entry)) {
int ret = kvm_arch_update_irqfd_routing(