summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.mailmap1
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt3
-rw-r--r--Documentation/devicetree/bindings/dma/arm,dma-350.yaml44
-rw-r--r--Documentation/devicetree/bindings/dma/fsl,edma.yaml4
-rw-r--r--Documentation/devicetree/bindings/dma/nvidia,tegra210-adma.yaml2
-rw-r--r--Documentation/devicetree/bindings/dma/qcom,bam-dma.yaml2
-rw-r--r--Documentation/devicetree/bindings/dma/renesas,rz-dmac.yaml107
-rw-r--r--Documentation/devicetree/bindings/phy/brcm,brcmstb-usb-phy.yaml5
-rw-r--r--Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml37
-rw-r--r--Documentation/devicetree/bindings/phy/mediatek,dsi-phy.yaml1
-rw-r--r--Documentation/devicetree/bindings/phy/mediatek,tphy.yaml1
-rw-r--r--Documentation/devicetree/bindings/phy/mediatek,xsphy.yaml16
-rw-r--r--Documentation/devicetree/bindings/phy/phy-cadence-torrent.yaml3
-rw-r--r--Documentation/devicetree/bindings/phy/phy-rockchip-naneng-combphy.yaml3
-rw-r--r--Documentation/devicetree/bindings/phy/phy-rockchip-typec.txt84
-rw-r--r--Documentation/devicetree/bindings/phy/phy-rockchip-usbdp.yaml3
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,ipq5332-uniphy-pcie-phy.yaml49
-rw-r--r--Documentation/devicetree/bindings/phy/renesas,usb2-phy.yaml8
-rw-r--r--Documentation/devicetree/bindings/phy/rockchip,inno-usb2phy.yaml4
-rw-r--r--Documentation/devicetree/bindings/phy/rockchip,pcie3-phy.yaml3
-rw-r--r--Documentation/devicetree/bindings/phy/rockchip,rk3399-pcie-phy.yaml45
-rw-r--r--Documentation/devicetree/bindings/phy/rockchip,rk3399-typec-phy.yaml116
-rw-r--r--Documentation/devicetree/bindings/phy/rockchip-pcie-phy.txt36
-rw-r--r--Documentation/devicetree/bindings/phy/samsung,exynos2200-eusb2-phy.yaml80
-rw-r--r--Documentation/devicetree/bindings/phy/samsung,usb3-drd-phy.yaml40
-rw-r--r--Documentation/devicetree/bindings/rtc/atmel,at91rm9200-rtc.yaml4
-rw-r--r--Documentation/devicetree/bindings/rtc/atmel,at91sam9260-rtt.yaml1
-rw-r--r--Documentation/devicetree/bindings/rtc/nxp,s32g-rtc.yaml72
-rw-r--r--Documentation/devicetree/bindings/rtc/qcom-pm8xxx-rtc.yaml6
-rw-r--r--Documentation/devicetree/bindings/rtc/renesas,rzn1-rtc.yaml8
-rw-r--r--Documentation/devicetree/bindings/soc/rockchip/grf.yaml13
-rw-r--r--Documentation/devicetree/bindings/usb/rockchip,dwc3.yaml2
-rw-r--r--Documentation/driver-api/dmaengine/provider.rst4
-rw-r--r--Documentation/netlink/specs/rt-link.yaml68
-rw-r--r--Documentation/rust/coding-guidelines.rst29
-rw-r--r--Documentation/rust/quick-start.rst44
-rw-r--r--Documentation/rust/testing.rst80
-rw-r--r--Documentation/virt/uml/user_mode_linux_howto_v2.rst47
-rw-r--r--MAINTAINERS42
-rw-r--r--arch/arm/include/asm/simd.h3
-rw-r--r--arch/arm/mm/ioremap.c4
-rw-r--r--arch/arm/vfp/vfpmodule.c1
-rw-r--r--arch/arm64/boot/dts/renesas/r9a09g057.dtsi165
-rw-r--r--arch/arm64/include/asm/el2_setup.h24
-rw-r--r--arch/arm64/include/asm/kernel-pgtable.h2
-rw-r--r--arch/arm64/include/asm/tlbflush.h9
-rw-r--r--arch/arm64/kernel/cpufeature.c7
-rw-r--r--arch/arm64/kernel/cpuinfo.c7
-rw-r--r--arch/arm64/kernel/image-vars.h17
-rw-r--r--arch/arm64/kernel/pi/idreg-override.c3
-rw-r--r--arch/um/Kconfig6
-rw-r--r--arch/um/configs/i386_defconfig7
-rw-r--r--arch/um/configs/x86_64_defconfig7
-rw-r--r--arch/um/drivers/Kconfig204
-rw-r--r--arch/um/drivers/Makefile22
-rw-r--r--arch/um/drivers/chan_kern.c10
-rw-r--r--arch/um/drivers/daemon.h29
-rw-r--r--arch/um/drivers/daemon_kern.c95
-rw-r--r--arch/um/drivers/daemon_user.c194
-rw-r--r--arch/um/drivers/net_kern.c889
-rw-r--r--arch/um/drivers/net_user.c271
-rw-r--r--arch/um/drivers/slip.h21
-rw-r--r--arch/um/drivers/slip_common.c55
-rw-r--r--arch/um/drivers/slip_common.h106
-rw-r--r--arch/um/drivers/slip_kern.c93
-rw-r--r--arch/um/drivers/slip_user.c252
-rw-r--r--arch/um/drivers/slirp.h34
-rw-r--r--arch/um/drivers/slirp_kern.c120
-rw-r--r--arch/um/drivers/slirp_user.c124
-rw-r--r--arch/um/drivers/umcast.h27
-rw-r--r--arch/um/drivers/umcast_kern.c188
-rw-r--r--arch/um/drivers/umcast_user.c184
-rw-r--r--arch/um/drivers/vde.h32
-rw-r--r--arch/um/drivers/vde_kern.c129
-rw-r--r--arch/um/drivers/vde_user.c125
-rw-r--r--arch/um/drivers/vector_kern.c48
-rw-r--r--arch/um/drivers/vfio_kern.c642
-rw-r--r--arch/um/drivers/vfio_user.c327
-rw-r--r--arch/um/drivers/vfio_user.h44
-rw-r--r--arch/um/drivers/virt-pci.c15
-rw-r--r--arch/um/drivers/xterm.c11
-rw-r--r--arch/um/include/asm/asm-prototypes.h5
-rw-r--r--arch/um/include/asm/irq.h5
-rw-r--r--arch/um/include/asm/mmu.h3
-rw-r--r--arch/um/include/shared/common-offsets.h4
-rw-r--r--arch/um/include/shared/irq_user.h2
-rw-r--r--arch/um/include/shared/net_kern.h69
-rw-r--r--arch/um/include/shared/net_user.h52
-rw-r--r--arch/um/include/shared/os.h4
-rw-r--r--arch/um/include/shared/skas/mm_id.h9
-rw-r--r--arch/um/include/shared/skas/skas.h1
-rw-r--r--arch/um/include/shared/skas/stub-data.h20
-rw-r--r--arch/um/kernel/Makefile1
-rw-r--r--arch/um/kernel/ioport.c13
-rw-r--r--arch/um/kernel/irq.c6
-rw-r--r--arch/um/kernel/skas/mmu.c89
-rw-r--r--arch/um/kernel/skas/stub.c130
-rw-r--r--arch/um/kernel/skas/stub_exe.c159
-rw-r--r--arch/um/kernel/time.c13
-rw-r--r--arch/um/kernel/trap.c130
-rw-r--r--arch/um/os-Linux/Makefile2
-rw-r--r--arch/um/os-Linux/drivers/Makefile13
-rw-r--r--arch/um/os-Linux/drivers/etap.h21
-rw-r--r--arch/um/os-Linux/drivers/ethertap_kern.c100
-rw-r--r--arch/um/os-Linux/drivers/ethertap_user.c248
-rw-r--r--arch/um/os-Linux/drivers/tuntap.h21
-rw-r--r--arch/um/os-Linux/drivers/tuntap_kern.c86
-rw-r--r--arch/um/os-Linux/drivers/tuntap_user.c215
-rw-r--r--arch/um/os-Linux/file.c15
-rw-r--r--arch/um/os-Linux/internal.h5
-rw-r--r--arch/um/os-Linux/process.c31
-rw-r--r--arch/um/os-Linux/registers.c4
-rw-r--r--arch/um/os-Linux/sigio.c3
-rw-r--r--arch/um/os-Linux/signal.c19
-rw-r--r--arch/um/os-Linux/skas/mem.c103
-rw-r--r--arch/um/os-Linux/skas/process.c482
-rw-r--r--arch/um/os-Linux/start_up.c195
-rw-r--r--arch/x86/um/asm/checksum.h3
-rw-r--r--arch/x86/um/asm/processor.h8
-rw-r--r--arch/x86/um/os-Linux/mcontext.c218
-rw-r--r--arch/x86/um/ptrace.c76
-rw-r--r--arch/x86/um/shared/sysdep/kernel-offsets.h2
-rw-r--r--arch/x86/um/shared/sysdep/mcontext.h9
-rw-r--r--arch/x86/um/shared/sysdep/stub-data.h23
-rw-r--r--arch/x86/um/shared/sysdep/stub.h2
-rw-r--r--arch/x86/um/shared/sysdep/stub_32.h13
-rw-r--r--arch/x86/um/shared/sysdep/stub_64.h17
-rw-r--r--arch/x86/um/tls_32.c26
-rw-r--r--drivers/base/power/main.c16
-rw-r--r--drivers/bluetooth/btnxpuart.c2
-rw-r--r--drivers/bluetooth/hci_qca.c14
-rw-r--r--drivers/dma/Kconfig8
-rw-r--r--drivers/dma/Makefile1
-rw-r--r--drivers/dma/amd/ptdma/ptdma-dmaengine.c23
-rw-r--r--drivers/dma/amd/ptdma/ptdma.h1
-rw-r--r--drivers/dma/arm-dma350.c660
-rw-r--r--drivers/dma/at_xdmac.c6
-rw-r--r--drivers/dma/dw-edma/dw-edma-pcie.c5
-rw-r--r--drivers/dma/fsl-edma-common.c30
-rw-r--r--drivers/dma/fsl-edma-common.h18
-rw-r--r--drivers/dma/fsl-edma-main.c114
-rw-r--r--drivers/dma/fsldma.c20
-rw-r--r--drivers/dma/fsldma.h1
-rw-r--r--drivers/dma/idxd/cdev.c10
-rw-r--r--drivers/dma/idxd/idxd.h2
-rw-r--r--drivers/dma/idxd/sysfs.c6
-rw-r--r--drivers/dma/sh/rz-dmac.c84
-rw-r--r--drivers/dma/tegra210-adma.c185
-rw-r--r--drivers/dma/ti/k3-udma.c3
-rw-r--r--drivers/dma/xilinx/xilinx_dma.c4
-rw-r--r--drivers/irqchip/irq-renesas-rzv2h.c35
-rw-r--r--drivers/net/can/kvaser_pciefd.c3
-rw-r--r--drivers/net/dsa/b53/b53_common.c58
-rw-r--r--drivers/net/ethernet/airoha/airoha_eth.c2
-rw-r--r--drivers/net/ethernet/airoha/airoha_eth.h1
-rw-r--r--drivers/net/ethernet/airoha/airoha_ppe.c54
-rw-r--r--drivers/net/ethernet/airoha/airoha_regs.h10
-rw-r--r--drivers/net/ethernet/google/gve/gve_main.c2
-rw-r--r--drivers/net/ethernet/google/gve/gve_tx_dqo.c3
-rw-r--r--drivers/net/ethernet/huawei/hinic3/hinic3_main.c2
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf.h1
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_ethtool.c29
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_main.c289
-rw-r--r--drivers/net/ethernet/intel/ice/ice_main.c47
-rw-r--r--drivers/net/ethernet/intel/ice/ice_sched.c181
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_lib.c18
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c9
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_txrx.c45
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_txrx.h8
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_virtchnl.c2
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_virtchnl.h1
-rw-r--r--drivers/net/ethernet/mediatek/mtk_star_emac.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_clock.c2
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_main.c1
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_main.h1
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c1
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_vlan.c21
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_est.c5
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c5
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c11
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c2
-rw-r--r--drivers/net/ethernet/ti/icssg/icssg_stats.c8
-rw-r--r--drivers/net/hyperv/netvsc_bpf.c2
-rw-r--r--drivers/net/hyperv/netvsc_drv.c4
-rw-r--r--drivers/net/ovpn/io.c8
-rw-r--r--drivers/net/ovpn/netlink.c16
-rw-r--r--drivers/net/ovpn/peer.c4
-rw-r--r--drivers/net/ovpn/socket.c68
-rw-r--r--drivers/net/ovpn/socket.h4
-rw-r--r--drivers/net/ovpn/tcp.c73
-rw-r--r--drivers/net/ovpn/tcp.h3
-rw-r--r--drivers/net/ovpn/udp.c46
-rw-r--r--drivers/net/ovpn/udp.h4
-rw-r--r--drivers/net/usb/aqc111.c8
-rw-r--r--drivers/net/usb/ch9200.c7
-rw-r--r--drivers/net/vmxnet3/vmxnet3_drv.c26
-rw-r--r--drivers/net/wireguard/device.c1
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mld/fw.c8
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mld/mld.c3
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c4
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c2
-rw-r--r--drivers/net/wwan/mhi_wwan_mbim.c9
-rw-r--r--drivers/net/wwan/t7xx/t7xx_netdev.c11
-rw-r--r--drivers/phy/Kconfig8
-rw-r--r--drivers/phy/Makefile1
-rw-r--r--drivers/phy/amlogic/phy-meson-axg-mipi-dphy.c10
-rw-r--r--drivers/phy/amlogic/phy-meson-axg-mipi-pcie-analog.c10
-rw-r--r--drivers/phy/amlogic/phy-meson-axg-pcie.c14
-rw-r--r--drivers/phy/amlogic/phy-meson-g12a-usb2.c10
-rw-r--r--drivers/phy/amlogic/phy-meson-gxl-usb2.c11
-rw-r--r--drivers/phy/amlogic/phy-meson8b-usb2.c35
-rw-r--r--drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c61
-rw-r--r--drivers/phy/broadcom/phy-brcm-usb-init.h1
-rw-r--r--drivers/phy/broadcom/phy-brcm-usb.c14
-rw-r--r--drivers/phy/freescale/phy-fsl-imx8m-pcie.c21
-rw-r--r--drivers/phy/freescale/phy-fsl-imx8mq-usb.c84
-rw-r--r--drivers/phy/freescale/phy-fsl-samsung-hdmi.c117
-rw-r--r--drivers/phy/marvell/Kconfig4
-rw-r--r--drivers/phy/mediatek/phy-mtk-xsphy.c85
-rw-r--r--drivers/phy/phy-snps-eusb2.c627
-rw-r--r--drivers/phy/qualcomm/Kconfig9
-rw-r--r--drivers/phy/qualcomm/Makefile1
-rw-r--r--drivers/phy/qualcomm/phy-qcom-qmp-pcie.c90
-rw-r--r--drivers/phy/qualcomm/phy-qcom-qmp-usb.c6
-rw-r--r--drivers/phy/qualcomm/phy-qcom-qusb2.c27
-rw-r--r--drivers/phy/qualcomm/phy-qcom-snps-eusb2.c442
-rw-r--r--drivers/phy/qualcomm/phy-qcom-uniphy-pcie-28lp.c45
-rw-r--r--drivers/phy/renesas/phy-rcar-gen3-usb2.c38
-rw-r--r--drivers/phy/rockchip/phy-rockchip-inno-usb2.c81
-rw-r--r--drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c294
-rw-r--r--drivers/phy/samsung/Kconfig2
-rw-r--r--drivers/phy/samsung/phy-exynos5-usbdrd.c617
-rw-r--r--drivers/phy/tegra/Kconfig2
-rw-r--r--drivers/phy/xilinx/phy-zynqmp.c70
-rw-r--r--drivers/rtc/Kconfig25
-rw-r--r--drivers/rtc/Makefile2
-rw-r--r--drivers/rtc/class.c2
-rw-r--r--drivers/rtc/interface.c2
-rw-r--r--drivers/rtc/lib.c24
-rw-r--r--drivers/rtc/lib_test.c27
-rw-r--r--drivers/rtc/rtc-at91rm9200.c1
-rw-r--r--drivers/rtc/rtc-cpcap.c1
-rw-r--r--drivers/rtc/rtc-cv1800.c218
-rw-r--r--drivers/rtc/rtc-da9063.c31
-rw-r--r--drivers/rtc/rtc-jz4740.c1
-rw-r--r--drivers/rtc/rtc-loongson.c8
-rw-r--r--drivers/rtc/rtc-m41t80.c78
-rw-r--r--drivers/rtc/rtc-mt6397.c1
-rw-r--r--drivers/rtc/rtc-pcf8563.c2
-rw-r--r--drivers/rtc/rtc-pm8xxx.c18
-rw-r--r--drivers/rtc/rtc-rzn1.c71
-rw-r--r--drivers/rtc/rtc-s32g.c385
-rw-r--r--drivers/rtc/rtc-s3c.c1
-rw-r--r--drivers/rtc/rtc-sh.c285
-rw-r--r--drivers/rtc/rtc-stm32.c1
-rw-r--r--drivers/soundwire/bus.c31
-rw-r--r--drivers/soundwire/bus_type.c10
-rw-r--r--drivers/soundwire/generic_bandwidth_allocation.c7
-rw-r--r--drivers/soundwire/intel.h2
-rw-r--r--drivers/soundwire/intel_ace2x_debugfs.c6
-rw-r--r--drivers/soundwire/intel_init.c1
-rw-r--r--drivers/soundwire/irq.c6
-rw-r--r--fs/bcachefs/alloc_background.c79
-rw-r--r--fs/bcachefs/alloc_background.h9
-rw-r--r--fs/bcachefs/alloc_foreground.c108
-rw-r--r--fs/bcachefs/alloc_foreground.h8
-rw-r--r--fs/bcachefs/backpointers.c72
-rw-r--r--fs/bcachefs/backpointers.h5
-rw-r--r--fs/bcachefs/bcachefs.h72
-rw-r--r--fs/bcachefs/btree_cache.c24
-rw-r--r--fs/bcachefs/btree_gc.c57
-rw-r--r--fs/bcachefs/btree_io.c43
-rw-r--r--fs/bcachefs/btree_iter.c78
-rw-r--r--fs/bcachefs/btree_iter.h31
-rw-r--r--fs/bcachefs/btree_journal_iter.c19
-rw-r--r--fs/bcachefs/btree_key_cache.c28
-rw-r--r--fs/bcachefs/btree_locking.c56
-rw-r--r--fs/bcachefs/btree_node_scan.c2
-rw-r--r--fs/bcachefs/btree_trans_commit.c36
-rw-r--r--fs/bcachefs/btree_types.h2
-rw-r--r--fs/bcachefs/btree_update.c59
-rw-r--r--fs/bcachefs/btree_update.h14
-rw-r--r--fs/bcachefs/btree_update_interior.c104
-rw-r--r--fs/bcachefs/btree_write_buffer.c6
-rw-r--r--fs/bcachefs/buckets.c163
-rw-r--r--fs/bcachefs/buckets.h12
-rw-r--r--fs/bcachefs/buckets_waiting_for_journal.c3
-rw-r--r--fs/bcachefs/chardev.c9
-rw-r--r--fs/bcachefs/checksum.c8
-rw-r--r--fs/bcachefs/clock.c47
-rw-r--r--fs/bcachefs/clock.h1
-rw-r--r--fs/bcachefs/compress.c20
-rw-r--r--fs/bcachefs/darray.h46
-rw-r--r--fs/bcachefs/data_update.c174
-rw-r--r--fs/bcachefs/debug.c30
-rw-r--r--fs/bcachefs/dirent.c169
-rw-r--r--fs/bcachefs/dirent.h16
-rw-r--r--fs/bcachefs/disk_accounting.c38
-rw-r--r--fs/bcachefs/disk_accounting.h6
-rw-r--r--fs/bcachefs/disk_groups.c37
-rw-r--r--fs/bcachefs/ec.c108
-rw-r--r--fs/bcachefs/errcode.c4
-rw-r--r--fs/bcachefs/errcode.h15
-rw-r--r--fs/bcachefs/error.c93
-rw-r--r--fs/bcachefs/error.h12
-rw-r--r--fs/bcachefs/extents.c63
-rw-r--r--fs/bcachefs/fs-io-buffered.c30
-rw-r--r--fs/bcachefs/fs-io-pagecache.c2
-rw-r--r--fs/bcachefs/fs-io.c12
-rw-r--r--fs/bcachefs/fs-ioctl.c4
-rw-r--r--fs/bcachefs/fs.c40
-rw-r--r--fs/bcachefs/fsck.c149
-rw-r--r--fs/bcachefs/fsck.h6
-rw-r--r--fs/bcachefs/inode.c86
-rw-r--r--fs/bcachefs/inode.h9
-rw-r--r--fs/bcachefs/io_misc.c2
-rw-r--r--fs/bcachefs/io_read.c35
-rw-r--r--fs/bcachefs/io_read.h6
-rw-r--r--fs/bcachefs/io_write.c26
-rw-r--r--fs/bcachefs/journal.c117
-rw-r--r--fs/bcachefs/journal.h5
-rw-r--r--fs/bcachefs/journal_io.c281
-rw-r--r--fs/bcachefs/journal_io.h1
-rw-r--r--fs/bcachefs/journal_reclaim.c44
-rw-r--r--fs/bcachefs/journal_sb.c2
-rw-r--r--fs/bcachefs/journal_seq_blacklist.c4
-rw-r--r--fs/bcachefs/lru.c6
-rw-r--r--fs/bcachefs/migrate.c4
-rw-r--r--fs/bcachefs/move.c132
-rw-r--r--fs/bcachefs/movinggc.c26
-rw-r--r--fs/bcachefs/movinggc.h3
-rw-r--r--fs/bcachefs/namei.c21
-rw-r--r--fs/bcachefs/printbuf.h8
-rw-r--r--fs/bcachefs/quota.c6
-rw-r--r--fs/bcachefs/rebalance.c27
-rw-r--r--fs/bcachefs/rebalance.h8
-rw-r--r--fs/bcachefs/rebalance_types.h1
-rw-r--r--fs/bcachefs/recovery.c6
-rw-r--r--fs/bcachefs/recovery_passes.c92
-rw-r--r--fs/bcachefs/recovery_passes.h5
-rw-r--r--fs/bcachefs/recovery_passes_format.h2
-rw-r--r--fs/bcachefs/reflink.c9
-rw-r--r--fs/bcachefs/replicas.c35
-rw-r--r--fs/bcachefs/sb-counters_format.h1
-rw-r--r--fs/bcachefs/sb-downgrade.c2
-rw-r--r--fs/bcachefs/sb-errors.c22
-rw-r--r--fs/bcachefs/sb-errors.h1
-rw-r--r--fs/bcachefs/sb-errors_format.h4
-rw-r--r--fs/bcachefs/sb-members.c21
-rw-r--r--fs/bcachefs/sb-members.h32
-rw-r--r--fs/bcachefs/six.c7
-rw-r--r--fs/bcachefs/snapshot.c148
-rw-r--r--fs/bcachefs/snapshot.h85
-rw-r--r--fs/bcachefs/str_hash.c243
-rw-r--r--fs/bcachefs/str_hash.h24
-rw-r--r--fs/bcachefs/subvolume.c45
-rw-r--r--fs/bcachefs/super-io.c8
-rw-r--r--fs/bcachefs/super.c106
-rw-r--r--fs/bcachefs/sysfs.c24
-rw-r--r--fs/bcachefs/trace.h69
-rw-r--r--fs/smb/server/Kconfig1
-rw-r--r--fs/smb/server/auth.c34
-rw-r--r--fs/smb/server/auth.h2
-rw-r--r--fs/smb/server/connection.h1
-rw-r--r--fs/smb/server/crypto_ctx.c8
-rw-r--r--fs/smb/server/crypto_ctx.h4
-rw-r--r--fs/smb/server/server.c1
-rw-r--r--fs/smb/server/smb2pdu.c72
-rw-r--r--fs/smb/server/smb2pdu.h3
-rw-r--r--fs/smb/server/vfs.c20
-rw-r--r--include/linux/ieee80211.h79
-rw-r--r--include/linux/irqchip/irq-renesas-rzv2h.h23
-rw-r--r--include/linux/phy/phy-hdmi.h21
-rw-r--r--include/linux/phy/phy.h7
-rw-r--r--include/linux/soc/samsung/exynos-regs-pmu.h5
-rw-r--r--include/linux/soundwire/sdw.h6
-rw-r--r--include/linux/soundwire/sdw_intel.h5
-rw-r--r--include/net/checksum.h2
-rw-r--r--include/uapi/linux/bpf.h2
-rw-r--r--init/Kconfig3
-rw-r--r--kernel/sched/ext_idle.c37
-rw-r--r--net/bluetooth/l2cap_core.c3
-rw-r--r--net/bluetooth/mgmt.c3
-rw-r--r--net/core/dev.c3
-rw-r--r--net/core/devmem.h3
-rw-r--r--net/core/filter.c5
-rw-r--r--net/core/net_namespace.c4
-rw-r--r--net/core/page_pool.c27
-rw-r--r--net/core/rtnetlink.c2
-rw-r--r--net/core/sock.c8
-rw-r--r--net/core/utils.c4
-rw-r--r--net/dsa/tag_brcm.c2
-rw-r--r--net/ipv4/udp_offload.c5
-rw-r--r--net/ipv6/ila/ila_common.c6
-rw-r--r--net/ipv6/seg6_local.c6
-rw-r--r--net/mac80211/mlme.c7
-rw-r--r--net/mac80211/scan.c11
-rw-r--r--net/netfilter/nf_nat_core.c12
-rw-r--r--net/netfilter/nft_set_pipapo_avx2.c21
-rw-r--r--net/netlabel/netlabel_kapi.c6
-rw-r--r--net/rxrpc/insecure.c5
-rw-r--r--net/tipc/crypto.c6
-rw-r--r--net/wireless/scan.c18
-rw-r--r--rust/Makefile19
-rw-r--r--rust/bindings/bindings_helper.h28
-rw-r--r--rust/ffi.rs2
-rw-r--r--rust/helpers/helpers.c1
-rw-r--r--rust/helpers/xarray.c28
-rw-r--r--rust/kernel/alloc.rs4
-rw-r--r--rust/kernel/alloc/allocator_test.rs2
-rw-r--r--rust/kernel/alloc/kbox.rs80
-rw-r--r--rust/kernel/alloc/kvec.rs433
-rw-r--r--rust/kernel/alloc/kvec/errors.rs61
-rw-r--r--rust/kernel/auxiliary.rs8
-rw-r--r--rust/kernel/block/mq/gen_disk.rs2
-rw-r--r--rust/kernel/configfs.rs4
-rw-r--r--rust/kernel/cpufreq.rs4
-rw-r--r--rust/kernel/device.rs26
-rw-r--r--rust/kernel/device_id.rs4
-rw-r--r--rust/kernel/dma.rs2
-rw-r--r--rust/kernel/drm/device.rs2
-rw-r--r--rust/kernel/drm/gem/mod.rs6
-rw-r--r--rust/kernel/kunit.rs37
-rw-r--r--rust/kernel/lib.rs45
-rw-r--r--rust/kernel/list.rs115
-rw-r--r--rust/kernel/list/arc.rs6
-rw-r--r--rust/kernel/miscdevice.rs12
-rw-r--r--rust/kernel/page.rs2
-rw-r--r--rust/kernel/pci.rs17
-rw-r--r--rust/kernel/platform.rs11
-rw-r--r--rust/kernel/prelude.rs7
-rw-r--r--rust/kernel/print.rs27
-rw-r--r--rust/kernel/rbtree.rs23
-rw-r--r--rust/kernel/static_assert.rs9
-rw-r--r--rust/kernel/std_vendor.rs2
-rw-r--r--rust/kernel/str.rs82
-rw-r--r--rust/kernel/sync/arc.rs25
-rw-r--r--rust/kernel/time.rs167
-rw-r--r--rust/kernel/time/hrtimer.rs24
-rw-r--r--rust/kernel/time/hrtimer/arc.rs2
-rw-r--r--rust/kernel/time/hrtimer/pin.rs2
-rw-r--r--rust/kernel/time/hrtimer/pin_mut.rs4
-rw-r--r--rust/kernel/time/hrtimer/tbox.rs2
-rw-r--r--rust/kernel/types.rs46
-rw-r--r--rust/kernel/uaccess.rs6
-rw-r--r--rust/kernel/workqueue.rs50
-rw-r--r--rust/kernel/xarray.rs275
-rw-r--r--rust/macros/helpers.rs17
-rw-r--r--rust/macros/kunit.rs58
-rw-r--r--rust/macros/lib.rs15
-rw-r--r--rust/macros/module.rs31
-rw-r--r--rust/pin-init/README.md14
-rw-r--r--rust/pin-init/examples/linked_list.rs1
-rw-r--r--rust/pin-init/examples/mutex.rs1
-rw-r--r--rust/pin-init/examples/pthread_mutex.rs4
-rw-r--r--rust/pin-init/examples/static_init.rs1
-rw-r--r--rust/pin-init/internal/src/lib.rs6
-rw-r--r--rust/pin-init/internal/src/zeroable.rs27
-rw-r--r--rust/pin-init/src/lib.rs144
-rw-r--r--rust/pin-init/src/macros.rs91
-rw-r--r--scripts/Makefile.build9
-rwxr-xr-xscripts/generate_rust_analyzer.py13
-rw-r--r--scripts/generate_rust_target.rs4
-rw-r--r--scripts/rustdoc_test_builder.rs8
-rw-r--r--scripts/rustdoc_test_gen.rs16
-rw-r--r--sound/soc/sof/intel/hda.c3
-rw-r--r--tools/include/uapi/linux/bpf.h2
-rw-r--r--tools/objtool/check.c3
-rw-r--r--tools/testing/kunit/configs/all_tests.config1
-rw-r--r--tools/testing/selftests/Makefile2
-rw-r--r--tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c6
-rw-r--r--tools/testing/selftests/bpf/test_kmods/bpf_testmod.c6
-rw-r--r--tools/testing/selftests/drivers/net/hw/config5
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/tso.py4
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_concat_range.sh102
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_nat.sh81
-rw-r--r--tools/testing/selftests/net/ovpn/ovpn-cli.c1
-rwxr-xr-xtools/testing/selftests/net/ovpn/test-large-mtu.sh9
478 files changed, 13040 insertions, 8756 deletions
diff --git a/.mailmap b/.mailmap
index 0f2fe42000a7..074082ce9299 100644
--- a/.mailmap
+++ b/.mailmap
@@ -135,6 +135,7 @@ Ben Widawsky <bwidawsk@kernel.org> <benjamin.widawsky@intel.com>
Benjamin Poirier <benjamin.poirier@gmail.com> <bpoirier@suse.de>
Benjamin Tissoires <bentiss@kernel.org> <benjamin.tissoires@gmail.com>
Benjamin Tissoires <bentiss@kernel.org> <benjamin.tissoires@redhat.com>
+Benno Lossin <lossin@kernel.org> <benno.lossin@proton.me>
Bingwu Zhang <xtex@aosc.io> <xtexchooser@duck.com>
Bingwu Zhang <xtex@aosc.io> <xtex@xtexx.eu.org>
Bjorn Andersson <andersson@kernel.org> <bjorn@kryo.se>
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a3ea40b22fb9..f1f2c0874da9 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -458,6 +458,9 @@
arm64.nomops [ARM64] Unconditionally disable Memory Copy and Memory
Set instructions support
+ arm64.nompam [ARM64] Unconditionally disable Memory Partitioning And
+ Monitoring support
+
arm64.nomte [ARM64] Unconditionally disable Memory Tagging Extension
support
diff --git a/Documentation/devicetree/bindings/dma/arm,dma-350.yaml b/Documentation/devicetree/bindings/dma/arm,dma-350.yaml
new file mode 100644
index 000000000000..429f682f15d8
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/arm,dma-350.yaml
@@ -0,0 +1,44 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/dma/arm,dma-350.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Arm CoreLink DMA-350 Controller
+
+maintainers:
+ - Robin Murphy <robin.murphy@arm.com>
+
+allOf:
+ - $ref: dma-controller.yaml#
+
+properties:
+ compatible:
+ const: arm,dma-350
+
+ reg:
+ items:
+ - description: Base and size of the full register map
+
+ interrupts:
+ minItems: 1
+ items:
+ - description: Channel 0 interrupt
+ - description: Channel 1 interrupt
+ - description: Channel 2 interrupt
+ - description: Channel 3 interrupt
+ - description: Channel 4 interrupt
+ - description: Channel 5 interrupt
+ - description: Channel 6 interrupt
+ - description: Channel 7 interrupt
+
+ "#dma-cells":
+ const: 1
+ description: The cell is the trigger input number
+
+required:
+ - compatible
+ - reg
+ - interrupts
+
+unevaluatedProperties: false
diff --git a/Documentation/devicetree/bindings/dma/fsl,edma.yaml b/Documentation/devicetree/bindings/dma/fsl,edma.yaml
index 950e8fa4f4ab..fa4248e2f1b9 100644
--- a/Documentation/devicetree/bindings/dma/fsl,edma.yaml
+++ b/Documentation/devicetree/bindings/dma/fsl,edma.yaml
@@ -48,11 +48,11 @@ properties:
interrupts:
minItems: 1
- maxItems: 64
+ maxItems: 65
interrupt-names:
minItems: 1
- maxItems: 64
+ maxItems: 65
"#dma-cells":
description: |
diff --git a/Documentation/devicetree/bindings/dma/nvidia,tegra210-adma.yaml b/Documentation/devicetree/bindings/dma/nvidia,tegra210-adma.yaml
index d3f8c269916c..da0235e451d6 100644
--- a/Documentation/devicetree/bindings/dma/nvidia,tegra210-adma.yaml
+++ b/Documentation/devicetree/bindings/dma/nvidia,tegra210-adma.yaml
@@ -19,6 +19,7 @@ properties:
- enum:
- nvidia,tegra210-adma
- nvidia,tegra186-adma
+ - nvidia,tegra264-adma
- items:
- enum:
- nvidia,tegra234-adma
@@ -92,6 +93,7 @@ allOf:
contains:
enum:
- nvidia,tegra186-adma
+ - nvidia,tegra264-adma
then:
anyOf:
- properties:
diff --git a/Documentation/devicetree/bindings/dma/qcom,bam-dma.yaml b/Documentation/devicetree/bindings/dma/qcom,bam-dma.yaml
index 3ad0d9b1fbc5..f2f87f0f545b 100644
--- a/Documentation/devicetree/bindings/dma/qcom,bam-dma.yaml
+++ b/Documentation/devicetree/bindings/dma/qcom,bam-dma.yaml
@@ -42,6 +42,8 @@ properties:
interrupts:
maxItems: 1
+ dma-coherent: true
+
iommus:
minItems: 1
maxItems: 6
diff --git a/Documentation/devicetree/bindings/dma/renesas,rz-dmac.yaml b/Documentation/devicetree/bindings/dma/renesas,rz-dmac.yaml
index b356251de5a8..92b12762c472 100644
--- a/Documentation/devicetree/bindings/dma/renesas,rz-dmac.yaml
+++ b/Documentation/devicetree/bindings/dma/renesas,rz-dmac.yaml
@@ -11,19 +11,23 @@ maintainers:
properties:
compatible:
- items:
- - enum:
- - renesas,r7s72100-dmac # RZ/A1H
- - renesas,r9a07g043-dmac # RZ/G2UL and RZ/Five
- - renesas,r9a07g044-dmac # RZ/G2{L,LC}
- - renesas,r9a07g054-dmac # RZ/V2L
- - renesas,r9a08g045-dmac # RZ/G3S
- - const: renesas,rz-dmac
+ oneOf:
+ - items:
+ - enum:
+ - renesas,r7s72100-dmac # RZ/A1H
+ - renesas,r9a07g043-dmac # RZ/G2UL and RZ/Five
+ - renesas,r9a07g044-dmac # RZ/G2{L,LC}
+ - renesas,r9a07g054-dmac # RZ/V2L
+ - renesas,r9a08g045-dmac # RZ/G3S
+ - const: renesas,rz-dmac
+
+ - const: renesas,r9a09g057-dmac # RZ/V2H(P)
reg:
items:
- description: Control and channel register block
- description: DMA extended resource selector block
+ minItems: 1
interrupts:
maxItems: 17
@@ -52,6 +56,7 @@ properties:
items:
- description: DMA main clock
- description: DMA register access clock
+ minItems: 1
clock-names:
items:
@@ -61,10 +66,10 @@ properties:
'#dma-cells':
const: 1
description:
- The cell specifies the encoded MID/RID values of the DMAC port
- connected to the DMA client and the slave channel configuration
- parameters.
- bits[0:9] - Specifies MID/RID value
+ The cell specifies the encoded MID/RID or the REQ No values of
+ the DMAC port connected to the DMA client and the slave channel
+ configuration parameters.
+ bits[0:9] - Specifies the MID/RID or the REQ No value
bit[10] - Specifies DMA request high enable (HIEN)
bit[11] - Specifies DMA request detection type (LVL)
bits[12:14] - Specifies DMAACK output mode (AM)
@@ -80,12 +85,26 @@ properties:
items:
- description: Reset for DMA ARESETN reset terminal
- description: Reset for DMA RST_ASYNC reset terminal
+ minItems: 1
reset-names:
items:
- const: arst
- const: rst_async
+ renesas,icu:
+ description:
+ It must contain the phandle to the ICU and the index of the DMAC as seen
+ from the ICU.
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ items:
+ - items:
+ - description: Phandle to the ICU node.
+ - description:
+ The number of the DMAC as seen from the ICU, i.e. parameter k from
+ register ICU_DMkSELy. This may differ from the actual DMAC instance
+ number.
+
required:
- compatible
- reg
@@ -98,13 +117,25 @@ allOf:
- $ref: dma-controller.yaml#
- if:
- not:
- properties:
- compatible:
- contains:
- enum:
- - renesas,r7s72100-dmac
+ properties:
+ compatible:
+ contains:
+ enum:
+ - renesas,r9a07g043-dmac
+ - renesas,r9a07g044-dmac
+ - renesas,r9a07g054-dmac
+ - renesas,r9a08g045-dmac
then:
+ properties:
+ reg:
+ minItems: 2
+ clocks:
+ minItems: 2
+ resets:
+ minItems: 2
+
+ renesas,icu: false
+
required:
- clocks
- clock-names
@@ -112,6 +143,46 @@ allOf:
- resets
- reset-names
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: renesas,r7s72100-dmac
+ then:
+ properties:
+ reg:
+ minItems: 2
+
+ clocks: false
+ clock-names: false
+ power-domains: false
+ resets: false
+ reset-names: false
+ renesas,icu: false
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: renesas,r9a09g057-dmac
+ then:
+ properties:
+ reg:
+ maxItems: 1
+ clocks:
+ maxItems: 1
+ resets:
+ maxItems: 1
+
+ clock-names: false
+ reset-names: false
+
+ required:
+ - clocks
+ - power-domains
+ - renesas,icu
+ - resets
+
additionalProperties: false
examples:
diff --git a/Documentation/devicetree/bindings/phy/brcm,brcmstb-usb-phy.yaml b/Documentation/devicetree/bindings/phy/brcm,brcmstb-usb-phy.yaml
index 580fbe37b37f..843d04027c30 100644
--- a/Documentation/devicetree/bindings/phy/brcm,brcmstb-usb-phy.yaml
+++ b/Documentation/devicetree/bindings/phy/brcm,brcmstb-usb-phy.yaml
@@ -18,6 +18,7 @@ properties:
- brcm,bcm4908-usb-phy
- brcm,bcm7211-usb-phy
- brcm,bcm7216-usb-phy
+ - brcm,bcm74110-usb-phy
- brcm,brcmstb-usb-phy
reg:
@@ -139,7 +140,9 @@ allOf:
properties:
compatible:
contains:
- const: brcm,bcm7216-usb-phy
+ enum:
+ - brcm,bcm7216-usb-phy
+ - brcm,bcm74110-usb-phy
then:
properties:
reg:
diff --git a/Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml b/Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml
index daee0c0fc915..22dd91591a09 100644
--- a/Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml
+++ b/Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml
@@ -43,15 +43,15 @@ properties:
fsl,phy-tx-vref-tune-percent:
description:
Tunes the HS DC level relative to the nominal level
- minimum: 94
+ minimum: 90
maximum: 124
fsl,phy-tx-rise-tune-percent:
description:
Adjusts the rise/fall time duration of the HS waveform relative to
its nominal value
- minimum: 97
- maximum: 103
+ minimum: 90
+ maximum: 120
fsl,phy-tx-preemp-amp-tune-microamp:
description:
@@ -63,8 +63,7 @@ properties:
fsl,phy-tx-vboost-level-microvolt:
description:
Adjust the boosted transmit launch pk-pk differential amplitude
- minimum: 880
- maximum: 1120
+ enum: [844, 1008, 1156]
fsl,phy-comp-dis-tune-percent:
description:
@@ -113,6 +112,34 @@ allOf:
maxItems: 1
- if:
+ properties:
+ compatible:
+ enum:
+ - fsl,imx8mq-usb-phy
+ - fsl,imx8mp-usb-phy
+ then:
+ properties:
+ fsl,phy-tx-vref-tune-percent:
+ minimum: 94
+ fsl,phy-tx-rise-tune-percent:
+ minimum: 97
+ maximum: 103
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - fsl,imx95-usb-phy
+ then:
+ properties:
+ fsl,phy-tx-vref-tune-percent:
+ maximum: 108
+ fsl,phy-comp-dis-tune-percent:
+ minimum: 94
+ maximum: 104
+
+ - if:
required:
- orientation-switch
then:
diff --git a/Documentation/devicetree/bindings/phy/mediatek,dsi-phy.yaml b/Documentation/devicetree/bindings/phy/mediatek,dsi-phy.yaml
index f6e494d0d89b..acdbce937b0a 100644
--- a/Documentation/devicetree/bindings/phy/mediatek,dsi-phy.yaml
+++ b/Documentation/devicetree/bindings/phy/mediatek,dsi-phy.yaml
@@ -30,6 +30,7 @@ properties:
- const: mediatek,mt8173-mipi-tx
- items:
- enum:
+ - mediatek,mt6893-mipi-tx
- mediatek,mt8188-mipi-tx
- mediatek,mt8195-mipi-tx
- mediatek,mt8365-mipi-tx
diff --git a/Documentation/devicetree/bindings/phy/mediatek,tphy.yaml b/Documentation/devicetree/bindings/phy/mediatek,tphy.yaml
index 6be3aa4557e5..b2218c151939 100644
--- a/Documentation/devicetree/bindings/phy/mediatek,tphy.yaml
+++ b/Documentation/devicetree/bindings/phy/mediatek,tphy.yaml
@@ -78,6 +78,7 @@ properties:
- items:
- enum:
- mediatek,mt2712-tphy
+ - mediatek,mt6893-tphy
- mediatek,mt7629-tphy
- mediatek,mt7986-tphy
- mediatek,mt8183-tphy
diff --git a/Documentation/devicetree/bindings/phy/mediatek,xsphy.yaml b/Documentation/devicetree/bindings/phy/mediatek,xsphy.yaml
index a9e3139fd421..0bed847bb4ad 100644
--- a/Documentation/devicetree/bindings/phy/mediatek,xsphy.yaml
+++ b/Documentation/devicetree/bindings/phy/mediatek,xsphy.yaml
@@ -49,6 +49,7 @@ properties:
- enum:
- mediatek,mt3611-xsphy
- mediatek,mt3612-xsphy
+ - mediatek,mt7988-xsphy
- const: mediatek,xsphy
reg:
@@ -150,6 +151,21 @@ patternProperties:
minimum: 1
maximum: 31
+ mediatek,syscon-type:
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ description:
+ A phandle to syscon used to access the register of type switch,
+ the field should always be 3 cells long.
+ items:
+ - items:
+ - description:
+ Phandle to phy type configuration system controller
+ - description:
+ Phy type configuration register offset
+ - description:
+ Index of config segment
+ enum: [0, 1, 2, 3]
+
required:
- reg
- clocks
diff --git a/Documentation/devicetree/bindings/phy/phy-cadence-torrent.yaml b/Documentation/devicetree/bindings/phy/phy-cadence-torrent.yaml
index 15dc8efe6ffe..9af39b33646a 100644
--- a/Documentation/devicetree/bindings/phy/phy-cadence-torrent.yaml
+++ b/Documentation/devicetree/bindings/phy/phy-cadence-torrent.yaml
@@ -99,8 +99,7 @@ patternProperties:
Specifies the type of PHY for which the group of PHY lanes is used.
Refer include/dt-bindings/phy/phy.h. Constants from the header should be used.
$ref: /schemas/types.yaml#/definitions/uint32
- minimum: 1
- maximum: 9
+ enum: [1, 2, 3, 4, 5, 6, 7, 8, 9, 12]
cdns,num-lanes:
description:
diff --git a/Documentation/devicetree/bindings/phy/phy-rockchip-naneng-combphy.yaml b/Documentation/devicetree/bindings/phy/phy-rockchip-naneng-combphy.yaml
index 888e6b2aac5a..3e101c3c5ea9 100644
--- a/Documentation/devicetree/bindings/phy/phy-rockchip-naneng-combphy.yaml
+++ b/Documentation/devicetree/bindings/phy/phy-rockchip-naneng-combphy.yaml
@@ -42,6 +42,9 @@ properties:
- const: phy
- const: apb
+ phy-supply:
+ description: Single PHY regulator
+
rockchip,enable-ssc:
type: boolean
description:
diff --git a/Documentation/devicetree/bindings/phy/phy-rockchip-typec.txt b/Documentation/devicetree/bindings/phy/phy-rockchip-typec.txt
deleted file mode 100644
index 960da7fcaa9e..000000000000
--- a/Documentation/devicetree/bindings/phy/phy-rockchip-typec.txt
+++ /dev/null
@@ -1,84 +0,0 @@
-* ROCKCHIP type-c PHY
----------------------
-
-Required properties:
- - compatible : must be "rockchip,rk3399-typec-phy"
- - reg: Address and length of the usb phy control register set
- - rockchip,grf : phandle to the syscon managing the "general
- register files"
- - clocks : phandle + clock specifier for the phy clocks
- - clock-names : string, clock name, must be "tcpdcore", "tcpdphy-ref";
- - assigned-clocks: main clock, should be <&cru SCLK_UPHY0_TCPDCORE> or
- <&cru SCLK_UPHY1_TCPDCORE>;
- - assigned-clock-rates : the phy core clk frequency, shall be: 50000000
- - resets : a list of phandle + reset specifier pairs
- - reset-names : string reset name, must be:
- "uphy", "uphy-pipe", "uphy-tcphy"
-
-Optional properties:
- - extcon : extcon specifier for the Power Delivery
-
-Required nodes : a sub-node is required for each port the phy provides.
- The sub-node name is used to identify dp or usb3 port,
- and shall be the following entries:
- * "dp-port" : the name of DP port.
- * "usb3-port" : the name of USB3 port.
-
-Required properties (port (child) node):
-- #phy-cells : must be 0, See ./phy-bindings.txt for details.
-
-Deprecated properties, do not use in new device tree sources, these
-properties are determined by the compatible value:
- - rockchip,typec-conn-dir
- - rockchip,usb3tousb2-en
- - rockchip,external-psm
- - rockchip,pipe-status
-
-Example:
- tcphy0: phy@ff7c0000 {
- compatible = "rockchip,rk3399-typec-phy";
- reg = <0x0 0xff7c0000 0x0 0x40000>;
- rockchip,grf = <&grf>;
- extcon = <&fusb0>;
- clocks = <&cru SCLK_UPHY0_TCPDCORE>,
- <&cru SCLK_UPHY0_TCPDPHY_REF>;
- clock-names = "tcpdcore", "tcpdphy-ref";
- assigned-clocks = <&cru SCLK_UPHY0_TCPDCORE>;
- assigned-clock-rates = <50000000>;
- resets = <&cru SRST_UPHY0>,
- <&cru SRST_UPHY0_PIPE_L00>,
- <&cru SRST_P_UPHY0_TCPHY>;
- reset-names = "uphy", "uphy-pipe", "uphy-tcphy";
-
- tcphy0_dp: dp-port {
- #phy-cells = <0>;
- };
-
- tcphy0_usb3: usb3-port {
- #phy-cells = <0>;
- };
- };
-
- tcphy1: phy@ff800000 {
- compatible = "rockchip,rk3399-typec-phy";
- reg = <0x0 0xff800000 0x0 0x40000>;
- rockchip,grf = <&grf>;
- extcon = <&fusb1>;
- clocks = <&cru SCLK_UPHY1_TCPDCORE>,
- <&cru SCLK_UPHY1_TCPDPHY_REF>;
- clock-names = "tcpdcore", "tcpdphy-ref";
- assigned-clocks = <&cru SCLK_UPHY1_TCPDCORE>;
- assigned-clock-rates = <50000000>;
- resets = <&cru SRST_UPHY1>,
- <&cru SRST_UPHY1_PIPE_L00>,
- <&cru SRST_P_UPHY1_TCPHY>;
- reset-names = "uphy", "uphy-pipe", "uphy-tcphy";
-
- tcphy1_dp: dp-port {
- #phy-cells = <0>;
- };
-
- tcphy1_usb3: usb3-port {
- #phy-cells = <0>;
- };
- };
diff --git a/Documentation/devicetree/bindings/phy/phy-rockchip-usbdp.yaml b/Documentation/devicetree/bindings/phy/phy-rockchip-usbdp.yaml
index b42f1272903d..8b7059d5b182 100644
--- a/Documentation/devicetree/bindings/phy/phy-rockchip-usbdp.yaml
+++ b/Documentation/devicetree/bindings/phy/phy-rockchip-usbdp.yaml
@@ -47,6 +47,9 @@ properties:
- const: pcs_apb
- const: pma_apb
+ phy-supply:
+ description: Single PHY regulator
+
rockchip,dp-lane-mux:
$ref: /schemas/types.yaml#/definitions/uint32-array
minItems: 2
diff --git a/Documentation/devicetree/bindings/phy/qcom,ipq5332-uniphy-pcie-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,ipq5332-uniphy-pcie-phy.yaml
index e39168d55d23..6e9df81441e9 100644
--- a/Documentation/devicetree/bindings/phy/qcom,ipq5332-uniphy-pcie-phy.yaml
+++ b/Documentation/devicetree/bindings/phy/qcom,ipq5332-uniphy-pcie-phy.yaml
@@ -11,26 +11,24 @@ maintainers:
- Varadarajan Narayanan <quic_varada@quicinc.com>
description:
- PCIe and USB combo PHY found in Qualcomm IPQ5332 SoC
+ PCIe and USB combo PHY found in Qualcomm IPQ5018 & IPQ5332 SoCs
properties:
compatible:
enum:
+ - qcom,ipq5018-uniphy-pcie-phy
- qcom,ipq5332-uniphy-pcie-phy
reg:
maxItems: 1
clocks:
- items:
- - description: pcie pipe clock
- - description: pcie ahb clock
+ minItems: 1
+ maxItems: 2
resets:
- items:
- - description: phy reset
- - description: ahb reset
- - description: cfg reset
+ minItems: 2
+ maxItems: 3
"#phy-cells":
const: 0
@@ -53,6 +51,41 @@ required:
additionalProperties: false
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,ipq5018-uniphy-pcie-phy
+ then:
+ properties:
+ clocks:
+ items:
+ - description: pcie pipe clock
+ resets:
+ items:
+ - description: phy reset
+ - description: cfg reset
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,ipq5332-uniphy-pcie-phy
+ then:
+ properties:
+ clocks:
+ items:
+ - description: pcie pipe clock
+ - description: pcie ahb clock
+ resets:
+ items:
+ - description: phy reset
+ - description: ahb reset
+ - description: cfg reset
+
examples:
- |
#include <dt-bindings/clock/qcom,ipq5332-gcc.h>
diff --git a/Documentation/devicetree/bindings/phy/renesas,usb2-phy.yaml b/Documentation/devicetree/bindings/phy/renesas,usb2-phy.yaml
index af275cea3456..2822dce8d9f4 100644
--- a/Documentation/devicetree/bindings/phy/renesas,usb2-phy.yaml
+++ b/Documentation/devicetree/bindings/phy/renesas,usb2-phy.yaml
@@ -16,6 +16,7 @@ properties:
- enum:
- renesas,usb2-phy-r8a77470 # RZ/G1C
- renesas,usb2-phy-r9a08g045 # RZ/G3S
+ - renesas,usb2-phy-r9a09g057 # RZ/V2H(P)
- items:
- enum:
@@ -105,8 +106,13 @@ allOf:
properties:
compatible:
contains:
- const: renesas,rzg2l-usb2-phy
+ enum:
+ - renesas,usb2-phy-r9a09g057
+ - renesas,rzg2l-usb2-phy
then:
+ properties:
+ clocks:
+ minItems: 2
required:
- resets
diff --git a/Documentation/devicetree/bindings/phy/rockchip,inno-usb2phy.yaml b/Documentation/devicetree/bindings/phy/rockchip,inno-usb2phy.yaml
index 6a7ef556414c..58e735b5dd05 100644
--- a/Documentation/devicetree/bindings/phy/rockchip,inno-usb2phy.yaml
+++ b/Documentation/devicetree/bindings/phy/rockchip,inno-usb2phy.yaml
@@ -13,12 +13,14 @@ properties:
compatible:
enum:
- rockchip,px30-usb2phy
+ - rockchip,rk3036-usb2phy
- rockchip,rk3128-usb2phy
- rockchip,rk3228-usb2phy
- rockchip,rk3308-usb2phy
- rockchip,rk3328-usb2phy
- rockchip,rk3366-usb2phy
- rockchip,rk3399-usb2phy
+ - rockchip,rk3562-usb2phy
- rockchip,rk3568-usb2phy
- rockchip,rk3576-usb2phy
- rockchip,rk3588-usb2phy
@@ -184,12 +186,14 @@ allOf:
contains:
enum:
- rockchip,px30-usb2phy
+ - rockchip,rk3036-usb2phy
- rockchip,rk3128-usb2phy
- rockchip,rk3228-usb2phy
- rockchip,rk3308-usb2phy
- rockchip,rk3328-usb2phy
- rockchip,rk3366-usb2phy
- rockchip,rk3399-usb2phy
+ - rockchip,rk3562-usb2phy
- rockchip,rk3568-usb2phy
- rockchip,rk3588-usb2phy
- rockchip,rv1108-usb2phy
diff --git a/Documentation/devicetree/bindings/phy/rockchip,pcie3-phy.yaml b/Documentation/devicetree/bindings/phy/rockchip,pcie3-phy.yaml
index ba67dca5a446..d7de8b527c5c 100644
--- a/Documentation/devicetree/bindings/phy/rockchip,pcie3-phy.yaml
+++ b/Documentation/devicetree/bindings/phy/rockchip,pcie3-phy.yaml
@@ -46,6 +46,9 @@ properties:
reset-names:
const: phy
+ phy-supply:
+ description: Single PHY regulator
+
rockchip,phy-grf:
$ref: /schemas/types.yaml#/definitions/phandle
description: phandle to the syscon managing the phy "general register files"
diff --git a/Documentation/devicetree/bindings/phy/rockchip,rk3399-pcie-phy.yaml b/Documentation/devicetree/bindings/phy/rockchip,rk3399-pcie-phy.yaml
new file mode 100644
index 000000000000..f46f065e5dbe
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/rockchip,rk3399-pcie-phy.yaml
@@ -0,0 +1,45 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/phy/rockchip,rk3399-pcie-phy.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Rockchip RK3399 PCIE PHY
+
+maintainers:
+ - Heiko Stuebner <heiko@sntech.de>
+
+properties:
+ compatible:
+ const: rockchip,rk3399-pcie-phy
+
+ '#phy-cells':
+ oneOf:
+ - const: 0
+ deprecated: true
+ - const: 1
+ description: One lane per phy mode
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ const: refclk
+
+ resets:
+ maxItems: 1
+
+ reset-names:
+ const: phy
+
+required:
+ - compatible
+ - '#phy-cells'
+ - clocks
+ - clock-names
+ - resets
+ - reset-names
+
+additionalProperties: false
+
+...
diff --git a/Documentation/devicetree/bindings/phy/rockchip,rk3399-typec-phy.yaml b/Documentation/devicetree/bindings/phy/rockchip,rk3399-typec-phy.yaml
new file mode 100644
index 000000000000..91c011f68cd0
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/rockchip,rk3399-typec-phy.yaml
@@ -0,0 +1,116 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/phy/rockchip,rk3399-typec-phy.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Rockchip Type-C PHY
+
+maintainers:
+ - Heiko Stuebner <heiko@sntech.de>
+
+properties:
+ compatible:
+ const: rockchip,rk3399-typec-phy
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 2
+
+ clock-names:
+ items:
+ - const: tcpdcore
+ - const: tcpdphy-ref
+
+ extcon: true
+
+ power-domains:
+ maxItems: 1
+
+ resets:
+ maxItems: 3
+
+ reset-names:
+ items:
+ - const: uphy
+ - const: uphy-pipe
+ - const: uphy-tcphy
+
+ rockchip,grf:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description:
+ Phandle to the syscon managing the "general register files" (GRF).
+
+ dp-port:
+ type: object
+ additionalProperties: false
+
+ properties:
+ '#phy-cells':
+ const: 0
+
+ port:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: Connection to USB Type-C connector
+
+ required:
+ - '#phy-cells'
+
+ usb3-port:
+ type: object
+ additionalProperties: false
+
+ properties:
+ '#phy-cells':
+ const: 0
+
+ orientation-switch: true
+
+ port:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: Connection to USB Type-C connector SS port
+
+ required:
+ - '#phy-cells'
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - resets
+ - reset-names
+ - dp-port
+ - usb3-port
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/rk3399-cru.h>
+
+ phy@ff7c0000 {
+ compatible = "rockchip,rk3399-typec-phy";
+ reg = <0xff7c0000 0x40000>;
+ rockchip,grf = <&grf>;
+ extcon = <&fusb0>;
+ clocks = <&cru SCLK_UPHY0_TCPDCORE>,
+ <&cru SCLK_UPHY0_TCPDPHY_REF>;
+ clock-names = "tcpdcore", "tcpdphy-ref";
+ resets = <&cru SRST_UPHY0>,
+ <&cru SRST_UPHY0_PIPE_L00>,
+ <&cru SRST_P_UPHY0_TCPHY>;
+ reset-names = "uphy", "uphy-pipe", "uphy-tcphy";
+
+ dp-port {
+ #phy-cells = <0>;
+ };
+
+ usb3-port {
+ #phy-cells = <0>;
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/phy/rockchip-pcie-phy.txt b/Documentation/devicetree/bindings/phy/rockchip-pcie-phy.txt
deleted file mode 100644
index b496042f1f44..000000000000
--- a/Documentation/devicetree/bindings/phy/rockchip-pcie-phy.txt
+++ /dev/null
@@ -1,36 +0,0 @@
-Rockchip PCIE PHY
------------------------
-
-Required properties:
- - compatible: rockchip,rk3399-pcie-phy
- - clocks: Must contain an entry in clock-names.
- See ../clocks/clock-bindings.txt for details.
- - clock-names: Must be "refclk"
- - resets: Must contain an entry in reset-names.
- See ../reset/reset.txt for details.
- - reset-names: Must be "phy"
-
-Required properties for legacy PHY mode (deprecated):
- - #phy-cells: must be 0
-
-Required properties for per-lane PHY mode (preferred):
- - #phy-cells: must be 1
-
-Example:
-
-grf: syscon@ff770000 {
- compatible = "rockchip,rk3399-grf", "syscon", "simple-mfd";
- #address-cells = <1>;
- #size-cells = <1>;
-
- ...
-
- pcie_phy: pcie-phy {
- compatible = "rockchip,rk3399-pcie-phy";
- #phy-cells = <0>;
- clocks = <&cru SCLK_PCIEPHY_REF>;
- clock-names = "refclk";
- resets = <&cru SRST_PCIEPHY>;
- reset-names = "phy";
- };
-};
diff --git a/Documentation/devicetree/bindings/phy/samsung,exynos2200-eusb2-phy.yaml b/Documentation/devicetree/bindings/phy/samsung,exynos2200-eusb2-phy.yaml
new file mode 100644
index 000000000000..5e7e1bc2e39a
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/samsung,exynos2200-eusb2-phy.yaml
@@ -0,0 +1,80 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/phy/samsung,exynos2200-eusb2-phy.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung Exynos2200 eUSB2 phy controller
+
+maintainers:
+ - Ivaylo Ivanov <ivo.ivanov.ivanov1@gmail.com>
+
+description:
+ Samsung Exynos2200 eUSB2 phy, based on Synopsys eUSB2 IP block, supports
+ LS/FS/HS usb connectivity.
+
+properties:
+ compatible:
+ enum:
+ - samsung,exynos2200-eusb2-phy
+
+ reg:
+ maxItems: 1
+
+ "#phy-cells":
+ const: 0
+
+ clocks:
+ items:
+ - description: Reference clock
+ - description: Bus (APB) clock
+ - description: Control clock
+
+ clock-names:
+ items:
+ - const: ref
+ - const: bus
+ - const: ctrl
+
+ resets:
+ maxItems: 1
+
+ phys:
+ maxItems: 1
+ description:
+ Phandle to eUSB2 to USB 2.0 repeater
+
+ vdd-supply:
+ description:
+ Phandle to 0.88V regulator supply to PHY digital circuit.
+
+ vdda12-supply:
+ description:
+ Phandle to 1.2V regulator supply to PHY refclk pll block.
+
+required:
+ - compatible
+ - reg
+ - "#phy-cells"
+ - clocks
+ - clock-names
+ - vdd-supply
+ - vdda12-supply
+
+additionalProperties: false
+
+examples:
+ - |
+ usb_hsphy: phy@10ab0000 {
+ compatible = "samsung,exynos2200-eusb2-phy";
+ reg = <0x10ab0000 0x10000>;
+ #phy-cells = <0>;
+
+ clocks = <&cmu_hsi0 7>,
+ <&cmu_hsi0 5>,
+ <&cmu_hsi0 8>;
+ clock-names = "ref", "bus", "ctrl";
+
+ vdd-supply = <&vreg_0p88>;
+ vdda12-supply = <&vreg_1p2>;
+ };
diff --git a/Documentation/devicetree/bindings/phy/samsung,usb3-drd-phy.yaml b/Documentation/devicetree/bindings/phy/samsung,usb3-drd-phy.yaml
index 27295acbba76..cc60d2f6f70e 100644
--- a/Documentation/devicetree/bindings/phy/samsung,usb3-drd-phy.yaml
+++ b/Documentation/devicetree/bindings/phy/samsung,usb3-drd-phy.yaml
@@ -26,31 +26,41 @@ properties:
compatible:
enum:
- google,gs101-usb31drd-phy
+ - samsung,exynos2200-usb32drd-phy
- samsung,exynos5250-usbdrd-phy
- samsung,exynos5420-usbdrd-phy
- samsung,exynos5433-usbdrd-phy
- samsung,exynos7-usbdrd-phy
+ - samsung,exynos7870-usbdrd-phy
- samsung,exynos850-usbdrd-phy
clocks:
- minItems: 2
+ minItems: 1
maxItems: 5
clock-names:
- minItems: 2
+ minItems: 1
maxItems: 5
description: |
- At least two clocks::
+ Typically two clocks:
- Main PHY clock (same as USB DRD controller i.e. DWC3 IP clock), used
for register access.
- PHY reference clock (usually crystal clock), used for PHY operations,
associated by phy name. It is used to determine bit values for clock
settings register. For Exynos5420 this is given as 'sclk_usbphy30'
- in the CMU.
+ in the CMU. It's not needed for Exynos2200.
"#phy-cells":
const: 1
+ phys:
+ maxItems: 1
+ description:
+ USBDRD-underlying high-speed PHY
+
+ phy-names:
+ const: hs
+
port:
$ref: /schemas/graph.yaml#/properties/port
description:
@@ -155,6 +165,27 @@ allOf:
compatible:
contains:
enum:
+ - samsung,exynos2200-usb32drd-phy
+ then:
+ properties:
+ clocks:
+ maxItems: 1
+ clock-names:
+ items:
+ - const: phy
+ reg:
+ maxItems: 1
+ reg-names:
+ maxItems: 1
+ required:
+ - phys
+ - phy-names
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
- samsung,exynos5433-usbdrd-phy
- samsung,exynos7-usbdrd-phy
then:
@@ -184,6 +215,7 @@ allOf:
enum:
- samsung,exynos5250-usbdrd-phy
- samsung,exynos5420-usbdrd-phy
+ - samsung,exynos7870-usbdrd-phy
- samsung,exynos850-usbdrd-phy
then:
properties:
diff --git a/Documentation/devicetree/bindings/rtc/atmel,at91rm9200-rtc.yaml b/Documentation/devicetree/bindings/rtc/atmel,at91rm9200-rtc.yaml
index c8bb2eef442d..7c5b13caa40b 100644
--- a/Documentation/devicetree/bindings/rtc/atmel,at91rm9200-rtc.yaml
+++ b/Documentation/devicetree/bindings/rtc/atmel,at91rm9200-rtc.yaml
@@ -23,7 +23,9 @@ properties:
- microchip,sam9x60-rtc
- microchip,sama7g5-rtc
- items:
- - const: microchip,sam9x7-rtc
+ - enum:
+ - microchip,sam9x7-rtc
+ - microchip,sama7d65-rtc
- const: microchip,sam9x60-rtc
reg:
diff --git a/Documentation/devicetree/bindings/rtc/atmel,at91sam9260-rtt.yaml b/Documentation/devicetree/bindings/rtc/atmel,at91sam9260-rtt.yaml
index a7f6c1d1a08a..9c9b981fe38b 100644
--- a/Documentation/devicetree/bindings/rtc/atmel,at91sam9260-rtt.yaml
+++ b/Documentation/devicetree/bindings/rtc/atmel,at91sam9260-rtt.yaml
@@ -22,6 +22,7 @@ properties:
- enum:
- microchip,sam9x60-rtt
- microchip,sam9x7-rtt
+ - microchip,sama7d65-rtt
- const: atmel,at91sam9260-rtt
- items:
- const: microchip,sama7g5-rtt
diff --git a/Documentation/devicetree/bindings/rtc/nxp,s32g-rtc.yaml b/Documentation/devicetree/bindings/rtc/nxp,s32g-rtc.yaml
new file mode 100644
index 000000000000..40fd2fa298fe
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/nxp,s32g-rtc.yaml
@@ -0,0 +1,72 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/rtc/nxp,s32g-rtc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NXP S32G2/S32G3 Real Time Clock (RTC)
+
+maintainers:
+ - Bogdan Hamciuc <bogdan.hamciuc@nxp.com>
+ - Ciprian Marian Costea <ciprianmarian.costea@nxp.com>
+
+description:
+ RTC hardware module present on S32G2/S32G3 SoCs is used as a wakeup source.
+ It is not kept alive during system reset and it is not battery-powered.
+
+allOf:
+ - $ref: rtc.yaml#
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - nxp,s32g2-rtc
+ - items:
+ - const: nxp,s32g3-rtc
+ - const: nxp,s32g2-rtc
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: ipg clock drives the access to the RTC iomapped registers
+ - description: Clock source for the RTC module. Can be selected between
+ 4 different clock sources using an integrated hardware mux.
+ On S32G2/S32G3 SoCs, 'source0' is the SIRC clock (~32KHz) and it is
+ available during standby and runtime. 'source1' is reserved and cannot
+ be used. 'source2' is the FIRC clock and it is only available during
+ runtime providing a better resolution (~48MHz). 'source3' is an external
+ RTC clock source which can be additionally added in hardware.
+
+ clock-names:
+ items:
+ - const: ipg
+ - enum: [ source0, source1, source2, source3 ]
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+
+ rtc@40060000 {
+ compatible = "nxp,s32g3-rtc",
+ "nxp,s32g2-rtc";
+ reg = <0x40060000 0x1000>;
+ interrupts = <GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clks 54>, <&clks 55>;
+ clock-names = "ipg", "source0";
+ };
diff --git a/Documentation/devicetree/bindings/rtc/qcom-pm8xxx-rtc.yaml b/Documentation/devicetree/bindings/rtc/qcom-pm8xxx-rtc.yaml
index 68ef3208c886..7497dc3ac5b2 100644
--- a/Documentation/devicetree/bindings/rtc/qcom-pm8xxx-rtc.yaml
+++ b/Documentation/devicetree/bindings/rtc/qcom-pm8xxx-rtc.yaml
@@ -55,6 +55,12 @@ properties:
description:
RTC alarm is not owned by the OS
+ qcom,uefi-rtc-info:
+ type: boolean
+ description:
+ RTC offset is stored as a four-byte GPS time offset in a 12-byte UEFI
+ variable 882f8c2b-9646-435f-8de5-f208ff80c1bd-RTCInfo
+
wakeup-source: true
required:
diff --git a/Documentation/devicetree/bindings/rtc/renesas,rzn1-rtc.yaml b/Documentation/devicetree/bindings/rtc/renesas,rzn1-rtc.yaml
index f6e0c613af67..f6fdcc7090b6 100644
--- a/Documentation/devicetree/bindings/rtc/renesas,rzn1-rtc.yaml
+++ b/Documentation/devicetree/bindings/rtc/renesas,rzn1-rtc.yaml
@@ -33,10 +33,14 @@ properties:
- const: pps
clocks:
- maxItems: 1
+ minItems: 1
+ maxItems: 2
clock-names:
- const: hclk
+ minItems: 1
+ items:
+ - const: hclk
+ - const: xtal
power-domains:
maxItems: 1
diff --git a/Documentation/devicetree/bindings/soc/rockchip/grf.yaml b/Documentation/devicetree/bindings/soc/rockchip/grf.yaml
index 8cbf5b6772dd..ccdcc889ba8e 100644
--- a/Documentation/devicetree/bindings/soc/rockchip/grf.yaml
+++ b/Documentation/devicetree/bindings/soc/rockchip/grf.yaml
@@ -208,8 +208,8 @@ allOf:
pcie-phy:
type: object
- description:
- Documentation/devicetree/bindings/phy/rockchip-pcie-phy.txt
+ $ref: /schemas/phy/rockchip,rk3399-pcie-phy.yaml#
+ unevaluatedProperties: false
patternProperties:
"^phy@[0-9a-f]+$":
@@ -333,6 +333,15 @@ examples:
#phy-cells = <0>;
};
+ pcie-phy {
+ compatible = "rockchip,rk3399-pcie-phy";
+ #phy-cells = <1>;
+ clocks = <&cru SCLK_PCIEPHY_REF>;
+ clock-names = "refclk";
+ resets = <&cru SRST_PCIEPHY>;
+ reset-names = "phy";
+ };
+
phy@f780 {
compatible = "rockchip,rk3399-emmc-phy";
reg = <0xf780 0x20>;
diff --git a/Documentation/devicetree/bindings/usb/rockchip,dwc3.yaml b/Documentation/devicetree/bindings/usb/rockchip,dwc3.yaml
index fba2cb05ecba..fd1b13c0ed6b 100644
--- a/Documentation/devicetree/bindings/usb/rockchip,dwc3.yaml
+++ b/Documentation/devicetree/bindings/usb/rockchip,dwc3.yaml
@@ -18,7 +18,7 @@ description:
Documentation/devicetree/bindings/phy/rockchip,inno-usb2phy.yaml
Type-C PHY
- Documentation/devicetree/bindings/phy/phy-rockchip-typec.txt
+ Documentation/devicetree/bindings/phy/rockchip,rk3399-typec-phy.yaml
select:
properties:
diff --git a/Documentation/driver-api/dmaengine/provider.rst b/Documentation/driver-api/dmaengine/provider.rst
index 8f0910668ca3..1594598b3317 100644
--- a/Documentation/driver-api/dmaengine/provider.rst
+++ b/Documentation/driver-api/dmaengine/provider.rst
@@ -172,8 +172,8 @@ Currently, the types available are:
- It's usually used for copying pixel data between host memory and
memory-mapped GPU device memory, such as found on modern PCI video graphics
cards. The most immediate example is the OpenGL API function
- ``glReadPielx()``, which might require a verbatim copy of a huge framebuffer
- from local device memory onto host memory.
+ ``glReadPixels()``, which might require a verbatim copy of a huge
+ framebuffer from local device memory onto host memory.
- DMA_XOR
diff --git a/Documentation/netlink/specs/rt-link.yaml b/Documentation/netlink/specs/rt-link.yaml
index 5ec3d35b7a38..b41b31eebcae 100644
--- a/Documentation/netlink/specs/rt-link.yaml
+++ b/Documentation/netlink/specs/rt-link.yaml
@@ -1685,15 +1685,19 @@ attribute-sets:
-
name: iflags
type: u16
+ byte-order: big-endian
-
name: oflags
type: u16
+ byte-order: big-endian
-
name: ikey
type: u32
+ byte-order: big-endian
-
name: okey
type: u32
+ byte-order: big-endian
-
name: local
type: binary
@@ -1713,10 +1717,11 @@ attribute-sets:
type: u8
-
name: encap-limit
- type: u32
+ type: u8
-
name: flowinfo
type: u32
+ byte-order: big-endian
-
name: flags
type: u32
@@ -1729,9 +1734,11 @@ attribute-sets:
-
name: encap-sport
type: u16
+ byte-order: big-endian
-
name: encap-dport
type: u16
+ byte-order: big-endian
-
name: collect-metadata
type: flag
@@ -1754,6 +1761,54 @@ attribute-sets:
name: erspan-hwid
type: u16
-
+ name: linkinfo-gre6-attrs
+ subset-of: linkinfo-gre-attrs
+ attributes:
+ -
+ name: link
+ -
+ name: iflags
+ -
+ name: oflags
+ -
+ name: ikey
+ -
+ name: okey
+ -
+ name: local
+ display-hint: ipv6
+ -
+ name: remote
+ display-hint: ipv6
+ -
+ name: ttl
+ -
+ name: encap-limit
+ -
+ name: flowinfo
+ -
+ name: flags
+ -
+ name: encap-type
+ -
+ name: encap-flags
+ -
+ name: encap-sport
+ -
+ name: encap-dport
+ -
+ name: collect-metadata
+ -
+ name: fwmark
+ -
+ name: erspan-index
+ -
+ name: erspan-ver
+ -
+ name: erspan-dir
+ -
+ name: erspan-hwid
+ -
name: linkinfo-vti-attrs
name-prefix: ifla-vti-
header: linux/if_tunnel.h
@@ -1764,9 +1819,11 @@ attribute-sets:
-
name: ikey
type: u32
+ byte-order: big-endian
-
name: okey
type: u32
+ byte-order: big-endian
-
name: local
type: binary
@@ -1816,6 +1873,7 @@ attribute-sets:
-
name: port
type: u16
+ byte-order: big-endian
-
name: collect-metadata
type: flag
@@ -1835,6 +1893,7 @@ attribute-sets:
-
name: label
type: u32
+ byte-order: big-endian
-
name: ttl-inherit
type: u8
@@ -1875,9 +1934,11 @@ attribute-sets:
-
name: flowinfo
type: u32
+ byte-order: big-endian
-
name: flags
type: u16
+ byte-order: big-endian
-
name: proto
type: u8
@@ -1907,9 +1968,11 @@ attribute-sets:
-
name: encap-sport
type: u16
+ byte-order: big-endian
-
name: encap-dport
type: u16
+ byte-order: big-endian
-
name: collect-metadata
type: flag
@@ -2225,6 +2288,9 @@ sub-messages:
value: gretap
attribute-set: linkinfo-gre-attrs
-
+ value: ip6gre
+ attribute-set: linkinfo-gre6-attrs
+ -
value: geneve
attribute-set: linkinfo-geneve-attrs
-
diff --git a/Documentation/rust/coding-guidelines.rst b/Documentation/rust/coding-guidelines.rst
index 27f2a7bb5a4a..6ff9e754755d 100644
--- a/Documentation/rust/coding-guidelines.rst
+++ b/Documentation/rust/coding-guidelines.rst
@@ -85,6 +85,18 @@ written after the documentation, e.g.:
// ...
}
+This applies to both public and private items. This increases consistency with
+public items, allows changes to visibility with less changes involved and will
+allow us to potentially generate the documentation for private items as well.
+In other words, if documentation is written for a private item, then ``///``
+should still be used. For instance:
+
+.. code-block:: rust
+
+ /// My private function.
+ // TODO: ...
+ fn f() {}
+
One special kind of comments are the ``// SAFETY:`` comments. These must appear
before every ``unsafe`` block, and they explain why the code inside the block is
correct/sound, i.e. why it cannot trigger undefined behavior in any case, e.g.:
@@ -191,6 +203,23 @@ or:
/// [`struct mutex`]: srctree/include/linux/mutex.h
+C FFI types
+-----------
+
+Rust kernel code refers to C types, such as ``int``, using type aliases such as
+``c_int``, which are readily available from the ``kernel`` prelude. Please do
+not use the aliases from ``core::ffi`` -- they may not map to the correct types.
+
+These aliases should generally be referred directly by their identifier, i.e.
+as a single segment path. For instance:
+
+.. code-block:: rust
+
+ fn f(p: *const c_char) -> c_int {
+ // ...
+ }
+
+
Naming
------
diff --git a/Documentation/rust/quick-start.rst b/Documentation/rust/quick-start.rst
index 6d2607870ba4..155f7107329a 100644
--- a/Documentation/rust/quick-start.rst
+++ b/Documentation/rust/quick-start.rst
@@ -90,15 +90,53 @@ they should generally work out of the box, e.g.::
Ubuntu
******
-Ubuntu LTS and non-LTS (interim) releases provide recent Rust releases and thus
-they should generally work out of the box, e.g.::
+25.04
+~~~~~
+
+The latest Ubuntu releases provide recent Rust releases and thus they should
+generally work out of the box, e.g.::
+
+ apt install rustc rust-src bindgen rustfmt rust-clippy
+
+In addition, ``RUST_LIB_SRC`` needs to be set, e.g.::
+
+ RUST_LIB_SRC=/usr/src/rustc-$(rustc --version | cut -d' ' -f2)/library
+
+For convenience, ``RUST_LIB_SRC`` can be exported to the global environment.
- apt install rustc-1.80 rust-1.80-src bindgen-0.65 rustfmt-1.80 rust-1.80-clippy
+
+24.04 LTS and older
+~~~~~~~~~~~~~~~~~~~
+
+Though Ubuntu 24.04 LTS and older versions still provide recent Rust
+releases, they require some additional configuration to be set, using
+the versioned packages, e.g.::
+
+ apt install rustc-1.80 rust-1.80-src bindgen-0.65 rustfmt-1.80 \
+ rust-1.80-clippy
+ ln -s /usr/lib/rust-1.80/bin/rustfmt /usr/bin/rustfmt-1.80
+ ln -s /usr/lib/rust-1.80/bin/clippy-driver /usr/bin/clippy-driver-1.80
+
+None of these packages set their tools as defaults; therefore they should be
+specified explicitly, e.g.::
+
+ make LLVM=1 RUSTC=rustc-1.80 RUSTDOC=rustdoc-1.80 RUSTFMT=rustfmt-1.80 \
+ CLIPPY_DRIVER=clippy-driver-1.80 BINDGEN=bindgen-0.65
+
+Alternatively, modify the ``PATH`` variable to place the Rust 1.80 binaries
+first and set ``bindgen`` as the default, e.g.::
+
+ PATH=/usr/lib/rust-1.80/bin:$PATH
+ update-alternatives --install /usr/bin/bindgen bindgen \
+ /usr/bin/bindgen-0.65 100
+ update-alternatives --set bindgen /usr/bin/bindgen-0.65
``RUST_LIB_SRC`` needs to be set when using the versioned packages, e.g.::
RUST_LIB_SRC=/usr/src/rustc-$(rustc-1.80 --version | cut -d' ' -f2)/library
+For convenience, ``RUST_LIB_SRC`` can be exported to the global environment.
+
In addition, ``bindgen-0.65`` is available in newer releases (24.04 LTS and
24.10), but it may not be available in older ones (20.04 LTS and 22.04 LTS),
thus ``bindgen`` may need to be built manually (please see below).
diff --git a/Documentation/rust/testing.rst b/Documentation/rust/testing.rst
index f692494f7b74..f43cb77bcc69 100644
--- a/Documentation/rust/testing.rst
+++ b/Documentation/rust/testing.rst
@@ -133,13 +133,85 @@ please see:
The ``#[test]`` tests
---------------------
-Additionally, there are the ``#[test]`` tests. These can be run using the
-``rusttest`` Make target::
+Additionally, there are the ``#[test]`` tests. Like for documentation tests,
+these are also fairly similar to what you would expect from userspace, and they
+are also mapped to KUnit.
+
+These tests are introduced by the ``kunit_tests`` procedural macro, which takes
+the name of the test suite as an argument.
+
+For instance, assume we want to test the function ``f`` from the documentation
+tests section. We could write, in the same file where we have our function:
+
+.. code-block:: rust
+
+ #[kunit_tests(rust_kernel_mymod)]
+ mod tests {
+ use super::*;
+
+ #[test]
+ fn test_f() {
+ assert_eq!(f(10, 20), 30);
+ }
+ }
+
+And if we run it, the kernel log would look like::
+
+ KTAP version 1
+ # Subtest: rust_kernel_mymod
+ # speed: normal
+ 1..1
+ # test_f.speed: normal
+ ok 1 test_f
+ ok 1 rust_kernel_mymod
+
+Like documentation tests, the ``assert!`` and ``assert_eq!`` macros are mapped
+back to KUnit and do not panic. Similarly, the
+`? <https://doc.rust-lang.org/reference/expressions/operator-expr.html#the-question-mark-operator>`_
+operator is supported, i.e. the test functions may return either nothing (i.e.
+the unit type ``()``) or ``Result`` (i.e. any ``Result<T, E>``). For instance:
+
+.. code-block:: rust
+
+ #[kunit_tests(rust_kernel_mymod)]
+ mod tests {
+ use super::*;
+
+ #[test]
+ fn test_g() -> Result {
+ let x = g()?;
+ assert_eq!(x, 30);
+ Ok(())
+ }
+ }
+
+If we run the test and the call to ``g`` fails, then the kernel log would show::
+
+ KTAP version 1
+ # Subtest: rust_kernel_mymod
+ # speed: normal
+ 1..1
+ # test_g: ASSERTION FAILED at rust/kernel/lib.rs:335
+ Expected is_test_result_ok(test_g()) to be true, but is false
+ # test_g.speed: normal
+ not ok 1 test_g
+ not ok 1 rust_kernel_mymod
+
+If a ``#[test]`` test could be useful as an example for the user, then please
+use a documentation test instead. Even edge cases of an API, e.g. error or
+boundary cases, can be interesting to show in examples.
+
+The ``rusttest`` host tests
+---------------------------
+
+These are userspace tests that can be built and run in the host (i.e. the one
+that performs the kernel build) using the ``rusttest`` Make target::
make LLVM=1 rusttest
-This requires the kernel ``.config``. It runs the ``#[test]`` tests on the host
-(currently) and thus is fairly limited in what these tests can test.
+This requires the kernel ``.config``.
+
+Currently, they are mostly used for testing the ``macros`` crate's examples.
The Kselftests
--------------
diff --git a/Documentation/virt/uml/user_mode_linux_howto_v2.rst b/Documentation/virt/uml/user_mode_linux_howto_v2.rst
index 584000b743f3..c37e8e594d12 100644
--- a/Documentation/virt/uml/user_mode_linux_howto_v2.rst
+++ b/Documentation/virt/uml/user_mode_linux_howto_v2.rst
@@ -147,18 +147,12 @@ The image hostname will be set to the same as the host on which you
are creating its image. It is a good idea to change that to avoid
"Oh, bummer, I rebooted the wrong machine".
-UML supports two classes of network devices - the older uml_net ones
-which are scheduled for obsoletion. These are called ethX. It also
-supports the newer vector IO devices which are significantly faster
-and have support for some standard virtual network encapsulations like
-Ethernet over GRE and Ethernet over L2TPv3. These are called vec0.
+UML supports vector I/O high performance network devices which have
+support for some standard virtual network encapsulations like
+Ethernet over GRE and Ethernet over L2TPv3. These are called vecX.
-Depending on which one is in use, ``/etc/network/interfaces`` will
-need entries like::
-
- # legacy UML network devices
- auto eth0
- iface eth0 inet dhcp
+When vector network devices are in use, ``/etc/network/interfaces``
+will need entries like::
# vector UML network devices
auto vec0
@@ -219,16 +213,6 @@ remote UML and other VM instances.
+-----------+--------+------------------------------------+------------+
| vde | vector | dep. on VDE VPN: Virt.Net Locator | varies |
+-----------+--------+------------------------------------+------------+
-| tuntap | legacy | none | ~ 500Mbit |
-+-----------+--------+------------------------------------+------------+
-| daemon | legacy | none | ~ 450Mbit |
-+-----------+--------+------------------------------------+------------+
-| socket | legacy | none | ~ 450Mbit |
-+-----------+--------+------------------------------------+------------+
-| ethertap | legacy | obsolete | ~ 500Mbit |
-+-----------+--------+------------------------------------+------------+
-| vde | legacy | obsolete | ~ 500Mbit |
-+-----------+--------+------------------------------------+------------+
* All transports which have tso and checksum offloads can deliver speeds
approaching 10G on TCP streams.
@@ -236,27 +220,16 @@ remote UML and other VM instances.
* All transports which have multi-packet rx and/or tx can deliver pps
rates of up to 1Mps or more.
-* All legacy transports are generally limited to ~600-700MBit and 0.05Mps.
-
* GRE and L2TPv3 allow connections to all of: local machine, remote
machines, remote network devices and remote UML instances.
-* Socket allows connections only between UML instances.
-
-* Daemon and bess require running a local switch. This switch may be
- connected to the host as well.
-
Network configuration privileges
================================
The majority of the supported networking modes need ``root`` privileges.
-For example, in the legacy tuntap networking mode, users were required
-to be part of the group associated with the tunnel device.
-
-For newer network drivers like the vector transports, ``root`` privilege
-is required to fire an ioctl to setup the tun interface and/or use
-raw sockets where needed.
+For example, for vector transports, ``root`` privilege is required to fire
+an ioctl to setup the tun interface and/or use raw sockets where needed.
This can be achieved by granting the user a particular capability instead
of running UML as root. In case of vector transport, a user can add the
@@ -610,12 +583,6 @@ connect to a local area cloud (all the UML nodes using the same
multicast address running on hosts in the same multicast domain (LAN)
will be automagically connected together to a virtual LAN.
-Configuring Legacy transports
-=============================
-
-Legacy transports are now considered obsolete. Please use the vector
-versions.
-
***********
Running UML
***********
diff --git a/MAINTAINERS b/MAINTAINERS
index e4b4dbaa7348..f2668b81115c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6374,11 +6374,20 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6.git
F: Documentation/crypto/
F: Documentation/devicetree/bindings/crypto/
F: arch/*/crypto/
-F: arch/*/lib/crypto/
F: crypto/
F: drivers/crypto/
F: include/crypto/
F: include/linux/crypto*
+
+CRYPTO LIBRARY
+M: Eric Biggers <ebiggers@kernel.org>
+M: Jason A. Donenfeld <Jason@zx2c4.com>
+M: Ard Biesheuvel <ardb@kernel.org>
+L: linux-crypto@vger.kernel.org
+S: Maintained
+T: git https://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux.git libcrypto-next
+T: git https://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux.git libcrypto-fixes
+F: arch/*/lib/crypto/
F: lib/crypto/
CRYPTO SPEED TEST COMPARE
@@ -10719,20 +10728,23 @@ F: kernel/time/timer_list.c
F: kernel/time/timer_migration.*
F: tools/testing/selftests/timers/
-HIGH-RESOLUTION TIMERS [RUST]
+DELAY, SLEEP, TIMEKEEPING, TIMERS [RUST]
M: Andreas Hindborg <a.hindborg@kernel.org>
R: Boqun Feng <boqun.feng@gmail.com>
+R: FUJITA Tomonori <fujita.tomonori@gmail.com>
R: Frederic Weisbecker <frederic@kernel.org>
R: Lyude Paul <lyude@redhat.com>
R: Thomas Gleixner <tglx@linutronix.de>
R: Anna-Maria Behnsen <anna-maria@linutronix.de>
+R: John Stultz <jstultz@google.com>
+R: Stephen Boyd <sboyd@kernel.org>
L: rust-for-linux@vger.kernel.org
S: Supported
W: https://rust-for-linux.com
B: https://github.com/Rust-for-Linux/linux/issues
-T: git https://github.com/Rust-for-Linux/linux.git hrtimer-next
-F: rust/kernel/time/hrtimer.rs
-F: rust/kernel/time/hrtimer/
+T: git https://github.com/Rust-for-Linux/linux.git timekeeping-next
+F: rust/kernel/time.rs
+F: rust/kernel/time/
HIGH-SPEED SCC DRIVER FOR AX.25
L: linux-hams@vger.kernel.org
@@ -10778,7 +10790,7 @@ F: net/dsa/tag_hellcreek.c
HISILICON DMA DRIVER
M: Zhou Wang <wangzhou1@hisilicon.com>
-M: Jie Hai <haijie1@huawei.com>
+M: Longfang Liu <liulongfang@huawei.com>
L: dmaengine@vger.kernel.org
S: Maintained
F: drivers/dma/hisi_dma.c
@@ -21588,7 +21600,7 @@ M: Alex Gaynor <alex.gaynor@gmail.com>
R: Boqun Feng <boqun.feng@gmail.com>
R: Gary Guo <gary@garyguo.net>
R: Björn Roy Baron <bjorn3_gh@protonmail.com>
-R: Benno Lossin <benno.lossin@proton.me>
+R: Benno Lossin <lossin@kernel.org>
R: Andreas Hindborg <a.hindborg@kernel.org>
R: Alice Ryhl <aliceryhl@google.com>
R: Trevor Gross <tmgross@umich.edu>
@@ -21618,7 +21630,7 @@ F: rust/kernel/alloc.rs
F: rust/kernel/alloc/
RUST [PIN-INIT]
-M: Benno Lossin <benno.lossin@proton.me>
+M: Benno Lossin <lossin@kernel.org>
L: rust-for-linux@vger.kernel.org
S: Maintained
W: https://rust-for-linux.com/pin-init
@@ -25153,13 +25165,12 @@ L: linux-parisc@vger.kernel.org
S: Orphan
F: drivers/net/ethernet/dec/tulip/
-TUN/TAP driver
+TUN/TAP DRIVER
M: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
M: Jason Wang <jasowang@redhat.com>
S: Maintained
W: http://vtun.sourceforge.net/tun
F: Documentation/networking/tuntap.rst
-F: arch/um/os-Linux/drivers/
F: drivers/net/tap.c
F: drivers/net/tun*
@@ -26829,6 +26840,17 @@ F: lib/test_xarray.c
F: lib/xarray.c
F: tools/testing/radix-tree
+XARRAY API [RUST]
+M: Tamir Duberstein <tamird@gmail.com>
+M: Andreas Hindborg <a.hindborg@kernel.org>
+L: rust-for-linux@vger.kernel.org
+S: Supported
+W: https://rust-for-linux.com
+B: https://github.com/Rust-for-Linux/linux/issues
+C: https://rust-for-linux.zulipchat.com
+T: git https://github.com/Rust-for-Linux/linux.git xarray-next
+F: rust/kernel/xarray.rs
+
XBOX DVD IR REMOTE
M: Benjamin Valentin <benpicco@googlemail.com>
S: Maintained
diff --git a/arch/arm/include/asm/simd.h b/arch/arm/include/asm/simd.h
index d37559762180..be08a8da046f 100644
--- a/arch/arm/include/asm/simd.h
+++ b/arch/arm/include/asm/simd.h
@@ -8,7 +8,8 @@
static __must_check inline bool may_use_simd(void)
{
- return IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && !in_hardirq();
+ return IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && !in_hardirq()
+ && !irqs_disabled();
}
#endif /* _ASM_SIMD_H */
diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c
index 748698e91a4b..27e64f782cb3 100644
--- a/arch/arm/mm/ioremap.c
+++ b/arch/arm/mm/ioremap.c
@@ -515,7 +515,5 @@ void __init early_ioremap_init(void)
bool arch_memremap_can_ram_remap(resource_size_t offset, size_t size,
unsigned long flags)
{
- unsigned long pfn = PHYS_PFN(offset);
-
- return memblock_is_map_memory(pfn);
+ return memblock_is_map_memory(offset);
}
diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c
index 7803d50b90f8..e559ad3cd148 100644
--- a/arch/arm/vfp/vfpmodule.c
+++ b/arch/arm/vfp/vfpmodule.c
@@ -877,6 +877,7 @@ void kernel_neon_begin(void)
* the kernel mode NEON register contents never need to be preserved.
*/
BUG_ON(in_hardirq());
+ BUG_ON(irqs_disabled());
cpu = __smp_processor_id();
fpexc = fmrx(FPEXC) | FPEXC_EN;
diff --git a/arch/arm64/boot/dts/renesas/r9a09g057.dtsi b/arch/arm64/boot/dts/renesas/r9a09g057.dtsi
index 18ab5639b301..0f3501951409 100644
--- a/arch/arm64/boot/dts/renesas/r9a09g057.dtsi
+++ b/arch/arm64/boot/dts/renesas/r9a09g057.dtsi
@@ -280,6 +280,171 @@
resets = <&cpg 0x30>;
};
+ dmac0: dma-controller@11400000 {
+ compatible = "renesas,r9a09g057-dmac";
+ reg = <0 0x11400000 0 0x10000>;
+ interrupts = <GIC_SPI 499 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 89 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 90 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 91 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 92 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 93 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 94 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 95 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 96 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 97 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 98 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 99 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 100 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 101 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 102 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 103 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 104 IRQ_TYPE_EDGE_RISING>;
+ interrupt-names = "error",
+ "ch0", "ch1", "ch2", "ch3",
+ "ch4", "ch5", "ch6", "ch7",
+ "ch8", "ch9", "ch10", "ch11",
+ "ch12", "ch13", "ch14", "ch15";
+ clocks = <&cpg CPG_MOD 0x0>;
+ power-domains = <&cpg>;
+ resets = <&cpg 0x31>;
+ #dma-cells = <1>;
+ dma-channels = <16>;
+ renesas,icu = <&icu 4>;
+ };
+
+ dmac1: dma-controller@14830000 {
+ compatible = "renesas,r9a09g057-dmac";
+ reg = <0 0x14830000 0 0x10000>;
+ interrupts = <GIC_SPI 495 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 25 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 26 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 27 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 28 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 29 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 30 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 31 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 32 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 33 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 34 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 35 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 36 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 37 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 38 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 39 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 40 IRQ_TYPE_EDGE_RISING>;
+ interrupt-names = "error",
+ "ch0", "ch1", "ch2", "ch3",
+ "ch4", "ch5", "ch6", "ch7",
+ "ch8", "ch9", "ch10", "ch11",
+ "ch12", "ch13", "ch14", "ch15";
+ clocks = <&cpg CPG_MOD 0x1>;
+ power-domains = <&cpg>;
+ resets = <&cpg 0x32>;
+ #dma-cells = <1>;
+ dma-channels = <16>;
+ renesas,icu = <&icu 0>;
+ };
+
+ dmac2: dma-controller@14840000 {
+ compatible = "renesas,r9a09g057-dmac";
+ reg = <0 0x14840000 0 0x10000>;
+ interrupts = <GIC_SPI 496 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 41 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 42 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 43 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 44 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 45 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 46 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 47 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 48 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 49 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 50 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 51 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 52 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 53 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 54 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 55 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 56 IRQ_TYPE_EDGE_RISING>;
+ interrupt-names = "error",
+ "ch0", "ch1", "ch2", "ch3",
+ "ch4", "ch5", "ch6", "ch7",
+ "ch8", "ch9", "ch10", "ch11",
+ "ch12", "ch13", "ch14", "ch15";
+ clocks = <&cpg CPG_MOD 0x2>;
+ power-domains = <&cpg>;
+ resets = <&cpg 0x33>;
+ #dma-cells = <1>;
+ dma-channels = <16>;
+ renesas,icu = <&icu 1>;
+ };
+
+ dmac3: dma-controller@12000000 {
+ compatible = "renesas,r9a09g057-dmac";
+ reg = <0 0x12000000 0 0x10000>;
+ interrupts = <GIC_SPI 497 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 57 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 58 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 59 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 60 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 61 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 62 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 63 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 64 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 65 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 66 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 67 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 68 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 69 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 70 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 71 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 72 IRQ_TYPE_EDGE_RISING>;
+ interrupt-names = "error",
+ "ch0", "ch1", "ch2", "ch3",
+ "ch4", "ch5", "ch6", "ch7",
+ "ch8", "ch9", "ch10", "ch11",
+ "ch12", "ch13", "ch14", "ch15";
+ clocks = <&cpg CPG_MOD 0x3>;
+ power-domains = <&cpg>;
+ resets = <&cpg 0x34>;
+ #dma-cells = <1>;
+ dma-channels = <16>;
+ renesas,icu = <&icu 2>;
+ };
+
+ dmac4: dma-controller@12010000 {
+ compatible = "renesas,r9a09g057-dmac";
+ reg = <0 0x12010000 0 0x10000>;
+ interrupts = <GIC_SPI 498 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 73 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 74 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 75 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 76 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 77 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 78 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 79 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 80 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 81 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 82 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 83 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 84 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 85 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 86 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 87 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 88 IRQ_TYPE_EDGE_RISING>;
+ interrupt-names = "error",
+ "ch0", "ch1", "ch2", "ch3",
+ "ch4", "ch5", "ch6", "ch7",
+ "ch8", "ch9", "ch10", "ch11",
+ "ch12", "ch13", "ch14", "ch15";
+ clocks = <&cpg CPG_MOD 0x4>;
+ power-domains = <&cpg>;
+ resets = <&cpg 0x35>;
+ #dma-cells = <1>;
+ dma-channels = <16>;
+ renesas,icu = <&icu 3>;
+ };
+
ostm0: timer@11800000 {
compatible = "renesas,r9a09g057-ostm", "renesas,ostm";
reg = <0x0 0x11800000 0x0 0x1000>;
diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
index 1e7c7475e43f..ba5df0df02a4 100644
--- a/arch/arm64/include/asm/el2_setup.h
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -298,19 +298,6 @@
.Lskip_gcs_\@:
.endm
-.macro __init_el2_mpam
- /* Memory Partitioning And Monitoring: disable EL2 traps */
- mrs x1, id_aa64pfr0_el1
- ubfx x0, x1, #ID_AA64PFR0_EL1_MPAM_SHIFT, #4
- cbz x0, .Lskip_mpam_\@ // skip if no MPAM
- msr_s SYS_MPAM2_EL2, xzr // use the default partition
- // and disable lower traps
- mrs_s x0, SYS_MPAMIDR_EL1
- tbz x0, #MPAMIDR_EL1_HAS_HCR_SHIFT, .Lskip_mpam_\@ // skip if no MPAMHCR reg
- msr_s SYS_MPAMHCR_EL2, xzr // clear TRAP_MPAMIDR_EL1 -> EL2
-.Lskip_mpam_\@:
-.endm
-
/**
* Initialize EL2 registers to sane values. This should be called early on all
* cores that were booted in EL2. Note that everything gets initialised as
@@ -328,7 +315,6 @@
__init_el2_stage2
__init_el2_gicv3
__init_el2_hstr
- __init_el2_mpam
__init_el2_nvhe_idregs
__init_el2_cptr
__init_el2_fgt
@@ -375,6 +361,16 @@
#endif
.macro finalise_el2_state
+ check_override id_aa64pfr0, ID_AA64PFR0_EL1_MPAM_SHIFT, .Linit_mpam_\@, .Lskip_mpam_\@, x1, x2
+
+.Linit_mpam_\@:
+ msr_s SYS_MPAM2_EL2, xzr // use the default partition
+ // and disable lower traps
+ mrs_s x0, SYS_MPAMIDR_EL1
+ tbz x0, #MPAMIDR_EL1_HAS_HCR_SHIFT, .Lskip_mpam_\@ // skip if no MPAMHCR reg
+ msr_s SYS_MPAMHCR_EL2, xzr // clear TRAP_MPAMIDR_EL1 -> EL2
+
+.Lskip_mpam_\@:
check_override id_aa64pfr0, ID_AA64PFR0_EL1_SVE_SHIFT, .Linit_sve_\@, .Lskip_sve_\@, x1, x2
.Linit_sve_\@: /* SVE register access */
diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
index 9e93733523f6..74a4f738c5f5 100644
--- a/arch/arm64/include/asm/kernel-pgtable.h
+++ b/arch/arm64/include/asm/kernel-pgtable.h
@@ -58,7 +58,7 @@
#define INIT_DIR_SIZE (PAGE_SIZE * (EARLY_PAGES(SWAPPER_PGTABLE_LEVELS, KIMAGE_VADDR, _end, EXTRA_PAGE) \
+ EARLY_SEGMENT_EXTRA_PAGES))
-#define INIT_IDMAP_DIR_PAGES (EARLY_PAGES(INIT_IDMAP_PGTABLE_LEVELS, KIMAGE_VADDR, _end, 1))
+#define INIT_IDMAP_DIR_PAGES (EARLY_PAGES(INIT_IDMAP_PGTABLE_LEVELS, KIMAGE_VADDR, kimage_limit, 1))
#define INIT_IDMAP_DIR_SIZE ((INIT_IDMAP_DIR_PAGES + EARLY_IDMAP_EXTRA_PAGES) * PAGE_SIZE)
#define INIT_IDMAP_FDT_PAGES (EARLY_PAGES(INIT_IDMAP_PGTABLE_LEVELS, 0UL, UL(MAX_FDT_SIZE), 1) - 1)
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index eba1a98657f1..aa9efee17277 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -323,13 +323,14 @@ static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm)
}
/*
- * If mprotect/munmap/etc occurs during TLB batched flushing, we need to
- * synchronise all the TLBI issued with a DSB to avoid the race mentioned in
- * flush_tlb_batched_pending().
+ * If mprotect/munmap/etc occurs during TLB batched flushing, we need to ensure
+ * all the previously issued TLBIs targeting mm have completed. But since we
+ * can be executing on a remote CPU, a DSB cannot guarantee this like it can
+ * for arch_tlbbatch_flush(). Our only option is to flush the entire mm.
*/
static inline void arch_flush_tlb_batched_pending(struct mm_struct *mm)
{
- dsb(ish);
+ flush_tlb_mm(mm);
}
/*
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 45ea79cacf46..b34044e20128 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1199,8 +1199,10 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
cpacr_restore(cpacr);
}
- if (id_aa64pfr0_mpam(info->reg_id_aa64pfr0))
+ if (id_aa64pfr0_mpam(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1))) {
+ info->reg_mpamidr = read_cpuid(MPAMIDR_EL1);
init_cpu_ftr_reg(SYS_MPAMIDR_EL1, info->reg_mpamidr);
+ }
if (id_aa64pfr1_mte(info->reg_id_aa64pfr1))
init_cpu_ftr_reg(SYS_GMID_EL1, info->reg_gmid);
@@ -1453,7 +1455,8 @@ void update_cpu_features(int cpu,
cpacr_restore(cpacr);
}
- if (id_aa64pfr0_mpam(info->reg_id_aa64pfr0)) {
+ if (id_aa64pfr0_mpam(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1))) {
+ info->reg_mpamidr = read_cpuid(MPAMIDR_EL1);
taint |= check_update_ftr_reg(SYS_MPAMIDR_EL1, cpu,
info->reg_mpamidr, boot->reg_mpamidr);
}
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 94525abd1c22..c1f2b6b04b41 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -496,8 +496,11 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0))
__cpuinfo_store_cpu_32bit(&info->aarch32);
- if (id_aa64pfr0_mpam(info->reg_id_aa64pfr0))
- info->reg_mpamidr = read_cpuid(MPAMIDR_EL1);
+ /*
+ * info->reg_mpamidr deferred to {init,update}_cpu_features because we
+ * don't want to read it (and trigger a trap on buggy firmware) if
+ * using an aa64pfr0_el1 override to unconditionally disable MPAM.
+ */
if (IS_ENABLED(CONFIG_ARM64_SME) &&
id_aa64pfr1_sme(info->reg_id_aa64pfr1)) {
diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
index 5a69b6eb4090..714b0b5ec5ac 100644
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -10,6 +10,10 @@
#error This file should only be included in vmlinux.lds.S
#endif
+#if defined(CONFIG_LD_IS_LLD) && CONFIG_LLD_VERSION < 210000
+#define ASSERT(...)
+#endif
+
#define PI_EXPORT_SYM(sym) \
__PI_EXPORT_SYM(sym, __pi_ ## sym, Cannot export BSS symbol sym to startup code)
#define __PI_EXPORT_SYM(sym, pisym, msg)\
@@ -140,4 +144,17 @@ KVM_NVHE_ALIAS(kvm_protected_mode_initialized);
_kernel_codesize = ABSOLUTE(__inittext_end - _text);
#endif
+/*
+ * LLD will occasionally error out with a '__init_end does not converge' error
+ * if INIT_IDMAP_DIR_SIZE is defined in terms of _end, as this results in a
+ * circular dependency. Counter this by dimensioning the initial IDMAP page
+ * tables based on kimage_limit, which is defined such that its value should
+ * not change as a result of the initdata segment being pushed over a 64k
+ * segment boundary due to changes in INIT_IDMAP_DIR_SIZE, provided that its
+ * value doesn't change by more than 2M between linker passes.
+ */
+kimage_limit = ALIGN(ABSOLUTE(_end + SZ_64K), SZ_2M);
+
+#undef ASSERT
+
#endif /* __ARM64_KERNEL_IMAGE_VARS_H */
diff --git a/arch/arm64/kernel/pi/idreg-override.c b/arch/arm64/kernel/pi/idreg-override.c
index c6b185b885f7..bc57b290e5e7 100644
--- a/arch/arm64/kernel/pi/idreg-override.c
+++ b/arch/arm64/kernel/pi/idreg-override.c
@@ -127,6 +127,7 @@ static const struct ftr_set_desc pfr0 __prel64_initconst = {
.fields = {
FIELD("sve", ID_AA64PFR0_EL1_SVE_SHIFT, pfr0_sve_filter),
FIELD("el0", ID_AA64PFR0_EL1_EL0_SHIFT, NULL),
+ FIELD("mpam", ID_AA64PFR0_EL1_MPAM_SHIFT, NULL),
{}
},
};
@@ -154,6 +155,7 @@ static const struct ftr_set_desc pfr1 __prel64_initconst = {
FIELD("gcs", ID_AA64PFR1_EL1_GCS_SHIFT, NULL),
FIELD("mte", ID_AA64PFR1_EL1_MTE_SHIFT, NULL),
FIELD("sme", ID_AA64PFR1_EL1_SME_SHIFT, pfr1_sme_filter),
+ FIELD("mpam_frac", ID_AA64PFR1_EL1_MPAM_frac_SHIFT, NULL),
{}
},
};
@@ -246,6 +248,7 @@ static const struct {
{ "rodata=off", "arm64_sw.rodataoff=1" },
{ "arm64.nolva", "id_aa64mmfr2.varange=0" },
{ "arm64.no32bit_el0", "id_aa64pfr0.el0=1" },
+ { "arm64.nompam", "id_aa64pfr0.mpam=0 id_aa64pfr1.mpam_frac=0" },
};
static int __init parse_hexdigit(const char *p, u64 *v)
diff --git a/arch/um/Kconfig b/arch/um/Kconfig
index 79509c7f39de..f08e8a7fac93 100644
--- a/arch/um/Kconfig
+++ b/arch/um/Kconfig
@@ -52,13 +52,7 @@ config NO_IOMEM
config UML_IOMEM_EMULATION
bool
select INDIRECT_IOMEM
- select HAS_IOPORT
select GENERIC_PCI_IOMAP
- select GENERIC_IOMAP
- select NO_GENERIC_PCI_IOPORT_MAP
-
-config NO_IOPORT_MAP
- def_bool !UML_IOMEM_EMULATION
config ISA
bool
diff --git a/arch/um/configs/i386_defconfig b/arch/um/configs/i386_defconfig
index 1ffa088739f4..29d9666eceae 100644
--- a/arch/um/configs/i386_defconfig
+++ b/arch/um/configs/i386_defconfig
@@ -52,13 +52,6 @@ CONFIG_PACKET=y
CONFIG_UNIX=y
CONFIG_INET=y
# CONFIG_IPV6 is not set
-CONFIG_UML_NET=y
-CONFIG_UML_NET_ETHERTAP=y
-CONFIG_UML_NET_TUNTAP=y
-CONFIG_UML_NET_SLIP=y
-CONFIG_UML_NET_DAEMON=y
-CONFIG_UML_NET_MCAST=y
-CONFIG_UML_NET_SLIRP=y
CONFIG_EXT4_FS=y
CONFIG_QUOTA=y
CONFIG_AUTOFS_FS=m
diff --git a/arch/um/configs/x86_64_defconfig b/arch/um/configs/x86_64_defconfig
index 03b10d3f6816..cf309c5406a2 100644
--- a/arch/um/configs/x86_64_defconfig
+++ b/arch/um/configs/x86_64_defconfig
@@ -51,13 +51,6 @@ CONFIG_PACKET=y
CONFIG_UNIX=y
CONFIG_INET=y
# CONFIG_IPV6 is not set
-CONFIG_UML_NET=y
-CONFIG_UML_NET_ETHERTAP=y
-CONFIG_UML_NET_TUNTAP=y
-CONFIG_UML_NET_SLIP=y
-CONFIG_UML_NET_DAEMON=y
-CONFIG_UML_NET_MCAST=y
-CONFIG_UML_NET_SLIRP=y
CONFIG_EXT4_FS=y
CONFIG_QUOTA=y
CONFIG_AUTOFS_FS=m
diff --git a/arch/um/drivers/Kconfig b/arch/um/drivers/Kconfig
index 9cb196070614..34085bfc6d41 100644
--- a/arch/um/drivers/Kconfig
+++ b/arch/um/drivers/Kconfig
@@ -124,206 +124,18 @@ endmenu
menu "UML Network Devices"
depends on NET
-# UML virtual driver
-config UML_NET
- bool "Virtual network device"
- help
- While the User-Mode port cannot directly talk to any physical
- hardware devices, this choice and the following transport options
- provide one or more virtual network devices through which the UML
- kernels can talk to each other, the host, and with the host's help,
- machines on the outside world.
-
- For more information, including explanations of the networking and
- sample configurations, see
- <http://user-mode-linux.sourceforge.net/old/networking.html>.
-
- If you'd like to be able to enable networking in the User-Mode
- linux environment, say Y; otherwise say N. Note that you must
- enable at least one of the following transport options to actually
- make use of UML networking.
-
-config UML_NET_ETHERTAP
- bool "Ethertap transport (obsolete)"
- depends on UML_NET
- help
- The Ethertap User-Mode Linux network transport allows a single
- running UML to exchange packets with its host over one of the
- host's Ethertap devices, such as /dev/tap0. Additional running
- UMLs can use additional Ethertap devices, one per running UML.
- While the UML believes it's on a (multi-device, broadcast) virtual
- Ethernet network, it's in fact communicating over a point-to-point
- link with the host.
-
- To use this, your host kernel must have support for Ethertap
- devices. Also, if your host kernel is 2.4.x, it must have
- CONFIG_NETLINK_DEV configured as Y or M.
-
- For more information, see
- <http://user-mode-linux.sourceforge.net/old/networking.html> That site
- has examples of the UML command line to use to enable Ethertap
- networking.
-
- NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please
- migrate to UML_NET_VECTOR.
-
- If unsure, say N.
-
-config UML_NET_TUNTAP
- bool "TUN/TAP transport (obsolete)"
- depends on UML_NET
- help
- The UML TUN/TAP network transport allows a UML instance to exchange
- packets with the host over a TUN/TAP device. This option will only
- work with a 2.4 host, unless you've applied the TUN/TAP patch to
- your 2.2 host kernel.
-
- To use this transport, your host kernel must have support for TUN/TAP
- devices, either built-in or as a module.
-
- NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please
- migrate to UML_NET_VECTOR.
-
- If unsure, say N.
-
-config UML_NET_SLIP
- bool "SLIP transport (obsolete)"
- depends on UML_NET
- help
- The slip User-Mode Linux network transport allows a running UML to
- network with its host over a point-to-point link. Unlike Ethertap,
- which can carry any Ethernet frame (and hence even non-IP packets),
- the slip transport can only carry IP packets.
-
- To use this, your host must support slip devices.
-
- For more information, see
- <http://user-mode-linux.sourceforge.net/old/networking.html>.
- has examples of the UML command line to use to enable slip
- networking, and details of a few quirks with it.
-
- NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please
- migrate to UML_NET_VECTOR.
-
- If unsure, say N.
-
-config UML_NET_DAEMON
- bool "Daemon transport (obsolete)"
- depends on UML_NET
- help
- This User-Mode Linux network transport allows one or more running
- UMLs on a single host to communicate with each other, but not to
- the host.
-
- To use this form of networking, you'll need to run the UML
- networking daemon on the host.
-
- For more information, see
- <http://user-mode-linux.sourceforge.net/old/networking.html> That site
- has examples of the UML command line to use to enable Daemon
- networking.
-
- NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please
- migrate to UML_NET_VECTOR.
-
- If unsure, say N.
-
-config UML_NET_DAEMON_DEFAULT_SOCK
- string "Default socket for daemon transport"
- default "/tmp/uml.ctl"
- depends on UML_NET_DAEMON
- help
- This option allows setting the default socket for the daemon
- transport, normally it defaults to /tmp/uml.ctl.
-
config UML_NET_VECTOR
bool "Vector I/O high performance network devices"
- depends on UML_NET
select MAY_HAVE_RUNTIME_DEPS
help
This User-Mode Linux network driver uses multi-message send
and receive functions. The host running the UML guest must have
a linux kernel version above 3.0 and a libc version > 2.13.
- This driver provides tap, raw, gre and l2tpv3 network transports
- with up to 4 times higher network throughput than the UML network
- drivers.
-
-config UML_NET_VDE
- bool "VDE transport (obsolete)"
- depends on UML_NET
- depends on !MODVERSIONS
- select MAY_HAVE_RUNTIME_DEPS
- help
- This User-Mode Linux network transport allows one or more running
- UMLs on a single host to communicate with each other and also
- with the rest of the world using Virtual Distributed Ethernet,
- an improved fork of uml_switch.
+ This driver provides tap, raw, gre and l2tpv3 network transports.
- You must have libvdeplug installed in order to build the vde
- transport into UML.
-
- To use this form of networking, you will need to run vde_switch
- on the host.
-
- For more information, see <http://wiki.virtualsquare.org/>
- That site has a good overview of what VDE is and also examples
- of the UML command line to use to enable VDE networking.
-
- NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please
- migrate to UML_NET_VECTOR.
-
- If unsure, say N.
-
-config UML_NET_MCAST
- bool "Multicast transport (obsolete)"
- depends on UML_NET
- help
- This Multicast User-Mode Linux network transport allows multiple
- UMLs (even ones running on different host machines!) to talk to
- each other over a virtual ethernet network. However, it requires
- at least one UML with one of the other transports to act as a
- bridge if any of them need to be able to talk to their hosts or any
- other IP machines.
-
- To use this, your host kernel(s) must support IP Multicasting.
-
- For more information, see
- <http://user-mode-linux.sourceforge.net/old/networking.html> That site
- has examples of the UML command line to use to enable Multicast
- networking, and notes about the security of this approach.
-
- NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please
- migrate to UML_NET_VECTOR.
-
- If unsure, say N.
-
-config UML_NET_SLIRP
- bool "SLiRP transport (obsolete)"
- depends on UML_NET
- help
- The SLiRP User-Mode Linux network transport allows a running UML
- to network by invoking a program that can handle SLIP encapsulated
- packets. This is commonly (but not limited to) the application
- known as SLiRP, a program that can re-socket IP packets back onto
- he host on which it is run. Only IP packets are supported,
- unlike other network transports that can handle all Ethernet
- frames. In general, slirp allows the UML the same IP connectivity
- to the outside world that the host user is permitted, and unlike
- other transports, SLiRP works without the need of root level
- privileges, setuid binaries, or SLIP devices on the host. This
- also means not every type of connection is possible, but most
- situations can be accommodated with carefully crafted slirp
- commands that can be passed along as part of the network device's
- setup string. The effect of this transport on the UML is similar
- that of a host behind a firewall that masquerades all network
- connections passing through it (but is less secure).
-
- NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please
- migrate to UML_NET_VECTOR.
-
- If unsure, say N.
-
- Startup example: "eth0=slirp,FE:FD:01:02:03:04,/usr/local/bin/slirp"
+ For more information, including explanations of the networking
+ and sample configurations, see
+ <file:Documentation/virt/uml/user_mode_linux_howto_v2.rst>.
endmenu
@@ -367,3 +179,11 @@ config UML_PCI_OVER_VIRTIO_DEVICE_ID
There's no official device ID assigned (yet), set the one you
wish to use for experimentation here. The default of -1 is
not valid and will cause the driver to fail at probe.
+
+config UML_PCI_OVER_VFIO
+ bool "Enable VFIO-based PCI passthrough"
+ select UML_PCI
+ help
+ This driver provides support for VFIO-based PCI passthrough.
+ Currently, only MSI-X capable devices are supported, and it
+ is assumed that drivers will use MSI-X.
diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile
index 0a5820343ad3..6bf8cbf71d3c 100644
--- a/arch/um/drivers/Makefile
+++ b/arch/um/drivers/Makefile
@@ -6,12 +6,7 @@
# pcap is broken in 2.5 because kbuild doesn't allow pcap.a to be linked
# in to pcap.o
-slip-objs := slip_kern.o slip_user.o
-slirp-objs := slirp_kern.o slirp_user.o
-daemon-objs := daemon_kern.o daemon_user.o
vector-objs := vector_kern.o vector_user.o vector_transports.o
-umcast-objs := umcast_kern.o umcast_user.o
-net-objs := net_kern.o net_user.o
mconsole-objs := mconsole_kern.o mconsole_user.o
hostaudio-objs := hostaudio_kern.o
ubd-objs := ubd_kern.o ubd_user.o
@@ -19,13 +14,7 @@ port-objs := port_kern.o port_user.o
harddog-objs := harddog_kern.o
harddog-builtin-$(CONFIG_UML_WATCHDOG) := harddog_user.o harddog_user_exp.o
rtc-objs := rtc_kern.o rtc_user.o
-
-LDFLAGS_vde.o = $(shell $(CC) $(CFLAGS) -print-file-name=libvdeplug.a)
-
-targets := vde_kern.o vde_user.o
-
-$(obj)/vde.o: $(obj)/vde_kern.o $(obj)/vde_user.o
- $(LD) -r -dp -o $@ $^ $(ld_flags)
+vfio_uml-objs := vfio_kern.o vfio_user.o
#XXX: The call below does not work because the flags are added before the
# object name, so nothing from the library gets linked.
@@ -38,13 +27,7 @@ obj-y := stdio_console.o fd.o chan_kern.o chan_user.o line.o
obj-$(CONFIG_SSL) += ssl.o
obj-$(CONFIG_STDERR_CONSOLE) += stderr_console.o
-obj-$(CONFIG_UML_NET_SLIP) += slip.o slip_common.o
-obj-$(CONFIG_UML_NET_SLIRP) += slirp.o slip_common.o
-obj-$(CONFIG_UML_NET_DAEMON) += daemon.o
obj-$(CONFIG_UML_NET_VECTOR) += vector.o
-obj-$(CONFIG_UML_NET_VDE) += vde.o
-obj-$(CONFIG_UML_NET_MCAST) += umcast.o
-obj-$(CONFIG_UML_NET) += net.o
obj-$(CONFIG_MCONSOLE) += mconsole.o
obj-$(CONFIG_MMAPPER) += mmapper_kern.o
obj-$(CONFIG_BLK_DEV_UBD) += ubd.o
@@ -62,9 +45,10 @@ obj-$(CONFIG_VIRTIO_UML) += virtio_uml.o
obj-$(CONFIG_UML_RTC) += rtc.o
obj-$(CONFIG_UML_PCI) += virt-pci.o
obj-$(CONFIG_UML_PCI_OVER_VIRTIO) += virtio_pcidev.o
+obj-$(CONFIG_UML_PCI_OVER_VFIO) += vfio_uml.o
# pcap_user.o must be added explicitly.
-USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o vde_user.o vector_user.o
+USER_OBJS := fd.o null.o pty.o tty.o xterm.o vector_user.o
CFLAGS_null.o = -DDEV_NULL=$(DEV_NULL_PATH)
CFLAGS_xterm.o += '-DCONFIG_XTERM_CHAN_DEFAULT_EMULATOR="$(CONFIG_XTERM_CHAN_DEFAULT_EMULATOR)"'
diff --git a/arch/um/drivers/chan_kern.c b/arch/um/drivers/chan_kern.c
index e78a99816c86..26442db7d608 100644
--- a/arch/um/drivers/chan_kern.c
+++ b/arch/um/drivers/chan_kern.c
@@ -212,7 +212,7 @@ int enable_chan(struct line *line)
* be permanently disabled. This is discovered in IRQ context, but
* the freeing of the IRQ must be done later.
*/
-static DEFINE_SPINLOCK(irqs_to_free_lock);
+static DEFINE_RAW_SPINLOCK(irqs_to_free_lock);
static LIST_HEAD(irqs_to_free);
void free_irqs(void)
@@ -222,9 +222,9 @@ void free_irqs(void)
struct list_head *ele;
unsigned long flags;
- spin_lock_irqsave(&irqs_to_free_lock, flags);
+ raw_spin_lock_irqsave(&irqs_to_free_lock, flags);
list_splice_init(&irqs_to_free, &list);
- spin_unlock_irqrestore(&irqs_to_free_lock, flags);
+ raw_spin_unlock_irqrestore(&irqs_to_free_lock, flags);
list_for_each(ele, &list) {
chan = list_entry(ele, struct chan, free_list);
@@ -246,9 +246,9 @@ static void close_one_chan(struct chan *chan, int delay_free_irq)
return;
if (delay_free_irq) {
- spin_lock_irqsave(&irqs_to_free_lock, flags);
+ raw_spin_lock_irqsave(&irqs_to_free_lock, flags);
list_add(&chan->free_list, &irqs_to_free);
- spin_unlock_irqrestore(&irqs_to_free_lock, flags);
+ raw_spin_unlock_irqrestore(&irqs_to_free_lock, flags);
} else {
if (chan->input && chan->enabled)
um_free_irq(chan->line->read_irq, chan);
diff --git a/arch/um/drivers/daemon.h b/arch/um/drivers/daemon.h
deleted file mode 100644
index 1509cc7eb907..000000000000
--- a/arch/um/drivers/daemon.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- */
-
-#ifndef __DAEMON_H__
-#define __DAEMON_H__
-
-#include <net_user.h>
-
-#define SWITCH_VERSION 3
-
-struct daemon_data {
- char *sock_type;
- char *ctl_sock;
- void *ctl_addr;
- void *data_addr;
- void *local_addr;
- int fd;
- int control;
- void *dev;
-};
-
-extern const struct net_user_info daemon_user_info;
-
-extern int daemon_user_write(int fd, void *buf, int len,
- struct daemon_data *pri);
-
-#endif
diff --git a/arch/um/drivers/daemon_kern.c b/arch/um/drivers/daemon_kern.c
deleted file mode 100644
index afde1e82c056..000000000000
--- a/arch/um/drivers/daemon_kern.c
+++ /dev/null
@@ -1,95 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
- * James Leu (jleu@mindspring.net).
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Copyright (C) 2001 by various other people who didn't put their name here.
- */
-
-#include <linux/init.h>
-#include <linux/netdevice.h>
-#include <net_kern.h>
-#include "daemon.h"
-
-struct daemon_init {
- char *sock_type;
- char *ctl_sock;
-};
-
-static void daemon_init(struct net_device *dev, void *data)
-{
- struct uml_net_private *pri;
- struct daemon_data *dpri;
- struct daemon_init *init = data;
-
- pri = netdev_priv(dev);
- dpri = (struct daemon_data *) pri->user;
- dpri->sock_type = init->sock_type;
- dpri->ctl_sock = init->ctl_sock;
- dpri->fd = -1;
- dpri->control = -1;
- dpri->dev = dev;
- /* We will free this pointer. If it contains crap we're burned. */
- dpri->ctl_addr = NULL;
- dpri->data_addr = NULL;
- dpri->local_addr = NULL;
-
- printk("daemon backend (uml_switch version %d) - %s:%s",
- SWITCH_VERSION, dpri->sock_type, dpri->ctl_sock);
- printk("\n");
-}
-
-static int daemon_read(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
- return net_recvfrom(fd, skb_mac_header(skb),
- skb->dev->mtu + ETH_HEADER_OTHER);
-}
-
-static int daemon_write(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
- return daemon_user_write(fd, skb->data, skb->len,
- (struct daemon_data *) &lp->user);
-}
-
-static const struct net_kern_info daemon_kern_info = {
- .init = daemon_init,
- .protocol = eth_protocol,
- .read = daemon_read,
- .write = daemon_write,
-};
-
-static int daemon_setup(char *str, char **mac_out, void *data)
-{
- struct daemon_init *init = data;
- char *remain;
-
- *init = ((struct daemon_init)
- { .sock_type = "unix",
- .ctl_sock = CONFIG_UML_NET_DAEMON_DEFAULT_SOCK });
-
- remain = split_if_spec(str, mac_out, &init->sock_type, &init->ctl_sock,
- NULL);
- if (remain != NULL)
- printk(KERN_WARNING "daemon_setup : Ignoring data socket "
- "specification\n");
-
- return 1;
-}
-
-static struct transport daemon_transport = {
- .list = LIST_HEAD_INIT(daemon_transport.list),
- .name = "daemon",
- .setup = daemon_setup,
- .user = &daemon_user_info,
- .kern = &daemon_kern_info,
- .private_size = sizeof(struct daemon_data),
- .setup_size = sizeof(struct daemon_init),
-};
-
-static int register_daemon(void)
-{
- register_transport(&daemon_transport);
- return 0;
-}
-
-late_initcall(register_daemon);
diff --git a/arch/um/drivers/daemon_user.c b/arch/um/drivers/daemon_user.c
deleted file mode 100644
index 785baedc3555..000000000000
--- a/arch/um/drivers/daemon_user.c
+++ /dev/null
@@ -1,194 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
- * James Leu (jleu@mindspring.net).
- * Copyright (C) 2001 by various other people who didn't put their name here.
- */
-
-#include <stdint.h>
-#include <string.h>
-#include <unistd.h>
-#include <errno.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <sys/time.h>
-#include <sys/un.h>
-#include "daemon.h"
-#include <net_user.h>
-#include <os.h>
-#include <um_malloc.h>
-
-enum request_type { REQ_NEW_CONTROL };
-
-#define SWITCH_MAGIC 0xfeedface
-
-struct request_v3 {
- uint32_t magic;
- uint32_t version;
- enum request_type type;
- struct sockaddr_un sock;
-};
-
-static struct sockaddr_un *new_addr(void *name, int len)
-{
- struct sockaddr_un *sun;
-
- sun = uml_kmalloc(sizeof(struct sockaddr_un), UM_GFP_KERNEL);
- if (sun == NULL) {
- printk(UM_KERN_ERR "new_addr: allocation of sockaddr_un "
- "failed\n");
- return NULL;
- }
- sun->sun_family = AF_UNIX;
- memcpy(sun->sun_path, name, len);
- return sun;
-}
-
-static int connect_to_switch(struct daemon_data *pri)
-{
- struct sockaddr_un *ctl_addr = pri->ctl_addr;
- struct sockaddr_un *local_addr = pri->local_addr;
- struct sockaddr_un *sun;
- struct request_v3 req;
- int fd, n, err;
-
- pri->control = socket(AF_UNIX, SOCK_STREAM, 0);
- if (pri->control < 0) {
- err = -errno;
- printk(UM_KERN_ERR "daemon_open : control socket failed, "
- "errno = %d\n", -err);
- return err;
- }
-
- if (connect(pri->control, (struct sockaddr *) ctl_addr,
- sizeof(*ctl_addr)) < 0) {
- err = -errno;
- printk(UM_KERN_ERR "daemon_open : control connect failed, "
- "errno = %d\n", -err);
- goto out;
- }
-
- fd = socket(AF_UNIX, SOCK_DGRAM, 0);
- if (fd < 0) {
- err = -errno;
- printk(UM_KERN_ERR "daemon_open : data socket failed, "
- "errno = %d\n", -err);
- goto out;
- }
- if (bind(fd, (struct sockaddr *) local_addr, sizeof(*local_addr)) < 0) {
- err = -errno;
- printk(UM_KERN_ERR "daemon_open : data bind failed, "
- "errno = %d\n", -err);
- goto out_close;
- }
-
- sun = uml_kmalloc(sizeof(struct sockaddr_un), UM_GFP_KERNEL);
- if (sun == NULL) {
- printk(UM_KERN_ERR "new_addr: allocation of sockaddr_un "
- "failed\n");
- err = -ENOMEM;
- goto out_close;
- }
-
- req.magic = SWITCH_MAGIC;
- req.version = SWITCH_VERSION;
- req.type = REQ_NEW_CONTROL;
- req.sock = *local_addr;
- n = write(pri->control, &req, sizeof(req));
- if (n != sizeof(req)) {
- printk(UM_KERN_ERR "daemon_open : control setup request "
- "failed, err = %d\n", -errno);
- err = -ENOTCONN;
- goto out_free;
- }
-
- n = read(pri->control, sun, sizeof(*sun));
- if (n != sizeof(*sun)) {
- printk(UM_KERN_ERR "daemon_open : read of data socket failed, "
- "err = %d\n", -errno);
- err = -ENOTCONN;
- goto out_free;
- }
-
- pri->data_addr = sun;
- return fd;
-
- out_free:
- kfree(sun);
- out_close:
- close(fd);
- out:
- close(pri->control);
- return err;
-}
-
-static int daemon_user_init(void *data, void *dev)
-{
- struct daemon_data *pri = data;
- struct timeval tv;
- struct {
- char zero;
- int pid;
- int usecs;
- } name;
-
- if (!strcmp(pri->sock_type, "unix"))
- pri->ctl_addr = new_addr(pri->ctl_sock,
- strlen(pri->ctl_sock) + 1);
- name.zero = 0;
- name.pid = os_getpid();
- gettimeofday(&tv, NULL);
- name.usecs = tv.tv_usec;
- pri->local_addr = new_addr(&name, sizeof(name));
- pri->dev = dev;
- pri->fd = connect_to_switch(pri);
- if (pri->fd < 0) {
- kfree(pri->local_addr);
- pri->local_addr = NULL;
- return pri->fd;
- }
-
- return 0;
-}
-
-static int daemon_open(void *data)
-{
- struct daemon_data *pri = data;
- return pri->fd;
-}
-
-static void daemon_remove(void *data)
-{
- struct daemon_data *pri = data;
-
- close(pri->fd);
- pri->fd = -1;
- close(pri->control);
- pri->control = -1;
-
- kfree(pri->data_addr);
- pri->data_addr = NULL;
- kfree(pri->ctl_addr);
- pri->ctl_addr = NULL;
- kfree(pri->local_addr);
- pri->local_addr = NULL;
-}
-
-int daemon_user_write(int fd, void *buf, int len, struct daemon_data *pri)
-{
- struct sockaddr_un *data_addr = pri->data_addr;
-
- return net_sendto(fd, buf, len, data_addr, sizeof(*data_addr));
-}
-
-const struct net_user_info daemon_user_info = {
- .init = daemon_user_init,
- .open = daemon_open,
- .close = NULL,
- .remove = daemon_remove,
- .add_address = NULL,
- .delete_address = NULL,
- .mtu = ETH_MAX_PACKET,
- .max_packet = ETH_MAX_PACKET + ETH_HEADER_OTHER,
-};
diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c
deleted file mode 100644
index d5a9c5aabaec..000000000000
--- a/arch/um/drivers/net_kern.c
+++ /dev/null
@@ -1,889 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
- * James Leu (jleu@mindspring.net).
- * Copyright (C) 2001 by various other people who didn't put their name here.
- */
-
-#include <linux/memblock.h>
-#include <linux/etherdevice.h>
-#include <linux/ethtool.h>
-#include <linux/inetdevice.h>
-#include <linux/init.h>
-#include <linux/list.h>
-#include <linux/netdevice.h>
-#include <linux/platform_device.h>
-#include <linux/rtnetlink.h>
-#include <linux/skbuff.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <init.h>
-#include <irq_kern.h>
-#include <irq_user.h>
-#include "mconsole_kern.h"
-#include <net_kern.h>
-#include <net_user.h>
-
-#define DRIVER_NAME "uml-netdev"
-
-static DEFINE_SPINLOCK(opened_lock);
-static LIST_HEAD(opened);
-
-/*
- * The drop_skb is used when we can't allocate an skb. The
- * packet is read into drop_skb in order to get the data off the
- * connection to the host.
- * It is reallocated whenever a maximum packet size is seen which is
- * larger than any seen before. update_drop_skb is called from
- * eth_configure when a new interface is added.
- */
-static DEFINE_SPINLOCK(drop_lock);
-static struct sk_buff *drop_skb;
-static int drop_max;
-
-static int update_drop_skb(int max)
-{
- struct sk_buff *new;
- unsigned long flags;
- int err = 0;
-
- spin_lock_irqsave(&drop_lock, flags);
-
- if (max <= drop_max)
- goto out;
-
- err = -ENOMEM;
- new = dev_alloc_skb(max);
- if (new == NULL)
- goto out;
-
- skb_put(new, max);
-
- kfree_skb(drop_skb);
- drop_skb = new;
- drop_max = max;
- err = 0;
-out:
- spin_unlock_irqrestore(&drop_lock, flags);
-
- return err;
-}
-
-static int uml_net_rx(struct net_device *dev)
-{
- struct uml_net_private *lp = netdev_priv(dev);
- int pkt_len;
- struct sk_buff *skb;
-
- /* If we can't allocate memory, try again next round. */
- skb = dev_alloc_skb(lp->max_packet);
- if (skb == NULL) {
- drop_skb->dev = dev;
- /* Read a packet into drop_skb and don't do anything with it. */
- (*lp->read)(lp->fd, drop_skb, lp);
- dev->stats.rx_dropped++;
- return 0;
- }
-
- skb->dev = dev;
- skb_put(skb, lp->max_packet);
- skb_reset_mac_header(skb);
- pkt_len = (*lp->read)(lp->fd, skb, lp);
-
- if (pkt_len > 0) {
- skb_trim(skb, pkt_len);
- skb->protocol = (*lp->protocol)(skb);
-
- dev->stats.rx_bytes += skb->len;
- dev->stats.rx_packets++;
- netif_rx(skb);
- return pkt_len;
- }
-
- kfree_skb(skb);
- return pkt_len;
-}
-
-static void uml_dev_close(struct work_struct *work)
-{
- struct uml_net_private *lp =
- container_of(work, struct uml_net_private, work);
- dev_close(lp->dev);
-}
-
-static irqreturn_t uml_net_interrupt(int irq, void *dev_id)
-{
- struct net_device *dev = dev_id;
- struct uml_net_private *lp = netdev_priv(dev);
- int err;
-
- if (!netif_running(dev))
- return IRQ_NONE;
-
- spin_lock(&lp->lock);
- while ((err = uml_net_rx(dev)) > 0) ;
- if (err < 0) {
- printk(KERN_ERR
- "Device '%s' read returned %d, shutting it down\n",
- dev->name, err);
- /* dev_close can't be called in interrupt context, and takes
- * again lp->lock.
- * And dev_close() can be safely called multiple times on the
- * same device, since it tests for (dev->flags & IFF_UP). So
- * there's no harm in delaying the device shutdown.
- * Furthermore, the workqueue will not re-enqueue an already
- * enqueued work item. */
- schedule_work(&lp->work);
- goto out;
- }
-out:
- spin_unlock(&lp->lock);
- return IRQ_HANDLED;
-}
-
-static int uml_net_open(struct net_device *dev)
-{
- struct uml_net_private *lp = netdev_priv(dev);
- int err;
-
- if (lp->fd >= 0) {
- err = -ENXIO;
- goto out;
- }
-
- lp->fd = (*lp->open)(&lp->user);
- if (lp->fd < 0) {
- err = lp->fd;
- goto out;
- }
-
- err = um_request_irq(dev->irq, lp->fd, IRQ_READ, uml_net_interrupt,
- IRQF_SHARED, dev->name, dev);
- if (err < 0) {
- printk(KERN_ERR "uml_net_open: failed to get irq(%d)\n", err);
- err = -ENETUNREACH;
- goto out_close;
- }
-
- netif_start_queue(dev);
-
- /* clear buffer - it can happen that the host side of the interface
- * is full when we get here. In this case, new data is never queued,
- * SIGIOs never arrive, and the net never works.
- */
- while ((err = uml_net_rx(dev)) > 0) ;
-
- spin_lock(&opened_lock);
- list_add(&lp->list, &opened);
- spin_unlock(&opened_lock);
-
- return 0;
-out_close:
- if (lp->close != NULL) (*lp->close)(lp->fd, &lp->user);
- lp->fd = -1;
-out:
- return err;
-}
-
-static int uml_net_close(struct net_device *dev)
-{
- struct uml_net_private *lp = netdev_priv(dev);
-
- netif_stop_queue(dev);
-
- um_free_irq(dev->irq, dev);
- if (lp->close != NULL)
- (*lp->close)(lp->fd, &lp->user);
- lp->fd = -1;
-
- spin_lock(&opened_lock);
- list_del(&lp->list);
- spin_unlock(&opened_lock);
-
- return 0;
-}
-
-static netdev_tx_t uml_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
-{
- struct uml_net_private *lp = netdev_priv(dev);
- unsigned long flags;
- int len;
-
- netif_stop_queue(dev);
-
- spin_lock_irqsave(&lp->lock, flags);
-
- len = (*lp->write)(lp->fd, skb, lp);
- skb_tx_timestamp(skb);
-
- if (len == skb->len) {
- dev->stats.tx_packets++;
- dev->stats.tx_bytes += skb->len;
- netif_trans_update(dev);
- netif_start_queue(dev);
-
- /* this is normally done in the interrupt when tx finishes */
- netif_wake_queue(dev);
- }
- else if (len == 0) {
- netif_start_queue(dev);
- dev->stats.tx_dropped++;
- }
- else {
- netif_start_queue(dev);
- printk(KERN_ERR "uml_net_start_xmit: failed(%d)\n", len);
- }
-
- spin_unlock_irqrestore(&lp->lock, flags);
-
- dev_consume_skb_any(skb);
-
- return NETDEV_TX_OK;
-}
-
-static void uml_net_set_multicast_list(struct net_device *dev)
-{
- return;
-}
-
-static void uml_net_tx_timeout(struct net_device *dev, unsigned int txqueue)
-{
- netif_trans_update(dev);
- netif_wake_queue(dev);
-}
-
-#ifdef CONFIG_NET_POLL_CONTROLLER
-static void uml_net_poll_controller(struct net_device *dev)
-{
- disable_irq(dev->irq);
- uml_net_interrupt(dev->irq, dev);
- enable_irq(dev->irq);
-}
-#endif
-
-static void uml_net_get_drvinfo(struct net_device *dev,
- struct ethtool_drvinfo *info)
-{
- strscpy(info->driver, DRIVER_NAME);
-}
-
-static const struct ethtool_ops uml_net_ethtool_ops = {
- .get_drvinfo = uml_net_get_drvinfo,
- .get_link = ethtool_op_get_link,
- .get_ts_info = ethtool_op_get_ts_info,
-};
-
-void uml_net_setup_etheraddr(struct net_device *dev, char *str)
-{
- u8 addr[ETH_ALEN];
- char *end;
- int i;
-
- if (str == NULL)
- goto random;
-
- for (i = 0; i < 6; i++) {
- addr[i] = simple_strtoul(str, &end, 16);
- if ((end == str) ||
- ((*end != ':') && (*end != ',') && (*end != '\0'))) {
- printk(KERN_ERR
- "setup_etheraddr: failed to parse '%s' "
- "as an ethernet address\n", str);
- goto random;
- }
- str = end + 1;
- }
- if (is_multicast_ether_addr(addr)) {
- printk(KERN_ERR
- "Attempt to assign a multicast ethernet address to a "
- "device disallowed\n");
- goto random;
- }
- if (!is_valid_ether_addr(addr)) {
- printk(KERN_ERR
- "Attempt to assign an invalid ethernet address to a "
- "device disallowed\n");
- goto random;
- }
- if (!is_local_ether_addr(addr)) {
- printk(KERN_WARNING
- "Warning: Assigning a globally valid ethernet "
- "address to a device\n");
- printk(KERN_WARNING "You should set the 2nd rightmost bit in "
- "the first byte of the MAC,\n");
- printk(KERN_WARNING "i.e. %02x:%02x:%02x:%02x:%02x:%02x\n",
- addr[0] | 0x02, addr[1], addr[2], addr[3], addr[4],
- addr[5]);
- }
- eth_hw_addr_set(dev, addr);
- return;
-
-random:
- printk(KERN_INFO
- "Choosing a random ethernet address for device %s\n", dev->name);
- eth_hw_addr_random(dev);
-}
-
-static DEFINE_SPINLOCK(devices_lock);
-static LIST_HEAD(devices);
-
-static struct platform_driver uml_net_driver = {
- .driver = {
- .name = DRIVER_NAME,
- },
-};
-
-static void net_device_release(struct device *dev)
-{
- struct uml_net *device = container_of(dev, struct uml_net, pdev.dev);
- struct net_device *netdev = device->dev;
- struct uml_net_private *lp = netdev_priv(netdev);
-
- if (lp->remove != NULL)
- (*lp->remove)(&lp->user);
- list_del(&device->list);
- kfree(device);
- free_netdev(netdev);
-}
-
-static const struct net_device_ops uml_netdev_ops = {
- .ndo_open = uml_net_open,
- .ndo_stop = uml_net_close,
- .ndo_start_xmit = uml_net_start_xmit,
- .ndo_set_rx_mode = uml_net_set_multicast_list,
- .ndo_tx_timeout = uml_net_tx_timeout,
- .ndo_set_mac_address = eth_mac_addr,
- .ndo_validate_addr = eth_validate_addr,
-#ifdef CONFIG_NET_POLL_CONTROLLER
- .ndo_poll_controller = uml_net_poll_controller,
-#endif
-};
-
-/*
- * Ensures that platform_driver_register is called only once by
- * eth_configure. Will be set in an initcall.
- */
-static int driver_registered;
-
-static void eth_configure(int n, void *init, char *mac,
- struct transport *transport, gfp_t gfp_mask)
-{
- struct uml_net *device;
- struct net_device *dev;
- struct uml_net_private *lp;
- int err, size;
-
- size = transport->private_size + sizeof(struct uml_net_private);
-
- device = kzalloc(sizeof(*device), gfp_mask);
- if (device == NULL) {
- printk(KERN_ERR "eth_configure failed to allocate struct "
- "uml_net\n");
- return;
- }
-
- dev = alloc_etherdev(size);
- if (dev == NULL) {
- printk(KERN_ERR "eth_configure: failed to allocate struct "
- "net_device for eth%d\n", n);
- goto out_free_device;
- }
-
- INIT_LIST_HEAD(&device->list);
- device->index = n;
-
- /* If this name ends up conflicting with an existing registered
- * netdevice, that is OK, register_netdev{,ice}() will notice this
- * and fail.
- */
- snprintf(dev->name, sizeof(dev->name), "eth%d", n);
-
- uml_net_setup_etheraddr(dev, mac);
-
- printk(KERN_INFO "Netdevice %d (%pM) : ", n, dev->dev_addr);
-
- lp = netdev_priv(dev);
- /* This points to the transport private data. It's still clear, but we
- * must memset it to 0 *now*. Let's help the drivers. */
- memset(lp, 0, size);
- INIT_WORK(&lp->work, uml_dev_close);
-
- /* sysfs register */
- if (!driver_registered) {
- platform_driver_register(&uml_net_driver);
- driver_registered = 1;
- }
- device->pdev.id = n;
- device->pdev.name = DRIVER_NAME;
- device->pdev.dev.release = net_device_release;
- dev_set_drvdata(&device->pdev.dev, device);
- if (platform_device_register(&device->pdev))
- goto out_free_netdev;
- SET_NETDEV_DEV(dev,&device->pdev.dev);
-
- device->dev = dev;
-
- /*
- * These just fill in a data structure, so there's no failure
- * to be worried about.
- */
- (*transport->kern->init)(dev, init);
-
- *lp = ((struct uml_net_private)
- { .list = LIST_HEAD_INIT(lp->list),
- .dev = dev,
- .fd = -1,
- .mac = { 0xfe, 0xfd, 0x0, 0x0, 0x0, 0x0},
- .max_packet = transport->user->max_packet,
- .protocol = transport->kern->protocol,
- .open = transport->user->open,
- .close = transport->user->close,
- .remove = transport->user->remove,
- .read = transport->kern->read,
- .write = transport->kern->write,
- .add_address = transport->user->add_address,
- .delete_address = transport->user->delete_address });
-
- spin_lock_init(&lp->lock);
- memcpy(lp->mac, dev->dev_addr, sizeof(lp->mac));
-
- if ((transport->user->init != NULL) &&
- ((*transport->user->init)(&lp->user, dev) != 0))
- goto out_unregister;
-
- dev->mtu = transport->user->mtu;
- dev->netdev_ops = &uml_netdev_ops;
- dev->ethtool_ops = &uml_net_ethtool_ops;
- dev->watchdog_timeo = (HZ >> 1);
- dev->irq = UM_ETH_IRQ;
-
- err = update_drop_skb(lp->max_packet);
- if (err)
- goto out_undo_user_init;
-
- rtnl_lock();
- err = register_netdevice(dev);
- rtnl_unlock();
- if (err)
- goto out_undo_user_init;
-
- spin_lock(&devices_lock);
- list_add(&device->list, &devices);
- spin_unlock(&devices_lock);
-
- return;
-
-out_undo_user_init:
- if (transport->user->remove != NULL)
- (*transport->user->remove)(&lp->user);
-out_unregister:
- platform_device_unregister(&device->pdev);
- return; /* platform_device_unregister frees dev and device */
-out_free_netdev:
- free_netdev(dev);
-out_free_device:
- kfree(device);
-}
-
-static struct uml_net *find_device(int n)
-{
- struct uml_net *device;
- struct list_head *ele;
-
- spin_lock(&devices_lock);
- list_for_each(ele, &devices) {
- device = list_entry(ele, struct uml_net, list);
- if (device->index == n)
- goto out;
- }
- device = NULL;
- out:
- spin_unlock(&devices_lock);
- return device;
-}
-
-static int eth_parse(char *str, int *index_out, char **str_out,
- char **error_out)
-{
- char *end;
- int n, err = -EINVAL;
-
- n = simple_strtoul(str, &end, 0);
- if (end == str) {
- *error_out = "Bad device number";
- return err;
- }
-
- str = end;
- if (*str != '=') {
- *error_out = "Expected '=' after device number";
- return err;
- }
-
- str++;
- if (find_device(n)) {
- *error_out = "Device already configured";
- return err;
- }
-
- *index_out = n;
- *str_out = str;
- return 0;
-}
-
-struct eth_init {
- struct list_head list;
- char *init;
- int index;
-};
-
-static DEFINE_SPINLOCK(transports_lock);
-static LIST_HEAD(transports);
-
-/* Filled in during early boot */
-static LIST_HEAD(eth_cmd_line);
-
-static int check_transport(struct transport *transport, char *eth, int n,
- void **init_out, char **mac_out, gfp_t gfp_mask)
-{
- int len;
-
- len = strlen(transport->name);
- if (strncmp(eth, transport->name, len))
- return 0;
-
- eth += len;
- if (*eth == ',')
- eth++;
- else if (*eth != '\0')
- return 0;
-
- *init_out = kmalloc(transport->setup_size, gfp_mask);
- if (*init_out == NULL)
- return 1;
-
- if (!transport->setup(eth, mac_out, *init_out)) {
- kfree(*init_out);
- *init_out = NULL;
- }
- return 1;
-}
-
-void register_transport(struct transport *new)
-{
- struct list_head *ele, *next;
- struct eth_init *eth;
- void *init;
- char *mac = NULL;
- int match;
-
- spin_lock(&transports_lock);
- BUG_ON(!list_empty(&new->list));
- list_add(&new->list, &transports);
- spin_unlock(&transports_lock);
-
- list_for_each_safe(ele, next, &eth_cmd_line) {
- eth = list_entry(ele, struct eth_init, list);
- match = check_transport(new, eth->init, eth->index, &init,
- &mac, GFP_KERNEL);
- if (!match)
- continue;
- else if (init != NULL) {
- eth_configure(eth->index, init, mac, new, GFP_KERNEL);
- kfree(init);
- }
- list_del(&eth->list);
- }
-}
-
-static int eth_setup_common(char *str, int index)
-{
- struct list_head *ele;
- struct transport *transport;
- void *init;
- char *mac = NULL;
- int found = 0;
-
- spin_lock(&transports_lock);
- list_for_each(ele, &transports) {
- transport = list_entry(ele, struct transport, list);
- if (!check_transport(transport, str, index, &init,
- &mac, GFP_ATOMIC))
- continue;
- if (init != NULL) {
- eth_configure(index, init, mac, transport, GFP_ATOMIC);
- kfree(init);
- }
- found = 1;
- break;
- }
-
- spin_unlock(&transports_lock);
- return found;
-}
-
-static int __init eth_setup(char *str)
-{
- struct eth_init *new;
- char *error;
- int n, err;
-
- err = eth_parse(str, &n, &str, &error);
- if (err) {
- printk(KERN_ERR "eth_setup - Couldn't parse '%s' : %s\n",
- str, error);
- return 1;
- }
-
- new = memblock_alloc_or_panic(sizeof(*new), SMP_CACHE_BYTES);
-
- INIT_LIST_HEAD(&new->list);
- new->index = n;
- new->init = str;
-
- list_add_tail(&new->list, &eth_cmd_line);
- return 1;
-}
-
-__setup("eth", eth_setup);
-__uml_help(eth_setup,
-"eth[0-9]+=<transport>,<options>\n"
-" Configure a network device.\n\n"
-);
-
-static int net_config(char *str, char **error_out)
-{
- int n, err;
-
- err = eth_parse(str, &n, &str, error_out);
- if (err)
- return err;
-
- /* This string is broken up and the pieces used by the underlying
- * driver. So, it is freed only if eth_setup_common fails.
- */
- str = kstrdup(str, GFP_KERNEL);
- if (str == NULL) {
- *error_out = "net_config failed to strdup string";
- return -ENOMEM;
- }
- err = !eth_setup_common(str, n);
- if (err)
- kfree(str);
- return err;
-}
-
-static int net_id(char **str, int *start_out, int *end_out)
-{
- char *end;
- int n;
-
- n = simple_strtoul(*str, &end, 0);
- if ((*end != '\0') || (end == *str))
- return -1;
-
- *start_out = n;
- *end_out = n;
- *str = end;
- return n;
-}
-
-static int net_remove(int n, char **error_out)
-{
- struct uml_net *device;
- struct net_device *dev;
- struct uml_net_private *lp;
-
- device = find_device(n);
- if (device == NULL)
- return -ENODEV;
-
- dev = device->dev;
- lp = netdev_priv(dev);
- if (lp->fd > 0)
- return -EBUSY;
- unregister_netdev(dev);
- platform_device_unregister(&device->pdev);
-
- return 0;
-}
-
-static struct mc_device net_mc = {
- .list = LIST_HEAD_INIT(net_mc.list),
- .name = "eth",
- .config = net_config,
- .get_config = NULL,
- .id = net_id,
- .remove = net_remove,
-};
-
-#ifdef CONFIG_INET
-static int uml_inetaddr_event(struct notifier_block *this, unsigned long event,
- void *ptr)
-{
- struct in_ifaddr *ifa = ptr;
- struct net_device *dev = ifa->ifa_dev->dev;
- struct uml_net_private *lp;
- void (*proc)(unsigned char *, unsigned char *, void *);
- unsigned char addr_buf[4], netmask_buf[4];
-
- if (dev->netdev_ops->ndo_open != uml_net_open)
- return NOTIFY_DONE;
-
- lp = netdev_priv(dev);
-
- proc = NULL;
- switch (event) {
- case NETDEV_UP:
- proc = lp->add_address;
- break;
- case NETDEV_DOWN:
- proc = lp->delete_address;
- break;
- }
- if (proc != NULL) {
- memcpy(addr_buf, &ifa->ifa_address, sizeof(addr_buf));
- memcpy(netmask_buf, &ifa->ifa_mask, sizeof(netmask_buf));
- (*proc)(addr_buf, netmask_buf, &lp->user);
- }
- return NOTIFY_DONE;
-}
-
-/* uml_net_init shouldn't be called twice on two CPUs at the same time */
-static struct notifier_block uml_inetaddr_notifier = {
- .notifier_call = uml_inetaddr_event,
-};
-
-static void inet_register(void)
-{
- struct list_head *ele;
- struct uml_net_private *lp;
- struct in_device *ip;
- struct in_ifaddr *in;
-
- register_inetaddr_notifier(&uml_inetaddr_notifier);
-
- /* Devices may have been opened already, so the uml_inetaddr_notifier
- * didn't get a chance to run for them. This fakes it so that
- * addresses which have already been set up get handled properly.
- */
- spin_lock(&opened_lock);
- list_for_each(ele, &opened) {
- lp = list_entry(ele, struct uml_net_private, list);
- ip = lp->dev->ip_ptr;
- if (ip == NULL)
- continue;
- in = ip->ifa_list;
- while (in != NULL) {
- uml_inetaddr_event(NULL, NETDEV_UP, in);
- in = in->ifa_next;
- }
- }
- spin_unlock(&opened_lock);
-}
-#else
-static inline void inet_register(void)
-{
-}
-#endif
-
-static int uml_net_init(void)
-{
- mconsole_register_dev(&net_mc);
- inet_register();
- return 0;
-}
-
-__initcall(uml_net_init);
-
-static void close_devices(void)
-{
- struct list_head *ele;
- struct uml_net_private *lp;
-
- spin_lock(&opened_lock);
- list_for_each(ele, &opened) {
- lp = list_entry(ele, struct uml_net_private, list);
- um_free_irq(lp->dev->irq, lp->dev);
- if ((lp->close != NULL) && (lp->fd >= 0))
- (*lp->close)(lp->fd, &lp->user);
- if (lp->remove != NULL)
- (*lp->remove)(&lp->user);
- }
- spin_unlock(&opened_lock);
-}
-
-__uml_exitcall(close_devices);
-
-void iter_addresses(void *d, void (*cb)(unsigned char *, unsigned char *,
- void *),
- void *arg)
-{
- struct net_device *dev = d;
- struct in_device *ip = dev->ip_ptr;
- struct in_ifaddr *in;
- unsigned char address[4], netmask[4];
-
- if (ip == NULL) return;
- in = ip->ifa_list;
- while (in != NULL) {
- memcpy(address, &in->ifa_address, sizeof(address));
- memcpy(netmask, &in->ifa_mask, sizeof(netmask));
- (*cb)(address, netmask, arg);
- in = in->ifa_next;
- }
-}
-
-int dev_netmask(void *d, void *m)
-{
- struct net_device *dev = d;
- struct in_device *ip = dev->ip_ptr;
- struct in_ifaddr *in;
- __be32 *mask_out = m;
-
- if (ip == NULL)
- return 1;
-
- in = ip->ifa_list;
- if (in == NULL)
- return 1;
-
- *mask_out = in->ifa_mask;
- return 0;
-}
-
-void *get_output_buffer(int *len_out)
-{
- void *ret;
-
- ret = (void *) __get_free_pages(GFP_KERNEL, 0);
- if (ret) *len_out = PAGE_SIZE;
- else *len_out = 0;
- return ret;
-}
-
-void free_output_buffer(void *buffer)
-{
- free_pages((unsigned long) buffer, 0);
-}
-
-int tap_setup_common(char *str, char *type, char **dev_name, char **mac_out,
- char **gate_addr)
-{
- char *remain;
-
- remain = split_if_spec(str, dev_name, mac_out, gate_addr, NULL);
- if (remain != NULL) {
- printk(KERN_ERR "tap_setup_common - Extra garbage on "
- "specification : '%s'\n", remain);
- return 1;
- }
-
- return 0;
-}
-
-unsigned short eth_protocol(struct sk_buff *skb)
-{
- return eth_type_trans(skb, skb->dev);
-}
diff --git a/arch/um/drivers/net_user.c b/arch/um/drivers/net_user.c
deleted file mode 100644
index 4c9576452ab0..000000000000
--- a/arch/um/drivers/net_user.c
+++ /dev/null
@@ -1,271 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- */
-
-#include <stdio.h>
-#include <unistd.h>
-#include <stdarg.h>
-#include <errno.h>
-#include <stddef.h>
-#include <string.h>
-#include <sys/socket.h>
-#include <sys/wait.h>
-#include <net_user.h>
-#include <os.h>
-#include <um_malloc.h>
-
-int tap_open_common(void *dev, char *gate_addr)
-{
- int tap_addr[4];
-
- if (gate_addr == NULL)
- return 0;
- if (sscanf(gate_addr, "%d.%d.%d.%d", &tap_addr[0],
- &tap_addr[1], &tap_addr[2], &tap_addr[3]) != 4) {
- printk(UM_KERN_ERR "Invalid tap IP address - '%s'\n",
- gate_addr);
- return -EINVAL;
- }
- return 0;
-}
-
-void tap_check_ips(char *gate_addr, unsigned char *eth_addr)
-{
- int tap_addr[4];
-
- if ((gate_addr != NULL) &&
- (sscanf(gate_addr, "%d.%d.%d.%d", &tap_addr[0],
- &tap_addr[1], &tap_addr[2], &tap_addr[3]) == 4) &&
- (eth_addr[0] == tap_addr[0]) &&
- (eth_addr[1] == tap_addr[1]) &&
- (eth_addr[2] == tap_addr[2]) &&
- (eth_addr[3] == tap_addr[3])) {
- printk(UM_KERN_ERR "The tap IP address and the UML eth IP "
- "address must be different\n");
- }
-}
-
-/* Do reliable error handling as this fails frequently enough. */
-void read_output(int fd, char *output, int len)
-{
- int remain, ret, expected;
- char c;
- char *str;
-
- if (output == NULL) {
- output = &c;
- len = sizeof(c);
- }
-
- *output = '\0';
- ret = read(fd, &remain, sizeof(remain));
-
- if (ret != sizeof(remain)) {
- if (ret < 0)
- ret = -errno;
- expected = sizeof(remain);
- str = "length";
- goto err;
- }
-
- while (remain != 0) {
- expected = (remain < len) ? remain : len;
- ret = read(fd, output, expected);
- if (ret != expected) {
- if (ret < 0)
- ret = -errno;
- str = "data";
- goto err;
- }
- remain -= ret;
- }
-
- return;
-
-err:
- if (ret < 0)
- printk(UM_KERN_ERR "read_output - read of %s failed, "
- "errno = %d\n", str, -ret);
- else
- printk(UM_KERN_ERR "read_output - read of %s failed, read only "
- "%d of %d bytes\n", str, ret, expected);
-}
-
-int net_read(int fd, void *buf, int len)
-{
- int n;
-
- n = read(fd, buf, len);
-
- if ((n < 0) && (errno == EAGAIN))
- return 0;
- else if (n == 0)
- return -ENOTCONN;
- return n;
-}
-
-int net_recvfrom(int fd, void *buf, int len)
-{
- int n;
-
- CATCH_EINTR(n = recvfrom(fd, buf, len, 0, NULL, NULL));
- if (n < 0) {
- if (errno == EAGAIN)
- return 0;
- return -errno;
- }
- else if (n == 0)
- return -ENOTCONN;
- return n;
-}
-
-int net_write(int fd, void *buf, int len)
-{
- int n;
-
- n = write(fd, buf, len);
-
- if ((n < 0) && (errno == EAGAIN))
- return 0;
- else if (n == 0)
- return -ENOTCONN;
- return n;
-}
-
-int net_send(int fd, void *buf, int len)
-{
- int n;
-
- CATCH_EINTR(n = send(fd, buf, len, 0));
- if (n < 0) {
- if (errno == EAGAIN)
- return 0;
- return -errno;
- }
- else if (n == 0)
- return -ENOTCONN;
- return n;
-}
-
-int net_sendto(int fd, void *buf, int len, void *to, int sock_len)
-{
- int n;
-
- CATCH_EINTR(n = sendto(fd, buf, len, 0, (struct sockaddr *) to,
- sock_len));
- if (n < 0) {
- if (errno == EAGAIN)
- return 0;
- return -errno;
- }
- else if (n == 0)
- return -ENOTCONN;
- return n;
-}
-
-struct change_pre_exec_data {
- int close_me;
- int stdout_fd;
-};
-
-static void change_pre_exec(void *arg)
-{
- struct change_pre_exec_data *data = arg;
-
- close(data->close_me);
- dup2(data->stdout_fd, 1);
-}
-
-static int change_tramp(char **argv, char *output, int output_len)
-{
- int pid, fds[2], err;
- struct change_pre_exec_data pe_data;
-
- err = os_pipe(fds, 1, 0);
- if (err < 0) {
- printk(UM_KERN_ERR "change_tramp - pipe failed, err = %d\n",
- -err);
- return err;
- }
- pe_data.close_me = fds[0];
- pe_data.stdout_fd = fds[1];
- pid = run_helper(change_pre_exec, &pe_data, argv);
-
- if (pid > 0) /* Avoid hang as we won't get data in failure case. */
- read_output(fds[0], output, output_len);
-
- close(fds[0]);
- close(fds[1]);
-
- if (pid > 0)
- helper_wait(pid);
- return pid;
-}
-
-static void change(char *dev, char *what, unsigned char *addr,
- unsigned char *netmask)
-{
- char addr_buf[sizeof("255.255.255.255\0")];
- char netmask_buf[sizeof("255.255.255.255\0")];
- char version[sizeof("nnnnn\0")];
- char *argv[] = { "uml_net", version, what, dev, addr_buf,
- netmask_buf, NULL };
- char *output;
- int output_len, pid;
-
- sprintf(version, "%d", UML_NET_VERSION);
- sprintf(addr_buf, "%d.%d.%d.%d", addr[0], addr[1], addr[2], addr[3]);
- sprintf(netmask_buf, "%d.%d.%d.%d", netmask[0], netmask[1],
- netmask[2], netmask[3]);
-
- output_len = UM_KERN_PAGE_SIZE;
- output = uml_kmalloc(output_len, UM_GFP_KERNEL);
- if (output == NULL)
- printk(UM_KERN_ERR "change : failed to allocate output "
- "buffer\n");
-
- pid = change_tramp(argv, output, output_len);
- if (pid < 0) {
- kfree(output);
- return;
- }
-
- if (output != NULL) {
- printk("%s", output);
- kfree(output);
- }
-}
-
-void open_addr(unsigned char *addr, unsigned char *netmask, void *arg)
-{
- change(arg, "add", addr, netmask);
-}
-
-void close_addr(unsigned char *addr, unsigned char *netmask, void *arg)
-{
- change(arg, "del", addr, netmask);
-}
-
-char *split_if_spec(char *str, ...)
-{
- char **arg, *end, *ret = NULL;
- va_list ap;
-
- va_start(ap, str);
- while ((arg = va_arg(ap, char **)) != NULL) {
- if (*str == '\0')
- goto out;
- end = strchr(str, ',');
- if (end != str)
- *arg = str;
- if (end == NULL)
- goto out;
- *end++ = '\0';
- str = end;
- }
- ret = str;
-out:
- va_end(ap);
- return ret;
-}
diff --git a/arch/um/drivers/slip.h b/arch/um/drivers/slip.h
deleted file mode 100644
index 0f3b7ca99465..000000000000
--- a/arch/um/drivers/slip.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __UM_SLIP_H
-#define __UM_SLIP_H
-
-#include "slip_common.h"
-
-struct slip_data {
- void *dev;
- char name[sizeof("slnnnnn\0")];
- char *addr;
- char *gate_addr;
- int slave;
- struct slip_proto slip;
-};
-
-extern const struct net_user_info slip_user_info;
-
-extern int slip_user_read(int fd, void *buf, int len, struct slip_data *pri);
-extern int slip_user_write(int fd, void *buf, int len, struct slip_data *pri);
-
-#endif
diff --git a/arch/um/drivers/slip_common.c b/arch/um/drivers/slip_common.c
deleted file mode 100644
index 20fe4f42743d..000000000000
--- a/arch/um/drivers/slip_common.c
+++ /dev/null
@@ -1,55 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <string.h>
-#include "slip_common.h"
-#include <net_user.h>
-
-int slip_proto_read(int fd, void *buf, int len, struct slip_proto *slip)
-{
- int i, n, size, start;
-
- if(slip->more > 0){
- i = 0;
- while(i < slip->more){
- size = slip_unesc(slip->ibuf[i++], slip->ibuf,
- &slip->pos, &slip->esc);
- if(size){
- memcpy(buf, slip->ibuf, size);
- memmove(slip->ibuf, &slip->ibuf[i],
- slip->more - i);
- slip->more = slip->more - i;
- return size;
- }
- }
- slip->more = 0;
- }
-
- n = net_read(fd, &slip->ibuf[slip->pos],
- sizeof(slip->ibuf) - slip->pos);
- if(n <= 0)
- return n;
-
- start = slip->pos;
- for(i = 0; i < n; i++){
- size = slip_unesc(slip->ibuf[start + i], slip->ibuf,&slip->pos,
- &slip->esc);
- if(size){
- memcpy(buf, slip->ibuf, size);
- memmove(slip->ibuf, &slip->ibuf[start+i+1],
- n - (i + 1));
- slip->more = n - (i + 1);
- return size;
- }
- }
- return 0;
-}
-
-int slip_proto_write(int fd, void *buf, int len, struct slip_proto *slip)
-{
- int actual, n;
-
- actual = slip_esc(buf, slip->obuf, len);
- n = net_write(fd, slip->obuf, actual);
- if(n < 0)
- return n;
- else return len;
-}
diff --git a/arch/um/drivers/slip_common.h b/arch/um/drivers/slip_common.h
deleted file mode 100644
index d3798b5caf7f..000000000000
--- a/arch/um/drivers/slip_common.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __UM_SLIP_COMMON_H
-#define __UM_SLIP_COMMON_H
-
-#define BUF_SIZE 1500
- /* two bytes each for a (pathological) max packet of escaped chars + *
- * terminating END char + initial END char */
-#define ENC_BUF_SIZE (2 * BUF_SIZE + 2)
-
-/* SLIP protocol characters. */
-#define SLIP_END 0300 /* indicates end of frame */
-#define SLIP_ESC 0333 /* indicates byte stuffing */
-#define SLIP_ESC_END 0334 /* ESC ESC_END means END 'data' */
-#define SLIP_ESC_ESC 0335 /* ESC ESC_ESC means ESC 'data' */
-
-static inline int slip_unesc(unsigned char c, unsigned char *buf, int *pos,
- int *esc)
-{
- int ret;
-
- switch(c){
- case SLIP_END:
- *esc = 0;
- ret=*pos;
- *pos=0;
- return(ret);
- case SLIP_ESC:
- *esc = 1;
- return(0);
- case SLIP_ESC_ESC:
- if(*esc){
- *esc = 0;
- c = SLIP_ESC;
- }
- break;
- case SLIP_ESC_END:
- if(*esc){
- *esc = 0;
- c = SLIP_END;
- }
- break;
- }
- buf[(*pos)++] = c;
- return(0);
-}
-
-static inline int slip_esc(unsigned char *s, unsigned char *d, int len)
-{
- unsigned char *ptr = d;
- unsigned char c;
-
- /*
- * Send an initial END character to flush out any
- * data that may have accumulated in the receiver
- * due to line noise.
- */
-
- *ptr++ = SLIP_END;
-
- /*
- * For each byte in the packet, send the appropriate
- * character sequence, according to the SLIP protocol.
- */
-
- while (len-- > 0) {
- switch(c = *s++) {
- case SLIP_END:
- *ptr++ = SLIP_ESC;
- *ptr++ = SLIP_ESC_END;
- break;
- case SLIP_ESC:
- *ptr++ = SLIP_ESC;
- *ptr++ = SLIP_ESC_ESC;
- break;
- default:
- *ptr++ = c;
- break;
- }
- }
- *ptr++ = SLIP_END;
- return (ptr - d);
-}
-
-struct slip_proto {
- unsigned char ibuf[ENC_BUF_SIZE];
- unsigned char obuf[ENC_BUF_SIZE];
- int more; /* more data: do not read fd until ibuf has been drained */
- int pos;
- int esc;
-};
-
-static inline void slip_proto_init(struct slip_proto * slip)
-{
- memset(slip->ibuf, 0, sizeof(slip->ibuf));
- memset(slip->obuf, 0, sizeof(slip->obuf));
- slip->more = 0;
- slip->pos = 0;
- slip->esc = 0;
-}
-
-extern int slip_proto_read(int fd, void *buf, int len,
- struct slip_proto *slip);
-extern int slip_proto_write(int fd, void *buf, int len,
- struct slip_proto *slip);
-
-#endif
diff --git a/arch/um/drivers/slip_kern.c b/arch/um/drivers/slip_kern.c
deleted file mode 100644
index c58ccdcc16d6..000000000000
--- a/arch/um/drivers/slip_kern.c
+++ /dev/null
@@ -1,93 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- */
-
-#include <linux/if_arp.h>
-#include <linux/init.h>
-#include <linux/netdevice.h>
-#include <net_kern.h>
-#include "slip.h"
-
-struct slip_init {
- char *gate_addr;
-};
-
-static void slip_init(struct net_device *dev, void *data)
-{
- struct uml_net_private *private;
- struct slip_data *spri;
- struct slip_init *init = data;
-
- private = netdev_priv(dev);
- spri = (struct slip_data *) private->user;
-
- memset(spri->name, 0, sizeof(spri->name));
- spri->addr = NULL;
- spri->gate_addr = init->gate_addr;
- spri->slave = -1;
- spri->dev = dev;
-
- slip_proto_init(&spri->slip);
-
- dev->hard_header_len = 0;
- dev->header_ops = NULL;
- dev->addr_len = 0;
- dev->type = ARPHRD_SLIP;
- dev->tx_queue_len = 256;
- dev->flags = IFF_NOARP;
- printk("SLIP backend - SLIP IP = %s\n", spri->gate_addr);
-}
-
-static unsigned short slip_protocol(struct sk_buff *skbuff)
-{
- return htons(ETH_P_IP);
-}
-
-static int slip_read(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
- return slip_user_read(fd, skb_mac_header(skb), skb->dev->mtu,
- (struct slip_data *) &lp->user);
-}
-
-static int slip_write(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
- return slip_user_write(fd, skb->data, skb->len,
- (struct slip_data *) &lp->user);
-}
-
-static const struct net_kern_info slip_kern_info = {
- .init = slip_init,
- .protocol = slip_protocol,
- .read = slip_read,
- .write = slip_write,
-};
-
-static int slip_setup(char *str, char **mac_out, void *data)
-{
- struct slip_init *init = data;
-
- *init = ((struct slip_init) { .gate_addr = NULL });
-
- if (str[0] != '\0')
- init->gate_addr = str;
- return 1;
-}
-
-static struct transport slip_transport = {
- .list = LIST_HEAD_INIT(slip_transport.list),
- .name = "slip",
- .setup = slip_setup,
- .user = &slip_user_info,
- .kern = &slip_kern_info,
- .private_size = sizeof(struct slip_data),
- .setup_size = sizeof(struct slip_init),
-};
-
-static int register_slip(void)
-{
- register_transport(&slip_transport);
- return 0;
-}
-
-late_initcall(register_slip);
diff --git a/arch/um/drivers/slip_user.c b/arch/um/drivers/slip_user.c
deleted file mode 100644
index 7334019c9e60..000000000000
--- a/arch/um/drivers/slip_user.c
+++ /dev/null
@@ -1,252 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <errno.h>
-#include <fcntl.h>
-#include <string.h>
-#include <termios.h>
-#include <sys/wait.h>
-#include <net_user.h>
-#include <os.h>
-#include "slip.h"
-#include <um_malloc.h>
-
-static int slip_user_init(void *data, void *dev)
-{
- struct slip_data *pri = data;
-
- pri->dev = dev;
- return 0;
-}
-
-static int set_up_tty(int fd)
-{
- int i;
- struct termios tios;
-
- if (tcgetattr(fd, &tios) < 0) {
- printk(UM_KERN_ERR "could not get initial terminal "
- "attributes\n");
- return -1;
- }
-
- tios.c_cflag = CS8 | CREAD | HUPCL | CLOCAL;
- tios.c_iflag = IGNBRK | IGNPAR;
- tios.c_oflag = 0;
- tios.c_lflag = 0;
- for (i = 0; i < NCCS; i++)
- tios.c_cc[i] = 0;
- tios.c_cc[VMIN] = 1;
- tios.c_cc[VTIME] = 0;
-
- cfsetospeed(&tios, B38400);
- cfsetispeed(&tios, B38400);
-
- if (tcsetattr(fd, TCSAFLUSH, &tios) < 0) {
- printk(UM_KERN_ERR "failed to set terminal attributes\n");
- return -1;
- }
- return 0;
-}
-
-struct slip_pre_exec_data {
- int stdin_fd;
- int stdout_fd;
- int close_me;
-};
-
-static void slip_pre_exec(void *arg)
-{
- struct slip_pre_exec_data *data = arg;
-
- if (data->stdin_fd >= 0)
- dup2(data->stdin_fd, 0);
- dup2(data->stdout_fd, 1);
- if (data->close_me >= 0)
- close(data->close_me);
-}
-
-static int slip_tramp(char **argv, int fd)
-{
- struct slip_pre_exec_data pe_data;
- char *output;
- int pid, fds[2], err, output_len;
-
- err = os_pipe(fds, 1, 0);
- if (err < 0) {
- printk(UM_KERN_ERR "slip_tramp : pipe failed, err = %d\n",
- -err);
- goto out;
- }
-
- err = 0;
- pe_data.stdin_fd = fd;
- pe_data.stdout_fd = fds[1];
- pe_data.close_me = fds[0];
- err = run_helper(slip_pre_exec, &pe_data, argv);
- if (err < 0)
- goto out_close;
- pid = err;
-
- output_len = UM_KERN_PAGE_SIZE;
- output = uml_kmalloc(output_len, UM_GFP_KERNEL);
- if (output == NULL) {
- printk(UM_KERN_ERR "slip_tramp : failed to allocate output "
- "buffer\n");
- os_kill_process(pid, 1);
- err = -ENOMEM;
- goto out_close;
- }
-
- close(fds[1]);
- read_output(fds[0], output, output_len);
- printk("%s", output);
-
- err = helper_wait(pid);
- close(fds[0]);
-
- kfree(output);
- return err;
-
-out_close:
- close(fds[0]);
- close(fds[1]);
-out:
- return err;
-}
-
-static int slip_open(void *data)
-{
- struct slip_data *pri = data;
- char version_buf[sizeof("nnnnn\0")];
- char gate_buf[sizeof("nnn.nnn.nnn.nnn\0")];
- char *argv[] = { "uml_net", version_buf, "slip", "up", gate_buf,
- NULL };
- int sfd, mfd, err;
-
- err = get_pty();
- if (err < 0) {
- printk(UM_KERN_ERR "slip-open : Failed to open pty, err = %d\n",
- -err);
- goto out;
- }
- mfd = err;
-
- err = open(ptsname(mfd), O_RDWR, 0);
- if (err < 0) {
- printk(UM_KERN_ERR "Couldn't open tty for slip line, "
- "err = %d\n", -err);
- goto out_close;
- }
- sfd = err;
-
- err = set_up_tty(sfd);
- if (err)
- goto out_close2;
-
- pri->slave = sfd;
- pri->slip.pos = 0;
- pri->slip.esc = 0;
- if (pri->gate_addr != NULL) {
- sprintf(version_buf, "%d", UML_NET_VERSION);
- strcpy(gate_buf, pri->gate_addr);
-
- err = slip_tramp(argv, sfd);
-
- if (err < 0) {
- printk(UM_KERN_ERR "slip_tramp failed - err = %d\n",
- -err);
- goto out_close2;
- }
- err = os_get_ifname(pri->slave, pri->name);
- if (err < 0) {
- printk(UM_KERN_ERR "get_ifname failed, err = %d\n",
- -err);
- goto out_close2;
- }
- iter_addresses(pri->dev, open_addr, pri->name);
- }
- else {
- err = os_set_slip(sfd);
- if (err < 0) {
- printk(UM_KERN_ERR "Failed to set slip discipline "
- "encapsulation - err = %d\n", -err);
- goto out_close2;
- }
- }
- return mfd;
-out_close2:
- close(sfd);
-out_close:
- close(mfd);
-out:
- return err;
-}
-
-static void slip_close(int fd, void *data)
-{
- struct slip_data *pri = data;
- char version_buf[sizeof("nnnnn\0")];
- char *argv[] = { "uml_net", version_buf, "slip", "down", pri->name,
- NULL };
- int err;
-
- if (pri->gate_addr != NULL)
- iter_addresses(pri->dev, close_addr, pri->name);
-
- sprintf(version_buf, "%d", UML_NET_VERSION);
-
- err = slip_tramp(argv, pri->slave);
-
- if (err != 0)
- printk(UM_KERN_ERR "slip_tramp failed - errno = %d\n", -err);
- close(fd);
- close(pri->slave);
- pri->slave = -1;
-}
-
-int slip_user_read(int fd, void *buf, int len, struct slip_data *pri)
-{
- return slip_proto_read(fd, buf, len, &pri->slip);
-}
-
-int slip_user_write(int fd, void *buf, int len, struct slip_data *pri)
-{
- return slip_proto_write(fd, buf, len, &pri->slip);
-}
-
-static void slip_add_addr(unsigned char *addr, unsigned char *netmask,
- void *data)
-{
- struct slip_data *pri = data;
-
- if (pri->slave < 0)
- return;
- open_addr(addr, netmask, pri->name);
-}
-
-static void slip_del_addr(unsigned char *addr, unsigned char *netmask,
- void *data)
-{
- struct slip_data *pri = data;
-
- if (pri->slave < 0)
- return;
- close_addr(addr, netmask, pri->name);
-}
-
-const struct net_user_info slip_user_info = {
- .init = slip_user_init,
- .open = slip_open,
- .close = slip_close,
- .remove = NULL,
- .add_address = slip_add_addr,
- .delete_address = slip_del_addr,
- .mtu = BUF_SIZE,
- .max_packet = BUF_SIZE,
-};
diff --git a/arch/um/drivers/slirp.h b/arch/um/drivers/slirp.h
deleted file mode 100644
index 4aef2b88249a..000000000000
--- a/arch/um/drivers/slirp.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __UM_SLIRP_H
-#define __UM_SLIRP_H
-
-#include "slip_common.h"
-
-#define SLIRP_MAX_ARGS 100
-/*
- * XXX this next definition is here because I don't understand why this
- * initializer doesn't work in slirp_kern.c:
- *
- * argv : { init->argv[ 0 ... SLIRP_MAX_ARGS-1 ] },
- *
- * or why I can't typecast like this:
- *
- * argv : (char* [SLIRP_MAX_ARGS])(init->argv),
- */
-struct arg_list_dummy_wrapper { char *argv[SLIRP_MAX_ARGS]; };
-
-struct slirp_data {
- void *dev;
- struct arg_list_dummy_wrapper argw;
- int pid;
- int slave;
- struct slip_proto slip;
-};
-
-extern const struct net_user_info slirp_user_info;
-
-extern int slirp_user_read(int fd, void *buf, int len, struct slirp_data *pri);
-extern int slirp_user_write(int fd, void *buf, int len,
- struct slirp_data *pri);
-
-#endif
diff --git a/arch/um/drivers/slirp_kern.c b/arch/um/drivers/slirp_kern.c
deleted file mode 100644
index 0a6151ee9572..000000000000
--- a/arch/um/drivers/slirp_kern.c
+++ /dev/null
@@ -1,120 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- */
-
-#include <linux/if_arp.h>
-#include <linux/init.h>
-#include <linux/netdevice.h>
-#include <linux/string.h>
-#include <net_kern.h>
-#include <net_user.h>
-#include "slirp.h"
-
-struct slirp_init {
- struct arg_list_dummy_wrapper argw; /* XXX should be simpler... */
-};
-
-static void slirp_init(struct net_device *dev, void *data)
-{
- struct uml_net_private *private;
- struct slirp_data *spri;
- struct slirp_init *init = data;
- int i;
-
- private = netdev_priv(dev);
- spri = (struct slirp_data *) private->user;
-
- spri->argw = init->argw;
- spri->pid = -1;
- spri->slave = -1;
- spri->dev = dev;
-
- slip_proto_init(&spri->slip);
-
- dev->hard_header_len = 0;
- dev->header_ops = NULL;
- dev->addr_len = 0;
- dev->type = ARPHRD_SLIP;
- dev->tx_queue_len = 256;
- dev->flags = IFF_NOARP;
- printk("SLIRP backend - command line:");
- for (i = 0; spri->argw.argv[i] != NULL; i++)
- printk(" '%s'",spri->argw.argv[i]);
- printk("\n");
-}
-
-static unsigned short slirp_protocol(struct sk_buff *skbuff)
-{
- return htons(ETH_P_IP);
-}
-
-static int slirp_read(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
- return slirp_user_read(fd, skb_mac_header(skb), skb->dev->mtu,
- (struct slirp_data *) &lp->user);
-}
-
-static int slirp_write(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
- return slirp_user_write(fd, skb->data, skb->len,
- (struct slirp_data *) &lp->user);
-}
-
-const struct net_kern_info slirp_kern_info = {
- .init = slirp_init,
- .protocol = slirp_protocol,
- .read = slirp_read,
- .write = slirp_write,
-};
-
-static int slirp_setup(char *str, char **mac_out, void *data)
-{
- struct slirp_init *init = data;
- int i=0;
-
- *init = ((struct slirp_init) { .argw = { { "slirp", NULL } } });
-
- str = split_if_spec(str, mac_out, NULL);
-
- if (str == NULL) /* no command line given after MAC addr */
- return 1;
-
- do {
- if (i >= SLIRP_MAX_ARGS - 1) {
- printk(KERN_WARNING "slirp_setup: truncating slirp "
- "arguments\n");
- break;
- }
- init->argw.argv[i++] = str;
- while(*str && *str!=',') {
- if (*str == '_')
- *str=' ';
- str++;
- }
- if (*str != ',')
- break;
- *str++ = '\0';
- } while (1);
-
- init->argw.argv[i] = NULL;
- return 1;
-}
-
-static struct transport slirp_transport = {
- .list = LIST_HEAD_INIT(slirp_transport.list),
- .name = "slirp",
- .setup = slirp_setup,
- .user = &slirp_user_info,
- .kern = &slirp_kern_info,
- .private_size = sizeof(struct slirp_data),
- .setup_size = sizeof(struct slirp_init),
-};
-
-static int register_slirp(void)
-{
- register_transport(&slirp_transport);
- return 0;
-}
-
-late_initcall(register_slirp);
diff --git a/arch/um/drivers/slirp_user.c b/arch/um/drivers/slirp_user.c
deleted file mode 100644
index 97228aa080cb..000000000000
--- a/arch/um/drivers/slirp_user.c
+++ /dev/null
@@ -1,124 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- */
-
-#include <unistd.h>
-#include <errno.h>
-#include <string.h>
-#include <sys/wait.h>
-#include <net_user.h>
-#include <os.h>
-#include "slirp.h"
-
-static int slirp_user_init(void *data, void *dev)
-{
- struct slirp_data *pri = data;
-
- pri->dev = dev;
- return 0;
-}
-
-struct slirp_pre_exec_data {
- int stdin_fd;
- int stdout_fd;
-};
-
-static void slirp_pre_exec(void *arg)
-{
- struct slirp_pre_exec_data *data = arg;
-
- if (data->stdin_fd != -1)
- dup2(data->stdin_fd, 0);
- if (data->stdout_fd != -1)
- dup2(data->stdout_fd, 1);
-}
-
-static int slirp_tramp(char **argv, int fd)
-{
- struct slirp_pre_exec_data pe_data;
- int pid;
-
- pe_data.stdin_fd = fd;
- pe_data.stdout_fd = fd;
- pid = run_helper(slirp_pre_exec, &pe_data, argv);
-
- return pid;
-}
-
-static int slirp_open(void *data)
-{
- struct slirp_data *pri = data;
- int fds[2], err;
-
- err = os_pipe(fds, 1, 1);
- if (err)
- return err;
-
- err = slirp_tramp(pri->argw.argv, fds[1]);
- if (err < 0) {
- printk(UM_KERN_ERR "slirp_tramp failed - errno = %d\n", -err);
- goto out;
- }
-
- pri->slave = fds[1];
- pri->slip.pos = 0;
- pri->slip.esc = 0;
- pri->pid = err;
-
- return fds[0];
-out:
- close(fds[0]);
- close(fds[1]);
- return err;
-}
-
-static void slirp_close(int fd, void *data)
-{
- struct slirp_data *pri = data;
- int err;
-
- close(fd);
- close(pri->slave);
-
- pri->slave = -1;
-
- if (pri->pid<1) {
- printk(UM_KERN_ERR "slirp_close: no child process to shut "
- "down\n");
- return;
- }
-
-#if 0
- if (kill(pri->pid, SIGHUP)<0) {
- printk(UM_KERN_ERR "slirp_close: sending hangup to %d failed "
- "(%d)\n", pri->pid, errno);
- }
-#endif
- err = helper_wait(pri->pid);
- if (err < 0)
- return;
-
- pri->pid = -1;
-}
-
-int slirp_user_read(int fd, void *buf, int len, struct slirp_data *pri)
-{
- return slip_proto_read(fd, buf, len, &pri->slip);
-}
-
-int slirp_user_write(int fd, void *buf, int len, struct slirp_data *pri)
-{
- return slip_proto_write(fd, buf, len, &pri->slip);
-}
-
-const struct net_user_info slirp_user_info = {
- .init = slirp_user_init,
- .open = slirp_open,
- .close = slirp_close,
- .remove = NULL,
- .add_address = NULL,
- .delete_address = NULL,
- .mtu = BUF_SIZE,
- .max_packet = BUF_SIZE,
-};
diff --git a/arch/um/drivers/umcast.h b/arch/um/drivers/umcast.h
deleted file mode 100644
index fe39bee1e3bd..000000000000
--- a/arch/um/drivers/umcast.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- */
-
-#ifndef __DRIVERS_UMCAST_H
-#define __DRIVERS_UMCAST_H
-
-#include <net_user.h>
-
-struct umcast_data {
- char *addr;
- unsigned short lport;
- unsigned short rport;
- void *listen_addr;
- void *remote_addr;
- int ttl;
- int unicast;
- void *dev;
-};
-
-extern const struct net_user_info umcast_user_info;
-
-extern int umcast_user_write(int fd, void *buf, int len,
- struct umcast_data *pri);
-
-#endif
diff --git a/arch/um/drivers/umcast_kern.c b/arch/um/drivers/umcast_kern.c
deleted file mode 100644
index 595a54f2b9c6..000000000000
--- a/arch/um/drivers/umcast_kern.c
+++ /dev/null
@@ -1,188 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * user-mode-linux networking multicast transport
- * Copyright (C) 2001 by Harald Welte <laforge@gnumonks.org>
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- *
- * based on the existing uml-networking code, which is
- * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
- * James Leu (jleu@mindspring.net).
- * Copyright (C) 2001 by various other people who didn't put their name here.
- *
- */
-
-#include <linux/init.h>
-#include <linux/netdevice.h>
-#include "umcast.h"
-#include <net_kern.h>
-
-struct umcast_init {
- char *addr;
- int lport;
- int rport;
- int ttl;
- bool unicast;
-};
-
-static void umcast_init(struct net_device *dev, void *data)
-{
- struct uml_net_private *pri;
- struct umcast_data *dpri;
- struct umcast_init *init = data;
-
- pri = netdev_priv(dev);
- dpri = (struct umcast_data *) pri->user;
- dpri->addr = init->addr;
- dpri->lport = init->lport;
- dpri->rport = init->rport;
- dpri->unicast = init->unicast;
- dpri->ttl = init->ttl;
- dpri->dev = dev;
-
- if (dpri->unicast) {
- printk(KERN_INFO "ucast backend address: %s:%u listen port: "
- "%u\n", dpri->addr, dpri->rport, dpri->lport);
- } else {
- printk(KERN_INFO "mcast backend multicast address: %s:%u, "
- "TTL:%u\n", dpri->addr, dpri->lport, dpri->ttl);
- }
-}
-
-static int umcast_read(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
- return net_recvfrom(fd, skb_mac_header(skb),
- skb->dev->mtu + ETH_HEADER_OTHER);
-}
-
-static int umcast_write(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
- return umcast_user_write(fd, skb->data, skb->len,
- (struct umcast_data *) &lp->user);
-}
-
-static const struct net_kern_info umcast_kern_info = {
- .init = umcast_init,
- .protocol = eth_protocol,
- .read = umcast_read,
- .write = umcast_write,
-};
-
-static int mcast_setup(char *str, char **mac_out, void *data)
-{
- struct umcast_init *init = data;
- char *port_str = NULL, *ttl_str = NULL, *remain;
- char *last;
-
- *init = ((struct umcast_init)
- { .addr = "239.192.168.1",
- .lport = 1102,
- .ttl = 1 });
-
- remain = split_if_spec(str, mac_out, &init->addr, &port_str, &ttl_str,
- NULL);
- if (remain != NULL) {
- printk(KERN_ERR "mcast_setup - Extra garbage on "
- "specification : '%s'\n", remain);
- return 0;
- }
-
- if (port_str != NULL) {
- init->lport = simple_strtoul(port_str, &last, 10);
- if ((*last != '\0') || (last == port_str)) {
- printk(KERN_ERR "mcast_setup - Bad port : '%s'\n",
- port_str);
- return 0;
- }
- }
-
- if (ttl_str != NULL) {
- init->ttl = simple_strtoul(ttl_str, &last, 10);
- if ((*last != '\0') || (last == ttl_str)) {
- printk(KERN_ERR "mcast_setup - Bad ttl : '%s'\n",
- ttl_str);
- return 0;
- }
- }
-
- init->unicast = false;
- init->rport = init->lport;
-
- printk(KERN_INFO "Configured mcast device: %s:%u-%u\n", init->addr,
- init->lport, init->ttl);
-
- return 1;
-}
-
-static int ucast_setup(char *str, char **mac_out, void *data)
-{
- struct umcast_init *init = data;
- char *lport_str = NULL, *rport_str = NULL, *remain;
- char *last;
-
- *init = ((struct umcast_init)
- { .addr = "",
- .lport = 1102,
- .rport = 1102 });
-
- remain = split_if_spec(str, mac_out, &init->addr,
- &lport_str, &rport_str, NULL);
- if (remain != NULL) {
- printk(KERN_ERR "ucast_setup - Extra garbage on "
- "specification : '%s'\n", remain);
- return 0;
- }
-
- if (lport_str != NULL) {
- init->lport = simple_strtoul(lport_str, &last, 10);
- if ((*last != '\0') || (last == lport_str)) {
- printk(KERN_ERR "ucast_setup - Bad listen port : "
- "'%s'\n", lport_str);
- return 0;
- }
- }
-
- if (rport_str != NULL) {
- init->rport = simple_strtoul(rport_str, &last, 10);
- if ((*last != '\0') || (last == rport_str)) {
- printk(KERN_ERR "ucast_setup - Bad remote port : "
- "'%s'\n", rport_str);
- return 0;
- }
- }
-
- init->unicast = true;
-
- printk(KERN_INFO "Configured ucast device: :%u -> %s:%u\n",
- init->lport, init->addr, init->rport);
-
- return 1;
-}
-
-static struct transport mcast_transport = {
- .list = LIST_HEAD_INIT(mcast_transport.list),
- .name = "mcast",
- .setup = mcast_setup,
- .user = &umcast_user_info,
- .kern = &umcast_kern_info,
- .private_size = sizeof(struct umcast_data),
- .setup_size = sizeof(struct umcast_init),
-};
-
-static struct transport ucast_transport = {
- .list = LIST_HEAD_INIT(ucast_transport.list),
- .name = "ucast",
- .setup = ucast_setup,
- .user = &umcast_user_info,
- .kern = &umcast_kern_info,
- .private_size = sizeof(struct umcast_data),
- .setup_size = sizeof(struct umcast_init),
-};
-
-static int register_umcast(void)
-{
- register_transport(&mcast_transport);
- register_transport(&ucast_transport);
- return 0;
-}
-
-late_initcall(register_umcast);
diff --git a/arch/um/drivers/umcast_user.c b/arch/um/drivers/umcast_user.c
deleted file mode 100644
index b50b13cff04e..000000000000
--- a/arch/um/drivers/umcast_user.c
+++ /dev/null
@@ -1,184 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * user-mode-linux networking multicast transport
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Copyright (C) 2001 by Harald Welte <laforge@gnumonks.org>
- *
- * based on the existing uml-networking code, which is
- * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
- * James Leu (jleu@mindspring.net).
- * Copyright (C) 2001 by various other people who didn't put their name here.
- *
- *
- */
-
-#include <unistd.h>
-#include <errno.h>
-#include <netinet/in.h>
-#include "umcast.h"
-#include <net_user.h>
-#include <um_malloc.h>
-
-static struct sockaddr_in *new_addr(char *addr, unsigned short port)
-{
- struct sockaddr_in *sin;
-
- sin = uml_kmalloc(sizeof(struct sockaddr_in), UM_GFP_KERNEL);
- if (sin == NULL) {
- printk(UM_KERN_ERR "new_addr: allocation of sockaddr_in "
- "failed\n");
- return NULL;
- }
- sin->sin_family = AF_INET;
- if (addr)
- sin->sin_addr.s_addr = in_aton(addr);
- else
- sin->sin_addr.s_addr = INADDR_ANY;
- sin->sin_port = htons(port);
- return sin;
-}
-
-static int umcast_user_init(void *data, void *dev)
-{
- struct umcast_data *pri = data;
-
- pri->remote_addr = new_addr(pri->addr, pri->rport);
- if (pri->unicast)
- pri->listen_addr = new_addr(NULL, pri->lport);
- else
- pri->listen_addr = pri->remote_addr;
- pri->dev = dev;
- return 0;
-}
-
-static void umcast_remove(void *data)
-{
- struct umcast_data *pri = data;
-
- kfree(pri->listen_addr);
- if (pri->unicast)
- kfree(pri->remote_addr);
- pri->listen_addr = pri->remote_addr = NULL;
-}
-
-static int umcast_open(void *data)
-{
- struct umcast_data *pri = data;
- struct sockaddr_in *lsin = pri->listen_addr;
- struct sockaddr_in *rsin = pri->remote_addr;
- struct ip_mreq mreq;
- int fd, yes = 1, err = -EINVAL;
-
-
- if ((!pri->unicast && lsin->sin_addr.s_addr == 0) ||
- (rsin->sin_addr.s_addr == 0) ||
- (lsin->sin_port == 0) || (rsin->sin_port == 0))
- goto out;
-
- fd = socket(AF_INET, SOCK_DGRAM, 0);
-
- if (fd < 0) {
- err = -errno;
- printk(UM_KERN_ERR "umcast_open : data socket failed, "
- "errno = %d\n", errno);
- goto out;
- }
-
- if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0) {
- err = -errno;
- printk(UM_KERN_ERR "umcast_open: SO_REUSEADDR failed, "
- "errno = %d\n", errno);
- goto out_close;
- }
-
- if (!pri->unicast) {
- /* set ttl according to config */
- if (setsockopt(fd, SOL_IP, IP_MULTICAST_TTL, &pri->ttl,
- sizeof(pri->ttl)) < 0) {
- err = -errno;
- printk(UM_KERN_ERR "umcast_open: IP_MULTICAST_TTL "
- "failed, error = %d\n", errno);
- goto out_close;
- }
-
- /* set LOOP, so data does get fed back to local sockets */
- if (setsockopt(fd, SOL_IP, IP_MULTICAST_LOOP,
- &yes, sizeof(yes)) < 0) {
- err = -errno;
- printk(UM_KERN_ERR "umcast_open: IP_MULTICAST_LOOP "
- "failed, error = %d\n", errno);
- goto out_close;
- }
- }
-
- /* bind socket to the address */
- if (bind(fd, (struct sockaddr *) lsin, sizeof(*lsin)) < 0) {
- err = -errno;
- printk(UM_KERN_ERR "umcast_open : data bind failed, "
- "errno = %d\n", errno);
- goto out_close;
- }
-
- if (!pri->unicast) {
- /* subscribe to the multicast group */
- mreq.imr_multiaddr.s_addr = lsin->sin_addr.s_addr;
- mreq.imr_interface.s_addr = 0;
- if (setsockopt(fd, SOL_IP, IP_ADD_MEMBERSHIP,
- &mreq, sizeof(mreq)) < 0) {
- err = -errno;
- printk(UM_KERN_ERR "umcast_open: IP_ADD_MEMBERSHIP "
- "failed, error = %d\n", errno);
- printk(UM_KERN_ERR "There appears not to be a "
- "multicast-capable network interface on the "
- "host.\n");
- printk(UM_KERN_ERR "eth0 should be configured in order "
- "to use the multicast transport.\n");
- goto out_close;
- }
- }
-
- return fd;
-
- out_close:
- close(fd);
- out:
- return err;
-}
-
-static void umcast_close(int fd, void *data)
-{
- struct umcast_data *pri = data;
-
- if (!pri->unicast) {
- struct ip_mreq mreq;
- struct sockaddr_in *lsin = pri->listen_addr;
-
- mreq.imr_multiaddr.s_addr = lsin->sin_addr.s_addr;
- mreq.imr_interface.s_addr = 0;
- if (setsockopt(fd, SOL_IP, IP_DROP_MEMBERSHIP,
- &mreq, sizeof(mreq)) < 0) {
- printk(UM_KERN_ERR "umcast_close: IP_DROP_MEMBERSHIP "
- "failed, error = %d\n", errno);
- }
- }
-
- close(fd);
-}
-
-int umcast_user_write(int fd, void *buf, int len, struct umcast_data *pri)
-{
- struct sockaddr_in *data_addr = pri->remote_addr;
-
- return net_sendto(fd, buf, len, data_addr, sizeof(*data_addr));
-}
-
-const struct net_user_info umcast_user_info = {
- .init = umcast_user_init,
- .open = umcast_open,
- .close = umcast_close,
- .remove = umcast_remove,
- .add_address = NULL,
- .delete_address = NULL,
- .mtu = ETH_MAX_PACKET,
- .max_packet = ETH_MAX_PACKET + ETH_HEADER_OTHER,
-};
diff --git a/arch/um/drivers/vde.h b/arch/um/drivers/vde.h
deleted file mode 100644
index cab0379e6142..000000000000
--- a/arch/um/drivers/vde.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2007 Luca Bigliardi (shammash@artha.org).
- */
-
-#ifndef __UM_VDE_H__
-#define __UM_VDE_H__
-
-struct vde_data {
- char *vde_switch;
- char *descr;
- void *args;
- void *conn;
- void *dev;
-};
-
-struct vde_init {
- char *vde_switch;
- char *descr;
- int port;
- char *group;
- int mode;
-};
-
-extern const struct net_user_info vde_user_info;
-
-extern void vde_init_libstuff(struct vde_data *vpri, struct vde_init *init);
-
-extern int vde_user_read(void *conn, void *buf, int len);
-extern int vde_user_write(void *conn, void *buf, int len);
-
-#endif
diff --git a/arch/um/drivers/vde_kern.c b/arch/um/drivers/vde_kern.c
deleted file mode 100644
index bc6f22cbfb35..000000000000
--- a/arch/um/drivers/vde_kern.c
+++ /dev/null
@@ -1,129 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2007 Luca Bigliardi (shammash@artha.org).
- *
- * Transport usage:
- * ethN=vde,<vde_switch>,<mac addr>,<port>,<group>,<mode>,<description>
- *
- */
-
-#include <linux/init.h>
-#include <linux/netdevice.h>
-#include <net_kern.h>
-#include <net_user.h>
-#include "vde.h"
-
-static void vde_init(struct net_device *dev, void *data)
-{
- struct vde_init *init = data;
- struct uml_net_private *pri;
- struct vde_data *vpri;
-
- pri = netdev_priv(dev);
- vpri = (struct vde_data *) pri->user;
-
- vpri->vde_switch = init->vde_switch;
- vpri->descr = init->descr ? init->descr : "UML vde_transport";
- vpri->args = NULL;
- vpri->conn = NULL;
- vpri->dev = dev;
-
- printk("vde backend - %s, ", vpri->vde_switch ?
- vpri->vde_switch : "(default socket)");
-
- vde_init_libstuff(vpri, init);
-
- printk("\n");
-}
-
-static int vde_read(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
- struct vde_data *pri = (struct vde_data *) &lp->user;
-
- if (pri->conn != NULL)
- return vde_user_read(pri->conn, skb_mac_header(skb),
- skb->dev->mtu + ETH_HEADER_OTHER);
-
- printk(KERN_ERR "vde_read - we have no VDECONN to read from");
- return -EBADF;
-}
-
-static int vde_write(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
- struct vde_data *pri = (struct vde_data *) &lp->user;
-
- if (pri->conn != NULL)
- return vde_user_write((void *)pri->conn, skb->data,
- skb->len);
-
- printk(KERN_ERR "vde_write - we have no VDECONN to write to");
- return -EBADF;
-}
-
-static const struct net_kern_info vde_kern_info = {
- .init = vde_init,
- .protocol = eth_protocol,
- .read = vde_read,
- .write = vde_write,
-};
-
-static int vde_setup(char *str, char **mac_out, void *data)
-{
- struct vde_init *init = data;
- char *remain, *port_str = NULL, *mode_str = NULL, *last;
-
- *init = ((struct vde_init)
- { .vde_switch = NULL,
- .descr = NULL,
- .port = 0,
- .group = NULL,
- .mode = 0 });
-
- remain = split_if_spec(str, &init->vde_switch, mac_out, &port_str,
- &init->group, &mode_str, &init->descr, NULL);
-
- if (remain != NULL)
- printk(KERN_WARNING "vde_setup - Ignoring extra data :"
- "'%s'\n", remain);
-
- if (port_str != NULL) {
- init->port = simple_strtoul(port_str, &last, 10);
- if ((*last != '\0') || (last == port_str)) {
- printk(KERN_ERR "vde_setup - Bad port : '%s'\n",
- port_str);
- return 0;
- }
- }
-
- if (mode_str != NULL) {
- init->mode = simple_strtoul(mode_str, &last, 8);
- if ((*last != '\0') || (last == mode_str)) {
- printk(KERN_ERR "vde_setup - Bad mode : '%s'\n",
- mode_str);
- return 0;
- }
- }
-
- printk(KERN_INFO "Configured vde device: %s\n", init->vde_switch ?
- init->vde_switch : "(default socket)");
-
- return 1;
-}
-
-static struct transport vde_transport = {
- .list = LIST_HEAD_INIT(vde_transport.list),
- .name = "vde",
- .setup = vde_setup,
- .user = &vde_user_info,
- .kern = &vde_kern_info,
- .private_size = sizeof(struct vde_data),
- .setup_size = sizeof(struct vde_init),
-};
-
-static int register_vde(void)
-{
- register_transport(&vde_transport);
- return 0;
-}
-
-late_initcall(register_vde);
diff --git a/arch/um/drivers/vde_user.c b/arch/um/drivers/vde_user.c
deleted file mode 100644
index bc7dc4e1e486..000000000000
--- a/arch/um/drivers/vde_user.c
+++ /dev/null
@@ -1,125 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2007 Luca Bigliardi (shammash@artha.org).
- */
-
-#include <stddef.h>
-#include <errno.h>
-#include <libvdeplug.h>
-#include <net_user.h>
-#include <um_malloc.h>
-#include "vde.h"
-
-static int vde_user_init(void *data, void *dev)
-{
- struct vde_data *pri = data;
- VDECONN *conn = NULL;
- int err = -EINVAL;
-
- pri->dev = dev;
-
- conn = vde_open(pri->vde_switch, pri->descr, pri->args);
-
- if (conn == NULL) {
- err = -errno;
- printk(UM_KERN_ERR "vde_user_init: vde_open failed, "
- "errno = %d\n", errno);
- return err;
- }
-
- printk(UM_KERN_INFO "vde backend - connection opened\n");
-
- pri->conn = conn;
-
- return 0;
-}
-
-static int vde_user_open(void *data)
-{
- struct vde_data *pri = data;
-
- if (pri->conn != NULL)
- return vde_datafd(pri->conn);
-
- printk(UM_KERN_WARNING "vde_open - we have no VDECONN to open");
- return -EINVAL;
-}
-
-static void vde_remove(void *data)
-{
- struct vde_data *pri = data;
-
- if (pri->conn != NULL) {
- printk(UM_KERN_INFO "vde backend - closing connection\n");
- vde_close(pri->conn);
- pri->conn = NULL;
- kfree(pri->args);
- pri->args = NULL;
- return;
- }
-
- printk(UM_KERN_WARNING "vde_remove - we have no VDECONN to remove");
-}
-
-const struct net_user_info vde_user_info = {
- .init = vde_user_init,
- .open = vde_user_open,
- .close = NULL,
- .remove = vde_remove,
- .add_address = NULL,
- .delete_address = NULL,
- .mtu = ETH_MAX_PACKET,
- .max_packet = ETH_MAX_PACKET + ETH_HEADER_OTHER,
-};
-
-void vde_init_libstuff(struct vde_data *vpri, struct vde_init *init)
-{
- struct vde_open_args *args;
-
- vpri->args = uml_kmalloc(sizeof(struct vde_open_args), UM_GFP_KERNEL);
- if (vpri->args == NULL) {
- printk(UM_KERN_ERR "vde_init_libstuff - vde_open_args "
- "allocation failed");
- return;
- }
-
- args = vpri->args;
-
- args->port = init->port;
- args->group = init->group;
- args->mode = init->mode ? init->mode : 0700;
-
- args->port ? printk("port %d", args->port) :
- printk("undefined port");
-}
-
-int vde_user_read(void *conn, void *buf, int len)
-{
- VDECONN *vconn = conn;
- int rv;
-
- if (vconn == NULL)
- return 0;
-
- rv = vde_recv(vconn, buf, len, 0);
- if (rv < 0) {
- if (errno == EAGAIN)
- return 0;
- return -errno;
- }
- else if (rv == 0)
- return -ENOTCONN;
-
- return rv;
-}
-
-int vde_user_write(void *conn, void *buf, int len)
-{
- VDECONN *vconn = conn;
-
- if (vconn == NULL)
- return 0;
-
- return vde_send(vconn, buf, len, 0);
-}
-
diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c
index b97bb52dd562..5226d2c52e6a 100644
--- a/arch/um/drivers/vector_kern.c
+++ b/arch/um/drivers/vector_kern.c
@@ -8,6 +8,8 @@
* Copyright (C) 2001 by various other people who didn't put their name here.
*/
+#define pr_fmt(fmt) "uml-vector: " fmt
+
#include <linux/memblock.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
@@ -27,7 +29,6 @@
#include <init.h>
#include <irq_kern.h>
#include <irq_user.h>
-#include <net_kern.h>
#include <os.h>
#include "mconsole_kern.h"
#include "vector_user.h"
@@ -1539,7 +1540,41 @@ static void vector_timer_expire(struct timer_list *t)
napi_schedule(&vp->napi);
}
+static void vector_setup_etheraddr(struct net_device *dev, char *str)
+{
+ u8 addr[ETH_ALEN];
+
+ if (str == NULL)
+ goto random;
+
+ if (!mac_pton(str, addr)) {
+ netdev_err(dev,
+ "Failed to parse '%s' as an ethernet address\n", str);
+ goto random;
+ }
+ if (is_multicast_ether_addr(addr)) {
+ netdev_err(dev,
+ "Attempt to assign a multicast ethernet address to a device disallowed\n");
+ goto random;
+ }
+ if (!is_valid_ether_addr(addr)) {
+ netdev_err(dev,
+ "Attempt to assign an invalid ethernet address to a device disallowed\n");
+ goto random;
+ }
+ if (!is_local_ether_addr(addr)) {
+ netdev_warn(dev, "Warning: Assigning a globally valid ethernet address to a device\n");
+ netdev_warn(dev, "You should set the 2nd rightmost bit in the first byte of the MAC,\n");
+ netdev_warn(dev, "i.e. %02x:%02x:%02x:%02x:%02x:%02x\n",
+ addr[0] | 0x02, addr[1], addr[2], addr[3], addr[4], addr[5]);
+ }
+ eth_hw_addr_set(dev, addr);
+ return;
+random:
+ netdev_info(dev, "Choosing a random ethernet address\n");
+ eth_hw_addr_random(dev);
+}
static void vector_eth_configure(
int n,
@@ -1553,14 +1588,12 @@ static void vector_eth_configure(
device = kzalloc(sizeof(*device), GFP_KERNEL);
if (device == NULL) {
- printk(KERN_ERR "eth_configure failed to allocate struct "
- "vector_device\n");
+ pr_err("Failed to allocate struct vector_device for vec%d\n", n);
return;
}
dev = alloc_etherdev(sizeof(struct vector_private));
if (dev == NULL) {
- printk(KERN_ERR "eth_configure: failed to allocate struct "
- "net_device for vec%d\n", n);
+ pr_err("Failed to allocate struct net_device for vec%d\n", n);
goto out_free_device;
}
@@ -1574,7 +1607,7 @@ static void vector_eth_configure(
* and fail.
*/
snprintf(dev->name, sizeof(dev->name), "vec%d", n);
- uml_net_setup_etheraddr(dev, uml_vector_fetch_arg(def, "mac"));
+ vector_setup_etheraddr(dev, uml_vector_fetch_arg(def, "mac"));
vp = netdev_priv(dev);
/* sysfs register */
@@ -1690,8 +1723,7 @@ static int __init vector_setup(char *str)
err = vector_parse(str, &n, &str, &error);
if (err) {
- printk(KERN_ERR "vector_setup - Couldn't parse '%s' : %s\n",
- str, error);
+ pr_err("Couldn't parse '%s': %s\n", str, error);
return 1;
}
new = memblock_alloc_or_panic(sizeof(*new), SMP_CACHE_BYTES);
diff --git a/arch/um/drivers/vfio_kern.c b/arch/um/drivers/vfio_kern.c
new file mode 100644
index 000000000000..b51fc9888ae1
--- /dev/null
+++ b/arch/um/drivers/vfio_kern.c
@@ -0,0 +1,642 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2025 Ant Group
+ * Author: Tiwei Bie <tiwei.btw@antgroup.com>
+ */
+
+#define pr_fmt(fmt) "vfio-uml: " fmt
+
+#include <linux/module.h>
+#include <linux/logic_iomem.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/string.h>
+#include <linux/unaligned.h>
+#include <irq_kern.h>
+#include <init.h>
+#include <os.h>
+
+#include "virt-pci.h"
+#include "vfio_user.h"
+
+#define to_vdev(_pdev) container_of(_pdev, struct uml_vfio_device, pdev)
+
+struct uml_vfio_intr_ctx {
+ struct uml_vfio_device *dev;
+ int irq;
+};
+
+struct uml_vfio_device {
+ const char *name;
+ int group;
+
+ struct um_pci_device pdev;
+ struct uml_vfio_user_device udev;
+ struct uml_vfio_intr_ctx *intr_ctx;
+
+ int msix_cap;
+ int msix_bar;
+ int msix_offset;
+ int msix_size;
+ u32 *msix_data;
+
+ struct list_head list;
+};
+
+struct uml_vfio_group {
+ int id;
+ int fd;
+ int users;
+ struct list_head list;
+};
+
+static struct {
+ int fd;
+ int users;
+} uml_vfio_container = { .fd = -1 };
+static DEFINE_MUTEX(uml_vfio_container_mtx);
+
+static LIST_HEAD(uml_vfio_groups);
+static DEFINE_MUTEX(uml_vfio_groups_mtx);
+
+static LIST_HEAD(uml_vfio_devices);
+
+static int uml_vfio_set_container(int group_fd)
+{
+ int err;
+
+ guard(mutex)(&uml_vfio_container_mtx);
+
+ err = uml_vfio_user_set_container(uml_vfio_container.fd, group_fd);
+ if (err)
+ return err;
+
+ uml_vfio_container.users++;
+ if (uml_vfio_container.users > 1)
+ return 0;
+
+ err = uml_vfio_user_setup_iommu(uml_vfio_container.fd);
+ if (err) {
+ uml_vfio_user_unset_container(uml_vfio_container.fd, group_fd);
+ uml_vfio_container.users--;
+ }
+ return err;
+}
+
+static void uml_vfio_unset_container(int group_fd)
+{
+ guard(mutex)(&uml_vfio_container_mtx);
+
+ uml_vfio_user_unset_container(uml_vfio_container.fd, group_fd);
+ uml_vfio_container.users--;
+}
+
+static int uml_vfio_open_group(int group_id)
+{
+ struct uml_vfio_group *group;
+ int err;
+
+ guard(mutex)(&uml_vfio_groups_mtx);
+
+ list_for_each_entry(group, &uml_vfio_groups, list) {
+ if (group->id == group_id) {
+ group->users++;
+ return group->fd;
+ }
+ }
+
+ group = kzalloc(sizeof(*group), GFP_KERNEL);
+ if (!group)
+ return -ENOMEM;
+
+ group->fd = uml_vfio_user_open_group(group_id);
+ if (group->fd < 0) {
+ err = group->fd;
+ goto free_group;
+ }
+
+ err = uml_vfio_set_container(group->fd);
+ if (err)
+ goto close_group;
+
+ group->id = group_id;
+ group->users = 1;
+
+ list_add(&group->list, &uml_vfio_groups);
+
+ return group->fd;
+
+close_group:
+ os_close_file(group->fd);
+free_group:
+ kfree(group);
+ return err;
+}
+
+static int uml_vfio_release_group(int group_fd)
+{
+ struct uml_vfio_group *group;
+
+ guard(mutex)(&uml_vfio_groups_mtx);
+
+ list_for_each_entry(group, &uml_vfio_groups, list) {
+ if (group->fd == group_fd) {
+ group->users--;
+ if (group->users == 0) {
+ uml_vfio_unset_container(group_fd);
+ os_close_file(group_fd);
+ list_del(&group->list);
+ kfree(group);
+ }
+ return 0;
+ }
+ }
+
+ return -ENOENT;
+}
+
+static irqreturn_t uml_vfio_interrupt(int unused, void *opaque)
+{
+ struct uml_vfio_intr_ctx *ctx = opaque;
+ struct uml_vfio_device *dev = ctx->dev;
+ int index = ctx - dev->intr_ctx;
+ int irqfd = dev->udev.irqfd[index];
+ int irq = dev->msix_data[index];
+ uint64_t v;
+ int r;
+
+ do {
+ r = os_read_file(irqfd, &v, sizeof(v));
+ if (r == sizeof(v))
+ generic_handle_irq(irq);
+ } while (r == sizeof(v) || r == -EINTR);
+ WARN(r != -EAGAIN, "read returned %d\n", r);
+
+ return IRQ_HANDLED;
+}
+
+static int uml_vfio_activate_irq(struct uml_vfio_device *dev, int index)
+{
+ struct uml_vfio_intr_ctx *ctx = &dev->intr_ctx[index];
+ int err, irqfd;
+
+ if (ctx->irq >= 0)
+ return 0;
+
+ irqfd = uml_vfio_user_activate_irq(&dev->udev, index);
+ if (irqfd < 0)
+ return irqfd;
+
+ ctx->irq = um_request_irq(UM_IRQ_ALLOC, irqfd, IRQ_READ,
+ uml_vfio_interrupt, 0,
+ "vfio-uml", ctx);
+ if (ctx->irq < 0) {
+ err = ctx->irq;
+ goto deactivate;
+ }
+
+ err = add_sigio_fd(irqfd);
+ if (err)
+ goto free_irq;
+
+ return 0;
+
+free_irq:
+ um_free_irq(ctx->irq, ctx);
+ ctx->irq = -1;
+deactivate:
+ uml_vfio_user_deactivate_irq(&dev->udev, index);
+ return err;
+}
+
+static int uml_vfio_deactivate_irq(struct uml_vfio_device *dev, int index)
+{
+ struct uml_vfio_intr_ctx *ctx = &dev->intr_ctx[index];
+
+ if (ctx->irq >= 0) {
+ ignore_sigio_fd(dev->udev.irqfd[index]);
+ um_free_irq(ctx->irq, ctx);
+ uml_vfio_user_deactivate_irq(&dev->udev, index);
+ ctx->irq = -1;
+ }
+ return 0;
+}
+
+static int uml_vfio_update_msix_cap(struct uml_vfio_device *dev,
+ unsigned int offset, int size,
+ unsigned long val)
+{
+ /*
+ * Here, we handle only the operations we care about,
+ * ignoring the rest.
+ */
+ if (size == 2 && offset == dev->msix_cap + PCI_MSIX_FLAGS) {
+ switch (val & ~PCI_MSIX_FLAGS_QSIZE) {
+ case PCI_MSIX_FLAGS_ENABLE:
+ case 0:
+ return uml_vfio_user_update_irqs(&dev->udev);
+ }
+ }
+ return 0;
+}
+
+static int uml_vfio_update_msix_table(struct uml_vfio_device *dev,
+ unsigned int offset, int size,
+ unsigned long val)
+{
+ int index;
+
+ /*
+ * Here, we handle only the operations we care about,
+ * ignoring the rest.
+ */
+ offset -= dev->msix_offset + PCI_MSIX_ENTRY_DATA;
+
+ if (size != 4 || offset % PCI_MSIX_ENTRY_SIZE != 0)
+ return 0;
+
+ index = offset / PCI_MSIX_ENTRY_SIZE;
+ if (index >= dev->udev.irq_count)
+ return -EINVAL;
+
+ dev->msix_data[index] = val;
+
+ return val ? uml_vfio_activate_irq(dev, index) :
+ uml_vfio_deactivate_irq(dev, index);
+}
+
+static unsigned long __uml_vfio_cfgspace_read(struct uml_vfio_device *dev,
+ unsigned int offset, int size)
+{
+ u8 data[8];
+
+ memset(data, 0xff, sizeof(data));
+
+ if (uml_vfio_user_cfgspace_read(&dev->udev, offset, data, size))
+ return ULONG_MAX;
+
+ switch (size) {
+ case 1:
+ return data[0];
+ case 2:
+ return le16_to_cpup((void *)data);
+ case 4:
+ return le32_to_cpup((void *)data);
+#ifdef CONFIG_64BIT
+ case 8:
+ return le64_to_cpup((void *)data);
+#endif
+ default:
+ return ULONG_MAX;
+ }
+}
+
+static unsigned long uml_vfio_cfgspace_read(struct um_pci_device *pdev,
+ unsigned int offset, int size)
+{
+ struct uml_vfio_device *dev = to_vdev(pdev);
+
+ return __uml_vfio_cfgspace_read(dev, offset, size);
+}
+
+static void __uml_vfio_cfgspace_write(struct uml_vfio_device *dev,
+ unsigned int offset, int size,
+ unsigned long val)
+{
+ u8 data[8];
+
+ switch (size) {
+ case 1:
+ data[0] = (u8)val;
+ break;
+ case 2:
+ put_unaligned_le16(val, (void *)data);
+ break;
+ case 4:
+ put_unaligned_le32(val, (void *)data);
+ break;
+#ifdef CONFIG_64BIT
+ case 8:
+ put_unaligned_le64(val, (void *)data);
+ break;
+#endif
+ }
+
+ WARN_ON(uml_vfio_user_cfgspace_write(&dev->udev, offset, data, size));
+}
+
+static void uml_vfio_cfgspace_write(struct um_pci_device *pdev,
+ unsigned int offset, int size,
+ unsigned long val)
+{
+ struct uml_vfio_device *dev = to_vdev(pdev);
+
+ if (offset < dev->msix_cap + PCI_CAP_MSIX_SIZEOF &&
+ offset + size > dev->msix_cap)
+ WARN_ON(uml_vfio_update_msix_cap(dev, offset, size, val));
+
+ __uml_vfio_cfgspace_write(dev, offset, size, val);
+}
+
+static void uml_vfio_bar_copy_from(struct um_pci_device *pdev, int bar,
+ void *buffer, unsigned int offset, int size)
+{
+ struct uml_vfio_device *dev = to_vdev(pdev);
+
+ memset(buffer, 0xff, size);
+ uml_vfio_user_bar_read(&dev->udev, bar, offset, buffer, size);
+}
+
+static unsigned long uml_vfio_bar_read(struct um_pci_device *pdev, int bar,
+ unsigned int offset, int size)
+{
+ u8 data[8];
+
+ uml_vfio_bar_copy_from(pdev, bar, data, offset, size);
+
+ switch (size) {
+ case 1:
+ return data[0];
+ case 2:
+ return le16_to_cpup((void *)data);
+ case 4:
+ return le32_to_cpup((void *)data);
+#ifdef CONFIG_64BIT
+ case 8:
+ return le64_to_cpup((void *)data);
+#endif
+ default:
+ return ULONG_MAX;
+ }
+}
+
+static void uml_vfio_bar_copy_to(struct um_pci_device *pdev, int bar,
+ unsigned int offset, const void *buffer,
+ int size)
+{
+ struct uml_vfio_device *dev = to_vdev(pdev);
+
+ uml_vfio_user_bar_write(&dev->udev, bar, offset, buffer, size);
+}
+
+static void uml_vfio_bar_write(struct um_pci_device *pdev, int bar,
+ unsigned int offset, int size,
+ unsigned long val)
+{
+ struct uml_vfio_device *dev = to_vdev(pdev);
+ u8 data[8];
+
+ if (bar == dev->msix_bar && offset + size > dev->msix_offset &&
+ offset < dev->msix_offset + dev->msix_size)
+ WARN_ON(uml_vfio_update_msix_table(dev, offset, size, val));
+
+ switch (size) {
+ case 1:
+ data[0] = (u8)val;
+ break;
+ case 2:
+ put_unaligned_le16(val, (void *)data);
+ break;
+ case 4:
+ put_unaligned_le32(val, (void *)data);
+ break;
+#ifdef CONFIG_64BIT
+ case 8:
+ put_unaligned_le64(val, (void *)data);
+ break;
+#endif
+ }
+
+ uml_vfio_bar_copy_to(pdev, bar, offset, data, size);
+}
+
+static void uml_vfio_bar_set(struct um_pci_device *pdev, int bar,
+ unsigned int offset, u8 value, int size)
+{
+ struct uml_vfio_device *dev = to_vdev(pdev);
+ int i;
+
+ for (i = 0; i < size; i++)
+ uml_vfio_user_bar_write(&dev->udev, bar, offset + i, &value, 1);
+}
+
+static const struct um_pci_ops uml_vfio_um_pci_ops = {
+ .cfgspace_read = uml_vfio_cfgspace_read,
+ .cfgspace_write = uml_vfio_cfgspace_write,
+ .bar_read = uml_vfio_bar_read,
+ .bar_write = uml_vfio_bar_write,
+ .bar_copy_from = uml_vfio_bar_copy_from,
+ .bar_copy_to = uml_vfio_bar_copy_to,
+ .bar_set = uml_vfio_bar_set,
+};
+
+static u8 uml_vfio_find_capability(struct uml_vfio_device *dev, u8 cap)
+{
+ u8 id, pos;
+ u16 ent;
+ int ttl = 48; /* PCI_FIND_CAP_TTL */
+
+ pos = __uml_vfio_cfgspace_read(dev, PCI_CAPABILITY_LIST, sizeof(pos));
+
+ while (pos && ttl--) {
+ ent = __uml_vfio_cfgspace_read(dev, pos, sizeof(ent));
+
+ id = ent & 0xff;
+ if (id == 0xff)
+ break;
+ if (id == cap)
+ return pos;
+
+ pos = ent >> 8;
+ }
+
+ return 0;
+}
+
+static int uml_vfio_read_msix_table(struct uml_vfio_device *dev)
+{
+ unsigned int off;
+ u16 flags;
+ u32 tbl;
+
+ off = uml_vfio_find_capability(dev, PCI_CAP_ID_MSIX);
+ if (!off)
+ return -ENOTSUPP;
+
+ dev->msix_cap = off;
+
+ tbl = __uml_vfio_cfgspace_read(dev, off + PCI_MSIX_TABLE, sizeof(tbl));
+ flags = __uml_vfio_cfgspace_read(dev, off + PCI_MSIX_FLAGS, sizeof(flags));
+
+ dev->msix_bar = tbl & PCI_MSIX_TABLE_BIR;
+ dev->msix_offset = tbl & PCI_MSIX_TABLE_OFFSET;
+ dev->msix_size = ((flags & PCI_MSIX_FLAGS_QSIZE) + 1) * PCI_MSIX_ENTRY_SIZE;
+
+ dev->msix_data = kzalloc(dev->msix_size, GFP_KERNEL);
+ if (!dev->msix_data)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void uml_vfio_open_device(struct uml_vfio_device *dev)
+{
+ struct uml_vfio_intr_ctx *ctx;
+ int err, group_id, i;
+
+ group_id = uml_vfio_user_get_group_id(dev->name);
+ if (group_id < 0) {
+ pr_err("Failed to get group id (%s), error %d\n",
+ dev->name, group_id);
+ goto free_dev;
+ }
+
+ dev->group = uml_vfio_open_group(group_id);
+ if (dev->group < 0) {
+ pr_err("Failed to open group %d (%s), error %d\n",
+ group_id, dev->name, dev->group);
+ goto free_dev;
+ }
+
+ err = uml_vfio_user_setup_device(&dev->udev, dev->group, dev->name);
+ if (err) {
+ pr_err("Failed to setup device (%s), error %d\n",
+ dev->name, err);
+ goto release_group;
+ }
+
+ err = uml_vfio_read_msix_table(dev);
+ if (err) {
+ pr_err("Failed to read MSI-X table (%s), error %d\n",
+ dev->name, err);
+ goto teardown_udev;
+ }
+
+ dev->intr_ctx = kmalloc_array(dev->udev.irq_count,
+ sizeof(struct uml_vfio_intr_ctx),
+ GFP_KERNEL);
+ if (!dev->intr_ctx) {
+ pr_err("Failed to allocate interrupt context (%s)\n",
+ dev->name);
+ goto free_msix;
+ }
+
+ for (i = 0; i < dev->udev.irq_count; i++) {
+ ctx = &dev->intr_ctx[i];
+ ctx->dev = dev;
+ ctx->irq = -1;
+ }
+
+ dev->pdev.ops = &uml_vfio_um_pci_ops;
+
+ err = um_pci_device_register(&dev->pdev);
+ if (err) {
+ pr_err("Failed to register UM PCI device (%s), error %d\n",
+ dev->name, err);
+ goto free_intr_ctx;
+ }
+
+ return;
+
+free_intr_ctx:
+ kfree(dev->intr_ctx);
+free_msix:
+ kfree(dev->msix_data);
+teardown_udev:
+ uml_vfio_user_teardown_device(&dev->udev);
+release_group:
+ uml_vfio_release_group(dev->group);
+free_dev:
+ list_del(&dev->list);
+ kfree(dev->name);
+ kfree(dev);
+}
+
+static void uml_vfio_release_device(struct uml_vfio_device *dev)
+{
+ int i;
+
+ for (i = 0; i < dev->udev.irq_count; i++)
+ uml_vfio_deactivate_irq(dev, i);
+ uml_vfio_user_update_irqs(&dev->udev);
+
+ um_pci_device_unregister(&dev->pdev);
+ kfree(dev->intr_ctx);
+ kfree(dev->msix_data);
+ uml_vfio_user_teardown_device(&dev->udev);
+ uml_vfio_release_group(dev->group);
+ list_del(&dev->list);
+ kfree(dev->name);
+ kfree(dev);
+}
+
+static int uml_vfio_cmdline_set(const char *device, const struct kernel_param *kp)
+{
+ struct uml_vfio_device *dev;
+ int fd;
+
+ if (uml_vfio_container.fd < 0) {
+ fd = uml_vfio_user_open_container();
+ if (fd < 0)
+ return fd;
+ uml_vfio_container.fd = fd;
+ }
+
+ dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+ if (!dev)
+ return -ENOMEM;
+
+ dev->name = kstrdup(device, GFP_KERNEL);
+ if (!dev->name) {
+ kfree(dev);
+ return -ENOMEM;
+ }
+
+ list_add_tail(&dev->list, &uml_vfio_devices);
+ return 0;
+}
+
+static int uml_vfio_cmdline_get(char *buffer, const struct kernel_param *kp)
+{
+ return 0;
+}
+
+static const struct kernel_param_ops uml_vfio_cmdline_param_ops = {
+ .set = uml_vfio_cmdline_set,
+ .get = uml_vfio_cmdline_get,
+};
+
+device_param_cb(device, &uml_vfio_cmdline_param_ops, NULL, 0400);
+__uml_help(uml_vfio_cmdline_param_ops,
+"vfio_uml.device=<domain:bus:slot.function>\n"
+" Pass through a PCI device to UML via VFIO. Currently, only MSI-X\n"
+" capable devices are supported, and it is assumed that drivers will\n"
+" use MSI-X. This parameter can be specified multiple times to pass\n"
+" through multiple PCI devices to UML.\n\n"
+);
+
+static int __init uml_vfio_init(void)
+{
+ struct uml_vfio_device *dev, *n;
+
+ sigio_broken();
+
+ /* If the opening fails, the device will be released. */
+ list_for_each_entry_safe(dev, n, &uml_vfio_devices, list)
+ uml_vfio_open_device(dev);
+
+ return 0;
+}
+late_initcall(uml_vfio_init);
+
+static void __exit uml_vfio_exit(void)
+{
+ struct uml_vfio_device *dev, *n;
+
+ list_for_each_entry_safe(dev, n, &uml_vfio_devices, list)
+ uml_vfio_release_device(dev);
+
+ if (uml_vfio_container.fd >= 0)
+ os_close_file(uml_vfio_container.fd);
+}
+module_exit(uml_vfio_exit);
diff --git a/arch/um/drivers/vfio_user.c b/arch/um/drivers/vfio_user.c
new file mode 100644
index 000000000000..6a45d8e14582
--- /dev/null
+++ b/arch/um/drivers/vfio_user.c
@@ -0,0 +1,327 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2025 Ant Group
+ * Author: Tiwei Bie <tiwei.btw@antgroup.com>
+ */
+#include <errno.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/eventfd.h>
+#include <linux/limits.h>
+#include <linux/vfio.h>
+#include <linux/pci_regs.h>
+#include <as-layout.h>
+#include <um_malloc.h>
+
+#include "vfio_user.h"
+
+int uml_vfio_user_open_container(void)
+{
+ int r, fd;
+
+ fd = open("/dev/vfio/vfio", O_RDWR);
+ if (fd < 0)
+ return -errno;
+
+ r = ioctl(fd, VFIO_GET_API_VERSION);
+ if (r != VFIO_API_VERSION) {
+ r = r < 0 ? -errno : -EINVAL;
+ goto error;
+ }
+
+ r = ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU);
+ if (r <= 0) {
+ r = r < 0 ? -errno : -EINVAL;
+ goto error;
+ }
+
+ return fd;
+
+error:
+ close(fd);
+ return r;
+}
+
+int uml_vfio_user_setup_iommu(int container)
+{
+ /*
+ * This is a bit tricky. See the big comment in
+ * vhost_user_set_mem_table() in virtio_uml.c.
+ */
+ unsigned long reserved = uml_reserved - uml_physmem;
+ struct vfio_iommu_type1_dma_map dma_map = {
+ .argsz = sizeof(dma_map),
+ .flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
+ .vaddr = uml_reserved,
+ .iova = reserved,
+ .size = physmem_size - reserved,
+ };
+
+ if (ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU) < 0)
+ return -errno;
+
+ if (ioctl(container, VFIO_IOMMU_MAP_DMA, &dma_map) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int uml_vfio_user_get_group_id(const char *device)
+{
+ char *path, *buf, *end;
+ const char *name;
+ int r;
+
+ path = uml_kmalloc(PATH_MAX, UM_GFP_KERNEL);
+ if (!path)
+ return -ENOMEM;
+
+ sprintf(path, "/sys/bus/pci/devices/%s/iommu_group", device);
+
+ buf = uml_kmalloc(PATH_MAX + 1, UM_GFP_KERNEL);
+ if (!buf) {
+ r = -ENOMEM;
+ goto free_path;
+ }
+
+ r = readlink(path, buf, PATH_MAX);
+ if (r < 0) {
+ r = -errno;
+ goto free_buf;
+ }
+ buf[r] = '\0';
+
+ name = basename(buf);
+
+ r = strtoul(name, &end, 10);
+ if (*end != '\0' || end == name) {
+ r = -EINVAL;
+ goto free_buf;
+ }
+
+free_buf:
+ kfree(buf);
+free_path:
+ kfree(path);
+ return r;
+}
+
+int uml_vfio_user_open_group(int group_id)
+{
+ char *path;
+ int fd;
+
+ path = uml_kmalloc(PATH_MAX, UM_GFP_KERNEL);
+ if (!path)
+ return -ENOMEM;
+
+ sprintf(path, "/dev/vfio/%d", group_id);
+
+ fd = open(path, O_RDWR);
+ if (fd < 0) {
+ fd = -errno;
+ goto out;
+ }
+
+out:
+ kfree(path);
+ return fd;
+}
+
+int uml_vfio_user_set_container(int container, int group)
+{
+ if (ioctl(group, VFIO_GROUP_SET_CONTAINER, &container) < 0)
+ return -errno;
+ return 0;
+}
+
+int uml_vfio_user_unset_container(int container, int group)
+{
+ if (ioctl(group, VFIO_GROUP_UNSET_CONTAINER, &container) < 0)
+ return -errno;
+ return 0;
+}
+
+static int vfio_set_irqs(int device, int start, int count, int *irqfd)
+{
+ struct vfio_irq_set *irq_set;
+ int argsz = sizeof(*irq_set) + sizeof(*irqfd) * count;
+ int err = 0;
+
+ irq_set = uml_kmalloc(argsz, UM_GFP_KERNEL);
+ if (!irq_set)
+ return -ENOMEM;
+
+ irq_set->argsz = argsz;
+ irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
+ irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
+ irq_set->start = start;
+ irq_set->count = count;
+ memcpy(irq_set->data, irqfd, sizeof(*irqfd) * count);
+
+ if (ioctl(device, VFIO_DEVICE_SET_IRQS, irq_set) < 0) {
+ err = -errno;
+ goto out;
+ }
+
+out:
+ kfree(irq_set);
+ return err;
+}
+
+int uml_vfio_user_setup_device(struct uml_vfio_user_device *dev,
+ int group, const char *device)
+{
+ struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
+ struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info) };
+ int err, i;
+
+ dev->device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, device);
+ if (dev->device < 0)
+ return -errno;
+
+ if (ioctl(dev->device, VFIO_DEVICE_GET_INFO, &device_info) < 0) {
+ err = -errno;
+ goto close_device;
+ }
+
+ dev->num_regions = device_info.num_regions;
+ if (dev->num_regions > VFIO_PCI_CONFIG_REGION_INDEX + 1)
+ dev->num_regions = VFIO_PCI_CONFIG_REGION_INDEX + 1;
+
+ dev->region = uml_kmalloc(sizeof(*dev->region) * dev->num_regions,
+ UM_GFP_KERNEL);
+ if (!dev->region) {
+ err = -ENOMEM;
+ goto close_device;
+ }
+
+ for (i = 0; i < dev->num_regions; i++) {
+ struct vfio_region_info region = {
+ .argsz = sizeof(region),
+ .index = i,
+ };
+ if (ioctl(dev->device, VFIO_DEVICE_GET_REGION_INFO, &region) < 0) {
+ err = -errno;
+ goto free_region;
+ }
+ dev->region[i].size = region.size;
+ dev->region[i].offset = region.offset;
+ }
+
+ /* Only MSI-X is supported currently. */
+ irq_info.index = VFIO_PCI_MSIX_IRQ_INDEX;
+ if (ioctl(dev->device, VFIO_DEVICE_GET_IRQ_INFO, &irq_info) < 0) {
+ err = -errno;
+ goto free_region;
+ }
+
+ dev->irq_count = irq_info.count;
+
+ dev->irqfd = uml_kmalloc(sizeof(int) * dev->irq_count, UM_GFP_KERNEL);
+ if (!dev->irqfd) {
+ err = -ENOMEM;
+ goto free_region;
+ }
+
+ memset(dev->irqfd, -1, sizeof(int) * dev->irq_count);
+
+ err = vfio_set_irqs(dev->device, 0, dev->irq_count, dev->irqfd);
+ if (err)
+ goto free_irqfd;
+
+ return 0;
+
+free_irqfd:
+ kfree(dev->irqfd);
+free_region:
+ kfree(dev->region);
+close_device:
+ close(dev->device);
+ return err;
+}
+
+void uml_vfio_user_teardown_device(struct uml_vfio_user_device *dev)
+{
+ kfree(dev->irqfd);
+ kfree(dev->region);
+ close(dev->device);
+}
+
+int uml_vfio_user_activate_irq(struct uml_vfio_user_device *dev, int index)
+{
+ int irqfd;
+
+ irqfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+ if (irqfd < 0)
+ return -errno;
+
+ dev->irqfd[index] = irqfd;
+ return irqfd;
+}
+
+void uml_vfio_user_deactivate_irq(struct uml_vfio_user_device *dev, int index)
+{
+ close(dev->irqfd[index]);
+ dev->irqfd[index] = -1;
+}
+
+int uml_vfio_user_update_irqs(struct uml_vfio_user_device *dev)
+{
+ return vfio_set_irqs(dev->device, 0, dev->irq_count, dev->irqfd);
+}
+
+static int vfio_region_read(struct uml_vfio_user_device *dev, unsigned int index,
+ uint64_t offset, void *buf, uint64_t size)
+{
+ if (index >= dev->num_regions || offset + size > dev->region[index].size)
+ return -EINVAL;
+
+ if (pread(dev->device, buf, size, dev->region[index].offset + offset) < 0)
+ return -errno;
+
+ return 0;
+}
+
+static int vfio_region_write(struct uml_vfio_user_device *dev, unsigned int index,
+ uint64_t offset, const void *buf, uint64_t size)
+{
+ if (index >= dev->num_regions || offset + size > dev->region[index].size)
+ return -EINVAL;
+
+ if (pwrite(dev->device, buf, size, dev->region[index].offset + offset) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int uml_vfio_user_cfgspace_read(struct uml_vfio_user_device *dev,
+ unsigned int offset, void *buf, int size)
+{
+ return vfio_region_read(dev, VFIO_PCI_CONFIG_REGION_INDEX,
+ offset, buf, size);
+}
+
+int uml_vfio_user_cfgspace_write(struct uml_vfio_user_device *dev,
+ unsigned int offset, const void *buf, int size)
+{
+ return vfio_region_write(dev, VFIO_PCI_CONFIG_REGION_INDEX,
+ offset, buf, size);
+}
+
+int uml_vfio_user_bar_read(struct uml_vfio_user_device *dev, int bar,
+ unsigned int offset, void *buf, int size)
+{
+ return vfio_region_read(dev, bar, offset, buf, size);
+}
+
+int uml_vfio_user_bar_write(struct uml_vfio_user_device *dev, int bar,
+ unsigned int offset, const void *buf, int size)
+{
+ return vfio_region_write(dev, bar, offset, buf, size);
+}
diff --git a/arch/um/drivers/vfio_user.h b/arch/um/drivers/vfio_user.h
new file mode 100644
index 000000000000..75535e05059b
--- /dev/null
+++ b/arch/um/drivers/vfio_user.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __UM_VFIO_USER_H
+#define __UM_VFIO_USER_H
+
+struct uml_vfio_user_device {
+ int device;
+
+ struct {
+ uint64_t size;
+ uint64_t offset;
+ } *region;
+ int num_regions;
+
+ int32_t *irqfd;
+ int irq_count;
+};
+
+int uml_vfio_user_open_container(void);
+int uml_vfio_user_setup_iommu(int container);
+
+int uml_vfio_user_get_group_id(const char *device);
+int uml_vfio_user_open_group(int group_id);
+int uml_vfio_user_set_container(int container, int group);
+int uml_vfio_user_unset_container(int container, int group);
+
+int uml_vfio_user_setup_device(struct uml_vfio_user_device *dev,
+ int group, const char *device);
+void uml_vfio_user_teardown_device(struct uml_vfio_user_device *dev);
+
+int uml_vfio_user_activate_irq(struct uml_vfio_user_device *dev, int index);
+void uml_vfio_user_deactivate_irq(struct uml_vfio_user_device *dev, int index);
+int uml_vfio_user_update_irqs(struct uml_vfio_user_device *dev);
+
+int uml_vfio_user_cfgspace_read(struct uml_vfio_user_device *dev,
+ unsigned int offset, void *buf, int size);
+int uml_vfio_user_cfgspace_write(struct uml_vfio_user_device *dev,
+ unsigned int offset, const void *buf, int size);
+
+int uml_vfio_user_bar_read(struct uml_vfio_user_device *dev, int bar,
+ unsigned int offset, void *buf, int size);
+int uml_vfio_user_bar_write(struct uml_vfio_user_device *dev, int bar,
+ unsigned int offset, const void *buf, int size);
+
+#endif /* __UM_VFIO_USER_H */
diff --git a/arch/um/drivers/virt-pci.c b/arch/um/drivers/virt-pci.c
index b83b5a765d4e..0fe207ca4b72 100644
--- a/arch/um/drivers/virt-pci.c
+++ b/arch/um/drivers/virt-pci.c
@@ -538,11 +538,6 @@ void um_pci_platform_device_unregister(struct um_pci_device *dev)
static int __init um_pci_init(void)
{
- struct irq_domain_info inner_domain_info = {
- .size = MAX_MSI_VECTORS,
- .hwirq_max = MAX_MSI_VECTORS,
- .ops = &um_pci_inner_domain_ops,
- };
int err, i;
WARN_ON(logic_iomem_add_region(&virt_cfgspace_resource,
@@ -564,10 +559,10 @@ static int __init um_pci_init(void)
goto free;
}
- inner_domain_info.fwnode = um_pci_fwnode;
- um_pci_inner_domain = irq_domain_instantiate(&inner_domain_info);
- if (IS_ERR(um_pci_inner_domain)) {
- err = PTR_ERR(um_pci_inner_domain);
+ um_pci_inner_domain = irq_domain_create_linear(um_pci_fwnode, MAX_MSI_VECTORS,
+ &um_pci_inner_domain_ops, NULL);
+ if (!um_pci_inner_domain) {
+ err = -ENOMEM;
goto free;
}
@@ -602,7 +597,7 @@ static int __init um_pci_init(void)
return 0;
free:
- if (!IS_ERR_OR_NULL(um_pci_inner_domain))
+ if (um_pci_inner_domain)
irq_domain_remove(um_pci_inner_domain);
if (um_pci_fwnode)
irq_domain_free_fwnode(um_pci_fwnode);
diff --git a/arch/um/drivers/xterm.c b/arch/um/drivers/xterm.c
index e4316c7981e8..d05918e422f9 100644
--- a/arch/um/drivers/xterm.c
+++ b/arch/um/drivers/xterm.c
@@ -81,7 +81,7 @@ __uml_setup("xterm=", xterm_setup,
" '<switch> command arg1 arg2 ...'.\n"
" The default values are 'xterm=" CONFIG_XTERM_CHAN_DEFAULT_EMULATOR
",-T,-e'.\n"
-" Values for gnome-terminal are 'xterm=gnome-terminal,-t,-x'.\n\n"
+" Values for gnome-terminal are 'xterm=gnome-terminal,-t,--'.\n\n"
);
static int xterm_open(int input, int output, int primary, void *d,
@@ -97,12 +97,9 @@ static int xterm_open(int input, int output, int primary, void *d,
if (access(argv[4], X_OK) < 0)
argv[4] = "port-helper";
- /*
- * Check that DISPLAY is set, this doesn't guarantee the xterm
- * will work but w/o it we can be pretty sure it won't.
- */
- if (getenv("DISPLAY") == NULL) {
- printk(UM_KERN_ERR "xterm_open: $DISPLAY not set.\n");
+ /* Ensure we are running on Xorg or Wayland. */
+ if (!getenv("DISPLAY") && !getenv("WAYLAND_DISPLAY")) {
+ printk(UM_KERN_ERR "xterm_open : neither $DISPLAY nor $WAYLAND_DISPLAY is set.\n");
return -ENODEV;
}
diff --git a/arch/um/include/asm/asm-prototypes.h b/arch/um/include/asm/asm-prototypes.h
index 5898a26daa0d..408b31d59127 100644
--- a/arch/um/include/asm/asm-prototypes.h
+++ b/arch/um/include/asm/asm-prototypes.h
@@ -1 +1,6 @@
#include <asm-generic/asm-prototypes.h>
+#include <asm/checksum.h>
+
+#ifdef CONFIG_UML_X86
+extern void cmpxchg8b_emu(void);
+#endif
diff --git a/arch/um/include/asm/irq.h b/arch/um/include/asm/irq.h
index 749dfe8512e8..36dbedd1af48 100644
--- a/arch/um/include/asm/irq.h
+++ b/arch/um/include/asm/irq.h
@@ -13,17 +13,18 @@
#define TELNETD_IRQ 8
#define XTERM_IRQ 9
#define RANDOM_IRQ 10
+#define SIGCHLD_IRQ 11
#ifdef CONFIG_UML_NET_VECTOR
-#define VECTOR_BASE_IRQ (RANDOM_IRQ + 1)
+#define VECTOR_BASE_IRQ (SIGCHLD_IRQ + 1)
#define VECTOR_IRQ_SPACE 8
#define UM_FIRST_DYN_IRQ (VECTOR_IRQ_SPACE + VECTOR_BASE_IRQ)
#else
-#define UM_FIRST_DYN_IRQ (RANDOM_IRQ + 1)
+#define UM_FIRST_DYN_IRQ (SIGCHLD_IRQ + 1)
#endif
diff --git a/arch/um/include/asm/mmu.h b/arch/um/include/asm/mmu.h
index a3eaca41ff61..4d0e4239f3cc 100644
--- a/arch/um/include/asm/mmu.h
+++ b/arch/um/include/asm/mmu.h
@@ -6,11 +6,14 @@
#ifndef __ARCH_UM_MMU_H
#define __ARCH_UM_MMU_H
+#include "linux/types.h"
#include <mm_id.h>
typedef struct mm_context {
struct mm_id id;
+ struct list_head list;
+
/* Address range in need of a TLB sync */
unsigned long sync_tlb_range_from;
unsigned long sync_tlb_range_to;
diff --git a/arch/um/include/shared/common-offsets.h b/arch/um/include/shared/common-offsets.h
index 73f3a4792ed8..8ca66a1918c3 100644
--- a/arch/um/include/shared/common-offsets.h
+++ b/arch/um/include/shared/common-offsets.h
@@ -14,3 +14,7 @@ DEFINE(UM_THREAD_SIZE, THREAD_SIZE);
DEFINE(UM_NSEC_PER_SEC, NSEC_PER_SEC);
DEFINE(UM_NSEC_PER_USEC, NSEC_PER_USEC);
+
+DEFINE(UM_KERN_GDT_ENTRY_TLS_ENTRIES, GDT_ENTRY_TLS_ENTRIES);
+
+DEFINE(UM_SECCOMP_ARCH_NATIVE, SECCOMP_ARCH_NATIVE);
diff --git a/arch/um/include/shared/irq_user.h b/arch/um/include/shared/irq_user.h
index 88835b52ae2b..746abc24a5d5 100644
--- a/arch/um/include/shared/irq_user.h
+++ b/arch/um/include/shared/irq_user.h
@@ -17,6 +17,8 @@ enum um_irq_type {
struct siginfo;
extern void sigio_handler(int sig, struct siginfo *unused_si,
struct uml_pt_regs *regs, void *mc);
+extern void sigchld_handler(int sig, struct siginfo *unused_si,
+ struct uml_pt_regs *regs, void *mc);
void sigio_run_timetravel_handlers(void);
extern void free_irq_by_fd(int fd);
extern void deactivate_fd(int fd, int irqnum);
diff --git a/arch/um/include/shared/net_kern.h b/arch/um/include/shared/net_kern.h
deleted file mode 100644
index 67b2e9a1f2e5..000000000000
--- a/arch/um/include/shared/net_kern.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2002 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- */
-
-#ifndef __UM_NET_KERN_H
-#define __UM_NET_KERN_H
-
-#include <linux/netdevice.h>
-#include <linux/platform_device.h>
-#include <linux/skbuff.h>
-#include <linux/socket.h>
-#include <linux/list.h>
-#include <linux/workqueue.h>
-
-struct uml_net {
- struct list_head list;
- struct net_device *dev;
- struct platform_device pdev;
- int index;
-};
-
-struct uml_net_private {
- struct list_head list;
- spinlock_t lock;
- struct net_device *dev;
- struct timer_list tl;
-
- struct work_struct work;
- int fd;
- unsigned char mac[ETH_ALEN];
- int max_packet;
- unsigned short (*protocol)(struct sk_buff *);
- int (*open)(void *);
- void (*close)(int, void *);
- void (*remove)(void *);
- int (*read)(int, struct sk_buff *skb, struct uml_net_private *);
- int (*write)(int, struct sk_buff *skb, struct uml_net_private *);
-
- void (*add_address)(unsigned char *, unsigned char *, void *);
- void (*delete_address)(unsigned char *, unsigned char *, void *);
- char user[];
-};
-
-struct net_kern_info {
- void (*init)(struct net_device *, void *);
- unsigned short (*protocol)(struct sk_buff *);
- int (*read)(int, struct sk_buff *skb, struct uml_net_private *);
- int (*write)(int, struct sk_buff *skb, struct uml_net_private *);
-};
-
-struct transport {
- struct list_head list;
- const char *name;
- int (* const setup)(char *, char **, void *);
- const struct net_user_info *user;
- const struct net_kern_info *kern;
- const int private_size;
- const int setup_size;
-};
-
-extern int tap_setup_common(char *str, char *type, char **dev_name,
- char **mac_out, char **gate_addr);
-extern void register_transport(struct transport *new);
-extern unsigned short eth_protocol(struct sk_buff *skb);
-extern void uml_net_setup_etheraddr(struct net_device *dev, char *str);
-
-
-#endif
diff --git a/arch/um/include/shared/net_user.h b/arch/um/include/shared/net_user.h
deleted file mode 100644
index ba92a4d93531..000000000000
--- a/arch/um/include/shared/net_user.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- */
-
-#ifndef __UM_NET_USER_H__
-#define __UM_NET_USER_H__
-
-#define ETH_ADDR_LEN (6)
-#define ETH_HEADER_ETHERTAP (16)
-#define ETH_HEADER_OTHER (26) /* 14 for ethernet + VLAN + MPLS for crazy people */
-#define ETH_MAX_PACKET (1500)
-
-#define UML_NET_VERSION (4)
-
-struct net_user_info {
- int (*init)(void *, void *);
- int (*open)(void *);
- void (*close)(int, void *);
- void (*remove)(void *);
- void (*add_address)(unsigned char *, unsigned char *, void *);
- void (*delete_address)(unsigned char *, unsigned char *, void *);
- int max_packet;
- int mtu;
-};
-
-extern void iter_addresses(void *d, void (*cb)(unsigned char *,
- unsigned char *, void *),
- void *arg);
-
-extern void *get_output_buffer(int *len_out);
-extern void free_output_buffer(void *buffer);
-
-extern int tap_open_common(void *dev, char *gate_addr);
-extern void tap_check_ips(char *gate_addr, unsigned char *eth_addr);
-
-extern void read_output(int fd, char *output_out, int len);
-
-extern int net_read(int fd, void *buf, int len);
-extern int net_recvfrom(int fd, void *buf, int len);
-extern int net_write(int fd, void *buf, int len);
-extern int net_send(int fd, void *buf, int len);
-extern int net_sendto(int fd, void *buf, int len, void *to, int sock_len);
-
-extern void open_addr(unsigned char *addr, unsigned char *netmask, void *arg);
-extern void close_addr(unsigned char *addr, unsigned char *netmask, void *arg);
-
-extern char *split_if_spec(char *str, ...);
-
-extern int dev_netmask(void *d, void *m);
-
-#endif
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index 152a60080d5b..b35cc8ce333b 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -143,7 +143,6 @@ extern int os_access(const char *file, int mode);
extern int os_set_exec_close(int fd);
extern int os_ioctl_generic(int fd, unsigned int cmd, unsigned long arg);
extern int os_get_ifname(int fd, char *namebuf);
-extern int os_set_slip(int fd);
extern int os_mode_fd(int fd, int mode);
extern int os_seek_file(int fd, unsigned long long offset);
@@ -198,6 +197,7 @@ extern int create_mem_file(unsigned long long len);
extern void report_enomem(void);
/* process.c */
+pid_t os_reap_child(void);
extern void os_alarm_process(int pid);
extern void os_kill_process(int pid, int reap_child);
extern void os_kill_ptraced_process(int pid, int reap_child);
@@ -286,7 +286,7 @@ int unmap(struct mm_id *mm_idp, unsigned long addr, unsigned long len);
/* skas/process.c */
extern int is_skas_winch(int pid, int fd, void *data);
-extern int start_userspace(unsigned long stub_stack);
+extern int start_userspace(struct mm_id *mm_id);
extern void userspace(struct uml_pt_regs *regs);
extern void new_thread(void *stack, jmp_buf *buf, void (*handler)(void));
extern void switch_threads(jmp_buf *me, jmp_buf *you);
diff --git a/arch/um/include/shared/skas/mm_id.h b/arch/um/include/shared/skas/mm_id.h
index 140388c282f6..89df9a55fbea 100644
--- a/arch/um/include/shared/skas/mm_id.h
+++ b/arch/um/include/shared/skas/mm_id.h
@@ -6,12 +6,21 @@
#ifndef __MM_ID_H
#define __MM_ID_H
+#define STUB_MAX_FDS 4
+
struct mm_id {
int pid;
unsigned long stack;
int syscall_data_len;
+
+ /* Only used with SECCOMP mode */
+ int sock;
+ int syscall_fd_num;
+ int syscall_fd_map[STUB_MAX_FDS];
};
void __switch_mm(struct mm_id *mm_idp);
+void notify_mm_kill(int pid);
+
#endif
diff --git a/arch/um/include/shared/skas/skas.h b/arch/um/include/shared/skas/skas.h
index 85c50122ab98..7d1de4cab551 100644
--- a/arch/um/include/shared/skas/skas.h
+++ b/arch/um/include/shared/skas/skas.h
@@ -8,6 +8,7 @@
#include <sysdep/ptrace.h>
+extern int using_seccomp;
extern int userspace_pid[];
extern void new_thread_handler(void);
diff --git a/arch/um/include/shared/skas/stub-data.h b/arch/um/include/shared/skas/stub-data.h
index 81a4cace032c..c261a77a32f6 100644
--- a/arch/um/include/shared/skas/stub-data.h
+++ b/arch/um/include/shared/skas/stub-data.h
@@ -11,8 +11,15 @@
#include <linux/compiler_types.h>
#include <as-layout.h>
#include <sysdep/tls.h>
+#include <sysdep/stub-data.h>
+#include <mm_id.h>
+
+#define FUTEX_IN_CHILD 0
+#define FUTEX_IN_KERN 1
struct stub_init_data {
+ int seccomp;
+
unsigned long stub_start;
int stub_code_fd;
@@ -20,7 +27,8 @@ struct stub_init_data {
int stub_data_fd;
unsigned long stub_data_offset;
- unsigned long segv_handler;
+ unsigned long signal_handler;
+ unsigned long signal_restorer;
};
#define STUB_NEXT_SYSCALL(s) \
@@ -52,6 +60,16 @@ struct stub_data {
/* 128 leaves enough room for additional fields in the struct */
struct stub_syscall syscall_data[(UM_KERN_PAGE_SIZE - 128) / sizeof(struct stub_syscall)] __aligned(16);
+ /* data shared with signal handler (only used in seccomp mode) */
+ short restart_wait;
+ unsigned int futex;
+ int signal;
+ unsigned short si_offset;
+ unsigned short mctx_offset;
+
+ /* seccomp architecture specific state restore */
+ struct stub_data_arch arch_data;
+
/* Stack for our signal handlers and for calling into . */
unsigned char sigstack[UM_KERN_PAGE_SIZE] __aligned(UM_KERN_PAGE_SIZE);
};
diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile
index 4df1cd0d2017..4669db2aa9be 100644
--- a/arch/um/kernel/Makefile
+++ b/arch/um/kernel/Makefile
@@ -25,7 +25,6 @@ obj-$(CONFIG_GPROF) += gprof_syms.o
obj-$(CONFIG_OF) += dtb.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
-obj-$(CONFIG_GENERIC_PCI_IOMAP) += ioport.o
USER_OBJS := config.o
diff --git a/arch/um/kernel/ioport.c b/arch/um/kernel/ioport.c
deleted file mode 100644
index 7220615b3beb..000000000000
--- a/arch/um/kernel/ioport.c
+++ /dev/null
@@ -1,13 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2021 Intel Corporation
- * Author: Johannes Berg <johannes@sipsolutions.net>
- */
-#include <asm/iomap.h>
-#include <asm-generic/pci_iomap.h>
-
-void __iomem *__pci_ioport_map(struct pci_dev *dev, unsigned long port,
- unsigned int nr)
-{
- return NULL;
-}
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index abe8f30a521c..0dfaf96bb7da 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -690,3 +690,9 @@ void __init init_IRQ(void)
/* Initialize EPOLL Loop */
os_setup_epoll();
}
+
+void sigchld_handler(int sig, struct siginfo *unused_si,
+ struct uml_pt_regs *regs, void *mc)
+{
+ do_IRQ(SIGCHLD_IRQ, regs);
+}
diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c
index 0eb5a1d3ba70..849fafa4b54f 100644
--- a/arch/um/kernel/skas/mmu.c
+++ b/arch/um/kernel/skas/mmu.c
@@ -8,6 +8,7 @@
#include <linux/sched/signal.h>
#include <linux/slab.h>
+#include <shared/irq_kern.h>
#include <asm/pgalloc.h>
#include <asm/sections.h>
#include <asm/mmu_context.h>
@@ -19,6 +20,9 @@
/* Ensure the stub_data struct covers the allocated area */
static_assert(sizeof(struct stub_data) == STUB_DATA_PAGES * UM_KERN_PAGE_SIZE);
+spinlock_t mm_list_lock;
+struct list_head mm_list;
+
int init_new_context(struct task_struct *task, struct mm_struct *mm)
{
struct mm_id *new_id = &mm->context.id;
@@ -31,14 +35,14 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm)
new_id->stack = stack;
- block_signals_trace();
- new_id->pid = start_userspace(stack);
- unblock_signals_trace();
+ scoped_guard(spinlock_irqsave, &mm_list_lock) {
+ /* Insert into list, used for lookups when the child dies */
+ list_add(&mm->context.list, &mm_list);
+ }
- if (new_id->pid < 0) {
- ret = new_id->pid;
+ ret = start_userspace(new_id);
+ if (ret < 0)
goto out_free;
- }
/* Ensure the new MM is clean and nothing unwanted is mapped */
unmap(new_id, 0, STUB_START);
@@ -60,13 +64,82 @@ void destroy_context(struct mm_struct *mm)
* zero, resulting in a kill(0), which will result in the
* whole UML suddenly dying. Also, cover negative and
* 1 cases, since they shouldn't happen either.
+ *
+ * Negative cases happen if the child died unexpectedly.
*/
- if (mmu->id.pid < 2) {
+ if (mmu->id.pid >= 0 && mmu->id.pid < 2) {
printk(KERN_ERR "corrupt mm_context - pid = %d\n",
mmu->id.pid);
return;
}
- os_kill_ptraced_process(mmu->id.pid, 1);
+
+ if (mmu->id.pid > 0) {
+ os_kill_ptraced_process(mmu->id.pid, 1);
+ mmu->id.pid = -1;
+ }
+
+ if (using_seccomp && mmu->id.sock)
+ os_close_file(mmu->id.sock);
free_pages(mmu->id.stack, ilog2(STUB_DATA_PAGES));
+
+ guard(spinlock_irqsave)(&mm_list_lock);
+
+ list_del(&mm->context.list);
+}
+
+static irqreturn_t mm_sigchld_irq(int irq, void* dev)
+{
+ struct mm_context *mm_context;
+ pid_t pid;
+
+ guard(spinlock)(&mm_list_lock);
+
+ while ((pid = os_reap_child()) > 0) {
+ /*
+ * A child died, check if we have an MM with the PID. This is
+ * only relevant in SECCOMP mode (as ptrace will fail anyway).
+ *
+ * See wait_stub_done_seccomp for more details.
+ */
+ list_for_each_entry(mm_context, &mm_list, list) {
+ if (mm_context->id.pid == pid) {
+ struct stub_data *stub_data;
+ printk("Unexpectedly lost MM child! Affected tasks will segfault.");
+
+ /* Marks the MM as dead */
+ mm_context->id.pid = -1;
+
+ /*
+ * NOTE: If SMP is implemented, a futex_wake
+ * needs to be added here.
+ */
+ stub_data = (void *)mm_context->id.stack;
+ stub_data->futex = FUTEX_IN_KERN;
+
+ /*
+ * NOTE: Currently executing syscalls by
+ * affected tasks may finish normally.
+ */
+ break;
+ }
+ }
+ }
+
+ return IRQ_HANDLED;
+}
+
+static int __init init_child_tracking(void)
+{
+ int err;
+
+ spin_lock_init(&mm_list_lock);
+ INIT_LIST_HEAD(&mm_list);
+
+ err = request_irq(SIGCHLD_IRQ, mm_sigchld_irq, 0, "SIGCHLD", NULL);
+ if (err < 0)
+ panic("Failed to register SIGCHLD IRQ: %d", err);
+
+ return 0;
}
+early_initcall(init_child_tracking)
diff --git a/arch/um/kernel/skas/stub.c b/arch/um/kernel/skas/stub.c
index 796fc266d3bb..67cab46a602c 100644
--- a/arch/um/kernel/skas/stub.c
+++ b/arch/um/kernel/skas/stub.c
@@ -5,21 +5,54 @@
#include <sysdep/stub.h>
-static __always_inline int syscall_handler(struct stub_data *d)
+#include <linux/futex.h>
+#include <sys/socket.h>
+#include <errno.h>
+
+/*
+ * Known security issues
+ *
+ * Userspace can jump to this address to execute *any* syscall that is
+ * permitted by the stub. As we will return afterwards, it can do
+ * whatever it likes, including:
+ * - Tricking the kernel into handing out the memory FD
+ * - Using this memory FD to read/write all physical memory
+ * - Running in parallel to the kernel processing a syscall
+ * (possibly creating data races?)
+ * - Blocking e.g. SIGALRM to avoid time based scheduling
+ *
+ * To avoid this, the permitted location for each syscall needs to be
+ * checked for in the SECCOMP filter (which is reasonably simple). Also,
+ * more care will need to go into considerations how the code might be
+ * tricked by using a prepared stack (or even modifying the stack from
+ * another thread in case SMP support is added).
+ *
+ * As for the SIGALRM, the best counter measure will be to check in the
+ * kernel that the process is reporting back the SIGALRM in a timely
+ * fashion.
+ */
+static __always_inline int syscall_handler(int fd_map[STUB_MAX_FDS])
{
+ struct stub_data *d = get_stub_data();
int i;
unsigned long res;
+ int fd;
for (i = 0; i < d->syscall_data_len; i++) {
struct stub_syscall *sc = &d->syscall_data[i];
switch (sc->syscall) {
case STUB_SYSCALL_MMAP:
+ if (fd_map)
+ fd = fd_map[sc->mem.fd];
+ else
+ fd = sc->mem.fd;
+
res = stub_syscall6(STUB_MMAP_NR,
sc->mem.addr, sc->mem.length,
sc->mem.prot,
MAP_SHARED | MAP_FIXED,
- sc->mem.fd, sc->mem.offset);
+ fd, sc->mem.offset);
if (res != sc->mem.addr) {
d->err = res;
d->syscall_data_len = i;
@@ -51,9 +84,98 @@ static __always_inline int syscall_handler(struct stub_data *d)
void __section(".__syscall_stub")
stub_syscall_handler(void)
{
+ syscall_handler(NULL);
+
+ trap_myself();
+}
+
+void __section(".__syscall_stub")
+stub_signal_interrupt(int sig, siginfo_t *info, void *p)
+{
struct stub_data *d = get_stub_data();
+ char rcv_data;
+ union {
+ char data[CMSG_SPACE(sizeof(int) * STUB_MAX_FDS)];
+ struct cmsghdr align;
+ } ctrl = {};
+ struct iovec iov = {
+ .iov_base = &rcv_data,
+ .iov_len = 1,
+ };
+ struct msghdr msghdr = {
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = &ctrl,
+ .msg_controllen = sizeof(ctrl),
+ };
+ ucontext_t *uc = p;
+ struct cmsghdr *fd_msg;
+ int *fd_map;
+ int num_fds;
+ long res;
- syscall_handler(d);
+ d->signal = sig;
+ d->si_offset = (unsigned long)info - (unsigned long)&d->sigstack[0];
+ d->mctx_offset = (unsigned long)&uc->uc_mcontext - (unsigned long)&d->sigstack[0];
- trap_myself();
+restart_wait:
+ d->futex = FUTEX_IN_KERN;
+ do {
+ res = stub_syscall3(__NR_futex, (unsigned long)&d->futex,
+ FUTEX_WAKE, 1);
+ } while (res == -EINTR);
+
+ do {
+ res = stub_syscall4(__NR_futex, (unsigned long)&d->futex,
+ FUTEX_WAIT, FUTEX_IN_KERN, 0);
+ } while (res == -EINTR || d->futex == FUTEX_IN_KERN);
+
+ if (res < 0 && res != -EAGAIN)
+ stub_syscall1(__NR_exit_group, 1);
+
+ if (d->syscall_data_len) {
+ /* Read passed FDs (if any) */
+ do {
+ res = stub_syscall3(__NR_recvmsg, 0, (unsigned long)&msghdr, 0);
+ } while (res == -EINTR);
+
+ /* We should never have a receive error (other than -EAGAIN) */
+ if (res < 0 && res != -EAGAIN)
+ stub_syscall1(__NR_exit_group, 1);
+
+ /* Receive the FDs */
+ num_fds = 0;
+ fd_msg = msghdr.msg_control;
+ fd_map = (void *)&CMSG_DATA(fd_msg);
+ if (res == iov.iov_len && msghdr.msg_controllen > sizeof(struct cmsghdr))
+ num_fds = (fd_msg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
+
+ /* Try running queued syscalls. */
+ res = syscall_handler(fd_map);
+
+ while (num_fds)
+ stub_syscall2(__NR_close, fd_map[--num_fds], 0);
+ } else {
+ res = 0;
+ }
+
+ if (res < 0 || d->restart_wait) {
+ /* Report SIGSYS if we restart. */
+ d->signal = SIGSYS;
+ d->restart_wait = 0;
+
+ goto restart_wait;
+ }
+
+ /* Restore arch dependent state that is not part of the mcontext */
+ stub_seccomp_restore_state(&d->arch_data);
+
+ /* Return so that the host modified mcontext is restored. */
+}
+
+void __section(".__syscall_stub")
+stub_signal_restorer(void)
+{
+ /* We must not have anything on the stack when doing rt_sigreturn */
+ stub_syscall0(__NR_rt_sigreturn);
}
diff --git a/arch/um/kernel/skas/stub_exe.c b/arch/um/kernel/skas/stub_exe.c
index 23c99b285e82..cbafaa684e66 100644
--- a/arch/um/kernel/skas/stub_exe.c
+++ b/arch/um/kernel/skas/stub_exe.c
@@ -1,8 +1,12 @@
#include <sys/ptrace.h>
#include <sys/prctl.h>
+#include <sys/fcntl.h>
#include <asm/unistd.h>
#include <sysdep/stub.h>
#include <stub-data.h>
+#include <linux/filter.h>
+#include <linux/seccomp.h>
+#include <generated/asm-offsets.h>
void _start(void);
@@ -25,8 +29,6 @@ noinline static void real_init(void)
} sa = {
/* Need to set SA_RESTORER (but the handler never returns) */
.sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO | 0x04000000,
- /* no need to mask any signals */
- .sa_mask = 0,
};
/* set a nice name */
@@ -35,13 +37,20 @@ noinline static void real_init(void)
/* Make sure this process dies if the kernel dies */
stub_syscall2(__NR_prctl, PR_SET_PDEATHSIG, SIGKILL);
+ /* Needed in SECCOMP mode (and safe to do anyway) */
+ stub_syscall5(__NR_prctl, PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+
/* read information from STDIN and close it */
res = stub_syscall3(__NR_read, 0,
(unsigned long)&init_data, sizeof(init_data));
if (res != sizeof(init_data))
stub_syscall1(__NR_exit, 10);
- stub_syscall1(__NR_close, 0);
+ /* In SECCOMP mode, FD 0 is a socket and is later used for FD passing */
+ if (!init_data.seccomp)
+ stub_syscall1(__NR_close, 0);
+ else
+ stub_syscall3(__NR_fcntl, 0, F_SETFL, O_NONBLOCK);
/* map stub code + data */
res = stub_syscall6(STUB_MMAP_NR,
@@ -59,22 +68,148 @@ noinline static void real_init(void)
if (res != init_data.stub_start + UM_KERN_PAGE_SIZE)
stub_syscall1(__NR_exit, 12);
+ /* In SECCOMP mode, we only need the signalling FD from now on */
+ if (init_data.seccomp) {
+ res = stub_syscall3(__NR_close_range, 1, ~0U, 0);
+ if (res != 0)
+ stub_syscall1(__NR_exit, 13);
+ }
+
/* setup signal stack inside stub data */
stack.ss_sp = (void *)init_data.stub_start + UM_KERN_PAGE_SIZE;
stub_syscall2(__NR_sigaltstack, (unsigned long)&stack, 0);
- /* register SIGSEGV handler */
- sa.sa_handler_ = (void *) init_data.segv_handler;
- res = stub_syscall4(__NR_rt_sigaction, SIGSEGV, (unsigned long)&sa, 0,
- sizeof(sa.sa_mask));
- if (res != 0)
- stub_syscall1(__NR_exit, 13);
+ /* register signal handlers */
+ sa.sa_handler_ = (void *) init_data.signal_handler;
+ sa.sa_restorer = (void *) init_data.signal_restorer;
+ if (!init_data.seccomp) {
+ /* In ptrace mode, the SIGSEGV handler never returns */
+ sa.sa_mask = 0;
+
+ res = stub_syscall4(__NR_rt_sigaction, SIGSEGV,
+ (unsigned long)&sa, 0, sizeof(sa.sa_mask));
+ if (res != 0)
+ stub_syscall1(__NR_exit, 14);
+ } else {
+ /* SECCOMP mode uses rt_sigreturn, need to mask all signals */
+ sa.sa_mask = ~0ULL;
+
+ res = stub_syscall4(__NR_rt_sigaction, SIGSEGV,
+ (unsigned long)&sa, 0, sizeof(sa.sa_mask));
+ if (res != 0)
+ stub_syscall1(__NR_exit, 15);
+
+ res = stub_syscall4(__NR_rt_sigaction, SIGSYS,
+ (unsigned long)&sa, 0, sizeof(sa.sa_mask));
+ if (res != 0)
+ stub_syscall1(__NR_exit, 16);
+
+ res = stub_syscall4(__NR_rt_sigaction, SIGALRM,
+ (unsigned long)&sa, 0, sizeof(sa.sa_mask));
+ if (res != 0)
+ stub_syscall1(__NR_exit, 17);
+
+ res = stub_syscall4(__NR_rt_sigaction, SIGTRAP,
+ (unsigned long)&sa, 0, sizeof(sa.sa_mask));
+ if (res != 0)
+ stub_syscall1(__NR_exit, 18);
+
+ res = stub_syscall4(__NR_rt_sigaction, SIGILL,
+ (unsigned long)&sa, 0, sizeof(sa.sa_mask));
+ if (res != 0)
+ stub_syscall1(__NR_exit, 19);
+
+ res = stub_syscall4(__NR_rt_sigaction, SIGFPE,
+ (unsigned long)&sa, 0, sizeof(sa.sa_mask));
+ if (res != 0)
+ stub_syscall1(__NR_exit, 20);
+ }
+
+ /*
+ * If in seccomp mode, install the SECCOMP filter and trigger a syscall.
+ * Otherwise set PTRACE_TRACEME and do a SIGSTOP.
+ */
+ if (init_data.seccomp) {
+ struct sock_filter filter[] = {
+#if __BITS_PER_LONG > 32
+ /* [0] Load upper 32bit of instruction pointer from seccomp_data */
+ BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+ (offsetof(struct seccomp_data, instruction_pointer) + 4)),
+
+ /* [1] Jump forward 3 instructions if the upper address is not identical */
+ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, (init_data.stub_start) >> 32, 0, 3),
+#endif
+ /* [2] Load lower 32bit of instruction pointer from seccomp_data */
+ BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+ (offsetof(struct seccomp_data, instruction_pointer))),
+
+ /* [3] Mask out lower bits */
+ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 0xfffff000),
+
+ /* [4] Jump to [6] if the lower bits are not on the expected page */
+ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, (init_data.stub_start) & 0xfffff000, 1, 0),
+
+ /* [5] Trap call, allow */
+ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_TRAP),
+
+ /* [6,7] Check architecture */
+ BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+ offsetof(struct seccomp_data, arch)),
+ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K,
+ UM_SECCOMP_ARCH_NATIVE, 1, 0),
+
+ /* [8] Kill (for architecture check) */
+ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS),
+
+ /* [9] Load syscall number */
+ BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+
+ /* [10-16] Check against permitted syscalls */
+ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_futex,
+ 7, 0),
+ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K,__NR_recvmsg,
+ 6, 0),
+ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K,__NR_close,
+ 5, 0),
+ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, STUB_MMAP_NR,
+ 4, 0),
+ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_munmap,
+ 3, 0),
+#ifdef __i386__
+ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_set_thread_area,
+ 2, 0),
+#else
+ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_arch_prctl,
+ 2, 0),
+#endif
+ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_rt_sigreturn,
+ 1, 0),
+
+ /* [17] Not one of the permitted syscalls */
+ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS),
+
+ /* [18] Permitted call for the stub */
+ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = sizeof(filter) / sizeof(filter[0]),
+ .filter = filter,
+ };
+
+ if (stub_syscall3(__NR_seccomp, SECCOMP_SET_MODE_FILTER,
+ SECCOMP_FILTER_FLAG_TSYNC,
+ (unsigned long)&prog) != 0)
+ stub_syscall1(__NR_exit, 21);
- stub_syscall4(__NR_ptrace, PTRACE_TRACEME, 0, 0, 0);
+ /* Fall through, the exit syscall will cause SIGSYS */
+ } else {
+ stub_syscall4(__NR_ptrace, PTRACE_TRACEME, 0, 0, 0);
- stub_syscall2(__NR_kill, stub_syscall0(__NR_getpid), SIGSTOP);
+ stub_syscall2(__NR_kill, stub_syscall0(__NR_getpid), SIGSTOP);
+ }
- stub_syscall1(__NR_exit, 14);
+ stub_syscall1(__NR_exit, 30);
__builtin_unreachable();
}
diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c
index 1394568c0210..ae0fa2173778 100644
--- a/arch/um/kernel/time.c
+++ b/arch/um/kernel/time.c
@@ -856,11 +856,16 @@ static struct clock_event_device timer_clockevent = {
static irqreturn_t um_timer(int irq, void *dev)
{
- if (get_current()->mm != NULL)
- {
- /* userspace - relay signal, results in correct userspace timers */
+ /*
+ * Interrupt the (possibly) running userspace process, technically this
+ * should only happen if userspace is currently executing.
+ * With infinite CPU time-travel, we can only get here when userspace
+ * is not executing. Do not notify there and avoid spurious scheduling.
+ */
+ if (time_travel_mode != TT_MODE_INFCPU &&
+ time_travel_mode != TT_MODE_EXTERNAL &&
+ get_current()->mm)
os_alarm_process(get_current()->mm->context.id.pid);
- }
(*timer_clockevent.event_handler)(&timer_clockevent);
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index ef2272e92a43..5b80a3a89c20 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -16,7 +16,122 @@
#include <kern_util.h>
#include <os.h>
#include <skas.h>
-#include <arch.h>
+
+/*
+ * NOTE: UML does not have exception tables. As such, this is almost a copy
+ * of the code in mm/memory.c, only adjusting the logic to simply check whether
+ * we are coming from the kernel instead of doing an additional lookup in the
+ * exception table.
+ * We can do this simplification because we never get here if the exception was
+ * fixable.
+ */
+static inline bool get_mmap_lock_carefully(struct mm_struct *mm, bool is_user)
+{
+ if (likely(mmap_read_trylock(mm)))
+ return true;
+
+ if (!is_user)
+ return false;
+
+ return !mmap_read_lock_killable(mm);
+}
+
+static inline bool mmap_upgrade_trylock(struct mm_struct *mm)
+{
+ /*
+ * We don't have this operation yet.
+ *
+ * It should be easy enough to do: it's basically a
+ * atomic_long_try_cmpxchg_acquire()
+ * from RWSEM_READER_BIAS -> RWSEM_WRITER_LOCKED, but
+ * it also needs the proper lockdep magic etc.
+ */
+ return false;
+}
+
+static inline bool upgrade_mmap_lock_carefully(struct mm_struct *mm, bool is_user)
+{
+ mmap_read_unlock(mm);
+ if (!is_user)
+ return false;
+
+ return !mmap_write_lock_killable(mm);
+}
+
+/*
+ * Helper for page fault handling.
+ *
+ * This is kind of equivalend to "mmap_read_lock()" followed
+ * by "find_extend_vma()", except it's a lot more careful about
+ * the locking (and will drop the lock on failure).
+ *
+ * For example, if we have a kernel bug that causes a page
+ * fault, we don't want to just use mmap_read_lock() to get
+ * the mm lock, because that would deadlock if the bug were
+ * to happen while we're holding the mm lock for writing.
+ *
+ * So this checks the exception tables on kernel faults in
+ * order to only do this all for instructions that are actually
+ * expected to fault.
+ *
+ * We can also actually take the mm lock for writing if we
+ * need to extend the vma, which helps the VM layer a lot.
+ */
+static struct vm_area_struct *
+um_lock_mm_and_find_vma(struct mm_struct *mm,
+ unsigned long addr, bool is_user)
+{
+ struct vm_area_struct *vma;
+
+ if (!get_mmap_lock_carefully(mm, is_user))
+ return NULL;
+
+ vma = find_vma(mm, addr);
+ if (likely(vma && (vma->vm_start <= addr)))
+ return vma;
+
+ /*
+ * Well, dang. We might still be successful, but only
+ * if we can extend a vma to do so.
+ */
+ if (!vma || !(vma->vm_flags & VM_GROWSDOWN)) {
+ mmap_read_unlock(mm);
+ return NULL;
+ }
+
+ /*
+ * We can try to upgrade the mmap lock atomically,
+ * in which case we can continue to use the vma
+ * we already looked up.
+ *
+ * Otherwise we'll have to drop the mmap lock and
+ * re-take it, and also look up the vma again,
+ * re-checking it.
+ */
+ if (!mmap_upgrade_trylock(mm)) {
+ if (!upgrade_mmap_lock_carefully(mm, is_user))
+ return NULL;
+
+ vma = find_vma(mm, addr);
+ if (!vma)
+ goto fail;
+ if (vma->vm_start <= addr)
+ goto success;
+ if (!(vma->vm_flags & VM_GROWSDOWN))
+ goto fail;
+ }
+
+ if (expand_stack_locked(vma, addr))
+ goto fail;
+
+success:
+ mmap_write_downgrade(mm);
+ return vma;
+
+fail:
+ mmap_write_unlock(mm);
+ return NULL;
+}
/*
* Note this is constrained to return 0, -EFAULT, -EACCES, -ENOMEM by
@@ -44,21 +159,10 @@ int handle_page_fault(unsigned long address, unsigned long ip,
if (is_user)
flags |= FAULT_FLAG_USER;
retry:
- mmap_read_lock(mm);
- vma = find_vma(mm, address);
- if (!vma)
- goto out;
- if (vma->vm_start <= address)
- goto good_area;
- if (!(vma->vm_flags & VM_GROWSDOWN))
- goto out;
- if (is_user && !ARCH_IS_STACKGROW(address))
- goto out;
- vma = expand_stack(mm, address);
+ vma = um_lock_mm_and_find_vma(mm, address, is_user);
if (!vma)
goto out_nosemaphore;
-good_area:
*code_out = SEGV_ACCERR;
if (is_write) {
if (!(vma->vm_flags & VM_WRITE))
diff --git a/arch/um/os-Linux/Makefile b/arch/um/os-Linux/Makefile
index 049dfa5bc9c6..fae836713487 100644
--- a/arch/um/os-Linux/Makefile
+++ b/arch/um/os-Linux/Makefile
@@ -8,7 +8,7 @@ KCOV_INSTRUMENT := n
obj-y = execvp.o file.o helper.o irq.o main.o mem.o process.o \
registers.o sigio.o signal.o start_up.o time.o tty.o \
- umid.o user_syms.o util.o drivers/ skas/
+ umid.o user_syms.o util.o skas/
CFLAGS_signal.o += -Wframe-larger-than=4096
diff --git a/arch/um/os-Linux/drivers/Makefile b/arch/um/os-Linux/drivers/Makefile
deleted file mode 100644
index cf2d75bb1884..000000000000
--- a/arch/um/os-Linux/drivers/Makefile
+++ /dev/null
@@ -1,13 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Copyright (C) 2000, 2002 Jeff Dike (jdike@karaya.com)
-#
-
-ethertap-objs := ethertap_kern.o ethertap_user.o
-tuntap-objs := tuntap_kern.o tuntap_user.o
-
-obj-y =
-obj-$(CONFIG_UML_NET_ETHERTAP) += ethertap.o
-obj-$(CONFIG_UML_NET_TUNTAP) += tuntap.o
-
-include $(srctree)/arch/um/scripts/Makefile.rules
diff --git a/arch/um/os-Linux/drivers/etap.h b/arch/um/os-Linux/drivers/etap.h
deleted file mode 100644
index a475259f90e1..000000000000
--- a/arch/um/os-Linux/drivers/etap.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- */
-
-#ifndef __DRIVERS_ETAP_H
-#define __DRIVERS_ETAP_H
-
-#include <net_user.h>
-
-struct ethertap_data {
- char *dev_name;
- char *gate_addr;
- int data_fd;
- int control_fd;
- void *dev;
-};
-
-extern const struct net_user_info ethertap_user_info;
-
-#endif
diff --git a/arch/um/os-Linux/drivers/ethertap_kern.c b/arch/um/os-Linux/drivers/ethertap_kern.c
deleted file mode 100644
index 5e5ee40680ce..000000000000
--- a/arch/um/os-Linux/drivers/ethertap_kern.c
+++ /dev/null
@@ -1,100 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
- * James Leu (jleu@mindspring.net).
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Copyright (C) 2001 by various other people who didn't put their name here.
- */
-
-#include <linux/init.h>
-#include <linux/netdevice.h>
-#include "etap.h"
-#include <net_kern.h>
-
-struct ethertap_init {
- char *dev_name;
- char *gate_addr;
-};
-
-static void etap_init(struct net_device *dev, void *data)
-{
- struct uml_net_private *pri;
- struct ethertap_data *epri;
- struct ethertap_init *init = data;
-
- pri = netdev_priv(dev);
- epri = (struct ethertap_data *) pri->user;
- epri->dev_name = init->dev_name;
- epri->gate_addr = init->gate_addr;
- epri->data_fd = -1;
- epri->control_fd = -1;
- epri->dev = dev;
-
- printk(KERN_INFO "ethertap backend - %s", epri->dev_name);
- if (epri->gate_addr != NULL)
- printk(KERN_CONT ", IP = %s", epri->gate_addr);
- printk(KERN_CONT "\n");
-}
-
-static int etap_read(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
- int len;
-
- len = net_recvfrom(fd, skb_mac_header(skb),
- skb->dev->mtu + 2 + ETH_HEADER_ETHERTAP);
- if (len <= 0)
- return(len);
-
- skb_pull(skb, 2);
- len -= 2;
- return len;
-}
-
-static int etap_write(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
- skb_push(skb, 2);
- return net_send(fd, skb->data, skb->len);
-}
-
-const struct net_kern_info ethertap_kern_info = {
- .init = etap_init,
- .protocol = eth_protocol,
- .read = etap_read,
- .write = etap_write,
-};
-
-static int ethertap_setup(char *str, char **mac_out, void *data)
-{
- struct ethertap_init *init = data;
-
- *init = ((struct ethertap_init)
- { .dev_name = NULL,
- .gate_addr = NULL });
- if (tap_setup_common(str, "ethertap", &init->dev_name, mac_out,
- &init->gate_addr))
- return 0;
- if (init->dev_name == NULL) {
- printk(KERN_ERR "ethertap_setup : Missing tap device name\n");
- return 0;
- }
-
- return 1;
-}
-
-static struct transport ethertap_transport = {
- .list = LIST_HEAD_INIT(ethertap_transport.list),
- .name = "ethertap",
- .setup = ethertap_setup,
- .user = &ethertap_user_info,
- .kern = &ethertap_kern_info,
- .private_size = sizeof(struct ethertap_data),
- .setup_size = sizeof(struct ethertap_init),
-};
-
-static int register_ethertap(void)
-{
- register_transport(&ethertap_transport);
- return 0;
-}
-
-late_initcall(register_ethertap);
diff --git a/arch/um/os-Linux/drivers/ethertap_user.c b/arch/um/os-Linux/drivers/ethertap_user.c
deleted file mode 100644
index bdf215c0eca7..000000000000
--- a/arch/um/os-Linux/drivers/ethertap_user.c
+++ /dev/null
@@ -1,248 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
- * James Leu (jleu@mindspring.net).
- * Copyright (C) 2001 by various other people who didn't put their name here.
- */
-
-#include <stdio.h>
-#include <unistd.h>
-#include <errno.h>
-#include <string.h>
-#include <sys/socket.h>
-#include <sys/wait.h>
-#include "etap.h"
-#include <os.h>
-#include <net_user.h>
-#include <um_malloc.h>
-
-#define MAX_PACKET ETH_MAX_PACKET
-
-static int etap_user_init(void *data, void *dev)
-{
- struct ethertap_data *pri = data;
-
- pri->dev = dev;
- return 0;
-}
-
-struct addr_change {
- enum { ADD_ADDR, DEL_ADDR } what;
- unsigned char addr[4];
- unsigned char netmask[4];
-};
-
-static void etap_change(int op, unsigned char *addr, unsigned char *netmask,
- int fd)
-{
- struct addr_change change;
- char *output;
- int n;
-
- change.what = op;
- memcpy(change.addr, addr, sizeof(change.addr));
- memcpy(change.netmask, netmask, sizeof(change.netmask));
- CATCH_EINTR(n = write(fd, &change, sizeof(change)));
- if (n != sizeof(change)) {
- printk(UM_KERN_ERR "etap_change - request failed, err = %d\n",
- errno);
- return;
- }
-
- output = uml_kmalloc(UM_KERN_PAGE_SIZE, UM_GFP_KERNEL);
- if (output == NULL)
- printk(UM_KERN_ERR "etap_change : Failed to allocate output "
- "buffer\n");
- read_output(fd, output, UM_KERN_PAGE_SIZE);
- if (output != NULL) {
- printk("%s", output);
- kfree(output);
- }
-}
-
-static void etap_open_addr(unsigned char *addr, unsigned char *netmask,
- void *arg)
-{
- etap_change(ADD_ADDR, addr, netmask, *((int *) arg));
-}
-
-static void etap_close_addr(unsigned char *addr, unsigned char *netmask,
- void *arg)
-{
- etap_change(DEL_ADDR, addr, netmask, *((int *) arg));
-}
-
-struct etap_pre_exec_data {
- int control_remote;
- int control_me;
- int data_me;
-};
-
-static void etap_pre_exec(void *arg)
-{
- struct etap_pre_exec_data *data = arg;
-
- dup2(data->control_remote, 1);
- close(data->data_me);
- close(data->control_me);
-}
-
-static int etap_tramp(char *dev, char *gate, int control_me,
- int control_remote, int data_me, int data_remote)
-{
- struct etap_pre_exec_data pe_data;
- int pid, err, n;
- char version_buf[sizeof("nnnnn\0")];
- char data_fd_buf[sizeof("nnnnnn\0")];
- char gate_buf[sizeof("nnn.nnn.nnn.nnn\0")];
- char *setup_args[] = { "uml_net", version_buf, "ethertap", dev,
- data_fd_buf, gate_buf, NULL };
- char *nosetup_args[] = { "uml_net", version_buf, "ethertap",
- dev, data_fd_buf, NULL };
- char **args, c;
-
- sprintf(data_fd_buf, "%d", data_remote);
- sprintf(version_buf, "%d", UML_NET_VERSION);
- if (gate != NULL) {
- strscpy(gate_buf, gate);
- args = setup_args;
- }
- else args = nosetup_args;
-
- err = 0;
- pe_data.control_remote = control_remote;
- pe_data.control_me = control_me;
- pe_data.data_me = data_me;
- pid = run_helper(etap_pre_exec, &pe_data, args);
-
- if (pid < 0)
- err = pid;
- close(data_remote);
- close(control_remote);
- CATCH_EINTR(n = read(control_me, &c, sizeof(c)));
- if (n != sizeof(c)) {
- err = -errno;
- printk(UM_KERN_ERR "etap_tramp : read of status failed, "
- "err = %d\n", -err);
- return err;
- }
- if (c != 1) {
- printk(UM_KERN_ERR "etap_tramp : uml_net failed\n");
- err = helper_wait(pid);
- }
- return err;
-}
-
-static int etap_open(void *data)
-{
- struct ethertap_data *pri = data;
- char *output;
- int data_fds[2], control_fds[2], err, output_len;
-
- err = tap_open_common(pri->dev, pri->gate_addr);
- if (err)
- return err;
-
- err = socketpair(AF_UNIX, SOCK_DGRAM, 0, data_fds);
- if (err) {
- err = -errno;
- printk(UM_KERN_ERR "etap_open - data socketpair failed - "
- "err = %d\n", errno);
- return err;
- }
-
- err = socketpair(AF_UNIX, SOCK_STREAM, 0, control_fds);
- if (err) {
- err = -errno;
- printk(UM_KERN_ERR "etap_open - control socketpair failed - "
- "err = %d\n", errno);
- goto out_close_data;
- }
-
- err = etap_tramp(pri->dev_name, pri->gate_addr, control_fds[0],
- control_fds[1], data_fds[0], data_fds[1]);
- output_len = UM_KERN_PAGE_SIZE;
- output = uml_kmalloc(output_len, UM_GFP_KERNEL);
- read_output(control_fds[0], output, output_len);
-
- if (output == NULL)
- printk(UM_KERN_ERR "etap_open : failed to allocate output "
- "buffer\n");
- else {
- printk("%s", output);
- kfree(output);
- }
-
- if (err < 0) {
- printk(UM_KERN_ERR "etap_tramp failed - err = %d\n", -err);
- goto out_close_control;
- }
-
- pri->data_fd = data_fds[0];
- pri->control_fd = control_fds[0];
- iter_addresses(pri->dev, etap_open_addr, &pri->control_fd);
- return data_fds[0];
-
-out_close_control:
- close(control_fds[0]);
- close(control_fds[1]);
-out_close_data:
- close(data_fds[0]);
- close(data_fds[1]);
- return err;
-}
-
-static void etap_close(int fd, void *data)
-{
- struct ethertap_data *pri = data;
-
- iter_addresses(pri->dev, etap_close_addr, &pri->control_fd);
- close(fd);
-
- if (shutdown(pri->data_fd, SHUT_RDWR) < 0)
- printk(UM_KERN_ERR "etap_close - shutdown data socket failed, "
- "errno = %d\n", errno);
-
- if (shutdown(pri->control_fd, SHUT_RDWR) < 0)
- printk(UM_KERN_ERR "etap_close - shutdown control socket "
- "failed, errno = %d\n", errno);
-
- close(pri->data_fd);
- pri->data_fd = -1;
- close(pri->control_fd);
- pri->control_fd = -1;
-}
-
-static void etap_add_addr(unsigned char *addr, unsigned char *netmask,
- void *data)
-{
- struct ethertap_data *pri = data;
-
- tap_check_ips(pri->gate_addr, addr);
- if (pri->control_fd == -1)
- return;
- etap_open_addr(addr, netmask, &pri->control_fd);
-}
-
-static void etap_del_addr(unsigned char *addr, unsigned char *netmask,
- void *data)
-{
- struct ethertap_data *pri = data;
-
- if (pri->control_fd == -1)
- return;
-
- etap_close_addr(addr, netmask, &pri->control_fd);
-}
-
-const struct net_user_info ethertap_user_info = {
- .init = etap_user_init,
- .open = etap_open,
- .close = etap_close,
- .remove = NULL,
- .add_address = etap_add_addr,
- .delete_address = etap_del_addr,
- .mtu = ETH_MAX_PACKET,
- .max_packet = ETH_MAX_PACKET + ETH_HEADER_ETHERTAP,
-};
diff --git a/arch/um/os-Linux/drivers/tuntap.h b/arch/um/os-Linux/drivers/tuntap.h
deleted file mode 100644
index e364e42abfc5..000000000000
--- a/arch/um/os-Linux/drivers/tuntap.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- */
-
-#ifndef __UM_TUNTAP_H
-#define __UM_TUNTAP_H
-
-#include <net_user.h>
-
-struct tuntap_data {
- char *dev_name;
- int fixed_config;
- char *gate_addr;
- int fd;
- void *dev;
-};
-
-extern const struct net_user_info tuntap_user_info;
-
-#endif
diff --git a/arch/um/os-Linux/drivers/tuntap_kern.c b/arch/um/os-Linux/drivers/tuntap_kern.c
deleted file mode 100644
index ff022d9cf0dd..000000000000
--- a/arch/um/os-Linux/drivers/tuntap_kern.c
+++ /dev/null
@@ -1,86 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- */
-
-#include <linux/netdevice.h>
-#include <linux/init.h>
-#include <linux/skbuff.h>
-#include <asm/errno.h>
-#include <net_kern.h>
-#include "tuntap.h"
-
-struct tuntap_init {
- char *dev_name;
- char *gate_addr;
-};
-
-static void tuntap_init(struct net_device *dev, void *data)
-{
- struct uml_net_private *pri;
- struct tuntap_data *tpri;
- struct tuntap_init *init = data;
-
- pri = netdev_priv(dev);
- tpri = (struct tuntap_data *) pri->user;
- tpri->dev_name = init->dev_name;
- tpri->fixed_config = (init->dev_name != NULL);
- tpri->gate_addr = init->gate_addr;
- tpri->fd = -1;
- tpri->dev = dev;
-
- printk(KERN_INFO "TUN/TAP backend - ");
- if (tpri->gate_addr != NULL)
- printk(KERN_CONT "IP = %s", tpri->gate_addr);
- printk(KERN_CONT "\n");
-}
-
-static int tuntap_read(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
- return net_read(fd, skb_mac_header(skb),
- skb->dev->mtu + ETH_HEADER_OTHER);
-}
-
-static int tuntap_write(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
- return net_write(fd, skb->data, skb->len);
-}
-
-const struct net_kern_info tuntap_kern_info = {
- .init = tuntap_init,
- .protocol = eth_protocol,
- .read = tuntap_read,
- .write = tuntap_write,
-};
-
-static int tuntap_setup(char *str, char **mac_out, void *data)
-{
- struct tuntap_init *init = data;
-
- *init = ((struct tuntap_init)
- { .dev_name = NULL,
- .gate_addr = NULL });
- if (tap_setup_common(str, "tuntap", &init->dev_name, mac_out,
- &init->gate_addr))
- return 0;
-
- return 1;
-}
-
-static struct transport tuntap_transport = {
- .list = LIST_HEAD_INIT(tuntap_transport.list),
- .name = "tuntap",
- .setup = tuntap_setup,
- .user = &tuntap_user_info,
- .kern = &tuntap_kern_info,
- .private_size = sizeof(struct tuntap_data),
- .setup_size = sizeof(struct tuntap_init),
-};
-
-static int register_tuntap(void)
-{
- register_transport(&tuntap_transport);
- return 0;
-}
-
-late_initcall(register_tuntap);
diff --git a/arch/um/os-Linux/drivers/tuntap_user.c b/arch/um/os-Linux/drivers/tuntap_user.c
deleted file mode 100644
index 91f0e27ca3a6..000000000000
--- a/arch/um/os-Linux/drivers/tuntap_user.c
+++ /dev/null
@@ -1,215 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- */
-
-#include <stdio.h>
-#include <unistd.h>
-#include <errno.h>
-#include <string.h>
-#include <linux/if_tun.h>
-#include <net/if.h>
-#include <sys/ioctl.h>
-#include <sys/socket.h>
-#include <sys/wait.h>
-#include <sys/uio.h>
-#include <kern_util.h>
-#include <os.h>
-#include "tuntap.h"
-
-static int tuntap_user_init(void *data, void *dev)
-{
- struct tuntap_data *pri = data;
-
- pri->dev = dev;
- return 0;
-}
-
-static void tuntap_add_addr(unsigned char *addr, unsigned char *netmask,
- void *data)
-{
- struct tuntap_data *pri = data;
-
- tap_check_ips(pri->gate_addr, addr);
- if ((pri->fd == -1) || pri->fixed_config)
- return;
- open_addr(addr, netmask, pri->dev_name);
-}
-
-static void tuntap_del_addr(unsigned char *addr, unsigned char *netmask,
- void *data)
-{
- struct tuntap_data *pri = data;
-
- if ((pri->fd == -1) || pri->fixed_config)
- return;
- close_addr(addr, netmask, pri->dev_name);
-}
-
-struct tuntap_pre_exec_data {
- int stdout_fd;
- int close_me;
-};
-
-static void tuntap_pre_exec(void *arg)
-{
- struct tuntap_pre_exec_data *data = arg;
-
- dup2(data->stdout_fd, 1);
- close(data->close_me);
-}
-
-static int tuntap_open_tramp(char *gate, int *fd_out, int me, int remote,
- char *buffer, int buffer_len, int *used_out)
-{
- struct tuntap_pre_exec_data data;
- char version_buf[sizeof("nnnnn\0")];
- char *argv[] = { "uml_net", version_buf, "tuntap", "up", gate,
- NULL };
- char buf[CMSG_SPACE(sizeof(*fd_out))];
- struct msghdr msg;
- struct cmsghdr *cmsg;
- struct iovec iov;
- int pid, n, err;
-
- sprintf(version_buf, "%d", UML_NET_VERSION);
-
- data.stdout_fd = remote;
- data.close_me = me;
-
- pid = run_helper(tuntap_pre_exec, &data, argv);
-
- if (pid < 0)
- return pid;
-
- close(remote);
-
- msg.msg_name = NULL;
- msg.msg_namelen = 0;
- if (buffer != NULL) {
- iov = ((struct iovec) { buffer, buffer_len });
- msg.msg_iov = &iov;
- msg.msg_iovlen = 1;
- }
- else {
- msg.msg_iov = NULL;
- msg.msg_iovlen = 0;
- }
- msg.msg_control = buf;
- msg.msg_controllen = sizeof(buf);
- msg.msg_flags = 0;
- n = recvmsg(me, &msg, 0);
- *used_out = n;
- if (n < 0) {
- err = -errno;
- printk(UM_KERN_ERR "tuntap_open_tramp : recvmsg failed - "
- "errno = %d\n", errno);
- return err;
- }
- helper_wait(pid);
-
- cmsg = CMSG_FIRSTHDR(&msg);
- if (cmsg == NULL) {
- printk(UM_KERN_ERR "tuntap_open_tramp : didn't receive a "
- "message\n");
- return -EINVAL;
- }
- if ((cmsg->cmsg_level != SOL_SOCKET) ||
- (cmsg->cmsg_type != SCM_RIGHTS)) {
- printk(UM_KERN_ERR "tuntap_open_tramp : didn't receive a "
- "descriptor\n");
- return -EINVAL;
- }
- *fd_out = ((int *) CMSG_DATA(cmsg))[0];
- os_set_exec_close(*fd_out);
- return 0;
-}
-
-static int tuntap_open(void *data)
-{
- struct ifreq ifr;
- struct tuntap_data *pri = data;
- char *output, *buffer;
- int err, fds[2], len, used;
-
- err = tap_open_common(pri->dev, pri->gate_addr);
- if (err < 0)
- return err;
-
- if (pri->fixed_config) {
- pri->fd = os_open_file("/dev/net/tun",
- of_cloexec(of_rdwr(OPENFLAGS())), 0);
- if (pri->fd < 0) {
- printk(UM_KERN_ERR "Failed to open /dev/net/tun, "
- "err = %d\n", -pri->fd);
- return pri->fd;
- }
- memset(&ifr, 0, sizeof(ifr));
- ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
- strscpy(ifr.ifr_name, pri->dev_name);
- if (ioctl(pri->fd, TUNSETIFF, &ifr) < 0) {
- err = -errno;
- printk(UM_KERN_ERR "TUNSETIFF failed, errno = %d\n",
- errno);
- close(pri->fd);
- return err;
- }
- }
- else {
- err = socketpair(AF_UNIX, SOCK_DGRAM, 0, fds);
- if (err) {
- err = -errno;
- printk(UM_KERN_ERR "tuntap_open : socketpair failed - "
- "errno = %d\n", errno);
- return err;
- }
-
- buffer = get_output_buffer(&len);
- if (buffer != NULL)
- len--;
- used = 0;
-
- err = tuntap_open_tramp(pri->gate_addr, &pri->fd, fds[0],
- fds[1], buffer, len, &used);
-
- output = buffer;
- if (err < 0) {
- printk("%s", output);
- free_output_buffer(buffer);
- printk(UM_KERN_ERR "tuntap_open_tramp failed - "
- "err = %d\n", -err);
- return err;
- }
-
- pri->dev_name = uml_strdup(buffer);
- output += IFNAMSIZ;
- printk("%s", output);
- free_output_buffer(buffer);
-
- close(fds[0]);
- iter_addresses(pri->dev, open_addr, pri->dev_name);
- }
-
- return pri->fd;
-}
-
-static void tuntap_close(int fd, void *data)
-{
- struct tuntap_data *pri = data;
-
- if (!pri->fixed_config)
- iter_addresses(pri->dev, close_addr, pri->dev_name);
- close(fd);
- pri->fd = -1;
-}
-
-const struct net_user_info tuntap_user_info = {
- .init = tuntap_user_init,
- .open = tuntap_open,
- .close = tuntap_close,
- .remove = NULL,
- .add_address = tuntap_add_addr,
- .delete_address = tuntap_del_addr,
- .mtu = ETH_MAX_PACKET,
- .max_packet = ETH_MAX_PACKET + ETH_HEADER_OTHER,
-};
diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
index a0d01c68ce3e..617886d1fb1e 100644
--- a/arch/um/os-Linux/file.c
+++ b/arch/um/os-Linux/file.c
@@ -106,21 +106,6 @@ int os_get_ifname(int fd, char* namebuf)
return 0;
}
-int os_set_slip(int fd)
-{
- int disc, sencap;
-
- disc = N_SLIP;
- if (ioctl(fd, TIOCSETD, &disc) < 0)
- return -errno;
-
- sencap = 0;
- if (ioctl(fd, SIOCSIFENCAP, &sencap) < 0)
- return -errno;
-
- return 0;
-}
-
int os_mode_fd(int fd, int mode)
{
int err;
diff --git a/arch/um/os-Linux/internal.h b/arch/um/os-Linux/internal.h
index 317fca190c2b..5d8d3b0817a9 100644
--- a/arch/um/os-Linux/internal.h
+++ b/arch/um/os-Linux/internal.h
@@ -2,6 +2,9 @@
#ifndef __UM_OS_LINUX_INTERNAL_H
#define __UM_OS_LINUX_INTERNAL_H
+#include <mm_id.h>
+#include <stub-data.h>
+
/*
* elf_aux.c
*/
@@ -16,5 +19,5 @@ void check_tmpexec(void);
* skas/process.c
*/
void wait_stub_done(int pid);
-
+void wait_stub_done_seccomp(struct mm_id *mm_idp, int running, int wait_sigsys);
#endif /* __UM_OS_LINUX_INTERNAL_H */
diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c
index 184566edeee9..00b49e90d05f 100644
--- a/arch/um/os-Linux/process.c
+++ b/arch/um/os-Linux/process.c
@@ -18,17 +18,29 @@
#include <init.h>
#include <longjmp.h>
#include <os.h>
+#include <skas/skas.h>
void os_alarm_process(int pid)
{
+ if (pid <= 0)
+ return;
+
kill(pid, SIGALRM);
}
void os_kill_process(int pid, int reap_child)
{
+ if (pid <= 0)
+ return;
+
+ /* Block signals until child is reaped */
+ block_signals();
+
kill(pid, SIGKILL);
if (reap_child)
CATCH_EINTR(waitpid(pid, NULL, __WALL));
+
+ unblock_signals();
}
/* Kill off a ptraced child by all means available. kill it normally first,
@@ -38,11 +50,27 @@ void os_kill_process(int pid, int reap_child)
void os_kill_ptraced_process(int pid, int reap_child)
{
+ if (pid <= 0)
+ return;
+
+ /* Block signals until child is reaped */
+ block_signals();
+
kill(pid, SIGKILL);
ptrace(PTRACE_KILL, pid);
ptrace(PTRACE_CONT, pid);
if (reap_child)
CATCH_EINTR(waitpid(pid, NULL, __WALL));
+
+ unblock_signals();
+}
+
+pid_t os_reap_child(void)
+{
+ int status;
+
+ /* Try to reap a child */
+ return waitpid(-1, &status, WNOHANG);
}
/* Don't use the glibc version, which caches the result in TLS. It misses some
@@ -151,6 +179,9 @@ void init_new_thread_signals(void)
set_handler(SIGBUS);
signal(SIGHUP, SIG_IGN);
set_handler(SIGIO);
+ /* We (currently) only use the child reaper IRQ in seccomp mode */
+ if (using_seccomp)
+ set_handler(SIGCHLD);
signal(SIGWINCH, SIG_IGN);
}
diff --git a/arch/um/os-Linux/registers.c b/arch/um/os-Linux/registers.c
index d7ca148807b2..bfba2cbc9478 100644
--- a/arch/um/os-Linux/registers.c
+++ b/arch/um/os-Linux/registers.c
@@ -14,8 +14,8 @@
/* This is set once at boot time and not changed thereafter */
-static unsigned long exec_regs[MAX_REG_NR];
-static unsigned long *exec_fp_regs;
+unsigned long exec_regs[MAX_REG_NR];
+unsigned long *exec_fp_regs;
int init_pid_registers(int pid)
{
diff --git a/arch/um/os-Linux/sigio.c b/arch/um/os-Linux/sigio.c
index a05a6ecee756..6de145f8fe3d 100644
--- a/arch/um/os-Linux/sigio.c
+++ b/arch/um/os-Linux/sigio.c
@@ -12,6 +12,7 @@
#include <signal.h>
#include <string.h>
#include <sys/epoll.h>
+#include <asm/unistd.h>
#include <kern_util.h>
#include <init.h>
#include <os.h>
@@ -46,7 +47,7 @@ static void *write_sigio_thread(void *unused)
__func__, errno);
}
- CATCH_EINTR(r = tgkill(pid, pid, SIGIO));
+ CATCH_EINTR(r = syscall(__NR_tgkill, pid, pid, SIGIO));
if (r < 0)
printk(UM_KERN_ERR "%s: tgkill failed, errno = %d\n",
__func__, errno);
diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c
index e71e5b4878d1..11f07f498270 100644
--- a/arch/um/os-Linux/signal.c
+++ b/arch/um/os-Linux/signal.c
@@ -29,6 +29,7 @@ void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *, void *mc) =
[SIGBUS] = relay_signal,
[SIGSEGV] = segv_handler,
[SIGIO] = sigio_handler,
+ [SIGCHLD] = sigchld_handler,
};
static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc)
@@ -44,7 +45,7 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc)
}
/* enable signals if sig isn't IRQ signal */
- if ((sig != SIGIO) && (sig != SIGWINCH))
+ if ((sig != SIGIO) && (sig != SIGWINCH) && (sig != SIGCHLD))
unblock_signals_trace();
(*sig_info[sig])(sig, si, &r, mc);
@@ -64,6 +65,9 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc)
#define SIGALRM_BIT 1
#define SIGALRM_MASK (1 << SIGALRM_BIT)
+#define SIGCHLD_BIT 2
+#define SIGCHLD_MASK (1 << SIGCHLD_BIT)
+
int signals_enabled;
#if IS_ENABLED(CONFIG_UML_TIME_TRAVEL_SUPPORT)
static int signals_blocked, signals_blocked_pending;
@@ -102,6 +106,11 @@ static void sig_handler(int sig, struct siginfo *si, mcontext_t *mc)
return;
}
+ if (!enabled && (sig == SIGCHLD)) {
+ signals_pending |= SIGCHLD_MASK;
+ return;
+ }
+
block_signals_trace();
sig_handler_common(sig, si, mc);
@@ -181,6 +190,8 @@ static void (*handlers[_NSIG])(int sig, struct siginfo *si, mcontext_t *mc) = {
[SIGIO] = sig_handler,
[SIGWINCH] = sig_handler,
+ /* SIGCHLD is only actually registered in seccomp mode. */
+ [SIGCHLD] = sig_handler,
[SIGALRM] = timer_alarm_handler,
[SIGUSR1] = sigusr1_handler,
@@ -309,6 +320,12 @@ void unblock_signals(void)
if (save_pending & SIGIO_MASK)
sig_handler_common(SIGIO, NULL, NULL);
+ if (save_pending & SIGCHLD_MASK) {
+ struct uml_pt_regs regs = {};
+
+ sigchld_handler(SIGCHLD, NULL, &regs, NULL);
+ }
+
/* Do not reenter the handler */
if ((save_pending & SIGALRM_MASK) && (!(signals_active & SIGALRM_MASK)))
diff --git a/arch/um/os-Linux/skas/mem.c b/arch/um/os-Linux/skas/mem.c
index d7f1814b0e5a..8b9921ac3ef8 100644
--- a/arch/um/os-Linux/skas/mem.c
+++ b/arch/um/os-Linux/skas/mem.c
@@ -43,6 +43,16 @@ void syscall_stub_dump_error(struct mm_id *mm_idp)
print_hex_dump(UM_KERN_ERR, " syscall data: ", 0,
16, 4, sc, sizeof(*sc), 0);
+
+ if (using_seccomp) {
+ printk(UM_KERN_ERR "%s: FD map num: %d", __func__,
+ mm_idp->syscall_fd_num);
+ print_hex_dump(UM_KERN_ERR,
+ " FD map: ", 0, 16,
+ sizeof(mm_idp->syscall_fd_map[0]),
+ mm_idp->syscall_fd_map,
+ sizeof(mm_idp->syscall_fd_map), 0);
+ }
}
static inline unsigned long *check_init_stack(struct mm_id * mm_idp,
@@ -80,27 +90,32 @@ static inline long do_syscall_stub(struct mm_id *mm_idp)
int n, i;
int err, pid = mm_idp->pid;
- n = ptrace_setregs(pid, syscall_regs);
- if (n < 0) {
- printk(UM_KERN_ERR "Registers - \n");
- for (i = 0; i < MAX_REG_NR; i++)
- printk(UM_KERN_ERR "\t%d\t0x%lx\n", i, syscall_regs[i]);
- panic("%s : PTRACE_SETREGS failed, errno = %d\n",
- __func__, -n);
- }
-
/* Inform process how much we have filled in. */
proc_data->syscall_data_len = mm_idp->syscall_data_len;
- err = ptrace(PTRACE_CONT, pid, 0, 0);
- if (err)
- panic("Failed to continue stub, pid = %d, errno = %d\n", pid,
- errno);
-
- wait_stub_done(pid);
+ if (using_seccomp) {
+ proc_data->restart_wait = 1;
+ wait_stub_done_seccomp(mm_idp, 0, 1);
+ } else {
+ n = ptrace_setregs(pid, syscall_regs);
+ if (n < 0) {
+ printk(UM_KERN_ERR "Registers -\n");
+ for (i = 0; i < MAX_REG_NR; i++)
+ printk(UM_KERN_ERR "\t%d\t0x%lx\n", i, syscall_regs[i]);
+ panic("%s : PTRACE_SETREGS failed, errno = %d\n",
+ __func__, -n);
+ }
+
+ err = ptrace(PTRACE_CONT, pid, 0, 0);
+ if (err)
+ panic("Failed to continue stub, pid = %d, errno = %d\n",
+ pid, errno);
+
+ wait_stub_done(pid);
+ }
/*
- * proc_data->err will be non-zero if there was an (unexpected) error.
+ * proc_data->err will be negative if there was an (unexpected) error.
* In that case, syscall_data_len points to the last executed syscall,
* otherwise it will be zero (but we do not need to rely on that).
*/
@@ -113,6 +128,9 @@ static inline long do_syscall_stub(struct mm_id *mm_idp)
mm_idp->syscall_data_len = 0;
}
+ if (using_seccomp)
+ mm_idp->syscall_fd_num = 0;
+
return mm_idp->syscall_data_len;
}
@@ -175,6 +193,44 @@ static struct stub_syscall *syscall_stub_get_previous(struct mm_id *mm_idp,
return NULL;
}
+static int get_stub_fd(struct mm_id *mm_idp, int fd)
+{
+ int i;
+
+ /* Find an FD slot (or flush and use first) */
+ if (!using_seccomp)
+ return fd;
+
+ /* Already crashed, value does not matter */
+ if (mm_idp->syscall_data_len < 0)
+ return 0;
+
+ /* Find existing FD in map if we can allocate another syscall */
+ if (mm_idp->syscall_data_len <
+ ARRAY_SIZE(((struct stub_data *)NULL)->syscall_data)) {
+ for (i = 0; i < mm_idp->syscall_fd_num; i++) {
+ if (mm_idp->syscall_fd_map[i] == fd)
+ return i;
+ }
+
+ if (mm_idp->syscall_fd_num < STUB_MAX_FDS) {
+ i = mm_idp->syscall_fd_num;
+ mm_idp->syscall_fd_map[i] = fd;
+
+ mm_idp->syscall_fd_num++;
+
+ return i;
+ }
+ }
+
+ /* FD map full or no syscall space available, continue after flush */
+ do_syscall_stub(mm_idp);
+ mm_idp->syscall_fd_map[0] = fd;
+ mm_idp->syscall_fd_num = 1;
+
+ return 0;
+}
+
int map(struct mm_id *mm_idp, unsigned long virt, unsigned long len, int prot,
int phys_fd, unsigned long long offset)
{
@@ -182,12 +238,21 @@ int map(struct mm_id *mm_idp, unsigned long virt, unsigned long len, int prot,
/* Compress with previous syscall if that is possible */
sc = syscall_stub_get_previous(mm_idp, STUB_SYSCALL_MMAP, virt);
- if (sc && sc->mem.prot == prot && sc->mem.fd == phys_fd &&
+ if (sc && sc->mem.prot == prot &&
sc->mem.offset == MMAP_OFFSET(offset - sc->mem.length)) {
- sc->mem.length += len;
- return 0;
+ int prev_fd = sc->mem.fd;
+
+ if (using_seccomp)
+ prev_fd = mm_idp->syscall_fd_map[sc->mem.fd];
+
+ if (phys_fd == prev_fd) {
+ sc->mem.length += len;
+ return 0;
+ }
}
+ phys_fd = get_stub_fd(mm_idp, phys_fd);
+
sc = syscall_stub_alloc(mm_idp);
sc->syscall = STUB_SYSCALL_MMAP;
sc->mem.addr = virt;
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index ae2aea062f06..e42ffac23e3c 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/*
+ * Copyright (C) 2021 Benjamin Berg <benjamin@sipsolutions.net>
* Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de)
* Copyright (C) 2002- 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
*/
@@ -15,6 +16,7 @@
#include <sys/mman.h>
#include <sys/wait.h>
#include <sys/stat.h>
+#include <sys/socket.h>
#include <asm/unistd.h>
#include <as-layout.h>
#include <init.h>
@@ -25,8 +27,11 @@
#include <registers.h>
#include <skas.h>
#include <sysdep/stub.h>
+#include <sysdep/mcontext.h>
+#include <linux/futex.h>
#include <linux/threads.h>
#include <timetravel.h>
+#include <asm-generic/rwonce.h>
#include "../internal.h"
int is_skas_winch(int pid, int fd, void *data)
@@ -142,6 +147,105 @@ bad_wait:
fatal_sigsegv();
}
+void wait_stub_done_seccomp(struct mm_id *mm_idp, int running, int wait_sigsys)
+{
+ struct stub_data *data = (void *)mm_idp->stack;
+ int ret;
+
+ do {
+ const char byte = 0;
+ struct iovec iov = {
+ .iov_base = (void *)&byte,
+ .iov_len = sizeof(byte),
+ };
+ union {
+ char data[CMSG_SPACE(sizeof(mm_idp->syscall_fd_map))];
+ struct cmsghdr align;
+ } ctrl;
+ struct msghdr msgh = {
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ };
+
+ if (!running) {
+ if (mm_idp->syscall_fd_num) {
+ unsigned int fds_size =
+ sizeof(int) * mm_idp->syscall_fd_num;
+ struct cmsghdr *cmsg;
+
+ msgh.msg_control = ctrl.data;
+ msgh.msg_controllen = CMSG_SPACE(fds_size);
+ cmsg = CMSG_FIRSTHDR(&msgh);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(fds_size);
+ memcpy(CMSG_DATA(cmsg), mm_idp->syscall_fd_map,
+ fds_size);
+
+ CATCH_EINTR(syscall(__NR_sendmsg, mm_idp->sock,
+ &msgh, 0));
+ }
+
+ data->signal = 0;
+ data->futex = FUTEX_IN_CHILD;
+ CATCH_EINTR(syscall(__NR_futex, &data->futex,
+ FUTEX_WAKE, 1, NULL, NULL, 0));
+ }
+
+ do {
+ /*
+ * We need to check whether the child is still alive
+ * before and after the FUTEX_WAIT call. Before, in
+ * case it just died but we still updated data->futex
+ * to FUTEX_IN_CHILD. And after, in case it died while
+ * we were waiting (and SIGCHLD woke us up, see the
+ * IRQ handler in mmu.c).
+ *
+ * Either way, if PID is negative, then we have no
+ * choice but to kill the task.
+ */
+ if (__READ_ONCE(mm_idp->pid) < 0)
+ goto out_kill;
+
+ ret = syscall(__NR_futex, &data->futex,
+ FUTEX_WAIT, FUTEX_IN_CHILD,
+ NULL, NULL, 0);
+ if (ret < 0 && errno != EINTR && errno != EAGAIN) {
+ printk(UM_KERN_ERR "%s : FUTEX_WAIT failed, errno = %d\n",
+ __func__, errno);
+ goto out_kill;
+ }
+ } while (data->futex == FUTEX_IN_CHILD);
+
+ if (__READ_ONCE(mm_idp->pid) < 0)
+ goto out_kill;
+
+ running = 0;
+
+ /* We may receive a SIGALRM before SIGSYS, iterate again. */
+ } while (wait_sigsys && data->signal == SIGALRM);
+
+ if (data->mctx_offset > sizeof(data->sigstack) - sizeof(mcontext_t)) {
+ printk(UM_KERN_ERR "%s : invalid mcontext offset", __func__);
+ goto out_kill;
+ }
+
+ if (wait_sigsys && data->signal != SIGSYS) {
+ printk(UM_KERN_ERR "%s : expected SIGSYS but got %d",
+ __func__, data->signal);
+ goto out_kill;
+ }
+
+ return;
+
+out_kill:
+ printk(UM_KERN_ERR "%s : failed to wait for stub, pid = %d, errno = %d\n",
+ __func__, mm_idp->pid, errno);
+ /* This is not true inside start_userspace */
+ if (current_mm_id() == mm_idp)
+ fatal_sigsegv();
+}
+
extern unsigned long current_stub_stack(void);
static void get_skas_faultinfo(int pid, struct faultinfo *fi)
@@ -163,12 +267,6 @@ static void get_skas_faultinfo(int pid, struct faultinfo *fi)
memcpy(fi, (void *)current_stub_stack(), sizeof(*fi));
}
-static void handle_segv(int pid, struct uml_pt_regs *regs)
-{
- get_skas_faultinfo(pid, &regs->faultinfo);
- segv(regs->faultinfo, 0, 1, NULL, NULL);
-}
-
static void handle_trap(int pid, struct uml_pt_regs *regs)
{
if ((UPT_IP(regs) >= STUB_START) && (UPT_IP(regs) < STUB_END))
@@ -181,29 +279,48 @@ extern char __syscall_stub_start[];
static int stub_exe_fd;
+struct tramp_data {
+ struct stub_data *stub_data;
+ /* 0 is inherited, 1 is the kernel side */
+ int sockpair[2];
+};
+
#ifndef CLOSE_RANGE_CLOEXEC
#define CLOSE_RANGE_CLOEXEC (1U << 2)
#endif
-static int userspace_tramp(void *stack)
+static int userspace_tramp(void *data)
{
+ struct tramp_data *tramp_data = data;
char *const argv[] = { "uml-userspace", NULL };
- int pipe_fds[2];
unsigned long long offset;
struct stub_init_data init_data = {
+ .seccomp = using_seccomp,
.stub_start = STUB_START,
- .segv_handler = STUB_CODE +
- (unsigned long) stub_segv_handler -
- (unsigned long) __syscall_stub_start,
};
struct iomem_region *iomem;
int ret;
+ if (using_seccomp) {
+ init_data.signal_handler = STUB_CODE +
+ (unsigned long) stub_signal_interrupt -
+ (unsigned long) __syscall_stub_start;
+ init_data.signal_restorer = STUB_CODE +
+ (unsigned long) stub_signal_restorer -
+ (unsigned long) __syscall_stub_start;
+ } else {
+ init_data.signal_handler = STUB_CODE +
+ (unsigned long) stub_segv_handler -
+ (unsigned long) __syscall_stub_start;
+ init_data.signal_restorer = 0;
+ }
+
init_data.stub_code_fd = phys_mapping(uml_to_phys(__syscall_stub_start),
&offset);
init_data.stub_code_offset = MMAP_OFFSET(offset);
- init_data.stub_data_fd = phys_mapping(uml_to_phys(stack), &offset);
+ init_data.stub_data_fd = phys_mapping(uml_to_phys(tramp_data->stub_data),
+ &offset);
init_data.stub_data_offset = MMAP_OFFSET(offset);
/*
@@ -214,20 +331,21 @@ static int userspace_tramp(void *stack)
syscall(__NR_close_range, 0, ~0U, CLOSE_RANGE_CLOEXEC);
fcntl(init_data.stub_data_fd, F_SETFD, 0);
- for (iomem = iomem_regions; iomem; iomem = iomem->next)
- fcntl(iomem->fd, F_SETFD, 0);
- /* Create a pipe for init_data (no CLOEXEC) and dup2 to STDIN */
- if (pipe(pipe_fds))
- exit(2);
+ /* In SECCOMP mode, these FDs are passed when needed */
+ if (!using_seccomp) {
+ for (iomem = iomem_regions; iomem; iomem = iomem->next)
+ fcntl(iomem->fd, F_SETFD, 0);
+ }
- if (dup2(pipe_fds[0], 0) < 0)
+ /* dup2 signaling FD/socket to STDIN */
+ if (dup2(tramp_data->sockpair[0], 0) < 0)
exit(3);
- close(pipe_fds[0]);
+ close(tramp_data->sockpair[0]);
/* Write init_data and close write side */
- ret = write(pipe_fds[1], &init_data, sizeof(init_data));
- close(pipe_fds[1]);
+ ret = write(tramp_data->sockpair[1], &init_data, sizeof(init_data));
+ close(tramp_data->sockpair[1]);
if (ret != sizeof(init_data))
exit(4);
@@ -315,11 +433,12 @@ static int __init init_stub_exe_fd(void)
}
__initcall(init_stub_exe_fd);
+int using_seccomp;
int userspace_pid[NR_CPUS];
/**
* start_userspace() - prepare a new userspace process
- * @stub_stack: pointer to the stub stack.
+ * @mm_id: The corresponding struct mm_id
*
* Setups a new temporary stack page that is used while userspace_tramp() runs
* Clones the kernel process into a new userspace process, with FDs only.
@@ -328,11 +447,15 @@ int userspace_pid[NR_CPUS];
* when negative: an error number.
* FIXME: can PIDs become negative?!
*/
-int start_userspace(unsigned long stub_stack)
+int start_userspace(struct mm_id *mm_id)
{
+ struct stub_data *proc_data = (void *)mm_id->stack;
+ struct tramp_data tramp_data = {
+ .stub_data = proc_data,
+ };
void *stack;
unsigned long sp;
- int pid, status, n, err;
+ int status, n, err;
/* setup a temporary stack page */
stack = mmap(NULL, UM_KERN_PAGE_SIZE,
@@ -348,40 +471,55 @@ int start_userspace(unsigned long stub_stack)
/* set stack pointer to the end of the stack page, so it can grow downwards */
sp = (unsigned long)stack + UM_KERN_PAGE_SIZE;
- /* clone into new userspace process */
- pid = clone(userspace_tramp, (void *) sp,
+ /* socket pair for init data and SECCOMP FD passing (no CLOEXEC here) */
+ if (socketpair(AF_UNIX, SOCK_STREAM, 0, tramp_data.sockpair)) {
+ err = -errno;
+ printk(UM_KERN_ERR "%s : socketpair failed, errno = %d\n",
+ __func__, errno);
+ return err;
+ }
+
+ if (using_seccomp)
+ proc_data->futex = FUTEX_IN_CHILD;
+
+ mm_id->pid = clone(userspace_tramp, (void *) sp,
CLONE_VFORK | CLONE_VM | SIGCHLD,
- (void *)stub_stack);
- if (pid < 0) {
+ (void *)&tramp_data);
+ if (mm_id->pid < 0) {
err = -errno;
printk(UM_KERN_ERR "%s : clone failed, errno = %d\n",
__func__, errno);
- return err;
+ goto out_close;
}
- do {
- CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED | __WALL));
- if (n < 0) {
+ if (using_seccomp) {
+ wait_stub_done_seccomp(mm_id, 1, 1);
+ } else {
+ do {
+ CATCH_EINTR(n = waitpid(mm_id->pid, &status,
+ WUNTRACED | __WALL));
+ if (n < 0) {
+ err = -errno;
+ printk(UM_KERN_ERR "%s : wait failed, errno = %d\n",
+ __func__, errno);
+ goto out_kill;
+ }
+ } while (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGALRM));
+
+ if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) {
+ err = -EINVAL;
+ printk(UM_KERN_ERR "%s : expected SIGSTOP, got status = %d\n",
+ __func__, status);
+ goto out_kill;
+ }
+
+ if (ptrace(PTRACE_SETOPTIONS, mm_id->pid, NULL,
+ (void *) PTRACE_O_TRACESYSGOOD) < 0) {
err = -errno;
- printk(UM_KERN_ERR "%s : wait failed, errno = %d\n",
+ printk(UM_KERN_ERR "%s : PTRACE_SETOPTIONS failed, errno = %d\n",
__func__, errno);
goto out_kill;
}
- } while (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGALRM));
-
- if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) {
- err = -EINVAL;
- printk(UM_KERN_ERR "%s : expected SIGSTOP, got status = %d\n",
- __func__, status);
- goto out_kill;
- }
-
- if (ptrace(PTRACE_SETOPTIONS, pid, NULL,
- (void *) PTRACE_O_TRACESYSGOOD) < 0) {
- err = -errno;
- printk(UM_KERN_ERR "%s : PTRACE_SETOPTIONS failed, errno = %d\n",
- __func__, errno);
- goto out_kill;
}
if (munmap(stack, UM_KERN_PAGE_SIZE) < 0) {
@@ -391,10 +529,22 @@ int start_userspace(unsigned long stub_stack)
goto out_kill;
}
- return pid;
+ close(tramp_data.sockpair[0]);
+ if (using_seccomp)
+ mm_id->sock = tramp_data.sockpair[1];
+ else
+ close(tramp_data.sockpair[1]);
+
+ return 0;
+
+out_kill:
+ os_kill_ptraced_process(mm_id->pid, 1);
+out_close:
+ close(tramp_data.sockpair[0]);
+ close(tramp_data.sockpair[1]);
+
+ mm_id->pid = -1;
- out_kill:
- os_kill_ptraced_process(pid, 1);
return err;
}
@@ -404,7 +554,9 @@ extern unsigned long tt_extra_sched_jiffies;
void userspace(struct uml_pt_regs *regs)
{
int err, status, op, pid = userspace_pid[0];
- siginfo_t si;
+ siginfo_t si_ptrace;
+ siginfo_t *si;
+ int sig;
/* Handle any immediate reschedules or signals */
interrupt_end();
@@ -437,103 +589,177 @@ void userspace(struct uml_pt_regs *regs)
current_mm_sync();
- /* Flush out any pending syscalls */
- err = syscall_stub_flush(current_mm_id());
- if (err) {
- if (err == -ENOMEM)
- report_enomem();
+ if (using_seccomp) {
+ struct mm_id *mm_id = current_mm_id();
+ struct stub_data *proc_data = (void *) mm_id->stack;
+ int ret;
- printk(UM_KERN_ERR "%s - Error flushing stub syscalls: %d",
- __func__, -err);
- fatal_sigsegv();
- }
+ ret = set_stub_state(regs, proc_data, singlestepping());
+ if (ret) {
+ printk(UM_KERN_ERR "%s - failed to set regs: %d",
+ __func__, ret);
+ fatal_sigsegv();
+ }
- /*
- * This can legitimately fail if the process loads a
- * bogus value into a segment register. It will
- * segfault and PTRACE_GETREGS will read that value
- * out of the process. However, PTRACE_SETREGS will
- * fail. In this case, there is nothing to do but
- * just kill the process.
- */
- if (ptrace(PTRACE_SETREGS, pid, 0, regs->gp)) {
- printk(UM_KERN_ERR "%s - ptrace set regs failed, errno = %d\n",
- __func__, errno);
- fatal_sigsegv();
- }
+ /* Must have been reset by the syscall caller */
+ if (proc_data->restart_wait != 0)
+ panic("Programming error: Flag to only run syscalls in child was not cleared!");
- if (put_fp_registers(pid, regs->fp)) {
- printk(UM_KERN_ERR "%s - ptrace set fp regs failed, errno = %d\n",
- __func__, errno);
- fatal_sigsegv();
- }
+ /* Mark pending syscalls for flushing */
+ proc_data->syscall_data_len = mm_id->syscall_data_len;
- if (singlestepping())
- op = PTRACE_SYSEMU_SINGLESTEP;
- else
- op = PTRACE_SYSEMU;
+ wait_stub_done_seccomp(mm_id, 0, 0);
- if (ptrace(op, pid, 0, 0)) {
- printk(UM_KERN_ERR "%s - ptrace continue failed, op = %d, errno = %d\n",
- __func__, op, errno);
- fatal_sigsegv();
- }
+ sig = proc_data->signal;
- CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED | __WALL));
- if (err < 0) {
- printk(UM_KERN_ERR "%s - wait failed, errno = %d\n",
- __func__, errno);
- fatal_sigsegv();
- }
+ if (sig == SIGTRAP && proc_data->err != 0) {
+ printk(UM_KERN_ERR "%s - Error flushing stub syscalls",
+ __func__);
+ syscall_stub_dump_error(mm_id);
+ mm_id->syscall_data_len = proc_data->err;
+ fatal_sigsegv();
+ }
- regs->is_user = 1;
- if (ptrace(PTRACE_GETREGS, pid, 0, regs->gp)) {
- printk(UM_KERN_ERR "%s - PTRACE_GETREGS failed, errno = %d\n",
- __func__, errno);
- fatal_sigsegv();
- }
+ mm_id->syscall_data_len = 0;
+ mm_id->syscall_fd_num = 0;
- if (get_fp_registers(pid, regs->fp)) {
- printk(UM_KERN_ERR "%s - get_fp_registers failed, errno = %d\n",
- __func__, errno);
- fatal_sigsegv();
- }
+ ret = get_stub_state(regs, proc_data, NULL);
+ if (ret) {
+ printk(UM_KERN_ERR "%s - failed to get regs: %d",
+ __func__, ret);
+ fatal_sigsegv();
+ }
- UPT_SYSCALL_NR(regs) = -1; /* Assume: It's not a syscall */
+ if (proc_data->si_offset > sizeof(proc_data->sigstack) - sizeof(*si))
+ panic("%s - Invalid siginfo offset from child",
+ __func__);
+ si = (void *)&proc_data->sigstack[proc_data->si_offset];
+
+ regs->is_user = 1;
+
+ /* Fill in ORIG_RAX and extract fault information */
+ PT_SYSCALL_NR(regs->gp) = si->si_syscall;
+ if (sig == SIGSEGV) {
+ mcontext_t *mcontext = (void *)&proc_data->sigstack[proc_data->mctx_offset];
- if (WIFSTOPPED(status)) {
- int sig = WSTOPSIG(status);
+ GET_FAULTINFO_FROM_MC(regs->faultinfo, mcontext);
+ }
+ } else {
+ /* Flush out any pending syscalls */
+ err = syscall_stub_flush(current_mm_id());
+ if (err) {
+ if (err == -ENOMEM)
+ report_enomem();
+
+ printk(UM_KERN_ERR "%s - Error flushing stub syscalls: %d",
+ __func__, -err);
+ fatal_sigsegv();
+ }
- /* These signal handlers need the si argument.
- * The SIGIO and SIGALARM handlers which constitute the
- * majority of invocations, do not use it.
+ /*
+ * This can legitimately fail if the process loads a
+ * bogus value into a segment register. It will
+ * segfault and PTRACE_GETREGS will read that value
+ * out of the process. However, PTRACE_SETREGS will
+ * fail. In this case, there is nothing to do but
+ * just kill the process.
*/
- switch (sig) {
- case SIGSEGV:
- case SIGTRAP:
- case SIGILL:
- case SIGBUS:
- case SIGFPE:
- case SIGWINCH:
- ptrace(PTRACE_GETSIGINFO, pid, 0, (struct siginfo *)&si);
- break;
+ if (ptrace(PTRACE_SETREGS, pid, 0, regs->gp)) {
+ printk(UM_KERN_ERR "%s - ptrace set regs failed, errno = %d\n",
+ __func__, errno);
+ fatal_sigsegv();
}
- switch (sig) {
- case SIGSEGV:
- if (PTRACE_FULL_FAULTINFO) {
+ if (put_fp_registers(pid, regs->fp)) {
+ printk(UM_KERN_ERR "%s - ptrace set fp regs failed, errno = %d\n",
+ __func__, errno);
+ fatal_sigsegv();
+ }
+
+ if (singlestepping())
+ op = PTRACE_SYSEMU_SINGLESTEP;
+ else
+ op = PTRACE_SYSEMU;
+
+ if (ptrace(op, pid, 0, 0)) {
+ printk(UM_KERN_ERR "%s - ptrace continue failed, op = %d, errno = %d\n",
+ __func__, op, errno);
+ fatal_sigsegv();
+ }
+
+ CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED | __WALL));
+ if (err < 0) {
+ printk(UM_KERN_ERR "%s - wait failed, errno = %d\n",
+ __func__, errno);
+ fatal_sigsegv();
+ }
+
+ regs->is_user = 1;
+ if (ptrace(PTRACE_GETREGS, pid, 0, regs->gp)) {
+ printk(UM_KERN_ERR "%s - PTRACE_GETREGS failed, errno = %d\n",
+ __func__, errno);
+ fatal_sigsegv();
+ }
+
+ if (get_fp_registers(pid, regs->fp)) {
+ printk(UM_KERN_ERR "%s - get_fp_registers failed, errno = %d\n",
+ __func__, errno);
+ fatal_sigsegv();
+ }
+
+ if (WIFSTOPPED(status)) {
+ sig = WSTOPSIG(status);
+
+ /*
+ * These signal handlers need the si argument
+ * and SIGSEGV needs the faultinfo.
+ * The SIGIO and SIGALARM handlers which constitute
+ * the majority of invocations, do not use it.
+ */
+ switch (sig) {
+ case SIGSEGV:
get_skas_faultinfo(pid,
&regs->faultinfo);
- (*sig_info[SIGSEGV])(SIGSEGV, (struct siginfo *)&si,
- regs, NULL);
+ fallthrough;
+ case SIGTRAP:
+ case SIGILL:
+ case SIGBUS:
+ case SIGFPE:
+ case SIGWINCH:
+ ptrace(PTRACE_GETSIGINFO, pid, 0,
+ (struct siginfo *)&si_ptrace);
+ si = &si_ptrace;
+ break;
+ default:
+ si = NULL;
+ break;
}
- else handle_segv(pid, regs);
+ } else {
+ sig = 0;
+ }
+ }
+
+ UPT_SYSCALL_NR(regs) = -1; /* Assume: It's not a syscall */
+
+ if (sig) {
+ switch (sig) {
+ case SIGSEGV:
+ if (using_seccomp || PTRACE_FULL_FAULTINFO)
+ (*sig_info[SIGSEGV])(SIGSEGV,
+ (struct siginfo *)si,
+ regs, NULL);
+ else
+ segv(regs->faultinfo, 0, 1, NULL, NULL);
+
+ break;
+ case SIGSYS:
+ handle_syscall(regs);
break;
case SIGTRAP + 0x80:
handle_trap(pid, regs);
break;
case SIGTRAP:
- relay_signal(SIGTRAP, (struct siginfo *)&si, regs, NULL);
+ relay_signal(SIGTRAP, (struct siginfo *)si, regs, NULL);
break;
case SIGALRM:
break;
@@ -543,7 +769,7 @@ void userspace(struct uml_pt_regs *regs)
case SIGFPE:
case SIGWINCH:
block_signals_trace();
- (*sig_info[sig])(sig, (struct siginfo *)&si, regs, NULL);
+ (*sig_info[sig])(sig, (struct siginfo *)si, regs, NULL);
unblock_signals_trace();
break;
default:
diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c
index 93fc82c01aba..a827c2e01aa5 100644
--- a/arch/um/os-Linux/start_up.c
+++ b/arch/um/os-Linux/start_up.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/*
+ * Copyright (C) 2021 Benjamin Berg <benjamin@sipsolutions.net>
* Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
*/
@@ -24,6 +25,13 @@
#include <kern_util.h>
#include <mem_user.h>
#include <ptrace_user.h>
+#include <stdbool.h>
+#include <stub-data.h>
+#include <sys/prctl.h>
+#include <linux/seccomp.h>
+#include <linux/filter.h>
+#include <sysdep/mcontext.h>
+#include <sysdep/stub.h>
#include <registers.h>
#include <skas.h>
#include "internal.h"
@@ -224,6 +232,140 @@ static void __init check_ptrace(void)
check_sysemu();
}
+extern unsigned long host_fp_size;
+extern unsigned long exec_regs[MAX_REG_NR];
+extern unsigned long *exec_fp_regs;
+
+__initdata static struct stub_data *seccomp_test_stub_data;
+
+static void __init sigsys_handler(int sig, siginfo_t *info, void *p)
+{
+ ucontext_t *uc = p;
+
+ /* Stow away the location of the mcontext in the stack */
+ seccomp_test_stub_data->mctx_offset = (unsigned long)&uc->uc_mcontext -
+ (unsigned long)&seccomp_test_stub_data->sigstack[0];
+
+ /* Prevent libc from clearing memory (mctx_offset in particular) */
+ syscall(__NR_exit, 0);
+}
+
+static int __init seccomp_helper(void *data)
+{
+ static struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_clock_nanosleep, 1, 0),
+ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
+ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_TRAP),
+ };
+ static struct sock_fprog prog = {
+ .len = ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ struct sigaction sa;
+
+ /* close_range is needed for the stub */
+ if (stub_syscall3(__NR_close_range, 1, ~0U, 0))
+ exit(1);
+
+ set_sigstack(seccomp_test_stub_data->sigstack,
+ sizeof(seccomp_test_stub_data->sigstack));
+
+ sa.sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO;
+ sa.sa_sigaction = (void *) sigsys_handler;
+ sa.sa_restorer = NULL;
+ if (sigaction(SIGSYS, &sa, NULL) < 0)
+ exit(2);
+
+ prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ if (syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER,
+ SECCOMP_FILTER_FLAG_TSYNC, &prog) != 0)
+ exit(3);
+
+ sleep(0);
+
+ /* Never reached. */
+ _exit(4);
+}
+
+static bool __init init_seccomp(void)
+{
+ int pid;
+ int status;
+ int n;
+ unsigned long sp;
+
+ /*
+ * We check that we can install a seccomp filter and then exit(0)
+ * from a trapped syscall.
+ *
+ * Note that we cannot verify that no seccomp filter already exists
+ * for a syscall that results in the process/thread to be killed.
+ */
+
+ os_info("Checking that seccomp filters can be installed...");
+
+ seccomp_test_stub_data = mmap(0, sizeof(*seccomp_test_stub_data),
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANON, 0, 0);
+
+ /* Use the syscall data area as stack, we just need something */
+ sp = (unsigned long)&seccomp_test_stub_data->syscall_data +
+ sizeof(seccomp_test_stub_data->syscall_data) -
+ sizeof(void *);
+ pid = clone(seccomp_helper, (void *)sp, CLONE_VFORK | CLONE_VM, NULL);
+
+ if (pid < 0)
+ fatal_perror("check_seccomp : clone failed");
+
+ CATCH_EINTR(n = waitpid(pid, &status, __WCLONE));
+ if (n < 0)
+ fatal_perror("check_seccomp : waitpid failed");
+
+ if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
+ struct uml_pt_regs *regs;
+ unsigned long fp_size;
+ int r;
+
+ /* Fill in the host_fp_size from the mcontext. */
+ regs = calloc(1, sizeof(struct uml_pt_regs));
+ get_stub_state(regs, seccomp_test_stub_data, &fp_size);
+ host_fp_size = fp_size;
+ free(regs);
+
+ /* Repeat with the correct size */
+ regs = calloc(1, sizeof(struct uml_pt_regs) + host_fp_size);
+ r = get_stub_state(regs, seccomp_test_stub_data, NULL);
+
+ /* Store as the default startup registers */
+ exec_fp_regs = malloc(host_fp_size);
+ memcpy(exec_regs, regs->gp, sizeof(exec_regs));
+ memcpy(exec_fp_regs, regs->fp, host_fp_size);
+
+ munmap(seccomp_test_stub_data, sizeof(*seccomp_test_stub_data));
+
+ free(regs);
+
+ if (r) {
+ os_info("failed to fetch registers: %d\n", r);
+ return false;
+ }
+
+ os_info("OK\n");
+ return true;
+ }
+
+ if (WIFEXITED(status) && WEXITSTATUS(status) == 2)
+ os_info("missing\n");
+ else
+ os_info("error\n");
+
+ munmap(seccomp_test_stub_data, sizeof(*seccomp_test_stub_data));
+ return false;
+}
+
+
static void __init check_coredump_limit(void)
{
struct rlimit lim;
@@ -278,6 +420,44 @@ void __init get_host_cpu_features(
}
}
+static int seccomp_config __initdata;
+
+static int __init uml_seccomp_config(char *line, int *add)
+{
+ *add = 0;
+
+ if (strcmp(line, "off") == 0)
+ seccomp_config = 0;
+ else if (strcmp(line, "auto") == 0)
+ seccomp_config = 1;
+ else if (strcmp(line, "on") == 0)
+ seccomp_config = 2;
+ else
+ fatal("Invalid seccomp option '%s', expected on/auto/off\n",
+ line);
+
+ return 0;
+}
+
+__uml_setup("seccomp=", uml_seccomp_config,
+"seccomp=<on/auto/off>\n"
+" Configure whether or not SECCOMP is used. With SECCOMP, userspace\n"
+" processes work collaboratively with the kernel instead of being\n"
+" traced using ptrace. All syscalls from the application are caught and\n"
+" redirected using a signal. This signal handler in turn is permitted to\n"
+" do the selected set of syscalls to communicate with the UML kernel and\n"
+" do the required memory management.\n"
+"\n"
+" This method is overall faster than the ptrace based userspace, primarily\n"
+" because it reduces the number of context switches for (minor) page faults.\n"
+"\n"
+" However, the SECCOMP filter is not (yet) restrictive enough to prevent\n"
+" userspace from reading and writing all physical memory. Userspace\n"
+" processes could also trick the stub into disabling SIGALRM which\n"
+" prevents it from being interrupted for scheduling purposes.\n"
+"\n"
+" This is insecure and should only be used with a trusted userspace\n\n"
+);
void __init os_early_checks(void)
{
@@ -286,13 +466,24 @@ void __init os_early_checks(void)
/* Print out the core dump limits early */
check_coredump_limit();
- check_ptrace();
-
/* Need to check this early because mmapping happens before the
* kernel is running.
*/
check_tmpexec();
+ if (seccomp_config) {
+ if (init_seccomp()) {
+ using_seccomp = 1;
+ return;
+ }
+
+ if (seccomp_config == 2)
+ fatal("SECCOMP userspace requested but not functional!\n");
+ }
+
+ using_seccomp = 0;
+ check_ptrace();
+
pid = start_ptraced_child();
if (init_pid_registers(pid))
fatal("Failed to initialize default registers");
diff --git a/arch/x86/um/asm/checksum.h b/arch/x86/um/asm/checksum.h
index b07824500363..ddc144657efa 100644
--- a/arch/x86/um/asm/checksum.h
+++ b/arch/x86/um/asm/checksum.h
@@ -20,6 +20,9 @@
*/
extern __wsum csum_partial(const void *buff, int len, __wsum sum);
+/* Do not call this directly. Declared for export type visibility. */
+extern __visible __wsum csum_partial_copy_generic(const void *src, void *dst, int len);
+
/**
* csum_fold - Fold and invert a 32bit checksum.
* sum: 32bit unfolded sum
diff --git a/arch/x86/um/asm/processor.h b/arch/x86/um/asm/processor.h
index 478710384b34..e222d2ae28fd 100644
--- a/arch/x86/um/asm/processor.h
+++ b/arch/x86/um/asm/processor.h
@@ -21,10 +21,10 @@
#include <asm/user.h>
-/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
-static __always_inline void rep_nop(void)
+/* PAUSE is a good thing to insert into busy-wait loops. */
+static __always_inline void native_pause(void)
{
- __asm__ __volatile__("rep;nop": : :"memory");
+ __asm__ __volatile__("pause": : :"memory");
}
static __always_inline void cpu_relax(void)
@@ -33,7 +33,7 @@ static __always_inline void cpu_relax(void)
time_travel_mode == TT_MODE_EXTERNAL)
time_travel_ndelay(1);
else
- rep_nop();
+ native_pause();
}
#define task_pt_regs(t) (&(t)->thread.regs)
diff --git a/arch/x86/um/os-Linux/mcontext.c b/arch/x86/um/os-Linux/mcontext.c
index 37decaa74761..a21403df6663 100644
--- a/arch/x86/um/os-Linux/mcontext.c
+++ b/arch/x86/um/os-Linux/mcontext.c
@@ -1,7 +1,10 @@
// SPDX-License-Identifier: GPL-2.0
-#include <sys/ucontext.h>
#define __FRAME_OFFSETS
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <sys/ucontext.h>
#include <asm/ptrace.h>
+#include <asm/sigcontext.h>
#include <sysdep/ptrace.h>
#include <sysdep/mcontext.h>
#include <arch.h>
@@ -18,6 +21,10 @@ void get_regs_from_mc(struct uml_pt_regs *regs, mcontext_t *mc)
COPY2(UESP, ESP); /* sic */
COPY(EBX); COPY(EDX); COPY(ECX); COPY(EAX);
COPY(EIP); COPY_SEG_CPL3(CS); COPY(EFL); COPY_SEG_CPL3(SS);
+#undef COPY2
+#undef COPY
+#undef COPY_SEG
+#undef COPY_SEG_CPL3
#else
#define COPY2(X,Y) regs->gp[X/sizeof(unsigned long)] = mc->gregs[REG_##Y]
#define COPY(X) regs->gp[X/sizeof(unsigned long)] = mc->gregs[REG_##X]
@@ -29,6 +36,8 @@ void get_regs_from_mc(struct uml_pt_regs *regs, mcontext_t *mc)
COPY2(EFLAGS, EFL);
COPY2(CS, CSGSFS);
regs->gp[SS / sizeof(unsigned long)] = mc->gregs[REG_CSGSFS] >> 48;
+#undef COPY2
+#undef COPY
#endif
}
@@ -42,3 +51,210 @@ void mc_set_rip(void *_mc, void *target)
mc->gregs[REG_RIP] = (unsigned long)target;
#endif
}
+
+/* Same thing, but the copy macros are turned around. */
+void get_mc_from_regs(struct uml_pt_regs *regs, mcontext_t *mc, int single_stepping)
+{
+#ifdef __i386__
+#define COPY2(X,Y) mc->gregs[REG_##Y] = regs->gp[X]
+#define COPY(X) mc->gregs[REG_##X] = regs->gp[X]
+#define COPY_SEG(X) mc->gregs[REG_##X] = regs->gp[X] & 0xffff;
+#define COPY_SEG_CPL3(X) mc->gregs[REG_##X] = (regs->gp[X] & 0xffff) | 3;
+ COPY_SEG(GS); COPY_SEG(FS); COPY_SEG(ES); COPY_SEG(DS);
+ COPY(EDI); COPY(ESI); COPY(EBP);
+ COPY2(UESP, ESP); /* sic */
+ COPY(EBX); COPY(EDX); COPY(ECX); COPY(EAX);
+ COPY(EIP); COPY_SEG_CPL3(CS); COPY(EFL); COPY_SEG_CPL3(SS);
+#else
+#define COPY2(X,Y) mc->gregs[REG_##Y] = regs->gp[X/sizeof(unsigned long)]
+#define COPY(X) mc->gregs[REG_##X] = regs->gp[X/sizeof(unsigned long)]
+ COPY(R8); COPY(R9); COPY(R10); COPY(R11);
+ COPY(R12); COPY(R13); COPY(R14); COPY(R15);
+ COPY(RDI); COPY(RSI); COPY(RBP); COPY(RBX);
+ COPY(RDX); COPY(RAX); COPY(RCX); COPY(RSP);
+ COPY(RIP);
+ COPY2(EFLAGS, EFL);
+ mc->gregs[REG_CSGSFS] = mc->gregs[REG_CSGSFS] & 0xffffffffffffl;
+ mc->gregs[REG_CSGSFS] |= (regs->gp[SS / sizeof(unsigned long)] & 0xffff) << 48;
+#endif
+
+ if (single_stepping)
+ mc->gregs[REG_EFL] |= X86_EFLAGS_TF;
+ else
+ mc->gregs[REG_EFL] &= ~X86_EFLAGS_TF;
+}
+
+#ifdef CONFIG_X86_32
+struct _xstate_64 {
+ struct _fpstate_64 fpstate;
+ struct _header xstate_hdr;
+ struct _ymmh_state ymmh;
+ /* New processor state extensions go here: */
+};
+
+/* Not quite the right structures as these contain more information */
+int um_i387_from_fxsr(struct _fpstate_32 *i387,
+ const struct _fpstate_64 *fxsave);
+int um_fxsr_from_i387(struct _fpstate_64 *fxsave,
+ const struct _fpstate_32 *from);
+#else
+#define _xstate_64 _xstate
+#endif
+
+static struct _fpstate *get_fpstate(struct stub_data *data,
+ mcontext_t *mcontext,
+ int *fp_size)
+{
+ struct _fpstate *res;
+
+ /* Assume floating point registers are on the same page */
+ res = (void *)(((unsigned long)mcontext->fpregs &
+ (UM_KERN_PAGE_SIZE - 1)) +
+ (unsigned long)&data->sigstack[0]);
+
+ if ((void *)res + sizeof(struct _fpstate) >
+ (void *)data->sigstack + sizeof(data->sigstack))
+ return NULL;
+
+ if (res->sw_reserved.magic1 != FP_XSTATE_MAGIC1) {
+ *fp_size = sizeof(struct _fpstate);
+ } else {
+ char *magic2_addr;
+
+ magic2_addr = (void *)res;
+ magic2_addr += res->sw_reserved.extended_size;
+ magic2_addr -= FP_XSTATE_MAGIC2_SIZE;
+
+ /* We still need to be within our stack */
+ if ((void *)magic2_addr >
+ (void *)data->sigstack + sizeof(data->sigstack))
+ return NULL;
+
+ /* If we do not read MAGIC2, then we did something wrong */
+ if (*(__u32 *)magic2_addr != FP_XSTATE_MAGIC2)
+ return NULL;
+
+ /* Remove MAGIC2 from the size, we do not save/restore it */
+ *fp_size = res->sw_reserved.extended_size -
+ FP_XSTATE_MAGIC2_SIZE;
+ }
+
+ return res;
+}
+
+int get_stub_state(struct uml_pt_regs *regs, struct stub_data *data,
+ unsigned long *fp_size_out)
+{
+ mcontext_t *mcontext;
+ struct _fpstate *fpstate_stub;
+ struct _xstate_64 *xstate_stub;
+ int fp_size, xstate_size;
+
+ /* mctx_offset is verified by wait_stub_done_seccomp */
+ mcontext = (void *)&data->sigstack[data->mctx_offset];
+
+ get_regs_from_mc(regs, mcontext);
+
+ fpstate_stub = get_fpstate(data, mcontext, &fp_size);
+ if (!fpstate_stub)
+ return -EINVAL;
+
+#ifdef CONFIG_X86_32
+ xstate_stub = (void *)&fpstate_stub->_fxsr_env;
+ xstate_size = fp_size - offsetof(struct _fpstate_32, _fxsr_env);
+#else
+ xstate_stub = (void *)fpstate_stub;
+ xstate_size = fp_size;
+#endif
+
+ if (fp_size_out)
+ *fp_size_out = xstate_size;
+
+ if (xstate_size > host_fp_size)
+ return -ENOSPC;
+
+ memcpy(&regs->fp, xstate_stub, xstate_size);
+
+ /* We do not need to read the x86_64 FS_BASE/GS_BASE registers as
+ * we do not permit userspace to set them directly.
+ */
+
+#ifdef CONFIG_X86_32
+ /* Read the i387 legacy FP registers */
+ if (um_fxsr_from_i387((void *)&regs->fp, fpstate_stub))
+ return -EINVAL;
+#endif
+
+ return 0;
+}
+
+/* Copied because we cannot include regset.h here. */
+struct task_struct;
+struct user_regset;
+struct membuf {
+ void *p;
+ size_t left;
+};
+
+int fpregs_legacy_get(struct task_struct *target,
+ const struct user_regset *regset,
+ struct membuf to);
+
+int set_stub_state(struct uml_pt_regs *regs, struct stub_data *data,
+ int single_stepping)
+{
+ mcontext_t *mcontext;
+ struct _fpstate *fpstate_stub;
+ struct _xstate_64 *xstate_stub;
+ int fp_size, xstate_size;
+
+ /* mctx_offset is verified by wait_stub_done_seccomp */
+ mcontext = (void *)&data->sigstack[data->mctx_offset];
+
+ if ((unsigned long)mcontext < (unsigned long)data->sigstack ||
+ (unsigned long)mcontext >
+ (unsigned long) data->sigstack +
+ sizeof(data->sigstack) - sizeof(*mcontext))
+ return -EINVAL;
+
+ get_mc_from_regs(regs, mcontext, single_stepping);
+
+ fpstate_stub = get_fpstate(data, mcontext, &fp_size);
+ if (!fpstate_stub)
+ return -EINVAL;
+
+#ifdef CONFIG_X86_32
+ xstate_stub = (void *)&fpstate_stub->_fxsr_env;
+ xstate_size = fp_size - offsetof(struct _fpstate_32, _fxsr_env);
+#else
+ xstate_stub = (void *)fpstate_stub;
+ xstate_size = fp_size;
+#endif
+
+ memcpy(xstate_stub, &regs->fp, xstate_size);
+
+#ifdef __i386__
+ /*
+ * On x86, the GDT entries are updated by arch_set_tls.
+ */
+
+ /* Store the i387 legacy FP registers which the host will use */
+ if (um_i387_from_fxsr(fpstate_stub, (void *)&regs->fp))
+ return -EINVAL;
+#else
+ /*
+ * On x86_64, we need to sync the FS_BASE/GS_BASE registers using the
+ * arch specific data.
+ */
+ if (data->arch_data.fs_base != regs->gp[FS_BASE / sizeof(unsigned long)]) {
+ data->arch_data.fs_base = regs->gp[FS_BASE / sizeof(unsigned long)];
+ data->arch_data.sync |= STUB_SYNC_FS_BASE;
+ }
+ if (data->arch_data.gs_base != regs->gp[GS_BASE / sizeof(unsigned long)]) {
+ data->arch_data.gs_base = regs->gp[GS_BASE / sizeof(unsigned long)];
+ data->arch_data.sync |= STUB_SYNC_GS_BASE;
+ }
+#endif
+
+ return 0;
+}
diff --git a/arch/x86/um/ptrace.c b/arch/x86/um/ptrace.c
index 57c504fd5626..3275870330fe 100644
--- a/arch/x86/um/ptrace.c
+++ b/arch/x86/um/ptrace.c
@@ -25,7 +25,8 @@ static inline unsigned short twd_i387_to_fxsr(unsigned short twd)
return tmp;
}
-static inline unsigned long twd_fxsr_to_i387(struct user_fxsr_struct *fxsave)
+static inline unsigned long
+twd_fxsr_to_i387(const struct user_fxsr_struct *fxsave)
{
struct _fpxreg *st = NULL;
unsigned long twd = (unsigned long) fxsave->twd;
@@ -69,12 +70,16 @@ static inline unsigned long twd_fxsr_to_i387(struct user_fxsr_struct *fxsave)
return ret;
}
-/* Get/set the old 32bit i387 registers (pre-FPX) */
-static int fpregs_legacy_get(struct task_struct *target,
- const struct user_regset *regset,
- struct membuf to)
+/*
+ * Get/set the old 32bit i387 registers (pre-FPX)
+ *
+ * We provide simple wrappers for mcontext.c, they are only defined locally
+ * because mcontext.c is userspace facing and needs to a different definition
+ * of the structures.
+ */
+static int _um_i387_from_fxsr(struct membuf to,
+ const struct user_fxsr_struct *fxsave)
{
- struct user_fxsr_struct *fxsave = (void *)target->thread.regs.regs.fp;
int i;
membuf_store(&to, (unsigned long)fxsave->cwd | 0xffff0000ul);
@@ -91,23 +96,36 @@ static int fpregs_legacy_get(struct task_struct *target,
return 0;
}
-static int fpregs_legacy_set(struct task_struct *target,
+int um_i387_from_fxsr(struct user_i387_struct *i387,
+ const struct user_fxsr_struct *fxsave);
+
+int um_i387_from_fxsr(struct user_i387_struct *i387,
+ const struct user_fxsr_struct *fxsave)
+{
+ struct membuf to = {
+ .p = i387,
+ .left = sizeof(*i387),
+ };
+
+ return _um_i387_from_fxsr(to, fxsave);
+}
+
+static int fpregs_legacy_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
+ struct membuf to)
{
struct user_fxsr_struct *fxsave = (void *)target->thread.regs.regs.fp;
- const struct user_i387_struct *from;
- struct user_i387_struct buf;
- int i;
- if (ubuf) {
- if (copy_from_user(&buf, ubuf, sizeof(buf)))
- return -EFAULT;
- from = &buf;
- } else {
- from = kbuf;
- }
+ return _um_i387_from_fxsr(to, fxsave);
+}
+
+int um_fxsr_from_i387(struct user_fxsr_struct *fxsave,
+ const struct user_i387_struct *from);
+
+int um_fxsr_from_i387(struct user_fxsr_struct *fxsave,
+ const struct user_i387_struct *from)
+{
+ int i;
fxsave->cwd = (unsigned short)(from->cwd & 0xffff);
fxsave->swd = (unsigned short)(from->swd & 0xffff);
@@ -125,6 +143,26 @@ static int fpregs_legacy_set(struct task_struct *target,
return 0;
}
+
+static int fpregs_legacy_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ struct user_fxsr_struct *fxsave = (void *)target->thread.regs.regs.fp;
+ const struct user_i387_struct *from;
+ struct user_i387_struct buf;
+
+ if (ubuf) {
+ if (copy_from_user(&buf, ubuf, sizeof(buf)))
+ return -EFAULT;
+ from = &buf;
+ } else {
+ from = kbuf;
+ }
+
+ return um_fxsr_from_i387(fxsave, &buf);
+}
#endif
static int genregs_get(struct task_struct *target,
diff --git a/arch/x86/um/shared/sysdep/kernel-offsets.h b/arch/x86/um/shared/sysdep/kernel-offsets.h
index 48de3a71f845..6fd1ed400399 100644
--- a/arch/x86/um/shared/sysdep/kernel-offsets.h
+++ b/arch/x86/um/shared/sysdep/kernel-offsets.h
@@ -4,7 +4,9 @@
#include <linux/elf.h>
#include <linux/crypto.h>
#include <linux/kbuild.h>
+#include <linux/audit.h>
#include <asm/mman.h>
+#include <asm/seccomp.h>
/* workaround for a warning with -Wmissing-prototypes */
void foo(void);
diff --git a/arch/x86/um/shared/sysdep/mcontext.h b/arch/x86/um/shared/sysdep/mcontext.h
index b724c54da316..6fe490cc5b98 100644
--- a/arch/x86/um/shared/sysdep/mcontext.h
+++ b/arch/x86/um/shared/sysdep/mcontext.h
@@ -6,7 +6,16 @@
#ifndef __SYS_SIGCONTEXT_X86_H
#define __SYS_SIGCONTEXT_X86_H
+#include <stub-data.h>
+
extern void get_regs_from_mc(struct uml_pt_regs *, mcontext_t *);
+extern void get_mc_from_regs(struct uml_pt_regs *regs, mcontext_t *mc,
+ int single_stepping);
+
+extern int get_stub_state(struct uml_pt_regs *regs, struct stub_data *data,
+ unsigned long *fp_size_out);
+extern int set_stub_state(struct uml_pt_regs *regs, struct stub_data *data,
+ int single_stepping);
#ifdef __i386__
diff --git a/arch/x86/um/shared/sysdep/stub-data.h b/arch/x86/um/shared/sysdep/stub-data.h
new file mode 100644
index 000000000000..82b1b7f8ac3d
--- /dev/null
+++ b/arch/x86/um/shared/sysdep/stub-data.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ARCH_STUB_DATA_H
+#define __ARCH_STUB_DATA_H
+
+#ifdef __i386__
+#include <generated/asm-offsets.h>
+#include <asm/ldt.h>
+
+struct stub_data_arch {
+ int sync;
+ struct user_desc tls[UM_KERN_GDT_ENTRY_TLS_ENTRIES];
+};
+#else
+#define STUB_SYNC_FS_BASE (1 << 0)
+#define STUB_SYNC_GS_BASE (1 << 1)
+struct stub_data_arch {
+ int sync;
+ unsigned long fs_base;
+ unsigned long gs_base;
+};
+#endif
+
+#endif /* __ARCH_STUB_DATA_H */
diff --git a/arch/x86/um/shared/sysdep/stub.h b/arch/x86/um/shared/sysdep/stub.h
index dc89f4423454..4fa58f5b4fca 100644
--- a/arch/x86/um/shared/sysdep/stub.h
+++ b/arch/x86/um/shared/sysdep/stub.h
@@ -13,3 +13,5 @@
extern void stub_segv_handler(int, siginfo_t *, void *);
extern void stub_syscall_handler(void);
+extern void stub_signal_interrupt(int, siginfo_t *, void *);
+extern void stub_signal_restorer(void);
diff --git a/arch/x86/um/shared/sysdep/stub_32.h b/arch/x86/um/shared/sysdep/stub_32.h
index 390988132c0a..df568fc3ceb4 100644
--- a/arch/x86/um/shared/sysdep/stub_32.h
+++ b/arch/x86/um/shared/sysdep/stub_32.h
@@ -131,4 +131,17 @@ static __always_inline void *get_stub_data(void)
"call *%%eax ;" \
:: "i" ((1 + STUB_DATA_PAGES) * UM_KERN_PAGE_SIZE), \
"i" (&fn))
+
+static __always_inline void
+stub_seccomp_restore_state(struct stub_data_arch *arch)
+{
+ for (int i = 0; i < sizeof(arch->tls) / sizeof(arch->tls[0]); i++) {
+ if (arch->sync & (1 << i))
+ stub_syscall1(__NR_set_thread_area,
+ (unsigned long) &arch->tls[i]);
+ }
+
+ arch->sync = 0;
+}
+
#endif
diff --git a/arch/x86/um/shared/sysdep/stub_64.h b/arch/x86/um/shared/sysdep/stub_64.h
index 294affbec742..9cfd31afa769 100644
--- a/arch/x86/um/shared/sysdep/stub_64.h
+++ b/arch/x86/um/shared/sysdep/stub_64.h
@@ -10,6 +10,7 @@
#include <sysdep/ptrace_user.h>
#include <generated/asm-offsets.h>
#include <linux/stddef.h>
+#include <asm/prctl.h>
#define STUB_MMAP_NR __NR_mmap
#define MMAP_OFFSET(o) (o)
@@ -134,4 +135,20 @@ static __always_inline void *get_stub_data(void)
"call *%%rax ;" \
:: "i" ((1 + STUB_DATA_PAGES) * UM_KERN_PAGE_SIZE), \
"i" (&fn))
+
+static __always_inline void
+stub_seccomp_restore_state(struct stub_data_arch *arch)
+{
+ /*
+ * We could use _writefsbase_u64/_writegsbase_u64 if the host reports
+ * support in the hwcaps (HWCAP2_FSGSBASE).
+ */
+ if (arch->sync & STUB_SYNC_FS_BASE)
+ stub_syscall2(__NR_arch_prctl, ARCH_SET_FS, arch->fs_base);
+ if (arch->sync & STUB_SYNC_GS_BASE)
+ stub_syscall2(__NR_arch_prctl, ARCH_SET_GS, arch->gs_base);
+
+ arch->sync = 0;
+}
+
#endif
diff --git a/arch/x86/um/tls_32.c b/arch/x86/um/tls_32.c
index fbb129023080..cb3f17627d16 100644
--- a/arch/x86/um/tls_32.c
+++ b/arch/x86/um/tls_32.c
@@ -12,6 +12,7 @@
#include <skas.h>
#include <sysdep/tls.h>
#include <asm/desc.h>
+#include <stub-data.h>
/*
* If needed we can detect when it's uninitialized.
@@ -21,14 +22,25 @@
static int host_supports_tls = -1;
int host_gdt_entry_tls_min;
-static int do_set_thread_area(struct user_desc *info)
+static int do_set_thread_area(struct task_struct* task, struct user_desc *info)
{
int ret;
- u32 cpu;
- cpu = get_cpu();
- ret = os_set_thread_area(info, userspace_pid[cpu]);
- put_cpu();
+ if (info->entry_number < host_gdt_entry_tls_min ||
+ info->entry_number >= host_gdt_entry_tls_min + GDT_ENTRY_TLS_ENTRIES)
+ return -EINVAL;
+
+ if (using_seccomp) {
+ int idx = info->entry_number - host_gdt_entry_tls_min;
+ struct stub_data *data = (void *)task->mm->context.id.stack;
+
+ data->arch_data.tls[idx] = *info;
+ data->arch_data.sync |= BIT(idx);
+
+ return 0;
+ }
+
+ ret = os_set_thread_area(info, task->mm->context.id.pid);
if (ret)
printk(KERN_ERR "PTRACE_SET_THREAD_AREA failed, err = %d, "
@@ -97,7 +109,7 @@ static int load_TLS(int flags, struct task_struct *to)
if (!(flags & O_FORCE) && curr->flushed)
continue;
- ret = do_set_thread_area(&curr->tls);
+ ret = do_set_thread_area(current, &curr->tls);
if (ret)
goto out;
@@ -275,7 +287,7 @@ SYSCALL_DEFINE1(set_thread_area, struct user_desc __user *, user_desc)
return -EFAULT;
}
- ret = do_set_thread_area(&info);
+ ret = do_set_thread_area(current, &info);
if (ret)
return ret;
return set_tls_entry(current, &info, idx, 1);
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 19fd55b8ac77..77c7a99f0870 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -638,6 +638,13 @@ static int dpm_async_with_cleanup(struct device *dev, void *fn)
static void dpm_async_resume_children(struct device *dev, async_func_t func)
{
/*
+ * Prevent racing with dpm_clear_async_state() during initial list
+ * walks in dpm_noirq_resume_devices(), dpm_resume_early(), and
+ * dpm_resume().
+ */
+ guard(mutex)(&dpm_list_mtx);
+
+ /*
* Start processing "async" children of the device unless it's been
* started already for them.
*
@@ -985,6 +992,8 @@ static void device_resume(struct device *dev, pm_message_t state, bool async)
if (!dev->power.is_suspended)
goto Complete;
+ dev->power.is_suspended = false;
+
if (dev->power.direct_complete) {
/*
* Allow new children to be added under the device after this
@@ -1047,7 +1056,6 @@ static void device_resume(struct device *dev, pm_message_t state, bool async)
End:
error = dpm_run_callback(callback, dev, state, info);
- dev->power.is_suspended = false;
device_unlock(dev);
dpm_watchdog_clear(&wd);
@@ -1451,7 +1459,7 @@ static int dpm_noirq_suspend_devices(pm_message_t state)
* Move all devices to the target list to resume them
* properly.
*/
- list_splice(&dpm_late_early_list, &dpm_noirq_list);
+ list_splice_init(&dpm_late_early_list, &dpm_noirq_list);
break;
}
}
@@ -1653,7 +1661,7 @@ int dpm_suspend_late(pm_message_t state)
* Move all devices to the target list to resume them
* properly.
*/
- list_splice(&dpm_suspended_list, &dpm_late_early_list);
+ list_splice_init(&dpm_suspended_list, &dpm_late_early_list);
break;
}
}
@@ -1946,7 +1954,7 @@ int dpm_suspend(pm_message_t state)
* Move all devices to the target list to resume them
* properly.
*/
- list_splice(&dpm_prepared_list, &dpm_suspended_list);
+ list_splice_init(&dpm_prepared_list, &dpm_suspended_list);
break;
}
}
diff --git a/drivers/bluetooth/btnxpuart.c b/drivers/bluetooth/btnxpuart.c
index b34623a69b8a..6b13feed06df 100644
--- a/drivers/bluetooth/btnxpuart.c
+++ b/drivers/bluetooth/btnxpuart.c
@@ -533,6 +533,8 @@ static int ps_setup(struct hci_dev *hdev)
ps_host_wakeup_irq_handler,
IRQF_ONESHOT | IRQF_TRIGGER_FALLING,
dev_name(&serdev->dev), nxpdev);
+ if (ret)
+ bt_dev_info(hdev, "error setting wakeup IRQ handler, ignoring\n");
disable_irq(psdata->irq_handler);
device_init_wakeup(&serdev->dev, true);
}
diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
index e00590ba24fd..a2dc39c005f4 100644
--- a/drivers/bluetooth/hci_qca.c
+++ b/drivers/bluetooth/hci_qca.c
@@ -2415,14 +2415,14 @@ static int qca_serdev_probe(struct serdev_device *serdev)
qcadev->bt_en = devm_gpiod_get_optional(&serdev->dev, "enable",
GPIOD_OUT_LOW);
- if (IS_ERR(qcadev->bt_en) &&
- (data->soc_type == QCA_WCN6750 ||
- data->soc_type == QCA_WCN6855)) {
- dev_err(&serdev->dev, "failed to acquire BT_EN gpio\n");
- return PTR_ERR(qcadev->bt_en);
- }
+ if (IS_ERR(qcadev->bt_en))
+ return dev_err_probe(&serdev->dev,
+ PTR_ERR(qcadev->bt_en),
+ "failed to acquire BT_EN gpio\n");
- if (!qcadev->bt_en)
+ if (!qcadev->bt_en &&
+ (data->soc_type == QCA_WCN6750 ||
+ data->soc_type == QCA_WCN6855))
power_ctrl_enabled = false;
qcadev->sw_ctrl = devm_gpiod_get_optional(&serdev->dev, "swctrl",
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index df2d2dc00a05..db87dd2a07f7 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -93,6 +93,14 @@ config APPLE_ADMAC
help
Enable support for Audio DMA Controller found on Apple Silicon SoCs.
+config ARM_DMA350
+ tristate "Arm DMA-350 support"
+ depends on ARM || ARM64 || COMPILE_TEST
+ select DMA_ENGINE
+ select DMA_VIRTUAL_CHANNELS
+ help
+ Enable support for the Arm DMA-350 controller.
+
config AT_HDMAC
tristate "Atmel AHB DMA support"
depends on ARCH_AT91
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index 19ba465011a6..ba9732644752 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -17,6 +17,7 @@ obj-$(CONFIG_ALTERA_MSGDMA) += altera-msgdma.o
obj-$(CONFIG_AMBA_PL08X) += amba-pl08x.o
obj-$(CONFIG_AMCC_PPC440SPE_ADMA) += ppc4xx/
obj-$(CONFIG_APPLE_ADMAC) += apple-admac.o
+obj-$(CONFIG_ARM_DMA350) += arm-dma350.o
obj-$(CONFIG_AT_HDMAC) += at_hdmac.o
obj-$(CONFIG_AT_XDMAC) += at_xdmac.o
obj-$(CONFIG_AXI_DMAC) += dma-axi-dmac.o
diff --git a/drivers/dma/amd/ptdma/ptdma-dmaengine.c b/drivers/dma/amd/ptdma/ptdma-dmaengine.c
index 81339664036f..628c49ce5de9 100644
--- a/drivers/dma/amd/ptdma/ptdma-dmaengine.c
+++ b/drivers/dma/amd/ptdma/ptdma-dmaengine.c
@@ -566,7 +566,6 @@ int pt_dmaengine_register(struct pt_device *pt)
struct ae4_device *ae4 = NULL;
struct pt_dma_chan *chan;
char *desc_cache_name;
- char *cmd_cache_name;
int ret, i;
if (pt->ver == AE4_DMA_VERSION)
@@ -582,27 +581,17 @@ int pt_dmaengine_register(struct pt_device *pt)
if (!pt->pt_dma_chan)
return -ENOMEM;
- cmd_cache_name = devm_kasprintf(pt->dev, GFP_KERNEL,
- "%s-dmaengine-cmd-cache",
- dev_name(pt->dev));
- if (!cmd_cache_name)
- return -ENOMEM;
-
desc_cache_name = devm_kasprintf(pt->dev, GFP_KERNEL,
"%s-dmaengine-desc-cache",
dev_name(pt->dev));
- if (!desc_cache_name) {
- ret = -ENOMEM;
- goto err_cache;
- }
+ if (!desc_cache_name)
+ return -ENOMEM;
pt->dma_desc_cache = kmem_cache_create(desc_cache_name,
sizeof(struct pt_dma_desc), 0,
SLAB_HWCACHE_ALIGN, NULL);
- if (!pt->dma_desc_cache) {
- ret = -ENOMEM;
- goto err_cache;
- }
+ if (!pt->dma_desc_cache)
+ return -ENOMEM;
dma_dev->dev = pt->dev;
dma_dev->src_addr_widths = DMA_SLAVE_BUSWIDTH_64_BYTES;
@@ -656,9 +645,6 @@ int pt_dmaengine_register(struct pt_device *pt)
err_reg:
kmem_cache_destroy(pt->dma_desc_cache);
-err_cache:
- kmem_cache_destroy(pt->dma_cmd_cache);
-
return ret;
}
EXPORT_SYMBOL_GPL(pt_dmaengine_register);
@@ -670,5 +656,4 @@ void pt_dmaengine_unregister(struct pt_device *pt)
dma_async_device_unregister(dma_dev);
kmem_cache_destroy(pt->dma_desc_cache);
- kmem_cache_destroy(pt->dma_cmd_cache);
}
diff --git a/drivers/dma/amd/ptdma/ptdma.h b/drivers/dma/amd/ptdma/ptdma.h
index 0a7939105e51..ef3f55632107 100644
--- a/drivers/dma/amd/ptdma/ptdma.h
+++ b/drivers/dma/amd/ptdma/ptdma.h
@@ -254,7 +254,6 @@ struct pt_device {
/* Support for the DMA Engine capabilities */
struct dma_device dma_dev;
struct pt_dma_chan *pt_dma_chan;
- struct kmem_cache *dma_cmd_cache;
struct kmem_cache *dma_desc_cache;
wait_queue_head_t lsb_queue;
diff --git a/drivers/dma/arm-dma350.c b/drivers/dma/arm-dma350.c
new file mode 100644
index 000000000000..9efe2ca7d5ec
--- /dev/null
+++ b/drivers/dma/arm-dma350.c
@@ -0,0 +1,660 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2024-2025 Arm Limited
+// Arm DMA-350 driver
+
+#include <linux/bitfield.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+
+#include "dmaengine.h"
+#include "virt-dma.h"
+
+#define DMAINFO 0x0f00
+
+#define DMA_BUILDCFG0 0xb0
+#define DMA_CFG_DATA_WIDTH GENMASK(18, 16)
+#define DMA_CFG_ADDR_WIDTH GENMASK(15, 10)
+#define DMA_CFG_NUM_CHANNELS GENMASK(9, 4)
+
+#define DMA_BUILDCFG1 0xb4
+#define DMA_CFG_NUM_TRIGGER_IN GENMASK(8, 0)
+
+#define IIDR 0xc8
+#define IIDR_PRODUCTID GENMASK(31, 20)
+#define IIDR_VARIANT GENMASK(19, 16)
+#define IIDR_REVISION GENMASK(15, 12)
+#define IIDR_IMPLEMENTER GENMASK(11, 0)
+
+#define PRODUCTID_DMA350 0x3a0
+#define IMPLEMENTER_ARM 0x43b
+
+#define DMACH(n) (0x1000 + 0x0100 * (n))
+
+#define CH_CMD 0x00
+#define CH_CMD_RESUME BIT(5)
+#define CH_CMD_PAUSE BIT(4)
+#define CH_CMD_STOP BIT(3)
+#define CH_CMD_DISABLE BIT(2)
+#define CH_CMD_CLEAR BIT(1)
+#define CH_CMD_ENABLE BIT(0)
+
+#define CH_STATUS 0x04
+#define CH_STAT_RESUMEWAIT BIT(21)
+#define CH_STAT_PAUSED BIT(20)
+#define CH_STAT_STOPPED BIT(19)
+#define CH_STAT_DISABLED BIT(18)
+#define CH_STAT_ERR BIT(17)
+#define CH_STAT_DONE BIT(16)
+#define CH_STAT_INTR_ERR BIT(1)
+#define CH_STAT_INTR_DONE BIT(0)
+
+#define CH_INTREN 0x08
+#define CH_INTREN_ERR BIT(1)
+#define CH_INTREN_DONE BIT(0)
+
+#define CH_CTRL 0x0c
+#define CH_CTRL_USEDESTRIGIN BIT(26)
+#define CH_CTRL_USESRCTRIGIN BIT(26)
+#define CH_CTRL_DONETYPE GENMASK(23, 21)
+#define CH_CTRL_REGRELOADTYPE GENMASK(20, 18)
+#define CH_CTRL_XTYPE GENMASK(11, 9)
+#define CH_CTRL_TRANSIZE GENMASK(2, 0)
+
+#define CH_SRCADDR 0x10
+#define CH_SRCADDRHI 0x14
+#define CH_DESADDR 0x18
+#define CH_DESADDRHI 0x1c
+#define CH_XSIZE 0x20
+#define CH_XSIZEHI 0x24
+#define CH_SRCTRANSCFG 0x28
+#define CH_DESTRANSCFG 0x2c
+#define CH_CFG_MAXBURSTLEN GENMASK(19, 16)
+#define CH_CFG_PRIVATTR BIT(11)
+#define CH_CFG_SHAREATTR GENMASK(9, 8)
+#define CH_CFG_MEMATTR GENMASK(7, 0)
+
+#define TRANSCFG_DEVICE \
+ FIELD_PREP(CH_CFG_MAXBURSTLEN, 0xf) | \
+ FIELD_PREP(CH_CFG_SHAREATTR, SHAREATTR_OSH) | \
+ FIELD_PREP(CH_CFG_MEMATTR, MEMATTR_DEVICE)
+#define TRANSCFG_NC \
+ FIELD_PREP(CH_CFG_MAXBURSTLEN, 0xf) | \
+ FIELD_PREP(CH_CFG_SHAREATTR, SHAREATTR_OSH) | \
+ FIELD_PREP(CH_CFG_MEMATTR, MEMATTR_NC)
+#define TRANSCFG_WB \
+ FIELD_PREP(CH_CFG_MAXBURSTLEN, 0xf) | \
+ FIELD_PREP(CH_CFG_SHAREATTR, SHAREATTR_ISH) | \
+ FIELD_PREP(CH_CFG_MEMATTR, MEMATTR_WB)
+
+#define CH_XADDRINC 0x30
+#define CH_XY_DES GENMASK(31, 16)
+#define CH_XY_SRC GENMASK(15, 0)
+
+#define CH_FILLVAL 0x38
+#define CH_SRCTRIGINCFG 0x4c
+#define CH_DESTRIGINCFG 0x50
+#define CH_LINKATTR 0x70
+#define CH_LINK_SHAREATTR GENMASK(9, 8)
+#define CH_LINK_MEMATTR GENMASK(7, 0)
+
+#define CH_AUTOCFG 0x74
+#define CH_LINKADDR 0x78
+#define CH_LINKADDR_EN BIT(0)
+
+#define CH_LINKADDRHI 0x7c
+#define CH_ERRINFO 0x90
+#define CH_ERRINFO_AXIRDPOISERR BIT(18)
+#define CH_ERRINFO_AXIWRRESPERR BIT(17)
+#define CH_ERRINFO_AXIRDRESPERR BIT(16)
+
+#define CH_BUILDCFG0 0xf8
+#define CH_CFG_INC_WIDTH GENMASK(29, 26)
+#define CH_CFG_DATA_WIDTH GENMASK(24, 22)
+#define CH_CFG_DATA_BUF_SIZE GENMASK(7, 0)
+
+#define CH_BUILDCFG1 0xfc
+#define CH_CFG_HAS_CMDLINK BIT(8)
+#define CH_CFG_HAS_TRIGSEL BIT(7)
+#define CH_CFG_HAS_TRIGIN BIT(5)
+#define CH_CFG_HAS_WRAP BIT(1)
+
+
+#define LINK_REGCLEAR BIT(0)
+#define LINK_INTREN BIT(2)
+#define LINK_CTRL BIT(3)
+#define LINK_SRCADDR BIT(4)
+#define LINK_SRCADDRHI BIT(5)
+#define LINK_DESADDR BIT(6)
+#define LINK_DESADDRHI BIT(7)
+#define LINK_XSIZE BIT(8)
+#define LINK_XSIZEHI BIT(9)
+#define LINK_SRCTRANSCFG BIT(10)
+#define LINK_DESTRANSCFG BIT(11)
+#define LINK_XADDRINC BIT(12)
+#define LINK_FILLVAL BIT(14)
+#define LINK_SRCTRIGINCFG BIT(19)
+#define LINK_DESTRIGINCFG BIT(20)
+#define LINK_AUTOCFG BIT(29)
+#define LINK_LINKADDR BIT(30)
+#define LINK_LINKADDRHI BIT(31)
+
+
+enum ch_ctrl_donetype {
+ CH_CTRL_DONETYPE_NONE = 0,
+ CH_CTRL_DONETYPE_CMD = 1,
+ CH_CTRL_DONETYPE_CYCLE = 3
+};
+
+enum ch_ctrl_xtype {
+ CH_CTRL_XTYPE_DISABLE = 0,
+ CH_CTRL_XTYPE_CONTINUE = 1,
+ CH_CTRL_XTYPE_WRAP = 2,
+ CH_CTRL_XTYPE_FILL = 3
+};
+
+enum ch_cfg_shareattr {
+ SHAREATTR_NSH = 0,
+ SHAREATTR_OSH = 2,
+ SHAREATTR_ISH = 3
+};
+
+enum ch_cfg_memattr {
+ MEMATTR_DEVICE = 0x00,
+ MEMATTR_NC = 0x44,
+ MEMATTR_WB = 0xff
+};
+
+struct d350_desc {
+ struct virt_dma_desc vd;
+ u32 command[16];
+ u16 xsize;
+ u16 xsizehi;
+ u8 tsz;
+};
+
+struct d350_chan {
+ struct virt_dma_chan vc;
+ struct d350_desc *desc;
+ void __iomem *base;
+ int irq;
+ enum dma_status status;
+ dma_cookie_t cookie;
+ u32 residue;
+ u8 tsz;
+ bool has_trig;
+ bool has_wrap;
+ bool coherent;
+};
+
+struct d350 {
+ struct dma_device dma;
+ int nchan;
+ int nreq;
+ struct d350_chan channels[] __counted_by(nchan);
+};
+
+static inline struct d350_chan *to_d350_chan(struct dma_chan *chan)
+{
+ return container_of(chan, struct d350_chan, vc.chan);
+}
+
+static inline struct d350_desc *to_d350_desc(struct virt_dma_desc *vd)
+{
+ return container_of(vd, struct d350_desc, vd);
+}
+
+static void d350_desc_free(struct virt_dma_desc *vd)
+{
+ kfree(to_d350_desc(vd));
+}
+
+static struct dma_async_tx_descriptor *d350_prep_memcpy(struct dma_chan *chan,
+ dma_addr_t dest, dma_addr_t src, size_t len, unsigned long flags)
+{
+ struct d350_chan *dch = to_d350_chan(chan);
+ struct d350_desc *desc;
+ u32 *cmd;
+
+ desc = kzalloc(sizeof(*desc), GFP_NOWAIT);
+ if (!desc)
+ return NULL;
+
+ desc->tsz = __ffs(len | dest | src | (1 << dch->tsz));
+ desc->xsize = lower_16_bits(len >> desc->tsz);
+ desc->xsizehi = upper_16_bits(len >> desc->tsz);
+
+ cmd = desc->command;
+ cmd[0] = LINK_CTRL | LINK_SRCADDR | LINK_SRCADDRHI | LINK_DESADDR |
+ LINK_DESADDRHI | LINK_XSIZE | LINK_XSIZEHI | LINK_SRCTRANSCFG |
+ LINK_DESTRANSCFG | LINK_XADDRINC | LINK_LINKADDR;
+
+ cmd[1] = FIELD_PREP(CH_CTRL_TRANSIZE, desc->tsz) |
+ FIELD_PREP(CH_CTRL_XTYPE, CH_CTRL_XTYPE_CONTINUE) |
+ FIELD_PREP(CH_CTRL_DONETYPE, CH_CTRL_DONETYPE_CMD);
+
+ cmd[2] = lower_32_bits(src);
+ cmd[3] = upper_32_bits(src);
+ cmd[4] = lower_32_bits(dest);
+ cmd[5] = upper_32_bits(dest);
+ cmd[6] = FIELD_PREP(CH_XY_SRC, desc->xsize) | FIELD_PREP(CH_XY_DES, desc->xsize);
+ cmd[7] = FIELD_PREP(CH_XY_SRC, desc->xsizehi) | FIELD_PREP(CH_XY_DES, desc->xsizehi);
+ cmd[8] = dch->coherent ? TRANSCFG_WB : TRANSCFG_NC;
+ cmd[9] = dch->coherent ? TRANSCFG_WB : TRANSCFG_NC;
+ cmd[10] = FIELD_PREP(CH_XY_SRC, 1) | FIELD_PREP(CH_XY_DES, 1);
+ cmd[11] = 0;
+
+ return vchan_tx_prep(&dch->vc, &desc->vd, flags);
+}
+
+static struct dma_async_tx_descriptor *d350_prep_memset(struct dma_chan *chan,
+ dma_addr_t dest, int value, size_t len, unsigned long flags)
+{
+ struct d350_chan *dch = to_d350_chan(chan);
+ struct d350_desc *desc;
+ u32 *cmd;
+
+ desc = kzalloc(sizeof(*desc), GFP_NOWAIT);
+ if (!desc)
+ return NULL;
+
+ desc->tsz = __ffs(len | dest | (1 << dch->tsz));
+ desc->xsize = lower_16_bits(len >> desc->tsz);
+ desc->xsizehi = upper_16_bits(len >> desc->tsz);
+
+ cmd = desc->command;
+ cmd[0] = LINK_CTRL | LINK_DESADDR | LINK_DESADDRHI |
+ LINK_XSIZE | LINK_XSIZEHI | LINK_DESTRANSCFG |
+ LINK_XADDRINC | LINK_FILLVAL | LINK_LINKADDR;
+
+ cmd[1] = FIELD_PREP(CH_CTRL_TRANSIZE, desc->tsz) |
+ FIELD_PREP(CH_CTRL_XTYPE, CH_CTRL_XTYPE_FILL) |
+ FIELD_PREP(CH_CTRL_DONETYPE, CH_CTRL_DONETYPE_CMD);
+
+ cmd[2] = lower_32_bits(dest);
+ cmd[3] = upper_32_bits(dest);
+ cmd[4] = FIELD_PREP(CH_XY_DES, desc->xsize);
+ cmd[5] = FIELD_PREP(CH_XY_DES, desc->xsizehi);
+ cmd[6] = dch->coherent ? TRANSCFG_WB : TRANSCFG_NC;
+ cmd[7] = FIELD_PREP(CH_XY_DES, 1);
+ cmd[8] = (u8)value * 0x01010101;
+ cmd[9] = 0;
+
+ return vchan_tx_prep(&dch->vc, &desc->vd, flags);
+}
+
+static int d350_pause(struct dma_chan *chan)
+{
+ struct d350_chan *dch = to_d350_chan(chan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&dch->vc.lock, flags);
+ if (dch->status == DMA_IN_PROGRESS) {
+ writel_relaxed(CH_CMD_PAUSE, dch->base + CH_CMD);
+ dch->status = DMA_PAUSED;
+ }
+ spin_unlock_irqrestore(&dch->vc.lock, flags);
+
+ return 0;
+}
+
+static int d350_resume(struct dma_chan *chan)
+{
+ struct d350_chan *dch = to_d350_chan(chan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&dch->vc.lock, flags);
+ if (dch->status == DMA_PAUSED) {
+ writel_relaxed(CH_CMD_RESUME, dch->base + CH_CMD);
+ dch->status = DMA_IN_PROGRESS;
+ }
+ spin_unlock_irqrestore(&dch->vc.lock, flags);
+
+ return 0;
+}
+
+static u32 d350_get_residue(struct d350_chan *dch)
+{
+ u32 res, xsize, xsizehi, hi_new;
+ int retries = 3; /* 1st time unlucky, 2nd improbable, 3rd just broken */
+
+ hi_new = readl_relaxed(dch->base + CH_XSIZEHI);
+ do {
+ xsizehi = hi_new;
+ xsize = readl_relaxed(dch->base + CH_XSIZE);
+ hi_new = readl_relaxed(dch->base + CH_XSIZEHI);
+ } while (xsizehi != hi_new && --retries);
+
+ res = FIELD_GET(CH_XY_DES, xsize);
+ res |= FIELD_GET(CH_XY_DES, xsizehi) << 16;
+
+ return res << dch->desc->tsz;
+}
+
+static int d350_terminate_all(struct dma_chan *chan)
+{
+ struct d350_chan *dch = to_d350_chan(chan);
+ unsigned long flags;
+ LIST_HEAD(list);
+
+ spin_lock_irqsave(&dch->vc.lock, flags);
+ writel_relaxed(CH_CMD_STOP, dch->base + CH_CMD);
+ if (dch->desc) {
+ if (dch->status != DMA_ERROR)
+ vchan_terminate_vdesc(&dch->desc->vd);
+ dch->desc = NULL;
+ dch->status = DMA_COMPLETE;
+ }
+ vchan_get_all_descriptors(&dch->vc, &list);
+ list_splice_tail(&list, &dch->vc.desc_terminated);
+ spin_unlock_irqrestore(&dch->vc.lock, flags);
+
+ return 0;
+}
+
+static void d350_synchronize(struct dma_chan *chan)
+{
+ struct d350_chan *dch = to_d350_chan(chan);
+
+ vchan_synchronize(&dch->vc);
+}
+
+static u32 d350_desc_bytes(struct d350_desc *desc)
+{
+ return ((u32)desc->xsizehi << 16 | desc->xsize) << desc->tsz;
+}
+
+static enum dma_status d350_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
+ struct dma_tx_state *state)
+{
+ struct d350_chan *dch = to_d350_chan(chan);
+ struct virt_dma_desc *vd;
+ enum dma_status status;
+ unsigned long flags;
+ u32 residue = 0;
+
+ status = dma_cookie_status(chan, cookie, state);
+
+ spin_lock_irqsave(&dch->vc.lock, flags);
+ if (cookie == dch->cookie) {
+ status = dch->status;
+ if (status == DMA_IN_PROGRESS || status == DMA_PAUSED)
+ dch->residue = d350_get_residue(dch);
+ residue = dch->residue;
+ } else if ((vd = vchan_find_desc(&dch->vc, cookie))) {
+ residue = d350_desc_bytes(to_d350_desc(vd));
+ } else if (status == DMA_IN_PROGRESS) {
+ /* Somebody else terminated it? */
+ status = DMA_ERROR;
+ }
+ spin_unlock_irqrestore(&dch->vc.lock, flags);
+
+ dma_set_residue(state, residue);
+ return status;
+}
+
+static void d350_start_next(struct d350_chan *dch)
+{
+ u32 hdr, *reg;
+
+ dch->desc = to_d350_desc(vchan_next_desc(&dch->vc));
+ if (!dch->desc)
+ return;
+
+ list_del(&dch->desc->vd.node);
+ dch->status = DMA_IN_PROGRESS;
+ dch->cookie = dch->desc->vd.tx.cookie;
+ dch->residue = d350_desc_bytes(dch->desc);
+
+ hdr = dch->desc->command[0];
+ reg = &dch->desc->command[1];
+
+ if (hdr & LINK_INTREN)
+ writel_relaxed(*reg++, dch->base + CH_INTREN);
+ if (hdr & LINK_CTRL)
+ writel_relaxed(*reg++, dch->base + CH_CTRL);
+ if (hdr & LINK_SRCADDR)
+ writel_relaxed(*reg++, dch->base + CH_SRCADDR);
+ if (hdr & LINK_SRCADDRHI)
+ writel_relaxed(*reg++, dch->base + CH_SRCADDRHI);
+ if (hdr & LINK_DESADDR)
+ writel_relaxed(*reg++, dch->base + CH_DESADDR);
+ if (hdr & LINK_DESADDRHI)
+ writel_relaxed(*reg++, dch->base + CH_DESADDRHI);
+ if (hdr & LINK_XSIZE)
+ writel_relaxed(*reg++, dch->base + CH_XSIZE);
+ if (hdr & LINK_XSIZEHI)
+ writel_relaxed(*reg++, dch->base + CH_XSIZEHI);
+ if (hdr & LINK_SRCTRANSCFG)
+ writel_relaxed(*reg++, dch->base + CH_SRCTRANSCFG);
+ if (hdr & LINK_DESTRANSCFG)
+ writel_relaxed(*reg++, dch->base + CH_DESTRANSCFG);
+ if (hdr & LINK_XADDRINC)
+ writel_relaxed(*reg++, dch->base + CH_XADDRINC);
+ if (hdr & LINK_FILLVAL)
+ writel_relaxed(*reg++, dch->base + CH_FILLVAL);
+ if (hdr & LINK_SRCTRIGINCFG)
+ writel_relaxed(*reg++, dch->base + CH_SRCTRIGINCFG);
+ if (hdr & LINK_DESTRIGINCFG)
+ writel_relaxed(*reg++, dch->base + CH_DESTRIGINCFG);
+ if (hdr & LINK_AUTOCFG)
+ writel_relaxed(*reg++, dch->base + CH_AUTOCFG);
+ if (hdr & LINK_LINKADDR)
+ writel_relaxed(*reg++, dch->base + CH_LINKADDR);
+ if (hdr & LINK_LINKADDRHI)
+ writel_relaxed(*reg++, dch->base + CH_LINKADDRHI);
+
+ writel(CH_CMD_ENABLE, dch->base + CH_CMD);
+}
+
+static void d350_issue_pending(struct dma_chan *chan)
+{
+ struct d350_chan *dch = to_d350_chan(chan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&dch->vc.lock, flags);
+ if (vchan_issue_pending(&dch->vc) && !dch->desc)
+ d350_start_next(dch);
+ spin_unlock_irqrestore(&dch->vc.lock, flags);
+}
+
+static irqreturn_t d350_irq(int irq, void *data)
+{
+ struct d350_chan *dch = data;
+ struct device *dev = dch->vc.chan.device->dev;
+ struct virt_dma_desc *vd = &dch->desc->vd;
+ u32 ch_status;
+
+ ch_status = readl(dch->base + CH_STATUS);
+ if (!ch_status)
+ return IRQ_NONE;
+
+ if (ch_status & CH_STAT_INTR_ERR) {
+ u32 errinfo = readl_relaxed(dch->base + CH_ERRINFO);
+
+ if (errinfo & (CH_ERRINFO_AXIRDPOISERR | CH_ERRINFO_AXIRDRESPERR))
+ vd->tx_result.result = DMA_TRANS_READ_FAILED;
+ else if (errinfo & CH_ERRINFO_AXIWRRESPERR)
+ vd->tx_result.result = DMA_TRANS_WRITE_FAILED;
+ else
+ vd->tx_result.result = DMA_TRANS_ABORTED;
+
+ vd->tx_result.residue = d350_get_residue(dch);
+ } else if (!(ch_status & CH_STAT_INTR_DONE)) {
+ dev_warn(dev, "Unexpected IRQ source? 0x%08x\n", ch_status);
+ }
+ writel_relaxed(ch_status, dch->base + CH_STATUS);
+
+ spin_lock(&dch->vc.lock);
+ vchan_cookie_complete(vd);
+ if (ch_status & CH_STAT_INTR_DONE) {
+ dch->status = DMA_COMPLETE;
+ dch->residue = 0;
+ d350_start_next(dch);
+ } else {
+ dch->status = DMA_ERROR;
+ dch->residue = vd->tx_result.residue;
+ }
+ spin_unlock(&dch->vc.lock);
+
+ return IRQ_HANDLED;
+}
+
+static int d350_alloc_chan_resources(struct dma_chan *chan)
+{
+ struct d350_chan *dch = to_d350_chan(chan);
+ int ret = request_irq(dch->irq, d350_irq, IRQF_SHARED,
+ dev_name(&dch->vc.chan.dev->device), dch);
+ if (!ret)
+ writel_relaxed(CH_INTREN_DONE | CH_INTREN_ERR, dch->base + CH_INTREN);
+
+ return ret;
+}
+
+static void d350_free_chan_resources(struct dma_chan *chan)
+{
+ struct d350_chan *dch = to_d350_chan(chan);
+
+ writel_relaxed(0, dch->base + CH_INTREN);
+ free_irq(dch->irq, dch);
+ vchan_free_chan_resources(&dch->vc);
+}
+
+static int d350_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct d350 *dmac;
+ void __iomem *base;
+ u32 reg;
+ int ret, nchan, dw, aw, r, p;
+ bool coherent, memset;
+
+ base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(base))
+ return PTR_ERR(base);
+
+ reg = readl_relaxed(base + DMAINFO + IIDR);
+ r = FIELD_GET(IIDR_VARIANT, reg);
+ p = FIELD_GET(IIDR_REVISION, reg);
+ if (FIELD_GET(IIDR_IMPLEMENTER, reg) != IMPLEMENTER_ARM ||
+ FIELD_GET(IIDR_PRODUCTID, reg) != PRODUCTID_DMA350)
+ return dev_err_probe(dev, -ENODEV, "Not a DMA-350!");
+
+ reg = readl_relaxed(base + DMAINFO + DMA_BUILDCFG0);
+ nchan = FIELD_GET(DMA_CFG_NUM_CHANNELS, reg) + 1;
+ dw = 1 << FIELD_GET(DMA_CFG_DATA_WIDTH, reg);
+ aw = FIELD_GET(DMA_CFG_ADDR_WIDTH, reg) + 1;
+
+ dma_set_mask_and_coherent(dev, DMA_BIT_MASK(aw));
+ coherent = device_get_dma_attr(dev) == DEV_DMA_COHERENT;
+
+ dmac = devm_kzalloc(dev, struct_size(dmac, channels, nchan), GFP_KERNEL);
+ if (!dmac)
+ return -ENOMEM;
+
+ dmac->nchan = nchan;
+
+ reg = readl_relaxed(base + DMAINFO + DMA_BUILDCFG1);
+ dmac->nreq = FIELD_GET(DMA_CFG_NUM_TRIGGER_IN, reg);
+
+ dev_dbg(dev, "DMA-350 r%dp%d with %d channels, %d requests\n", r, p, dmac->nchan, dmac->nreq);
+
+ dmac->dma.dev = dev;
+ for (int i = min(dw, 16); i > 0; i /= 2) {
+ dmac->dma.src_addr_widths |= BIT(i);
+ dmac->dma.dst_addr_widths |= BIT(i);
+ }
+ dmac->dma.directions = BIT(DMA_MEM_TO_MEM);
+ dmac->dma.descriptor_reuse = true;
+ dmac->dma.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+ dmac->dma.device_alloc_chan_resources = d350_alloc_chan_resources;
+ dmac->dma.device_free_chan_resources = d350_free_chan_resources;
+ dma_cap_set(DMA_MEMCPY, dmac->dma.cap_mask);
+ dmac->dma.device_prep_dma_memcpy = d350_prep_memcpy;
+ dmac->dma.device_pause = d350_pause;
+ dmac->dma.device_resume = d350_resume;
+ dmac->dma.device_terminate_all = d350_terminate_all;
+ dmac->dma.device_synchronize = d350_synchronize;
+ dmac->dma.device_tx_status = d350_tx_status;
+ dmac->dma.device_issue_pending = d350_issue_pending;
+ INIT_LIST_HEAD(&dmac->dma.channels);
+
+ /* Would be nice to have per-channel caps for this... */
+ memset = true;
+ for (int i = 0; i < nchan; i++) {
+ struct d350_chan *dch = &dmac->channels[i];
+
+ dch->base = base + DMACH(i);
+ writel_relaxed(CH_CMD_CLEAR, dch->base + CH_CMD);
+
+ reg = readl_relaxed(dch->base + CH_BUILDCFG1);
+ if (!(FIELD_GET(CH_CFG_HAS_CMDLINK, reg))) {
+ dev_warn(dev, "No command link support on channel %d\n", i);
+ continue;
+ }
+ dch->irq = platform_get_irq(pdev, i);
+ if (dch->irq < 0)
+ return dev_err_probe(dev, dch->irq,
+ "Failed to get IRQ for channel %d\n", i);
+
+ dch->has_wrap = FIELD_GET(CH_CFG_HAS_WRAP, reg);
+ dch->has_trig = FIELD_GET(CH_CFG_HAS_TRIGIN, reg) &
+ FIELD_GET(CH_CFG_HAS_TRIGSEL, reg);
+
+ /* Fill is a special case of Wrap */
+ memset &= dch->has_wrap;
+
+ reg = readl_relaxed(dch->base + CH_BUILDCFG0);
+ dch->tsz = FIELD_GET(CH_CFG_DATA_WIDTH, reg);
+
+ reg = FIELD_PREP(CH_LINK_SHAREATTR, coherent ? SHAREATTR_ISH : SHAREATTR_OSH);
+ reg |= FIELD_PREP(CH_LINK_MEMATTR, coherent ? MEMATTR_WB : MEMATTR_NC);
+ writel_relaxed(reg, dch->base + CH_LINKATTR);
+
+ dch->vc.desc_free = d350_desc_free;
+ vchan_init(&dch->vc, &dmac->dma);
+ }
+
+ if (memset) {
+ dma_cap_set(DMA_MEMSET, dmac->dma.cap_mask);
+ dmac->dma.device_prep_dma_memset = d350_prep_memset;
+ }
+
+ platform_set_drvdata(pdev, dmac);
+
+ ret = dma_async_device_register(&dmac->dma);
+ if (ret)
+ return dev_err_probe(dev, ret, "Failed to register DMA device\n");
+
+ return 0;
+}
+
+static void d350_remove(struct platform_device *pdev)
+{
+ struct d350 *dmac = platform_get_drvdata(pdev);
+
+ dma_async_device_unregister(&dmac->dma);
+}
+
+static const struct of_device_id d350_of_match[] __maybe_unused = {
+ { .compatible = "arm,dma-350" },
+ {}
+};
+MODULE_DEVICE_TABLE(of, d350_of_match);
+
+static struct platform_driver d350_driver = {
+ .driver = {
+ .name = "arm-dma350",
+ .of_match_table = of_match_ptr(d350_of_match),
+ },
+ .probe = d350_probe,
+ .remove = d350_remove,
+};
+module_platform_driver(d350_driver);
+
+MODULE_AUTHOR("Robin Murphy <robin.murphy@arm.com>");
+MODULE_DESCRIPTION("Arm DMA-350 driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c
index ba25c23164e7..3fbc74710a13 100644
--- a/drivers/dma/at_xdmac.c
+++ b/drivers/dma/at_xdmac.c
@@ -2033,10 +2033,8 @@ static int at_xdmac_device_terminate_all(struct dma_chan *chan)
* at_xdmac_start_xfer() for this descriptor. Now it's time
* to release it.
*/
- if (desc->active_xfer) {
- pm_runtime_put_autosuspend(atxdmac->dev);
- pm_runtime_mark_last_busy(atxdmac->dev);
- }
+ if (desc->active_xfer)
+ pm_runtime_put_noidle(atxdmac->dev);
}
clear_bit(AT_XDMAC_CHAN_IS_PAUSED, &atchan->status);
diff --git a/drivers/dma/dw-edma/dw-edma-pcie.c b/drivers/dma/dw-edma/dw-edma-pcie.c
index 1c6043751dc9..49f09998e5c0 100644
--- a/drivers/dma/dw-edma/dw-edma-pcie.c
+++ b/drivers/dma/dw-edma/dw-edma-pcie.c
@@ -136,7 +136,8 @@ static void dw_edma_pcie_get_vsec_dma_data(struct pci_dev *pdev,
map = FIELD_GET(DW_PCIE_VSEC_DMA_MAP, val);
if (map != EDMA_MF_EDMA_LEGACY &&
map != EDMA_MF_EDMA_UNROLL &&
- map != EDMA_MF_HDMA_COMPAT)
+ map != EDMA_MF_HDMA_COMPAT &&
+ map != EDMA_MF_HDMA_NATIVE)
return;
pdata->mf = map;
@@ -291,6 +292,8 @@ static int dw_edma_pcie_probe(struct pci_dev *pdev,
pci_dbg(pdev, "Version:\teDMA Unroll (0x%x)\n", chip->mf);
else if (chip->mf == EDMA_MF_HDMA_COMPAT)
pci_dbg(pdev, "Version:\tHDMA Compatible (0x%x)\n", chip->mf);
+ else if (chip->mf == EDMA_MF_HDMA_NATIVE)
+ pci_dbg(pdev, "Version:\tHDMA Native (0x%x)\n", chip->mf);
else
pci_dbg(pdev, "Version:\tUnknown (0x%x)\n", chip->mf);
diff --git a/drivers/dma/fsl-edma-common.c b/drivers/dma/fsl-edma-common.c
index 443b2430466c..4976d7dde080 100644
--- a/drivers/dma/fsl-edma-common.c
+++ b/drivers/dma/fsl-edma-common.c
@@ -95,7 +95,7 @@ static void fsl_edma3_enable_request(struct fsl_edma_chan *fsl_chan)
}
val = edma_readl_chreg(fsl_chan, ch_csr);
- val |= EDMA_V3_CH_CSR_ERQ;
+ val |= EDMA_V3_CH_CSR_ERQ | EDMA_V3_CH_CSR_EEI;
edma_writel_chreg(fsl_chan, val, ch_csr);
}
@@ -821,7 +821,7 @@ void fsl_edma_issue_pending(struct dma_chan *chan)
int fsl_edma_alloc_chan_resources(struct dma_chan *chan)
{
struct fsl_edma_chan *fsl_chan = to_fsl_edma_chan(chan);
- int ret;
+ int ret = 0;
if (fsl_edma_drvflags(fsl_chan) & FSL_EDMA_DRV_HAS_CHCLK)
clk_prepare_enable(fsl_chan->clk);
@@ -831,17 +831,29 @@ int fsl_edma_alloc_chan_resources(struct dma_chan *chan)
sizeof(struct fsl_edma_hw_tcd64) : sizeof(struct fsl_edma_hw_tcd),
32, 0);
- if (fsl_chan->txirq) {
+ if (fsl_chan->txirq)
ret = request_irq(fsl_chan->txirq, fsl_chan->irq_handler, IRQF_SHARED,
fsl_chan->chan_name, fsl_chan);
- if (ret) {
- dma_pool_destroy(fsl_chan->tcd_pool);
- return ret;
- }
- }
+ if (ret)
+ goto err_txirq;
+
+ if (fsl_chan->errirq > 0)
+ ret = request_irq(fsl_chan->errirq, fsl_chan->errirq_handler, IRQF_SHARED,
+ fsl_chan->errirq_name, fsl_chan);
+
+ if (ret)
+ goto err_errirq;
return 0;
+
+err_errirq:
+ if (fsl_chan->txirq)
+ free_irq(fsl_chan->txirq, fsl_chan);
+err_txirq:
+ dma_pool_destroy(fsl_chan->tcd_pool);
+
+ return ret;
}
void fsl_edma_free_chan_resources(struct dma_chan *chan)
@@ -862,6 +874,8 @@ void fsl_edma_free_chan_resources(struct dma_chan *chan)
if (fsl_chan->txirq)
free_irq(fsl_chan->txirq, fsl_chan);
+ if (fsl_chan->errirq)
+ free_irq(fsl_chan->errirq, fsl_chan);
vchan_dma_desc_free_list(&fsl_chan->vchan, &head);
dma_pool_destroy(fsl_chan->tcd_pool);
diff --git a/drivers/dma/fsl-edma-common.h b/drivers/dma/fsl-edma-common.h
index 10a5565ddfd7..205a96489094 100644
--- a/drivers/dma/fsl-edma-common.h
+++ b/drivers/dma/fsl-edma-common.h
@@ -71,6 +71,18 @@
#define EDMA_V3_CH_ES_ERR BIT(31)
#define EDMA_V3_MP_ES_VLD BIT(31)
+#define EDMA_V3_CH_ERR_DBE BIT(0)
+#define EDMA_V3_CH_ERR_SBE BIT(1)
+#define EDMA_V3_CH_ERR_SGE BIT(2)
+#define EDMA_V3_CH_ERR_NCE BIT(3)
+#define EDMA_V3_CH_ERR_DOE BIT(4)
+#define EDMA_V3_CH_ERR_DAE BIT(5)
+#define EDMA_V3_CH_ERR_SOE BIT(6)
+#define EDMA_V3_CH_ERR_SAE BIT(7)
+#define EDMA_V3_CH_ERR_ECX BIT(8)
+#define EDMA_V3_CH_ERR_UCE BIT(9)
+#define EDMA_V3_CH_ERR BIT(31)
+
enum fsl_edma_pm_state {
RUNNING = 0,
SUSPENDED,
@@ -162,6 +174,7 @@ struct fsl_edma_chan {
u32 dma_dev_size;
enum dma_data_direction dma_dir;
char chan_name[32];
+ char errirq_name[36];
void __iomem *tcd;
void __iomem *mux_addr;
u32 real_count;
@@ -174,7 +187,9 @@ struct fsl_edma_chan {
int priority;
int hw_chanid;
int txirq;
+ int errirq;
irqreturn_t (*irq_handler)(int irq, void *dev_id);
+ irqreturn_t (*errirq_handler)(int irq, void *dev_id);
bool is_rxchan;
bool is_remote;
bool is_multi_fifo;
@@ -208,6 +223,9 @@ struct fsl_edma_desc {
/* Need clean CHn_CSR DONE before enable TCD's MAJORELINK */
#define FSL_EDMA_DRV_CLEAR_DONE_E_LINK BIT(14)
#define FSL_EDMA_DRV_TCD64 BIT(15)
+/* All channel ERR IRQ share one IRQ line */
+#define FSL_EDMA_DRV_ERRIRQ_SHARE BIT(16)
+
#define FSL_EDMA_DRV_EDMA3 (FSL_EDMA_DRV_SPLIT_REG | \
FSL_EDMA_DRV_BUS_8BYTE | \
diff --git a/drivers/dma/fsl-edma-main.c b/drivers/dma/fsl-edma-main.c
index 66bfa28d984e..97583c7d51a2 100644
--- a/drivers/dma/fsl-edma-main.c
+++ b/drivers/dma/fsl-edma-main.c
@@ -50,6 +50,83 @@ static irqreturn_t fsl_edma_tx_handler(int irq, void *dev_id)
return IRQ_HANDLED;
}
+static void fsl_edma3_err_check(struct fsl_edma_chan *fsl_chan)
+{
+ unsigned int ch_err;
+ u32 val;
+
+ scoped_guard(spinlock, &fsl_chan->vchan.lock) {
+ ch_err = edma_readl_chreg(fsl_chan, ch_es);
+ if (!(ch_err & EDMA_V3_CH_ERR))
+ return;
+
+ edma_writel_chreg(fsl_chan, EDMA_V3_CH_ERR, ch_es);
+ val = edma_readl_chreg(fsl_chan, ch_csr);
+ val &= ~EDMA_V3_CH_CSR_ERQ;
+ edma_writel_chreg(fsl_chan, val, ch_csr);
+ }
+
+ /* Ignore this interrupt since channel has been disabled already */
+ if (!fsl_chan->edesc)
+ return;
+
+ if (ch_err & EDMA_V3_CH_ERR_DBE)
+ dev_err(&fsl_chan->pdev->dev, "Destination Bus Error interrupt.\n");
+
+ if (ch_err & EDMA_V3_CH_ERR_SBE)
+ dev_err(&fsl_chan->pdev->dev, "Source Bus Error interrupt.\n");
+
+ if (ch_err & EDMA_V3_CH_ERR_SGE)
+ dev_err(&fsl_chan->pdev->dev, "Scatter/Gather Configuration Error interrupt.\n");
+
+ if (ch_err & EDMA_V3_CH_ERR_NCE)
+ dev_err(&fsl_chan->pdev->dev, "NBYTES/CITER Configuration Error interrupt.\n");
+
+ if (ch_err & EDMA_V3_CH_ERR_DOE)
+ dev_err(&fsl_chan->pdev->dev, "Destination Offset Error interrupt.\n");
+
+ if (ch_err & EDMA_V3_CH_ERR_DAE)
+ dev_err(&fsl_chan->pdev->dev, "Destination Address Error interrupt.\n");
+
+ if (ch_err & EDMA_V3_CH_ERR_SOE)
+ dev_err(&fsl_chan->pdev->dev, "Source Offset Error interrupt.\n");
+
+ if (ch_err & EDMA_V3_CH_ERR_SAE)
+ dev_err(&fsl_chan->pdev->dev, "Source Address Error interrupt.\n");
+
+ if (ch_err & EDMA_V3_CH_ERR_ECX)
+ dev_err(&fsl_chan->pdev->dev, "Transfer Canceled interrupt.\n");
+
+ if (ch_err & EDMA_V3_CH_ERR_UCE)
+ dev_err(&fsl_chan->pdev->dev, "Uncorrectable TCD error during channel execution interrupt.\n");
+
+ fsl_chan->status = DMA_ERROR;
+}
+
+static irqreturn_t fsl_edma3_err_handler_per_chan(int irq, void *dev_id)
+{
+ struct fsl_edma_chan *fsl_chan = dev_id;
+
+ fsl_edma3_err_check(fsl_chan);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t fsl_edma3_err_handler_shared(int irq, void *dev_id)
+{
+ struct fsl_edma_engine *fsl_edma = dev_id;
+ unsigned int ch;
+
+ for (ch = 0; ch < fsl_edma->n_chans; ch++) {
+ if (fsl_edma->chan_masked & BIT(ch))
+ continue;
+
+ fsl_edma3_err_check(&fsl_edma->chans[ch]);
+ }
+
+ return IRQ_HANDLED;
+}
+
static irqreturn_t fsl_edma3_tx_handler(int irq, void *dev_id)
{
struct fsl_edma_chan *fsl_chan = dev_id;
@@ -309,7 +386,8 @@ fsl_edma_irq_init(struct platform_device *pdev, struct fsl_edma_engine *fsl_edma
static int fsl_edma3_irq_init(struct platform_device *pdev, struct fsl_edma_engine *fsl_edma)
{
- int i;
+ char *errirq_name;
+ int i, ret;
for (i = 0; i < fsl_edma->n_chans; i++) {
@@ -324,6 +402,27 @@ static int fsl_edma3_irq_init(struct platform_device *pdev, struct fsl_edma_engi
return -EINVAL;
fsl_chan->irq_handler = fsl_edma3_tx_handler;
+
+ if (!(fsl_edma->drvdata->flags & FSL_EDMA_DRV_ERRIRQ_SHARE)) {
+ fsl_chan->errirq = fsl_chan->txirq;
+ fsl_chan->errirq_handler = fsl_edma3_err_handler_per_chan;
+ }
+ }
+
+ /* All channel err use one irq number */
+ if (fsl_edma->drvdata->flags & FSL_EDMA_DRV_ERRIRQ_SHARE) {
+ /* last one is error irq */
+ fsl_edma->errirq = platform_get_irq_optional(pdev, fsl_edma->n_chans);
+ if (fsl_edma->errirq < 0)
+ return 0; /* dts miss err irq, treat as no err irq case */
+
+ errirq_name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "%s-err",
+ dev_name(&pdev->dev));
+
+ ret = devm_request_irq(&pdev->dev, fsl_edma->errirq, fsl_edma3_err_handler_shared,
+ 0, errirq_name, fsl_edma);
+ if (ret)
+ return dev_err_probe(&pdev->dev, ret, "Can't register eDMA err IRQ.\n");
}
return 0;
@@ -464,7 +563,8 @@ static struct fsl_edma_drvdata imx7ulp_data = {
};
static struct fsl_edma_drvdata imx8qm_data = {
- .flags = FSL_EDMA_DRV_HAS_PD | FSL_EDMA_DRV_EDMA3 | FSL_EDMA_DRV_MEM_REMOTE,
+ .flags = FSL_EDMA_DRV_HAS_PD | FSL_EDMA_DRV_EDMA3 | FSL_EDMA_DRV_MEM_REMOTE
+ | FSL_EDMA_DRV_ERRIRQ_SHARE,
.chreg_space_sz = 0x10000,
.chreg_off = 0x10000,
.setup_irq = fsl_edma3_irq_init,
@@ -481,14 +581,15 @@ static struct fsl_edma_drvdata imx8ulp_data = {
};
static struct fsl_edma_drvdata imx93_data3 = {
- .flags = FSL_EDMA_DRV_HAS_DMACLK | FSL_EDMA_DRV_EDMA3,
+ .flags = FSL_EDMA_DRV_HAS_DMACLK | FSL_EDMA_DRV_EDMA3 | FSL_EDMA_DRV_ERRIRQ_SHARE,
.chreg_space_sz = 0x10000,
.chreg_off = 0x10000,
.setup_irq = fsl_edma3_irq_init,
};
static struct fsl_edma_drvdata imx93_data4 = {
- .flags = FSL_EDMA_DRV_HAS_CHMUX | FSL_EDMA_DRV_HAS_DMACLK | FSL_EDMA_DRV_EDMA4,
+ .flags = FSL_EDMA_DRV_HAS_CHMUX | FSL_EDMA_DRV_HAS_DMACLK | FSL_EDMA_DRV_EDMA4
+ | FSL_EDMA_DRV_ERRIRQ_SHARE,
.chreg_space_sz = 0x8000,
.chreg_off = 0x10000,
.mux_off = 0x10000 + offsetof(struct fsl_edma3_ch_reg, ch_mux),
@@ -498,7 +599,7 @@ static struct fsl_edma_drvdata imx93_data4 = {
static struct fsl_edma_drvdata imx95_data5 = {
.flags = FSL_EDMA_DRV_HAS_CHMUX | FSL_EDMA_DRV_HAS_DMACLK | FSL_EDMA_DRV_EDMA4 |
- FSL_EDMA_DRV_TCD64,
+ FSL_EDMA_DRV_TCD64 | FSL_EDMA_DRV_ERRIRQ_SHARE,
.chreg_space_sz = 0x8000,
.chreg_off = 0x10000,
.mux_off = 0x200,
@@ -700,6 +801,9 @@ static int fsl_edma_probe(struct platform_device *pdev)
snprintf(fsl_chan->chan_name, sizeof(fsl_chan->chan_name), "%s-CH%02d",
dev_name(&pdev->dev), i);
+ snprintf(fsl_chan->errirq_name, sizeof(fsl_chan->errirq_name),
+ "%s-CH%02d-err", dev_name(&pdev->dev), i);
+
fsl_chan->edma = fsl_edma;
fsl_chan->pm_state = RUNNING;
fsl_chan->srcid = 0;
diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index b5e7d18b9766..9b126a260267 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -1226,6 +1226,8 @@ static int fsldma_of_probe(struct platform_device *op)
fdev->dev = &op->dev;
INIT_LIST_HEAD(&fdev->common.channels);
+ /* The DMA address bits supported for this device. */
+ fdev->addr_bits = (long)device_get_match_data(fdev->dev);
/* ioremap the registers for use */
fdev->regs = of_iomap(op->dev.of_node, 0);
@@ -1254,7 +1256,7 @@ static int fsldma_of_probe(struct platform_device *op)
fdev->common.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
fdev->common.residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
- dma_set_mask(&(op->dev), DMA_BIT_MASK(36));
+ dma_set_mask(&(op->dev), DMA_BIT_MASK(fdev->addr_bits));
platform_set_drvdata(op, fdev);
@@ -1387,10 +1389,20 @@ static const struct dev_pm_ops fsldma_pm_ops = {
};
#endif
+/* The .data field is used for dma-bit-mask. */
static const struct of_device_id fsldma_of_ids[] = {
- { .compatible = "fsl,elo3-dma", },
- { .compatible = "fsl,eloplus-dma", },
- { .compatible = "fsl,elo-dma", },
+ {
+ .compatible = "fsl,elo3-dma",
+ .data = (void *)40,
+ },
+ {
+ .compatible = "fsl,eloplus-dma",
+ .data = (void *)36,
+ },
+ {
+ .compatible = "fsl,elo-dma",
+ .data = (void *)32,
+ },
{}
};
MODULE_DEVICE_TABLE(of, fsldma_of_ids);
diff --git a/drivers/dma/fsldma.h b/drivers/dma/fsldma.h
index 308bed0a560a..d7b7a3138b85 100644
--- a/drivers/dma/fsldma.h
+++ b/drivers/dma/fsldma.h
@@ -124,6 +124,7 @@ struct fsldma_device {
struct fsldma_chan *chan[FSL_DMA_MAX_CHANS_PER_DEVICE];
u32 feature; /* The same as DMA channels */
int irq; /* Channel IRQ */
+ int addr_bits; /* DMA addressing bits supported */
};
/* Define macros for fsldma_chan->feature property */
diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c
index 6d12033649f8..7e4715f92773 100644
--- a/drivers/dma/idxd/cdev.c
+++ b/drivers/dma/idxd/cdev.c
@@ -349,7 +349,9 @@ static void idxd_cdev_evl_drain_pasid(struct idxd_wq *wq, u32 pasid)
set_bit(h, evl->bmap);
h = (h + 1) % size;
}
- drain_workqueue(wq->wq);
+ if (wq->wq)
+ drain_workqueue(wq->wq);
+
mutex_unlock(&evl->lock);
}
@@ -442,10 +444,12 @@ static int idxd_submit_user_descriptor(struct idxd_user_context *ctx,
* DSA devices are capable of indirect ("batch") command submission.
* On devices where direct user submissions are not safe, we cannot
* allow this since there is no good way for us to verify these
- * indirect commands.
+ * indirect commands. Narrow the restriction of operations with the
+ * BATCH opcode to only DSA version 1 devices.
*/
if (is_dsa_dev(idxd_dev) && descriptor.opcode == DSA_OPCODE_BATCH &&
- !wq->idxd->user_submission_safe)
+ wq->idxd->hw.version == DEVICE_VERSION_1 &&
+ !wq->idxd->user_submission_safe)
return -EINVAL;
/*
* As per the programming specification, the completion address must be
diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h
index 214b8039439f..74e6695881e6 100644
--- a/drivers/dma/idxd/idxd.h
+++ b/drivers/dma/idxd/idxd.h
@@ -19,7 +19,6 @@
#define IDXD_DRIVER_VERSION "1.00"
-extern struct kmem_cache *idxd_desc_pool;
extern bool tc_override;
struct idxd_wq;
@@ -171,7 +170,6 @@ struct idxd_cdev {
#define DRIVER_NAME_SIZE 128
-#define IDXD_ALLOCATED_BATCH_SIZE 128U
#define WQ_NAME_SIZE 1024
#define WQ_TYPE_SIZE 10
diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c
index 6af493f6ba77..9f0701021af0 100644
--- a/drivers/dma/idxd/sysfs.c
+++ b/drivers/dma/idxd/sysfs.c
@@ -1208,9 +1208,11 @@ static ssize_t op_cap_show_common(struct device *dev, char *buf, unsigned long *
/* On systems where direct user submissions are not safe, we need to clear out
* the BATCH capability from the capability mask in sysfs since we cannot support
- * that command on such systems.
+ * that command on such systems. Narrow the restriction of operations with the
+ * BATCH opcode to only DSA version 1 devices.
*/
- if (i == DSA_OPCODE_BATCH/64 && !confdev_to_idxd(dev)->user_submission_safe)
+ if (i == DSA_OPCODE_BATCH/64 && !confdev_to_idxd(dev)->user_submission_safe &&
+ confdev_to_idxd(dev)->hw.version == DEVICE_VERSION_1)
clear_bit(DSA_OPCODE_BATCH % 64, &val);
pos += sysfs_emit_at(buf, pos, "%*pb", 64, &val);
diff --git a/drivers/dma/sh/rz-dmac.c b/drivers/dma/sh/rz-dmac.c
index 9235db551026..1f687b08d6b8 100644
--- a/drivers/dma/sh/rz-dmac.c
+++ b/drivers/dma/sh/rz-dmac.c
@@ -14,6 +14,7 @@
#include <linux/dmaengine.h>
#include <linux/interrupt.h>
#include <linux/iopoll.h>
+#include <linux/irqchip/irq-renesas-rzv2h.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/of.h>
@@ -89,8 +90,14 @@ struct rz_dmac_chan {
#define to_rz_dmac_chan(c) container_of(c, struct rz_dmac_chan, vc.chan)
+struct rz_dmac_icu {
+ struct platform_device *pdev;
+ u8 dmac_index;
+};
+
struct rz_dmac {
struct dma_device engine;
+ struct rz_dmac_icu icu;
struct device *dev;
struct reset_control *rstc;
void __iomem *base;
@@ -99,6 +106,8 @@ struct rz_dmac {
unsigned int n_channels;
struct rz_dmac_chan *channels;
+ bool has_icu;
+
DECLARE_BITMAP(modules, 1024);
};
@@ -167,6 +176,9 @@ struct rz_dmac {
#define RZ_DMAC_MAX_CHANNELS 16
#define DMAC_NR_LMDESC 64
+/* RZ/V2H ICU related */
+#define RZV2H_MAX_DMAC_INDEX 4
+
/*
* -----------------------------------------------------------------------------
* Device access
@@ -324,7 +336,13 @@ static void rz_dmac_prepare_desc_for_memcpy(struct rz_dmac_chan *channel)
lmdesc->chext = 0;
lmdesc->header = HEADER_LV;
- rz_dmac_set_dmars_register(dmac, channel->index, 0);
+ if (dmac->has_icu) {
+ rzv2h_icu_register_dma_req(dmac->icu.pdev, dmac->icu.dmac_index,
+ channel->index,
+ RZV2H_ICU_DMAC_REQ_NO_DEFAULT);
+ } else {
+ rz_dmac_set_dmars_register(dmac, channel->index, 0);
+ }
channel->chcfg = chcfg;
channel->chctrl = CHCTRL_STG | CHCTRL_SETEN;
@@ -375,7 +393,13 @@ static void rz_dmac_prepare_descs_for_slave_sg(struct rz_dmac_chan *channel)
channel->lmdesc.tail = lmdesc;
- rz_dmac_set_dmars_register(dmac, channel->index, channel->mid_rid);
+ if (dmac->has_icu) {
+ rzv2h_icu_register_dma_req(dmac->icu.pdev, dmac->icu.dmac_index,
+ channel->index, channel->mid_rid);
+ } else {
+ rz_dmac_set_dmars_register(dmac, channel->index, channel->mid_rid);
+ }
+
channel->chctrl = CHCTRL_SETEN;
}
@@ -647,7 +671,13 @@ static void rz_dmac_device_synchronize(struct dma_chan *chan)
if (ret < 0)
dev_warn(dmac->dev, "DMA Timeout");
- rz_dmac_set_dmars_register(dmac, channel->index, 0);
+ if (dmac->has_icu) {
+ rzv2h_icu_register_dma_req(dmac->icu.pdev, dmac->icu.dmac_index,
+ channel->index,
+ RZV2H_ICU_DMAC_REQ_NO_DEFAULT);
+ } else {
+ rz_dmac_set_dmars_register(dmac, channel->index, 0);
+ }
}
/*
@@ -748,7 +778,8 @@ static struct dma_chan *rz_dmac_of_xlate(struct of_phandle_args *dma_spec,
dma_cap_zero(mask);
dma_cap_set(DMA_SLAVE, mask);
- return dma_request_channel(mask, rz_dmac_chan_filter, dma_spec);
+ return __dma_request_channel(&mask, rz_dmac_chan_filter, dma_spec,
+ ofdma->of_node);
}
/*
@@ -823,6 +854,38 @@ static int rz_dmac_chan_probe(struct rz_dmac *dmac,
return 0;
}
+static int rz_dmac_parse_of_icu(struct device *dev, struct rz_dmac *dmac)
+{
+ struct device_node *np = dev->of_node;
+ struct of_phandle_args args;
+ uint32_t dmac_index;
+ int ret;
+
+ ret = of_parse_phandle_with_fixed_args(np, "renesas,icu", 1, 0, &args);
+ if (ret == -ENOENT)
+ return 0;
+ if (ret)
+ return ret;
+
+ dmac->has_icu = true;
+
+ dmac->icu.pdev = of_find_device_by_node(args.np);
+ of_node_put(args.np);
+ if (!dmac->icu.pdev) {
+ dev_err(dev, "ICU device not found.\n");
+ return -ENODEV;
+ }
+
+ dmac_index = args.args[0];
+ if (dmac_index > RZV2H_MAX_DMAC_INDEX) {
+ dev_err(dev, "DMAC index %u invalid.\n", dmac_index);
+ return -EINVAL;
+ }
+ dmac->icu.dmac_index = dmac_index;
+
+ return 0;
+}
+
static int rz_dmac_parse_of(struct device *dev, struct rz_dmac *dmac)
{
struct device_node *np = dev->of_node;
@@ -839,7 +902,7 @@ static int rz_dmac_parse_of(struct device *dev, struct rz_dmac *dmac)
return -EINVAL;
}
- return 0;
+ return rz_dmac_parse_of_icu(dev, dmac);
}
static int rz_dmac_probe(struct platform_device *pdev)
@@ -873,9 +936,11 @@ static int rz_dmac_probe(struct platform_device *pdev)
if (IS_ERR(dmac->base))
return PTR_ERR(dmac->base);
- dmac->ext_base = devm_platform_ioremap_resource(pdev, 1);
- if (IS_ERR(dmac->ext_base))
- return PTR_ERR(dmac->ext_base);
+ if (!dmac->has_icu) {
+ dmac->ext_base = devm_platform_ioremap_resource(pdev, 1);
+ if (IS_ERR(dmac->ext_base))
+ return PTR_ERR(dmac->ext_base);
+ }
/* Register interrupt handler for error */
irq = platform_get_irq_byname(pdev, irqname);
@@ -990,9 +1055,12 @@ static void rz_dmac_remove(struct platform_device *pdev)
reset_control_assert(dmac->rstc);
pm_runtime_put(&pdev->dev);
pm_runtime_disable(&pdev->dev);
+
+ platform_device_put(dmac->icu.pdev);
}
static const struct of_device_id of_rz_dmac_match[] = {
+ { .compatible = "renesas,r9a09g057-dmac", },
{ .compatible = "renesas,rz-dmac", },
{ /* Sentinel */ }
};
diff --git a/drivers/dma/tegra210-adma.c b/drivers/dma/tegra210-adma.c
index ce80ac4b1a1b..fad896ff29a2 100644
--- a/drivers/dma/tegra210-adma.c
+++ b/drivers/dma/tegra210-adma.c
@@ -27,10 +27,10 @@
#define ADMA_CH_INT_CLEAR 0x1c
#define ADMA_CH_CTRL 0x24
-#define ADMA_CH_CTRL_DIR(val) (((val) & 0xf) << 12)
+#define ADMA_CH_CTRL_DIR(val, mask, shift) (((val) & (mask)) << (shift))
#define ADMA_CH_CTRL_DIR_AHUB2MEM 2
#define ADMA_CH_CTRL_DIR_MEM2AHUB 4
-#define ADMA_CH_CTRL_MODE_CONTINUOUS (2 << 8)
+#define ADMA_CH_CTRL_MODE_CONTINUOUS(shift) (2 << (shift))
#define ADMA_CH_CTRL_FLOWCTRL_EN BIT(1)
#define ADMA_CH_CTRL_XFER_PAUSE_SHIFT 0
@@ -41,15 +41,27 @@
#define ADMA_CH_CONFIG_MAX_BURST_SIZE 16
#define ADMA_CH_CONFIG_WEIGHT_FOR_WRR(val) ((val) & 0xf)
#define ADMA_CH_CONFIG_MAX_BUFS 8
-#define TEGRA186_ADMA_CH_CONFIG_OUTSTANDING_REQS(reqs) (reqs << 4)
+#define TEGRA186_ADMA_CH_CONFIG_OUTSTANDING_REQS(reqs) ((reqs) << 4)
+
+#define ADMA_GLOBAL_CH_CONFIG 0x400
+#define ADMA_GLOBAL_CH_CONFIG_WEIGHT_FOR_WRR(val) ((val) & 0x7)
+#define ADMA_GLOBAL_CH_CONFIG_OUTSTANDING_REQS(reqs) ((reqs) << 8)
#define TEGRA186_ADMA_GLOBAL_PAGE_CHGRP 0x30
#define TEGRA186_ADMA_GLOBAL_PAGE_RX_REQ 0x70
#define TEGRA186_ADMA_GLOBAL_PAGE_TX_REQ 0x84
+#define TEGRA264_ADMA_GLOBAL_PAGE_CHGRP_0 0x44
+#define TEGRA264_ADMA_GLOBAL_PAGE_CHGRP_1 0x48
+#define TEGRA264_ADMA_GLOBAL_PAGE_RX_REQ_0 0x100
+#define TEGRA264_ADMA_GLOBAL_PAGE_RX_REQ_1 0x104
+#define TEGRA264_ADMA_GLOBAL_PAGE_TX_REQ_0 0x180
+#define TEGRA264_ADMA_GLOBAL_PAGE_TX_REQ_1 0x184
+#define TEGRA264_ADMA_GLOBAL_PAGE_OFFSET 0x8
#define ADMA_CH_FIFO_CTRL 0x2c
#define ADMA_CH_TX_FIFO_SIZE_SHIFT 8
#define ADMA_CH_RX_FIFO_SIZE_SHIFT 0
+#define ADMA_GLOBAL_CH_FIFO_CTRL 0x300
#define ADMA_CH_LOWER_SRC_ADDR 0x34
#define ADMA_CH_LOWER_TRG_ADDR 0x3c
@@ -73,36 +85,48 @@ struct tegra_adma;
* @adma_get_burst_config: Function callback used to set DMA burst size.
* @global_reg_offset: Register offset of DMA global register.
* @global_int_clear: Register offset of DMA global interrupt clear.
+ * @global_ch_fifo_base: Global channel fifo ctrl base offset
+ * @global_ch_config_base: Global channel config base offset
* @ch_req_tx_shift: Register offset for AHUB transmit channel select.
* @ch_req_rx_shift: Register offset for AHUB receive channel select.
+ * @ch_dir_shift: Channel direction bit position.
+ * @ch_mode_shift: Channel mode bit position.
* @ch_base_offset: Register offset of DMA channel registers.
+ * @ch_tc_offset_diff: From TC register onwards offset differs for Tegra264
* @ch_fifo_ctrl: Default value for channel FIFO CTRL register.
+ * @ch_config: Outstanding and WRR config values
* @ch_req_mask: Mask for Tx or Rx channel select.
+ * @ch_dir_mask: Mask for channel direction.
* @ch_req_max: Maximum number of Tx or Rx channels available.
* @ch_reg_size: Size of DMA channel register space.
* @nr_channels: Number of DMA channels available.
* @ch_fifo_size_mask: Mask for FIFO size field.
* @sreq_index_offset: Slave channel index offset.
* @max_page: Maximum ADMA Channel Page.
- * @has_outstanding_reqs: If DMA channel can have outstanding requests.
* @set_global_pg_config: Global page programming.
*/
struct tegra_adma_chip_data {
unsigned int (*adma_get_burst_config)(unsigned int burst_size);
unsigned int global_reg_offset;
unsigned int global_int_clear;
+ unsigned int global_ch_fifo_base;
+ unsigned int global_ch_config_base;
unsigned int ch_req_tx_shift;
unsigned int ch_req_rx_shift;
+ unsigned int ch_dir_shift;
+ unsigned int ch_mode_shift;
unsigned int ch_base_offset;
+ unsigned int ch_tc_offset_diff;
unsigned int ch_fifo_ctrl;
+ unsigned int ch_config;
unsigned int ch_req_mask;
+ unsigned int ch_dir_mask;
unsigned int ch_req_max;
unsigned int ch_reg_size;
unsigned int nr_channels;
unsigned int ch_fifo_size_mask;
unsigned int sreq_index_offset;
unsigned int max_page;
- bool has_outstanding_reqs;
void (*set_global_pg_config)(struct tegra_adma *tdma);
};
@@ -112,6 +136,7 @@ struct tegra_adma_chip_data {
struct tegra_adma_chan_regs {
unsigned int ctrl;
unsigned int config;
+ unsigned int global_config;
unsigned int src_addr;
unsigned int trg_addr;
unsigned int fifo_ctrl;
@@ -150,6 +175,9 @@ struct tegra_adma_chan {
/* Transfer count and position info */
unsigned int tx_buf_count;
unsigned int tx_buf_pos;
+
+ unsigned int global_ch_fifo_offset;
+ unsigned int global_ch_config_offset;
};
/*
@@ -246,6 +274,29 @@ static void tegra186_adma_global_page_config(struct tegra_adma *tdma)
tdma_write(tdma, TEGRA186_ADMA_GLOBAL_PAGE_TX_REQ + (tdma->ch_page_no * 0x4), 0xffffff);
}
+static void tegra264_adma_global_page_config(struct tegra_adma *tdma)
+{
+ u32 global_page_offset = tdma->ch_page_no * TEGRA264_ADMA_GLOBAL_PAGE_OFFSET;
+
+ /* If the default page (page1) is not used, then clear page1 registers */
+ if (tdma->ch_page_no) {
+ tdma_write(tdma, TEGRA264_ADMA_GLOBAL_PAGE_CHGRP_0, 0);
+ tdma_write(tdma, TEGRA264_ADMA_GLOBAL_PAGE_CHGRP_1, 0);
+ tdma_write(tdma, TEGRA264_ADMA_GLOBAL_PAGE_RX_REQ_0, 0);
+ tdma_write(tdma, TEGRA264_ADMA_GLOBAL_PAGE_RX_REQ_1, 0);
+ tdma_write(tdma, TEGRA264_ADMA_GLOBAL_PAGE_TX_REQ_0, 0);
+ tdma_write(tdma, TEGRA264_ADMA_GLOBAL_PAGE_TX_REQ_1, 0);
+ }
+
+ /* Program global registers for selected page */
+ tdma_write(tdma, TEGRA264_ADMA_GLOBAL_PAGE_CHGRP_0 + global_page_offset, 0xffffffff);
+ tdma_write(tdma, TEGRA264_ADMA_GLOBAL_PAGE_CHGRP_1 + global_page_offset, 0xffffffff);
+ tdma_write(tdma, TEGRA264_ADMA_GLOBAL_PAGE_RX_REQ_0 + global_page_offset, 0xffffffff);
+ tdma_write(tdma, TEGRA264_ADMA_GLOBAL_PAGE_RX_REQ_1 + global_page_offset, 0x1);
+ tdma_write(tdma, TEGRA264_ADMA_GLOBAL_PAGE_TX_REQ_0 + global_page_offset, 0xffffffff);
+ tdma_write(tdma, TEGRA264_ADMA_GLOBAL_PAGE_TX_REQ_1 + global_page_offset, 0x1);
+}
+
static int tegra_adma_init(struct tegra_adma *tdma)
{
u32 status;
@@ -404,11 +455,21 @@ static void tegra_adma_start(struct tegra_adma_chan *tdc)
tdc->tx_buf_pos = 0;
tdc->tx_buf_count = 0;
- tdma_ch_write(tdc, ADMA_CH_TC, ch_regs->tc);
+ tdma_ch_write(tdc, ADMA_CH_TC - tdc->tdma->cdata->ch_tc_offset_diff, ch_regs->tc);
tdma_ch_write(tdc, ADMA_CH_CTRL, ch_regs->ctrl);
- tdma_ch_write(tdc, ADMA_CH_LOWER_SRC_ADDR, ch_regs->src_addr);
- tdma_ch_write(tdc, ADMA_CH_LOWER_TRG_ADDR, ch_regs->trg_addr);
- tdma_ch_write(tdc, ADMA_CH_FIFO_CTRL, ch_regs->fifo_ctrl);
+ tdma_ch_write(tdc, ADMA_CH_LOWER_SRC_ADDR - tdc->tdma->cdata->ch_tc_offset_diff,
+ ch_regs->src_addr);
+ tdma_ch_write(tdc, ADMA_CH_LOWER_TRG_ADDR - tdc->tdma->cdata->ch_tc_offset_diff,
+ ch_regs->trg_addr);
+
+ if (!tdc->tdma->cdata->global_ch_fifo_base)
+ tdma_ch_write(tdc, ADMA_CH_FIFO_CTRL, ch_regs->fifo_ctrl);
+ else if (tdc->global_ch_fifo_offset)
+ tdma_write(tdc->tdma, tdc->global_ch_fifo_offset, ch_regs->fifo_ctrl);
+
+ if (tdc->global_ch_config_offset)
+ tdma_write(tdc->tdma, tdc->global_ch_config_offset, ch_regs->global_config);
+
tdma_ch_write(tdc, ADMA_CH_CONFIG, ch_regs->config);
/* Start ADMA */
@@ -421,7 +482,8 @@ static unsigned int tegra_adma_get_residue(struct tegra_adma_chan *tdc)
{
struct tegra_adma_desc *desc = tdc->desc;
unsigned int max = ADMA_CH_XFER_STATUS_COUNT_MASK + 1;
- unsigned int pos = tdma_ch_read(tdc, ADMA_CH_XFER_STATUS);
+ unsigned int pos = tdma_ch_read(tdc, ADMA_CH_XFER_STATUS -
+ tdc->tdma->cdata->ch_tc_offset_diff);
unsigned int periods_remaining;
/*
@@ -627,13 +689,16 @@ static int tegra_adma_set_xfer_params(struct tegra_adma_chan *tdc,
return -EINVAL;
}
- ch_regs->ctrl |= ADMA_CH_CTRL_DIR(adma_dir) |
- ADMA_CH_CTRL_MODE_CONTINUOUS |
+ ch_regs->ctrl |= ADMA_CH_CTRL_DIR(adma_dir, cdata->ch_dir_mask,
+ cdata->ch_dir_shift) |
+ ADMA_CH_CTRL_MODE_CONTINUOUS(cdata->ch_mode_shift) |
ADMA_CH_CTRL_FLOWCTRL_EN;
ch_regs->config |= cdata->adma_get_burst_config(burst_size);
- ch_regs->config |= ADMA_CH_CONFIG_WEIGHT_FOR_WRR(1);
- if (cdata->has_outstanding_reqs)
- ch_regs->config |= TEGRA186_ADMA_CH_CONFIG_OUTSTANDING_REQS(8);
+
+ if (cdata->global_ch_config_base)
+ ch_regs->global_config |= cdata->ch_config;
+ else
+ ch_regs->config |= cdata->ch_config;
/*
* 'sreq_index' represents the current ADMAIF channel number and as per
@@ -788,12 +853,23 @@ static int __maybe_unused tegra_adma_runtime_suspend(struct device *dev)
/* skip if channel is not active */
if (!ch_reg->cmd)
continue;
- ch_reg->tc = tdma_ch_read(tdc, ADMA_CH_TC);
- ch_reg->src_addr = tdma_ch_read(tdc, ADMA_CH_LOWER_SRC_ADDR);
- ch_reg->trg_addr = tdma_ch_read(tdc, ADMA_CH_LOWER_TRG_ADDR);
+ ch_reg->tc = tdma_ch_read(tdc, ADMA_CH_TC - tdma->cdata->ch_tc_offset_diff);
+ ch_reg->src_addr = tdma_ch_read(tdc, ADMA_CH_LOWER_SRC_ADDR -
+ tdma->cdata->ch_tc_offset_diff);
+ ch_reg->trg_addr = tdma_ch_read(tdc, ADMA_CH_LOWER_TRG_ADDR -
+ tdma->cdata->ch_tc_offset_diff);
ch_reg->ctrl = tdma_ch_read(tdc, ADMA_CH_CTRL);
- ch_reg->fifo_ctrl = tdma_ch_read(tdc, ADMA_CH_FIFO_CTRL);
+
+ if (tdc->global_ch_config_offset)
+ ch_reg->global_config = tdma_read(tdc->tdma, tdc->global_ch_config_offset);
+
+ if (!tdc->tdma->cdata->global_ch_fifo_base)
+ ch_reg->fifo_ctrl = tdma_ch_read(tdc, ADMA_CH_FIFO_CTRL);
+ else if (tdc->global_ch_fifo_offset)
+ ch_reg->fifo_ctrl = tdma_read(tdc->tdma, tdc->global_ch_fifo_offset);
+
ch_reg->config = tdma_ch_read(tdc, ADMA_CH_CONFIG);
+
}
clk_disable:
@@ -832,12 +908,23 @@ static int __maybe_unused tegra_adma_runtime_resume(struct device *dev)
/* skip if channel was not active earlier */
if (!ch_reg->cmd)
continue;
- tdma_ch_write(tdc, ADMA_CH_TC, ch_reg->tc);
- tdma_ch_write(tdc, ADMA_CH_LOWER_SRC_ADDR, ch_reg->src_addr);
- tdma_ch_write(tdc, ADMA_CH_LOWER_TRG_ADDR, ch_reg->trg_addr);
+ tdma_ch_write(tdc, ADMA_CH_TC - tdma->cdata->ch_tc_offset_diff, ch_reg->tc);
+ tdma_ch_write(tdc, ADMA_CH_LOWER_SRC_ADDR - tdma->cdata->ch_tc_offset_diff,
+ ch_reg->src_addr);
+ tdma_ch_write(tdc, ADMA_CH_LOWER_TRG_ADDR - tdma->cdata->ch_tc_offset_diff,
+ ch_reg->trg_addr);
tdma_ch_write(tdc, ADMA_CH_CTRL, ch_reg->ctrl);
- tdma_ch_write(tdc, ADMA_CH_FIFO_CTRL, ch_reg->fifo_ctrl);
+
+ if (!tdc->tdma->cdata->global_ch_fifo_base)
+ tdma_ch_write(tdc, ADMA_CH_FIFO_CTRL, ch_reg->fifo_ctrl);
+ else if (tdc->global_ch_fifo_offset)
+ tdma_write(tdc->tdma, tdc->global_ch_fifo_offset, ch_reg->fifo_ctrl);
+
+ if (tdc->global_ch_config_offset)
+ tdma_write(tdc->tdma, tdc->global_ch_config_offset, ch_reg->global_config);
+
tdma_ch_write(tdc, ADMA_CH_CONFIG, ch_reg->config);
+
tdma_ch_write(tdc, ADMA_CH_CMD, ch_reg->cmd);
}
@@ -848,17 +935,23 @@ static const struct tegra_adma_chip_data tegra210_chip_data = {
.adma_get_burst_config = tegra210_adma_get_burst_config,
.global_reg_offset = 0xc00,
.global_int_clear = 0x20,
+ .global_ch_fifo_base = 0,
+ .global_ch_config_base = 0,
.ch_req_tx_shift = 28,
.ch_req_rx_shift = 24,
+ .ch_dir_shift = 12,
+ .ch_mode_shift = 8,
.ch_base_offset = 0,
+ .ch_tc_offset_diff = 0,
+ .ch_config = ADMA_CH_CONFIG_WEIGHT_FOR_WRR(1),
.ch_req_mask = 0xf,
+ .ch_dir_mask = 0xf,
.ch_req_max = 10,
.ch_reg_size = 0x80,
.nr_channels = 22,
.ch_fifo_size_mask = 0xf,
.sreq_index_offset = 2,
.max_page = 0,
- .has_outstanding_reqs = false,
.set_global_pg_config = NULL,
};
@@ -866,23 +959,56 @@ static const struct tegra_adma_chip_data tegra186_chip_data = {
.adma_get_burst_config = tegra186_adma_get_burst_config,
.global_reg_offset = 0,
.global_int_clear = 0x402c,
+ .global_ch_fifo_base = 0,
+ .global_ch_config_base = 0,
.ch_req_tx_shift = 27,
.ch_req_rx_shift = 22,
+ .ch_dir_shift = 12,
+ .ch_mode_shift = 8,
.ch_base_offset = 0x10000,
+ .ch_tc_offset_diff = 0,
+ .ch_config = ADMA_CH_CONFIG_WEIGHT_FOR_WRR(1) |
+ TEGRA186_ADMA_CH_CONFIG_OUTSTANDING_REQS(8),
.ch_req_mask = 0x1f,
+ .ch_dir_mask = 0xf,
.ch_req_max = 20,
.ch_reg_size = 0x100,
.nr_channels = 32,
.ch_fifo_size_mask = 0x1f,
.sreq_index_offset = 4,
.max_page = 4,
- .has_outstanding_reqs = true,
.set_global_pg_config = tegra186_adma_global_page_config,
};
+static const struct tegra_adma_chip_data tegra264_chip_data = {
+ .adma_get_burst_config = tegra186_adma_get_burst_config,
+ .global_reg_offset = 0,
+ .global_int_clear = 0x800c,
+ .global_ch_fifo_base = ADMA_GLOBAL_CH_FIFO_CTRL,
+ .global_ch_config_base = ADMA_GLOBAL_CH_CONFIG,
+ .ch_req_tx_shift = 26,
+ .ch_req_rx_shift = 20,
+ .ch_dir_shift = 10,
+ .ch_mode_shift = 7,
+ .ch_base_offset = 0x10000,
+ .ch_tc_offset_diff = 4,
+ .ch_config = ADMA_GLOBAL_CH_CONFIG_WEIGHT_FOR_WRR(1) |
+ ADMA_GLOBAL_CH_CONFIG_OUTSTANDING_REQS(8),
+ .ch_req_mask = 0x3f,
+ .ch_dir_mask = 7,
+ .ch_req_max = 32,
+ .ch_reg_size = 0x100,
+ .nr_channels = 64,
+ .ch_fifo_size_mask = 0x7f,
+ .sreq_index_offset = 0,
+ .max_page = 10,
+ .set_global_pg_config = tegra264_adma_global_page_config,
+};
+
static const struct of_device_id tegra_adma_of_match[] = {
{ .compatible = "nvidia,tegra210-adma", .data = &tegra210_chip_data },
{ .compatible = "nvidia,tegra186-adma", .data = &tegra186_chip_data },
+ { .compatible = "nvidia,tegra264-adma", .data = &tegra264_chip_data },
{ },
};
MODULE_DEVICE_TABLE(of, tegra_adma_of_match);
@@ -985,6 +1111,15 @@ static int tegra_adma_probe(struct platform_device *pdev)
tdc->chan_addr = tdma->ch_base_addr + (cdata->ch_reg_size * i);
+ if (tdma->base_addr) {
+ if (cdata->global_ch_fifo_base)
+ tdc->global_ch_fifo_offset = cdata->global_ch_fifo_base + (4 * i);
+
+ if (cdata->global_ch_config_base)
+ tdc->global_ch_config_offset =
+ cdata->global_ch_config_base + (4 * i);
+ }
+
tdc->irq = of_irq_get(pdev->dev.of_node, i);
if (tdc->irq <= 0) {
ret = tdc->irq ?: -ENXIO;
diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c
index b6255c0601bb..aa2dc762140f 100644
--- a/drivers/dma/ti/k3-udma.c
+++ b/drivers/dma/ti/k3-udma.c
@@ -5624,7 +5624,8 @@ static int udma_probe(struct platform_device *pdev)
uc->config.dir = DMA_MEM_TO_MEM;
uc->name = devm_kasprintf(dev, GFP_KERNEL, "%s chan%d",
dev_name(dev), i);
-
+ if (!uc->name)
+ return -ENOMEM;
vchan_init(&uc->vc, &ud->ddev);
/* Use custom vchan completion handling */
tasklet_setup(&uc->vc.task, udma_vchan_complete);
diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c
index 3ad44afd0e74..a34d8f0ceed8 100644
--- a/drivers/dma/xilinx/xilinx_dma.c
+++ b/drivers/dma/xilinx/xilinx_dma.c
@@ -2909,6 +2909,8 @@ static int xilinx_dma_chan_probe(struct xilinx_dma_device *xdev,
return -EINVAL;
}
+ xdev->common.directions |= chan->direction;
+
/* Request the interrupt */
chan->irq = of_irq_get(node, chan->tdest);
if (chan->irq < 0)
@@ -3115,6 +3117,8 @@ static int xilinx_dma_probe(struct platform_device *pdev)
}
}
+ dma_set_max_seg_size(xdev->dev, xdev->max_buffer_len);
+
if (xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) {
xdev->has_axistream_connected =
of_property_read_bool(node, "xlnx,axistream-connected");
diff --git a/drivers/irqchip/irq-renesas-rzv2h.c b/drivers/irqchip/irq-renesas-rzv2h.c
index 1c12e6ec1370..69b32c19e8ff 100644
--- a/drivers/irqchip/irq-renesas-rzv2h.c
+++ b/drivers/irqchip/irq-renesas-rzv2h.c
@@ -15,6 +15,7 @@
#include <linux/err.h>
#include <linux/io.h>
#include <linux/irqchip.h>
+#include <linux/irqchip/irq-renesas-rzv2h.h>
#include <linux/irqdomain.h>
#include <linux/of_address.h>
#include <linux/of_platform.h>
@@ -41,6 +42,8 @@
#define ICU_TSCLR 0x24
#define ICU_TITSR(k) (0x28 + (k) * 4)
#define ICU_TSSR(k) (0x30 + (k) * 4)
+#define ICU_DMkSELy(k, y) (0x420 + (k) * 0x20 + (y) * 4)
+#define ICU_DMACKSELk(k) (0x500 + (k) * 4)
/* NMI */
#define ICU_NMI_EDGE_FALLING 0
@@ -103,6 +106,15 @@ struct rzv2h_hw_info {
u8 field_width;
};
+/* DMAC */
+#define ICU_DMAC_DkRQ_SEL_MASK GENMASK(9, 0)
+
+#define ICU_DMAC_DMAREQ_SHIFT(up) ((up) * 16)
+#define ICU_DMAC_DMAREQ_MASK(up) (ICU_DMAC_DkRQ_SEL_MASK \
+ << ICU_DMAC_DMAREQ_SHIFT(up))
+#define ICU_DMAC_PREP_DMAREQ(sel, up) (FIELD_PREP(ICU_DMAC_DkRQ_SEL_MASK, (sel)) \
+ << ICU_DMAC_DMAREQ_SHIFT(up))
+
/**
* struct rzv2h_icu_priv - Interrupt Control Unit controller private data structure.
* @base: Controller's base address
@@ -117,6 +129,27 @@ struct rzv2h_icu_priv {
const struct rzv2h_hw_info *info;
};
+void rzv2h_icu_register_dma_req(struct platform_device *icu_dev, u8 dmac_index, u8 dmac_channel,
+ u16 req_no)
+{
+ struct rzv2h_icu_priv *priv = platform_get_drvdata(icu_dev);
+ u32 icu_dmksely, dmareq, dmareq_mask;
+ u8 y, upper;
+
+ y = dmac_channel / 2;
+ upper = dmac_channel % 2;
+
+ dmareq = ICU_DMAC_PREP_DMAREQ(req_no, upper);
+ dmareq_mask = ICU_DMAC_DMAREQ_MASK(upper);
+
+ guard(raw_spinlock_irqsave)(&priv->lock);
+
+ icu_dmksely = readl(priv->base + ICU_DMkSELy(dmac_index, y));
+ icu_dmksely = (icu_dmksely & ~dmareq_mask) | dmareq;
+ writel(icu_dmksely, priv->base + ICU_DMkSELy(dmac_index, y));
+}
+EXPORT_SYMBOL_GPL(rzv2h_icu_register_dma_req);
+
static inline struct rzv2h_icu_priv *irq_data_to_priv(struct irq_data *data)
{
return data->domain->host_data;
@@ -491,6 +524,8 @@ static int rzv2h_icu_init_common(struct device_node *node, struct device_node *p
if (!rzv2h_icu_data)
return -ENOMEM;
+ platform_set_drvdata(pdev, rzv2h_icu_data);
+
rzv2h_icu_data->base = devm_of_iomap(&pdev->dev, pdev->dev.of_node, 0, NULL);
if (IS_ERR(rzv2h_icu_data->base))
return PTR_ERR(rzv2h_icu_data->base);
diff --git a/drivers/net/can/kvaser_pciefd.c b/drivers/net/can/kvaser_pciefd.c
index 7d3066691d5d..52301511ed1b 100644
--- a/drivers/net/can/kvaser_pciefd.c
+++ b/drivers/net/can/kvaser_pciefd.c
@@ -966,7 +966,7 @@ static int kvaser_pciefd_setup_can_ctrls(struct kvaser_pciefd *pcie)
u32 status, tx_nr_packets_max;
netdev = alloc_candev(sizeof(struct kvaser_pciefd_can),
- KVASER_PCIEFD_CAN_TX_MAX_COUNT);
+ roundup_pow_of_two(KVASER_PCIEFD_CAN_TX_MAX_COUNT));
if (!netdev)
return -ENOMEM;
@@ -995,7 +995,6 @@ static int kvaser_pciefd_setup_can_ctrls(struct kvaser_pciefd *pcie)
can->tx_max_count = min(KVASER_PCIEFD_CAN_TX_MAX_COUNT, tx_nr_packets_max - 1);
can->can.clock.freq = pcie->freq;
- can->can.echo_skb_max = roundup_pow_of_two(can->tx_max_count);
spin_lock_init(&can->lock);
can->can.bittiming_const = &kvaser_pciefd_bittiming_const;
diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index 132683ed3abe..862bdccb7439 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -22,6 +22,7 @@
#include <linux/gpio.h>
#include <linux/kernel.h>
#include <linux/math.h>
+#include <linux/minmax.h>
#include <linux/module.h>
#include <linux/platform_data/b53.h>
#include <linux/phy.h>
@@ -1322,41 +1323,17 @@ static void b53_adjust_63xx_rgmii(struct dsa_switch *ds, int port,
phy_interface_t interface)
{
struct b53_device *dev = ds->priv;
- u8 rgmii_ctrl = 0, off;
-
- if (port == dev->imp_port)
- off = B53_RGMII_CTRL_IMP;
- else
- off = B53_RGMII_CTRL_P(port);
+ u8 rgmii_ctrl = 0;
- b53_read8(dev, B53_CTRL_PAGE, off, &rgmii_ctrl);
-
- switch (interface) {
- case PHY_INTERFACE_MODE_RGMII_ID:
- rgmii_ctrl |= (RGMII_CTRL_DLL_RXC | RGMII_CTRL_DLL_TXC);
- break;
- case PHY_INTERFACE_MODE_RGMII_RXID:
- rgmii_ctrl &= ~(RGMII_CTRL_DLL_TXC);
- rgmii_ctrl |= RGMII_CTRL_DLL_RXC;
- break;
- case PHY_INTERFACE_MODE_RGMII_TXID:
- rgmii_ctrl &= ~(RGMII_CTRL_DLL_RXC);
- rgmii_ctrl |= RGMII_CTRL_DLL_TXC;
- break;
- case PHY_INTERFACE_MODE_RGMII:
- default:
- rgmii_ctrl &= ~(RGMII_CTRL_DLL_RXC | RGMII_CTRL_DLL_TXC);
- break;
- }
+ b53_read8(dev, B53_CTRL_PAGE, B53_RGMII_CTRL_P(port), &rgmii_ctrl);
+ rgmii_ctrl &= ~(RGMII_CTRL_DLL_RXC | RGMII_CTRL_DLL_TXC);
- if (port != dev->imp_port) {
- if (is63268(dev))
- rgmii_ctrl |= RGMII_CTRL_MII_OVERRIDE;
+ if (is63268(dev))
+ rgmii_ctrl |= RGMII_CTRL_MII_OVERRIDE;
- rgmii_ctrl |= RGMII_CTRL_ENABLE_GMII;
- }
+ rgmii_ctrl |= RGMII_CTRL_ENABLE_GMII;
- b53_write8(dev, B53_CTRL_PAGE, off, rgmii_ctrl);
+ b53_write8(dev, B53_CTRL_PAGE, B53_RGMII_CTRL_P(port), rgmii_ctrl);
dev_dbg(ds->dev, "Configured port %d for %s\n", port,
phy_modes(interface));
@@ -1377,8 +1354,7 @@ static void b53_adjust_531x5_rgmii(struct dsa_switch *ds, int port,
* tx_clk aligned timing (restoring to reset defaults)
*/
b53_read8(dev, B53_CTRL_PAGE, off, &rgmii_ctrl);
- rgmii_ctrl &= ~(RGMII_CTRL_DLL_RXC | RGMII_CTRL_DLL_TXC |
- RGMII_CTRL_TIMING_SEL);
+ rgmii_ctrl &= ~(RGMII_CTRL_DLL_RXC | RGMII_CTRL_DLL_TXC);
/* PHY_INTERFACE_MODE_RGMII_TXID means TX internal delay, make
* sure that we enable the port TX clock internal delay to
@@ -1398,7 +1374,10 @@ static void b53_adjust_531x5_rgmii(struct dsa_switch *ds, int port,
rgmii_ctrl |= RGMII_CTRL_DLL_TXC;
if (interface == PHY_INTERFACE_MODE_RGMII)
rgmii_ctrl |= RGMII_CTRL_DLL_TXC | RGMII_CTRL_DLL_RXC;
- rgmii_ctrl |= RGMII_CTRL_TIMING_SEL;
+
+ if (dev->chip_id != BCM53115_DEVICE_ID)
+ rgmii_ctrl |= RGMII_CTRL_TIMING_SEL;
+
b53_write8(dev, B53_CTRL_PAGE, off, rgmii_ctrl);
dev_info(ds->dev, "Configured port %d for %s\n", port,
@@ -1462,6 +1441,10 @@ static void b53_phylink_get_caps(struct dsa_switch *ds, int port,
__set_bit(PHY_INTERFACE_MODE_MII, config->supported_interfaces);
__set_bit(PHY_INTERFACE_MODE_REVMII, config->supported_interfaces);
+ /* BCM63xx RGMII ports support RGMII */
+ if (is63xx(dev) && in_range(port, B53_63XX_RGMII0, 4))
+ phy_interface_set_rgmii(config->supported_interfaces);
+
config->mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
MAC_10 | MAC_100;
@@ -1501,7 +1484,7 @@ static void b53_phylink_mac_config(struct phylink_config *config,
struct b53_device *dev = ds->priv;
int port = dp->index;
- if (is63xx(dev) && port >= B53_63XX_RGMII0)
+ if (is63xx(dev) && in_range(port, B53_63XX_RGMII0, 4))
b53_adjust_63xx_rgmii(ds, port, interface);
if (mode == MLO_AN_FIXED) {
@@ -2353,6 +2336,9 @@ int b53_eee_init(struct dsa_switch *ds, int port, struct phy_device *phy)
{
int ret;
+ if (!b53_support_eee(ds, port))
+ return 0;
+
ret = phy_init_eee(phy, false);
if (ret)
return 0;
@@ -2367,7 +2353,7 @@ bool b53_support_eee(struct dsa_switch *ds, int port)
{
struct b53_device *dev = ds->priv;
- return !is5325(dev) && !is5365(dev);
+ return !is5325(dev) && !is5365(dev) && !is63xx(dev);
}
EXPORT_SYMBOL(b53_support_eee);
diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c
index d1d3b854361e..a7ec609d64de 100644
--- a/drivers/net/ethernet/airoha/airoha_eth.c
+++ b/drivers/net/ethernet/airoha/airoha_eth.c
@@ -84,6 +84,8 @@ static void airoha_set_macaddr(struct airoha_gdm_port *port, const u8 *addr)
val = (addr[3] << 16) | (addr[4] << 8) | addr[5];
airoha_fe_wr(eth, REG_FE_MAC_LMIN(reg), val);
airoha_fe_wr(eth, REG_FE_MAC_LMAX(reg), val);
+
+ airoha_ppe_init_upd_mem(port);
}
static void airoha_set_gdm_port_fwd_cfg(struct airoha_eth *eth, u32 addr,
diff --git a/drivers/net/ethernet/airoha/airoha_eth.h b/drivers/net/ethernet/airoha/airoha_eth.h
index b815697302bf..a970b789cf23 100644
--- a/drivers/net/ethernet/airoha/airoha_eth.h
+++ b/drivers/net/ethernet/airoha/airoha_eth.h
@@ -614,6 +614,7 @@ void airoha_ppe_check_skb(struct airoha_ppe *ppe, struct sk_buff *skb,
int airoha_ppe_setup_tc_block_cb(struct net_device *dev, void *type_data);
int airoha_ppe_init(struct airoha_eth *eth);
void airoha_ppe_deinit(struct airoha_eth *eth);
+void airoha_ppe_init_upd_mem(struct airoha_gdm_port *port);
struct airoha_foe_entry *airoha_ppe_foe_get_entry(struct airoha_ppe *ppe,
u32 hash);
void airoha_ppe_foe_entry_get_stats(struct airoha_ppe *ppe, u32 hash,
diff --git a/drivers/net/ethernet/airoha/airoha_ppe.c b/drivers/net/ethernet/airoha/airoha_ppe.c
index 12d32c92717a..9067d2fc7706 100644
--- a/drivers/net/ethernet/airoha/airoha_ppe.c
+++ b/drivers/net/ethernet/airoha/airoha_ppe.c
@@ -223,6 +223,7 @@ static int airoha_ppe_foe_entry_prepare(struct airoha_eth *eth,
int dsa_port = airoha_get_dsa_port(&dev);
struct airoha_foe_mac_info_common *l2;
u32 qdata, ports_pad, val;
+ u8 smac_id = 0xf;
memset(hwe, 0, sizeof(*hwe));
@@ -257,6 +258,8 @@ static int airoha_ppe_foe_entry_prepare(struct airoha_eth *eth,
*/
if (airhoa_is_lan_gdm_port(port))
val |= AIROHA_FOE_IB2_FAST_PATH;
+
+ smac_id = port->id;
}
if (is_multicast_ether_addr(data->eth.h_dest))
@@ -291,7 +294,7 @@ static int airoha_ppe_foe_entry_prepare(struct airoha_eth *eth,
hwe->ipv4.l2.src_mac_lo =
get_unaligned_be16(data->eth.h_source + 4);
} else {
- l2->src_mac_hi = FIELD_PREP(AIROHA_FOE_MAC_SMAC_ID, 0xf);
+ l2->src_mac_hi = FIELD_PREP(AIROHA_FOE_MAC_SMAC_ID, smac_id);
}
if (data->vlan.num) {
@@ -636,7 +639,6 @@ airoha_ppe_foe_commit_subflow_entry(struct airoha_ppe *ppe,
u32 mask = AIROHA_FOE_IB1_BIND_PACKET_TYPE | AIROHA_FOE_IB1_BIND_UDP;
struct airoha_foe_entry *hwe_p, hwe;
struct airoha_flow_table_entry *f;
- struct airoha_foe_mac_info *l2;
int type;
hwe_p = airoha_ppe_foe_get_entry(ppe, hash);
@@ -653,18 +655,25 @@ airoha_ppe_foe_commit_subflow_entry(struct airoha_ppe *ppe,
memcpy(&hwe, hwe_p, sizeof(*hwe_p));
hwe.ib1 = (hwe.ib1 & mask) | (e->data.ib1 & ~mask);
- l2 = &hwe.bridge.l2;
- memcpy(l2, &e->data.bridge.l2, sizeof(*l2));
type = FIELD_GET(AIROHA_FOE_IB1_BIND_PACKET_TYPE, hwe.ib1);
- if (type == PPE_PKT_TYPE_IPV4_HNAPT)
- memcpy(&hwe.ipv4.new_tuple, &hwe.ipv4.orig_tuple,
- sizeof(hwe.ipv4.new_tuple));
- else if (type >= PPE_PKT_TYPE_IPV6_ROUTE_3T &&
- l2->common.etype == ETH_P_IP)
- l2->common.etype = ETH_P_IPV6;
-
- hwe.bridge.ib2 = e->data.bridge.ib2;
+ if (type >= PPE_PKT_TYPE_IPV6_ROUTE_3T) {
+ memcpy(&hwe.ipv6.l2, &e->data.bridge.l2, sizeof(hwe.ipv6.l2));
+ hwe.ipv6.ib2 = e->data.bridge.ib2;
+ /* setting smac_id to 0xf instruct the hw to keep original
+ * source mac address
+ */
+ hwe.ipv6.l2.src_mac_hi = FIELD_PREP(AIROHA_FOE_MAC_SMAC_ID,
+ 0xf);
+ } else {
+ memcpy(&hwe.bridge.l2, &e->data.bridge.l2,
+ sizeof(hwe.bridge.l2));
+ hwe.bridge.ib2 = e->data.bridge.ib2;
+ if (type == PPE_PKT_TYPE_IPV4_HNAPT)
+ memcpy(&hwe.ipv4.new_tuple, &hwe.ipv4.orig_tuple,
+ sizeof(hwe.ipv4.new_tuple));
+ }
+
hwe.bridge.data = e->data.bridge.data;
airoha_ppe_foe_commit_entry(ppe, &hwe, hash);
@@ -1238,6 +1247,27 @@ void airoha_ppe_check_skb(struct airoha_ppe *ppe, struct sk_buff *skb,
airoha_ppe_foe_insert_entry(ppe, skb, hash);
}
+void airoha_ppe_init_upd_mem(struct airoha_gdm_port *port)
+{
+ struct airoha_eth *eth = port->qdma->eth;
+ struct net_device *dev = port->dev;
+ const u8 *addr = dev->dev_addr;
+ u32 val;
+
+ val = (addr[2] << 24) | (addr[3] << 16) | (addr[4] << 8) | addr[5];
+ airoha_fe_wr(eth, REG_UPDMEM_DATA(0), val);
+ airoha_fe_wr(eth, REG_UPDMEM_CTRL(0),
+ FIELD_PREP(PPE_UPDMEM_ADDR_MASK, port->id) |
+ PPE_UPDMEM_WR_MASK | PPE_UPDMEM_REQ_MASK);
+
+ val = (addr[0] << 8) | addr[1];
+ airoha_fe_wr(eth, REG_UPDMEM_DATA(0), val);
+ airoha_fe_wr(eth, REG_UPDMEM_CTRL(0),
+ FIELD_PREP(PPE_UPDMEM_ADDR_MASK, port->id) |
+ FIELD_PREP(PPE_UPDMEM_OFFSET_MASK, 1) |
+ PPE_UPDMEM_WR_MASK | PPE_UPDMEM_REQ_MASK);
+}
+
int airoha_ppe_init(struct airoha_eth *eth)
{
struct airoha_ppe *ppe;
diff --git a/drivers/net/ethernet/airoha/airoha_regs.h b/drivers/net/ethernet/airoha/airoha_regs.h
index d931530fc96f..04187eb40ec6 100644
--- a/drivers/net/ethernet/airoha/airoha_regs.h
+++ b/drivers/net/ethernet/airoha/airoha_regs.h
@@ -313,6 +313,16 @@
#define REG_PPE_RAM_BASE(_n) (((_n) ? PPE2_BASE : PPE1_BASE) + 0x320)
#define REG_PPE_RAM_ENTRY(_m, _n) (REG_PPE_RAM_BASE(_m) + ((_n) << 2))
+#define REG_UPDMEM_CTRL(_n) (((_n) ? PPE2_BASE : PPE1_BASE) + 0x370)
+#define PPE_UPDMEM_ACK_MASK BIT(31)
+#define PPE_UPDMEM_ADDR_MASK GENMASK(11, 8)
+#define PPE_UPDMEM_OFFSET_MASK GENMASK(7, 4)
+#define PPE_UPDMEM_SEL_MASK GENMASK(3, 2)
+#define PPE_UPDMEM_WR_MASK BIT(1)
+#define PPE_UPDMEM_REQ_MASK BIT(0)
+
+#define REG_UPDMEM_DATA(_n) (((_n) ? PPE2_BASE : PPE1_BASE) + 0x374)
+
#define REG_FE_GDM_TX_OK_PKT_CNT_H(_n) (GDM_BASE(_n) + 0x280)
#define REG_FE_GDM_TX_OK_BYTE_CNT_H(_n) (GDM_BASE(_n) + 0x284)
#define REG_FE_GDM_TX_ETH_PKT_CNT_H(_n) (GDM_BASE(_n) + 0x288)
diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
index e1ffbd561fac..7cd1eda0b449 100644
--- a/drivers/net/ethernet/google/gve/gve_main.c
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -2153,7 +2153,7 @@ void gve_handle_report_stats(struct gve_priv *priv)
};
stats[stats_idx++] = (struct stats) {
.stat_name = cpu_to_be32(RX_BUFFERS_POSTED),
- .value = cpu_to_be64(priv->rx[0].fill_cnt),
+ .value = cpu_to_be64(priv->rx[idx].fill_cnt),
.queue_id = cpu_to_be32(idx),
};
}
diff --git a/drivers/net/ethernet/google/gve/gve_tx_dqo.c b/drivers/net/ethernet/google/gve/gve_tx_dqo.c
index a27f1574a733..9d705d94b065 100644
--- a/drivers/net/ethernet/google/gve/gve_tx_dqo.c
+++ b/drivers/net/ethernet/google/gve/gve_tx_dqo.c
@@ -764,6 +764,9 @@ static int gve_tx_add_skb_dqo(struct gve_tx_ring *tx,
s16 completion_tag;
pkt = gve_alloc_pending_packet(tx);
+ if (!pkt)
+ return -ENOMEM;
+
pkt->skb = skb;
completion_tag = pkt - tx->dqo.pending_packets;
diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_main.c b/drivers/net/ethernet/huawei/hinic3/hinic3_main.c
index 093aa6d775ff..497f2a36f35d 100644
--- a/drivers/net/ethernet/huawei/hinic3/hinic3_main.c
+++ b/drivers/net/ethernet/huawei/hinic3/hinic3_main.c
@@ -324,8 +324,6 @@ static __init int hinic3_nic_lld_init(void)
{
int err;
- pr_info("%s: %s\n", HINIC3_NIC_DRV_NAME, HINIC3_NIC_DRV_DESC);
-
err = hinic3_lld_init();
if (err)
return err;
diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h
index 9de3e0ba3731..f7a98ff43a57 100644
--- a/drivers/net/ethernet/intel/iavf/iavf.h
+++ b/drivers/net/ethernet/intel/iavf/iavf.h
@@ -268,7 +268,6 @@ struct iavf_adapter {
struct list_head vlan_filter_list;
int num_vlan_filters;
struct list_head mac_filter_list;
- struct mutex crit_lock;
/* Lock to protect accesses to MAC and VLAN lists */
spinlock_t mac_vlan_list_lock;
char misc_vector_name[IFNAMSIZ + 9];
diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
index 288bb5b2e72e..2b2b315205b5 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
@@ -4,6 +4,8 @@
#include <linux/bitfield.h>
#include <linux/uaccess.h>
+#include <net/netdev_lock.h>
+
/* ethtool support for iavf */
#include "iavf.h"
@@ -1256,9 +1258,10 @@ static int iavf_add_fdir_ethtool(struct iavf_adapter *adapter, struct ethtool_rx
{
struct ethtool_rx_flow_spec *fsp = &cmd->fs;
struct iavf_fdir_fltr *fltr;
- int count = 50;
int err;
+ netdev_assert_locked(adapter->netdev);
+
if (!(adapter->flags & IAVF_FLAG_FDIR_ENABLED))
return -EOPNOTSUPP;
@@ -1277,14 +1280,6 @@ static int iavf_add_fdir_ethtool(struct iavf_adapter *adapter, struct ethtool_rx
if (!fltr)
return -ENOMEM;
- while (!mutex_trylock(&adapter->crit_lock)) {
- if (--count == 0) {
- kfree(fltr);
- return -EINVAL;
- }
- udelay(1);
- }
-
err = iavf_add_fdir_fltr_info(adapter, fsp, fltr);
if (!err)
err = iavf_fdir_add_fltr(adapter, fltr);
@@ -1292,7 +1287,6 @@ static int iavf_add_fdir_ethtool(struct iavf_adapter *adapter, struct ethtool_rx
if (err)
kfree(fltr);
- mutex_unlock(&adapter->crit_lock);
return err;
}
@@ -1435,11 +1429,13 @@ iavf_set_adv_rss_hash_opt(struct iavf_adapter *adapter,
{
struct iavf_adv_rss *rss_old, *rss_new;
bool rss_new_add = false;
- int count = 50, err = 0;
bool symm = false;
u64 hash_flds;
+ int err = 0;
u32 hdrs;
+ netdev_assert_locked(adapter->netdev);
+
if (!ADV_RSS_SUPPORT(adapter))
return -EOPNOTSUPP;
@@ -1463,15 +1459,6 @@ iavf_set_adv_rss_hash_opt(struct iavf_adapter *adapter,
return -EINVAL;
}
- while (!mutex_trylock(&adapter->crit_lock)) {
- if (--count == 0) {
- kfree(rss_new);
- return -EINVAL;
- }
-
- udelay(1);
- }
-
spin_lock_bh(&adapter->adv_rss_lock);
rss_old = iavf_find_adv_rss_cfg_by_hdrs(adapter, hdrs);
if (rss_old) {
@@ -1500,8 +1487,6 @@ iavf_set_adv_rss_hash_opt(struct iavf_adapter *adapter,
if (!err)
iavf_schedule_aq_request(adapter, IAVF_FLAG_AQ_ADD_ADV_RSS_CFG);
- mutex_unlock(&adapter->crit_lock);
-
if (!rss_new_add)
kfree(rss_new);
diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index 6d7ba4d67a19..2c0bb41809a4 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -1287,11 +1287,11 @@ static void iavf_configure(struct iavf_adapter *adapter)
/**
* iavf_up_complete - Finish the last steps of bringing up a connection
* @adapter: board private structure
- *
- * Expects to be called while holding crit_lock.
- **/
+ */
static void iavf_up_complete(struct iavf_adapter *adapter)
{
+ netdev_assert_locked(adapter->netdev);
+
iavf_change_state(adapter, __IAVF_RUNNING);
clear_bit(__IAVF_VSI_DOWN, adapter->vsi.state);
@@ -1410,13 +1410,13 @@ static void iavf_clear_adv_rss_conf(struct iavf_adapter *adapter)
/**
* iavf_down - Shutdown the connection processing
* @adapter: board private structure
- *
- * Expects to be called while holding crit_lock.
- **/
+ */
void iavf_down(struct iavf_adapter *adapter)
{
struct net_device *netdev = adapter->netdev;
+ netdev_assert_locked(netdev);
+
if (adapter->state <= __IAVF_DOWN_PENDING)
return;
@@ -2025,22 +2025,21 @@ err:
* iavf_finish_config - do all netdev work that needs RTNL
* @work: our work_struct
*
- * Do work that needs both RTNL and crit_lock.
- **/
+ * Do work that needs RTNL.
+ */
static void iavf_finish_config(struct work_struct *work)
{
struct iavf_adapter *adapter;
- bool locks_released = false;
+ bool netdev_released = false;
int pairs, err;
adapter = container_of(work, struct iavf_adapter, finish_config);
/* Always take RTNL first to prevent circular lock dependency;
- * The dev->lock is needed to update the queue number
+ * the dev->lock (== netdev lock) is needed to update the queue number.
*/
rtnl_lock();
netdev_lock(adapter->netdev);
- mutex_lock(&adapter->crit_lock);
if ((adapter->flags & IAVF_FLAG_SETUP_NETDEV_FEATURES) &&
adapter->netdev->reg_state == NETREG_REGISTERED &&
@@ -2059,22 +2058,21 @@ static void iavf_finish_config(struct work_struct *work)
netif_set_real_num_tx_queues(adapter->netdev, pairs);
if (adapter->netdev->reg_state != NETREG_REGISTERED) {
- mutex_unlock(&adapter->crit_lock);
netdev_unlock(adapter->netdev);
- locks_released = true;
+ netdev_released = true;
err = register_netdevice(adapter->netdev);
if (err) {
dev_err(&adapter->pdev->dev, "Unable to register netdev (%d)\n",
err);
/* go back and try again.*/
- mutex_lock(&adapter->crit_lock);
+ netdev_lock(adapter->netdev);
iavf_free_rss(adapter);
iavf_free_misc_irq(adapter);
iavf_reset_interrupt_capability(adapter);
iavf_change_state(adapter,
__IAVF_INIT_CONFIG_ADAPTER);
- mutex_unlock(&adapter->crit_lock);
+ netdev_unlock(adapter->netdev);
goto out;
}
}
@@ -2090,10 +2088,8 @@ static void iavf_finish_config(struct work_struct *work)
}
out:
- if (!locks_released) {
- mutex_unlock(&adapter->crit_lock);
+ if (!netdev_released)
netdev_unlock(adapter->netdev);
- }
rtnl_unlock();
}
@@ -2911,28 +2907,15 @@ err:
iavf_change_state(adapter, __IAVF_INIT_FAILED);
}
-/**
- * iavf_watchdog_task - Periodic call-back task
- * @work: pointer to work_struct
- **/
-static void iavf_watchdog_task(struct work_struct *work)
+static const int IAVF_NO_RESCHED = -1;
+
+/* return: msec delay for requeueing itself */
+static int iavf_watchdog_step(struct iavf_adapter *adapter)
{
- struct iavf_adapter *adapter = container_of(work,
- struct iavf_adapter,
- watchdog_task.work);
- struct net_device *netdev = adapter->netdev;
struct iavf_hw *hw = &adapter->hw;
u32 reg_val;
- netdev_lock(netdev);
- if (!mutex_trylock(&adapter->crit_lock)) {
- if (adapter->state == __IAVF_REMOVE) {
- netdev_unlock(netdev);
- return;
- }
-
- goto restart_watchdog;
- }
+ netdev_assert_locked(adapter->netdev);
if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED)
iavf_change_state(adapter, __IAVF_COMM_FAILED);
@@ -2940,39 +2923,19 @@ static void iavf_watchdog_task(struct work_struct *work)
switch (adapter->state) {
case __IAVF_STARTUP:
iavf_startup(adapter);
- mutex_unlock(&adapter->crit_lock);
- netdev_unlock(netdev);
- queue_delayed_work(adapter->wq, &adapter->watchdog_task,
- msecs_to_jiffies(30));
- return;
+ return 30;
case __IAVF_INIT_VERSION_CHECK:
iavf_init_version_check(adapter);
- mutex_unlock(&adapter->crit_lock);
- netdev_unlock(netdev);
- queue_delayed_work(adapter->wq, &adapter->watchdog_task,
- msecs_to_jiffies(30));
- return;
+ return 30;
case __IAVF_INIT_GET_RESOURCES:
iavf_init_get_resources(adapter);
- mutex_unlock(&adapter->crit_lock);
- netdev_unlock(netdev);
- queue_delayed_work(adapter->wq, &adapter->watchdog_task,
- msecs_to_jiffies(1));
- return;
+ return 1;
case __IAVF_INIT_EXTENDED_CAPS:
iavf_init_process_extended_caps(adapter);
- mutex_unlock(&adapter->crit_lock);
- netdev_unlock(netdev);
- queue_delayed_work(adapter->wq, &adapter->watchdog_task,
- msecs_to_jiffies(1));
- return;
+ return 1;
case __IAVF_INIT_CONFIG_ADAPTER:
iavf_init_config_adapter(adapter);
- mutex_unlock(&adapter->crit_lock);
- netdev_unlock(netdev);
- queue_delayed_work(adapter->wq, &adapter->watchdog_task,
- msecs_to_jiffies(1));
- return;
+ return 1;
case __IAVF_INIT_FAILED:
if (test_bit(__IAVF_IN_REMOVE_TASK,
&adapter->crit_section)) {
@@ -2980,27 +2943,18 @@ static void iavf_watchdog_task(struct work_struct *work)
* watchdog task, iavf_remove should handle this state
* as it can loop forever
*/
- mutex_unlock(&adapter->crit_lock);
- netdev_unlock(netdev);
- return;
+ return IAVF_NO_RESCHED;
}
if (++adapter->aq_wait_count > IAVF_AQ_MAX_ERR) {
dev_err(&adapter->pdev->dev,
"Failed to communicate with PF; waiting before retry\n");
adapter->flags |= IAVF_FLAG_PF_COMMS_FAILED;
iavf_shutdown_adminq(hw);
- mutex_unlock(&adapter->crit_lock);
- netdev_unlock(netdev);
- queue_delayed_work(adapter->wq,
- &adapter->watchdog_task, (5 * HZ));
- return;
+ return 5000;
}
/* Try again from failed step*/
iavf_change_state(adapter, adapter->last_state);
- mutex_unlock(&adapter->crit_lock);
- netdev_unlock(netdev);
- queue_delayed_work(adapter->wq, &adapter->watchdog_task, HZ);
- return;
+ return 1000;
case __IAVF_COMM_FAILED:
if (test_bit(__IAVF_IN_REMOVE_TASK,
&adapter->crit_section)) {
@@ -3010,9 +2964,7 @@ static void iavf_watchdog_task(struct work_struct *work)
*/
iavf_change_state(adapter, __IAVF_INIT_FAILED);
adapter->flags &= ~IAVF_FLAG_PF_COMMS_FAILED;
- mutex_unlock(&adapter->crit_lock);
- netdev_unlock(netdev);
- return;
+ return IAVF_NO_RESCHED;
}
reg_val = rd32(hw, IAVF_VFGEN_RSTAT) &
IAVF_VFGEN_RSTAT_VFR_STATE_MASK;
@@ -3030,18 +2982,9 @@ static void iavf_watchdog_task(struct work_struct *work)
}
adapter->aq_required = 0;
adapter->current_op = VIRTCHNL_OP_UNKNOWN;
- mutex_unlock(&adapter->crit_lock);
- netdev_unlock(netdev);
- queue_delayed_work(adapter->wq,
- &adapter->watchdog_task,
- msecs_to_jiffies(10));
- return;
+ return 10;
case __IAVF_RESETTING:
- mutex_unlock(&adapter->crit_lock);
- netdev_unlock(netdev);
- queue_delayed_work(adapter->wq, &adapter->watchdog_task,
- HZ * 2);
- return;
+ return 2000;
case __IAVF_DOWN:
case __IAVF_DOWN_PENDING:
case __IAVF_TESTING:
@@ -3068,9 +3011,7 @@ static void iavf_watchdog_task(struct work_struct *work)
break;
case __IAVF_REMOVE:
default:
- mutex_unlock(&adapter->crit_lock);
- netdev_unlock(netdev);
- return;
+ return IAVF_NO_RESCHED;
}
/* check for hw reset */
@@ -3080,24 +3021,29 @@ static void iavf_watchdog_task(struct work_struct *work)
adapter->current_op = VIRTCHNL_OP_UNKNOWN;
dev_err(&adapter->pdev->dev, "Hardware reset detected\n");
iavf_schedule_reset(adapter, IAVF_FLAG_RESET_PENDING);
- mutex_unlock(&adapter->crit_lock);
- netdev_unlock(netdev);
- queue_delayed_work(adapter->wq,
- &adapter->watchdog_task, HZ * 2);
- return;
}
- mutex_unlock(&adapter->crit_lock);
-restart_watchdog:
- netdev_unlock(netdev);
+ return adapter->aq_required ? 20 : 2000;
+}
+
+static void iavf_watchdog_task(struct work_struct *work)
+{
+ struct iavf_adapter *adapter = container_of(work,
+ struct iavf_adapter,
+ watchdog_task.work);
+ struct net_device *netdev = adapter->netdev;
+ int msec_delay;
+
+ netdev_lock(netdev);
+ msec_delay = iavf_watchdog_step(adapter);
+ /* note that we schedule a different task */
if (adapter->state >= __IAVF_DOWN)
queue_work(adapter->wq, &adapter->adminq_task);
- if (adapter->aq_required)
- queue_delayed_work(adapter->wq, &adapter->watchdog_task,
- msecs_to_jiffies(20));
- else
+
+ if (msec_delay != IAVF_NO_RESCHED)
queue_delayed_work(adapter->wq, &adapter->watchdog_task,
- HZ * 2);
+ msecs_to_jiffies(msec_delay));
+ netdev_unlock(netdev);
}
/**
@@ -3105,14 +3051,15 @@ restart_watchdog:
* @adapter: board private structure
*
* Set communication failed flag and free all resources.
- * NOTE: This function is expected to be called with crit_lock being held.
- **/
+ */
static void iavf_disable_vf(struct iavf_adapter *adapter)
{
struct iavf_mac_filter *f, *ftmp;
struct iavf_vlan_filter *fv, *fvtmp;
struct iavf_cloud_filter *cf, *cftmp;
+ netdev_assert_locked(adapter->netdev);
+
adapter->flags |= IAVF_FLAG_PF_COMMS_FAILED;
/* We don't use netif_running() because it may be true prior to
@@ -3212,17 +3159,7 @@ static void iavf_reset_task(struct work_struct *work)
int i = 0, err;
bool running;
- /* When device is being removed it doesn't make sense to run the reset
- * task, just return in such a case.
- */
netdev_lock(netdev);
- if (!mutex_trylock(&adapter->crit_lock)) {
- if (adapter->state != __IAVF_REMOVE)
- queue_work(adapter->wq, &adapter->reset_task);
-
- netdev_unlock(netdev);
- return;
- }
iavf_misc_irq_disable(adapter);
if (adapter->flags & IAVF_FLAG_RESET_NEEDED) {
@@ -3267,7 +3204,6 @@ static void iavf_reset_task(struct work_struct *work)
dev_err(&adapter->pdev->dev, "Reset never finished (%x)\n",
reg_val);
iavf_disable_vf(adapter);
- mutex_unlock(&adapter->crit_lock);
netdev_unlock(netdev);
return; /* Do not attempt to reinit. It's dead, Jim. */
}
@@ -3411,7 +3347,6 @@ continue_reset:
adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED;
wake_up(&adapter->reset_waitqueue);
- mutex_unlock(&adapter->crit_lock);
netdev_unlock(netdev);
return;
@@ -3422,7 +3357,6 @@ reset_err:
}
iavf_disable_vf(adapter);
- mutex_unlock(&adapter->crit_lock);
netdev_unlock(netdev);
dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n");
}
@@ -3435,6 +3369,7 @@ static void iavf_adminq_task(struct work_struct *work)
{
struct iavf_adapter *adapter =
container_of(work, struct iavf_adapter, adminq_task);
+ struct net_device *netdev = adapter->netdev;
struct iavf_hw *hw = &adapter->hw;
struct iavf_arq_event_info event;
enum virtchnl_ops v_op;
@@ -3442,13 +3377,7 @@ static void iavf_adminq_task(struct work_struct *work)
u32 val, oldval;
u16 pending;
- if (!mutex_trylock(&adapter->crit_lock)) {
- if (adapter->state == __IAVF_REMOVE)
- return;
-
- queue_work(adapter->wq, &adapter->adminq_task);
- goto out;
- }
+ netdev_lock(netdev);
if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED)
goto unlock;
@@ -3515,8 +3444,7 @@ static void iavf_adminq_task(struct work_struct *work)
freedom:
kfree(event.msg_buf);
unlock:
- mutex_unlock(&adapter->crit_lock);
-out:
+ netdev_unlock(netdev);
/* re-enable Admin queue interrupt cause */
iavf_misc_irq_enable(adapter);
}
@@ -4209,8 +4137,8 @@ static int iavf_configure_clsflower(struct iavf_adapter *adapter,
struct flow_cls_offload *cls_flower)
{
int tc = tc_classid_to_hwtc(adapter->netdev, cls_flower->classid);
- struct iavf_cloud_filter *filter = NULL;
- int err = -EINVAL, count = 50;
+ struct iavf_cloud_filter *filter;
+ int err;
if (tc < 0) {
dev_err(&adapter->pdev->dev, "Invalid traffic class\n");
@@ -4220,17 +4148,10 @@ static int iavf_configure_clsflower(struct iavf_adapter *adapter,
filter = kzalloc(sizeof(*filter), GFP_KERNEL);
if (!filter)
return -ENOMEM;
-
- while (!mutex_trylock(&adapter->crit_lock)) {
- if (--count == 0) {
- kfree(filter);
- return err;
- }
- udelay(1);
- }
-
filter->cookie = cls_flower->cookie;
+ netdev_lock(adapter->netdev);
+
/* bail out here if filter already exists */
spin_lock_bh(&adapter->cloud_filter_list_lock);
if (iavf_find_cf(adapter, &cls_flower->cookie)) {
@@ -4264,7 +4185,7 @@ err:
if (err)
kfree(filter);
- mutex_unlock(&adapter->crit_lock);
+ netdev_unlock(adapter->netdev);
return err;
}
@@ -4568,28 +4489,13 @@ static int iavf_open(struct net_device *netdev)
return -EIO;
}
- while (!mutex_trylock(&adapter->crit_lock)) {
- /* If we are in __IAVF_INIT_CONFIG_ADAPTER state the crit_lock
- * is already taken and iavf_open is called from an upper
- * device's notifier reacting on NETDEV_REGISTER event.
- * We have to leave here to avoid dead lock.
- */
- if (adapter->state == __IAVF_INIT_CONFIG_ADAPTER)
- return -EBUSY;
-
- usleep_range(500, 1000);
- }
-
- if (adapter->state != __IAVF_DOWN) {
- err = -EBUSY;
- goto err_unlock;
- }
+ if (adapter->state != __IAVF_DOWN)
+ return -EBUSY;
if (adapter->state == __IAVF_RUNNING &&
!test_bit(__IAVF_VSI_DOWN, adapter->vsi.state)) {
dev_dbg(&adapter->pdev->dev, "VF is already open.\n");
- err = 0;
- goto err_unlock;
+ return 0;
}
/* allocate transmit descriptors */
@@ -4608,9 +4514,7 @@ static int iavf_open(struct net_device *netdev)
goto err_req_irq;
spin_lock_bh(&adapter->mac_vlan_list_lock);
-
iavf_add_filter(adapter, adapter->hw.mac.addr);
-
spin_unlock_bh(&adapter->mac_vlan_list_lock);
/* Restore filters that were removed with IFF_DOWN */
@@ -4623,8 +4527,6 @@ static int iavf_open(struct net_device *netdev)
iavf_irq_enable(adapter, true);
- mutex_unlock(&adapter->crit_lock);
-
return 0;
err_req_irq:
@@ -4634,8 +4536,6 @@ err_setup_rx:
iavf_free_all_rx_resources(adapter);
err_setup_tx:
iavf_free_all_tx_resources(adapter);
-err_unlock:
- mutex_unlock(&adapter->crit_lock);
return err;
}
@@ -4659,12 +4559,8 @@ static int iavf_close(struct net_device *netdev)
netdev_assert_locked(netdev);
- mutex_lock(&adapter->crit_lock);
-
- if (adapter->state <= __IAVF_DOWN_PENDING) {
- mutex_unlock(&adapter->crit_lock);
+ if (adapter->state <= __IAVF_DOWN_PENDING)
return 0;
- }
set_bit(__IAVF_VSI_DOWN, adapter->vsi.state);
/* We cannot send IAVF_FLAG_AQ_GET_OFFLOAD_VLAN_V2_CAPS before
@@ -4695,7 +4591,6 @@ static int iavf_close(struct net_device *netdev)
iavf_change_state(adapter, __IAVF_DOWN_PENDING);
iavf_free_traffic_irqs(adapter);
- mutex_unlock(&adapter->crit_lock);
netdev_unlock(netdev);
/* We explicitly don't free resources here because the hardware is
@@ -4714,11 +4609,10 @@ static int iavf_close(struct net_device *netdev)
msecs_to_jiffies(500));
if (!status)
netdev_warn(netdev, "Device resources not yet released\n");
-
netdev_lock(netdev);
- mutex_lock(&adapter->crit_lock);
+
adapter->aq_required |= aq_to_restore;
- mutex_unlock(&adapter->crit_lock);
+
return 0;
}
@@ -5227,15 +5121,16 @@ iavf_shaper_set(struct net_shaper_binding *binding,
struct iavf_adapter *adapter = netdev_priv(binding->netdev);
const struct net_shaper_handle *handle = &shaper->handle;
struct iavf_ring *tx_ring;
- int ret = 0;
+ int ret;
+
+ netdev_assert_locked(adapter->netdev);
- mutex_lock(&adapter->crit_lock);
if (handle->id >= adapter->num_active_queues)
- goto unlock;
+ return 0;
ret = iavf_verify_shaper(binding, shaper, extack);
if (ret)
- goto unlock;
+ return ret;
tx_ring = &adapter->tx_rings[handle->id];
@@ -5245,9 +5140,7 @@ iavf_shaper_set(struct net_shaper_binding *binding,
adapter->aq_required |= IAVF_FLAG_AQ_CONFIGURE_QUEUES_BW;
-unlock:
- mutex_unlock(&adapter->crit_lock);
- return ret;
+ return 0;
}
static int iavf_shaper_del(struct net_shaper_binding *binding,
@@ -5257,9 +5150,10 @@ static int iavf_shaper_del(struct net_shaper_binding *binding,
struct iavf_adapter *adapter = netdev_priv(binding->netdev);
struct iavf_ring *tx_ring;
- mutex_lock(&adapter->crit_lock);
+ netdev_assert_locked(adapter->netdev);
+
if (handle->id >= adapter->num_active_queues)
- goto unlock;
+ return 0;
tx_ring = &adapter->tx_rings[handle->id];
tx_ring->q_shaper.bw_min = 0;
@@ -5268,8 +5162,6 @@ static int iavf_shaper_del(struct net_shaper_binding *binding,
adapter->aq_required |= IAVF_FLAG_AQ_CONFIGURE_QUEUES_BW;
-unlock:
- mutex_unlock(&adapter->crit_lock);
return 0;
}
@@ -5530,10 +5422,6 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
goto err_alloc_qos_cap;
}
- /* set up the locks for the AQ, do this only once in probe
- * and destroy them only once in remove
- */
- mutex_init(&adapter->crit_lock);
mutex_init(&hw->aq.asq_mutex);
mutex_init(&hw->aq.arq_mutex);
@@ -5596,22 +5484,24 @@ static int iavf_suspend(struct device *dev_d)
{
struct net_device *netdev = dev_get_drvdata(dev_d);
struct iavf_adapter *adapter = netdev_priv(netdev);
+ bool running;
netif_device_detach(netdev);
+ running = netif_running(netdev);
+ if (running)
+ rtnl_lock();
netdev_lock(netdev);
- mutex_lock(&adapter->crit_lock);
- if (netif_running(netdev)) {
- rtnl_lock();
+ if (running)
iavf_down(adapter);
- rtnl_unlock();
- }
+
iavf_free_misc_irq(adapter);
iavf_reset_interrupt_capability(adapter);
- mutex_unlock(&adapter->crit_lock);
netdev_unlock(netdev);
+ if (running)
+ rtnl_unlock();
return 0;
}
@@ -5688,20 +5578,20 @@ static void iavf_remove(struct pci_dev *pdev)
* There are flows where register/unregister netdev may race.
*/
while (1) {
- mutex_lock(&adapter->crit_lock);
+ netdev_lock(netdev);
if (adapter->state == __IAVF_RUNNING ||
adapter->state == __IAVF_DOWN ||
adapter->state == __IAVF_INIT_FAILED) {
- mutex_unlock(&adapter->crit_lock);
+ netdev_unlock(netdev);
break;
}
/* Simply return if we already went through iavf_shutdown */
if (adapter->state == __IAVF_REMOVE) {
- mutex_unlock(&adapter->crit_lock);
+ netdev_unlock(netdev);
return;
}
- mutex_unlock(&adapter->crit_lock);
+ netdev_unlock(netdev);
usleep_range(500, 1000);
}
cancel_delayed_work_sync(&adapter->watchdog_task);
@@ -5711,7 +5601,6 @@ static void iavf_remove(struct pci_dev *pdev)
unregister_netdev(netdev);
netdev_lock(netdev);
- mutex_lock(&adapter->crit_lock);
dev_info(&adapter->pdev->dev, "Removing device\n");
iavf_change_state(adapter, __IAVF_REMOVE);
@@ -5727,9 +5616,11 @@ static void iavf_remove(struct pci_dev *pdev)
iavf_misc_irq_disable(adapter);
/* Shut down all the garbage mashers on the detention level */
+ netdev_unlock(netdev);
cancel_work_sync(&adapter->reset_task);
cancel_delayed_work_sync(&adapter->watchdog_task);
cancel_work_sync(&adapter->adminq_task);
+ netdev_lock(netdev);
adapter->aq_required = 0;
adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED;
@@ -5747,8 +5638,6 @@ static void iavf_remove(struct pci_dev *pdev)
/* destroy the locks only once, here */
mutex_destroy(&hw->aq.arq_mutex);
mutex_destroy(&hw->aq.asq_mutex);
- mutex_unlock(&adapter->crit_lock);
- mutex_destroy(&adapter->crit_lock);
netdev_unlock(netdev);
iounmap(hw->hw_addr);
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 20d3baf955e3..d97d4b25b30d 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -2741,6 +2741,27 @@ void ice_map_xdp_rings(struct ice_vsi *vsi)
}
/**
+ * ice_unmap_xdp_rings - Unmap XDP rings from interrupt vectors
+ * @vsi: the VSI with XDP rings being unmapped
+ */
+static void ice_unmap_xdp_rings(struct ice_vsi *vsi)
+{
+ int v_idx;
+
+ ice_for_each_q_vector(vsi, v_idx) {
+ struct ice_q_vector *q_vector = vsi->q_vectors[v_idx];
+ struct ice_tx_ring *ring;
+
+ ice_for_each_tx_ring(ring, q_vector->tx)
+ if (!ring->tx_buf || !ice_ring_is_xdp(ring))
+ break;
+
+ /* restore the value of last node prior to XDP setup */
+ q_vector->tx.tx_ring = ring;
+ }
+}
+
+/**
* ice_prepare_xdp_rings - Allocate, configure and setup Tx rings for XDP
* @vsi: VSI to bring up Tx rings used by XDP
* @prog: bpf program that will be assigned to VSI
@@ -2803,7 +2824,7 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog,
if (status) {
dev_err(dev, "Failed VSI LAN queue config for XDP, error: %d\n",
status);
- goto clear_xdp_rings;
+ goto unmap_xdp_rings;
}
/* assign the prog only when it's not already present on VSI;
@@ -2819,6 +2840,8 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog,
ice_vsi_assign_bpf_prog(vsi, prog);
return 0;
+unmap_xdp_rings:
+ ice_unmap_xdp_rings(vsi);
clear_xdp_rings:
ice_for_each_xdp_txq(vsi, i)
if (vsi->xdp_rings[i]) {
@@ -2835,6 +2858,8 @@ err_map_xdp:
mutex_unlock(&pf->avail_q_mutex);
devm_kfree(dev, vsi->xdp_rings);
+ vsi->xdp_rings = NULL;
+
return -ENOMEM;
}
@@ -2850,7 +2875,7 @@ int ice_destroy_xdp_rings(struct ice_vsi *vsi, enum ice_xdp_cfg cfg_type)
{
u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
struct ice_pf *pf = vsi->back;
- int i, v_idx;
+ int i;
/* q_vectors are freed in reset path so there's no point in detaching
* rings
@@ -2858,17 +2883,7 @@ int ice_destroy_xdp_rings(struct ice_vsi *vsi, enum ice_xdp_cfg cfg_type)
if (cfg_type == ICE_XDP_CFG_PART)
goto free_qmap;
- ice_for_each_q_vector(vsi, v_idx) {
- struct ice_q_vector *q_vector = vsi->q_vectors[v_idx];
- struct ice_tx_ring *ring;
-
- ice_for_each_tx_ring(ring, q_vector->tx)
- if (!ring->tx_buf || !ice_ring_is_xdp(ring))
- break;
-
- /* restore the value of last node prior to XDP setup */
- q_vector->tx.tx_ring = ring;
- }
+ ice_unmap_xdp_rings(vsi);
free_qmap:
mutex_lock(&pf->avail_q_mutex);
@@ -3013,11 +3028,14 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
xdp_ring_err = ice_vsi_determine_xdp_res(vsi);
if (xdp_ring_err) {
NL_SET_ERR_MSG_MOD(extack, "Not enough Tx resources for XDP");
+ goto resume_if;
} else {
xdp_ring_err = ice_prepare_xdp_rings(vsi, prog,
ICE_XDP_CFG_FULL);
- if (xdp_ring_err)
+ if (xdp_ring_err) {
NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Tx resources failed");
+ goto resume_if;
+ }
}
xdp_features_set_redirect_target(vsi->netdev, true);
/* reallocate Rx queues that are used for zero-copy */
@@ -3035,6 +3053,7 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Rx resources failed");
}
+resume_if:
if (if_running)
ret = ice_up(vsi);
diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c
index 6ca13c5dcb14..d9d09296d1d4 100644
--- a/drivers/net/ethernet/intel/ice/ice_sched.c
+++ b/drivers/net/ethernet/intel/ice/ice_sched.c
@@ -85,6 +85,27 @@ ice_sched_find_node_by_teid(struct ice_sched_node *start_node, u32 teid)
}
/**
+ * ice_sched_find_next_vsi_node - find the next node for a given VSI
+ * @vsi_node: VSI support node to start search with
+ *
+ * Return: Next VSI support node, or NULL.
+ *
+ * The function returns a pointer to the next node from the VSI layer
+ * assigned to the given VSI, or NULL if there is no such a node.
+ */
+static struct ice_sched_node *
+ice_sched_find_next_vsi_node(struct ice_sched_node *vsi_node)
+{
+ unsigned int vsi_handle = vsi_node->vsi_handle;
+
+ while ((vsi_node = vsi_node->sibling) != NULL)
+ if (vsi_node->vsi_handle == vsi_handle)
+ break;
+
+ return vsi_node;
+}
+
+/**
* ice_aqc_send_sched_elem_cmd - send scheduling elements cmd
* @hw: pointer to the HW struct
* @cmd_opc: cmd opcode
@@ -1084,8 +1105,10 @@ ice_sched_add_nodes_to_layer(struct ice_port_info *pi,
if (parent->num_children < max_child_nodes) {
new_num_nodes = max_child_nodes - parent->num_children;
} else {
- /* This parent is full, try the next sibling */
- parent = parent->sibling;
+ /* This parent is full,
+ * try the next available sibling.
+ */
+ parent = ice_sched_find_next_vsi_node(parent);
/* Don't modify the first node TEID memory if the
* first node was added already in the above call.
* Instead send some temp memory for all other
@@ -1528,12 +1551,23 @@ ice_sched_get_free_qparent(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
/* get the first queue group node from VSI sub-tree */
qgrp_node = ice_sched_get_first_node(pi, vsi_node, qgrp_layer);
while (qgrp_node) {
+ struct ice_sched_node *next_vsi_node;
+
/* make sure the qgroup node is part of the VSI subtree */
if (ice_sched_find_node_in_subtree(pi->hw, vsi_node, qgrp_node))
if (qgrp_node->num_children < max_children &&
qgrp_node->owner == owner)
break;
qgrp_node = qgrp_node->sibling;
+ if (qgrp_node)
+ continue;
+
+ next_vsi_node = ice_sched_find_next_vsi_node(vsi_node);
+ if (!next_vsi_node)
+ break;
+
+ vsi_node = next_vsi_node;
+ qgrp_node = ice_sched_get_first_node(pi, vsi_node, qgrp_layer);
}
/* Select the best queue group */
@@ -1604,16 +1638,16 @@ ice_sched_get_agg_node(struct ice_port_info *pi, struct ice_sched_node *tc_node,
/**
* ice_sched_calc_vsi_child_nodes - calculate number of VSI child nodes
* @hw: pointer to the HW struct
- * @num_qs: number of queues
+ * @num_new_qs: number of new queues that will be added to the tree
* @num_nodes: num nodes array
*
* This function calculates the number of VSI child nodes based on the
* number of queues.
*/
static void
-ice_sched_calc_vsi_child_nodes(struct ice_hw *hw, u16 num_qs, u16 *num_nodes)
+ice_sched_calc_vsi_child_nodes(struct ice_hw *hw, u16 num_new_qs, u16 *num_nodes)
{
- u16 num = num_qs;
+ u16 num = num_new_qs;
u8 i, qgl, vsil;
qgl = ice_sched_get_qgrp_layer(hw);
@@ -1779,7 +1813,11 @@ ice_sched_add_vsi_support_nodes(struct ice_port_info *pi, u16 vsi_handle,
if (!parent)
return -EIO;
- if (i == vsil)
+ /* Do not modify the VSI handle for already existing VSI nodes,
+ * (if no new VSI node was added to the tree).
+ * Assign the VSI handle only to newly added VSI nodes.
+ */
+ if (i == vsil && num_added)
parent->vsi_handle = vsi_handle;
}
@@ -1813,6 +1851,41 @@ ice_sched_add_vsi_to_topo(struct ice_port_info *pi, u16 vsi_handle, u8 tc)
}
/**
+ * ice_sched_recalc_vsi_support_nodes - recalculate VSI support nodes count
+ * @hw: pointer to the HW struct
+ * @vsi_node: pointer to the leftmost VSI node that needs to be extended
+ * @new_numqs: new number of queues that has to be handled by the VSI
+ * @new_num_nodes: pointer to nodes count table to modify the VSI layer entry
+ *
+ * This function recalculates the number of supported nodes that need to
+ * be added after adding more Tx queues for a given VSI.
+ * The number of new VSI support nodes that shall be added will be saved
+ * to the @new_num_nodes table for the VSI layer.
+ */
+static void
+ice_sched_recalc_vsi_support_nodes(struct ice_hw *hw,
+ struct ice_sched_node *vsi_node,
+ unsigned int new_numqs, u16 *new_num_nodes)
+{
+ u32 vsi_nodes_cnt = 1;
+ u32 max_queue_cnt = 1;
+ u32 qgl, vsil;
+
+ qgl = ice_sched_get_qgrp_layer(hw);
+ vsil = ice_sched_get_vsi_layer(hw);
+
+ for (u32 i = vsil; i <= qgl; i++)
+ max_queue_cnt *= hw->max_children[i];
+
+ while ((vsi_node = ice_sched_find_next_vsi_node(vsi_node)) != NULL)
+ vsi_nodes_cnt++;
+
+ if (new_numqs > (max_queue_cnt * vsi_nodes_cnt))
+ new_num_nodes[vsil] = DIV_ROUND_UP(new_numqs, max_queue_cnt) -
+ vsi_nodes_cnt;
+}
+
+/**
* ice_sched_update_vsi_child_nodes - update VSI child nodes
* @pi: port information structure
* @vsi_handle: software VSI handle
@@ -1863,15 +1936,25 @@ ice_sched_update_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle,
return status;
}
- if (new_numqs)
- ice_sched_calc_vsi_child_nodes(hw, new_numqs, new_num_nodes);
- /* Keep the max number of queue configuration all the time. Update the
- * tree only if number of queues > previous number of queues. This may
+ ice_sched_recalc_vsi_support_nodes(hw, vsi_node,
+ new_numqs, new_num_nodes);
+ ice_sched_calc_vsi_child_nodes(hw, new_numqs - prev_numqs,
+ new_num_nodes);
+
+ /* Never decrease the number of queues in the tree. Update the tree
+ * only if number of queues > previous number of queues. This may
* leave some extra nodes in the tree if number of queues < previous
* number but that wouldn't harm anything. Removing those extra nodes
* may complicate the code if those nodes are part of SRL or
* individually rate limited.
+ * Also, add the required VSI support nodes if the existing ones cannot
+ * handle the requested new number of queues.
*/
+ status = ice_sched_add_vsi_support_nodes(pi, vsi_handle, tc_node,
+ new_num_nodes);
+ if (status)
+ return status;
+
status = ice_sched_add_vsi_child_nodes(pi, vsi_handle, tc_node,
new_num_nodes, owner);
if (status)
@@ -2013,6 +2096,58 @@ static bool ice_sched_is_leaf_node_present(struct ice_sched_node *node)
}
/**
+ * ice_sched_rm_vsi_subtree - remove all nodes assigned to a given VSI
+ * @pi: port information structure
+ * @vsi_node: pointer to the leftmost node of the VSI to be removed
+ * @owner: LAN or RDMA
+ * @tc: TC number
+ *
+ * Return: Zero in case of success, or -EBUSY if the VSI has leaf nodes in TC.
+ *
+ * This function removes all the VSI support nodes associated with a given VSI
+ * and its LAN or RDMA children nodes from the scheduler tree.
+ */
+static int
+ice_sched_rm_vsi_subtree(struct ice_port_info *pi,
+ struct ice_sched_node *vsi_node, u8 owner, u8 tc)
+{
+ u16 vsi_handle = vsi_node->vsi_handle;
+ bool all_vsi_nodes_removed = true;
+ int j = 0;
+
+ while (vsi_node) {
+ struct ice_sched_node *next_vsi_node;
+
+ if (ice_sched_is_leaf_node_present(vsi_node)) {
+ ice_debug(pi->hw, ICE_DBG_SCHED, "VSI has leaf nodes in TC %d\n", tc);
+ return -EBUSY;
+ }
+ while (j < vsi_node->num_children) {
+ if (vsi_node->children[j]->owner == owner)
+ ice_free_sched_node(pi, vsi_node->children[j]);
+ else
+ j++;
+ }
+
+ next_vsi_node = ice_sched_find_next_vsi_node(vsi_node);
+
+ /* remove the VSI if it has no children */
+ if (!vsi_node->num_children)
+ ice_free_sched_node(pi, vsi_node);
+ else
+ all_vsi_nodes_removed = false;
+
+ vsi_node = next_vsi_node;
+ }
+
+ /* clean up aggregator related VSI info if any */
+ if (all_vsi_nodes_removed)
+ ice_sched_rm_agg_vsi_info(pi, vsi_handle);
+
+ return 0;
+}
+
+/**
* ice_sched_rm_vsi_cfg - remove the VSI and its children nodes
* @pi: port information structure
* @vsi_handle: software VSI handle
@@ -2038,7 +2173,6 @@ ice_sched_rm_vsi_cfg(struct ice_port_info *pi, u16 vsi_handle, u8 owner)
ice_for_each_traffic_class(i) {
struct ice_sched_node *vsi_node, *tc_node;
- u8 j = 0;
tc_node = ice_sched_get_tc_node(pi, i);
if (!tc_node)
@@ -2048,31 +2182,12 @@ ice_sched_rm_vsi_cfg(struct ice_port_info *pi, u16 vsi_handle, u8 owner)
if (!vsi_node)
continue;
- if (ice_sched_is_leaf_node_present(vsi_node)) {
- ice_debug(pi->hw, ICE_DBG_SCHED, "VSI has leaf nodes in TC %d\n", i);
- status = -EBUSY;
+ status = ice_sched_rm_vsi_subtree(pi, vsi_node, owner, i);
+ if (status)
goto exit_sched_rm_vsi_cfg;
- }
- while (j < vsi_node->num_children) {
- if (vsi_node->children[j]->owner == owner) {
- ice_free_sched_node(pi, vsi_node->children[j]);
- /* reset the counter again since the num
- * children will be updated after node removal
- */
- j = 0;
- } else {
- j++;
- }
- }
- /* remove the VSI if it has no children */
- if (!vsi_node->num_children) {
- ice_free_sched_node(pi, vsi_node);
- vsi_ctx->sched.vsi_node[i] = NULL;
+ vsi_ctx->sched.vsi_node[i] = NULL;
- /* clean up aggregator related VSI info if any */
- ice_sched_rm_agg_vsi_info(pi, vsi_handle);
- }
if (owner == ICE_SCHED_NODE_OWNER_LAN)
vsi_ctx->sched.max_lanq[i] = 0;
else
diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c
index bab12ecb2df5..4eb20ec2accb 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_lib.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c
@@ -1801,11 +1801,19 @@ void idpf_vc_event_task(struct work_struct *work)
if (test_bit(IDPF_REMOVE_IN_PROG, adapter->flags))
return;
- if (test_bit(IDPF_HR_FUNC_RESET, adapter->flags) ||
- test_bit(IDPF_HR_DRV_LOAD, adapter->flags)) {
- set_bit(IDPF_HR_RESET_IN_PROG, adapter->flags);
- idpf_init_hard_reset(adapter);
- }
+ if (test_bit(IDPF_HR_FUNC_RESET, adapter->flags))
+ goto func_reset;
+
+ if (test_bit(IDPF_HR_DRV_LOAD, adapter->flags))
+ goto drv_load;
+
+ return;
+
+func_reset:
+ idpf_vc_xn_shutdown(adapter->vcxn_mngr);
+drv_load:
+ set_bit(IDPF_HR_RESET_IN_PROG, adapter->flags);
+ idpf_init_hard_reset(adapter);
}
/**
diff --git a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c
index 2e356dd10812..993c354aa27a 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c
@@ -362,17 +362,18 @@ netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb,
{
struct idpf_tx_offload_params offload = { };
struct idpf_tx_buf *first;
+ int csum, tso, needed;
unsigned int count;
__be16 protocol;
- int csum, tso;
count = idpf_tx_desc_count_required(tx_q, skb);
if (unlikely(!count))
return idpf_tx_drop_skb(tx_q, skb);
- if (idpf_tx_maybe_stop_common(tx_q,
- count + IDPF_TX_DESCS_PER_CACHE_LINE +
- IDPF_TX_DESCS_FOR_CTX)) {
+ needed = count + IDPF_TX_DESCS_PER_CACHE_LINE + IDPF_TX_DESCS_FOR_CTX;
+ if (!netif_subqueue_maybe_stop(tx_q->netdev, tx_q->idx,
+ IDPF_DESC_UNUSED(tx_q),
+ needed, needed)) {
idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false);
u64_stats_update_begin(&tx_q->stats_sync);
diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
index 631679cdaa6f..5cf440e09d0a 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
@@ -2184,6 +2184,19 @@ void idpf_tx_splitq_build_flow_desc(union idpf_tx_flex_desc *desc,
desc->flow.qw1.compl_tag = cpu_to_le16(params->compl_tag);
}
+/* Global conditions to tell whether the txq (and related resources)
+ * has room to allow the use of "size" descriptors.
+ */
+static int idpf_txq_has_room(struct idpf_tx_queue *tx_q, u32 size)
+{
+ if (IDPF_DESC_UNUSED(tx_q) < size ||
+ IDPF_TX_COMPLQ_PENDING(tx_q->txq_grp) >
+ IDPF_TX_COMPLQ_OVERFLOW_THRESH(tx_q->txq_grp->complq) ||
+ IDPF_TX_BUF_RSV_LOW(tx_q))
+ return 0;
+ return 1;
+}
+
/**
* idpf_tx_maybe_stop_splitq - 1st level check for Tx splitq stop conditions
* @tx_q: the queue to be checked
@@ -2194,29 +2207,11 @@ void idpf_tx_splitq_build_flow_desc(union idpf_tx_flex_desc *desc,
static int idpf_tx_maybe_stop_splitq(struct idpf_tx_queue *tx_q,
unsigned int descs_needed)
{
- if (idpf_tx_maybe_stop_common(tx_q, descs_needed))
- goto out;
-
- /* If there are too many outstanding completions expected on the
- * completion queue, stop the TX queue to give the device some time to
- * catch up
- */
- if (unlikely(IDPF_TX_COMPLQ_PENDING(tx_q->txq_grp) >
- IDPF_TX_COMPLQ_OVERFLOW_THRESH(tx_q->txq_grp->complq)))
- goto splitq_stop;
-
- /* Also check for available book keeping buffers; if we are low, stop
- * the queue to wait for more completions
- */
- if (unlikely(IDPF_TX_BUF_RSV_LOW(tx_q)))
- goto splitq_stop;
-
- return 0;
-
-splitq_stop:
- netif_stop_subqueue(tx_q->netdev, tx_q->idx);
+ if (netif_subqueue_maybe_stop(tx_q->netdev, tx_q->idx,
+ idpf_txq_has_room(tx_q, descs_needed),
+ 1, 1))
+ return 0;
-out:
u64_stats_update_begin(&tx_q->stats_sync);
u64_stats_inc(&tx_q->q_stats.q_busy);
u64_stats_update_end(&tx_q->stats_sync);
@@ -2242,12 +2237,6 @@ void idpf_tx_buf_hw_update(struct idpf_tx_queue *tx_q, u32 val,
nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx);
tx_q->next_to_use = val;
- if (idpf_tx_maybe_stop_common(tx_q, IDPF_TX_DESC_NEEDED)) {
- u64_stats_update_begin(&tx_q->stats_sync);
- u64_stats_inc(&tx_q->q_stats.q_busy);
- u64_stats_update_end(&tx_q->stats_sync);
- }
-
/* Force memory writes to complete before letting h/w
* know there are new descriptors to fetch. (Only
* applicable for weak-ordered memory model archs,
diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h
index c779fe71df99..36a0f828a6f8 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h
+++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h
@@ -1049,12 +1049,4 @@ bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_rx_queue *rxq,
u16 cleaned_count);
int idpf_tso(struct sk_buff *skb, struct idpf_tx_offload_params *off);
-static inline bool idpf_tx_maybe_stop_common(struct idpf_tx_queue *tx_q,
- u32 needed)
-{
- return !netif_subqueue_maybe_stop(tx_q->netdev, tx_q->idx,
- IDPF_DESC_UNUSED(tx_q),
- needed, needed);
-}
-
#endif /* !_IDPF_TXRX_H_ */
diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c
index 07a9f5ae34fd..24febaaa8fbb 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c
@@ -347,7 +347,7 @@ static void idpf_vc_xn_init(struct idpf_vc_xn_manager *vcxn_mngr)
* All waiting threads will be woken-up and their transaction aborted. Further
* operations on that object will fail.
*/
-static void idpf_vc_xn_shutdown(struct idpf_vc_xn_manager *vcxn_mngr)
+void idpf_vc_xn_shutdown(struct idpf_vc_xn_manager *vcxn_mngr)
{
int i;
diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.h b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.h
index 3522c1238ea2..77578206bada 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.h
+++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.h
@@ -150,5 +150,6 @@ int idpf_send_get_stats_msg(struct idpf_vport *vport);
int idpf_send_set_sriov_vfs_msg(struct idpf_adapter *adapter, u16 num_vfs);
int idpf_send_get_set_rss_key_msg(struct idpf_vport *vport, bool get);
int idpf_send_get_set_rss_lut_msg(struct idpf_vport *vport, bool get);
+void idpf_vc_xn_shutdown(struct idpf_vc_xn_manager *vcxn_mngr);
#endif /* _IDPF_VIRTCHNL_H_ */
diff --git a/drivers/net/ethernet/mediatek/mtk_star_emac.c b/drivers/net/ethernet/mediatek/mtk_star_emac.c
index b175119a6a7d..b83886a41121 100644
--- a/drivers/net/ethernet/mediatek/mtk_star_emac.c
+++ b/drivers/net/ethernet/mediatek/mtk_star_emac.c
@@ -1463,6 +1463,8 @@ static __maybe_unused int mtk_star_suspend(struct device *dev)
if (netif_running(ndev))
mtk_star_disable(ndev);
+ netif_device_detach(ndev);
+
clk_bulk_disable_unprepare(MTK_STAR_NCLKS, priv->clks);
return 0;
@@ -1487,6 +1489,8 @@ static __maybe_unused int mtk_star_resume(struct device *dev)
clk_bulk_disable_unprepare(MTK_STAR_NCLKS, priv->clks);
}
+ netif_device_attach(ndev);
+
return ret;
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/drivers/net/ethernet/mellanox/mlx4/en_clock.c
index cd754cd76bde..d73a2044dc26 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_clock.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c
@@ -249,7 +249,7 @@ static const struct ptp_clock_info mlx4_en_ptp_clock_info = {
static u32 freq_to_shift(u16 freq)
{
u32 freq_khz = freq * 1000;
- u64 max_val_cycles = freq_khz * 1000 * MLX4_EN_WRAP_AROUND_SEC;
+ u64 max_val_cycles = freq_khz * 1000ULL * MLX4_EN_WRAP_AROUND_SEC;
u64 max_val_cycles_rounded = 1ULL << fls64(max_val_cycles - 1);
/* calculate max possible multiplier in order to fit in 64bit */
u64 max_mul = div64_u64(ULLONG_MAX, max_val_cycles_rounded);
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
index 427bdc0e4908..7001584f1b7a 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
@@ -879,6 +879,7 @@ static int lan966x_probe_port(struct lan966x *lan966x, u32 p,
lan966x_vlan_port_set_vlan_aware(port, 0);
lan966x_vlan_port_set_vid(port, HOST_PVID, false, false);
lan966x_vlan_port_apply(port);
+ lan966x_vlan_port_rew_host(port);
return 0;
}
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
index 1f9df67f0504..4f75f0688369 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
@@ -497,6 +497,7 @@ void lan966x_vlan_port_apply(struct lan966x_port *port);
bool lan966x_vlan_cpu_member_cpu_vlan_mask(struct lan966x *lan966x, u16 vid);
void lan966x_vlan_port_set_vlan_aware(struct lan966x_port *port,
bool vlan_aware);
+void lan966x_vlan_port_rew_host(struct lan966x_port *port);
int lan966x_vlan_port_set_vid(struct lan966x_port *port,
u16 vid,
bool pvid,
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c b/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c
index 1c88120eb291..bcb4db76b75c 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c
@@ -297,6 +297,7 @@ static void lan966x_port_bridge_leave(struct lan966x_port *port,
lan966x_vlan_port_set_vlan_aware(port, false);
lan966x_vlan_port_set_vid(port, HOST_PVID, false, false);
lan966x_vlan_port_apply(port);
+ lan966x_vlan_port_rew_host(port);
}
int lan966x_port_changeupper(struct net_device *dev,
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_vlan.c b/drivers/net/ethernet/microchip/lan966x/lan966x_vlan.c
index fa34a739c748..7da22520724c 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_vlan.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_vlan.c
@@ -149,6 +149,27 @@ void lan966x_vlan_port_set_vlan_aware(struct lan966x_port *port,
port->vlan_aware = vlan_aware;
}
+/* When the interface is in host mode, the interface should not be vlan aware
+ * but it should insert all the tags that it gets from the network stack.
+ * The tags are not in the data of the frame but actually in the skb and the ifh
+ * is configured already to get this tag. So what we need to do is to update the
+ * rewriter to insert the vlan tag for all frames which have a vlan tag
+ * different than 0.
+ */
+void lan966x_vlan_port_rew_host(struct lan966x_port *port)
+{
+ struct lan966x *lan966x = port->lan966x;
+ u32 val;
+
+ /* Tag all frames except when VID=0*/
+ val = REW_TAG_CFG_TAG_CFG_SET(2);
+
+ /* Update only some bits in the register */
+ lan_rmw(val,
+ REW_TAG_CFG_TAG_CFG,
+ lan966x, REW_TAG_CFG(port->chip_port));
+}
+
void lan966x_vlan_port_apply(struct lan966x_port *port)
{
struct lan966x *lan966x = port->lan966x;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_est.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_est.c
index c9693f77e1f6..ac6f2e3a3fcd 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_est.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_est.c
@@ -32,6 +32,11 @@ static int est_configure(struct stmmac_priv *priv, struct stmmac_est *cfg,
int i, ret = 0;
u32 ctrl;
+ if (!ptp_rate) {
+ netdev_warn(priv->dev, "Invalid PTP rate");
+ return -EINVAL;
+ }
+
ret |= est_write(est_addr, EST_BTR_LOW, cfg->btr[0], false);
ret |= est_write(est_addr, EST_BTR_HIGH, cfg->btr[1], false);
ret |= est_write(est_addr, EST_TER, cfg->ter, false);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 085c09039af4..1369fa70bc58 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -805,6 +805,11 @@ int stmmac_init_tstamp_counter(struct stmmac_priv *priv, u32 systime_flags)
if (!(priv->dma_cap.time_stamp || priv->dma_cap.atime_stamp))
return -EOPNOTSUPP;
+ if (!priv->plat->clk_ptp_rate) {
+ netdev_err(priv->dev, "Invalid PTP clock rate");
+ return -EINVAL;
+ }
+
stmmac_config_hw_tstamping(priv, priv->ptpaddr, systime_flags);
priv->systime_flags = systime_flags;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 43c869f64c39..b80c1efdb323 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -430,6 +430,7 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac)
struct device_node *np = pdev->dev.of_node;
struct plat_stmmacenet_data *plat;
struct stmmac_dma_cfg *dma_cfg;
+ static int bus_id = -ENODEV;
int phy_mode;
void *ret;
int rc;
@@ -465,8 +466,14 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac)
of_property_read_u32(np, "max-speed", &plat->max_speed);
plat->bus_id = of_alias_get_id(np, "ethernet");
- if (plat->bus_id < 0)
- plat->bus_id = 0;
+ if (plat->bus_id < 0) {
+ if (bus_id < 0)
+ bus_id = of_alias_get_highest_id("ethernet");
+ /* No ethernet alias found, init at -1 so first bus_id is 0 */
+ if (bus_id < 0)
+ bus_id = -1;
+ plat->bus_id = ++bus_id;
+ }
/* Default to phy auto-detection */
plat->phy_addr = -1;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
index 429b2d357813..3767ba495e78 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
@@ -317,7 +317,7 @@ void stmmac_ptp_register(struct stmmac_priv *priv)
/* Calculate the clock domain crossing (CDC) error if necessary */
priv->plat->cdc_error_adj = 0;
- if (priv->plat->has_gmac4 && priv->plat->clk_ptp_rate)
+ if (priv->plat->has_gmac4)
priv->plat->cdc_error_adj = (2 * NSEC_PER_SEC) / priv->plat->clk_ptp_rate;
/* Update the ptp clock parameters based on feature discovery, when
diff --git a/drivers/net/ethernet/ti/icssg/icssg_stats.c b/drivers/net/ethernet/ti/icssg/icssg_stats.c
index e8241e998aa9..7159baa0155c 100644
--- a/drivers/net/ethernet/ti/icssg/icssg_stats.c
+++ b/drivers/net/ethernet/ti/icssg/icssg_stats.c
@@ -28,6 +28,14 @@ void emac_update_hardware_stats(struct prueth_emac *emac)
spin_lock(&prueth->stats_lock);
for (i = 0; i < ARRAY_SIZE(icssg_all_miig_stats); i++) {
+ /* In MII mode TX lines are swapped inside ICSSG, so read Tx stats
+ * from slice1 for port0 and slice0 for port1 to get accurate Tx
+ * stats for a given port
+ */
+ if (emac->phy_if == PHY_INTERFACE_MODE_MII &&
+ icssg_all_miig_stats[i].offset >= ICSSG_TX_PACKET_OFFSET &&
+ icssg_all_miig_stats[i].offset <= ICSSG_TX_BYTE_OFFSET)
+ base = stats_base[slice ^ 1];
regmap_read(prueth->miig_rt,
base + icssg_all_miig_stats[i].offset,
&val);
diff --git a/drivers/net/hyperv/netvsc_bpf.c b/drivers/net/hyperv/netvsc_bpf.c
index e01c5997a551..1dd3755d9e6d 100644
--- a/drivers/net/hyperv/netvsc_bpf.c
+++ b/drivers/net/hyperv/netvsc_bpf.c
@@ -183,7 +183,7 @@ int netvsc_vf_setxdp(struct net_device *vf_netdev, struct bpf_prog *prog)
xdp.command = XDP_SETUP_PROG;
xdp.prog = prog;
- ret = dev_xdp_propagate(vf_netdev, &xdp);
+ ret = netif_xdp_propagate(vf_netdev, &xdp);
if (ret && prog)
bpf_prog_put(prog);
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 14a0d04e21ae..c41a025c66f0 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -2462,8 +2462,6 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev)
netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name);
- netvsc_vf_setxdp(vf_netdev, NULL);
-
reinit_completion(&net_device_ctx->vf_add);
netdev_rx_handler_unregister(vf_netdev);
netdev_upper_dev_unlink(vf_netdev, ndev);
@@ -2631,7 +2629,9 @@ static int netvsc_probe(struct hv_device *dev,
continue;
netvsc_prepare_bonding(vf_netdev);
+ netdev_lock_ops(vf_netdev);
netvsc_register_vf(vf_netdev, VF_REG_IN_PROBE);
+ netdev_unlock_ops(vf_netdev);
__netvsc_vf_setup(net, vf_netdev);
break;
}
diff --git a/drivers/net/ovpn/io.c b/drivers/net/ovpn/io.c
index 10d8afecec55..ebf1e849506b 100644
--- a/drivers/net/ovpn/io.c
+++ b/drivers/net/ovpn/io.c
@@ -134,7 +134,7 @@ void ovpn_decrypt_post(void *data, int ret)
rcu_read_lock();
sock = rcu_dereference(peer->sock);
- if (sock && sock->sock->sk->sk_protocol == IPPROTO_UDP)
+ if (sock && sock->sk->sk_protocol == IPPROTO_UDP)
/* check if this peer changed local or remote endpoint */
ovpn_peer_endpoints_update(peer, skb);
rcu_read_unlock();
@@ -270,12 +270,12 @@ void ovpn_encrypt_post(void *data, int ret)
if (unlikely(!sock))
goto err_unlock;
- switch (sock->sock->sk->sk_protocol) {
+ switch (sock->sk->sk_protocol) {
case IPPROTO_UDP:
- ovpn_udp_send_skb(peer, sock->sock, skb);
+ ovpn_udp_send_skb(peer, sock->sk, skb);
break;
case IPPROTO_TCP:
- ovpn_tcp_send_skb(peer, sock->sock, skb);
+ ovpn_tcp_send_skb(peer, sock->sk, skb);
break;
default:
/* no transport configured yet */
diff --git a/drivers/net/ovpn/netlink.c b/drivers/net/ovpn/netlink.c
index bea03913bfb1..a4ec53def46e 100644
--- a/drivers/net/ovpn/netlink.c
+++ b/drivers/net/ovpn/netlink.c
@@ -501,7 +501,7 @@ int ovpn_nl_peer_set_doit(struct sk_buff *skb, struct genl_info *info)
/* when using a TCP socket the remote IP is not expected */
rcu_read_lock();
sock = rcu_dereference(peer->sock);
- if (sock && sock->sock->sk->sk_protocol == IPPROTO_TCP &&
+ if (sock && sock->sk->sk_protocol == IPPROTO_TCP &&
(attrs[OVPN_A_PEER_REMOTE_IPV4] ||
attrs[OVPN_A_PEER_REMOTE_IPV6])) {
rcu_read_unlock();
@@ -559,14 +559,14 @@ static int ovpn_nl_send_peer(struct sk_buff *skb, const struct genl_info *info,
goto err_unlock;
}
- if (!net_eq(genl_info_net(info), sock_net(sock->sock->sk))) {
+ if (!net_eq(genl_info_net(info), sock_net(sock->sk))) {
id = peernet2id_alloc(genl_info_net(info),
- sock_net(sock->sock->sk),
+ sock_net(sock->sk),
GFP_ATOMIC);
if (nla_put_s32(skb, OVPN_A_PEER_SOCKET_NETNSID, id))
goto err_unlock;
}
- local_port = inet_sk(sock->sock->sk)->inet_sport;
+ local_port = inet_sk(sock->sk)->inet_sport;
rcu_read_unlock();
if (nla_put_u32(skb, OVPN_A_PEER_ID, peer->id))
@@ -1153,8 +1153,8 @@ int ovpn_nl_peer_del_notify(struct ovpn_peer *peer)
ret = -EINVAL;
goto err_unlock;
}
- genlmsg_multicast_netns(&ovpn_nl_family, sock_net(sock->sock->sk),
- msg, 0, OVPN_NLGRP_PEERS, GFP_ATOMIC);
+ genlmsg_multicast_netns(&ovpn_nl_family, sock_net(sock->sk), msg, 0,
+ OVPN_NLGRP_PEERS, GFP_ATOMIC);
rcu_read_unlock();
return 0;
@@ -1218,8 +1218,8 @@ int ovpn_nl_key_swap_notify(struct ovpn_peer *peer, u8 key_id)
ret = -EINVAL;
goto err_unlock;
}
- genlmsg_multicast_netns(&ovpn_nl_family, sock_net(sock->sock->sk),
- msg, 0, OVPN_NLGRP_PEERS, GFP_ATOMIC);
+ genlmsg_multicast_netns(&ovpn_nl_family, sock_net(sock->sk), msg, 0,
+ OVPN_NLGRP_PEERS, GFP_ATOMIC);
rcu_read_unlock();
return 0;
diff --git a/drivers/net/ovpn/peer.c b/drivers/net/ovpn/peer.c
index a1fd27b9c038..4bfcab0c8652 100644
--- a/drivers/net/ovpn/peer.c
+++ b/drivers/net/ovpn/peer.c
@@ -1145,7 +1145,7 @@ static void ovpn_peer_release_p2p(struct ovpn_priv *ovpn, struct sock *sk,
if (sk) {
ovpn_sock = rcu_access_pointer(peer->sock);
- if (!ovpn_sock || ovpn_sock->sock->sk != sk) {
+ if (!ovpn_sock || ovpn_sock->sk != sk) {
spin_unlock_bh(&ovpn->lock);
ovpn_peer_put(peer);
return;
@@ -1175,7 +1175,7 @@ static void ovpn_peers_release_mp(struct ovpn_priv *ovpn, struct sock *sk,
if (sk) {
rcu_read_lock();
ovpn_sock = rcu_dereference(peer->sock);
- remove = ovpn_sock && ovpn_sock->sock->sk == sk;
+ remove = ovpn_sock && ovpn_sock->sk == sk;
rcu_read_unlock();
}
diff --git a/drivers/net/ovpn/socket.c b/drivers/net/ovpn/socket.c
index a83cbab72591..9750871ab65c 100644
--- a/drivers/net/ovpn/socket.c
+++ b/drivers/net/ovpn/socket.c
@@ -24,9 +24,9 @@ static void ovpn_socket_release_kref(struct kref *kref)
struct ovpn_socket *sock = container_of(kref, struct ovpn_socket,
refcount);
- if (sock->sock->sk->sk_protocol == IPPROTO_UDP)
+ if (sock->sk->sk_protocol == IPPROTO_UDP)
ovpn_udp_socket_detach(sock);
- else if (sock->sock->sk->sk_protocol == IPPROTO_TCP)
+ else if (sock->sk->sk_protocol == IPPROTO_TCP)
ovpn_tcp_socket_detach(sock);
}
@@ -75,14 +75,6 @@ void ovpn_socket_release(struct ovpn_peer *peer)
if (!sock)
return;
- /* sanity check: we should not end up here if the socket
- * was already closed
- */
- if (!sock->sock->sk) {
- DEBUG_NET_WARN_ON_ONCE(1);
- return;
- }
-
/* Drop the reference while holding the sock lock to avoid
* concurrent ovpn_socket_new call to mess up with a partially
* detached socket.
@@ -90,22 +82,24 @@ void ovpn_socket_release(struct ovpn_peer *peer)
* Holding the lock ensures that a socket with refcnt 0 is fully
* detached before it can be picked by a concurrent reader.
*/
- lock_sock(sock->sock->sk);
+ lock_sock(sock->sk);
released = ovpn_socket_put(peer, sock);
- release_sock(sock->sock->sk);
+ release_sock(sock->sk);
/* align all readers with sk_user_data being NULL */
synchronize_rcu();
/* following cleanup should happen with lock released */
if (released) {
- if (sock->sock->sk->sk_protocol == IPPROTO_UDP) {
+ if (sock->sk->sk_protocol == IPPROTO_UDP) {
netdev_put(sock->ovpn->dev, &sock->dev_tracker);
- } else if (sock->sock->sk->sk_protocol == IPPROTO_TCP) {
+ } else if (sock->sk->sk_protocol == IPPROTO_TCP) {
/* wait for TCP jobs to terminate */
ovpn_tcp_socket_wait_finish(sock);
ovpn_peer_put(sock->peer);
}
+ /* drop reference acquired in ovpn_socket_new() */
+ sock_put(sock->sk);
/* we can call plain kfree() because we already waited one RCU
* period due to synchronize_rcu()
*/
@@ -118,12 +112,14 @@ static bool ovpn_socket_hold(struct ovpn_socket *sock)
return kref_get_unless_zero(&sock->refcount);
}
-static int ovpn_socket_attach(struct ovpn_socket *sock, struct ovpn_peer *peer)
+static int ovpn_socket_attach(struct ovpn_socket *ovpn_sock,
+ struct socket *sock,
+ struct ovpn_peer *peer)
{
- if (sock->sock->sk->sk_protocol == IPPROTO_UDP)
- return ovpn_udp_socket_attach(sock, peer->ovpn);
- else if (sock->sock->sk->sk_protocol == IPPROTO_TCP)
- return ovpn_tcp_socket_attach(sock, peer);
+ if (sock->sk->sk_protocol == IPPROTO_UDP)
+ return ovpn_udp_socket_attach(ovpn_sock, sock, peer->ovpn);
+ else if (sock->sk->sk_protocol == IPPROTO_TCP)
+ return ovpn_tcp_socket_attach(ovpn_sock, peer);
return -EOPNOTSUPP;
}
@@ -138,14 +134,15 @@ static int ovpn_socket_attach(struct ovpn_socket *sock, struct ovpn_peer *peer)
struct ovpn_socket *ovpn_socket_new(struct socket *sock, struct ovpn_peer *peer)
{
struct ovpn_socket *ovpn_sock;
+ struct sock *sk = sock->sk;
int ret;
- lock_sock(sock->sk);
+ lock_sock(sk);
/* a TCP socket can only be owned by a single peer, therefore there
* can't be any other user
*/
- if (sock->sk->sk_protocol == IPPROTO_TCP && sock->sk->sk_user_data) {
+ if (sk->sk_protocol == IPPROTO_TCP && sk->sk_user_data) {
ovpn_sock = ERR_PTR(-EBUSY);
goto sock_release;
}
@@ -153,8 +150,8 @@ struct ovpn_socket *ovpn_socket_new(struct socket *sock, struct ovpn_peer *peer)
/* a UDP socket can be shared across multiple peers, but we must make
* sure it is not owned by something else
*/
- if (sock->sk->sk_protocol == IPPROTO_UDP) {
- u8 type = READ_ONCE(udp_sk(sock->sk)->encap_type);
+ if (sk->sk_protocol == IPPROTO_UDP) {
+ u8 type = READ_ONCE(udp_sk(sk)->encap_type);
/* socket owned by other encapsulation module */
if (type && type != UDP_ENCAP_OVPNINUDP) {
@@ -163,7 +160,7 @@ struct ovpn_socket *ovpn_socket_new(struct socket *sock, struct ovpn_peer *peer)
}
rcu_read_lock();
- ovpn_sock = rcu_dereference_sk_user_data(sock->sk);
+ ovpn_sock = rcu_dereference_sk_user_data(sk);
if (ovpn_sock) {
/* socket owned by another ovpn instance, we can't use it */
if (ovpn_sock->ovpn != peer->ovpn) {
@@ -200,11 +197,22 @@ struct ovpn_socket *ovpn_socket_new(struct socket *sock, struct ovpn_peer *peer)
goto sock_release;
}
- ovpn_sock->sock = sock;
+ ovpn_sock->sk = sk;
kref_init(&ovpn_sock->refcount);
- ret = ovpn_socket_attach(ovpn_sock, peer);
+ /* the newly created ovpn_socket is holding reference to sk,
+ * therefore we increase its refcounter.
+ *
+ * This ovpn_socket instance is referenced by all peers
+ * using the same socket.
+ *
+ * ovpn_socket_release() will take care of dropping the reference.
+ */
+ sock_hold(sk);
+
+ ret = ovpn_socket_attach(ovpn_sock, sock, peer);
if (ret < 0) {
+ sock_put(sk);
kfree(ovpn_sock);
ovpn_sock = ERR_PTR(ret);
goto sock_release;
@@ -213,11 +221,11 @@ struct ovpn_socket *ovpn_socket_new(struct socket *sock, struct ovpn_peer *peer)
/* TCP sockets are per-peer, therefore they are linked to their unique
* peer
*/
- if (sock->sk->sk_protocol == IPPROTO_TCP) {
+ if (sk->sk_protocol == IPPROTO_TCP) {
INIT_WORK(&ovpn_sock->tcp_tx_work, ovpn_tcp_tx_work);
ovpn_sock->peer = peer;
ovpn_peer_hold(peer);
- } else if (sock->sk->sk_protocol == IPPROTO_UDP) {
+ } else if (sk->sk_protocol == IPPROTO_UDP) {
/* in UDP we only link the ovpn instance since the socket is
* shared among multiple peers
*/
@@ -226,8 +234,8 @@ struct ovpn_socket *ovpn_socket_new(struct socket *sock, struct ovpn_peer *peer)
GFP_KERNEL);
}
- rcu_assign_sk_user_data(sock->sk, ovpn_sock);
+ rcu_assign_sk_user_data(sk, ovpn_sock);
sock_release:
- release_sock(sock->sk);
+ release_sock(sk);
return ovpn_sock;
}
diff --git a/drivers/net/ovpn/socket.h b/drivers/net/ovpn/socket.h
index 00d856b1a5d8..4afcec71040d 100644
--- a/drivers/net/ovpn/socket.h
+++ b/drivers/net/ovpn/socket.h
@@ -22,7 +22,7 @@ struct ovpn_peer;
* @ovpn: ovpn instance owning this socket (UDP only)
* @dev_tracker: reference tracker for associated dev (UDP only)
* @peer: unique peer transmitting over this socket (TCP only)
- * @sock: the low level sock object
+ * @sk: the low level sock object
* @refcount: amount of contexts currently referencing this object
* @work: member used to schedule release routine (it may block)
* @tcp_tx_work: work for deferring outgoing packet processing (TCP only)
@@ -36,7 +36,7 @@ struct ovpn_socket {
struct ovpn_peer *peer;
};
- struct socket *sock;
+ struct sock *sk;
struct kref refcount;
struct work_struct work;
struct work_struct tcp_tx_work;
diff --git a/drivers/net/ovpn/tcp.c b/drivers/net/ovpn/tcp.c
index 7c42d84987ad..289f62c5d2c7 100644
--- a/drivers/net/ovpn/tcp.c
+++ b/drivers/net/ovpn/tcp.c
@@ -124,14 +124,18 @@ static void ovpn_tcp_rcv(struct strparser *strp, struct sk_buff *skb)
* this peer, therefore ovpn_peer_hold() is not expected to fail
*/
if (WARN_ON(!ovpn_peer_hold(peer)))
- goto err;
+ goto err_nopeer;
ovpn_recv(peer, skb);
return;
err:
+ /* take reference for deferred peer deletion. should never fail */
+ if (WARN_ON(!ovpn_peer_hold(peer)))
+ goto err_nopeer;
+ schedule_work(&peer->tcp.defer_del_work);
dev_dstats_rx_dropped(peer->ovpn->dev);
+err_nopeer:
kfree_skb(skb);
- ovpn_peer_del(peer, OVPN_DEL_PEER_REASON_TRANSPORT_ERROR);
}
static int ovpn_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
@@ -186,18 +190,18 @@ out:
void ovpn_tcp_socket_detach(struct ovpn_socket *ovpn_sock)
{
struct ovpn_peer *peer = ovpn_sock->peer;
- struct socket *sock = ovpn_sock->sock;
+ struct sock *sk = ovpn_sock->sk;
strp_stop(&peer->tcp.strp);
skb_queue_purge(&peer->tcp.user_queue);
/* restore CBs that were saved in ovpn_sock_set_tcp_cb() */
- sock->sk->sk_data_ready = peer->tcp.sk_cb.sk_data_ready;
- sock->sk->sk_write_space = peer->tcp.sk_cb.sk_write_space;
- sock->sk->sk_prot = peer->tcp.sk_cb.prot;
- sock->sk->sk_socket->ops = peer->tcp.sk_cb.ops;
+ sk->sk_data_ready = peer->tcp.sk_cb.sk_data_ready;
+ sk->sk_write_space = peer->tcp.sk_cb.sk_write_space;
+ sk->sk_prot = peer->tcp.sk_cb.prot;
+ sk->sk_socket->ops = peer->tcp.sk_cb.ops;
- rcu_assign_sk_user_data(sock->sk, NULL);
+ rcu_assign_sk_user_data(sk, NULL);
}
void ovpn_tcp_socket_wait_finish(struct ovpn_socket *sock)
@@ -283,10 +287,10 @@ void ovpn_tcp_tx_work(struct work_struct *work)
sock = container_of(work, struct ovpn_socket, tcp_tx_work);
- lock_sock(sock->sock->sk);
+ lock_sock(sock->sk);
if (sock->peer)
- ovpn_tcp_send_sock(sock->peer, sock->sock->sk);
- release_sock(sock->sock->sk);
+ ovpn_tcp_send_sock(sock->peer, sock->sk);
+ release_sock(sock->sk);
}
static void ovpn_tcp_send_sock_skb(struct ovpn_peer *peer, struct sock *sk,
@@ -307,15 +311,15 @@ static void ovpn_tcp_send_sock_skb(struct ovpn_peer *peer, struct sock *sk,
ovpn_tcp_send_sock(peer, sk);
}
-void ovpn_tcp_send_skb(struct ovpn_peer *peer, struct socket *sock,
+void ovpn_tcp_send_skb(struct ovpn_peer *peer, struct sock *sk,
struct sk_buff *skb)
{
u16 len = skb->len;
*(__be16 *)__skb_push(skb, sizeof(u16)) = htons(len);
- spin_lock_nested(&sock->sk->sk_lock.slock, OVPN_TCP_DEPTH_NESTING);
- if (sock_owned_by_user(sock->sk)) {
+ spin_lock_nested(&sk->sk_lock.slock, OVPN_TCP_DEPTH_NESTING);
+ if (sock_owned_by_user(sk)) {
if (skb_queue_len(&peer->tcp.out_queue) >=
READ_ONCE(net_hotdata.max_backlog)) {
dev_dstats_tx_dropped(peer->ovpn->dev);
@@ -324,10 +328,10 @@ void ovpn_tcp_send_skb(struct ovpn_peer *peer, struct socket *sock,
}
__skb_queue_tail(&peer->tcp.out_queue, skb);
} else {
- ovpn_tcp_send_sock_skb(peer, sock->sk, skb);
+ ovpn_tcp_send_sock_skb(peer, sk, skb);
}
unlock:
- spin_unlock(&sock->sk->sk_lock.slock);
+ spin_unlock(&sk->sk_lock.slock);
}
static void ovpn_tcp_release(struct sock *sk)
@@ -474,7 +478,6 @@ static void ovpn_tcp_peer_del_work(struct work_struct *work)
int ovpn_tcp_socket_attach(struct ovpn_socket *ovpn_sock,
struct ovpn_peer *peer)
{
- struct socket *sock = ovpn_sock->sock;
struct strp_callbacks cb = {
.rcv_msg = ovpn_tcp_rcv,
.parse_msg = ovpn_tcp_parse,
@@ -482,20 +485,20 @@ int ovpn_tcp_socket_attach(struct ovpn_socket *ovpn_sock,
int ret;
/* make sure no pre-existing encapsulation handler exists */
- if (sock->sk->sk_user_data)
+ if (ovpn_sock->sk->sk_user_data)
return -EBUSY;
/* only a fully connected socket is expected. Connection should be
* handled in userspace
*/
- if (sock->sk->sk_state != TCP_ESTABLISHED) {
+ if (ovpn_sock->sk->sk_state != TCP_ESTABLISHED) {
net_err_ratelimited("%s: provided TCP socket is not in ESTABLISHED state: %d\n",
netdev_name(peer->ovpn->dev),
- sock->sk->sk_state);
+ ovpn_sock->sk->sk_state);
return -EINVAL;
}
- ret = strp_init(&peer->tcp.strp, sock->sk, &cb);
+ ret = strp_init(&peer->tcp.strp, ovpn_sock->sk, &cb);
if (ret < 0) {
DEBUG_NET_WARN_ON_ONCE(1);
return ret;
@@ -503,31 +506,31 @@ int ovpn_tcp_socket_attach(struct ovpn_socket *ovpn_sock,
INIT_WORK(&peer->tcp.defer_del_work, ovpn_tcp_peer_del_work);
- __sk_dst_reset(sock->sk);
+ __sk_dst_reset(ovpn_sock->sk);
skb_queue_head_init(&peer->tcp.user_queue);
skb_queue_head_init(&peer->tcp.out_queue);
/* save current CBs so that they can be restored upon socket release */
- peer->tcp.sk_cb.sk_data_ready = sock->sk->sk_data_ready;
- peer->tcp.sk_cb.sk_write_space = sock->sk->sk_write_space;
- peer->tcp.sk_cb.prot = sock->sk->sk_prot;
- peer->tcp.sk_cb.ops = sock->sk->sk_socket->ops;
+ peer->tcp.sk_cb.sk_data_ready = ovpn_sock->sk->sk_data_ready;
+ peer->tcp.sk_cb.sk_write_space = ovpn_sock->sk->sk_write_space;
+ peer->tcp.sk_cb.prot = ovpn_sock->sk->sk_prot;
+ peer->tcp.sk_cb.ops = ovpn_sock->sk->sk_socket->ops;
/* assign our static CBs and prot/ops */
- sock->sk->sk_data_ready = ovpn_tcp_data_ready;
- sock->sk->sk_write_space = ovpn_tcp_write_space;
+ ovpn_sock->sk->sk_data_ready = ovpn_tcp_data_ready;
+ ovpn_sock->sk->sk_write_space = ovpn_tcp_write_space;
- if (sock->sk->sk_family == AF_INET) {
- sock->sk->sk_prot = &ovpn_tcp_prot;
- sock->sk->sk_socket->ops = &ovpn_tcp_ops;
+ if (ovpn_sock->sk->sk_family == AF_INET) {
+ ovpn_sock->sk->sk_prot = &ovpn_tcp_prot;
+ ovpn_sock->sk->sk_socket->ops = &ovpn_tcp_ops;
} else {
- sock->sk->sk_prot = &ovpn_tcp6_prot;
- sock->sk->sk_socket->ops = &ovpn_tcp6_ops;
+ ovpn_sock->sk->sk_prot = &ovpn_tcp6_prot;
+ ovpn_sock->sk->sk_socket->ops = &ovpn_tcp6_ops;
}
/* avoid using task_frag */
- sock->sk->sk_allocation = GFP_ATOMIC;
- sock->sk->sk_use_task_frag = false;
+ ovpn_sock->sk->sk_allocation = GFP_ATOMIC;
+ ovpn_sock->sk->sk_use_task_frag = false;
/* enqueue the RX worker */
strp_check_rcv(&peer->tcp.strp);
diff --git a/drivers/net/ovpn/tcp.h b/drivers/net/ovpn/tcp.h
index 10aefa834cf3..a3aa3570ae5e 100644
--- a/drivers/net/ovpn/tcp.h
+++ b/drivers/net/ovpn/tcp.h
@@ -30,7 +30,8 @@ void ovpn_tcp_socket_wait_finish(struct ovpn_socket *sock);
* Required by the OpenVPN protocol in order to extract packets from
* the TCP stream on the receiver side.
*/
-void ovpn_tcp_send_skb(struct ovpn_peer *peer, struct socket *sock, struct sk_buff *skb);
+void ovpn_tcp_send_skb(struct ovpn_peer *peer, struct sock *sk,
+ struct sk_buff *skb);
void ovpn_tcp_tx_work(struct work_struct *work);
#endif /* _NET_OVPN_TCP_H_ */
diff --git a/drivers/net/ovpn/udp.c b/drivers/net/ovpn/udp.c
index aef8c0406ec9..bff00946eae2 100644
--- a/drivers/net/ovpn/udp.c
+++ b/drivers/net/ovpn/udp.c
@@ -43,7 +43,7 @@ static struct ovpn_socket *ovpn_socket_from_udp_sock(struct sock *sk)
return NULL;
/* make sure that sk matches our stored transport socket */
- if (unlikely(!ovpn_sock->sock || sk != ovpn_sock->sock->sk))
+ if (unlikely(!ovpn_sock->sk || sk != ovpn_sock->sk))
return NULL;
return ovpn_sock;
@@ -335,32 +335,22 @@ out:
/**
* ovpn_udp_send_skb - prepare skb and send it over via UDP
* @peer: the destination peer
- * @sock: the RCU protected peer socket
+ * @sk: peer socket
* @skb: the packet to send
*/
-void ovpn_udp_send_skb(struct ovpn_peer *peer, struct socket *sock,
+void ovpn_udp_send_skb(struct ovpn_peer *peer, struct sock *sk,
struct sk_buff *skb)
{
- int ret = -1;
+ int ret;
skb->dev = peer->ovpn->dev;
/* no checksum performed at this layer */
skb->ip_summed = CHECKSUM_NONE;
- /* get socket info */
- if (unlikely(!sock)) {
- net_warn_ratelimited("%s: no sock for remote peer %u\n",
- netdev_name(peer->ovpn->dev), peer->id);
- goto out;
- }
-
/* crypto layer -> transport (UDP) */
- ret = ovpn_udp_output(peer, &peer->dst_cache, sock->sk, skb);
-out:
- if (unlikely(ret < 0)) {
+ ret = ovpn_udp_output(peer, &peer->dst_cache, sk, skb);
+ if (unlikely(ret < 0))
kfree_skb(skb);
- return;
- }
}
static void ovpn_udp_encap_destroy(struct sock *sk)
@@ -383,6 +373,7 @@ static void ovpn_udp_encap_destroy(struct sock *sk)
/**
* ovpn_udp_socket_attach - set udp-tunnel CBs on socket and link it to ovpn
* @ovpn_sock: socket to configure
+ * @sock: the socket container to be passed to setup_udp_tunnel_sock()
* @ovpn: the openvp instance to link
*
* After invoking this function, the sock will be controlled by ovpn so that
@@ -390,7 +381,7 @@ static void ovpn_udp_encap_destroy(struct sock *sk)
*
* Return: 0 on success or a negative error code otherwise
*/
-int ovpn_udp_socket_attach(struct ovpn_socket *ovpn_sock,
+int ovpn_udp_socket_attach(struct ovpn_socket *ovpn_sock, struct socket *sock,
struct ovpn_priv *ovpn)
{
struct udp_tunnel_sock_cfg cfg = {
@@ -398,17 +389,16 @@ int ovpn_udp_socket_attach(struct ovpn_socket *ovpn_sock,
.encap_rcv = ovpn_udp_encap_recv,
.encap_destroy = ovpn_udp_encap_destroy,
};
- struct socket *sock = ovpn_sock->sock;
struct ovpn_socket *old_data;
int ret;
/* make sure no pre-existing encapsulation handler exists */
rcu_read_lock();
- old_data = rcu_dereference_sk_user_data(sock->sk);
+ old_data = rcu_dereference_sk_user_data(ovpn_sock->sk);
if (!old_data) {
/* socket is currently unused - we can take it */
rcu_read_unlock();
- setup_udp_tunnel_sock(sock_net(sock->sk), sock, &cfg);
+ setup_udp_tunnel_sock(sock_net(ovpn_sock->sk), sock, &cfg);
return 0;
}
@@ -421,7 +411,7 @@ int ovpn_udp_socket_attach(struct ovpn_socket *ovpn_sock,
* Unlikely TCP, a single UDP socket can be used to talk to many remote
* hosts and therefore openvpn instantiates one only for all its peers
*/
- if ((READ_ONCE(udp_sk(sock->sk)->encap_type) == UDP_ENCAP_OVPNINUDP) &&
+ if ((READ_ONCE(udp_sk(ovpn_sock->sk)->encap_type) == UDP_ENCAP_OVPNINUDP) &&
old_data->ovpn == ovpn) {
netdev_dbg(ovpn->dev,
"provided socket already owned by this interface\n");
@@ -442,8 +432,16 @@ int ovpn_udp_socket_attach(struct ovpn_socket *ovpn_sock,
*/
void ovpn_udp_socket_detach(struct ovpn_socket *ovpn_sock)
{
- struct udp_tunnel_sock_cfg cfg = { };
+ struct sock *sk = ovpn_sock->sk;
+
+ /* Re-enable multicast loopback */
+ inet_set_bit(MC_LOOP, sk);
+ /* Disable CHECKSUM_UNNECESSARY to CHECKSUM_COMPLETE conversion */
+ inet_dec_convert_csum(sk);
+
+ WRITE_ONCE(udp_sk(sk)->encap_type, 0);
+ WRITE_ONCE(udp_sk(sk)->encap_rcv, NULL);
+ WRITE_ONCE(udp_sk(sk)->encap_destroy, NULL);
- setup_udp_tunnel_sock(sock_net(ovpn_sock->sock->sk), ovpn_sock->sock,
- &cfg);
+ rcu_assign_sk_user_data(sk, NULL);
}
diff --git a/drivers/net/ovpn/udp.h b/drivers/net/ovpn/udp.h
index 9994eb6e0428..fe26fbe25c5a 100644
--- a/drivers/net/ovpn/udp.h
+++ b/drivers/net/ovpn/udp.h
@@ -15,11 +15,11 @@ struct ovpn_peer;
struct ovpn_priv;
struct socket;
-int ovpn_udp_socket_attach(struct ovpn_socket *ovpn_sock,
+int ovpn_udp_socket_attach(struct ovpn_socket *ovpn_sock, struct socket *sock,
struct ovpn_priv *ovpn);
void ovpn_udp_socket_detach(struct ovpn_socket *ovpn_sock);
-void ovpn_udp_send_skb(struct ovpn_peer *peer, struct socket *sock,
+void ovpn_udp_send_skb(struct ovpn_peer *peer, struct sock *sk,
struct sk_buff *skb);
#endif /* _NET_OVPN_UDP_H_ */
diff --git a/drivers/net/usb/aqc111.c b/drivers/net/usb/aqc111.c
index 453a2cf82753..9201ee10a13f 100644
--- a/drivers/net/usb/aqc111.c
+++ b/drivers/net/usb/aqc111.c
@@ -31,11 +31,11 @@ static int aqc111_read_cmd_nopm(struct usbnet *dev, u8 cmd, u16 value,
USB_RECIP_DEVICE, value, index, data, size);
if (unlikely(ret < size)) {
- ret = ret < 0 ? ret : -ENODATA;
-
netdev_warn(dev->net,
"Failed to read(0x%x) reg index 0x%04x: %d\n",
cmd, index, ret);
+
+ ret = ret < 0 ? ret : -ENODATA;
}
return ret;
@@ -50,11 +50,11 @@ static int aqc111_read_cmd(struct usbnet *dev, u8 cmd, u16 value,
USB_RECIP_DEVICE, value, index, data, size);
if (unlikely(ret < size)) {
- ret = ret < 0 ? ret : -ENODATA;
-
netdev_warn(dev->net,
"Failed to read(0x%x) reg index 0x%04x: %d\n",
cmd, index, ret);
+
+ ret = ret < 0 ? ret : -ENODATA;
}
return ret;
diff --git a/drivers/net/usb/ch9200.c b/drivers/net/usb/ch9200.c
index f69d9b902da0..a206ffa76f1b 100644
--- a/drivers/net/usb/ch9200.c
+++ b/drivers/net/usb/ch9200.c
@@ -178,6 +178,7 @@ static int ch9200_mdio_read(struct net_device *netdev, int phy_id, int loc)
{
struct usbnet *dev = netdev_priv(netdev);
unsigned char buff[2];
+ int ret;
netdev_dbg(netdev, "%s phy_id:%02x loc:%02x\n",
__func__, phy_id, loc);
@@ -185,8 +186,10 @@ static int ch9200_mdio_read(struct net_device *netdev, int phy_id, int loc)
if (phy_id != 0)
return -ENODEV;
- control_read(dev, REQUEST_READ, 0, loc * 2, buff, 0x02,
- CONTROL_TIMEOUT_MS);
+ ret = control_read(dev, REQUEST_READ, 0, loc * 2, buff, 0x02,
+ CONTROL_TIMEOUT_MS);
+ if (ret < 0)
+ return ret;
return (buff[0] | buff[1] << 8);
}
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index 2440e30c5bd1..0572f6a9bdb6 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -1572,6 +1572,30 @@ vmxnet3_get_hdr_len(struct vmxnet3_adapter *adapter, struct sk_buff *skb,
return (hlen + (hdr.tcp->doff << 2));
}
+static void
+vmxnet3_lro_tunnel(struct sk_buff *skb, __be16 ip_proto)
+{
+ struct udphdr *uh = NULL;
+
+ if (ip_proto == htons(ETH_P_IP)) {
+ struct iphdr *iph = (struct iphdr *)skb->data;
+
+ if (iph->protocol == IPPROTO_UDP)
+ uh = (struct udphdr *)(iph + 1);
+ } else {
+ struct ipv6hdr *iph = (struct ipv6hdr *)skb->data;
+
+ if (iph->nexthdr == IPPROTO_UDP)
+ uh = (struct udphdr *)(iph + 1);
+ }
+ if (uh) {
+ if (uh->check)
+ skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
+ else
+ skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
+ }
+}
+
static int
vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
struct vmxnet3_adapter *adapter, int quota)
@@ -1885,6 +1909,8 @@ sop_done:
if (segCnt != 0 && mss != 0) {
skb_shinfo(skb)->gso_type = rcd->v4 ?
SKB_GSO_TCPV4 : SKB_GSO_TCPV6;
+ if (encap_lro)
+ vmxnet3_lro_tunnel(skb, skb->protocol);
skb_shinfo(skb)->gso_size = mss;
skb_shinfo(skb)->gso_segs = segCnt;
} else if ((segCnt != 0 || skb->len > mtu) && !encap_lro) {
diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c
index 3ffeeba5dccf..4a529f1f9bea 100644
--- a/drivers/net/wireguard/device.c
+++ b/drivers/net/wireguard/device.c
@@ -366,6 +366,7 @@ static int wg_newlink(struct net_device *dev,
if (ret < 0)
goto err_free_handshake_queue;
+ dev_set_threaded(dev, true);
ret = register_netdevice(dev);
if (ret < 0)
goto err_uninit_ratelimiter;
diff --git a/drivers/net/wireless/intel/iwlwifi/mld/fw.c b/drivers/net/wireless/intel/iwlwifi/mld/fw.c
index 73ed8d5cab43..9d2c087360e7 100644
--- a/drivers/net/wireless/intel/iwlwifi/mld/fw.c
+++ b/drivers/net/wireless/intel/iwlwifi/mld/fw.c
@@ -349,10 +349,6 @@ int iwl_mld_load_fw(struct iwl_mld *mld)
if (ret)
goto err;
- ret = iwl_mld_init_mcc(mld);
- if (ret)
- goto err;
-
mld->fw_status.running = true;
return 0;
@@ -546,6 +542,10 @@ int iwl_mld_start_fw(struct iwl_mld *mld)
if (ret)
goto error;
+ ret = iwl_mld_init_mcc(mld);
+ if (ret)
+ goto error;
+
return 0;
error:
diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mld.c b/drivers/net/wireless/intel/iwlwifi/mld/mld.c
index 8cdd960c5245..e8820e7cf8fa 100644
--- a/drivers/net/wireless/intel/iwlwifi/mld/mld.c
+++ b/drivers/net/wireless/intel/iwlwifi/mld/mld.c
@@ -653,7 +653,8 @@ iwl_mld_nic_error(struct iwl_op_mode *op_mode,
* It might not actually be true that we'll restart, but the
* setting doesn't matter if we're going to be unbound either.
*/
- if (type != IWL_ERR_TYPE_RESET_HS_TIMEOUT)
+ if (type != IWL_ERR_TYPE_RESET_HS_TIMEOUT &&
+ mld->fw_status.running)
mld->fw_status.in_hw_restart = true;
}
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index 0f056a6641bd..956b491ae5a4 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -6360,8 +6360,8 @@ void iwl_mvm_sync_rx_queues_internal(struct iwl_mvm *mvm,
(struct iwl_mvm_internal_rxq_notif *)cmd->payload;
struct iwl_host_cmd hcmd = {
.id = WIDE_ID(DATA_PATH_GROUP, TRIGGER_RX_QUEUES_NOTIF_CMD),
- .data[0] = &cmd,
- .len[0] = sizeof(cmd),
+ .data[0] = cmd,
+ .len[0] = __struct_size(cmd),
.data[1] = data,
.len[1] = size,
.flags = CMD_SEND_IN_RFKILL | (sync ? 0 : CMD_ASYNC),
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c
index 38ad719161e6..c8f4f3a1d2eb 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c
@@ -125,7 +125,7 @@ void iwl_trans_pcie_fw_reset_handshake(struct iwl_trans *trans)
reset_done =
inta_hw & MSIX_HW_INT_CAUSES_REG_RESET_DONE;
} else {
- inta_hw = iwl_read32(trans, CSR_INT_MASK);
+ inta_hw = iwl_read32(trans, CSR_INT);
reset_done = inta_hw & CSR_INT_BIT_RESET_DONE;
}
diff --git a/drivers/net/wwan/mhi_wwan_mbim.c b/drivers/net/wwan/mhi_wwan_mbim.c
index 8755c5e6a65b..c814fbd756a1 100644
--- a/drivers/net/wwan/mhi_wwan_mbim.c
+++ b/drivers/net/wwan/mhi_wwan_mbim.c
@@ -550,8 +550,8 @@ static int mhi_mbim_newlink(void *ctxt, struct net_device *ndev, u32 if_id,
struct mhi_mbim_link *link = wwan_netdev_drvpriv(ndev);
struct mhi_mbim_context *mbim = ctxt;
- link->session = if_id;
link->mbim = mbim;
+ link->session = mhi_mbim_get_link_mux_id(link->mbim->mdev->mhi_cntrl) + if_id;
link->ndev = ndev;
u64_stats_init(&link->rx_syncp);
u64_stats_init(&link->tx_syncp);
@@ -607,7 +607,7 @@ static int mhi_mbim_probe(struct mhi_device *mhi_dev, const struct mhi_device_id
{
struct mhi_controller *cntrl = mhi_dev->mhi_cntrl;
struct mhi_mbim_context *mbim;
- int err, link_id;
+ int err;
mbim = devm_kzalloc(&mhi_dev->dev, sizeof(*mbim), GFP_KERNEL);
if (!mbim)
@@ -628,11 +628,8 @@ static int mhi_mbim_probe(struct mhi_device *mhi_dev, const struct mhi_device_id
/* Number of transfer descriptors determines size of the queue */
mbim->rx_queue_sz = mhi_get_free_desc_count(mhi_dev, DMA_FROM_DEVICE);
- /* Get the corresponding mux_id from mhi */
- link_id = mhi_mbim_get_link_mux_id(cntrl);
-
/* Register wwan link ops with MHI controller representing WWAN instance */
- return wwan_register_ops(&cntrl->mhi_dev->dev, &mhi_mbim_wwan_ops, mbim, link_id);
+ return wwan_register_ops(&cntrl->mhi_dev->dev, &mhi_mbim_wwan_ops, mbim, 0);
}
static void mhi_mbim_remove(struct mhi_device *mhi_dev)
diff --git a/drivers/net/wwan/t7xx/t7xx_netdev.c b/drivers/net/wwan/t7xx/t7xx_netdev.c
index 91fa082e9cab..fc0a7cb181df 100644
--- a/drivers/net/wwan/t7xx/t7xx_netdev.c
+++ b/drivers/net/wwan/t7xx/t7xx_netdev.c
@@ -302,7 +302,7 @@ static int t7xx_ccmni_wwan_newlink(void *ctxt, struct net_device *dev, u32 if_id
ccmni->ctlb = ctlb;
ccmni->dev = dev;
atomic_set(&ccmni->usage, 0);
- ctlb->ccmni_inst[if_id] = ccmni;
+ WRITE_ONCE(ctlb->ccmni_inst[if_id], ccmni);
ret = register_netdevice(dev);
if (ret)
@@ -324,6 +324,7 @@ static void t7xx_ccmni_wwan_dellink(void *ctxt, struct net_device *dev, struct l
if (WARN_ON(ctlb->ccmni_inst[if_id] != ccmni))
return;
+ WRITE_ONCE(ctlb->ccmni_inst[if_id], NULL);
unregister_netdevice(dev);
}
@@ -419,7 +420,7 @@ static void t7xx_ccmni_recv_skb(struct t7xx_ccmni_ctrl *ccmni_ctlb, struct sk_bu
skb_cb = T7XX_SKB_CB(skb);
netif_id = skb_cb->netif_idx;
- ccmni = ccmni_ctlb->ccmni_inst[netif_id];
+ ccmni = READ_ONCE(ccmni_ctlb->ccmni_inst[netif_id]);
if (!ccmni) {
dev_kfree_skb(skb);
return;
@@ -441,7 +442,7 @@ static void t7xx_ccmni_recv_skb(struct t7xx_ccmni_ctrl *ccmni_ctlb, struct sk_bu
static void t7xx_ccmni_queue_tx_irq_notify(struct t7xx_ccmni_ctrl *ctlb, int qno)
{
- struct t7xx_ccmni *ccmni = ctlb->ccmni_inst[0];
+ struct t7xx_ccmni *ccmni = READ_ONCE(ctlb->ccmni_inst[0]);
struct netdev_queue *net_queue;
if (netif_running(ccmni->dev) && atomic_read(&ccmni->usage) > 0) {
@@ -453,7 +454,7 @@ static void t7xx_ccmni_queue_tx_irq_notify(struct t7xx_ccmni_ctrl *ctlb, int qno
static void t7xx_ccmni_queue_tx_full_notify(struct t7xx_ccmni_ctrl *ctlb, int qno)
{
- struct t7xx_ccmni *ccmni = ctlb->ccmni_inst[0];
+ struct t7xx_ccmni *ccmni = READ_ONCE(ctlb->ccmni_inst[0]);
struct netdev_queue *net_queue;
if (atomic_read(&ccmni->usage) > 0) {
@@ -471,7 +472,7 @@ static void t7xx_ccmni_queue_state_notify(struct t7xx_pci_dev *t7xx_dev,
if (ctlb->md_sta != MD_STATE_READY)
return;
- if (!ctlb->ccmni_inst[0]) {
+ if (!READ_ONCE(ctlb->ccmni_inst[0])) {
dev_warn(&t7xx_dev->pdev->dev, "No netdev registered yet\n");
return;
}
diff --git a/drivers/phy/Kconfig b/drivers/phy/Kconfig
index 8d58efe998ec..58c911e1b2d2 100644
--- a/drivers/phy/Kconfig
+++ b/drivers/phy/Kconfig
@@ -43,6 +43,14 @@ config PHY_PISTACHIO_USB
help
Enable this to support the USB2.0 PHY on the IMG Pistachio SoC.
+config PHY_SNPS_EUSB2
+ tristate "SNPS eUSB2 PHY Driver"
+ depends on OF && (ARCH_EXYNOS || ARCH_QCOM || COMPILE_TEST)
+ select GENERIC_PHY
+ help
+ Enable support for the USB high-speed SNPS eUSB2 phy on select
+ SoCs. The PHY is usually paired with a Synopsys DWC3 USB controller.
+
config PHY_XGENE
tristate "APM X-Gene 15Gbps PHY support"
depends on HAS_IOMEM && OF && (ARCH_XGENE || COMPILE_TEST)
diff --git a/drivers/phy/Makefile b/drivers/phy/Makefile
index e281442acc75..c670a8dac468 100644
--- a/drivers/phy/Makefile
+++ b/drivers/phy/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_PHY_CAN_TRANSCEIVER) += phy-can-transceiver.o
obj-$(CONFIG_PHY_LPC18XX_USB_OTG) += phy-lpc18xx-usb-otg.o
obj-$(CONFIG_PHY_XGENE) += phy-xgene.o
obj-$(CONFIG_PHY_PISTACHIO_USB) += phy-pistachio-usb.o
+obj-$(CONFIG_PHY_SNPS_EUSB2) += phy-snps-eusb2.o
obj-$(CONFIG_USB_LGM_PHY) += phy-lgm-usb.o
obj-$(CONFIG_PHY_AIROHA_PCIE) += phy-airoha-pcie.o
obj-$(CONFIG_PHY_NXP_PTN3222) += phy-nxp-ptn3222.o
diff --git a/drivers/phy/amlogic/phy-meson-axg-mipi-dphy.c b/drivers/phy/amlogic/phy-meson-axg-mipi-dphy.c
index 08a86962d949..c4a56b9d3289 100644
--- a/drivers/phy/amlogic/phy-meson-axg-mipi-dphy.c
+++ b/drivers/phy/amlogic/phy-meson-axg-mipi-dphy.c
@@ -377,13 +377,9 @@ static int phy_meson_axg_mipi_dphy_probe(struct platform_device *pdev)
return ret;
phy = devm_phy_create(dev, NULL, &phy_meson_axg_mipi_dphy_ops);
- if (IS_ERR(phy)) {
- ret = PTR_ERR(phy);
- if (ret != -EPROBE_DEFER)
- dev_err(dev, "failed to create PHY\n");
-
- return ret;
- }
+ if (IS_ERR(phy))
+ return dev_err_probe(dev, PTR_ERR(phy),
+ "failed to create PHY\n");
phy_set_drvdata(phy, priv);
diff --git a/drivers/phy/amlogic/phy-meson-axg-mipi-pcie-analog.c b/drivers/phy/amlogic/phy-meson-axg-mipi-pcie-analog.c
index ae898f93f97b..c0ba2852dbb8 100644
--- a/drivers/phy/amlogic/phy-meson-axg-mipi-pcie-analog.c
+++ b/drivers/phy/amlogic/phy-meson-axg-mipi-pcie-analog.c
@@ -200,7 +200,6 @@ static int phy_axg_mipi_pcie_analog_probe(struct platform_device *pdev)
struct phy_axg_mipi_pcie_analog_priv *priv;
struct device_node *np = dev->of_node, *parent_np;
struct regmap *map;
- int ret;
priv = devm_kmalloc(dev, sizeof(*priv), GFP_KERNEL);
if (!priv)
@@ -219,12 +218,9 @@ static int phy_axg_mipi_pcie_analog_probe(struct platform_device *pdev)
priv->regmap = map;
priv->phy = devm_phy_create(dev, np, &phy_axg_mipi_pcie_analog_ops);
- if (IS_ERR(priv->phy)) {
- ret = PTR_ERR(priv->phy);
- if (ret != -EPROBE_DEFER)
- dev_err(dev, "failed to create PHY\n");
- return ret;
- }
+ if (IS_ERR(priv->phy))
+ return dev_err_probe(dev, PTR_ERR(priv->phy),
+ "failed to create PHY\n");
phy_set_drvdata(priv->phy, priv);
dev_set_drvdata(dev, priv);
diff --git a/drivers/phy/amlogic/phy-meson-axg-pcie.c b/drivers/phy/amlogic/phy-meson-axg-pcie.c
index 60be5cdc600b..14dee73f9cb5 100644
--- a/drivers/phy/amlogic/phy-meson-axg-pcie.c
+++ b/drivers/phy/amlogic/phy-meson-axg-pcie.c
@@ -131,20 +131,11 @@ static int phy_axg_pcie_probe(struct platform_device *pdev)
struct phy_axg_pcie_priv *priv;
struct device_node *np = dev->of_node;
void __iomem *base;
- int ret;
priv = devm_kmalloc(dev, sizeof(*priv), GFP_KERNEL);
if (!priv)
return -ENOMEM;
- priv->phy = devm_phy_create(dev, np, &phy_axg_pcie_ops);
- if (IS_ERR(priv->phy)) {
- ret = PTR_ERR(priv->phy);
- if (ret != -EPROBE_DEFER)
- dev_err(dev, "failed to create PHY\n");
- return ret;
- }
-
base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(base))
return PTR_ERR(base);
@@ -162,6 +153,11 @@ static int phy_axg_pcie_probe(struct platform_device *pdev)
if (IS_ERR(priv->analog))
return PTR_ERR(priv->analog);
+ priv->phy = devm_phy_create(dev, np, &phy_axg_pcie_ops);
+ if (IS_ERR(priv->phy))
+ return dev_err_probe(dev, PTR_ERR(priv->phy),
+ "failed to create PHY\n");
+
phy_set_drvdata(priv->phy, priv);
dev_set_drvdata(dev, priv);
pphy = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
diff --git a/drivers/phy/amlogic/phy-meson-g12a-usb2.c b/drivers/phy/amlogic/phy-meson-g12a-usb2.c
index 0e0b5c00b676..66bf0b7ef8ed 100644
--- a/drivers/phy/amlogic/phy-meson-g12a-usb2.c
+++ b/drivers/phy/amlogic/phy-meson-g12a-usb2.c
@@ -339,13 +339,9 @@ static int phy_meson_g12a_usb2_probe(struct platform_device *pdev)
return ret;
phy = devm_phy_create(dev, NULL, &phy_meson_g12a_usb2_ops);
- if (IS_ERR(phy)) {
- ret = PTR_ERR(phy);
- if (ret != -EPROBE_DEFER)
- dev_err(dev, "failed to create PHY\n");
-
- return ret;
- }
+ if (IS_ERR(phy))
+ return dev_err_probe(dev, PTR_ERR(phy),
+ "failed to create PHY\n");
phy_set_bus_width(phy, 8);
phy_set_drvdata(phy, priv);
diff --git a/drivers/phy/amlogic/phy-meson-gxl-usb2.c b/drivers/phy/amlogic/phy-meson-gxl-usb2.c
index 14ea89927ab1..6b390304f723 100644
--- a/drivers/phy/amlogic/phy-meson-gxl-usb2.c
+++ b/drivers/phy/amlogic/phy-meson-gxl-usb2.c
@@ -237,7 +237,6 @@ static int phy_meson_gxl_usb2_probe(struct platform_device *pdev)
struct phy_meson_gxl_usb2_priv *priv;
struct phy *phy;
void __iomem *base;
- int ret;
priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
if (!priv)
@@ -266,13 +265,9 @@ static int phy_meson_gxl_usb2_probe(struct platform_device *pdev)
return PTR_ERR(priv->reset);
phy = devm_phy_create(dev, NULL, &phy_meson_gxl_usb2_ops);
- if (IS_ERR(phy)) {
- ret = PTR_ERR(phy);
- if (ret != -EPROBE_DEFER)
- dev_err(dev, "failed to create PHY\n");
-
- return ret;
- }
+ if (IS_ERR(phy))
+ return dev_err_probe(dev, PTR_ERR(phy),
+ "failed to create PHY\n");
phy_set_drvdata(phy, priv);
diff --git a/drivers/phy/amlogic/phy-meson8b-usb2.c b/drivers/phy/amlogic/phy-meson8b-usb2.c
index d63147c41b8c..a553231a9f7c 100644
--- a/drivers/phy/amlogic/phy-meson8b-usb2.c
+++ b/drivers/phy/amlogic/phy-meson8b-usb2.c
@@ -5,6 +5,7 @@
* Copyright (C) 2016 Martin Blumenstingl <martin.blumenstingl@googlemail.com>
*/
+#include <linux/bitfield.h>
#include <linux/clk.h>
#include <linux/delay.h>
#include <linux/io.h>
@@ -39,9 +40,7 @@
#define REG_CTRL_TX_BITSTUFF_ENN BIT(18)
#define REG_CTRL_COMMON_ON BIT(19)
#define REG_CTRL_REF_CLK_SEL_MASK GENMASK(21, 20)
- #define REG_CTRL_REF_CLK_SEL_SHIFT 20
#define REG_CTRL_FSEL_MASK GENMASK(24, 22)
- #define REG_CTRL_FSEL_SHIFT 22
#define REG_CTRL_PORT_RESET BIT(25)
#define REG_CTRL_THREAD_ID_MASK GENMASK(31, 26)
@@ -166,33 +165,29 @@ static int phy_meson8b_usb2_power_on(struct phy *phy)
return ret;
}
- regmap_update_bits(priv->regmap, REG_CONFIG, REG_CONFIG_CLK_32k_ALTSEL,
- REG_CONFIG_CLK_32k_ALTSEL);
+ regmap_set_bits(priv->regmap, REG_CONFIG, REG_CONFIG_CLK_32k_ALTSEL);
regmap_update_bits(priv->regmap, REG_CTRL, REG_CTRL_REF_CLK_SEL_MASK,
- 0x2 << REG_CTRL_REF_CLK_SEL_SHIFT);
+ FIELD_PREP(REG_CTRL_REF_CLK_SEL_MASK, 0x2));
regmap_update_bits(priv->regmap, REG_CTRL, REG_CTRL_FSEL_MASK,
- 0x5 << REG_CTRL_FSEL_SHIFT);
+ FIELD_PREP(REG_CTRL_FSEL_MASK, 0x5));
/* reset the PHY */
- regmap_update_bits(priv->regmap, REG_CTRL, REG_CTRL_POWER_ON_RESET,
- REG_CTRL_POWER_ON_RESET);
+ regmap_set_bits(priv->regmap, REG_CTRL, REG_CTRL_POWER_ON_RESET);
udelay(RESET_COMPLETE_TIME);
- regmap_update_bits(priv->regmap, REG_CTRL, REG_CTRL_POWER_ON_RESET, 0);
+ regmap_clear_bits(priv->regmap, REG_CTRL, REG_CTRL_POWER_ON_RESET);
udelay(RESET_COMPLETE_TIME);
- regmap_update_bits(priv->regmap, REG_CTRL, REG_CTRL_SOF_TOGGLE_OUT,
- REG_CTRL_SOF_TOGGLE_OUT);
+ regmap_set_bits(priv->regmap, REG_CTRL, REG_CTRL_SOF_TOGGLE_OUT);
if (priv->dr_mode == USB_DR_MODE_HOST) {
- regmap_update_bits(priv->regmap, REG_DBG_UART,
- REG_DBG_UART_SET_IDDQ, 0);
+ regmap_clear_bits(priv->regmap, REG_DBG_UART,
+ REG_DBG_UART_SET_IDDQ);
if (priv->match->host_enable_aca) {
- regmap_update_bits(priv->regmap, REG_ADP_BC,
- REG_ADP_BC_ACA_ENABLE,
- REG_ADP_BC_ACA_ENABLE);
+ regmap_set_bits(priv->regmap, REG_ADP_BC,
+ REG_ADP_BC_ACA_ENABLE);
udelay(ACA_ENABLE_COMPLETE_TIME);
@@ -215,17 +210,15 @@ static int phy_meson8b_usb2_power_off(struct phy *phy)
struct phy_meson8b_usb2_priv *priv = phy_get_drvdata(phy);
if (priv->dr_mode == USB_DR_MODE_HOST)
- regmap_update_bits(priv->regmap, REG_DBG_UART,
- REG_DBG_UART_SET_IDDQ,
- REG_DBG_UART_SET_IDDQ);
+ regmap_set_bits(priv->regmap, REG_DBG_UART,
+ REG_DBG_UART_SET_IDDQ);
clk_disable_unprepare(priv->clk_usb);
clk_disable_unprepare(priv->clk_usb_general);
reset_control_rearm(priv->reset);
/* power off the PHY by putting it into reset mode */
- regmap_update_bits(priv->regmap, REG_CTRL, REG_CTRL_POWER_ON_RESET,
- REG_CTRL_POWER_ON_RESET);
+ regmap_set_bits(priv->regmap, REG_CTRL, REG_CTRL_POWER_ON_RESET);
return 0;
}
diff --git a/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c b/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c
index dc452610934a..8a5ed50f2da0 100644
--- a/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c
+++ b/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c
@@ -43,6 +43,8 @@
#define USB_CTRL_SETUP_tca_drv_sel_MASK BIT(24)
#define USB_CTRL_SETUP_STRAP_IPP_SEL_MASK BIT(25)
#define USB_CTRL_USB_PM 0x04
+#define USB_CTRL_USB_PM_REF_S2_CLK_SWITCH_EN_MASK BIT(1)
+#define USB_CTRL_USB_PM_UTMI_S2_CLK_SWITCH_EN_MASK BIT(2)
#define USB_CTRL_USB_PM_XHC_S2_CLK_SWITCH_EN_MASK BIT(3)
#define USB_CTRL_USB_PM_XHC_PME_EN_MASK BIT(4)
#define USB_CTRL_USB_PM_XHC_SOFT_RESETB_MASK BIT(22)
@@ -61,6 +63,13 @@
#define USB_CTRL_CTLR_CSHCR_ctl_pme_en_MASK BIT(18)
#define USB_CTRL_P0_U2PHY_CFG1 0x68
#define USB_CTRL_P0_U2PHY_CFG1_COMMONONN_MASK BIT(10)
+#define USB_CTRL_P0_U2PHY_CFG2 0x6c
+#define USB_CTRL_P0_U2PHY_CFG2_TXVREFTUNE0_MASK GENMASK(20, 17)
+#define USB_CTRL_P0_U2PHY_CFG2_TXVREFTUNE0_SHIFT 17
+#define USB_CTRL_P0_U2PHY_CFG2_TXRESTUNE0_MASK GENMASK(24, 23)
+#define USB_CTRL_P0_U2PHY_CFG2_TXRESTUNE0_SHIFT 23
+#define USB_CTRL_P0_U2PHY_CFG2_TXPREEMPAMPTUNE0_MASK GENMASK(26, 25)
+#define USB_CTRL_P0_U2PHY_CFG2_TXPREEMPAMPTUNE0_SHIFT 25
/* Register definitions for the USB_PHY block in 7211b0 */
#define USB_PHY_PLL_CTL 0x00
@@ -369,6 +378,42 @@ static void usb_uninit_common_7216(struct brcm_usb_init_params *params)
}
}
+static void usb_init_common_74110(struct brcm_usb_init_params *params)
+{
+ void __iomem *ctrl = params->regs[BRCM_REGS_CTRL];
+ u32 reg;
+
+ reg = brcm_usb_readl(USB_CTRL_REG(ctrl, USB_PM));
+ reg &= ~(USB_CTRL_MASK(USB_PM, REF_S2_CLK_SWITCH_EN) |
+ USB_CTRL_MASK(USB_PM, UTMI_S2_CLK_SWITCH_EN));
+ brcm_usb_writel(reg, USB_CTRL_REG(ctrl, USB_PM));
+
+ usb_init_common_7216(params);
+
+ reg = brcm_usb_readl(USB_CTRL_REG(ctrl, P0_U2PHY_CFG2));
+ reg &= ~(USB_CTRL_P0_U2PHY_CFG2_TXVREFTUNE0_MASK |
+ USB_CTRL_P0_U2PHY_CFG2_TXRESTUNE0_MASK |
+ USB_CTRL_P0_U2PHY_CFG2_TXPREEMPAMPTUNE0_MASK);
+ reg |= (0x6 << USB_CTRL_P0_U2PHY_CFG2_TXVREFTUNE0_SHIFT) |
+ (0x3 << USB_CTRL_P0_U2PHY_CFG2_TXRESTUNE0_SHIFT) |
+ (0x2 << USB_CTRL_P0_U2PHY_CFG2_TXPREEMPAMPTUNE0_SHIFT);
+ brcm_usb_writel(reg, USB_CTRL_REG(ctrl, P0_U2PHY_CFG2));
+}
+
+static void usb_uninit_common_74110(struct brcm_usb_init_params *params)
+{
+ void __iomem *ctrl = params->regs[BRCM_REGS_CTRL];
+ u32 reg;
+
+ if (params->wake_enabled) {
+ reg = brcm_usb_readl(USB_CTRL_REG(ctrl, USB_PM));
+ reg |= (USB_CTRL_MASK(USB_PM, REF_S2_CLK_SWITCH_EN) |
+ USB_CTRL_MASK(USB_PM, UTMI_S2_CLK_SWITCH_EN));
+ brcm_usb_writel(reg, USB_CTRL_REG(ctrl, USB_PM));
+ }
+ usb_uninit_common_7216(params);
+}
+
static void usb_uninit_common_7211b0(struct brcm_usb_init_params *params)
{
void __iomem *ctrl = params->regs[BRCM_REGS_CTRL];
@@ -426,6 +471,16 @@ static void usb_set_dual_select(struct brcm_usb_init_params *params)
brcm_usb_writel(reg, USB_CTRL_REG(ctrl, USB_DEVICE_CTL1));
}
+static const struct brcm_usb_init_ops bcm74110_ops = {
+ .init_ipp = usb_init_ipp,
+ .init_common = usb_init_common_74110,
+ .init_xhci = usb_init_xhci,
+ .uninit_common = usb_uninit_common_74110,
+ .uninit_xhci = usb_uninit_xhci,
+ .get_dual_select = usb_get_dual_select,
+ .set_dual_select = usb_set_dual_select,
+};
+
static const struct brcm_usb_init_ops bcm7216_ops = {
.init_ipp = usb_init_ipp,
.init_common = usb_init_common_7216,
@@ -446,6 +501,12 @@ static const struct brcm_usb_init_ops bcm7211b0_ops = {
.set_dual_select = usb_set_dual_select,
};
+void brcm_usb_dvr_init_74110(struct brcm_usb_init_params *params)
+{
+ params->family_name = "74110";
+ params->ops = &bcm74110_ops;
+}
+
void brcm_usb_dvr_init_7216(struct brcm_usb_init_params *params)
{
diff --git a/drivers/phy/broadcom/phy-brcm-usb-init.h b/drivers/phy/broadcom/phy-brcm-usb-init.h
index c1a88f5cd4cd..4c7be78d0b14 100644
--- a/drivers/phy/broadcom/phy-brcm-usb-init.h
+++ b/drivers/phy/broadcom/phy-brcm-usb-init.h
@@ -72,6 +72,7 @@ struct brcm_usb_init_params {
bool wake_enabled;
};
+void brcm_usb_dvr_init_74110(struct brcm_usb_init_params *params);
void brcm_usb_dvr_init_4908(struct brcm_usb_init_params *params);
void brcm_usb_dvr_init_7445(struct brcm_usb_init_params *params);
void brcm_usb_dvr_init_7216(struct brcm_usb_init_params *params);
diff --git a/drivers/phy/broadcom/phy-brcm-usb.c b/drivers/phy/broadcom/phy-brcm-usb.c
index 6362ca5b7fb6..0666864c2f77 100644
--- a/drivers/phy/broadcom/phy-brcm-usb.c
+++ b/drivers/phy/broadcom/phy-brcm-usb.c
@@ -283,6 +283,16 @@ static const struct attribute_group brcm_usb_phy_group = {
.attrs = brcm_usb_phy_attrs,
};
+static const struct match_chip_info chip_info_74110 = {
+ .init_func = &brcm_usb_dvr_init_74110,
+ .required_regs = {
+ BRCM_REGS_CTRL,
+ BRCM_REGS_XHCI_EC,
+ BRCM_REGS_XHCI_GBL,
+ -1,
+ },
+};
+
static const struct match_chip_info chip_info_4908 = {
.init_func = &brcm_usb_dvr_init_4908,
.required_regs = {
@@ -326,6 +336,10 @@ static const struct match_chip_info chip_info_7445 = {
static const struct of_device_id brcm_usb_dt_ids[] = {
{
+ .compatible = "brcm,bcm74110-usb-phy",
+ .data = &chip_info_74110,
+ },
+ {
.compatible = "brcm,bcm4908-usb-phy",
.data = &chip_info_4908,
},
diff --git a/drivers/phy/freescale/phy-fsl-imx8m-pcie.c b/drivers/phy/freescale/phy-fsl-imx8m-pcie.c
index 7355d9921b64..68fcc8114d75 100644
--- a/drivers/phy/freescale/phy-fsl-imx8m-pcie.c
+++ b/drivers/phy/freescale/phy-fsl-imx8m-pcie.c
@@ -238,24 +238,21 @@ static int imx8_pcie_phy_probe(struct platform_device *pdev)
imx8_phy->clkreq_unused = false;
imx8_phy->clk = devm_clk_get(dev, "ref");
- if (IS_ERR(imx8_phy->clk)) {
- dev_err(dev, "failed to get imx pcie phy clock\n");
- return PTR_ERR(imx8_phy->clk);
- }
+ if (IS_ERR(imx8_phy->clk))
+ return dev_err_probe(dev, PTR_ERR(imx8_phy->clk),
+ "failed to get imx pcie phy clock\n");
/* Grab GPR config register range */
imx8_phy->iomuxc_gpr =
syscon_regmap_lookup_by_compatible(imx8_phy->drvdata->gpr);
- if (IS_ERR(imx8_phy->iomuxc_gpr)) {
- dev_err(dev, "unable to find iomuxc registers\n");
- return PTR_ERR(imx8_phy->iomuxc_gpr);
- }
+ if (IS_ERR(imx8_phy->iomuxc_gpr))
+ return dev_err_probe(dev, PTR_ERR(imx8_phy->iomuxc_gpr),
+ "unable to find iomuxc registers\n");
imx8_phy->reset = devm_reset_control_get_exclusive(dev, "pciephy");
- if (IS_ERR(imx8_phy->reset)) {
- dev_err(dev, "Failed to get PCIEPHY reset control\n");
- return PTR_ERR(imx8_phy->reset);
- }
+ if (IS_ERR(imx8_phy->reset))
+ return dev_err_probe(dev, PTR_ERR(imx8_phy->reset),
+ "Failed to get PCIEPHY reset control\n");
if (imx8_phy->drvdata->variant == IMX8MP) {
imx8_phy->perst =
diff --git a/drivers/phy/freescale/phy-fsl-imx8mq-usb.c b/drivers/phy/freescale/phy-fsl-imx8mq-usb.c
index a974ef94de9a..b94f242420fc 100644
--- a/drivers/phy/freescale/phy-fsl-imx8mq-usb.c
+++ b/drivers/phy/freescale/phy-fsl-imx8mq-usb.c
@@ -293,6 +293,28 @@ static u32 phy_tx_vref_tune_from_property(u32 percent)
return DIV_ROUND_CLOSEST(percent - 94U, 2);
}
+static u32 imx95_phy_tx_vref_tune_from_property(u32 percent)
+{
+ percent = clamp(percent, 90U, 108U);
+
+ switch (percent) {
+ case 90 ... 91:
+ percent = 0;
+ break;
+ case 92 ... 96:
+ percent -= 91;
+ break;
+ case 97 ... 104:
+ percent -= 92;
+ break;
+ case 105 ... 108:
+ percent -= 93;
+ break;
+ }
+
+ return percent;
+}
+
static u32 phy_tx_rise_tune_from_property(u32 percent)
{
switch (percent) {
@@ -307,6 +329,22 @@ static u32 phy_tx_rise_tune_from_property(u32 percent)
}
}
+static u32 imx95_phy_tx_rise_tune_from_property(u32 percent)
+{
+ percent = clamp(percent, 90U, 120U);
+
+ switch (percent) {
+ case 90 ... 99:
+ return 3;
+ case 101 ... 115:
+ return 1;
+ case 116 ... 120:
+ return 0;
+ default:
+ return 2;
+ }
+}
+
static u32 phy_tx_preemp_amp_tune_from_property(u32 microamp)
{
microamp = min(microamp, 1800U);
@@ -317,12 +355,12 @@ static u32 phy_tx_preemp_amp_tune_from_property(u32 microamp)
static u32 phy_tx_vboost_level_from_property(u32 microvolt)
{
switch (microvolt) {
- case 0 ... 960:
- return 0;
- case 961 ... 1160:
- return 2;
- default:
+ case 1156:
+ return 5;
+ case 844:
return 3;
+ default:
+ return 4;
}
}
@@ -352,6 +390,29 @@ static u32 phy_comp_dis_tune_from_property(u32 percent)
return 7;
}
}
+
+static u32 imx95_phy_comp_dis_tune_from_property(u32 percent)
+{
+ percent = clamp(percent, 94, 104);
+
+ switch (percent) {
+ case 94 ... 95:
+ percent = 0;
+ break;
+ case 96 ... 98:
+ percent -= 95;
+ break;
+ case 99 ... 102:
+ percent -= 96;
+ break;
+ case 103 ... 104:
+ percent -= 97;
+ break;
+ }
+
+ return percent;
+}
+
static u32 phy_pcs_tx_swing_full_from_property(u32 percent)
{
percent = min(percent, 100U);
@@ -362,10 +423,17 @@ static u32 phy_pcs_tx_swing_full_from_property(u32 percent)
static void imx8m_get_phy_tuning_data(struct imx8mq_usb_phy *imx_phy)
{
struct device *dev = imx_phy->phy->dev.parent;
+ bool is_imx95 = false;
+
+ if (device_is_compatible(dev, "fsl,imx95-usb-phy"))
+ is_imx95 = true;
if (device_property_read_u32(dev, "fsl,phy-tx-vref-tune-percent",
&imx_phy->tx_vref_tune))
imx_phy->tx_vref_tune = PHY_TUNE_DEFAULT;
+ else if (is_imx95)
+ imx_phy->tx_vref_tune =
+ imx95_phy_tx_vref_tune_from_property(imx_phy->tx_vref_tune);
else
imx_phy->tx_vref_tune =
phy_tx_vref_tune_from_property(imx_phy->tx_vref_tune);
@@ -373,6 +441,9 @@ static void imx8m_get_phy_tuning_data(struct imx8mq_usb_phy *imx_phy)
if (device_property_read_u32(dev, "fsl,phy-tx-rise-tune-percent",
&imx_phy->tx_rise_tune))
imx_phy->tx_rise_tune = PHY_TUNE_DEFAULT;
+ else if (is_imx95)
+ imx_phy->tx_rise_tune =
+ imx95_phy_tx_rise_tune_from_property(imx_phy->tx_rise_tune);
else
imx_phy->tx_rise_tune =
phy_tx_rise_tune_from_property(imx_phy->tx_rise_tune);
@@ -394,6 +465,9 @@ static void imx8m_get_phy_tuning_data(struct imx8mq_usb_phy *imx_phy)
if (device_property_read_u32(dev, "fsl,phy-comp-dis-tune-percent",
&imx_phy->comp_dis_tune))
imx_phy->comp_dis_tune = PHY_TUNE_DEFAULT;
+ else if (is_imx95)
+ imx_phy->comp_dis_tune =
+ imx95_phy_comp_dis_tune_from_property(imx_phy->comp_dis_tune);
else
imx_phy->comp_dis_tune =
phy_comp_dis_tune_from_property(imx_phy->comp_dis_tune);
diff --git a/drivers/phy/freescale/phy-fsl-samsung-hdmi.c b/drivers/phy/freescale/phy-fsl-samsung-hdmi.c
index 10fbe8dee116..191c282246d9 100644
--- a/drivers/phy/freescale/phy-fsl-samsung-hdmi.c
+++ b/drivers/phy/freescale/phy-fsl-samsung-hdmi.c
@@ -456,6 +456,8 @@ static int fsl_samsung_hdmi_phy_configure(struct fsl_samsung_hdmi_phy *phy,
int i, ret;
u8 val;
+ phy->cur_cfg = cfg;
+
/* HDMI PHY init */
writeb(REG33_FIX_DA, phy->regs + PHY_REG(33));
@@ -508,7 +510,14 @@ static const struct phy_config *fsl_samsung_hdmi_phy_lookup_rate(unsigned long r
if (phy_pll_cfg[i].pixclk <= rate)
break;
- return &phy_pll_cfg[i];
+ /* If there is an exact match, or the array has been searched, return the value*/
+ if (phy_pll_cfg[i].pixclk == rate || i + 1 > ARRAY_SIZE(phy_pll_cfg) - 1)
+ return &phy_pll_cfg[i];
+
+ /* See if the next entry is closer to nominal than this one */
+ return (abs((long) rate - (long) phy_pll_cfg[i].pixclk) <
+ abs((long) rate - (long) phy_pll_cfg[i+1].pixclk) ?
+ &phy_pll_cfg[i] : &phy_pll_cfg[i+1]);
}
static void fsl_samsung_hdmi_calculate_phy(struct phy_config *cal_phy, unsigned long rate,
@@ -521,18 +530,9 @@ static void fsl_samsung_hdmi_calculate_phy(struct phy_config *cal_phy, unsigned
/* pll_div_regs 3-6 are fixed and pre-defined already */
}
-static u32 fsl_samsung_hdmi_phy_get_closest_rate(unsigned long rate,
- u32 int_div_clk, u32 frac_div_clk)
-{
- /* Calculate the absolute value of the differences and return whichever is closest */
- if (abs((long)rate - (long)int_div_clk) < abs((long)(rate - (long)frac_div_clk)))
- return int_div_clk;
-
- return frac_div_clk;
-}
-
-static long phy_clk_round_rate(struct clk_hw *hw,
- unsigned long rate, unsigned long *parent_rate)
+static
+const struct phy_config *fsl_samsung_hdmi_phy_find_settings(struct fsl_samsung_hdmi_phy *phy,
+ unsigned long rate)
{
const struct phy_config *fract_div_phy;
u32 int_div_clk;
@@ -541,83 +541,66 @@ static long phy_clk_round_rate(struct clk_hw *hw,
/* If the clock is out of range return error instead of searching */
if (rate > 297000000 || rate < 22250000)
- return -EINVAL;
+ return NULL;
/* Search the fractional divider lookup table */
fract_div_phy = fsl_samsung_hdmi_phy_lookup_rate(rate);
+ if (fract_div_phy->pixclk == rate) {
+ dev_dbg(phy->dev, "fractional divider match = %u\n", fract_div_phy->pixclk);
+ return fract_div_phy;
+ }
- /* If the rate is an exact match, return that value */
- if (rate == fract_div_phy->pixclk)
- return fract_div_phy->pixclk;
-
- /* If the exact match isn't found, calculate the integer divider */
+ /* Calculate the integer divider */
int_div_clk = fsl_samsung_hdmi_phy_find_pms(rate, &p, &m, &s);
+ fsl_samsung_hdmi_calculate_phy(&calculated_phy_pll_cfg, int_div_clk, p, m, s);
+ if (int_div_clk == rate) {
+ dev_dbg(phy->dev, "integer divider match = %u\n", calculated_phy_pll_cfg.pixclk);
+ return &calculated_phy_pll_cfg;
+ }
- /* If the int_div_clk rate is an exact match, return that value */
- if (int_div_clk == rate)
- return int_div_clk;
+ /* Calculate the absolute value of the differences and return whichever is closest */
+ if (abs((long)rate - (long)int_div_clk) <
+ abs((long)rate - (long)fract_div_phy->pixclk)) {
+ dev_dbg(phy->dev, "integer divider = %u\n", calculated_phy_pll_cfg.pixclk);
+ return &calculated_phy_pll_cfg;
+ }
- /* If neither rate is an exact match, use the value from the LUT */
- return fract_div_phy->pixclk;
-}
+ dev_dbg(phy->dev, "fractional divider = %u\n", phy->cur_cfg->pixclk);
-static int phy_use_fract_div(struct fsl_samsung_hdmi_phy *phy, const struct phy_config *fract_div_phy)
-{
- phy->cur_cfg = fract_div_phy;
- dev_dbg(phy->dev, "fsl_samsung_hdmi_phy: using fractional divider rate = %u\n",
- phy->cur_cfg->pixclk);
- return fsl_samsung_hdmi_phy_configure(phy, phy->cur_cfg);
+ return fract_div_phy;
}
-static int phy_use_integer_div(struct fsl_samsung_hdmi_phy *phy,
- const struct phy_config *int_div_clk)
+static long fsl_samsung_hdmi_phy_clk_round_rate(struct clk_hw *hw,
+ unsigned long rate, unsigned long *parent_rate)
{
- phy->cur_cfg = &calculated_phy_pll_cfg;
- dev_dbg(phy->dev, "fsl_samsung_hdmi_phy: integer divider rate = %u\n",
- phy->cur_cfg->pixclk);
- return fsl_samsung_hdmi_phy_configure(phy, phy->cur_cfg);
+ struct fsl_samsung_hdmi_phy *phy = to_fsl_samsung_hdmi_phy(hw);
+ const struct phy_config *target_settings = fsl_samsung_hdmi_phy_find_settings(phy, rate);
+
+ if (target_settings == NULL)
+ return -EINVAL;
+
+ dev_dbg(phy->dev, "round_rate, closest rate = %u\n", target_settings->pixclk);
+ return target_settings->pixclk;
}
-static int phy_clk_set_rate(struct clk_hw *hw,
+static int fsl_samsung_hdmi_phy_clk_set_rate(struct clk_hw *hw,
unsigned long rate, unsigned long parent_rate)
{
struct fsl_samsung_hdmi_phy *phy = to_fsl_samsung_hdmi_phy(hw);
- const struct phy_config *fract_div_phy;
- u32 int_div_clk;
- u16 m;
- u8 p, s;
+ const struct phy_config *target_settings = fsl_samsung_hdmi_phy_find_settings(phy, rate);
- /* Search the fractional divider lookup table */
- fract_div_phy = fsl_samsung_hdmi_phy_lookup_rate(rate);
-
- /* If the rate is an exact match, use that value */
- if (fract_div_phy->pixclk == rate)
- return phy_use_fract_div(phy, fract_div_phy);
+ if (target_settings == NULL)
+ return -EINVAL;
- /*
- * If the rate from the fractional divider is not exact, check the integer divider,
- * and use it if that value is an exact match.
- */
- int_div_clk = fsl_samsung_hdmi_phy_find_pms(rate, &p, &m, &s);
- fsl_samsung_hdmi_calculate_phy(&calculated_phy_pll_cfg, int_div_clk, p, m, s);
- if (int_div_clk == rate)
- return phy_use_integer_div(phy, &calculated_phy_pll_cfg);
+ dev_dbg(phy->dev, "set_rate, closest rate = %u\n", target_settings->pixclk);
- /*
- * Compare the difference between the integer clock and the fractional clock against
- * the desired clock and which whichever is closest.
- */
- if (fsl_samsung_hdmi_phy_get_closest_rate(rate, int_div_clk,
- fract_div_phy->pixclk) == fract_div_phy->pixclk)
- return phy_use_fract_div(phy, fract_div_phy);
- else
- return phy_use_integer_div(phy, &calculated_phy_pll_cfg);
+ return fsl_samsung_hdmi_phy_configure(phy, target_settings);
}
static const struct clk_ops phy_clk_ops = {
.recalc_rate = phy_clk_recalc_rate,
- .round_rate = phy_clk_round_rate,
- .set_rate = phy_clk_set_rate,
+ .round_rate = fsl_samsung_hdmi_phy_clk_round_rate,
+ .set_rate = fsl_samsung_hdmi_phy_clk_set_rate,
};
static int phy_clk_register(struct fsl_samsung_hdmi_phy *phy)
diff --git a/drivers/phy/marvell/Kconfig b/drivers/phy/marvell/Kconfig
index bdb87c976243..bccd72dccb77 100644
--- a/drivers/phy/marvell/Kconfig
+++ b/drivers/phy/marvell/Kconfig
@@ -29,7 +29,7 @@ config PHY_MVEBU_A3700_COMPHY
depends on ARCH_MVEBU || COMPILE_TEST
depends on OF
depends on HAVE_ARM_SMCCC
- default y
+ default ARCH_MVEBU
select GENERIC_PHY
help
This driver allows to control the comphy, a hardware block providing
@@ -40,7 +40,7 @@ config PHY_MVEBU_A3700_UTMI
tristate "Marvell A3700 UTMI driver"
depends on ARCH_MVEBU || COMPILE_TEST
depends on OF
- default y
+ default ARCH_MVEBU
select GENERIC_PHY
help
Enable this to support Marvell A3700 UTMI PHY driver.
diff --git a/drivers/phy/mediatek/phy-mtk-xsphy.c b/drivers/phy/mediatek/phy-mtk-xsphy.c
index 7c248f5cfca5..c0ddb9273cc3 100644
--- a/drivers/phy/mediatek/phy-mtk-xsphy.c
+++ b/drivers/phy/mediatek/phy-mtk-xsphy.c
@@ -11,10 +11,12 @@
#include <linux/clk.h>
#include <linux/delay.h>
#include <linux/iopoll.h>
+#include <linux/mfd/syscon.h>
#include <linux/module.h>
#include <linux/of_address.h>
#include <linux/phy/phy.h>
#include <linux/platform_device.h>
+#include <linux/regmap.h>
#include "phy-mtk-io.h"
@@ -81,12 +83,22 @@
#define XSP_SR_COEF_DIVISOR 1000
#define XSP_FM_DET_CYCLE_CNT 1024
+/* PHY switch between pcie/usb3/sgmii */
+#define USB_PHY_SWITCH_CTRL 0x0
+#define RG_PHY_SW_TYPE GENMASK(3, 0)
+#define RG_PHY_SW_PCIE 0x0
+#define RG_PHY_SW_USB3 0x1
+#define RG_PHY_SW_SGMII 0x2
+
struct xsphy_instance {
struct phy *phy;
void __iomem *port_base;
struct clk *ref_clk; /* reference clock of anolog phy */
u32 index;
u32 type;
+ struct regmap *type_sw;
+ u32 type_sw_reg;
+ u32 type_sw_index;
/* only for HQA test */
int efuse_intr;
int efuse_tx_imp;
@@ -259,6 +271,10 @@ static void phy_parse_property(struct mtk_xsphy *xsphy,
inst->efuse_intr, inst->efuse_tx_imp,
inst->efuse_rx_imp);
break;
+ case PHY_TYPE_PCIE:
+ case PHY_TYPE_SGMII:
+ /* nothing to do */
+ break;
default:
dev_err(xsphy->dev, "incompatible phy type\n");
return;
@@ -305,6 +321,62 @@ static void u3_phy_props_set(struct mtk_xsphy *xsphy,
RG_XTP_LN0_RX_IMPSEL, inst->efuse_rx_imp);
}
+/* type switch for usb3/pcie/sgmii */
+static int phy_type_syscon_get(struct xsphy_instance *instance,
+ struct device_node *dn)
+{
+ struct of_phandle_args args;
+ int ret;
+
+ /* type switch function is optional */
+ if (!of_property_present(dn, "mediatek,syscon-type"))
+ return 0;
+
+ ret = of_parse_phandle_with_fixed_args(dn, "mediatek,syscon-type",
+ 2, 0, &args);
+ if (ret)
+ return ret;
+
+ instance->type_sw_reg = args.args[0];
+ instance->type_sw_index = args.args[1] & 0x3; /* <=3 */
+ instance->type_sw = syscon_node_to_regmap(args.np);
+ of_node_put(args.np);
+ dev_info(&instance->phy->dev, "type_sw - reg %#x, index %d\n",
+ instance->type_sw_reg, instance->type_sw_index);
+
+ return PTR_ERR_OR_ZERO(instance->type_sw);
+}
+
+static int phy_type_set(struct xsphy_instance *instance)
+{
+ int type;
+ u32 offset;
+
+ if (!instance->type_sw)
+ return 0;
+
+ switch (instance->type) {
+ case PHY_TYPE_USB3:
+ type = RG_PHY_SW_USB3;
+ break;
+ case PHY_TYPE_PCIE:
+ type = RG_PHY_SW_PCIE;
+ break;
+ case PHY_TYPE_SGMII:
+ type = RG_PHY_SW_SGMII;
+ break;
+ case PHY_TYPE_USB2:
+ default:
+ return 0;
+ }
+
+ offset = instance->type_sw_index * BITS_PER_BYTE;
+ regmap_update_bits(instance->type_sw, instance->type_sw_reg,
+ RG_PHY_SW_TYPE << offset, type << offset);
+
+ return 0;
+}
+
static int mtk_phy_init(struct phy *phy)
{
struct xsphy_instance *inst = phy_get_drvdata(phy);
@@ -325,6 +397,10 @@ static int mtk_phy_init(struct phy *phy)
case PHY_TYPE_USB3:
u3_phy_props_set(xsphy, inst);
break;
+ case PHY_TYPE_PCIE:
+ case PHY_TYPE_SGMII:
+ /* nothing to do, only used to set type */
+ break;
default:
dev_err(xsphy->dev, "incompatible phy type\n");
clk_disable_unprepare(inst->ref_clk);
@@ -403,12 +479,15 @@ static struct phy *mtk_phy_xlate(struct device *dev,
inst->type = args->args[0];
if (!(inst->type == PHY_TYPE_USB2 ||
- inst->type == PHY_TYPE_USB3)) {
+ inst->type == PHY_TYPE_USB3 ||
+ inst->type == PHY_TYPE_PCIE ||
+ inst->type == PHY_TYPE_SGMII)) {
dev_err(dev, "unsupported phy type: %d\n", inst->type);
return ERR_PTR(-EINVAL);
}
phy_parse_property(xsphy, inst);
+ phy_type_set(inst);
return inst->phy;
}
@@ -510,6 +589,10 @@ static int mtk_xsphy_probe(struct platform_device *pdev)
dev_err(dev, "failed to get ref_clk(id-%d)\n", port);
return PTR_ERR(inst->ref_clk);
}
+
+ retval = phy_type_syscon_get(inst, child_np);
+ if (retval)
+ return retval;
}
provider = devm_of_phy_provider_register(dev, mtk_phy_xlate);
diff --git a/drivers/phy/phy-snps-eusb2.c b/drivers/phy/phy-snps-eusb2.c
new file mode 100644
index 000000000000..b73a1d7e57b3
--- /dev/null
+++ b/drivers/phy/phy-snps-eusb2.c
@@ -0,0 +1,627 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2023, Linaro Limited
+ */
+
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/iopoll.h>
+#include <linux/mod_devicetable.h>
+#include <linux/phy/phy.h>
+#include <linux/platform_device.h>
+#include <linux/regulator/consumer.h>
+#include <linux/reset.h>
+
+#define EXYNOS_USB_PHY_HS_PHY_CTRL_RST (0x0)
+#define USB_PHY_RST_MASK GENMASK(1, 0)
+#define UTMI_PORT_RST_MASK GENMASK(5, 4)
+
+#define EXYNOS_USB_PHY_HS_PHY_CTRL_COMMON (0x4)
+#define RPTR_MODE BIT(10)
+#define FSEL_20_MHZ_VAL (0x1)
+#define FSEL_24_MHZ_VAL (0x2)
+#define FSEL_26_MHZ_VAL (0x3)
+#define FSEL_48_MHZ_VAL (0x2)
+
+#define EXYNOS_USB_PHY_CFG_PLLCFG0 (0x8)
+#define PHY_CFG_PLL_FB_DIV_19_8_MASK GENMASK(19, 8)
+#define DIV_19_8_19_2_MHZ_VAL (0x170)
+#define DIV_19_8_20_MHZ_VAL (0x160)
+#define DIV_19_8_24_MHZ_VAL (0x120)
+#define DIV_19_8_26_MHZ_VAL (0x107)
+#define DIV_19_8_48_MHZ_VAL (0x120)
+
+#define EXYNOS_USB_PHY_CFG_PLLCFG1 (0xc)
+#define EXYNOS_PHY_CFG_PLL_FB_DIV_11_8_MASK GENMASK(11, 8)
+#define EXYNOS_DIV_11_8_19_2_MHZ_VAL (0x0)
+#define EXYNOS_DIV_11_8_20_MHZ_VAL (0x0)
+#define EXYNOS_DIV_11_8_24_MHZ_VAL (0x0)
+#define EXYNOS_DIV_11_8_26_MHZ_VAL (0x0)
+#define EXYNOS_DIV_11_8_48_MHZ_VAL (0x1)
+
+#define EXYNOS_PHY_CFG_TX (0x14)
+#define EXYNOS_PHY_CFG_TX_FSLS_VREF_TUNE_MASK GENMASK(2, 1)
+
+#define EXYNOS_USB_PHY_UTMI_TESTSE (0x20)
+#define TEST_IDDQ BIT(6)
+
+#define QCOM_USB_PHY_UTMI_CTRL0 (0x3c)
+#define SLEEPM BIT(0)
+#define OPMODE_MASK GENMASK(4, 3)
+#define OPMODE_NONDRIVING BIT(3)
+
+#define QCOM_USB_PHY_UTMI_CTRL5 (0x50)
+#define POR BIT(1)
+
+#define QCOM_USB_PHY_HS_PHY_CTRL_COMMON0 (0x54)
+#define PHY_ENABLE BIT(0)
+#define SIDDQ_SEL BIT(1)
+#define SIDDQ BIT(2)
+#define RETENABLEN BIT(3)
+#define FSEL_MASK GENMASK(6, 4)
+#define FSEL_19_2_MHZ_VAL (0x0)
+#define FSEL_38_4_MHZ_VAL (0x4)
+
+#define QCOM_USB_PHY_CFG_CTRL_1 (0x58)
+#define PHY_CFG_PLL_CPBIAS_CNTRL_MASK GENMASK(7, 1)
+
+#define QCOM_USB_PHY_CFG_CTRL_2 (0x5c)
+#define PHY_CFG_PLL_FB_DIV_7_0_MASK GENMASK(7, 0)
+#define DIV_7_0_19_2_MHZ_VAL (0x90)
+#define DIV_7_0_38_4_MHZ_VAL (0xc8)
+
+#define QCOM_USB_PHY_CFG_CTRL_3 (0x60)
+#define PHY_CFG_PLL_FB_DIV_11_8_MASK GENMASK(3, 0)
+#define DIV_11_8_19_2_MHZ_VAL (0x1)
+#define DIV_11_8_38_4_MHZ_VAL (0x0)
+
+#define PHY_CFG_PLL_REF_DIV GENMASK(7, 4)
+#define PLL_REF_DIV_VAL (0x0)
+
+#define QCOM_USB_PHY_HS_PHY_CTRL2 (0x64)
+#define VBUSVLDEXT0 BIT(0)
+#define USB2_SUSPEND_N BIT(2)
+#define USB2_SUSPEND_N_SEL BIT(3)
+#define VBUS_DET_EXT_SEL BIT(4)
+
+#define QCOM_USB_PHY_CFG_CTRL_4 (0x68)
+#define PHY_CFG_PLL_GMP_CNTRL_MASK GENMASK(1, 0)
+#define PHY_CFG_PLL_INT_CNTRL_MASK GENMASK(7, 2)
+
+#define QCOM_USB_PHY_CFG_CTRL_5 (0x6c)
+#define PHY_CFG_PLL_PROP_CNTRL_MASK GENMASK(4, 0)
+#define PHY_CFG_PLL_VREF_TUNE_MASK GENMASK(7, 6)
+
+#define QCOM_USB_PHY_CFG_CTRL_6 (0x70)
+#define PHY_CFG_PLL_VCO_CNTRL_MASK GENMASK(2, 0)
+
+#define QCOM_USB_PHY_CFG_CTRL_7 (0x74)
+
+#define QCOM_USB_PHY_CFG_CTRL_8 (0x78)
+#define PHY_CFG_TX_FSLS_VREF_TUNE_MASK GENMASK(1, 0)
+#define PHY_CFG_TX_FSLS_VREG_BYPASS BIT(2)
+#define PHY_CFG_TX_HS_VREF_TUNE_MASK GENMASK(5, 3)
+#define PHY_CFG_TX_HS_XV_TUNE_MASK GENMASK(7, 6)
+
+#define QCOM_USB_PHY_CFG_CTRL_9 (0x7c)
+#define PHY_CFG_TX_PREEMP_TUNE_MASK GENMASK(2, 0)
+#define PHY_CFG_TX_RES_TUNE_MASK GENMASK(4, 3)
+#define PHY_CFG_TX_RISE_TUNE_MASK GENMASK(6, 5)
+#define PHY_CFG_RCAL_BYPASS BIT(7)
+
+#define QCOM_USB_PHY_CFG_CTRL_10 (0x80)
+
+#define QCOM_USB_PHY_CFG0 (0x94)
+#define DATAPATH_CTRL_OVERRIDE_EN BIT(0)
+#define CMN_CTRL_OVERRIDE_EN BIT(1)
+
+#define QCOM_UTMI_PHY_CMN_CTRL0 (0x98)
+#define TESTBURNIN BIT(6)
+
+#define QCOM_USB_PHY_FSEL_SEL (0xb8)
+#define FSEL_SEL BIT(0)
+
+#define QCOM_USB_PHY_APB_ACCESS_CMD (0x130)
+#define RW_ACCESS BIT(0)
+#define APB_START_CMD BIT(1)
+#define APB_LOGIC_RESET BIT(2)
+
+#define QCOM_USB_PHY_APB_ACCESS_STATUS (0x134)
+#define ACCESS_DONE BIT(0)
+#define TIMED_OUT BIT(1)
+#define ACCESS_ERROR BIT(2)
+#define ACCESS_IN_PROGRESS BIT(3)
+
+#define QCOM_USB_PHY_APB_ADDRESS (0x138)
+#define APB_REG_ADDR_MASK GENMASK(7, 0)
+
+#define QCOM_USB_PHY_APB_WRDATA_LSB (0x13c)
+#define APB_REG_WRDATA_7_0_MASK GENMASK(3, 0)
+
+#define QCOM_USB_PHY_APB_WRDATA_MSB (0x140)
+#define APB_REG_WRDATA_15_8_MASK GENMASK(7, 4)
+
+#define QCOM_USB_PHY_APB_RDDATA_LSB (0x144)
+#define APB_REG_RDDATA_7_0_MASK GENMASK(3, 0)
+
+#define QCOM_USB_PHY_APB_RDDATA_MSB (0x148)
+#define APB_REG_RDDATA_15_8_MASK GENMASK(7, 4)
+
+static const char * const eusb2_hsphy_vreg_names[] = {
+ "vdd", "vdda12",
+};
+
+#define EUSB2_NUM_VREGS ARRAY_SIZE(eusb2_hsphy_vreg_names)
+
+struct snps_eusb2_phy_drvdata {
+ int (*phy_init)(struct phy *p);
+ const char * const *clk_names;
+ int num_clks;
+};
+
+struct snps_eusb2_hsphy {
+ struct phy *phy;
+ void __iomem *base;
+
+ struct clk *ref_clk;
+ struct clk_bulk_data *clks;
+ struct reset_control *phy_reset;
+
+ struct regulator_bulk_data vregs[EUSB2_NUM_VREGS];
+
+ enum phy_mode mode;
+
+ struct phy *repeater;
+
+ const struct snps_eusb2_phy_drvdata *data;
+};
+
+static int snps_eusb2_hsphy_set_mode(struct phy *p, enum phy_mode mode, int submode)
+{
+ struct snps_eusb2_hsphy *phy = phy_get_drvdata(p);
+
+ phy->mode = mode;
+
+ return phy_set_mode_ext(phy->repeater, mode, submode);
+}
+
+static void snps_eusb2_hsphy_write_mask(void __iomem *base, u32 offset,
+ u32 mask, u32 val)
+{
+ u32 reg;
+
+ reg = readl_relaxed(base + offset);
+ reg &= ~mask;
+ reg |= val & mask;
+ writel_relaxed(reg, base + offset);
+
+ /* Ensure above write is completed */
+ readl_relaxed(base + offset);
+}
+
+static void qcom_eusb2_default_parameters(struct snps_eusb2_hsphy *phy)
+{
+ /* default parameters: tx pre-emphasis */
+ snps_eusb2_hsphy_write_mask(phy->base, QCOM_USB_PHY_CFG_CTRL_9,
+ PHY_CFG_TX_PREEMP_TUNE_MASK,
+ FIELD_PREP(PHY_CFG_TX_PREEMP_TUNE_MASK, 0));
+
+ /* tx rise/fall time */
+ snps_eusb2_hsphy_write_mask(phy->base, QCOM_USB_PHY_CFG_CTRL_9,
+ PHY_CFG_TX_RISE_TUNE_MASK,
+ FIELD_PREP(PHY_CFG_TX_RISE_TUNE_MASK, 0x2));
+
+ /* source impedance adjustment */
+ snps_eusb2_hsphy_write_mask(phy->base, QCOM_USB_PHY_CFG_CTRL_9,
+ PHY_CFG_TX_RES_TUNE_MASK,
+ FIELD_PREP(PHY_CFG_TX_RES_TUNE_MASK, 0x1));
+
+ /* dc voltage level adjustement */
+ snps_eusb2_hsphy_write_mask(phy->base, QCOM_USB_PHY_CFG_CTRL_8,
+ PHY_CFG_TX_HS_VREF_TUNE_MASK,
+ FIELD_PREP(PHY_CFG_TX_HS_VREF_TUNE_MASK, 0x3));
+
+ /* transmitter HS crossover adjustement */
+ snps_eusb2_hsphy_write_mask(phy->base, QCOM_USB_PHY_CFG_CTRL_8,
+ PHY_CFG_TX_HS_XV_TUNE_MASK,
+ FIELD_PREP(PHY_CFG_TX_HS_XV_TUNE_MASK, 0x0));
+}
+
+struct snps_eusb2_ref_clk {
+ unsigned long freq;
+ u32 fsel_val;
+ u32 div_7_0_val;
+ u32 div_11_8_val;
+};
+
+static const struct snps_eusb2_ref_clk exynos_eusb2_ref_clk[] = {
+ { 19200000, FSEL_19_2_MHZ_VAL, DIV_19_8_19_2_MHZ_VAL, EXYNOS_DIV_11_8_19_2_MHZ_VAL },
+ { 20000000, FSEL_20_MHZ_VAL, DIV_19_8_20_MHZ_VAL, EXYNOS_DIV_11_8_20_MHZ_VAL },
+ { 24000000, FSEL_24_MHZ_VAL, DIV_19_8_24_MHZ_VAL, EXYNOS_DIV_11_8_24_MHZ_VAL },
+ { 26000000, FSEL_26_MHZ_VAL, DIV_19_8_26_MHZ_VAL, EXYNOS_DIV_11_8_26_MHZ_VAL },
+ { 48000000, FSEL_48_MHZ_VAL, DIV_19_8_48_MHZ_VAL, EXYNOS_DIV_11_8_48_MHZ_VAL },
+};
+
+static int exynos_eusb2_ref_clk_init(struct snps_eusb2_hsphy *phy)
+{
+ const struct snps_eusb2_ref_clk *config = NULL;
+ unsigned long ref_clk_freq = clk_get_rate(phy->ref_clk);
+
+ for (int i = 0; i < ARRAY_SIZE(exynos_eusb2_ref_clk); i++) {
+ if (exynos_eusb2_ref_clk[i].freq == ref_clk_freq) {
+ config = &exynos_eusb2_ref_clk[i];
+ break;
+ }
+ }
+
+ if (!config) {
+ dev_err(&phy->phy->dev, "unsupported ref_clk_freq:%lu\n", ref_clk_freq);
+ return -EINVAL;
+ }
+
+ snps_eusb2_hsphy_write_mask(phy->base, EXYNOS_USB_PHY_HS_PHY_CTRL_COMMON,
+ FSEL_MASK,
+ FIELD_PREP(FSEL_MASK, config->fsel_val));
+
+ snps_eusb2_hsphy_write_mask(phy->base, EXYNOS_USB_PHY_CFG_PLLCFG0,
+ PHY_CFG_PLL_FB_DIV_19_8_MASK,
+ FIELD_PREP(PHY_CFG_PLL_FB_DIV_19_8_MASK,
+ config->div_7_0_val));
+
+ snps_eusb2_hsphy_write_mask(phy->base, EXYNOS_USB_PHY_CFG_PLLCFG1,
+ EXYNOS_PHY_CFG_PLL_FB_DIV_11_8_MASK,
+ config->div_11_8_val);
+ return 0;
+}
+
+static const struct snps_eusb2_ref_clk qcom_eusb2_ref_clk[] = {
+ { 19200000, FSEL_19_2_MHZ_VAL, DIV_7_0_19_2_MHZ_VAL, DIV_11_8_19_2_MHZ_VAL },
+ { 38400000, FSEL_38_4_MHZ_VAL, DIV_7_0_38_4_MHZ_VAL, DIV_11_8_38_4_MHZ_VAL },
+};
+
+static int qcom_eusb2_ref_clk_init(struct snps_eusb2_hsphy *phy)
+{
+ const struct snps_eusb2_ref_clk *config = NULL;
+ unsigned long ref_clk_freq = clk_get_rate(phy->ref_clk);
+
+ for (int i = 0; i < ARRAY_SIZE(qcom_eusb2_ref_clk); i++) {
+ if (qcom_eusb2_ref_clk[i].freq == ref_clk_freq) {
+ config = &qcom_eusb2_ref_clk[i];
+ break;
+ }
+ }
+
+ if (!config) {
+ dev_err(&phy->phy->dev, "unsupported ref_clk_freq:%lu\n", ref_clk_freq);
+ return -EINVAL;
+ }
+
+ snps_eusb2_hsphy_write_mask(phy->base, QCOM_USB_PHY_HS_PHY_CTRL_COMMON0,
+ FSEL_MASK,
+ FIELD_PREP(FSEL_MASK, config->fsel_val));
+
+ snps_eusb2_hsphy_write_mask(phy->base, QCOM_USB_PHY_CFG_CTRL_2,
+ PHY_CFG_PLL_FB_DIV_7_0_MASK,
+ config->div_7_0_val);
+
+ snps_eusb2_hsphy_write_mask(phy->base, QCOM_USB_PHY_CFG_CTRL_3,
+ PHY_CFG_PLL_FB_DIV_11_8_MASK,
+ config->div_11_8_val);
+
+ snps_eusb2_hsphy_write_mask(phy->base, QCOM_USB_PHY_CFG_CTRL_3,
+ PHY_CFG_PLL_REF_DIV, PLL_REF_DIV_VAL);
+
+ return 0;
+}
+
+static int exynos_snps_eusb2_hsphy_init(struct phy *p)
+{
+ struct snps_eusb2_hsphy *phy = phy_get_drvdata(p);
+ int ret;
+
+ snps_eusb2_hsphy_write_mask(phy->base, EXYNOS_USB_PHY_HS_PHY_CTRL_RST,
+ USB_PHY_RST_MASK | UTMI_PORT_RST_MASK,
+ USB_PHY_RST_MASK | UTMI_PORT_RST_MASK);
+ fsleep(50); /* required after holding phy in reset */
+
+ snps_eusb2_hsphy_write_mask(phy->base, EXYNOS_USB_PHY_HS_PHY_CTRL_COMMON,
+ RPTR_MODE, RPTR_MODE);
+
+ /* update ref_clk related registers */
+ ret = exynos_eusb2_ref_clk_init(phy);
+ if (ret)
+ return ret;
+
+ /* default parameter: tx fsls-vref */
+ snps_eusb2_hsphy_write_mask(phy->base, EXYNOS_PHY_CFG_TX,
+ EXYNOS_PHY_CFG_TX_FSLS_VREF_TUNE_MASK,
+ FIELD_PREP(EXYNOS_PHY_CFG_TX_FSLS_VREF_TUNE_MASK, 0x0));
+
+ snps_eusb2_hsphy_write_mask(phy->base, EXYNOS_USB_PHY_UTMI_TESTSE,
+ TEST_IDDQ, 0);
+ fsleep(10); /* required after releasing test_iddq */
+
+ snps_eusb2_hsphy_write_mask(phy->base, EXYNOS_USB_PHY_HS_PHY_CTRL_RST,
+ USB_PHY_RST_MASK, 0);
+
+ snps_eusb2_hsphy_write_mask(phy->base, EXYNOS_USB_PHY_HS_PHY_CTRL_COMMON,
+ PHY_ENABLE, PHY_ENABLE);
+
+ snps_eusb2_hsphy_write_mask(phy->base, EXYNOS_USB_PHY_HS_PHY_CTRL_RST,
+ UTMI_PORT_RST_MASK, 0);
+
+ return 0;
+}
+
+static const char * const exynos_eusb2_hsphy_clock_names[] = {
+ "ref", "bus", "ctrl",
+};
+
+static const struct snps_eusb2_phy_drvdata exynos2200_snps_eusb2_phy = {
+ .phy_init = exynos_snps_eusb2_hsphy_init,
+ .clk_names = exynos_eusb2_hsphy_clock_names,
+ .num_clks = ARRAY_SIZE(exynos_eusb2_hsphy_clock_names),
+};
+
+static int qcom_snps_eusb2_hsphy_init(struct phy *p)
+{
+ struct snps_eusb2_hsphy *phy = phy_get_drvdata(p);
+ int ret;
+
+ snps_eusb2_hsphy_write_mask(phy->base, QCOM_USB_PHY_CFG0,
+ CMN_CTRL_OVERRIDE_EN, CMN_CTRL_OVERRIDE_EN);
+
+ snps_eusb2_hsphy_write_mask(phy->base, QCOM_USB_PHY_UTMI_CTRL5, POR, POR);
+
+ snps_eusb2_hsphy_write_mask(phy->base, QCOM_USB_PHY_HS_PHY_CTRL_COMMON0,
+ PHY_ENABLE | RETENABLEN, PHY_ENABLE | RETENABLEN);
+
+ snps_eusb2_hsphy_write_mask(phy->base, QCOM_USB_PHY_APB_ACCESS_CMD,
+ APB_LOGIC_RESET, APB_LOGIC_RESET);
+
+ snps_eusb2_hsphy_write_mask(phy->base, QCOM_UTMI_PHY_CMN_CTRL0, TESTBURNIN, 0);
+
+ snps_eusb2_hsphy_write_mask(phy->base, QCOM_USB_PHY_FSEL_SEL,
+ FSEL_SEL, FSEL_SEL);
+
+ /* update ref_clk related registers */
+ ret = qcom_eusb2_ref_clk_init(phy);
+ if (ret)
+ return ret;
+
+ snps_eusb2_hsphy_write_mask(phy->base, QCOM_USB_PHY_CFG_CTRL_1,
+ PHY_CFG_PLL_CPBIAS_CNTRL_MASK,
+ FIELD_PREP(PHY_CFG_PLL_CPBIAS_CNTRL_MASK, 0x1));
+
+ snps_eusb2_hsphy_write_mask(phy->base, QCOM_USB_PHY_CFG_CTRL_4,
+ PHY_CFG_PLL_INT_CNTRL_MASK,
+ FIELD_PREP(PHY_CFG_PLL_INT_CNTRL_MASK, 0x8));
+
+ snps_eusb2_hsphy_write_mask(phy->base, QCOM_USB_PHY_CFG_CTRL_4,
+ PHY_CFG_PLL_GMP_CNTRL_MASK,
+ FIELD_PREP(PHY_CFG_PLL_GMP_CNTRL_MASK, 0x1));
+
+ snps_eusb2_hsphy_write_mask(phy->base, QCOM_USB_PHY_CFG_CTRL_5,
+ PHY_CFG_PLL_PROP_CNTRL_MASK,
+ FIELD_PREP(PHY_CFG_PLL_PROP_CNTRL_MASK, 0x10));
+
+ snps_eusb2_hsphy_write_mask(phy->base, QCOM_USB_PHY_CFG_CTRL_6,
+ PHY_CFG_PLL_VCO_CNTRL_MASK,
+ FIELD_PREP(PHY_CFG_PLL_VCO_CNTRL_MASK, 0x0));
+
+ snps_eusb2_hsphy_write_mask(phy->base, QCOM_USB_PHY_CFG_CTRL_5,
+ PHY_CFG_PLL_VREF_TUNE_MASK,
+ FIELD_PREP(PHY_CFG_PLL_VREF_TUNE_MASK, 0x1));
+
+ snps_eusb2_hsphy_write_mask(phy->base, QCOM_USB_PHY_HS_PHY_CTRL2,
+ VBUS_DET_EXT_SEL, VBUS_DET_EXT_SEL);
+
+ /* set default parameters */
+ qcom_eusb2_default_parameters(phy);
+
+ snps_eusb2_hsphy_write_mask(phy->base, QCOM_USB_PHY_HS_PHY_CTRL2,
+ USB2_SUSPEND_N_SEL | USB2_SUSPEND_N,
+ USB2_SUSPEND_N_SEL | USB2_SUSPEND_N);
+
+ snps_eusb2_hsphy_write_mask(phy->base, QCOM_USB_PHY_UTMI_CTRL0, SLEEPM, SLEEPM);
+
+ snps_eusb2_hsphy_write_mask(phy->base, QCOM_USB_PHY_HS_PHY_CTRL_COMMON0,
+ SIDDQ_SEL, SIDDQ_SEL);
+
+ snps_eusb2_hsphy_write_mask(phy->base, QCOM_USB_PHY_HS_PHY_CTRL_COMMON0,
+ SIDDQ, 0);
+
+ snps_eusb2_hsphy_write_mask(phy->base, QCOM_USB_PHY_UTMI_CTRL5, POR, 0);
+
+ snps_eusb2_hsphy_write_mask(phy->base, QCOM_USB_PHY_HS_PHY_CTRL2,
+ USB2_SUSPEND_N_SEL, 0);
+
+ return 0;
+}
+
+static const char * const qcom_eusb2_hsphy_clock_names[] = {
+ "ref",
+};
+
+static const struct snps_eusb2_phy_drvdata sm8550_snps_eusb2_phy = {
+ .phy_init = qcom_snps_eusb2_hsphy_init,
+ .clk_names = qcom_eusb2_hsphy_clock_names,
+ .num_clks = ARRAY_SIZE(qcom_eusb2_hsphy_clock_names),
+};
+
+static int snps_eusb2_hsphy_init(struct phy *p)
+{
+ struct snps_eusb2_hsphy *phy = phy_get_drvdata(p);
+ int ret;
+
+ ret = regulator_bulk_enable(ARRAY_SIZE(phy->vregs), phy->vregs);
+ if (ret)
+ return ret;
+
+ ret = phy_init(phy->repeater);
+ if (ret) {
+ dev_err(&p->dev, "repeater init failed. %d\n", ret);
+ goto disable_vreg;
+ }
+
+ ret = clk_bulk_prepare_enable(phy->data->num_clks, phy->clks);
+ if (ret) {
+ dev_err(&p->dev, "failed to enable ref clock, %d\n", ret);
+ goto disable_vreg;
+ }
+
+ ret = reset_control_assert(phy->phy_reset);
+ if (ret) {
+ dev_err(&p->dev, "failed to assert phy_reset, %d\n", ret);
+ goto disable_ref_clk;
+ }
+
+ usleep_range(100, 150);
+
+ ret = reset_control_deassert(phy->phy_reset);
+ if (ret) {
+ dev_err(&p->dev, "failed to de-assert phy_reset, %d\n", ret);
+ goto disable_ref_clk;
+ }
+
+ ret = phy->data->phy_init(p);
+ if (ret)
+ goto disable_ref_clk;
+
+ return 0;
+
+disable_ref_clk:
+ clk_bulk_disable_unprepare(phy->data->num_clks, phy->clks);
+
+disable_vreg:
+ regulator_bulk_disable(ARRAY_SIZE(phy->vregs), phy->vregs);
+
+ return ret;
+}
+
+static int snps_eusb2_hsphy_exit(struct phy *p)
+{
+ struct snps_eusb2_hsphy *phy = phy_get_drvdata(p);
+
+ clk_disable_unprepare(phy->ref_clk);
+
+ regulator_bulk_disable(ARRAY_SIZE(phy->vregs), phy->vregs);
+
+ phy_exit(phy->repeater);
+
+ return 0;
+}
+
+static const struct phy_ops snps_eusb2_hsphy_ops = {
+ .init = snps_eusb2_hsphy_init,
+ .exit = snps_eusb2_hsphy_exit,
+ .set_mode = snps_eusb2_hsphy_set_mode,
+ .owner = THIS_MODULE,
+};
+
+static int snps_eusb2_hsphy_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct device_node *np = dev->of_node;
+ struct snps_eusb2_hsphy *phy;
+ struct phy_provider *phy_provider;
+ struct phy *generic_phy;
+ int ret, i;
+ int num;
+
+ phy = devm_kzalloc(dev, sizeof(*phy), GFP_KERNEL);
+ if (!phy)
+ return -ENOMEM;
+
+ phy->data = device_get_match_data(dev);
+ if (!phy->data)
+ return -EINVAL;
+
+ phy->base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(phy->base))
+ return PTR_ERR(phy->base);
+
+ phy->phy_reset = devm_reset_control_get_optional_exclusive(dev, NULL);
+ if (IS_ERR(phy->phy_reset))
+ return PTR_ERR(phy->phy_reset);
+
+ phy->clks = devm_kcalloc(dev, phy->data->num_clks, sizeof(*phy->clks),
+ GFP_KERNEL);
+ if (!phy->clks)
+ return -ENOMEM;
+
+ for (int i = 0; i < phy->data->num_clks; ++i)
+ phy->clks[i].id = phy->data->clk_names[i];
+
+ ret = devm_clk_bulk_get(dev, phy->data->num_clks, phy->clks);
+ if (ret)
+ return dev_err_probe(dev, ret,
+ "failed to get phy clock(s)\n");
+
+ phy->ref_clk = NULL;
+ for (int i = 0; i < phy->data->num_clks; ++i) {
+ if (!strcmp(phy->clks[i].id, "ref")) {
+ phy->ref_clk = phy->clks[i].clk;
+ break;
+ }
+ }
+
+ if (IS_ERR_OR_NULL(phy->ref_clk))
+ return dev_err_probe(dev, PTR_ERR(phy->ref_clk),
+ "failed to get ref clk\n");
+
+ num = ARRAY_SIZE(phy->vregs);
+ for (i = 0; i < num; i++)
+ phy->vregs[i].supply = eusb2_hsphy_vreg_names[i];
+
+ ret = devm_regulator_bulk_get(dev, num, phy->vregs);
+ if (ret)
+ return dev_err_probe(dev, ret,
+ "failed to get regulator supplies\n");
+
+ phy->repeater = devm_of_phy_optional_get(dev, np, 0);
+ if (IS_ERR(phy->repeater))
+ return dev_err_probe(dev, PTR_ERR(phy->repeater),
+ "failed to get repeater\n");
+
+ generic_phy = devm_phy_create(dev, NULL, &snps_eusb2_hsphy_ops);
+ if (IS_ERR(generic_phy)) {
+ dev_err(dev, "failed to create phy %d\n", ret);
+ return PTR_ERR(generic_phy);
+ }
+
+ dev_set_drvdata(dev, phy);
+ phy_set_drvdata(generic_phy, phy);
+
+ phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
+ if (IS_ERR(phy_provider))
+ return PTR_ERR(phy_provider);
+
+ dev_info(dev, "Registered Snps-eUSB2 phy\n");
+
+ return 0;
+}
+
+static const struct of_device_id snps_eusb2_hsphy_of_match_table[] = {
+ {
+ .compatible = "qcom,sm8550-snps-eusb2-phy",
+ .data = &sm8550_snps_eusb2_phy,
+ }, {
+ .compatible = "samsung,exynos2200-eusb2-phy",
+ .data = &exynos2200_snps_eusb2_phy,
+ }, { },
+};
+MODULE_DEVICE_TABLE(of, snps_eusb2_hsphy_of_match_table);
+
+static struct platform_driver snps_eusb2_hsphy_driver = {
+ .probe = snps_eusb2_hsphy_probe,
+ .driver = {
+ .name = "snps-eusb2-hsphy",
+ .of_match_table = snps_eusb2_hsphy_of_match_table,
+ },
+};
+
+module_platform_driver(snps_eusb2_hsphy_driver);
+MODULE_DESCRIPTION("Synopsys eUSB2 HS PHY driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/phy/qualcomm/Kconfig b/drivers/phy/qualcomm/Kconfig
index c1e0a11ddd76..ef14f4e33973 100644
--- a/drivers/phy/qualcomm/Kconfig
+++ b/drivers/phy/qualcomm/Kconfig
@@ -125,15 +125,6 @@ config PHY_QCOM_QUSB2
PHY which is usually paired with either the ChipIdea or Synopsys DWC3
USB IPs on MSM SOCs.
-config PHY_QCOM_SNPS_EUSB2
- tristate "Qualcomm SNPS eUSB2 PHY Driver"
- depends on OF && (ARCH_QCOM || COMPILE_TEST)
- select GENERIC_PHY
- help
- Enable support for the USB high-speed SNPS eUSB2 phy on Qualcomm
- chipsets. The PHY is paired with a Synopsys DWC3 USB controller
- on Qualcomm SOCs.
-
config PHY_QCOM_EUSB2_REPEATER
tristate "Qualcomm SNPS eUSB2 Repeater Driver"
depends on OF && (ARCH_QCOM || COMPILE_TEST)
diff --git a/drivers/phy/qualcomm/Makefile b/drivers/phy/qualcomm/Makefile
index 42038bc30974..3851e28a212d 100644
--- a/drivers/phy/qualcomm/Makefile
+++ b/drivers/phy/qualcomm/Makefile
@@ -15,7 +15,6 @@ obj-$(CONFIG_PHY_QCOM_QMP_USB) += phy-qcom-qmp-usb.o
obj-$(CONFIG_PHY_QCOM_QMP_USB_LEGACY) += phy-qcom-qmp-usb-legacy.o
obj-$(CONFIG_PHY_QCOM_QUSB2) += phy-qcom-qusb2.o
-obj-$(CONFIG_PHY_QCOM_SNPS_EUSB2) += phy-qcom-snps-eusb2.o
obj-$(CONFIG_PHY_QCOM_EUSB2_REPEATER) += phy-qcom-eusb2-repeater.o
obj-$(CONFIG_PHY_QCOM_UNIPHY_PCIE_28LP) += phy-qcom-uniphy-pcie-28lp.o
obj-$(CONFIG_PHY_QCOM_USB_HS) += phy-qcom-usb-hs.o
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
index c232b8fe9846..461b9e0af610 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
@@ -3021,8 +3021,6 @@ struct qmp_phy_cfg {
bool skip_start_delay;
- bool has_nocsr_reset;
-
/* QMP PHY pipe clock interface rate */
unsigned long pipe_clock_rate;
@@ -3035,6 +3033,7 @@ struct qmp_pcie {
const struct qmp_phy_cfg *cfg;
bool tcsr_4ln_config;
+ bool skip_init;
void __iomem *serdes;
void __iomem *pcs;
@@ -4020,7 +4019,6 @@ static const struct qmp_phy_cfg sm8550_qmp_gen4x2_pciephy_cfg = {
.pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL,
.phy_status = PHYSTATUS_4_20,
- .has_nocsr_reset = true,
/* 20MHz PHY AUX Clock */
.aux_clock_rate = 20000000,
@@ -4053,7 +4051,6 @@ static const struct qmp_phy_cfg sm8650_qmp_gen4x2_pciephy_cfg = {
.pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL,
.phy_status = PHYSTATUS_4_20,
- .has_nocsr_reset = true,
/* 20MHz PHY AUX Clock */
.aux_clock_rate = 20000000,
@@ -4173,7 +4170,6 @@ static const struct qmp_phy_cfg x1e80100_qmp_gen4x2_pciephy_cfg = {
.pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL,
.phy_status = PHYSTATUS_4_20,
- .has_nocsr_reset = true,
};
static const struct qmp_phy_cfg x1e80100_qmp_gen4x4_pciephy_cfg = {
@@ -4207,7 +4203,6 @@ static const struct qmp_phy_cfg x1e80100_qmp_gen4x4_pciephy_cfg = {
.pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL,
.phy_status = PHYSTATUS_4_20,
- .has_nocsr_reset = true,
};
static const struct qmp_phy_cfg x1e80100_qmp_gen4x8_pciephy_cfg = {
@@ -4233,13 +4228,12 @@ static const struct qmp_phy_cfg x1e80100_qmp_gen4x8_pciephy_cfg = {
.reset_list = sdm845_pciephy_reset_l,
.num_resets = ARRAY_SIZE(sdm845_pciephy_reset_l),
- .vreg_list = sm8550_qmp_phy_vreg_l,
- .num_vregs = ARRAY_SIZE(sm8550_qmp_phy_vreg_l),
+ .vreg_list = qmp_phy_vreg_l,
+ .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l),
.regs = pciephy_v6_regs_layout,
.pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL,
.phy_status = PHYSTATUS_4_20,
- .has_nocsr_reset = true,
};
static const struct qmp_phy_cfg qmp_v6_gen4x4_pciephy_cfg = {
@@ -4337,18 +4331,38 @@ static int qmp_pcie_init(struct phy *phy)
{
struct qmp_pcie *qmp = phy_get_drvdata(phy);
const struct qmp_phy_cfg *cfg = qmp->cfg;
+ void __iomem *pcs = qmp->pcs;
+ bool phy_initialized = !!(readl(pcs + cfg->regs[QPHY_START_CTRL]));
int ret;
+ qmp->skip_init = qmp->nocsr_reset && phy_initialized;
+ /*
+ * We need to check the existence of init sequences in two cases:
+ * 1. The PHY doesn't support no_csr reset.
+ * 2. The PHY supports no_csr reset but isn't initialized by bootloader.
+ * As we can't skip init in these two cases.
+ */
+ if (!qmp->skip_init && !cfg->tbls.serdes_num) {
+ dev_err(qmp->dev, "Init sequence not available\n");
+ return -ENODATA;
+ }
+
ret = regulator_bulk_enable(cfg->num_vregs, qmp->vregs);
if (ret) {
dev_err(qmp->dev, "failed to enable regulators, err=%d\n", ret);
return ret;
}
- ret = reset_control_bulk_assert(cfg->num_resets, qmp->resets);
- if (ret) {
- dev_err(qmp->dev, "reset assert failed\n");
- goto err_disable_regulators;
+ /*
+ * Toggle BCR reset for PHY that doesn't support no_csr reset or has not
+ * been initialized.
+ */
+ if (!qmp->skip_init) {
+ ret = reset_control_bulk_assert(cfg->num_resets, qmp->resets);
+ if (ret) {
+ dev_err(qmp->dev, "reset assert failed\n");
+ goto err_disable_regulators;
+ }
}
ret = reset_control_assert(qmp->nocsr_reset);
@@ -4359,10 +4373,12 @@ static int qmp_pcie_init(struct phy *phy)
usleep_range(200, 300);
- ret = reset_control_bulk_deassert(cfg->num_resets, qmp->resets);
- if (ret) {
- dev_err(qmp->dev, "reset deassert failed\n");
- goto err_assert_reset;
+ if (!qmp->skip_init) {
+ ret = reset_control_bulk_deassert(cfg->num_resets, qmp->resets);
+ if (ret) {
+ dev_err(qmp->dev, "reset deassert failed\n");
+ goto err_assert_reset;
+ }
}
ret = clk_bulk_prepare_enable(ARRAY_SIZE(qmp_pciephy_clk_l), qmp->clks);
@@ -4372,7 +4388,8 @@ static int qmp_pcie_init(struct phy *phy)
return 0;
err_assert_reset:
- reset_control_bulk_assert(cfg->num_resets, qmp->resets);
+ if (!qmp->skip_init)
+ reset_control_bulk_assert(cfg->num_resets, qmp->resets);
err_disable_regulators:
regulator_bulk_disable(cfg->num_vregs, qmp->vregs);
@@ -4384,7 +4401,10 @@ static int qmp_pcie_exit(struct phy *phy)
struct qmp_pcie *qmp = phy_get_drvdata(phy);
const struct qmp_phy_cfg *cfg = qmp->cfg;
- reset_control_bulk_assert(cfg->num_resets, qmp->resets);
+ if (qmp->nocsr_reset)
+ reset_control_assert(qmp->nocsr_reset);
+ else
+ reset_control_bulk_assert(cfg->num_resets, qmp->resets);
clk_bulk_disable_unprepare(ARRAY_SIZE(qmp_pciephy_clk_l), qmp->clks);
@@ -4403,6 +4423,13 @@ static int qmp_pcie_power_on(struct phy *phy)
unsigned int mask, val;
int ret;
+ /*
+ * Write CSR register for PHY that doesn't support no_csr reset or has not
+ * been initialized.
+ */
+ if (qmp->skip_init)
+ goto skip_tbls_init;
+
qphy_setbits(pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL],
cfg->pwrdn_ctrl);
@@ -4414,6 +4441,7 @@ static int qmp_pcie_power_on(struct phy *phy)
qmp_pcie_init_registers(qmp, &cfg->tbls);
qmp_pcie_init_registers(qmp, mode_tbls);
+skip_tbls_init:
ret = clk_bulk_prepare_enable(qmp->num_pipe_clks, qmp->pipe_clks);
if (ret)
return ret;
@@ -4424,6 +4452,9 @@ static int qmp_pcie_power_on(struct phy *phy)
goto err_disable_pipe_clk;
}
+ if (qmp->skip_init)
+ goto skip_serdes_start;
+
/* Pull PHY out of reset state */
qphy_clrbits(pcs, cfg->regs[QPHY_SW_RESET], SW_RESET);
@@ -4433,6 +4464,7 @@ static int qmp_pcie_power_on(struct phy *phy)
if (!cfg->skip_start_delay)
usleep_range(1000, 1200);
+skip_serdes_start:
status = pcs + cfg->regs[QPHY_PCS_STATUS];
mask = cfg->phy_status;
ret = readl_poll_timeout(status, val, !(val & mask), 200,
@@ -4457,6 +4489,15 @@ static int qmp_pcie_power_off(struct phy *phy)
clk_bulk_disable_unprepare(qmp->num_pipe_clks, qmp->pipe_clks);
+ /*
+ * While powering off the PHY, only qmp->nocsr_reset needs to be checked. In
+ * this way, no matter whether the PHY settings were initially programmed by
+ * bootloader or PHY driver itself, we can reuse them when PHY is powered on
+ * next time.
+ */
+ if (qmp->nocsr_reset)
+ goto skip_phy_deinit;
+
/* PHY reset */
qphy_setbits(qmp->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET);
@@ -4468,6 +4509,7 @@ static int qmp_pcie_power_off(struct phy *phy)
qphy_clrbits(qmp->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL],
cfg->pwrdn_ctrl);
+skip_phy_deinit:
return 0;
}
@@ -4557,12 +4599,10 @@ static int qmp_pcie_reset_init(struct qmp_pcie *qmp)
if (ret)
return dev_err_probe(dev, ret, "failed to get resets\n");
- if (cfg->has_nocsr_reset) {
- qmp->nocsr_reset = devm_reset_control_get_exclusive(dev, "phy_nocsr");
- if (IS_ERR(qmp->nocsr_reset))
- return dev_err_probe(dev, PTR_ERR(qmp->nocsr_reset),
- "failed to get no-csr reset\n");
- }
+ qmp->nocsr_reset = devm_reset_control_get_optional_exclusive(dev, "phy_nocsr");
+ if (IS_ERR(qmp->nocsr_reset))
+ return dev_err_probe(dev, PTR_ERR(qmp->nocsr_reset),
+ "failed to get no-csr reset\n");
return 0;
}
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
index 787721570457..ed646a7e705b 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
@@ -2106,12 +2106,16 @@ static void __iomem *qmp_usb_iomap(struct device *dev, struct device_node *np,
int index, bool exclusive)
{
struct resource res;
+ void __iomem *mem;
if (!exclusive) {
if (of_address_to_resource(np, index, &res))
return IOMEM_ERR_PTR(-EINVAL);
- return devm_ioremap(dev, res.start, resource_size(&res));
+ mem = devm_ioremap(dev, res.start, resource_size(&res));
+ if (!mem)
+ return IOMEM_ERR_PTR(-ENOMEM);
+ return mem;
}
return devm_of_iomap(dev, np, index, NULL);
diff --git a/drivers/phy/qualcomm/phy-qcom-qusb2.c b/drivers/phy/qualcomm/phy-qcom-qusb2.c
index 1f5f7df14d5a..49c37c53b38e 100644
--- a/drivers/phy/qualcomm/phy-qcom-qusb2.c
+++ b/drivers/phy/qualcomm/phy-qcom-qusb2.c
@@ -151,21 +151,6 @@ static const struct qusb2_phy_init_tbl ipq6018_init_tbl[] = {
QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL_AUTOPGM_CTL1, 0x9F),
};
-static const struct qusb2_phy_init_tbl ipq5424_init_tbl[] = {
- QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL, 0x14),
- QUSB2_PHY_INIT_CFG_L(QUSB2PHY_PORT_TUNE1, 0x00),
- QUSB2_PHY_INIT_CFG_L(QUSB2PHY_PORT_TUNE2, 0x53),
- QUSB2_PHY_INIT_CFG_L(QUSB2PHY_PORT_TUNE4, 0xc3),
- QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL_TUNE, 0x30),
- QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL_USER_CTL1, 0x79),
- QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL_USER_CTL2, 0x21),
- QUSB2_PHY_INIT_CFG_L(QUSB2PHY_PORT_TUNE5, 0x00),
- QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL_PWR_CTRL, 0x00),
- QUSB2_PHY_INIT_CFG_L(QUSB2PHY_PORT_TEST2, 0x14),
- QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL_TEST, 0x80),
- QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL_AUTOPGM_CTL1, 0x9f),
-};
-
static const struct qusb2_phy_init_tbl qcs615_init_tbl[] = {
QUSB2_PHY_INIT_CFG_L(QUSB2PHY_PORT_TUNE1, 0xc8),
QUSB2_PHY_INIT_CFG_L(QUSB2PHY_PORT_TUNE2, 0xb3),
@@ -359,16 +344,6 @@ static const struct qusb2_phy_cfg ipq6018_phy_cfg = {
.autoresume_en = BIT(0),
};
-static const struct qusb2_phy_cfg ipq5424_phy_cfg = {
- .tbl = ipq5424_init_tbl,
- .tbl_num = ARRAY_SIZE(ipq5424_init_tbl),
- .regs = ipq6018_regs_layout,
-
- .disable_ctrl = POWER_DOWN,
- .mask_core_ready = PLL_LOCKED,
- .autoresume_en = BIT(0),
-};
-
static const struct qusb2_phy_cfg qcs615_phy_cfg = {
.tbl = qcs615_init_tbl,
.tbl_num = ARRAY_SIZE(qcs615_init_tbl),
@@ -955,7 +930,7 @@ static const struct phy_ops qusb2_phy_gen_ops = {
static const struct of_device_id qusb2_phy_of_match_table[] = {
{
.compatible = "qcom,ipq5424-qusb2-phy",
- .data = &ipq5424_phy_cfg,
+ .data = &ipq6018_phy_cfg,
}, {
.compatible = "qcom,ipq6018-qusb2-phy",
.data = &ipq6018_phy_cfg,
diff --git a/drivers/phy/qualcomm/phy-qcom-snps-eusb2.c b/drivers/phy/qualcomm/phy-qcom-snps-eusb2.c
deleted file mode 100644
index 1484691a41d5..000000000000
--- a/drivers/phy/qualcomm/phy-qcom-snps-eusb2.c
+++ /dev/null
@@ -1,442 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (c) 2023, Linaro Limited
- */
-
-#include <linux/bitfield.h>
-#include <linux/clk.h>
-#include <linux/delay.h>
-#include <linux/iopoll.h>
-#include <linux/mod_devicetable.h>
-#include <linux/phy/phy.h>
-#include <linux/platform_device.h>
-#include <linux/regulator/consumer.h>
-#include <linux/reset.h>
-
-#define USB_PHY_UTMI_CTRL0 (0x3c)
-#define SLEEPM BIT(0)
-#define OPMODE_MASK GENMASK(4, 3)
-#define OPMODE_NONDRIVING BIT(3)
-
-#define USB_PHY_UTMI_CTRL5 (0x50)
-#define POR BIT(1)
-
-#define USB_PHY_HS_PHY_CTRL_COMMON0 (0x54)
-#define PHY_ENABLE BIT(0)
-#define SIDDQ_SEL BIT(1)
-#define SIDDQ BIT(2)
-#define RETENABLEN BIT(3)
-#define FSEL_MASK GENMASK(6, 4)
-#define FSEL_19_2_MHZ_VAL (0x0)
-#define FSEL_38_4_MHZ_VAL (0x4)
-
-#define USB_PHY_CFG_CTRL_1 (0x58)
-#define PHY_CFG_PLL_CPBIAS_CNTRL_MASK GENMASK(7, 1)
-
-#define USB_PHY_CFG_CTRL_2 (0x5c)
-#define PHY_CFG_PLL_FB_DIV_7_0_MASK GENMASK(7, 0)
-#define DIV_7_0_19_2_MHZ_VAL (0x90)
-#define DIV_7_0_38_4_MHZ_VAL (0xc8)
-
-#define USB_PHY_CFG_CTRL_3 (0x60)
-#define PHY_CFG_PLL_FB_DIV_11_8_MASK GENMASK(3, 0)
-#define DIV_11_8_19_2_MHZ_VAL (0x1)
-#define DIV_11_8_38_4_MHZ_VAL (0x0)
-
-#define PHY_CFG_PLL_REF_DIV GENMASK(7, 4)
-#define PLL_REF_DIV_VAL (0x0)
-
-#define USB_PHY_HS_PHY_CTRL2 (0x64)
-#define VBUSVLDEXT0 BIT(0)
-#define USB2_SUSPEND_N BIT(2)
-#define USB2_SUSPEND_N_SEL BIT(3)
-#define VBUS_DET_EXT_SEL BIT(4)
-
-#define USB_PHY_CFG_CTRL_4 (0x68)
-#define PHY_CFG_PLL_GMP_CNTRL_MASK GENMASK(1, 0)
-#define PHY_CFG_PLL_INT_CNTRL_MASK GENMASK(7, 2)
-
-#define USB_PHY_CFG_CTRL_5 (0x6c)
-#define PHY_CFG_PLL_PROP_CNTRL_MASK GENMASK(4, 0)
-#define PHY_CFG_PLL_VREF_TUNE_MASK GENMASK(7, 6)
-
-#define USB_PHY_CFG_CTRL_6 (0x70)
-#define PHY_CFG_PLL_VCO_CNTRL_MASK GENMASK(2, 0)
-
-#define USB_PHY_CFG_CTRL_7 (0x74)
-
-#define USB_PHY_CFG_CTRL_8 (0x78)
-#define PHY_CFG_TX_FSLS_VREF_TUNE_MASK GENMASK(1, 0)
-#define PHY_CFG_TX_FSLS_VREG_BYPASS BIT(2)
-#define PHY_CFG_TX_HS_VREF_TUNE_MASK GENMASK(5, 3)
-#define PHY_CFG_TX_HS_XV_TUNE_MASK GENMASK(7, 6)
-
-#define USB_PHY_CFG_CTRL_9 (0x7c)
-#define PHY_CFG_TX_PREEMP_TUNE_MASK GENMASK(2, 0)
-#define PHY_CFG_TX_RES_TUNE_MASK GENMASK(4, 3)
-#define PHY_CFG_TX_RISE_TUNE_MASK GENMASK(6, 5)
-#define PHY_CFG_RCAL_BYPASS BIT(7)
-
-#define USB_PHY_CFG_CTRL_10 (0x80)
-
-#define USB_PHY_CFG0 (0x94)
-#define DATAPATH_CTRL_OVERRIDE_EN BIT(0)
-#define CMN_CTRL_OVERRIDE_EN BIT(1)
-
-#define UTMI_PHY_CMN_CTRL0 (0x98)
-#define TESTBURNIN BIT(6)
-
-#define USB_PHY_FSEL_SEL (0xb8)
-#define FSEL_SEL BIT(0)
-
-#define USB_PHY_APB_ACCESS_CMD (0x130)
-#define RW_ACCESS BIT(0)
-#define APB_START_CMD BIT(1)
-#define APB_LOGIC_RESET BIT(2)
-
-#define USB_PHY_APB_ACCESS_STATUS (0x134)
-#define ACCESS_DONE BIT(0)
-#define TIMED_OUT BIT(1)
-#define ACCESS_ERROR BIT(2)
-#define ACCESS_IN_PROGRESS BIT(3)
-
-#define USB_PHY_APB_ADDRESS (0x138)
-#define APB_REG_ADDR_MASK GENMASK(7, 0)
-
-#define USB_PHY_APB_WRDATA_LSB (0x13c)
-#define APB_REG_WRDATA_7_0_MASK GENMASK(3, 0)
-
-#define USB_PHY_APB_WRDATA_MSB (0x140)
-#define APB_REG_WRDATA_15_8_MASK GENMASK(7, 4)
-
-#define USB_PHY_APB_RDDATA_LSB (0x144)
-#define APB_REG_RDDATA_7_0_MASK GENMASK(3, 0)
-
-#define USB_PHY_APB_RDDATA_MSB (0x148)
-#define APB_REG_RDDATA_15_8_MASK GENMASK(7, 4)
-
-static const char * const eusb2_hsphy_vreg_names[] = {
- "vdd", "vdda12",
-};
-
-#define EUSB2_NUM_VREGS ARRAY_SIZE(eusb2_hsphy_vreg_names)
-
-struct qcom_snps_eusb2_hsphy {
- struct phy *phy;
- void __iomem *base;
-
- struct clk *ref_clk;
- struct reset_control *phy_reset;
-
- struct regulator_bulk_data vregs[EUSB2_NUM_VREGS];
-
- enum phy_mode mode;
-
- struct phy *repeater;
-};
-
-static int qcom_snps_eusb2_hsphy_set_mode(struct phy *p, enum phy_mode mode, int submode)
-{
- struct qcom_snps_eusb2_hsphy *phy = phy_get_drvdata(p);
-
- phy->mode = mode;
-
- return phy_set_mode_ext(phy->repeater, mode, submode);
-}
-
-static void qcom_snps_eusb2_hsphy_write_mask(void __iomem *base, u32 offset,
- u32 mask, u32 val)
-{
- u32 reg;
-
- reg = readl_relaxed(base + offset);
- reg &= ~mask;
- reg |= val & mask;
- writel_relaxed(reg, base + offset);
-
- /* Ensure above write is completed */
- readl_relaxed(base + offset);
-}
-
-static void qcom_eusb2_default_parameters(struct qcom_snps_eusb2_hsphy *phy)
-{
- /* default parameters: tx pre-emphasis */
- qcom_snps_eusb2_hsphy_write_mask(phy->base, USB_PHY_CFG_CTRL_9,
- PHY_CFG_TX_PREEMP_TUNE_MASK,
- FIELD_PREP(PHY_CFG_TX_PREEMP_TUNE_MASK, 0));
-
- /* tx rise/fall time */
- qcom_snps_eusb2_hsphy_write_mask(phy->base, USB_PHY_CFG_CTRL_9,
- PHY_CFG_TX_RISE_TUNE_MASK,
- FIELD_PREP(PHY_CFG_TX_RISE_TUNE_MASK, 0x2));
-
- /* source impedance adjustment */
- qcom_snps_eusb2_hsphy_write_mask(phy->base, USB_PHY_CFG_CTRL_9,
- PHY_CFG_TX_RES_TUNE_MASK,
- FIELD_PREP(PHY_CFG_TX_RES_TUNE_MASK, 0x1));
-
- /* dc voltage level adjustement */
- qcom_snps_eusb2_hsphy_write_mask(phy->base, USB_PHY_CFG_CTRL_8,
- PHY_CFG_TX_HS_VREF_TUNE_MASK,
- FIELD_PREP(PHY_CFG_TX_HS_VREF_TUNE_MASK, 0x3));
-
- /* transmitter HS crossover adjustement */
- qcom_snps_eusb2_hsphy_write_mask(phy->base, USB_PHY_CFG_CTRL_8,
- PHY_CFG_TX_HS_XV_TUNE_MASK,
- FIELD_PREP(PHY_CFG_TX_HS_XV_TUNE_MASK, 0x0));
-}
-
-static int qcom_eusb2_ref_clk_init(struct qcom_snps_eusb2_hsphy *phy)
-{
- unsigned long ref_clk_freq = clk_get_rate(phy->ref_clk);
-
- switch (ref_clk_freq) {
- case 19200000:
- qcom_snps_eusb2_hsphy_write_mask(phy->base, USB_PHY_HS_PHY_CTRL_COMMON0,
- FSEL_MASK,
- FIELD_PREP(FSEL_MASK, FSEL_19_2_MHZ_VAL));
-
- qcom_snps_eusb2_hsphy_write_mask(phy->base, USB_PHY_CFG_CTRL_2,
- PHY_CFG_PLL_FB_DIV_7_0_MASK,
- DIV_7_0_19_2_MHZ_VAL);
-
- qcom_snps_eusb2_hsphy_write_mask(phy->base, USB_PHY_CFG_CTRL_3,
- PHY_CFG_PLL_FB_DIV_11_8_MASK,
- DIV_11_8_19_2_MHZ_VAL);
- break;
-
- case 38400000:
- qcom_snps_eusb2_hsphy_write_mask(phy->base, USB_PHY_HS_PHY_CTRL_COMMON0,
- FSEL_MASK,
- FIELD_PREP(FSEL_MASK, FSEL_38_4_MHZ_VAL));
-
- qcom_snps_eusb2_hsphy_write_mask(phy->base, USB_PHY_CFG_CTRL_2,
- PHY_CFG_PLL_FB_DIV_7_0_MASK,
- DIV_7_0_38_4_MHZ_VAL);
-
- qcom_snps_eusb2_hsphy_write_mask(phy->base, USB_PHY_CFG_CTRL_3,
- PHY_CFG_PLL_FB_DIV_11_8_MASK,
- DIV_11_8_38_4_MHZ_VAL);
- break;
-
- default:
- dev_err(&phy->phy->dev, "unsupported ref_clk_freq:%lu\n", ref_clk_freq);
- return -EINVAL;
- }
-
- qcom_snps_eusb2_hsphy_write_mask(phy->base, USB_PHY_CFG_CTRL_3,
- PHY_CFG_PLL_REF_DIV, PLL_REF_DIV_VAL);
-
- return 0;
-}
-
-static int qcom_snps_eusb2_hsphy_init(struct phy *p)
-{
- struct qcom_snps_eusb2_hsphy *phy = phy_get_drvdata(p);
- int ret;
-
- ret = regulator_bulk_enable(ARRAY_SIZE(phy->vregs), phy->vregs);
- if (ret)
- return ret;
-
- ret = phy_init(phy->repeater);
- if (ret) {
- dev_err(&p->dev, "repeater init failed. %d\n", ret);
- goto disable_vreg;
- }
-
- ret = clk_prepare_enable(phy->ref_clk);
- if (ret) {
- dev_err(&p->dev, "failed to enable ref clock, %d\n", ret);
- goto disable_vreg;
- }
-
- ret = reset_control_assert(phy->phy_reset);
- if (ret) {
- dev_err(&p->dev, "failed to assert phy_reset, %d\n", ret);
- goto disable_ref_clk;
- }
-
- usleep_range(100, 150);
-
- ret = reset_control_deassert(phy->phy_reset);
- if (ret) {
- dev_err(&p->dev, "failed to de-assert phy_reset, %d\n", ret);
- goto disable_ref_clk;
- }
-
- qcom_snps_eusb2_hsphy_write_mask(phy->base, USB_PHY_CFG0,
- CMN_CTRL_OVERRIDE_EN, CMN_CTRL_OVERRIDE_EN);
-
- qcom_snps_eusb2_hsphy_write_mask(phy->base, USB_PHY_UTMI_CTRL5, POR, POR);
-
- qcom_snps_eusb2_hsphy_write_mask(phy->base, USB_PHY_HS_PHY_CTRL_COMMON0,
- PHY_ENABLE | RETENABLEN, PHY_ENABLE | RETENABLEN);
-
- qcom_snps_eusb2_hsphy_write_mask(phy->base, USB_PHY_APB_ACCESS_CMD,
- APB_LOGIC_RESET, APB_LOGIC_RESET);
-
- qcom_snps_eusb2_hsphy_write_mask(phy->base, UTMI_PHY_CMN_CTRL0, TESTBURNIN, 0);
-
- qcom_snps_eusb2_hsphy_write_mask(phy->base, USB_PHY_FSEL_SEL,
- FSEL_SEL, FSEL_SEL);
-
- /* update ref_clk related registers */
- ret = qcom_eusb2_ref_clk_init(phy);
- if (ret)
- goto disable_ref_clk;
-
- qcom_snps_eusb2_hsphy_write_mask(phy->base, USB_PHY_CFG_CTRL_1,
- PHY_CFG_PLL_CPBIAS_CNTRL_MASK,
- FIELD_PREP(PHY_CFG_PLL_CPBIAS_CNTRL_MASK, 0x1));
-
- qcom_snps_eusb2_hsphy_write_mask(phy->base, USB_PHY_CFG_CTRL_4,
- PHY_CFG_PLL_INT_CNTRL_MASK,
- FIELD_PREP(PHY_CFG_PLL_INT_CNTRL_MASK, 0x8));
-
- qcom_snps_eusb2_hsphy_write_mask(phy->base, USB_PHY_CFG_CTRL_4,
- PHY_CFG_PLL_GMP_CNTRL_MASK,
- FIELD_PREP(PHY_CFG_PLL_GMP_CNTRL_MASK, 0x1));
-
- qcom_snps_eusb2_hsphy_write_mask(phy->base, USB_PHY_CFG_CTRL_5,
- PHY_CFG_PLL_PROP_CNTRL_MASK,
- FIELD_PREP(PHY_CFG_PLL_PROP_CNTRL_MASK, 0x10));
-
- qcom_snps_eusb2_hsphy_write_mask(phy->base, USB_PHY_CFG_CTRL_6,
- PHY_CFG_PLL_VCO_CNTRL_MASK,
- FIELD_PREP(PHY_CFG_PLL_VCO_CNTRL_MASK, 0x0));
-
- qcom_snps_eusb2_hsphy_write_mask(phy->base, USB_PHY_CFG_CTRL_5,
- PHY_CFG_PLL_VREF_TUNE_MASK,
- FIELD_PREP(PHY_CFG_PLL_VREF_TUNE_MASK, 0x1));
-
- qcom_snps_eusb2_hsphy_write_mask(phy->base, USB_PHY_HS_PHY_CTRL2,
- VBUS_DET_EXT_SEL, VBUS_DET_EXT_SEL);
-
- /* set default parameters */
- qcom_eusb2_default_parameters(phy);
-
- qcom_snps_eusb2_hsphy_write_mask(phy->base, USB_PHY_HS_PHY_CTRL2,
- USB2_SUSPEND_N_SEL | USB2_SUSPEND_N,
- USB2_SUSPEND_N_SEL | USB2_SUSPEND_N);
-
- qcom_snps_eusb2_hsphy_write_mask(phy->base, USB_PHY_UTMI_CTRL0, SLEEPM, SLEEPM);
-
- qcom_snps_eusb2_hsphy_write_mask(phy->base, USB_PHY_HS_PHY_CTRL_COMMON0,
- SIDDQ_SEL, SIDDQ_SEL);
-
- qcom_snps_eusb2_hsphy_write_mask(phy->base, USB_PHY_HS_PHY_CTRL_COMMON0,
- SIDDQ, 0);
-
- qcom_snps_eusb2_hsphy_write_mask(phy->base, USB_PHY_UTMI_CTRL5, POR, 0);
-
- qcom_snps_eusb2_hsphy_write_mask(phy->base, USB_PHY_HS_PHY_CTRL2,
- USB2_SUSPEND_N_SEL, 0);
-
- return 0;
-
-disable_ref_clk:
- clk_disable_unprepare(phy->ref_clk);
-
-disable_vreg:
- regulator_bulk_disable(ARRAY_SIZE(phy->vregs), phy->vregs);
-
- return ret;
-}
-
-static int qcom_snps_eusb2_hsphy_exit(struct phy *p)
-{
- struct qcom_snps_eusb2_hsphy *phy = phy_get_drvdata(p);
-
- clk_disable_unprepare(phy->ref_clk);
-
- regulator_bulk_disable(ARRAY_SIZE(phy->vregs), phy->vregs);
-
- phy_exit(phy->repeater);
-
- return 0;
-}
-
-static const struct phy_ops qcom_snps_eusb2_hsphy_ops = {
- .init = qcom_snps_eusb2_hsphy_init,
- .exit = qcom_snps_eusb2_hsphy_exit,
- .set_mode = qcom_snps_eusb2_hsphy_set_mode,
- .owner = THIS_MODULE,
-};
-
-static int qcom_snps_eusb2_hsphy_probe(struct platform_device *pdev)
-{
- struct device *dev = &pdev->dev;
- struct device_node *np = dev->of_node;
- struct qcom_snps_eusb2_hsphy *phy;
- struct phy_provider *phy_provider;
- struct phy *generic_phy;
- int ret, i;
- int num;
-
- phy = devm_kzalloc(dev, sizeof(*phy), GFP_KERNEL);
- if (!phy)
- return -ENOMEM;
-
- phy->base = devm_platform_ioremap_resource(pdev, 0);
- if (IS_ERR(phy->base))
- return PTR_ERR(phy->base);
-
- phy->phy_reset = devm_reset_control_get_exclusive(dev, NULL);
- if (IS_ERR(phy->phy_reset))
- return PTR_ERR(phy->phy_reset);
-
- phy->ref_clk = devm_clk_get(dev, "ref");
- if (IS_ERR(phy->ref_clk))
- return dev_err_probe(dev, PTR_ERR(phy->ref_clk),
- "failed to get ref clk\n");
-
- num = ARRAY_SIZE(phy->vregs);
- for (i = 0; i < num; i++)
- phy->vregs[i].supply = eusb2_hsphy_vreg_names[i];
-
- ret = devm_regulator_bulk_get(dev, num, phy->vregs);
- if (ret)
- return dev_err_probe(dev, ret,
- "failed to get regulator supplies\n");
-
- phy->repeater = devm_of_phy_get_by_index(dev, np, 0);
- if (IS_ERR(phy->repeater))
- return dev_err_probe(dev, PTR_ERR(phy->repeater),
- "failed to get repeater\n");
-
- generic_phy = devm_phy_create(dev, NULL, &qcom_snps_eusb2_hsphy_ops);
- if (IS_ERR(generic_phy)) {
- dev_err(dev, "failed to create phy %d\n", ret);
- return PTR_ERR(generic_phy);
- }
-
- dev_set_drvdata(dev, phy);
- phy_set_drvdata(generic_phy, phy);
-
- phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
- if (IS_ERR(phy_provider))
- return PTR_ERR(phy_provider);
-
- dev_info(dev, "Registered Qcom-eUSB2 phy\n");
-
- return 0;
-}
-
-static const struct of_device_id qcom_snps_eusb2_hsphy_of_match_table[] = {
- { .compatible = "qcom,sm8550-snps-eusb2-phy", },
- { },
-};
-MODULE_DEVICE_TABLE(of, qcom_snps_eusb2_hsphy_of_match_table);
-
-static struct platform_driver qcom_snps_eusb2_hsphy_driver = {
- .probe = qcom_snps_eusb2_hsphy_probe,
- .driver = {
- .name = "qcom-snps-eusb2-hsphy",
- .of_match_table = qcom_snps_eusb2_hsphy_of_match_table,
- },
-};
-
-module_platform_driver(qcom_snps_eusb2_hsphy_driver);
-MODULE_DESCRIPTION("Qualcomm SNPS eUSB2 HS PHY driver");
-MODULE_LICENSE("GPL");
diff --git a/drivers/phy/qualcomm/phy-qcom-uniphy-pcie-28lp.c b/drivers/phy/qualcomm/phy-qcom-uniphy-pcie-28lp.c
index c8b2a3818880..324c0a5d658e 100644
--- a/drivers/phy/qualcomm/phy-qcom-uniphy-pcie-28lp.c
+++ b/drivers/phy/qualcomm/phy-qcom-uniphy-pcie-28lp.c
@@ -75,6 +75,40 @@ struct qcom_uniphy_pcie {
#define phy_to_dw_phy(x) container_of((x), struct qca_uni_pcie_phy, phy)
+static const struct qcom_uniphy_pcie_regs ipq5018_regs[] = {
+ {
+ .offset = SSCG_CTRL_REG_4,
+ .val = 0x1cb9,
+ }, {
+ .offset = SSCG_CTRL_REG_5,
+ .val = 0x023a,
+ }, {
+ .offset = SSCG_CTRL_REG_3,
+ .val = 0xd360,
+ }, {
+ .offset = SSCG_CTRL_REG_1,
+ .val = 0x1,
+ }, {
+ .offset = SSCG_CTRL_REG_2,
+ .val = 0xeb,
+ }, {
+ .offset = CDR_CTRL_REG_4,
+ .val = 0x3f9,
+ }, {
+ .offset = CDR_CTRL_REG_5,
+ .val = 0x1c9,
+ }, {
+ .offset = CDR_CTRL_REG_2,
+ .val = 0x419,
+ }, {
+ .offset = CDR_CTRL_REG_1,
+ .val = 0x200,
+ }, {
+ .offset = PCS_INTERNAL_CONTROL_2,
+ .val = 0xf101,
+ },
+};
+
static const struct qcom_uniphy_pcie_regs ipq5332_regs[] = {
{
.offset = PHY_CFG_PLLCFG,
@@ -88,6 +122,14 @@ static const struct qcom_uniphy_pcie_regs ipq5332_regs[] = {
},
};
+static const struct qcom_uniphy_pcie_data ipq5018_data = {
+ .lane_offset = 0x800,
+ .phy_type = PHY_TYPE_PCIE_GEN2,
+ .init_seq = ipq5018_regs,
+ .init_seq_num = ARRAY_SIZE(ipq5018_regs),
+ .pipe_clk_rate = 125 * MEGA,
+};
+
static const struct qcom_uniphy_pcie_data ipq5332_data = {
.lane_offset = 0x800,
.phy_type = PHY_TYPE_PCIE_GEN3,
@@ -212,6 +254,9 @@ static inline int phy_pipe_clk_register(struct qcom_uniphy_pcie *phy, int id)
static const struct of_device_id qcom_uniphy_pcie_id_table[] = {
{
+ .compatible = "qcom,ipq5018-uniphy-pcie-phy",
+ .data = &ipq5018_data,
+ }, {
.compatible = "qcom,ipq5332-uniphy-pcie-phy",
.data = &ipq5332_data,
}, {
diff --git a/drivers/phy/renesas/phy-rcar-gen3-usb2.c b/drivers/phy/renesas/phy-rcar-gen3-usb2.c
index 9fdf17e0848a..47beb94cd424 100644
--- a/drivers/phy/renesas/phy-rcar-gen3-usb2.c
+++ b/drivers/phy/renesas/phy-rcar-gen3-usb2.c
@@ -29,8 +29,10 @@
#define USB2_INT_ENABLE 0x000
#define USB2_AHB_BUS_CTR 0x008
#define USB2_USBCTR 0x00c
+#define USB2_REGEN_CG_CTRL 0x104 /* RZ/V2H(P) only */
#define USB2_SPD_RSM_TIMSET 0x10c
#define USB2_OC_TIMSET 0x110
+#define USB2_UTMI_CTRL 0x118 /* RZ/V2H(P) only */
#define USB2_COMMCTRL 0x600
#define USB2_OBINTSTA 0x604
#define USB2_OBINTEN 0x608
@@ -51,12 +53,18 @@
#define USB2_USBCTR_DIRPD BIT(2)
#define USB2_USBCTR_PLL_RST BIT(1)
+/* REGEN_CG_CTRL*/
+#define USB2_REGEN_CG_CTRL_UPHY_WEN BIT(0)
+
/* SPD_RSM_TIMSET */
#define USB2_SPD_RSM_TIMSET_INIT 0x014e029b
/* OC_TIMSET */
#define USB2_OC_TIMSET_INIT 0x000209ab
+/* UTMI_CTRL */
+#define USB2_UTMI_CTRL_INIT 0x8000018f
+
/* COMMCTRL */
#define USB2_COMMCTRL_OTG_PERI BIT(31) /* 1 = Peripheral mode */
@@ -126,12 +134,14 @@ struct rcar_gen3_chan {
bool is_otg_channel;
bool uses_otg_pins;
bool soc_no_adp_ctrl;
+ bool utmi_ctrl;
};
struct rcar_gen3_phy_drv_data {
const struct phy_ops *phy_usb2_ops;
bool no_adp_ctrl;
bool init_bus;
+ bool utmi_ctrl;
};
/*
@@ -477,6 +487,14 @@ static int rcar_gen3_phy_usb2_init(struct phy *p)
if (rphy->int_enable_bits)
rcar_gen3_init_otg(channel);
+ if (channel->utmi_ctrl) {
+ val = readl(usb2_base + USB2_REGEN_CG_CTRL) | USB2_REGEN_CG_CTRL_UPHY_WEN;
+ writel(val, usb2_base + USB2_REGEN_CG_CTRL);
+
+ writel(USB2_UTMI_CTRL_INIT, usb2_base + USB2_UTMI_CTRL);
+ writel(val & ~USB2_REGEN_CG_CTRL_UPHY_WEN, usb2_base + USB2_REGEN_CG_CTRL);
+ }
+
rphy->initialized = true;
return 0;
@@ -592,6 +610,12 @@ static const struct rcar_gen3_phy_drv_data rz_g3s_phy_usb2_data = {
.init_bus = true,
};
+static const struct rcar_gen3_phy_drv_data rz_v2h_phy_usb2_data = {
+ .phy_usb2_ops = &rcar_gen3_phy_usb2_ops,
+ .no_adp_ctrl = true,
+ .utmi_ctrl = true,
+};
+
static const struct of_device_id rcar_gen3_phy_usb2_match_table[] = {
{
.compatible = "renesas,usb2-phy-r8a77470",
@@ -610,14 +634,18 @@ static const struct of_device_id rcar_gen3_phy_usb2_match_table[] = {
.data = &rcar_gen3_phy_usb2_data,
},
{
- .compatible = "renesas,rzg2l-usb2-phy",
- .data = &rz_g2l_phy_usb2_data,
- },
- {
.compatible = "renesas,usb2-phy-r9a08g045",
.data = &rz_g3s_phy_usb2_data,
},
{
+ .compatible = "renesas,usb2-phy-r9a09g057",
+ .data = &rz_v2h_phy_usb2_data,
+ },
+ {
+ .compatible = "renesas,rzg2l-usb2-phy",
+ .data = &rz_g2l_phy_usb2_data,
+ },
+ {
.compatible = "renesas,rcar-gen3-usb2-phy",
.data = &rcar_gen3_phy_usb2_data,
},
@@ -764,6 +792,8 @@ static int rcar_gen3_phy_usb2_probe(struct platform_device *pdev)
if (phy_data->no_adp_ctrl)
channel->obint_enable_bits = USB2_OBINT_IDCHG_EN;
+ channel->utmi_ctrl = phy_data->utmi_ctrl;
+
spin_lock_init(&channel->lock);
for (i = 0; i < NUM_OF_PHYS; i++) {
channel->rphys[i].phy = devm_phy_create(dev, NULL,
diff --git a/drivers/phy/rockchip/phy-rockchip-inno-usb2.c b/drivers/phy/rockchip/phy-rockchip-inno-usb2.c
index b5e6a864deeb..b0f23690ec30 100644
--- a/drivers/phy/rockchip/phy-rockchip-inno-usb2.c
+++ b/drivers/phy/rockchip/phy-rockchip-inno-usb2.c
@@ -1583,6 +1583,37 @@ static int rk3588_usb2phy_tuning(struct rockchip_usb2phy *rphy)
return ret;
}
+static const struct rockchip_usb2phy_cfg rk3036_phy_cfgs[] = {
+ {
+ .reg = 0x17c,
+ .num_ports = 2,
+ .phy_tuning = rk3128_usb2phy_tuning,
+ .clkout_ctl = { 0x017c, 11, 11, 1, 0 },
+ .port_cfgs = {
+ [USB2PHY_PORT_OTG] = {
+ .phy_sus = { 0x017c, 8, 0, 0, 0x1d1 },
+ .bvalid_det_en = { 0x017c, 14, 14, 0, 1 },
+ .bvalid_det_st = { 0x017c, 15, 15, 0, 1 },
+ .bvalid_det_clr = { 0x017c, 15, 15, 0, 1 },
+ .ls_det_en = { 0x017c, 12, 12, 0, 1 },
+ .ls_det_st = { 0x017c, 13, 13, 0, 1 },
+ .ls_det_clr = { 0x017c, 13, 13, 0, 1 },
+ .utmi_bvalid = { 0x014c, 8, 8, 0, 1 },
+ .utmi_id = { 0x014c, 11, 11, 0, 1 },
+ .utmi_ls = { 0x014c, 10, 9, 0, 1 },
+
+ },
+ [USB2PHY_PORT_HOST] = {
+ .phy_sus = { 0x0194, 8, 0, 0, 0x1d1 },
+ .ls_det_en = { 0x0194, 14, 14, 0, 1 },
+ .ls_det_st = { 0x0194, 15, 15, 0, 1 },
+ .ls_det_clr = { 0x0194, 15, 15, 0, 1 }
+ }
+ },
+ },
+ { /* sentinel */ }
+};
+
static const struct rockchip_usb2phy_cfg rk3128_phy_cfgs[] = {
{
.reg = 0x17c,
@@ -1892,6 +1923,54 @@ static const struct rockchip_usb2phy_cfg rk3399_phy_cfgs[] = {
{ /* sentinel */ }
};
+static const struct rockchip_usb2phy_cfg rk3562_phy_cfgs[] = {
+ {
+ .reg = 0xff740000,
+ .num_ports = 2,
+ .clkout_ctl = { 0x0108, 4, 4, 1, 0 },
+ .port_cfgs = {
+ [USB2PHY_PORT_OTG] = {
+ .phy_sus = { 0x0100, 8, 0, 0, 0x1d1 },
+ .bvalid_det_en = { 0x0110, 2, 2, 0, 1 },
+ .bvalid_det_st = { 0x0114, 2, 2, 0, 1 },
+ .bvalid_det_clr = { 0x0118, 2, 2, 0, 1 },
+ .idfall_det_en = { 0x0110, 5, 5, 0, 1 },
+ .idfall_det_st = { 0x0114, 5, 5, 0, 1 },
+ .idfall_det_clr = { 0x0118, 5, 5, 0, 1 },
+ .idrise_det_en = { 0x0110, 4, 4, 0, 1 },
+ .idrise_det_st = { 0x0114, 4, 4, 0, 1 },
+ .idrise_det_clr = { 0x0118, 4, 4, 0, 1 },
+ .ls_det_en = { 0x0110, 0, 0, 0, 1 },
+ .ls_det_st = { 0x0114, 0, 0, 0, 1 },
+ .ls_det_clr = { 0x0118, 0, 0, 0, 1 },
+ .utmi_avalid = { 0x0120, 10, 10, 0, 1 },
+ .utmi_bvalid = { 0x0120, 9, 9, 0, 1 },
+ .utmi_ls = { 0x0120, 5, 4, 0, 1 },
+ },
+ [USB2PHY_PORT_HOST] = {
+ .phy_sus = { 0x0104, 8, 0, 0x1d2, 0x1d1 },
+ .ls_det_en = { 0x0110, 1, 1, 0, 1 },
+ .ls_det_st = { 0x0114, 1, 1, 0, 1 },
+ .ls_det_clr = { 0x0118, 1, 1, 0, 1 },
+ .utmi_ls = { 0x0120, 17, 16, 0, 1 },
+ .utmi_hstdet = { 0x0120, 19, 19, 0, 1 }
+ }
+ },
+ .chg_det = {
+ .cp_det = { 0x0120, 24, 24, 0, 1 },
+ .dcp_det = { 0x0120, 23, 23, 0, 1 },
+ .dp_det = { 0x0120, 25, 25, 0, 1 },
+ .idm_sink_en = { 0x0108, 8, 8, 0, 1 },
+ .idp_sink_en = { 0x0108, 7, 7, 0, 1 },
+ .idp_src_en = { 0x0108, 9, 9, 0, 1 },
+ .rdm_pdwn_en = { 0x0108, 10, 10, 0, 1 },
+ .vdm_src_en = { 0x0108, 12, 12, 0, 1 },
+ .vdp_src_en = { 0x0108, 11, 11, 0, 1 },
+ },
+ },
+ { /* sentinel */ }
+};
+
static const struct rockchip_usb2phy_cfg rk3568_phy_cfgs[] = {
{
.reg = 0xfe8a0000,
@@ -2204,12 +2283,14 @@ static const struct rockchip_usb2phy_cfg rv1108_phy_cfgs[] = {
static const struct of_device_id rockchip_usb2phy_dt_match[] = {
{ .compatible = "rockchip,px30-usb2phy", .data = &rk3328_phy_cfgs },
+ { .compatible = "rockchip,rk3036-usb2phy", .data = &rk3036_phy_cfgs },
{ .compatible = "rockchip,rk3128-usb2phy", .data = &rk3128_phy_cfgs },
{ .compatible = "rockchip,rk3228-usb2phy", .data = &rk3228_phy_cfgs },
{ .compatible = "rockchip,rk3308-usb2phy", .data = &rk3308_phy_cfgs },
{ .compatible = "rockchip,rk3328-usb2phy", .data = &rk3328_phy_cfgs },
{ .compatible = "rockchip,rk3366-usb2phy", .data = &rk3366_phy_cfgs },
{ .compatible = "rockchip,rk3399-usb2phy", .data = &rk3399_phy_cfgs },
+ { .compatible = "rockchip,rk3562-usb2phy", .data = &rk3562_phy_cfgs },
{ .compatible = "rockchip,rk3568-usb2phy", .data = &rk3568_phy_cfgs },
{ .compatible = "rockchip,rk3576-usb2phy", .data = &rk3576_phy_cfgs },
{ .compatible = "rockchip,rk3588-usb2phy", .data = &rk3588_phy_cfgs },
diff --git a/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c b/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c
index 77236f012a1f..79db57ee90d1 100644
--- a/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c
+++ b/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c
@@ -320,6 +320,7 @@
#define LN3_TX_SER_RATE_SEL_HBR2_MASK BIT(3)
#define LN3_TX_SER_RATE_SEL_HBR3_MASK BIT(2)
+#define HDMI14_MAX_RATE 340000000
#define HDMI20_MAX_RATE 600000000
enum dp_link_rate {
@@ -328,39 +329,8 @@ enum dp_link_rate {
DP_BW_HBR2,
};
-struct lcpll_config {
- u32 bit_rate;
- u8 lcvco_mode_en;
- u8 pi_en;
- u8 clk_en_100m;
- u8 pms_mdiv;
- u8 pms_mdiv_afc;
- u8 pms_pdiv;
- u8 pms_refdiv;
- u8 pms_sdiv;
- u8 pi_cdiv_rstn;
- u8 pi_cdiv_sel;
- u8 sdm_en;
- u8 sdm_rstn;
- u8 sdc_frac_en;
- u8 sdc_rstn;
- u8 sdm_deno;
- u8 sdm_num_sign;
- u8 sdm_num;
- u8 sdc_n;
- u8 sdc_n2;
- u8 sdc_num;
- u8 sdc_deno;
- u8 sdc_ndiv_rstn;
- u8 ssc_en;
- u8 ssc_fm_dev;
- u8 ssc_fm_freq;
- u8 ssc_clk_div_sel;
- u8 cd_tx_ser_rate_sel;
-};
-
struct ropll_config {
- u32 bit_rate;
+ unsigned long long rate;
u8 pms_mdiv;
u8 pms_mdiv_afc;
u8 pms_pdiv;
@@ -422,19 +392,17 @@ struct rk_hdptx_phy {
struct regmap *regmap;
struct regmap *grf;
- /* PHY const config */
- const struct rk_hdptx_phy_cfg *cfgs;
int phy_id;
-
struct phy *phy;
- struct phy_config *phy_cfg;
+ struct phy_configure_opts_hdmi hdmi_cfg;
struct clk_bulk_data *clks;
int nr_clks;
struct reset_control_bulk_data rsts[RST_MAX];
/* clk provider */
struct clk_hw hw;
- unsigned long rate;
+ unsigned long hw_rate;
+ bool restrict_rate_change;
atomic_t usage_count;
@@ -444,47 +412,47 @@ struct rk_hdptx_phy {
};
static const struct ropll_config ropll_tmds_cfg[] = {
- { 5940000, 124, 124, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 62, 1, 16, 5, 0,
+ { 594000000ULL, 124, 124, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 62, 1, 16, 5, 0,
1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, },
- { 3712500, 155, 155, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 62, 1, 16, 5, 0,
+ { 371250000ULL, 155, 155, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 62, 1, 16, 5, 0,
1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, },
- { 2970000, 124, 124, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 62, 1, 16, 5, 0,
+ { 297000000ULL, 124, 124, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 62, 1, 16, 5, 0,
1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, },
- { 1620000, 135, 135, 1, 1, 3, 1, 1, 0, 1, 1, 1, 1, 4, 0, 3, 5, 5, 0x10,
+ { 162000000ULL, 135, 135, 1, 1, 3, 1, 1, 0, 1, 1, 1, 1, 4, 0, 3, 5, 5, 0x10,
1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, },
- { 1856250, 155, 155, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 62, 1, 16, 5, 0,
+ { 185625000ULL, 155, 155, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 62, 1, 16, 5, 0,
1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, },
- { 1540000, 193, 193, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 193, 1, 32, 2, 1,
+ { 154000000ULL, 193, 193, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 193, 1, 32, 2, 1,
1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, },
- { 1485000, 0x7b, 0x7b, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 4, 0, 3, 5, 5,
+ { 148500000ULL, 0x7b, 0x7b, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 4, 0, 3, 5, 5,
0x10, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, },
- { 1462500, 122, 122, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 244, 1, 16, 2, 1, 1,
+ { 146250000ULL, 122, 122, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 244, 1, 16, 2, 1, 1,
1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, },
- { 1190000, 149, 149, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 149, 1, 16, 2, 1, 1,
+ { 119000000ULL, 149, 149, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 149, 1, 16, 2, 1, 1,
1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, },
- { 1065000, 89, 89, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 89, 1, 16, 1, 0, 1,
+ { 106500000ULL, 89, 89, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 89, 1, 16, 1, 0, 1,
1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, },
- { 1080000, 135, 135, 1, 1, 5, 1, 1, 0, 1, 0, 1, 1, 0x9, 0, 0x05, 0,
+ { 108000000ULL, 135, 135, 1, 1, 5, 1, 1, 0, 1, 0, 1, 1, 0x9, 0, 0x05, 0,
0x14, 0x18, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, },
- { 855000, 214, 214, 1, 1, 11, 1, 1, 1, 1, 1, 1, 1, 214, 1, 16, 2, 1,
+ { 85500000ULL, 214, 214, 1, 1, 11, 1, 1, 1, 1, 1, 1, 1, 214, 1, 16, 2, 1,
1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, },
- { 835000, 105, 105, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 42, 1, 16, 1, 0,
+ { 83500000ULL, 105, 105, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 42, 1, 16, 1, 0,
1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, },
- { 928125, 155, 155, 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 62, 1, 16, 5, 0,
+ { 92812500ULL, 155, 155, 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 62, 1, 16, 5, 0,
1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, },
- { 742500, 124, 124, 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 62, 1, 16, 5, 0,
+ { 74250000ULL, 124, 124, 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 62, 1, 16, 5, 0,
1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, },
- { 650000, 162, 162, 1, 1, 11, 1, 1, 1, 1, 1, 1, 1, 54, 0, 16, 4, 1,
+ { 65000000ULL, 162, 162, 1, 1, 11, 1, 1, 1, 1, 1, 1, 1, 54, 0, 16, 4, 1,
1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, },
- { 502500, 84, 84, 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 11, 1, 4, 5,
+ { 50250000ULL, 84, 84, 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 11, 1, 4, 5,
4, 11, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, },
- { 337500, 0x70, 0x70, 1, 1, 0xf, 1, 1, 1, 1, 1, 1, 1, 0x2, 0, 0x01, 5,
+ { 33750000ULL, 0x70, 0x70, 1, 1, 0xf, 1, 1, 1, 1, 1, 1, 1, 0x2, 0, 0x01, 5,
1, 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, },
- { 400000, 100, 100, 1, 1, 11, 1, 1, 0, 1, 0, 1, 1, 0x9, 0, 0x05, 0,
+ { 40000000ULL, 100, 100, 1, 1, 11, 1, 1, 0, 1, 0, 1, 1, 0x9, 0, 0x05, 0,
0x14, 0x18, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, },
- { 270000, 0x5a, 0x5a, 1, 1, 0xf, 1, 1, 0, 1, 0, 1, 1, 0x9, 0, 0x05, 0,
+ { 27000000ULL, 0x5a, 0x5a, 1, 1, 0xf, 1, 1, 0, 1, 0, 1, 1, 0x9, 0, 0x05, 0,
0x14, 0x18, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, },
- { 251750, 84, 84, 1, 1, 0xf, 1, 1, 1, 1, 1, 1, 1, 168, 1, 16, 4, 1, 1,
+ { 25175000ULL, 84, 84, 1, 1, 0xf, 1, 1, 1, 1, 1, 1, 1, 168, 1, 16, 4, 1, 1,
1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, },
};
@@ -930,10 +898,10 @@ static void rk_hdptx_phy_disable(struct rk_hdptx_phy *hdptx)
regmap_write(hdptx->grf, GRF_HDPTX_CON0, val);
}
-static bool rk_hdptx_phy_clk_pll_calc(unsigned int data_rate,
+static bool rk_hdptx_phy_clk_pll_calc(unsigned long long rate,
struct ropll_config *cfg)
{
- const unsigned int fout = data_rate / 2, fref = 24000;
+ const unsigned int fout = div_u64(rate, 200), fref = 24000;
unsigned long k = 0, lc, k_sub, lc_sub;
unsigned int fvco, sdc;
u32 mdiv, sdiv, n = 8;
@@ -1002,33 +970,34 @@ static bool rk_hdptx_phy_clk_pll_calc(unsigned int data_rate,
return true;
}
-static int rk_hdptx_ropll_tmds_cmn_config(struct rk_hdptx_phy *hdptx,
- unsigned int rate)
+static int rk_hdptx_ropll_tmds_cmn_config(struct rk_hdptx_phy *hdptx)
{
const struct ropll_config *cfg = NULL;
struct ropll_config rc = {0};
- int i;
+ int ret, i;
- hdptx->rate = rate * 100;
+ if (!hdptx->hdmi_cfg.tmds_char_rate)
+ return 0;
for (i = 0; i < ARRAY_SIZE(ropll_tmds_cfg); i++)
- if (rate == ropll_tmds_cfg[i].bit_rate) {
+ if (hdptx->hdmi_cfg.tmds_char_rate == ropll_tmds_cfg[i].rate) {
cfg = &ropll_tmds_cfg[i];
break;
}
if (!cfg) {
- if (rk_hdptx_phy_clk_pll_calc(rate, &rc)) {
- cfg = &rc;
- } else {
- dev_err(hdptx->dev, "%s cannot find pll cfg\n", __func__);
+ if (!rk_hdptx_phy_clk_pll_calc(hdptx->hdmi_cfg.tmds_char_rate, &rc)) {
+ dev_err(hdptx->dev, "%s cannot find pll cfg for rate=%llu\n",
+ __func__, hdptx->hdmi_cfg.tmds_char_rate);
return -EINVAL;
}
+
+ cfg = &rc;
}
- dev_dbg(hdptx->dev, "mdiv=%u, sdiv=%u, sdm_en=%u, k_sign=%u, k=%u, lc=%u\n",
- cfg->pms_mdiv, cfg->pms_sdiv + 1, cfg->sdm_en,
- cfg->sdm_num_sign, cfg->sdm_num, cfg->sdm_deno);
+ dev_dbg(hdptx->dev, "%s rate=%llu mdiv=%u sdiv=%u sdm_en=%u k_sign=%u k=%u lc=%u\n",
+ __func__, hdptx->hdmi_cfg.tmds_char_rate, cfg->pms_mdiv, cfg->pms_sdiv + 1,
+ cfg->sdm_en, cfg->sdm_num_sign, cfg->sdm_num, cfg->sdm_deno);
rk_hdptx_pre_power_up(hdptx);
@@ -1061,20 +1030,26 @@ static int rk_hdptx_ropll_tmds_cmn_config(struct rk_hdptx_phy *hdptx,
regmap_update_bits(hdptx->regmap, CMN_REG(0086), PLL_PCG_POSTDIV_SEL_MASK,
FIELD_PREP(PLL_PCG_POSTDIV_SEL_MASK, cfg->pms_sdiv));
+ regmap_update_bits(hdptx->regmap, CMN_REG(0086), PLL_PCG_CLK_SEL_MASK,
+ FIELD_PREP(PLL_PCG_CLK_SEL_MASK, (hdptx->hdmi_cfg.bpc - 8) >> 1));
+
regmap_update_bits(hdptx->regmap, CMN_REG(0086), PLL_PCG_CLK_EN_MASK,
FIELD_PREP(PLL_PCG_CLK_EN_MASK, 0x1));
- return rk_hdptx_post_enable_pll(hdptx);
+ ret = rk_hdptx_post_enable_pll(hdptx);
+ if (!ret)
+ hdptx->hw_rate = hdptx->hdmi_cfg.tmds_char_rate;
+
+ return ret;
}
-static int rk_hdptx_ropll_tmds_mode_config(struct rk_hdptx_phy *hdptx,
- unsigned int rate)
+static int rk_hdptx_ropll_tmds_mode_config(struct rk_hdptx_phy *hdptx)
{
rk_hdptx_multi_reg_write(hdptx, rk_hdtpx_common_sb_init_seq);
regmap_write(hdptx->regmap, LNTOP_REG(0200), 0x06);
- if (rate >= 3400000) {
+ if (hdptx->hdmi_cfg.tmds_char_rate > HDMI14_MAX_RATE) {
/* For 1/40 bitrate clk */
rk_hdptx_multi_reg_write(hdptx, rk_hdtpx_tmds_lntop_highbr_seq);
} else {
@@ -1126,8 +1101,7 @@ static void rk_hdptx_dp_reset(struct rk_hdptx_phy *hdptx)
HDPTX_I_BGR_EN << 16 | FIELD_PREP(HDPTX_I_BGR_EN, 0x0));
}
-static int rk_hdptx_phy_consumer_get(struct rk_hdptx_phy *hdptx,
- unsigned int rate)
+static int rk_hdptx_phy_consumer_get(struct rk_hdptx_phy *hdptx)
{
enum phy_mode mode = phy_get_mode(hdptx->phy);
u32 status;
@@ -1146,11 +1120,9 @@ static int rk_hdptx_phy_consumer_get(struct rk_hdptx_phy *hdptx,
if (mode == PHY_MODE_DP) {
rk_hdptx_dp_reset(hdptx);
} else {
- if (rate) {
- ret = rk_hdptx_ropll_tmds_cmn_config(hdptx, rate);
- if (ret)
- goto dec_usage;
- }
+ ret = rk_hdptx_ropll_tmds_cmn_config(hdptx);
+ if (ret)
+ goto dec_usage;
}
return 0;
@@ -1445,21 +1417,26 @@ static int rk_hdptx_dp_aux_init(struct rk_hdptx_phy *hdptx)
static int rk_hdptx_phy_power_on(struct phy *phy)
{
struct rk_hdptx_phy *hdptx = phy_get_drvdata(phy);
- int bus_width = phy_get_bus_width(hdptx->phy);
enum phy_mode mode = phy_get_mode(phy);
int ret, lane;
- /*
- * FIXME: Temporary workaround to pass pixel_clk_rate
- * from the HDMI bridge driver until phy_configure_opts_hdmi
- * becomes available in the PHY API.
- */
- unsigned int rate = bus_width & 0xfffffff;
+ if (mode != PHY_MODE_DP) {
+ if (!hdptx->hdmi_cfg.tmds_char_rate) {
+ /*
+ * FIXME: Temporary workaround to setup TMDS char rate
+ * from the RK DW HDMI QP bridge driver.
+ * Will be removed as soon the switch to the HDMI PHY
+ * configuration API has been completed on both ends.
+ */
+ hdptx->hdmi_cfg.tmds_char_rate = phy_get_bus_width(hdptx->phy) & 0xfffffff;
+ hdptx->hdmi_cfg.tmds_char_rate *= 100;
+ }
- dev_dbg(hdptx->dev, "%s bus_width=%x rate=%u\n",
- __func__, bus_width, rate);
+ dev_dbg(hdptx->dev, "%s rate=%llu bpc=%u\n", __func__,
+ hdptx->hdmi_cfg.tmds_char_rate, hdptx->hdmi_cfg.bpc);
+ }
- ret = rk_hdptx_phy_consumer_get(hdptx, rate);
+ ret = rk_hdptx_phy_consumer_get(hdptx);
if (ret)
return ret;
@@ -1490,7 +1467,7 @@ static int rk_hdptx_phy_power_on(struct phy *phy)
regmap_write(hdptx->grf, GRF_HDPTX_CON0,
HDPTX_MODE_SEL << 16 | FIELD_PREP(HDPTX_MODE_SEL, 0x0));
- ret = rk_hdptx_ropll_tmds_mode_config(hdptx, rate);
+ ret = rk_hdptx_ropll_tmds_mode_config(hdptx);
if (ret)
rk_hdptx_phy_consumer_put(hdptx, true);
}
@@ -1505,8 +1482,40 @@ static int rk_hdptx_phy_power_off(struct phy *phy)
return rk_hdptx_phy_consumer_put(hdptx, false);
}
-static int rk_hdptx_phy_verify_config(struct rk_hdptx_phy *hdptx,
- struct phy_configure_opts_dp *dp)
+static int rk_hdptx_phy_verify_hdmi_config(struct rk_hdptx_phy *hdptx,
+ struct phy_configure_opts_hdmi *hdmi)
+{
+ int i;
+
+ if (!hdmi->tmds_char_rate || hdmi->tmds_char_rate > HDMI20_MAX_RATE)
+ return -EINVAL;
+
+ for (i = 0; i < ARRAY_SIZE(ropll_tmds_cfg); i++)
+ if (hdmi->tmds_char_rate == ropll_tmds_cfg[i].rate)
+ break;
+
+ if (i == ARRAY_SIZE(ropll_tmds_cfg) &&
+ !rk_hdptx_phy_clk_pll_calc(hdmi->tmds_char_rate, NULL))
+ return -EINVAL;
+
+ if (!hdmi->bpc)
+ hdmi->bpc = 8;
+
+ switch (hdmi->bpc) {
+ case 8:
+ case 10:
+ case 12:
+ case 16:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int rk_hdptx_phy_verify_dp_config(struct rk_hdptx_phy *hdptx,
+ struct phy_configure_opts_dp *dp)
{
int i;
@@ -1766,12 +1775,23 @@ static int rk_hdptx_phy_configure(struct phy *phy, union phy_configure_opts *opt
enum phy_mode mode = phy_get_mode(phy);
int ret;
- if (mode != PHY_MODE_DP)
- return 0;
+ if (mode != PHY_MODE_DP) {
+ ret = rk_hdptx_phy_verify_hdmi_config(hdptx, &opts->hdmi);
+ if (ret) {
+ dev_err(hdptx->dev, "invalid hdmi params for phy configure\n");
+ } else {
+ hdptx->hdmi_cfg = opts->hdmi;
+ hdptx->restrict_rate_change = true;
+ }
- ret = rk_hdptx_phy_verify_config(hdptx, &opts->dp);
+ dev_dbg(hdptx->dev, "%s rate=%llu bpc=%u\n", __func__,
+ hdptx->hdmi_cfg.tmds_char_rate, hdptx->hdmi_cfg.bpc);
+ return ret;
+ }
+
+ ret = rk_hdptx_phy_verify_dp_config(hdptx, &opts->dp);
if (ret) {
- dev_err(hdptx->dev, "invalid params for phy configure\n");
+ dev_err(hdptx->dev, "invalid dp params for phy configure\n");
return ret;
}
@@ -1803,10 +1823,22 @@ static int rk_hdptx_phy_configure(struct phy *phy, union phy_configure_opts *opt
return 0;
}
+static int rk_hdptx_phy_validate(struct phy *phy, enum phy_mode mode,
+ int submode, union phy_configure_opts *opts)
+{
+ struct rk_hdptx_phy *hdptx = phy_get_drvdata(phy);
+
+ if (mode != PHY_MODE_DP)
+ return rk_hdptx_phy_verify_hdmi_config(hdptx, &opts->hdmi);
+
+ return rk_hdptx_phy_verify_dp_config(hdptx, &opts->dp);
+}
+
static const struct phy_ops rk_hdptx_phy_ops = {
.power_on = rk_hdptx_phy_power_on,
.power_off = rk_hdptx_phy_power_off,
.configure = rk_hdptx_phy_configure,
+ .validate = rk_hdptx_phy_validate,
.owner = THIS_MODULE,
};
@@ -1819,7 +1851,7 @@ static int rk_hdptx_phy_clk_prepare(struct clk_hw *hw)
{
struct rk_hdptx_phy *hdptx = to_rk_hdptx_phy(hw);
- return rk_hdptx_phy_consumer_get(hdptx, hdptx->rate / 100);
+ return rk_hdptx_phy_consumer_get(hdptx);
}
static void rk_hdptx_phy_clk_unprepare(struct clk_hw *hw)
@@ -1834,27 +1866,37 @@ static unsigned long rk_hdptx_phy_clk_recalc_rate(struct clk_hw *hw,
{
struct rk_hdptx_phy *hdptx = to_rk_hdptx_phy(hw);
- return hdptx->rate;
+ return hdptx->hw_rate;
}
static long rk_hdptx_phy_clk_round_rate(struct clk_hw *hw, unsigned long rate,
unsigned long *parent_rate)
{
- u32 bit_rate = rate / 100;
- int i;
+ struct rk_hdptx_phy *hdptx = to_rk_hdptx_phy(hw);
- if (rate > HDMI20_MAX_RATE)
- return rate;
+ /*
+ * FIXME: Temporarily allow altering TMDS char rate via CCF.
+ * To be dropped as soon as the RK DW HDMI QP bridge driver
+ * switches to make use of phy_configure().
+ */
+ if (!hdptx->restrict_rate_change && rate != hdptx->hdmi_cfg.tmds_char_rate) {
+ struct phy_configure_opts_hdmi hdmi = {
+ .tmds_char_rate = rate,
+ };
+ int ret = rk_hdptx_phy_verify_hdmi_config(hdptx, &hdmi);
- for (i = 0; i < ARRAY_SIZE(ropll_tmds_cfg); i++)
- if (bit_rate == ropll_tmds_cfg[i].bit_rate)
- break;
+ if (ret)
+ return ret;
- if (i == ARRAY_SIZE(ropll_tmds_cfg) &&
- !rk_hdptx_phy_clk_pll_calc(bit_rate, NULL))
- return -EINVAL;
+ hdptx->hdmi_cfg = hdmi;
+ }
- return rate;
+ /*
+ * The TMDS char rate shall be adjusted via phy_configure() only,
+ * hence ensure rk_hdptx_phy_clk_set_rate() won't be invoked with
+ * a different rate argument.
+ */
+ return hdptx->hdmi_cfg.tmds_char_rate;
}
static int rk_hdptx_phy_clk_set_rate(struct clk_hw *hw, unsigned long rate,
@@ -1862,7 +1904,21 @@ static int rk_hdptx_phy_clk_set_rate(struct clk_hw *hw, unsigned long rate,
{
struct rk_hdptx_phy *hdptx = to_rk_hdptx_phy(hw);
- return rk_hdptx_ropll_tmds_cmn_config(hdptx, rate / 100);
+ /* Revert any unlikely TMDS char rate change since round_rate() */
+ if (hdptx->hdmi_cfg.tmds_char_rate != rate) {
+ dev_warn(hdptx->dev, "Reverting unexpected rate change from %lu to %llu\n",
+ rate, hdptx->hdmi_cfg.tmds_char_rate);
+ hdptx->hdmi_cfg.tmds_char_rate = rate;
+ }
+
+ /*
+ * The TMDS char rate would be normally programmed in HW during
+ * phy_ops.power_on() or clk_ops.prepare() callbacks, but it might
+ * happen that the former gets fired too late, i.e. after this call,
+ * while the latter being executed only once, i.e. when clock remains
+ * in the prepared state during rate changes.
+ */
+ return rk_hdptx_ropll_tmds_cmn_config(hdptx);
}
static const struct clk_ops hdptx_phy_clk_ops = {
@@ -1925,6 +1981,7 @@ static int rk_hdptx_phy_runtime_resume(struct device *dev)
static int rk_hdptx_phy_probe(struct platform_device *pdev)
{
+ const struct rk_hdptx_phy_cfg *cfgs;
struct phy_provider *phy_provider;
struct device *dev = &pdev->dev;
struct rk_hdptx_phy *hdptx;
@@ -1937,20 +1994,21 @@ static int rk_hdptx_phy_probe(struct platform_device *pdev)
return -ENOMEM;
hdptx->dev = dev;
+ hdptx->hdmi_cfg.bpc = 8;
regs = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
if (IS_ERR(regs))
return dev_err_probe(dev, PTR_ERR(regs),
"Failed to ioremap resource\n");
- hdptx->cfgs = device_get_match_data(dev);
- if (!hdptx->cfgs)
+ cfgs = device_get_match_data(dev);
+ if (!cfgs)
return dev_err_probe(dev, -EINVAL, "missing match data\n");
/* find the phy-id from the io address */
hdptx->phy_id = -ENODEV;
- for (id = 0; id < hdptx->cfgs->num_phys; id++) {
- if (res->start == hdptx->cfgs->phy_ids[id]) {
+ for (id = 0; id < cfgs->num_phys; id++) {
+ if (res->start == cfgs->phy_ids[id]) {
hdptx->phy_id = id;
break;
}
diff --git a/drivers/phy/samsung/Kconfig b/drivers/phy/samsung/Kconfig
index 6566100441d6..b7ab402909a8 100644
--- a/drivers/phy/samsung/Kconfig
+++ b/drivers/phy/samsung/Kconfig
@@ -85,7 +85,7 @@ config PHY_EXYNOS5_USBDRD
depends on USB_DWC3_EXYNOS
select GENERIC_PHY
select MFD_SYSCON
- default y
+ default ARCH_EXYNOS
help
Enable USB DRD PHY support for Exynos 5 SoC series.
This driver provides PHY interface for USB 3.0 DRD controller
diff --git a/drivers/phy/samsung/phy-exynos5-usbdrd.c b/drivers/phy/samsung/phy-exynos5-usbdrd.c
index 817fddee0392..917a76d584f0 100644
--- a/drivers/phy/samsung/phy-exynos5-usbdrd.c
+++ b/drivers/phy/samsung/phy-exynos5-usbdrd.c
@@ -36,14 +36,40 @@
#define EXYNOS5_FSEL_26MHZ 0x6
#define EXYNOS5_FSEL_50MHZ 0x7
+/* USB 3.2 DRD 4nm PHY link controller registers */
+#define EXYNOS2200_DRD_CLKRST 0x0c
+#define EXYNOS2200_CLKRST_LINK_PCLK_SEL BIT(1)
+
+#define EXYNOS2200_DRD_UTMI 0x10
+#define EXYNOS2200_UTMI_FORCE_VBUSVALID BIT(1)
+#define EXYNOS2200_UTMI_FORCE_BVALID BIT(0)
+
+#define EXYNOS2200_DRD_HSP_MISC 0x114
+#define HSP_MISC_SET_REQ_IN2 BIT(4)
+#define HSP_MISC_RES_TUNE GENMASK(1, 0)
+#define RES_TUNE_PHY1_PHY2 0x1
+#define RES_TUNE_PHY1 0x2
+#define RES_TUNE_PHY2 0x3
+
/* Exynos5: USB 3.0 DRD PHY registers */
#define EXYNOS5_DRD_LINKSYSTEM 0x04
#define LINKSYSTEM_XHCI_VERSION_CONTROL BIT(27)
-#define LINKSYSTEM_FLADJ_MASK (0x3f << 1)
-#define LINKSYSTEM_FLADJ(_x) ((_x) << 1)
+#define LINKSYSTEM_FORCE_VBUSVALID BIT(8)
+#define LINKSYSTEM_FORCE_BVALID BIT(7)
+#define LINKSYSTEM_FLADJ GENMASK(6, 1)
#define EXYNOS5_DRD_PHYUTMI 0x08
+#define PHYUTMI_UTMI_SUSPEND_COM_N BIT(12)
+#define PHYUTMI_UTMI_L1_SUSPEND_COM_N BIT(11)
+#define PHYUTMI_VBUSVLDEXTSEL BIT(10)
+#define PHYUTMI_VBUSVLDEXT BIT(9)
+#define PHYUTMI_TXBITSTUFFENH BIT(8)
+#define PHYUTMI_TXBITSTUFFEN BIT(7)
#define PHYUTMI_OTGDISABLE BIT(6)
+#define PHYUTMI_IDPULLUP BIT(5)
+#define PHYUTMI_DRVVBUS BIT(4)
+#define PHYUTMI_DPPULLDOWN BIT(3)
+#define PHYUTMI_DMPULLDOWN BIT(2)
#define PHYUTMI_FORCESUSPEND BIT(1)
#define PHYUTMI_FORCESLEEP BIT(0)
@@ -51,30 +77,27 @@
#define EXYNOS5_DRD_PHYCLKRST 0x10
#define PHYCLKRST_EN_UTMISUSPEND BIT(31)
-#define PHYCLKRST_SSC_REFCLKSEL_MASK (0xff << 23)
-#define PHYCLKRST_SSC_REFCLKSEL(_x) ((_x) << 23)
-#define PHYCLKRST_SSC_RANGE_MASK (0x03 << 21)
-#define PHYCLKRST_SSC_RANGE(_x) ((_x) << 21)
+#define PHYCLKRST_SSC_REFCLKSEL GENMASK(30, 23)
+#define PHYCLKRST_SSC_RANGE GENMASK(22, 21)
#define PHYCLKRST_SSC_EN BIT(20)
#define PHYCLKRST_REF_SSP_EN BIT(19)
#define PHYCLKRST_REF_CLKDIV2 BIT(18)
-#define PHYCLKRST_MPLL_MULTIPLIER_MASK (0x7f << 11)
-#define PHYCLKRST_MPLL_MULTIPLIER_100MHZ_REF (0x19 << 11)
-#define PHYCLKRST_MPLL_MULTIPLIER_50M_REF (0x32 << 11)
-#define PHYCLKRST_MPLL_MULTIPLIER_24MHZ_REF (0x68 << 11)
-#define PHYCLKRST_MPLL_MULTIPLIER_20MHZ_REF (0x7d << 11)
-#define PHYCLKRST_MPLL_MULTIPLIER_19200KHZ_REF (0x02 << 11)
-#define PHYCLKRST_FSEL_PIPE_MASK (0x7 << 8)
-#define PHYCLKRST_FSEL_UTMI_MASK (0x7 << 5)
-#define PHYCLKRST_FSEL(_x) ((_x) << 5)
-#define PHYCLKRST_FSEL_PAD_100MHZ (0x27 << 5)
-#define PHYCLKRST_FSEL_PAD_24MHZ (0x2a << 5)
-#define PHYCLKRST_FSEL_PAD_20MHZ (0x31 << 5)
-#define PHYCLKRST_FSEL_PAD_19_2MHZ (0x38 << 5)
+#define PHYCLKRST_MPLL_MULTIPLIER GENMASK(17, 11)
+#define PHYCLKRST_MPLL_MULTIPLIER_100MHZ_REF 0x19
+#define PHYCLKRST_MPLL_MULTIPLIER_50M_REF 0x32
+#define PHYCLKRST_MPLL_MULTIPLIER_24MHZ_REF 0x68
+#define PHYCLKRST_MPLL_MULTIPLIER_20MHZ_REF 0x7d
+#define PHYCLKRST_MPLL_MULTIPLIER_19200KHZ_REF 0x02
+#define PHYCLKRST_FSEL_PIPE GENMASK(10, 8)
+#define PHYCLKRST_FSEL_UTMI GENMASK(7, 5)
+#define PHYCLKRST_FSEL_PAD_100MHZ 0x27
+#define PHYCLKRST_FSEL_PAD_24MHZ 0x2a
+#define PHYCLKRST_FSEL_PAD_20MHZ 0x31
+#define PHYCLKRST_FSEL_PAD_19_2MHZ 0x38
#define PHYCLKRST_RETENABLEN BIT(4)
-#define PHYCLKRST_REFCLKSEL_MASK (0x03 << 2)
-#define PHYCLKRST_REFCLKSEL_PAD_REFCLK (0x2 << 2)
-#define PHYCLKRST_REFCLKSEL_EXT_REFCLK (0x3 << 2)
+#define PHYCLKRST_REFCLKSEL GENMASK(3, 2)
+#define PHYCLKRST_REFCLKSEL_PAD_REFCLK 0x2
+#define PHYCLKRST_REFCLKSEL_EXT_REFCLK 0x3
#define PHYCLKRST_PORTRESET BIT(1)
#define PHYCLKRST_COMMONONN BIT(0)
@@ -83,22 +106,32 @@
#define PHYREG0_SSC_RANGE BIT(20)
#define PHYREG0_CR_WRITE BIT(19)
#define PHYREG0_CR_READ BIT(18)
-#define PHYREG0_CR_DATA_IN(_x) ((_x) << 2)
+#define PHYREG0_CR_DATA_IN GENMASK(17, 2)
#define PHYREG0_CR_CAP_DATA BIT(1)
#define PHYREG0_CR_CAP_ADDR BIT(0)
#define EXYNOS5_DRD_PHYREG1 0x18
-#define PHYREG1_CR_DATA_OUT(_x) ((_x) << 1)
+#define PHYREG0_CR_DATA_OUT GENMASK(16, 1)
#define PHYREG1_CR_ACK BIT(0)
#define EXYNOS5_DRD_PHYPARAM0 0x1c
#define PHYPARAM0_REF_USE_PAD BIT(31)
-#define PHYPARAM0_REF_LOSLEVEL_MASK (0x1f << 26)
-#define PHYPARAM0_REF_LOSLEVEL (0x9 << 26)
+#define PHYPARAM0_REF_LOSLEVEL GENMASK(30, 26)
+#define PHYPARAM0_REF_LOSLEVEL_VAL 0x9
+#define PHYPARAM0_TXVREFTUNE GENMASK(25, 22)
+#define PHYPARAM0_TXRISETUNE GENMASK(21, 20)
+#define PHYPARAM0_TXRESTUNE GENMASK(19, 18)
+#define PHYPARAM0_TXPREEMPPULSETUNE BIT(17)
+#define PHYPARAM0_TXPREEMPAMPTUNE GENMASK(16, 15)
+#define PHYPARAM0_TXHSXVTUNE GENMASK(14, 13)
+#define PHYPARAM0_TXFSLSTUNE GENMASK(12, 9)
+#define PHYPARAM0_SQRXTUNE GENMASK(8, 6)
+#define PHYPARAM0_OTGTUNE GENMASK(5, 3)
+#define PHYPARAM0_COMPDISTUNE GENMASK(2, 0)
#define EXYNOS5_DRD_PHYPARAM1 0x20
-#define PHYPARAM1_PCS_TXDEEMPH_MASK (0x1f << 0)
-#define PHYPARAM1_PCS_TXDEEMPH (0x1c)
+#define PHYPARAM1_PCS_TXDEEMPH GENMASK(4, 0)
+#define PHYPARAM1_PCS_TXDEEMPH_VAL 0x1c
#define EXYNOS5_DRD_PHYTERM 0x24
@@ -114,6 +147,12 @@
#define EXYNOS5_DRD_PHYRESUME 0x34
#define EXYNOS5_DRD_LINKPORT 0x44
+#define LINKPORT_HOST_U3_PORT_DISABLE BIT(8)
+#define LINKPORT_HOST_U2_PORT_DISABLE BIT(7)
+#define LINKPORT_HOST_PORT_OVCR_U3 BIT(5)
+#define LINKPORT_HOST_PORT_OVCR_U2 BIT(4)
+#define LINKPORT_HOST_PORT_OVCR_U3_SEL BIT(3)
+#define LINKPORT_HOST_PORT_OVCR_U2_SEL BIT(2)
/* USB 3.0 DRD PHY SS Function Control Reg; accessed by CR_PORT */
#define EXYNOS5_DRD_PHYSS_LOSLEVEL_OVRD_IN (0x15)
@@ -134,13 +173,31 @@
#define LANE0_TX_DEBUG_RXDET_MEAS_TIME_62M5 (0x20 << 4)
#define LANE0_TX_DEBUG_RXDET_MEAS_TIME_96M_100M (0x40 << 4)
+/* Exynos7870: USB DRD PHY registers */
+#define EXYNOS7870_DRD_PHYPCSVAL 0x3C
+#define PHYPCSVAL_PCS_RX_LOS_MASK GENMASK(9, 0)
+
+#define EXYNOS7870_DRD_PHYPARAM2 0x50
+#define PHYPARAM2_TX_VBOOST_LVL GENMASK(6, 4)
+#define PHYPARAM2_LOS_BIAS GENMASK(2, 0)
+
+#define EXYNOS7870_DRD_HSPHYCTRL 0x54
+#define HSPHYCTRL_PHYSWRSTALL BIT(31)
+#define HSPHYCTRL_SIDDQ BIT(6)
+#define HSPHYCTRL_PHYSWRST BIT(0)
+
+#define EXYNOS7870_DRD_HSPHYPLLTUNE 0x70
+#define HSPHYPLLTUNE_PLL_B_TUNE BIT(6)
+#define HSPHYPLLTUNE_PLL_I_TUNE GENMASK(5, 4)
+#define HSPHYPLLTUNE_PLL_P_TUNE GENMASK(3, 0)
+
/* Exynos850: USB DRD PHY registers */
#define EXYNOS850_DRD_LINKCTRL 0x04
#define LINKCTRL_FORCE_RXELECIDLE BIT(18)
#define LINKCTRL_FORCE_PHYSTATUS BIT(17)
#define LINKCTRL_FORCE_PIPE_EN BIT(16)
#define LINKCTRL_FORCE_QACT BIT(8)
-#define LINKCTRL_BUS_FILTER_BYPASS(_x) ((_x) << 4)
+#define LINKCTRL_BUS_FILTER_BYPASS GENMASK(7, 4)
#define EXYNOS850_DRD_LINKPORT 0x08
#define LINKPORT_HOST_NUM_U3 GENMASK(19, 16)
@@ -389,6 +446,7 @@ struct exynos5_usbdrd_phy_drvdata {
* @clks: clocks for register access
* @core_clks: core clocks for phy (ref, pipe3, utmi+, ITP, etc. as required)
* @drv_data: pointer to SoC level driver data structure
+ * @hs_phy: pointer to non-Samsung IP high-speed phy controller
* @phy_mutex: mutex protecting phy_init/exit & TCPC callbacks
* @phys: array for 'EXYNOS5_DRDPHYS_NUM' number of PHY
* instances each with its 'phy' and 'phy_cfg'.
@@ -406,6 +464,7 @@ struct exynos5_usbdrd_phy {
struct clk_bulk_data *clks;
struct clk_bulk_data *core_clks;
const struct exynos5_usbdrd_phy_drvdata *drv_data;
+ struct phy *hs_phy;
struct mutex phy_mutex;
struct phy_usb_instance {
struct phy *phy;
@@ -497,29 +556,33 @@ exynos5_usbdrd_pipe3_set_refclk(struct phy_usb_instance *inst)
reg = readl(phy_drd->reg_phy + EXYNOS5_DRD_PHYCLKRST);
/* Use EXTREFCLK as ref clock */
- reg &= ~PHYCLKRST_REFCLKSEL_MASK;
- reg |= PHYCLKRST_REFCLKSEL_EXT_REFCLK;
+ reg &= ~PHYCLKRST_REFCLKSEL;
+ reg |= FIELD_PREP(PHYCLKRST_REFCLKSEL, PHYCLKRST_REFCLKSEL_EXT_REFCLK);
/* FSEL settings corresponding to reference clock */
- reg &= ~(PHYCLKRST_FSEL_PIPE_MASK |
- PHYCLKRST_MPLL_MULTIPLIER_MASK |
- PHYCLKRST_SSC_REFCLKSEL_MASK);
+ reg &= ~(PHYCLKRST_FSEL_PIPE |
+ PHYCLKRST_MPLL_MULTIPLIER |
+ PHYCLKRST_SSC_REFCLKSEL);
switch (phy_drd->extrefclk) {
case EXYNOS5_FSEL_50MHZ:
- reg |= (PHYCLKRST_MPLL_MULTIPLIER_50M_REF |
- PHYCLKRST_SSC_REFCLKSEL(0x00));
+ reg |= (FIELD_PREP(PHYCLKRST_SSC_REFCLKSEL, 0x00) |
+ FIELD_PREP(PHYCLKRST_MPLL_MULTIPLIER,
+ PHYCLKRST_MPLL_MULTIPLIER_50M_REF));
break;
case EXYNOS5_FSEL_24MHZ:
- reg |= (PHYCLKRST_MPLL_MULTIPLIER_24MHZ_REF |
- PHYCLKRST_SSC_REFCLKSEL(0x88));
+ reg |= (FIELD_PREP(PHYCLKRST_SSC_REFCLKSEL, 0x88) |
+ FIELD_PREP(PHYCLKRST_MPLL_MULTIPLIER,
+ PHYCLKRST_MPLL_MULTIPLIER_24MHZ_REF));
break;
case EXYNOS5_FSEL_20MHZ:
- reg |= (PHYCLKRST_MPLL_MULTIPLIER_20MHZ_REF |
- PHYCLKRST_SSC_REFCLKSEL(0x00));
+ reg |= (FIELD_PREP(PHYCLKRST_SSC_REFCLKSEL, 0x00) |
+ FIELD_PREP(PHYCLKRST_MPLL_MULTIPLIER,
+ PHYCLKRST_MPLL_MULTIPLIER_20MHZ_REF));
break;
case EXYNOS5_FSEL_19MHZ2:
- reg |= (PHYCLKRST_MPLL_MULTIPLIER_19200KHZ_REF |
- PHYCLKRST_SSC_REFCLKSEL(0x88));
+ reg |= (FIELD_PREP(PHYCLKRST_SSC_REFCLKSEL, 0x88) |
+ FIELD_PREP(PHYCLKRST_MPLL_MULTIPLIER,
+ PHYCLKRST_MPLL_MULTIPLIER_19200KHZ_REF));
break;
default:
dev_dbg(phy_drd->dev, "unsupported ref clk\n");
@@ -542,13 +605,13 @@ exynos5_usbdrd_utmi_set_refclk(struct phy_usb_instance *inst)
/* restore any previous reference clock settings */
reg = readl(phy_drd->reg_phy + EXYNOS5_DRD_PHYCLKRST);
- reg &= ~PHYCLKRST_REFCLKSEL_MASK;
- reg |= PHYCLKRST_REFCLKSEL_EXT_REFCLK;
+ reg &= ~PHYCLKRST_REFCLKSEL;
+ reg |= FIELD_PREP(PHYCLKRST_REFCLKSEL, PHYCLKRST_REFCLKSEL_EXT_REFCLK);
- reg &= ~(PHYCLKRST_FSEL_UTMI_MASK |
- PHYCLKRST_MPLL_MULTIPLIER_MASK |
- PHYCLKRST_SSC_REFCLKSEL_MASK);
- reg |= PHYCLKRST_FSEL(phy_drd->extrefclk);
+ reg &= ~(PHYCLKRST_FSEL_UTMI |
+ PHYCLKRST_MPLL_MULTIPLIER |
+ PHYCLKRST_SSC_REFCLKSEL);
+ reg |= FIELD_PREP(PHYCLKRST_FSEL_UTMI, phy_drd->extrefclk);
return reg;
}
@@ -598,8 +661,8 @@ static void exynos5_usbdrd_pipe3_init(struct exynos5_usbdrd_phy *phy_drd)
reg = readl(phy_drd->reg_phy + EXYNOS5_DRD_PHYPARAM1);
/* Set Tx De-Emphasis level */
- reg &= ~PHYPARAM1_PCS_TXDEEMPH_MASK;
- reg |= PHYPARAM1_PCS_TXDEEMPH;
+ reg &= ~PHYPARAM1_PCS_TXDEEMPH;
+ reg |= FIELD_PREP(PHYPARAM1_PCS_TXDEEMPH, PHYPARAM1_PCS_TXDEEMPH_VAL);
writel(reg, phy_drd->reg_phy + EXYNOS5_DRD_PHYPARAM1);
reg = readl(phy_drd->reg_phy + EXYNOS5_DRD_PHYTEST);
@@ -749,14 +812,14 @@ static void exynos5_usbdrd_utmi_init(struct exynos5_usbdrd_phy *phy_drd)
reg = readl(phy_drd->reg_phy + EXYNOS5_DRD_PHYPARAM0);
/* Set Loss-of-Signal Detector sensitivity */
- reg &= ~PHYPARAM0_REF_LOSLEVEL_MASK;
- reg |= PHYPARAM0_REF_LOSLEVEL;
+ reg &= ~PHYPARAM0_REF_LOSLEVEL;
+ reg |= FIELD_PREP(PHYPARAM0_REF_LOSLEVEL, PHYPARAM0_REF_LOSLEVEL_VAL);
writel(reg, phy_drd->reg_phy + EXYNOS5_DRD_PHYPARAM0);
reg = readl(phy_drd->reg_phy + EXYNOS5_DRD_PHYPARAM1);
/* Set Tx De-Emphasis level */
- reg &= ~PHYPARAM1_PCS_TXDEEMPH_MASK;
- reg |= PHYPARAM1_PCS_TXDEEMPH;
+ reg &= ~PHYPARAM1_PCS_TXDEEMPH;
+ reg |= FIELD_PREP(PHYPARAM1_PCS_TXDEEMPH, PHYPARAM1_PCS_TXDEEMPH_VAL);
writel(reg, phy_drd->reg_phy + EXYNOS5_DRD_PHYPARAM1);
/* UTMI Power Control */
@@ -787,7 +850,7 @@ static int exynos5_usbdrd_phy_init(struct phy *phy)
* See xHCI 1.0 spec, 5.2.4
*/
reg = LINKSYSTEM_XHCI_VERSION_CONTROL |
- LINKSYSTEM_FLADJ(0x20);
+ FIELD_PREP(LINKSYSTEM_FLADJ, 0x20);
writel(reg, phy_drd->reg_phy + EXYNOS5_DRD_LINKSYSTEM);
reg = readl(phy_drd->reg_phy + EXYNOS5_DRD_PHYPARAM0);
@@ -946,26 +1009,24 @@ static int crport_handshake(struct exynos5_usbdrd_phy *phy_drd,
static int crport_ctrl_write(struct exynos5_usbdrd_phy *phy_drd,
u32 addr, u32 data)
{
+ u32 val;
int ret;
/* Write Address */
- writel(PHYREG0_CR_DATA_IN(addr),
- phy_drd->reg_phy + EXYNOS5_DRD_PHYREG0);
- ret = crport_handshake(phy_drd, PHYREG0_CR_DATA_IN(addr),
- PHYREG0_CR_CAP_ADDR);
+ val = FIELD_PREP(PHYREG0_CR_DATA_IN, addr);
+ writel(val, phy_drd->reg_phy + EXYNOS5_DRD_PHYREG0);
+ ret = crport_handshake(phy_drd, val, PHYREG0_CR_CAP_ADDR);
if (ret)
return ret;
/* Write Data */
- writel(PHYREG0_CR_DATA_IN(data),
- phy_drd->reg_phy + EXYNOS5_DRD_PHYREG0);
- ret = crport_handshake(phy_drd, PHYREG0_CR_DATA_IN(data),
- PHYREG0_CR_CAP_DATA);
+ val = FIELD_PREP(PHYREG0_CR_DATA_IN, data);
+ writel(val, phy_drd->reg_phy + EXYNOS5_DRD_PHYREG0);
+ ret = crport_handshake(phy_drd, val, PHYREG0_CR_CAP_DATA);
if (ret)
return ret;
- ret = crport_handshake(phy_drd, PHYREG0_CR_DATA_IN(data),
- PHYREG0_CR_WRITE);
+ ret = crport_handshake(phy_drd, val, PHYREG0_CR_WRITE);
return ret;
}
@@ -1075,6 +1136,315 @@ static const struct phy_ops exynos5_usbdrd_phy_ops = {
.owner = THIS_MODULE,
};
+static void exynos7870_usbdrd_phy_isol(struct phy_usb_instance *inst,
+ bool isolate)
+{
+ unsigned int val;
+
+ if (!inst->reg_pmu)
+ return;
+
+ val = isolate ? 0 : EXYNOS7870_USB2PHY_ENABLE;
+
+ regmap_update_bits(inst->reg_pmu, inst->pmu_offset,
+ EXYNOS7870_USB2PHY_ENABLE, val);
+}
+
+static void exynos7870_usbdrd_utmi_init(struct exynos5_usbdrd_phy *phy_drd)
+{
+ u32 reg;
+
+ reg = readl(phy_drd->reg_phy + EXYNOS5_DRD_PHYCLKRST);
+ /* Use PADREFCLK as ref clock */
+ reg &= ~PHYCLKRST_REFCLKSEL;
+ reg |= FIELD_PREP(PHYCLKRST_REFCLKSEL, PHYCLKRST_REFCLKSEL_PAD_REFCLK);
+ /* Select ref clock rate */
+ reg &= ~PHYCLKRST_FSEL_UTMI;
+ reg &= ~PHYCLKRST_FSEL_PIPE;
+ reg |= FIELD_PREP(PHYCLKRST_FSEL_UTMI, phy_drd->extrefclk);
+ /* Enable suspend and reset the port */
+ reg |= PHYCLKRST_EN_UTMISUSPEND;
+ reg |= PHYCLKRST_COMMONONN;
+ reg |= PHYCLKRST_PORTRESET;
+ writel(reg, phy_drd->reg_phy + EXYNOS5_DRD_PHYCLKRST);
+ udelay(10);
+
+ /* Clear the port reset bit */
+ reg &= ~PHYCLKRST_PORTRESET;
+ writel(reg, phy_drd->reg_phy + EXYNOS5_DRD_PHYCLKRST);
+
+ /* Change PHY PLL tune value */
+ reg = readl(phy_drd->reg_phy + EXYNOS7870_DRD_HSPHYPLLTUNE);
+ if (phy_drd->extrefclk == EXYNOS5_FSEL_24MHZ)
+ reg |= HSPHYPLLTUNE_PLL_B_TUNE;
+ else
+ reg &= ~HSPHYPLLTUNE_PLL_B_TUNE;
+ reg &= ~HSPHYPLLTUNE_PLL_P_TUNE;
+ reg |= FIELD_PREP(HSPHYPLLTUNE_PLL_P_TUNE, 14);
+ writel(reg, phy_drd->reg_phy + EXYNOS7870_DRD_HSPHYPLLTUNE);
+
+ /* High-Speed PHY control */
+ reg = readl(phy_drd->reg_phy + EXYNOS7870_DRD_HSPHYCTRL);
+ reg &= ~HSPHYCTRL_SIDDQ;
+ reg &= ~HSPHYCTRL_PHYSWRST;
+ reg &= ~HSPHYCTRL_PHYSWRSTALL;
+ writel(reg, phy_drd->reg_phy + EXYNOS7870_DRD_HSPHYCTRL);
+ udelay(500);
+
+ reg = readl(phy_drd->reg_phy + EXYNOS5_DRD_LINKSYSTEM);
+ /*
+ * Setting the Frame length Adj value[6:1] to default 0x20
+ * See xHCI 1.0 spec, 5.2.4
+ */
+ reg |= LINKSYSTEM_XHCI_VERSION_CONTROL;
+ reg &= ~LINKSYSTEM_FLADJ;
+ reg |= FIELD_PREP(LINKSYSTEM_FLADJ, 0x20);
+ /* Set VBUSVALID signal as the VBUS pad is not used */
+ reg |= LINKSYSTEM_FORCE_BVALID;
+ reg |= LINKSYSTEM_FORCE_VBUSVALID;
+ writel(reg, phy_drd->reg_phy + EXYNOS5_DRD_LINKSYSTEM);
+
+ reg = readl(phy_drd->reg_phy + EXYNOS5_DRD_PHYUTMI);
+ /* Release force_sleep & force_suspend */
+ reg &= ~PHYUTMI_FORCESLEEP;
+ reg &= ~PHYUTMI_FORCESUSPEND;
+ /* DP/DM pull down control */
+ reg &= ~PHYUTMI_DMPULLDOWN;
+ reg &= ~PHYUTMI_DPPULLDOWN;
+ reg &= ~PHYUTMI_DRVVBUS;
+ /* Set DP-pull up as the VBUS pad is not used */
+ reg |= PHYUTMI_VBUSVLDEXTSEL;
+ reg |= PHYUTMI_VBUSVLDEXT;
+ /* Disable OTG block and VBUS valid comparator */
+ reg |= PHYUTMI_OTGDISABLE;
+ writel(reg, phy_drd->reg_phy + EXYNOS5_DRD_PHYUTMI);
+
+ /* Configure OVC IO usage */
+ reg = readl(phy_drd->reg_phy + EXYNOS5_DRD_LINKPORT);
+ reg |= LINKPORT_HOST_PORT_OVCR_U3_SEL | LINKPORT_HOST_PORT_OVCR_U2_SEL;
+ writel(reg, phy_drd->reg_phy + EXYNOS5_DRD_LINKPORT);
+
+ /* High-Speed PHY swrst */
+ reg = readl(phy_drd->reg_phy + EXYNOS7870_DRD_HSPHYCTRL);
+ reg |= HSPHYCTRL_PHYSWRST;
+ writel(reg, phy_drd->reg_phy + EXYNOS7870_DRD_HSPHYCTRL);
+ udelay(20);
+
+ /* Clear the PHY swrst bit */
+ reg = readl(phy_drd->reg_phy + EXYNOS7870_DRD_HSPHYCTRL);
+ reg &= ~HSPHYCTRL_PHYSWRST;
+ writel(reg, phy_drd->reg_phy + EXYNOS7870_DRD_HSPHYCTRL);
+
+ if (phy_drd->drv_data->phy_tunes)
+ exynos5_usbdrd_apply_phy_tunes(phy_drd,
+ PTS_UTMI_POSTINIT);
+}
+
+static int exynos7870_usbdrd_phy_init(struct phy *phy)
+{
+ struct phy_usb_instance *inst = phy_get_drvdata(phy);
+ struct exynos5_usbdrd_phy *phy_drd = to_usbdrd_phy(inst);
+ int ret;
+
+ ret = clk_bulk_prepare_enable(phy_drd->drv_data->n_clks, phy_drd->clks);
+ if (ret)
+ return ret;
+
+ /* UTMI or PIPE3 specific init */
+ inst->phy_cfg->phy_init(phy_drd);
+
+ clk_bulk_disable_unprepare(phy_drd->drv_data->n_clks, phy_drd->clks);
+
+ return 0;
+}
+
+static int exynos7870_usbdrd_phy_exit(struct phy *phy)
+{
+ int ret;
+ u32 reg;
+ struct phy_usb_instance *inst = phy_get_drvdata(phy);
+ struct exynos5_usbdrd_phy *phy_drd = to_usbdrd_phy(inst);
+
+ ret = clk_bulk_prepare_enable(phy_drd->drv_data->n_clks, phy_drd->clks);
+ if (ret)
+ return ret;
+
+ /*
+ * Disable the VBUS signal and the ID pull-up resistor.
+ * Enable force-suspend and force-sleep modes.
+ */
+ reg = readl(phy_drd->reg_phy + EXYNOS5_DRD_PHYUTMI);
+ reg &= ~(PHYUTMI_DRVVBUS | PHYUTMI_VBUSVLDEXT | PHYUTMI_VBUSVLDEXTSEL);
+ reg &= ~PHYUTMI_IDPULLUP;
+ reg |= PHYUTMI_FORCESUSPEND | PHYUTMI_FORCESLEEP;
+ writel(reg, phy_drd->reg_phy + EXYNOS5_DRD_PHYUTMI);
+
+ /* Power down PHY analog blocks */
+ reg = readl(phy_drd->reg_phy + EXYNOS7870_DRD_HSPHYCTRL);
+ reg |= HSPHYCTRL_SIDDQ;
+ writel(reg, phy_drd->reg_phy + EXYNOS7870_DRD_HSPHYCTRL);
+
+ /* Clear VBUSVALID signal as the VBUS pad is not used */
+ reg = readl(phy_drd->reg_phy + EXYNOS5_DRD_LINKSYSTEM);
+ reg &= ~(LINKSYSTEM_FORCE_BVALID | LINKSYSTEM_FORCE_VBUSVALID);
+ writel(reg, phy_drd->reg_phy + EXYNOS5_DRD_LINKSYSTEM);
+
+ clk_bulk_disable_unprepare(phy_drd->drv_data->n_clks, phy_drd->clks);
+
+ return 0;
+}
+
+static const struct phy_ops exynos7870_usbdrd_phy_ops = {
+ .init = exynos7870_usbdrd_phy_init,
+ .exit = exynos7870_usbdrd_phy_exit,
+ .power_on = exynos5_usbdrd_phy_power_on,
+ .power_off = exynos5_usbdrd_phy_power_off,
+ .owner = THIS_MODULE,
+};
+
+static void exynos2200_usbdrd_utmi_init(struct exynos5_usbdrd_phy *phy_drd)
+{
+ /* Configure non-Samsung IP PHY, responsible for UTMI */
+ phy_init(phy_drd->hs_phy);
+}
+
+static void exynos2200_usbdrd_link_init(struct exynos5_usbdrd_phy *phy_drd)
+{
+ void __iomem *regs_base = phy_drd->reg_phy;
+ u32 reg;
+
+ /*
+ * Disable HWACG (hardware auto clock gating control). This will force
+ * QACTIVE signal in Q-Channel interface to HIGH level, to make sure
+ * the PHY clock is not gated by the hardware.
+ */
+ reg = readl(regs_base + EXYNOS850_DRD_LINKCTRL);
+ reg |= LINKCTRL_FORCE_QACT;
+ writel(reg, regs_base + EXYNOS850_DRD_LINKCTRL);
+
+ /* De-assert link reset */
+ reg = readl(regs_base + EXYNOS2200_DRD_CLKRST);
+ reg &= ~CLKRST_LINK_SW_RST;
+ writel(reg, regs_base + EXYNOS2200_DRD_CLKRST);
+
+ /* Set link VBUS Valid */
+ reg = readl(regs_base + EXYNOS2200_DRD_UTMI);
+ reg |= EXYNOS2200_UTMI_FORCE_BVALID | EXYNOS2200_UTMI_FORCE_VBUSVALID;
+ writel(reg, regs_base + EXYNOS2200_DRD_UTMI);
+}
+
+static void
+exynos2200_usbdrd_link_attach_detach_pipe3_phy(struct phy_usb_instance *inst)
+{
+ struct exynos5_usbdrd_phy *phy_drd = to_usbdrd_phy(inst);
+ void __iomem *regs_base = phy_drd->reg_phy;
+ u32 reg;
+
+ reg = readl(regs_base + EXYNOS850_DRD_LINKCTRL);
+ if (inst->phy_cfg->id == EXYNOS5_DRDPHY_UTMI) {
+ /* force pipe3 signal for link */
+ reg &= ~LINKCTRL_FORCE_PHYSTATUS;
+ reg |= LINKCTRL_FORCE_PIPE_EN | LINKCTRL_FORCE_RXELECIDLE;
+ } else {
+ /* disable forcing pipe interface */
+ reg &= ~LINKCTRL_FORCE_PIPE_EN;
+ }
+ writel(reg, regs_base + EXYNOS850_DRD_LINKCTRL);
+
+ reg = readl(regs_base + EXYNOS2200_DRD_HSP_MISC);
+ if (inst->phy_cfg->id == EXYNOS5_DRDPHY_UTMI) {
+ /* calibrate only eUSB phy */
+ reg |= FIELD_PREP(HSP_MISC_RES_TUNE, RES_TUNE_PHY1);
+ reg |= HSP_MISC_SET_REQ_IN2;
+ } else {
+ /* calibrate for dual phy */
+ reg |= FIELD_PREP(HSP_MISC_RES_TUNE, RES_TUNE_PHY1_PHY2);
+ reg &= ~HSP_MISC_SET_REQ_IN2;
+ }
+ writel(reg, regs_base + EXYNOS2200_DRD_HSP_MISC);
+
+ reg = readl(regs_base + EXYNOS2200_DRD_CLKRST);
+ if (inst->phy_cfg->id == EXYNOS5_DRDPHY_UTMI)
+ reg &= ~EXYNOS2200_CLKRST_LINK_PCLK_SEL;
+ else
+ reg |= EXYNOS2200_CLKRST_LINK_PCLK_SEL;
+
+ writel(reg, regs_base + EXYNOS2200_DRD_CLKRST);
+}
+
+static int exynos2200_usbdrd_phy_init(struct phy *phy)
+{
+ struct phy_usb_instance *inst = phy_get_drvdata(phy);
+ struct exynos5_usbdrd_phy *phy_drd = to_usbdrd_phy(inst);
+ int ret;
+
+ if (inst->phy_cfg->id == EXYNOS5_DRDPHY_UTMI) {
+ /* Power-on PHY ... */
+ ret = regulator_bulk_enable(phy_drd->drv_data->n_regulators,
+ phy_drd->regulators);
+ if (ret) {
+ dev_err(phy_drd->dev,
+ "Failed to enable PHY regulator(s)\n");
+ return ret;
+ }
+ }
+ /*
+ * ... and ungate power via PMU. Without this here, we get an SError
+ * trying to access PMA registers
+ */
+ exynos5_usbdrd_phy_isol(inst, false);
+
+ ret = clk_bulk_prepare_enable(phy_drd->drv_data->n_clks, phy_drd->clks);
+ if (ret)
+ return ret;
+
+ /* Set up the link controller */
+ exynos2200_usbdrd_link_init(phy_drd);
+
+ /* UTMI or PIPE3 link preparation */
+ exynos2200_usbdrd_link_attach_detach_pipe3_phy(inst);
+
+ /* UTMI or PIPE3 specific init */
+ inst->phy_cfg->phy_init(phy_drd);
+
+ clk_bulk_disable_unprepare(phy_drd->drv_data->n_clks, phy_drd->clks);
+
+ return 0;
+}
+
+static int exynos2200_usbdrd_phy_exit(struct phy *phy)
+{
+ struct phy_usb_instance *inst = phy_get_drvdata(phy);
+ struct exynos5_usbdrd_phy *phy_drd = to_usbdrd_phy(inst);
+ void __iomem *regs_base = phy_drd->reg_phy;
+ u32 reg;
+ int ret;
+
+ ret = clk_bulk_prepare_enable(phy_drd->drv_data->n_clks, phy_drd->clks);
+ if (ret)
+ return ret;
+
+ reg = readl(regs_base + EXYNOS2200_DRD_UTMI);
+ reg &= ~(EXYNOS2200_UTMI_FORCE_BVALID | EXYNOS2200_UTMI_FORCE_VBUSVALID);
+ writel(reg, regs_base + EXYNOS2200_DRD_UTMI);
+
+ reg = readl(regs_base + EXYNOS2200_DRD_CLKRST);
+ reg |= CLKRST_LINK_SW_RST;
+ writel(reg, regs_base + EXYNOS2200_DRD_CLKRST);
+
+ clk_bulk_disable_unprepare(phy_drd->drv_data->n_clks, phy_drd->clks);
+
+ exynos5_usbdrd_phy_isol(inst, true);
+ return regulator_bulk_disable(phy_drd->drv_data->n_regulators,
+ phy_drd->regulators);
+}
+
+static const struct phy_ops exynos2200_usbdrd_phy_ops = {
+ .init = exynos2200_usbdrd_phy_init,
+ .exit = exynos2200_usbdrd_phy_exit,
+ .owner = THIS_MODULE,
+};
+
static void
exynos5_usbdrd_usb_v3p1_pipe_override(struct exynos5_usbdrd_phy *phy_drd)
{
@@ -1134,7 +1504,7 @@ static void exynos850_usbdrd_utmi_init(struct exynos5_usbdrd_phy *phy_drd)
/* Set VBUS Valid and D+ pull-up control by VBUS pad usage */
reg = readl(regs_base + EXYNOS850_DRD_LINKCTRL);
- reg |= LINKCTRL_BUS_FILTER_BYPASS(0xf);
+ reg |= FIELD_PREP(LINKCTRL_BUS_FILTER_BYPASS, 0xf);
writel(reg, regs_base + EXYNOS850_DRD_LINKCTRL);
if (!phy_drd->sw) {
@@ -1384,27 +1754,37 @@ static int exynos5_usbdrd_phy_clk_handle(struct exynos5_usbdrd_phy *phy_drd)
return dev_err_probe(phy_drd->dev, ret,
"failed to get phy core clock(s)\n");
- ref_clk = NULL;
- for (int i = 0; i < phy_drd->drv_data->n_core_clks; ++i) {
- if (!strcmp(phy_drd->core_clks[i].id, "ref")) {
- ref_clk = phy_drd->core_clks[i].clk;
- break;
+ if (phy_drd->drv_data->n_core_clks) {
+ ref_clk = NULL;
+ for (int i = 0; i < phy_drd->drv_data->n_core_clks; ++i) {
+ if (!strcmp(phy_drd->core_clks[i].id, "ref")) {
+ ref_clk = phy_drd->core_clks[i].clk;
+ break;
+ }
}
- }
- if (!ref_clk)
- return dev_err_probe(phy_drd->dev, -ENODEV,
- "failed to find phy reference clock\n");
+ if (!ref_clk)
+ return dev_err_probe(phy_drd->dev, -ENODEV,
+ "failed to find phy reference clock\n");
- ref_rate = clk_get_rate(ref_clk);
- ret = exynos5_rate_to_clk(ref_rate, &phy_drd->extrefclk);
- if (ret)
- return dev_err_probe(phy_drd->dev, ret,
- "clock rate (%ld) not supported\n",
- ref_rate);
+ ref_rate = clk_get_rate(ref_clk);
+ ret = exynos5_rate_to_clk(ref_rate, &phy_drd->extrefclk);
+ if (ret)
+ return dev_err_probe(phy_drd->dev, ret,
+ "clock rate (%ld) not supported\n",
+ ref_rate);
+ }
return 0;
}
+static const struct exynos5_usbdrd_phy_config phy_cfg_exynos2200[] = {
+ {
+ .id = EXYNOS5_DRDPHY_UTMI,
+ .phy_isol = exynos5_usbdrd_phy_isol,
+ .phy_init = exynos2200_usbdrd_utmi_init,
+ },
+};
+
static int exynos5_usbdrd_orien_sw_set(struct typec_switch_dev *sw,
enum typec_orientation orientation)
{
@@ -1501,6 +1881,14 @@ static const struct exynos5_usbdrd_phy_config phy_cfg_exynos5[] = {
},
};
+static const struct exynos5_usbdrd_phy_config phy_cfg_exynos7870[] = {
+ {
+ .id = EXYNOS5_DRDPHY_UTMI,
+ .phy_isol = exynos7870_usbdrd_phy_isol,
+ .phy_init = exynos7870_usbdrd_utmi_init,
+ },
+};
+
static const struct exynos5_usbdrd_phy_config phy_cfg_exynos850[] = {
{
.id = EXYNOS5_DRDPHY_UTMI,
@@ -1509,6 +1897,30 @@ static const struct exynos5_usbdrd_phy_config phy_cfg_exynos850[] = {
},
};
+static
+const struct exynos5_usbdrd_phy_tuning exynos7870_tunes_utmi_postinit[] = {
+ PHY_TUNING_ENTRY_PHY(EXYNOS5_DRD_PHYPARAM0,
+ (PHYPARAM0_TXVREFTUNE | PHYPARAM0_TXRISETUNE |
+ PHYPARAM0_TXRESTUNE | PHYPARAM0_TXPREEMPPULSETUNE |
+ PHYPARAM0_TXPREEMPAMPTUNE | PHYPARAM0_TXHSXVTUNE |
+ PHYPARAM0_TXFSLSTUNE | PHYPARAM0_SQRXTUNE |
+ PHYPARAM0_OTGTUNE | PHYPARAM0_COMPDISTUNE),
+ (FIELD_PREP_CONST(PHYPARAM0_TXVREFTUNE, 14) |
+ FIELD_PREP_CONST(PHYPARAM0_TXRISETUNE, 1) |
+ FIELD_PREP_CONST(PHYPARAM0_TXRESTUNE, 3) |
+ FIELD_PREP_CONST(PHYPARAM0_TXPREEMPAMPTUNE, 0) |
+ FIELD_PREP_CONST(PHYPARAM0_TXHSXVTUNE, 0) |
+ FIELD_PREP_CONST(PHYPARAM0_TXFSLSTUNE, 3) |
+ FIELD_PREP_CONST(PHYPARAM0_SQRXTUNE, 6) |
+ FIELD_PREP_CONST(PHYPARAM0_OTGTUNE, 2) |
+ FIELD_PREP_CONST(PHYPARAM0_COMPDISTUNE, 3))),
+ PHY_TUNING_ENTRY_LAST
+};
+
+static const struct exynos5_usbdrd_phy_tuning *exynos7870_tunes[PTS_MAX] = {
+ [PTS_UTMI_POSTINIT] = exynos7870_tunes_utmi_postinit,
+};
+
static const char * const exynos5_clk_names[] = {
"phy",
};
@@ -1525,6 +1937,19 @@ static const char * const exynos5_regulator_names[] = {
"vbus", "vbus-boost",
};
+static const struct exynos5_usbdrd_phy_drvdata exynos2200_usb32drd_phy = {
+ .phy_cfg = phy_cfg_exynos2200,
+ .phy_ops = &exynos2200_usbdrd_phy_ops,
+ .pmu_offset_usbdrd0_phy = EXYNOS2200_PHY_CTRL_USB20,
+ .clk_names = exynos5_clk_names,
+ .n_clks = ARRAY_SIZE(exynos5_clk_names),
+ /* clocks and regulators are specific to the underlying PHY blocks */
+ .core_clk_names = NULL,
+ .n_core_clks = 0,
+ .regulator_names = NULL,
+ .n_regulators = 0,
+};
+
static const struct exynos5_usbdrd_phy_drvdata exynos5420_usbdrd_phy = {
.phy_cfg = phy_cfg_exynos5,
.phy_ops = &exynos5_usbdrd_phy_ops,
@@ -1575,6 +2000,19 @@ static const struct exynos5_usbdrd_phy_drvdata exynos7_usbdrd_phy = {
.n_regulators = ARRAY_SIZE(exynos5_regulator_names),
};
+static const struct exynos5_usbdrd_phy_drvdata exynos7870_usbdrd_phy = {
+ .phy_cfg = phy_cfg_exynos7870,
+ .phy_tunes = exynos7870_tunes,
+ .phy_ops = &exynos7870_usbdrd_phy_ops,
+ .pmu_offset_usbdrd0_phy = EXYNOS5_USBDRD_PHY_CONTROL,
+ .clk_names = exynos5_clk_names,
+ .n_clks = ARRAY_SIZE(exynos5_clk_names),
+ .core_clk_names = exynos5_core_clk_names,
+ .n_core_clks = ARRAY_SIZE(exynos5_core_clk_names),
+ .regulator_names = exynos5_regulator_names,
+ .n_regulators = ARRAY_SIZE(exynos5_regulator_names),
+};
+
static const struct exynos5_usbdrd_phy_drvdata exynos850_usbdrd_phy = {
.phy_cfg = phy_cfg_exynos850,
.phy_ops = &exynos850_usbdrd_phy_ops,
@@ -1770,6 +2208,9 @@ static const struct of_device_id exynos5_usbdrd_phy_of_match[] = {
.compatible = "google,gs101-usb31drd-phy",
.data = &gs101_usbd31rd_phy
}, {
+ .compatible = "samsung,exynos2200-usb32drd-phy",
+ .data = &exynos2200_usb32drd_phy,
+ }, {
.compatible = "samsung,exynos5250-usbdrd-phy",
.data = &exynos5250_usbdrd_phy
}, {
@@ -1782,6 +2223,9 @@ static const struct of_device_id exynos5_usbdrd_phy_of_match[] = {
.compatible = "samsung,exynos7-usbdrd-phy",
.data = &exynos7_usbdrd_phy
}, {
+ .compatible = "samsung,exynos7870-usbdrd-phy",
+ .data = &exynos7870_usbdrd_phy
+ }, {
.compatible = "samsung,exynos850-usbdrd-phy",
.data = &exynos850_usbdrd_phy
},
@@ -1841,6 +2285,17 @@ static int exynos5_usbdrd_phy_probe(struct platform_device *pdev)
return PTR_ERR(phy_drd->reg_phy);
}
+ /*
+ * USB32DRD 4nm controller implements Synopsys eUSB2.0 PHY
+ * and Synopsys SS/USBDP COMBOPHY, managed by external code.
+ */
+ if (of_property_present(dev->of_node, "phy-names")) {
+ phy_drd->hs_phy = devm_of_phy_get(dev, dev->of_node, "hs");
+ if (IS_ERR(phy_drd->hs_phy))
+ return dev_err_probe(dev, PTR_ERR(phy_drd->hs_phy),
+ "failed to get hs_phy\n");
+ }
+
ret = exynos5_usbdrd_phy_clk_handle(phy_drd);
if (ret)
return ret;
diff --git a/drivers/phy/tegra/Kconfig b/drivers/phy/tegra/Kconfig
index f30cfb42b210..342fb736da4b 100644
--- a/drivers/phy/tegra/Kconfig
+++ b/drivers/phy/tegra/Kconfig
@@ -13,7 +13,7 @@ config PHY_TEGRA_XUSB
config PHY_TEGRA194_P2U
tristate "NVIDIA Tegra194 PIPE2UPHY PHY driver"
- depends on ARCH_TEGRA_194_SOC || ARCH_TEGRA_234_SOC || COMPILE_TEST
+ depends on ARCH_TEGRA || COMPILE_TEST
select GENERIC_PHY
help
Enable this to support the P2U (PIPE to UPHY) that is part of Tegra 19x
diff --git a/drivers/phy/xilinx/phy-zynqmp.c b/drivers/phy/xilinx/phy-zynqmp.c
index 05a4a59f7c40..fe6b4925d166 100644
--- a/drivers/phy/xilinx/phy-zynqmp.c
+++ b/drivers/phy/xilinx/phy-zynqmp.c
@@ -222,7 +222,6 @@ struct xpsgtr_phy {
* @siou: siou base address
* @gtr_mutex: mutex for locking
* @phys: PHY lanes
- * @refclk_sscs: spread spectrum settings for the reference clocks
* @clk: reference clocks
* @tx_term_fix: fix for GT issue
* @saved_icm_cfg0: stored value of ICM CFG0 register
@@ -235,7 +234,6 @@ struct xpsgtr_dev {
void __iomem *siou;
struct mutex gtr_mutex; /* mutex for locking */
struct xpsgtr_phy phys[NUM_LANES];
- const struct xpsgtr_ssc *refclk_sscs[NUM_LANES];
struct clk *clk[NUM_LANES];
bool tx_term_fix;
unsigned int saved_icm_cfg0;
@@ -398,13 +396,40 @@ got_phy:
return ret;
}
+/* Get the spread spectrum (SSC) settings for the reference clock rate */
+static const struct xpsgtr_ssc *xpsgtr_find_sscs(struct xpsgtr_phy *gtr_phy)
+{
+ unsigned long rate;
+ struct clk *clk;
+ unsigned int i;
+
+ clk = gtr_phy->dev->clk[gtr_phy->refclk];
+ rate = clk_get_rate(clk);
+
+ for (i = 0 ; i < ARRAY_SIZE(ssc_lookup); i++) {
+ /* Allow an error of 100 ppm */
+ unsigned long error = ssc_lookup[i].refclk_rate / 10000;
+
+ if (abs(rate - ssc_lookup[i].refclk_rate) < error)
+ return &ssc_lookup[i];
+ }
+
+ dev_err(gtr_phy->dev->dev, "Invalid rate %lu for reference clock %u\n",
+ rate, gtr_phy->refclk);
+
+ return NULL;
+}
+
/* Configure PLL and spread-sprectrum clock. */
-static void xpsgtr_configure_pll(struct xpsgtr_phy *gtr_phy)
+static int xpsgtr_configure_pll(struct xpsgtr_phy *gtr_phy)
{
const struct xpsgtr_ssc *ssc;
u32 step_size;
- ssc = gtr_phy->dev->refclk_sscs[gtr_phy->refclk];
+ ssc = xpsgtr_find_sscs(gtr_phy);
+ if (!ssc)
+ return -EINVAL;
+
step_size = ssc->step_size;
xpsgtr_clr_set(gtr_phy->dev, PLL_REF_SEL(gtr_phy->lane),
@@ -446,6 +471,8 @@ static void xpsgtr_configure_pll(struct xpsgtr_phy *gtr_phy)
xpsgtr_clr_set_phy(gtr_phy, L0_PLL_SS_STEP_SIZE_3_MSB,
STEP_SIZE_3_MASK, (step_size & STEP_SIZE_3_MASK) |
FORCE_STEP_SIZE | FORCE_STEPS);
+
+ return 0;
}
/* Configure the lane protocol. */
@@ -658,7 +685,10 @@ static int xpsgtr_phy_init(struct phy *phy)
* Configure the PLL, the lane protocol, and perform protocol-specific
* initialization.
*/
- xpsgtr_configure_pll(gtr_phy);
+ ret = xpsgtr_configure_pll(gtr_phy);
+ if (ret)
+ goto out;
+
xpsgtr_lane_set_protocol(gtr_phy);
switch (gtr_phy->protocol) {
@@ -823,8 +853,7 @@ static struct phy *xpsgtr_xlate(struct device *dev,
}
refclk = args->args[3];
- if (refclk >= ARRAY_SIZE(gtr_dev->refclk_sscs) ||
- !gtr_dev->refclk_sscs[refclk]) {
+ if (refclk >= ARRAY_SIZE(gtr_dev->clk)) {
dev_err(dev, "Invalid reference clock number %u\n", refclk);
return ERR_PTR(-EINVAL);
}
@@ -928,9 +957,7 @@ static int xpsgtr_get_ref_clocks(struct xpsgtr_dev *gtr_dev)
{
unsigned int refclk;
- for (refclk = 0; refclk < ARRAY_SIZE(gtr_dev->refclk_sscs); ++refclk) {
- unsigned long rate;
- unsigned int i;
+ for (refclk = 0; refclk < ARRAY_SIZE(gtr_dev->clk); ++refclk) {
struct clk *clk;
char name[8];
@@ -946,29 +973,6 @@ static int xpsgtr_get_ref_clocks(struct xpsgtr_dev *gtr_dev)
continue;
gtr_dev->clk[refclk] = clk;
-
- /*
- * Get the spread spectrum (SSC) settings for the reference
- * clock rate.
- */
- rate = clk_get_rate(clk);
-
- for (i = 0 ; i < ARRAY_SIZE(ssc_lookup); i++) {
- /* Allow an error of 100 ppm */
- unsigned long error = ssc_lookup[i].refclk_rate / 10000;
-
- if (abs(rate - ssc_lookup[i].refclk_rate) < error) {
- gtr_dev->refclk_sscs[refclk] = &ssc_lookup[i];
- break;
- }
- }
-
- if (i == ARRAY_SIZE(ssc_lookup)) {
- dev_err(gtr_dev->dev,
- "Invalid rate %lu for reference clock %u\n",
- rate, refclk);
- return -EINVAL;
- }
}
return 0;
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 838bdc138ffe..9aec922613ce 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -1388,6 +1388,18 @@ config RTC_DRV_ASM9260
This driver can also be built as a module. If so, the module
will be called rtc-asm9260.
+config RTC_DRV_CV1800
+ tristate "Sophgo CV1800 RTC"
+ depends on SOPHGO_CV1800_RTCSYS || COMPILE_TEST
+ select MFD_SYSCON
+ select REGMAP
+ help
+ If you say yes here you get support the RTC driver for Sophgo CV1800
+ series SoC.
+
+ This driver can also be built as a module. If so, the module will be
+ called rtc-cv1800.
+
config RTC_DRV_DIGICOLOR
tristate "Conexant Digicolor RTC"
depends on ARCH_DIGICOLOR || COMPILE_TEST
@@ -2088,7 +2100,7 @@ config RTC_DRV_AMLOGIC_A4
tristate "Amlogic RTC"
depends on ARCH_MESON || COMPILE_TEST
select REGMAP_MMIO
- default y
+ default ARCH_MESON
help
If you say yes here you get support for the RTC block on the
Amlogic A113L2(A4) and A113X2(A5) SoCs.
@@ -2096,4 +2108,15 @@ config RTC_DRV_AMLOGIC_A4
This driver can also be built as a module. If so, the module
will be called "rtc-amlogic-a4".
+config RTC_DRV_S32G
+ tristate "RTC driver for S32G2/S32G3 SoCs"
+ depends on ARCH_S32 || COMPILE_TEST
+ depends on COMMON_CLK
+ help
+ Say yes to enable RTC driver for platforms based on the
+ S32G2/S32G3 SoC family.
+
+ This RTC module can be used as a wakeup source.
+ Please note that it is not battery-powered.
+
endif # RTC_CLASS
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index 31473b3276d9..4619aa2ac469 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -44,6 +44,7 @@ obj-$(CONFIG_RTC_DRV_CADENCE) += rtc-cadence.o
obj-$(CONFIG_RTC_DRV_CMOS) += rtc-cmos.o
obj-$(CONFIG_RTC_DRV_CPCAP) += rtc-cpcap.o
obj-$(CONFIG_RTC_DRV_CROS_EC) += rtc-cros-ec.o
+obj-$(CONFIG_RTC_DRV_CV1800) += rtc-cv1800.o
obj-$(CONFIG_RTC_DRV_DA9052) += rtc-da9052.o
obj-$(CONFIG_RTC_DRV_DA9055) += rtc-da9055.o
obj-$(CONFIG_RTC_DRV_DA9063) += rtc-da9063.o
@@ -160,6 +161,7 @@ obj-$(CONFIG_RTC_DRV_RX8111) += rtc-rx8111.o
obj-$(CONFIG_RTC_DRV_RX8581) += rtc-rx8581.o
obj-$(CONFIG_RTC_DRV_RZN1) += rtc-rzn1.o
obj-$(CONFIG_RTC_DRV_RENESAS_RTCA3) += rtc-renesas-rtca3.o
+obj-$(CONFIG_RTC_DRV_S32G) += rtc-s32g.o
obj-$(CONFIG_RTC_DRV_S35390A) += rtc-s35390a.o
obj-$(CONFIG_RTC_DRV_S3C) += rtc-s3c.o
obj-$(CONFIG_RTC_DRV_S5M) += rtc-s5m.o
diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c
index b88cd4fb295b..b1a2be1f9e3b 100644
--- a/drivers/rtc/class.c
+++ b/drivers/rtc/class.c
@@ -326,7 +326,7 @@ static void rtc_device_get_offset(struct rtc_device *rtc)
*
* Otherwise the offset seconds should be 0.
*/
- if (rtc->start_secs > rtc->range_max ||
+ if ((rtc->start_secs >= 0 && rtc->start_secs > rtc->range_max) ||
rtc->start_secs + range_secs - 1 < rtc->range_min)
rtc->offset_secs = rtc->start_secs - rtc->range_min;
else if (rtc->start_secs > rtc->range_min)
diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
index aaf76406cd7d..dc741ba29fa3 100644
--- a/drivers/rtc/interface.c
+++ b/drivers/rtc/interface.c
@@ -205,7 +205,7 @@ static int rtc_read_alarm_internal(struct rtc_device *rtc,
mutex_unlock(&rtc->ops_lock);
- trace_rtc_read_alarm(rtc_tm_to_time64(&alarm->time), err);
+ trace_rtc_read_alarm(err?0:rtc_tm_to_time64(&alarm->time), err);
return err;
}
diff --git a/drivers/rtc/lib.c b/drivers/rtc/lib.c
index fe361652727a..13b5b1f20465 100644
--- a/drivers/rtc/lib.c
+++ b/drivers/rtc/lib.c
@@ -46,24 +46,38 @@ EXPORT_SYMBOL(rtc_year_days);
* rtc_time64_to_tm - converts time64_t to rtc_time.
*
* @time: The number of seconds since 01-01-1970 00:00:00.
- * (Must be positive.)
+ * Works for values since at least 1900
* @tm: Pointer to the struct rtc_time.
*/
void rtc_time64_to_tm(time64_t time, struct rtc_time *tm)
{
- unsigned int secs;
- int days;
+ int days, secs;
u64 u64tmp;
u32 u32tmp, udays, century, day_of_century, year_of_century, year,
day_of_year, month, day;
bool is_Jan_or_Feb, is_leap_year;
- /* time must be positive */
+ /*
+ * Get days and seconds while preserving the sign to
+ * handle negative time values (dates before 1970-01-01)
+ */
days = div_s64_rem(time, 86400, &secs);
+ /*
+ * We need 0 <= secs < 86400 which isn't given for negative
+ * values of time. Fixup accordingly.
+ */
+ if (secs < 0) {
+ days -= 1;
+ secs += 86400;
+ }
+
/* day of the week, 1970-01-01 was a Thursday */
tm->tm_wday = (days + 4) % 7;
+ /* Ensure tm_wday is always positive */
+ if (tm->tm_wday < 0)
+ tm->tm_wday += 7;
/*
* The following algorithm is, basically, Proposition 6.3 of Neri
@@ -93,7 +107,7 @@ void rtc_time64_to_tm(time64_t time, struct rtc_time *tm)
* thus, is slightly different from [1].
*/
- udays = ((u32) days) + 719468;
+ udays = days + 719468;
u32tmp = 4 * udays + 3;
century = u32tmp / 146097;
diff --git a/drivers/rtc/lib_test.c b/drivers/rtc/lib_test.c
index c30c759662e3..0eebad1fe2a0 100644
--- a/drivers/rtc/lib_test.c
+++ b/drivers/rtc/lib_test.c
@@ -6,8 +6,10 @@
/*
* Advance a date by one day.
*/
-static void advance_date(int *year, int *month, int *mday, int *yday)
+static void advance_date(int *year, int *month, int *mday, int *yday, int *wday)
{
+ *wday = (*wday + 1) % 7;
+
if (*mday != rtc_month_days(*month - 1, *year)) {
++*mday;
++*yday;
@@ -39,35 +41,38 @@ static void rtc_time64_to_tm_test_date_range(struct kunit *test, int years)
*/
time64_t total_secs = ((time64_t)years) / 400 * 146097 * 86400;
- int year = 1970;
+ int year = 1900;
int month = 1;
int mday = 1;
int yday = 1;
+ int wday = 1; /* Jan 1st 1900 was a Monday */
struct rtc_time result;
time64_t secs;
- s64 days;
+ const time64_t sec_offset = RTC_TIMESTAMP_BEGIN_1900 + ((1 * 60) + 2) * 60 + 3;
for (secs = 0; secs <= total_secs; secs += 86400) {
- rtc_time64_to_tm(secs, &result);
-
- days = div_s64(secs, 86400);
+ rtc_time64_to_tm(secs + sec_offset, &result);
- #define FAIL_MSG "%d/%02d/%02d (%2d) : %lld", \
- year, month, mday, yday, days
+ #define FAIL_MSG "%d/%02d/%02d (%2d, %d) : %lld", \
+ year, month, mday, yday, wday, secs + sec_offset
KUNIT_ASSERT_EQ_MSG(test, year - 1900, result.tm_year, FAIL_MSG);
KUNIT_ASSERT_EQ_MSG(test, month - 1, result.tm_mon, FAIL_MSG);
KUNIT_ASSERT_EQ_MSG(test, mday, result.tm_mday, FAIL_MSG);
KUNIT_ASSERT_EQ_MSG(test, yday, result.tm_yday, FAIL_MSG);
+ KUNIT_ASSERT_EQ_MSG(test, 1, result.tm_hour, FAIL_MSG);
+ KUNIT_ASSERT_EQ_MSG(test, 2, result.tm_min, FAIL_MSG);
+ KUNIT_ASSERT_EQ_MSG(test, 3, result.tm_sec, FAIL_MSG);
+ KUNIT_ASSERT_EQ_MSG(test, wday, result.tm_wday, FAIL_MSG);
- advance_date(&year, &month, &mday, &yday);
+ advance_date(&year, &month, &mday, &yday, &wday);
}
}
/*
- * Checks every day in a 160000 years interval starting on 1970-01-01
+ * Checks every day in a 160000 years interval starting on 1900-01-01
* against the expected result.
*/
static void rtc_time64_to_tm_test_date_range_160000(struct kunit *test)
@@ -76,7 +81,7 @@ static void rtc_time64_to_tm_test_date_range_160000(struct kunit *test)
}
/*
- * Checks every day in a 1000 years interval starting on 1970-01-01
+ * Checks every day in a 1000 years interval starting on 1900-01-01
* against the expected result.
*/
static void rtc_time64_to_tm_test_date_range_1000(struct kunit *test)
diff --git a/drivers/rtc/rtc-at91rm9200.c b/drivers/rtc/rtc-at91rm9200.c
index f6b0102a843a..643734dbae33 100644
--- a/drivers/rtc/rtc-at91rm9200.c
+++ b/drivers/rtc/rtc-at91rm9200.c
@@ -654,4 +654,3 @@ module_platform_driver_probe(at91_rtc_driver, at91_rtc_probe);
MODULE_AUTHOR("Rick Bronson");
MODULE_DESCRIPTION("RTC driver for Atmel AT91RM9200");
MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:at91_rtc");
diff --git a/drivers/rtc/rtc-cpcap.c b/drivers/rtc/rtc-cpcap.c
index 568a89e79c11..c170345ac076 100644
--- a/drivers/rtc/rtc-cpcap.c
+++ b/drivers/rtc/rtc-cpcap.c
@@ -320,7 +320,6 @@ static struct platform_driver cpcap_rtc_driver = {
module_platform_driver(cpcap_rtc_driver);
-MODULE_ALIAS("platform:cpcap-rtc");
MODULE_DESCRIPTION("CPCAP RTC driver");
MODULE_AUTHOR("Sebastian Reichel <sre@kernel.org>");
MODULE_LICENSE("GPL");
diff --git a/drivers/rtc/rtc-cv1800.c b/drivers/rtc/rtc-cv1800.c
new file mode 100644
index 000000000000..678c2c10bf58
--- /dev/null
+++ b/drivers/rtc/rtc-cv1800.c
@@ -0,0 +1,218 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * rtc-cv1800.c: RTC driver for Sophgo cv1800 RTC
+ *
+ * Author: Jingbao Qiu <qiujingbao.dlmu@gmail.com>
+ */
+
+#include <linux/clk.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/rtc.h>
+
+#define SEC_PULSE_GEN 0x1004
+#define ALARM_TIME 0x1008
+#define ALARM_ENABLE 0x100C
+#define SET_SEC_CNTR_VAL 0x1010
+#define SET_SEC_CNTR_TRIG 0x1014
+#define SEC_CNTR_VAL 0x1018
+
+/*
+ * When in VDDBKUP domain, this MACRO register
+ * does not power down
+ */
+#define MACRO_RO_T 0x14A8
+#define MACRO_RG_SET_T 0x1498
+
+#define ALARM_ENABLE_MASK BIT(0)
+#define SEL_SEC_PULSE BIT(31)
+
+struct cv1800_rtc_priv {
+ struct rtc_device *rtc_dev;
+ struct regmap *rtc_map;
+ struct clk *clk;
+ int irq;
+};
+
+static bool cv1800_rtc_enabled(struct device *dev)
+{
+ struct cv1800_rtc_priv *info = dev_get_drvdata(dev);
+ u32 reg;
+
+ regmap_read(info->rtc_map, SEC_PULSE_GEN, &reg);
+
+ return (reg & SEL_SEC_PULSE) == 0;
+}
+
+static void cv1800_rtc_enable(struct device *dev)
+{
+ struct cv1800_rtc_priv *info = dev_get_drvdata(dev);
+
+ /* Sec pulse generated internally */
+ regmap_update_bits(info->rtc_map, SEC_PULSE_GEN, SEL_SEC_PULSE, 0);
+}
+
+static int cv1800_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
+{
+ struct cv1800_rtc_priv *info = dev_get_drvdata(dev);
+
+ regmap_write(info->rtc_map, ALARM_ENABLE, enabled);
+
+ return 0;
+}
+
+static int cv1800_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
+{
+ struct cv1800_rtc_priv *info = dev_get_drvdata(dev);
+ unsigned long alarm_time;
+
+ alarm_time = rtc_tm_to_time64(&alrm->time);
+
+ cv1800_rtc_alarm_irq_enable(dev, 0);
+
+ regmap_write(info->rtc_map, ALARM_TIME, alarm_time);
+
+ cv1800_rtc_alarm_irq_enable(dev, alrm->enabled);
+
+ return 0;
+}
+
+static int cv1800_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alarm)
+{
+ struct cv1800_rtc_priv *info = dev_get_drvdata(dev);
+ u32 enabled;
+ u32 time;
+
+ if (!cv1800_rtc_enabled(dev)) {
+ alarm->enabled = 0;
+ return 0;
+ }
+
+ regmap_read(info->rtc_map, ALARM_ENABLE, &enabled);
+
+ alarm->enabled = enabled & ALARM_ENABLE_MASK;
+
+ regmap_read(info->rtc_map, ALARM_TIME, &time);
+
+ rtc_time64_to_tm(time, &alarm->time);
+
+ return 0;
+}
+
+static int cv1800_rtc_read_time(struct device *dev, struct rtc_time *tm)
+{
+ struct cv1800_rtc_priv *info = dev_get_drvdata(dev);
+ u32 sec;
+
+ if (!cv1800_rtc_enabled(dev))
+ return -EINVAL;
+
+ regmap_read(info->rtc_map, SEC_CNTR_VAL, &sec);
+
+ rtc_time64_to_tm(sec, tm);
+
+ return 0;
+}
+
+static int cv1800_rtc_set_time(struct device *dev, struct rtc_time *tm)
+{
+ struct cv1800_rtc_priv *info = dev_get_drvdata(dev);
+ unsigned long sec;
+
+ sec = rtc_tm_to_time64(tm);
+
+ regmap_write(info->rtc_map, SET_SEC_CNTR_VAL, sec);
+ regmap_write(info->rtc_map, SET_SEC_CNTR_TRIG, 1);
+
+ regmap_write(info->rtc_map, MACRO_RG_SET_T, sec);
+
+ cv1800_rtc_enable(dev);
+
+ return 0;
+}
+
+static irqreturn_t cv1800_rtc_irq_handler(int irq, void *dev_id)
+{
+ struct cv1800_rtc_priv *info = dev_id;
+
+ rtc_update_irq(info->rtc_dev, 1, RTC_IRQF | RTC_AF);
+
+ regmap_write(info->rtc_map, ALARM_ENABLE, 0);
+
+ return IRQ_HANDLED;
+}
+
+static const struct rtc_class_ops cv1800_rtc_ops = {
+ .read_time = cv1800_rtc_read_time,
+ .set_time = cv1800_rtc_set_time,
+ .read_alarm = cv1800_rtc_read_alarm,
+ .set_alarm = cv1800_rtc_set_alarm,
+ .alarm_irq_enable = cv1800_rtc_alarm_irq_enable,
+};
+
+static int cv1800_rtc_probe(struct platform_device *pdev)
+{
+ struct cv1800_rtc_priv *rtc;
+ int ret;
+
+ rtc = devm_kzalloc(&pdev->dev, sizeof(*rtc), GFP_KERNEL);
+ if (!rtc)
+ return -ENOMEM;
+
+ rtc->rtc_map = device_node_to_regmap(pdev->dev.parent->of_node);
+ if (IS_ERR(rtc->rtc_map))
+ return dev_err_probe(&pdev->dev, PTR_ERR(rtc->rtc_map),
+ "cannot get parent regmap\n");
+
+ rtc->irq = platform_get_irq(pdev, 0);
+ if (rtc->irq < 0)
+ return rtc->irq;
+
+ rtc->clk = devm_clk_get_enabled(pdev->dev.parent, "rtc");
+ if (IS_ERR(rtc->clk))
+ return dev_err_probe(&pdev->dev, PTR_ERR(rtc->clk),
+ "rtc clk not found\n");
+
+ platform_set_drvdata(pdev, rtc);
+
+ device_init_wakeup(&pdev->dev, 1);
+
+ rtc->rtc_dev = devm_rtc_allocate_device(&pdev->dev);
+ if (IS_ERR(rtc->rtc_dev))
+ return PTR_ERR(rtc->rtc_dev);
+
+ rtc->rtc_dev->ops = &cv1800_rtc_ops;
+ rtc->rtc_dev->range_max = U32_MAX;
+
+ ret = devm_request_irq(&pdev->dev, rtc->irq, cv1800_rtc_irq_handler,
+ IRQF_TRIGGER_HIGH, "rtc alarm", rtc);
+ if (ret)
+ return dev_err_probe(&pdev->dev, ret,
+ "cannot register interrupt handler\n");
+
+ return devm_rtc_register_device(rtc->rtc_dev);
+}
+
+static const struct platform_device_id cv1800_rtc_id[] = {
+ { .name = "cv1800b-rtc" },
+ { /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(platform, cv1800_rtc_id);
+
+static struct platform_driver cv1800_rtc_driver = {
+ .driver = {
+ .name = "sophgo-cv1800-rtc",
+ },
+ .probe = cv1800_rtc_probe,
+ .id_table = cv1800_rtc_id,
+};
+
+module_platform_driver(cv1800_rtc_driver);
+MODULE_AUTHOR("Jingbao Qiu");
+MODULE_DESCRIPTION("Sophgo cv1800 RTC Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/rtc/rtc-da9063.c b/drivers/rtc/rtc-da9063.c
index 859397541f29..557c9b29dcc1 100644
--- a/drivers/rtc/rtc-da9063.c
+++ b/drivers/rtc/rtc-da9063.c
@@ -194,26 +194,17 @@ static void da9063_tm_to_data(struct rtc_time *tm, u8 *data,
config->rtc_count_year_mask;
}
-static int da9063_rtc_stop_alarm(struct device *dev)
-{
- struct da9063_compatible_rtc *rtc = dev_get_drvdata(dev);
- const struct da9063_compatible_rtc_regmap *config = rtc->config;
-
- return regmap_update_bits(rtc->regmap,
- config->rtc_alarm_year_reg,
- config->rtc_alarm_on_mask,
- 0);
-}
-
-static int da9063_rtc_start_alarm(struct device *dev)
+static int da9063_rtc_alarm_irq_enable(struct device *dev,
+ unsigned int enabled)
{
struct da9063_compatible_rtc *rtc = dev_get_drvdata(dev);
const struct da9063_compatible_rtc_regmap *config = rtc->config;
+ u8 set_bit = enabled ? config->rtc_alarm_on_mask : 0;
return regmap_update_bits(rtc->regmap,
config->rtc_alarm_year_reg,
config->rtc_alarm_on_mask,
- config->rtc_alarm_on_mask);
+ set_bit);
}
static int da9063_rtc_read_time(struct device *dev, struct rtc_time *tm)
@@ -312,7 +303,7 @@ static int da9063_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
da9063_tm_to_data(&alrm->time, data, rtc);
- ret = da9063_rtc_stop_alarm(dev);
+ ret = da9063_rtc_alarm_irq_enable(dev, 0);
if (ret < 0) {
dev_err(dev, "Failed to stop alarm: %d\n", ret);
return ret;
@@ -330,7 +321,7 @@ static int da9063_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
da9063_data_to_tm(data, &rtc->alarm_time, rtc);
if (alrm->enabled) {
- ret = da9063_rtc_start_alarm(dev);
+ ret = da9063_rtc_alarm_irq_enable(dev, 1);
if (ret < 0) {
dev_err(dev, "Failed to start alarm: %d\n", ret);
return ret;
@@ -340,15 +331,6 @@ static int da9063_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
return ret;
}
-static int da9063_rtc_alarm_irq_enable(struct device *dev,
- unsigned int enabled)
-{
- if (enabled)
- return da9063_rtc_start_alarm(dev);
- else
- return da9063_rtc_stop_alarm(dev);
-}
-
static irqreturn_t da9063_alarm_event(int irq, void *data)
{
struct da9063_compatible_rtc *rtc = data;
@@ -513,4 +495,3 @@ module_platform_driver(da9063_rtc_driver);
MODULE_AUTHOR("S Twiss <stwiss.opensource@diasemi.com>");
MODULE_DESCRIPTION("Real time clock device driver for Dialog DA9063");
MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:" DA9063_DRVNAME_RTC);
diff --git a/drivers/rtc/rtc-jz4740.c b/drivers/rtc/rtc-jz4740.c
index 44bba356268c..11fce47be780 100644
--- a/drivers/rtc/rtc-jz4740.c
+++ b/drivers/rtc/rtc-jz4740.c
@@ -437,4 +437,3 @@ module_platform_driver(jz4740_rtc_driver);
MODULE_AUTHOR("Lars-Peter Clausen <lars@metafoo.de>");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("RTC driver for the JZ4740 SoC\n");
-MODULE_ALIAS("platform:jz4740-rtc");
diff --git a/drivers/rtc/rtc-loongson.c b/drivers/rtc/rtc-loongson.c
index 97e5625c064c..2ca7ffd5d7a9 100644
--- a/drivers/rtc/rtc-loongson.c
+++ b/drivers/rtc/rtc-loongson.c
@@ -129,6 +129,14 @@ static u32 loongson_rtc_handler(void *id)
{
struct loongson_rtc_priv *priv = (struct loongson_rtc_priv *)id;
+ rtc_update_irq(priv->rtcdev, 1, RTC_AF | RTC_IRQF);
+
+ /*
+ * The TOY_MATCH0_REG should be cleared 0 here,
+ * otherwise the interrupt cannot be cleared.
+ */
+ regmap_write(priv->regmap, TOY_MATCH0_REG, 0);
+
spin_lock(&priv->lock);
/* Disable RTC alarm wakeup and interrupt */
writel(readl(priv->pm_base + PM1_EN_REG) & ~RTC_EN,
diff --git a/drivers/rtc/rtc-m41t80.c b/drivers/rtc/rtc-m41t80.c
index 1f58ae8b151e..c568639d2151 100644
--- a/drivers/rtc/rtc-m41t80.c
+++ b/drivers/rtc/rtc-m41t80.c
@@ -22,6 +22,7 @@
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/string.h>
+#include <linux/delay.h>
#ifdef CONFIG_RTC_DRV_M41T80_WDT
#include <linux/fs.h>
#include <linux/ioctl.h>
@@ -204,14 +205,14 @@ static int m41t80_rtc_read_time(struct device *dev, struct rtc_time *tm)
return flags;
if (flags & M41T80_FLAGS_OF) {
- dev_err(&client->dev, "Oscillator failure, data is invalid.\n");
+ dev_err(&client->dev, "Oscillator failure, time may not be accurate, write time to RTC to fix it.\n");
return -EINVAL;
}
err = i2c_smbus_read_i2c_block_data(client, M41T80_REG_SSEC,
sizeof(buf), buf);
if (err < 0) {
- dev_err(&client->dev, "Unable to read date\n");
+ dev_dbg(&client->dev, "Unable to read date\n");
return err;
}
@@ -227,21 +228,31 @@ static int m41t80_rtc_read_time(struct device *dev, struct rtc_time *tm)
return 0;
}
-static int m41t80_rtc_set_time(struct device *dev, struct rtc_time *tm)
+static int m41t80_rtc_set_time(struct device *dev, struct rtc_time *in_tm)
{
struct i2c_client *client = to_i2c_client(dev);
struct m41t80_data *clientdata = i2c_get_clientdata(client);
+ struct rtc_time tm = *in_tm;
unsigned char buf[8];
int err, flags;
+ time64_t time = 0;
+ flags = i2c_smbus_read_byte_data(client, M41T80_REG_FLAGS);
+ if (flags < 0)
+ return flags;
+ if (flags & M41T80_FLAGS_OF) {
+ /* add 4sec of oscillator stablize time otherwise we are behind 4sec */
+ time = rtc_tm_to_time64(&tm);
+ rtc_time64_to_tm(time + 4, &tm);
+ }
buf[M41T80_REG_SSEC] = 0;
- buf[M41T80_REG_SEC] = bin2bcd(tm->tm_sec);
- buf[M41T80_REG_MIN] = bin2bcd(tm->tm_min);
- buf[M41T80_REG_HOUR] = bin2bcd(tm->tm_hour);
- buf[M41T80_REG_DAY] = bin2bcd(tm->tm_mday);
- buf[M41T80_REG_MON] = bin2bcd(tm->tm_mon + 1);
- buf[M41T80_REG_YEAR] = bin2bcd(tm->tm_year - 100);
- buf[M41T80_REG_WDAY] = tm->tm_wday;
+ buf[M41T80_REG_SEC] = bin2bcd(tm.tm_sec);
+ buf[M41T80_REG_MIN] = bin2bcd(tm.tm_min);
+ buf[M41T80_REG_HOUR] = bin2bcd(tm.tm_hour);
+ buf[M41T80_REG_DAY] = bin2bcd(tm.tm_mday);
+ buf[M41T80_REG_MON] = bin2bcd(tm.tm_mon + 1);
+ buf[M41T80_REG_YEAR] = bin2bcd(tm.tm_year - 100);
+ buf[M41T80_REG_WDAY] = tm.tm_wday;
/* If the square wave output is controlled in the weekday register */
if (clientdata->features & M41T80_FEATURE_SQ_ALT) {
@@ -257,20 +268,37 @@ static int m41t80_rtc_set_time(struct device *dev, struct rtc_time *tm)
err = i2c_smbus_write_i2c_block_data(client, M41T80_REG_SSEC,
sizeof(buf), buf);
if (err < 0) {
- dev_err(&client->dev, "Unable to write to date registers\n");
+ dev_dbg(&client->dev, "Unable to write to date registers\n");
return err;
}
-
- /* Clear the OF bit of Flags Register */
- flags = i2c_smbus_read_byte_data(client, M41T80_REG_FLAGS);
- if (flags < 0)
- return flags;
-
- err = i2c_smbus_write_byte_data(client, M41T80_REG_FLAGS,
- flags & ~M41T80_FLAGS_OF);
- if (err < 0) {
- dev_err(&client->dev, "Unable to write flags register\n");
- return err;
+ if (flags & M41T80_FLAGS_OF) {
+ /* OF cannot be immediately reset: oscillator has to be restarted. */
+ dev_warn(&client->dev, "OF bit is still set, kickstarting clock.\n");
+ err = i2c_smbus_write_byte_data(client, M41T80_REG_SEC, M41T80_SEC_ST);
+ if (err < 0) {
+ dev_dbg(&client->dev, "Can't set ST bit\n");
+ return err;
+ }
+ err = i2c_smbus_write_byte_data(client, M41T80_REG_SEC, flags & ~M41T80_SEC_ST);
+ if (err < 0) {
+ dev_dbg(&client->dev, "Can't clear ST bit\n");
+ return err;
+ }
+ /* oscillator must run for 4sec before we attempt to reset OF bit */
+ msleep(4000);
+ /* Clear the OF bit of Flags Register */
+ err = i2c_smbus_write_byte_data(client, M41T80_REG_FLAGS, flags & ~M41T80_FLAGS_OF);
+ if (err < 0) {
+ dev_dbg(&client->dev, "Unable to write flags register\n");
+ return err;
+ }
+ flags = i2c_smbus_read_byte_data(client, M41T80_REG_FLAGS);
+ if (flags < 0) {
+ return flags;
+ } else if (flags & M41T80_FLAGS_OF) {
+ dev_dbg(&client->dev, "Can't clear the OF bit check battery\n");
+ return err;
+ }
}
return err;
@@ -308,7 +336,7 @@ static int m41t80_alarm_irq_enable(struct device *dev, unsigned int enabled)
retval = i2c_smbus_write_byte_data(client, M41T80_REG_ALARM_MON, flags);
if (retval < 0) {
- dev_err(dev, "Unable to enable alarm IRQ %d\n", retval);
+ dev_dbg(dev, "Unable to enable alarm IRQ %d\n", retval);
return retval;
}
return 0;
@@ -333,7 +361,7 @@ static int m41t80_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
err = i2c_smbus_write_byte_data(client, M41T80_REG_ALARM_MON,
ret & ~(M41T80_ALMON_AFE));
if (err < 0) {
- dev_err(dev, "Unable to clear AFE bit\n");
+ dev_dbg(dev, "Unable to clear AFE bit\n");
return err;
}
@@ -347,7 +375,7 @@ static int m41t80_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
err = i2c_smbus_write_byte_data(client, M41T80_REG_FLAGS,
ret & ~(M41T80_FLAGS_AF));
if (err < 0) {
- dev_err(dev, "Unable to clear AF bit\n");
+ dev_dbg(dev, "Unable to clear AF bit\n");
return err;
}
diff --git a/drivers/rtc/rtc-mt6397.c b/drivers/rtc/rtc-mt6397.c
index 6979d225a78e..692c00ff544b 100644
--- a/drivers/rtc/rtc-mt6397.c
+++ b/drivers/rtc/rtc-mt6397.c
@@ -332,6 +332,7 @@ static const struct mtk_rtc_data mt6397_rtc_data = {
static const struct of_device_id mt6397_rtc_of_match[] = {
{ .compatible = "mediatek,mt6323-rtc", .data = &mt6397_rtc_data },
+ { .compatible = "mediatek,mt6357-rtc", .data = &mt6358_rtc_data },
{ .compatible = "mediatek,mt6358-rtc", .data = &mt6358_rtc_data },
{ .compatible = "mediatek,mt6397-rtc", .data = &mt6397_rtc_data },
{ }
diff --git a/drivers/rtc/rtc-pcf8563.c b/drivers/rtc/rtc-pcf8563.c
index 5a084d426e58..b2611697fa5e 100644
--- a/drivers/rtc/rtc-pcf8563.c
+++ b/drivers/rtc/rtc-pcf8563.c
@@ -285,7 +285,7 @@ static int pcf8563_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *tm)
buf[2] = bin2bcd(tm->time.tm_mday);
buf[3] = tm->time.tm_wday & 0x07;
- err = regmap_bulk_write(pcf8563->regmap, PCF8563_REG_SC, buf,
+ err = regmap_bulk_write(pcf8563->regmap, PCF8563_REG_AMN, buf,
sizeof(buf));
if (err)
return err;
diff --git a/drivers/rtc/rtc-pm8xxx.c b/drivers/rtc/rtc-pm8xxx.c
index 3c1dddcc81df..e624f848c22b 100644
--- a/drivers/rtc/rtc-pm8xxx.c
+++ b/drivers/rtc/rtc-pm8xxx.c
@@ -576,13 +576,20 @@ static int pm8xxx_rtc_probe_offset(struct pm8xxx_rtc *rtc_dd)
}
/* Use UEFI storage as fallback if available */
- if (efivar_is_available()) {
- rc = pm8xxx_rtc_read_uefi_offset(rtc_dd);
- if (rc == 0)
- rtc_dd->use_uefi = true;
+ rtc_dd->use_uefi = of_property_read_bool(rtc_dd->dev->of_node,
+ "qcom,uefi-rtc-info");
+ if (!rtc_dd->use_uefi)
+ return 0;
+
+ if (!efivar_is_available()) {
+ if (IS_ENABLED(CONFIG_EFI))
+ return -EPROBE_DEFER;
+
+ dev_warn(rtc_dd->dev, "efivars not available\n");
+ rtc_dd->use_uefi = false;
}
- return 0;
+ return pm8xxx_rtc_read_uefi_offset(rtc_dd);
}
static int pm8xxx_rtc_probe(struct platform_device *pdev)
@@ -676,7 +683,6 @@ static struct platform_driver pm8xxx_rtc_driver = {
module_platform_driver(pm8xxx_rtc_driver);
-MODULE_ALIAS("platform:rtc-pm8xxx");
MODULE_DESCRIPTION("PMIC8xxx RTC driver");
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Anirudh Ghayal <aghayal@codeaurora.org>");
diff --git a/drivers/rtc/rtc-rzn1.c b/drivers/rtc/rtc-rzn1.c
index eeb9612a666f..c4ed43735457 100644
--- a/drivers/rtc/rtc-rzn1.c
+++ b/drivers/rtc/rtc-rzn1.c
@@ -12,6 +12,7 @@
*/
#include <linux/bcd.h>
+#include <linux/clk.h>
#include <linux/init.h>
#include <linux/iopoll.h>
#include <linux/module.h>
@@ -22,9 +23,9 @@
#include <linux/spinlock.h>
#define RZN1_RTC_CTL0 0x00
-#define RZN1_RTC_CTL0_SLSB_SUBU 0
#define RZN1_RTC_CTL0_SLSB_SCMP BIT(4)
#define RZN1_RTC_CTL0_AMPM BIT(5)
+#define RZN1_RTC_CTL0_CEST BIT(6)
#define RZN1_RTC_CTL0_CE BIT(7)
#define RZN1_RTC_CTL1 0x04
@@ -49,6 +50,8 @@
#define RZN1_RTC_SUBU_DEV BIT(7)
#define RZN1_RTC_SUBU_DECR BIT(6)
+#define RZN1_RTC_SCMP 0x3c
+
#define RZN1_RTC_ALM 0x40
#define RZN1_RTC_ALH 0x44
#define RZN1_RTC_ALW 0x48
@@ -356,7 +359,7 @@ static int rzn1_rtc_set_offset(struct device *dev, long offset)
return 0;
}
-static const struct rtc_class_ops rzn1_rtc_ops = {
+static const struct rtc_class_ops rzn1_rtc_ops_subu = {
.read_time = rzn1_rtc_read_time,
.set_time = rzn1_rtc_set_time,
.read_alarm = rzn1_rtc_read_alarm,
@@ -366,11 +369,21 @@ static const struct rtc_class_ops rzn1_rtc_ops = {
.set_offset = rzn1_rtc_set_offset,
};
+static const struct rtc_class_ops rzn1_rtc_ops_scmp = {
+ .read_time = rzn1_rtc_read_time,
+ .set_time = rzn1_rtc_set_time,
+ .read_alarm = rzn1_rtc_read_alarm,
+ .set_alarm = rzn1_rtc_set_alarm,
+ .alarm_irq_enable = rzn1_rtc_alarm_irq_enable,
+};
+
static int rzn1_rtc_probe(struct platform_device *pdev)
{
struct rzn1_rtc *rtc;
- int irq;
- int ret;
+ u32 val, scmp_val = 0;
+ struct clk *xtal;
+ unsigned long rate;
+ int irq, ret;
rtc = devm_kzalloc(&pdev->dev, sizeof(*rtc), GFP_KERNEL);
if (!rtc)
@@ -393,7 +406,6 @@ static int rzn1_rtc_probe(struct platform_device *pdev)
rtc->rtcdev->range_min = RTC_TIMESTAMP_BEGIN_2000;
rtc->rtcdev->range_max = RTC_TIMESTAMP_END_2099;
rtc->rtcdev->alarm_offset_max = 7 * 86400;
- rtc->rtcdev->ops = &rzn1_rtc_ops;
ret = devm_pm_runtime_enable(&pdev->dev);
if (ret < 0)
@@ -402,12 +414,44 @@ static int rzn1_rtc_probe(struct platform_device *pdev)
if (ret < 0)
return ret;
- /*
- * Ensure the clock counter is enabled.
- * Set 24-hour mode and possible oscillator offset compensation in SUBU mode.
- */
- writel(RZN1_RTC_CTL0_CE | RZN1_RTC_CTL0_AMPM | RZN1_RTC_CTL0_SLSB_SUBU,
- rtc->base + RZN1_RTC_CTL0);
+ /* Only switch to scmp if we have an xtal clock with a valid rate and != 32768 */
+ xtal = devm_clk_get_optional(&pdev->dev, "xtal");
+ if (IS_ERR(xtal)) {
+ ret = PTR_ERR(xtal);
+ goto dis_runtime_pm;
+ } else if (xtal) {
+ rate = clk_get_rate(xtal);
+
+ if (rate < 32000 || rate > BIT(22)) {
+ ret = -EOPNOTSUPP;
+ goto dis_runtime_pm;
+ }
+
+ if (rate != 32768)
+ scmp_val = RZN1_RTC_CTL0_SLSB_SCMP;
+ }
+
+ /* Disable controller during SUBU/SCMP setup */
+ val = readl(rtc->base + RZN1_RTC_CTL0) & ~RZN1_RTC_CTL0_CE;
+ writel(val, rtc->base + RZN1_RTC_CTL0);
+ /* Wait 2-4 32k clock cycles for the disabled controller */
+ ret = readl_poll_timeout(rtc->base + RZN1_RTC_CTL0, val,
+ !(val & RZN1_RTC_CTL0_CEST), 62, 123);
+ if (ret)
+ goto dis_runtime_pm;
+
+ /* Set desired modes leaving the controller disabled */
+ writel(RZN1_RTC_CTL0_AMPM | scmp_val, rtc->base + RZN1_RTC_CTL0);
+
+ if (scmp_val) {
+ writel(rate - 1, rtc->base + RZN1_RTC_SCMP);
+ rtc->rtcdev->ops = &rzn1_rtc_ops_scmp;
+ } else {
+ rtc->rtcdev->ops = &rzn1_rtc_ops_subu;
+ }
+
+ /* Enable controller finally */
+ writel(RZN1_RTC_CTL0_CE | RZN1_RTC_CTL0_AMPM | scmp_val, rtc->base + RZN1_RTC_CTL0);
/* Disable all interrupts */
writel(0, rtc->base + RZN1_RTC_CTL1);
@@ -444,6 +488,11 @@ dis_runtime_pm:
static void rzn1_rtc_remove(struct platform_device *pdev)
{
+ struct rzn1_rtc *rtc = platform_get_drvdata(pdev);
+
+ /* Disable all interrupts */
+ writel(0, rtc->base + RZN1_RTC_CTL1);
+
pm_runtime_put(&pdev->dev);
}
diff --git a/drivers/rtc/rtc-s32g.c b/drivers/rtc/rtc-s32g.c
new file mode 100644
index 000000000000..3a0818e972eb
--- /dev/null
+++ b/drivers/rtc/rtc-s32g.c
@@ -0,0 +1,385 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2025 NXP
+ */
+
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/iopoll.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+#include <linux/rtc.h>
+
+#define RTCC_OFFSET 0x4ul
+#define RTCS_OFFSET 0x8ul
+#define APIVAL_OFFSET 0x10ul
+
+/* RTCC fields */
+#define RTCC_CNTEN BIT(31)
+#define RTCC_APIEN BIT(15)
+#define RTCC_APIIE BIT(14)
+#define RTCC_CLKSEL_MASK GENMASK(13, 12)
+#define RTCC_DIV512EN BIT(11)
+#define RTCC_DIV32EN BIT(10)
+
+/* RTCS fields */
+#define RTCS_INV_API BIT(17)
+#define RTCS_APIF BIT(13)
+
+#define APIVAL_MAX_VAL GENMASK(31, 0)
+#define RTC_SYNCH_TIMEOUT (100 * USEC_PER_MSEC)
+
+/*
+ * S32G2 and S32G3 SoCs have RTC clock source1 reserved and
+ * should not be used.
+ */
+#define RTC_CLK_SRC1_RESERVED BIT(1)
+
+/*
+ * S32G RTC module has a 512 value and a 32 value hardware frequency
+ * divisors (DIV512 and DIV32) which could be used to achieve higher
+ * counter ranges by lowering the RTC frequency.
+ */
+enum {
+ DIV1 = 1,
+ DIV32 = 32,
+ DIV512 = 512,
+ DIV512_32 = 16384
+};
+
+static const char *const rtc_clk_src[] = {
+ "source0",
+ "source1",
+ "source2",
+ "source3"
+};
+
+struct rtc_priv {
+ struct rtc_device *rdev;
+ void __iomem *rtc_base;
+ struct clk *ipg;
+ struct clk *clk_src;
+ const struct rtc_soc_data *rtc_data;
+ u64 rtc_hz;
+ time64_t sleep_sec;
+ int irq;
+ u32 clk_src_idx;
+};
+
+struct rtc_soc_data {
+ u32 clk_div;
+ u32 reserved_clk_mask;
+};
+
+static const struct rtc_soc_data rtc_s32g2_data = {
+ .clk_div = DIV512_32,
+ .reserved_clk_mask = RTC_CLK_SRC1_RESERVED,
+};
+
+static irqreturn_t s32g_rtc_handler(int irq, void *dev)
+{
+ struct rtc_priv *priv = platform_get_drvdata(dev);
+ u32 status;
+
+ status = readl(priv->rtc_base + RTCS_OFFSET);
+
+ if (status & RTCS_APIF) {
+ writel(0x0, priv->rtc_base + APIVAL_OFFSET);
+ writel(status | RTCS_APIF, priv->rtc_base + RTCS_OFFSET);
+ }
+
+ rtc_update_irq(priv->rdev, 1, RTC_IRQF | RTC_AF);
+
+ return IRQ_HANDLED;
+}
+
+/*
+ * The function is not really getting time from the RTC since the S32G RTC
+ * has several limitations. Thus, to setup alarm use system time.
+ */
+static int s32g_rtc_read_time(struct device *dev,
+ struct rtc_time *tm)
+{
+ struct rtc_priv *priv = dev_get_drvdata(dev);
+ time64_t sec;
+
+ if (check_add_overflow(ktime_get_real_seconds(),
+ priv->sleep_sec, &sec))
+ return -ERANGE;
+
+ rtc_time64_to_tm(sec, tm);
+
+ return 0;
+}
+
+static int s32g_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
+{
+ struct rtc_priv *priv = dev_get_drvdata(dev);
+ u32 rtcc, rtcs;
+
+ rtcc = readl(priv->rtc_base + RTCC_OFFSET);
+ rtcs = readl(priv->rtc_base + RTCS_OFFSET);
+
+ alrm->enabled = rtcc & RTCC_APIIE;
+ if (alrm->enabled)
+ alrm->pending = !(rtcs & RTCS_APIF);
+
+ return 0;
+}
+
+static int s32g_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
+{
+ struct rtc_priv *priv = dev_get_drvdata(dev);
+ u32 rtcc;
+
+ /* RTC API functionality is used both for triggering interrupts
+ * and as a wakeup event. Hence it should always be enabled.
+ */
+ rtcc = readl(priv->rtc_base + RTCC_OFFSET);
+ rtcc |= RTCC_APIEN | RTCC_APIIE;
+ writel(rtcc, priv->rtc_base + RTCC_OFFSET);
+
+ return 0;
+}
+
+static int s32g_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
+{
+ struct rtc_priv *priv = dev_get_drvdata(dev);
+ unsigned long long cycles;
+ long long t_offset;
+ time64_t alrm_time;
+ u32 rtcs;
+ int ret;
+
+ alrm_time = rtc_tm_to_time64(&alrm->time);
+ t_offset = alrm_time - ktime_get_real_seconds() - priv->sleep_sec;
+ if (t_offset < 0)
+ return -ERANGE;
+
+ cycles = t_offset * priv->rtc_hz;
+ if (cycles > APIVAL_MAX_VAL)
+ return -ERANGE;
+
+ /* APIVAL could have been reset from the IRQ handler.
+ * Hence, we wait in case there is a synchronization process.
+ */
+ ret = read_poll_timeout(readl, rtcs, !(rtcs & RTCS_INV_API),
+ 0, RTC_SYNCH_TIMEOUT, false, priv->rtc_base + RTCS_OFFSET);
+ if (ret)
+ return ret;
+
+ writel(cycles, priv->rtc_base + APIVAL_OFFSET);
+
+ return read_poll_timeout(readl, rtcs, !(rtcs & RTCS_INV_API),
+ 0, RTC_SYNCH_TIMEOUT, false, priv->rtc_base + RTCS_OFFSET);
+}
+
+/*
+ * Disable the 32-bit free running counter.
+ * This allows Clock Source and Divisors selection
+ * to be performed without causing synchronization issues.
+ */
+static void s32g_rtc_disable(struct rtc_priv *priv)
+{
+ u32 rtcc = readl(priv->rtc_base + RTCC_OFFSET);
+
+ rtcc &= ~RTCC_CNTEN;
+ writel(rtcc, priv->rtc_base + RTCC_OFFSET);
+}
+
+static void s32g_rtc_enable(struct rtc_priv *priv)
+{
+ u32 rtcc = readl(priv->rtc_base + RTCC_OFFSET);
+
+ rtcc |= RTCC_CNTEN;
+ writel(rtcc, priv->rtc_base + RTCC_OFFSET);
+}
+
+static int rtc_clk_src_setup(struct rtc_priv *priv)
+{
+ u32 rtcc;
+
+ rtcc = FIELD_PREP(RTCC_CLKSEL_MASK, priv->clk_src_idx);
+
+ switch (priv->rtc_data->clk_div) {
+ case DIV512_32:
+ rtcc |= RTCC_DIV512EN;
+ rtcc |= RTCC_DIV32EN;
+ break;
+ case DIV512:
+ rtcc |= RTCC_DIV512EN;
+ break;
+ case DIV32:
+ rtcc |= RTCC_DIV32EN;
+ break;
+ case DIV1:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ rtcc |= RTCC_APIEN | RTCC_APIIE;
+ /*
+ * Make sure the CNTEN is 0 before we configure
+ * the clock source and dividers.
+ */
+ s32g_rtc_disable(priv);
+ writel(rtcc, priv->rtc_base + RTCC_OFFSET);
+ s32g_rtc_enable(priv);
+
+ return 0;
+}
+
+static const struct rtc_class_ops rtc_ops = {
+ .read_time = s32g_rtc_read_time,
+ .read_alarm = s32g_rtc_read_alarm,
+ .set_alarm = s32g_rtc_set_alarm,
+ .alarm_irq_enable = s32g_rtc_alarm_irq_enable,
+};
+
+static int rtc_clk_dts_setup(struct rtc_priv *priv,
+ struct device *dev)
+{
+ u32 i;
+
+ priv->ipg = devm_clk_get_enabled(dev, "ipg");
+ if (IS_ERR(priv->ipg))
+ return dev_err_probe(dev, PTR_ERR(priv->ipg),
+ "Failed to get 'ipg' clock\n");
+
+ for (i = 0; i < ARRAY_SIZE(rtc_clk_src); i++) {
+ if (priv->rtc_data->reserved_clk_mask & BIT(i))
+ return -EOPNOTSUPP;
+
+ priv->clk_src = devm_clk_get_enabled(dev, rtc_clk_src[i]);
+ if (!IS_ERR(priv->clk_src)) {
+ priv->clk_src_idx = i;
+ break;
+ }
+ }
+
+ if (IS_ERR(priv->clk_src))
+ return dev_err_probe(dev, PTR_ERR(priv->clk_src),
+ "Failed to get rtc module clock source\n");
+
+ return 0;
+}
+
+static int s32g_rtc_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct rtc_priv *priv;
+ unsigned long rtc_hz;
+ int ret;
+
+ priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ priv->rtc_data = of_device_get_match_data(dev);
+ if (!priv->rtc_data)
+ return -ENODEV;
+
+ priv->rtc_base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(priv->rtc_base))
+ return PTR_ERR(priv->rtc_base);
+
+ device_init_wakeup(dev, true);
+
+ ret = rtc_clk_dts_setup(priv, dev);
+ if (ret)
+ return ret;
+
+ priv->rdev = devm_rtc_allocate_device(dev);
+ if (IS_ERR(priv->rdev))
+ return PTR_ERR(priv->rdev);
+
+ ret = rtc_clk_src_setup(priv);
+ if (ret)
+ return ret;
+
+ priv->irq = platform_get_irq(pdev, 0);
+ if (priv->irq < 0) {
+ ret = priv->irq;
+ goto disable_rtc;
+ }
+
+ rtc_hz = clk_get_rate(priv->clk_src);
+ if (!rtc_hz) {
+ dev_err(dev, "Failed to get RTC frequency\n");
+ ret = -EINVAL;
+ goto disable_rtc;
+ }
+
+ priv->rtc_hz = DIV_ROUND_UP(rtc_hz, priv->rtc_data->clk_div);
+
+ platform_set_drvdata(pdev, priv);
+ priv->rdev->ops = &rtc_ops;
+
+ ret = devm_request_irq(dev, priv->irq,
+ s32g_rtc_handler, 0, dev_name(dev), pdev);
+ if (ret) {
+ dev_err(dev, "Request interrupt %d failed, error: %d\n",
+ priv->irq, ret);
+ goto disable_rtc;
+ }
+
+ ret = devm_rtc_register_device(priv->rdev);
+ if (ret)
+ goto disable_rtc;
+
+ return 0;
+
+disable_rtc:
+ s32g_rtc_disable(priv);
+ return ret;
+}
+
+static int s32g_rtc_suspend(struct device *dev)
+{
+ struct rtc_priv *priv = dev_get_drvdata(dev);
+ u32 apival = readl(priv->rtc_base + APIVAL_OFFSET);
+
+ if (check_add_overflow(priv->sleep_sec, div64_u64(apival, priv->rtc_hz),
+ &priv->sleep_sec)) {
+ dev_warn(dev, "Overflow on sleep cycles occurred. Resetting to 0.\n");
+ priv->sleep_sec = 0;
+ }
+
+ return 0;
+}
+
+static int s32g_rtc_resume(struct device *dev)
+{
+ struct rtc_priv *priv = dev_get_drvdata(dev);
+
+ /* The transition from resume to run is a reset event.
+ * This leads to the RTC registers being reset after resume from
+ * suspend. It is uncommon, but this behaviour has been observed
+ * on S32G RTC after issuing a Suspend to RAM operation.
+ * Thus, reconfigure RTC registers on the resume path.
+ */
+ return rtc_clk_src_setup(priv);
+}
+
+static const struct of_device_id rtc_dt_ids[] = {
+ { .compatible = "nxp,s32g2-rtc", .data = &rtc_s32g2_data },
+ { /* sentinel */ },
+};
+
+static DEFINE_SIMPLE_DEV_PM_OPS(s32g_rtc_pm_ops,
+ s32g_rtc_suspend, s32g_rtc_resume);
+
+static struct platform_driver s32g_rtc_driver = {
+ .driver = {
+ .name = "s32g-rtc",
+ .pm = pm_sleep_ptr(&s32g_rtc_pm_ops),
+ .of_match_table = rtc_dt_ids,
+ },
+ .probe = s32g_rtc_probe,
+};
+module_platform_driver(s32g_rtc_driver);
+
+MODULE_AUTHOR("NXP");
+MODULE_DESCRIPTION("NXP RTC driver for S32G2/S32G3");
+MODULE_LICENSE("GPL");
diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c
index 58c957eb753d..5dd575865adf 100644
--- a/drivers/rtc/rtc-s3c.c
+++ b/drivers/rtc/rtc-s3c.c
@@ -609,4 +609,3 @@ module_platform_driver(s3c_rtc_driver);
MODULE_DESCRIPTION("Samsung S3C RTC Driver");
MODULE_AUTHOR("Ben Dooks <ben@simtec.co.uk>");
MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:s3c2410-rtc");
diff --git a/drivers/rtc/rtc-sh.c b/drivers/rtc/rtc-sh.c
index 9ea40f40188f..f15ef3aa82a0 100644
--- a/drivers/rtc/rtc-sh.c
+++ b/drivers/rtc/rtc-sh.c
@@ -5,6 +5,7 @@
* Copyright (C) 2006 - 2009 Paul Mundt
* Copyright (C) 2006 Jamie Lenehan
* Copyright (C) 2008 Angelo Castello
+ * Copyright (C) 2025 Wolfram Sang, Renesas Electronics Corporation
*
* Based on the old arch/sh/kernel/cpu/rtc.c by:
*
@@ -31,7 +32,7 @@
/* Default values for RZ/A RTC */
#define rtc_reg_size sizeof(u16)
#define RTC_BIT_INVERTED 0 /* no chip bugs */
-#define RTC_CAP_4_DIGIT_YEAR (1 << 0)
+#define RTC_CAP_4_DIGIT_YEAR BIT(0)
#define RTC_DEF_CAPABILITIES RTC_CAP_4_DIGIT_YEAR
#endif
@@ -70,62 +71,35 @@
*/
/* ALARM Bits - or with BCD encoded value */
-#define AR_ENB 0x80 /* Enable for alarm cmp */
-
-/* Period Bits */
-#define PF_HP 0x100 /* Enable Half Period to support 8,32,128Hz */
-#define PF_COUNT 0x200 /* Half periodic counter */
-#define PF_OXS 0x400 /* Periodic One x Second */
-#define PF_KOU 0x800 /* Kernel or User periodic request 1=kernel */
-#define PF_MASK 0xf00
+#define AR_ENB BIT(7) /* Enable for alarm cmp */
/* RCR1 Bits */
-#define RCR1_CF 0x80 /* Carry Flag */
-#define RCR1_CIE 0x10 /* Carry Interrupt Enable */
-#define RCR1_AIE 0x08 /* Alarm Interrupt Enable */
-#define RCR1_AF 0x01 /* Alarm Flag */
+#define RCR1_CF BIT(7) /* Carry Flag */
+#define RCR1_CIE BIT(4) /* Carry Interrupt Enable */
+#define RCR1_AIE BIT(3) /* Alarm Interrupt Enable */
+#define RCR1_AF BIT(0) /* Alarm Flag */
/* RCR2 Bits */
-#define RCR2_PEF 0x80 /* PEriodic interrupt Flag */
-#define RCR2_PESMASK 0x70 /* Periodic interrupt Set */
-#define RCR2_RTCEN 0x08 /* ENable RTC */
-#define RCR2_ADJ 0x04 /* ADJustment (30-second) */
-#define RCR2_RESET 0x02 /* Reset bit */
-#define RCR2_START 0x01 /* Start bit */
+#define RCR2_RTCEN BIT(3) /* ENable RTC */
+#define RCR2_ADJ BIT(2) /* ADJustment (30-second) */
+#define RCR2_RESET BIT(1) /* Reset bit */
+#define RCR2_START BIT(0) /* Start bit */
struct sh_rtc {
void __iomem *regbase;
- unsigned long regsize;
- struct resource *res;
int alarm_irq;
- int periodic_irq;
- int carry_irq;
struct clk *clk;
struct rtc_device *rtc_dev;
- spinlock_t lock;
+ spinlock_t lock; /* protecting register access */
unsigned long capabilities; /* See asm/rtc.h for cap bits */
- unsigned short periodic_freq;
};
-static int __sh_rtc_interrupt(struct sh_rtc *rtc)
+static irqreturn_t sh_rtc_alarm(int irq, void *dev_id)
{
+ struct sh_rtc *rtc = dev_id;
unsigned int tmp, pending;
- tmp = readb(rtc->regbase + RCR1);
- pending = tmp & RCR1_CF;
- tmp &= ~RCR1_CF;
- writeb(tmp, rtc->regbase + RCR1);
-
- /* Users have requested One x Second IRQ */
- if (pending && rtc->periodic_freq & PF_OXS)
- rtc_update_irq(rtc->rtc_dev, 1, RTC_UF | RTC_IRQF);
-
- return pending;
-}
-
-static int __sh_rtc_alarm(struct sh_rtc *rtc)
-{
- unsigned int tmp, pending;
+ spin_lock(&rtc->lock);
tmp = readb(rtc->regbase + RCR1);
pending = tmp & RCR1_AF;
@@ -135,84 +109,12 @@ static int __sh_rtc_alarm(struct sh_rtc *rtc)
if (pending)
rtc_update_irq(rtc->rtc_dev, 1, RTC_AF | RTC_IRQF);
- return pending;
-}
-
-static int __sh_rtc_periodic(struct sh_rtc *rtc)
-{
- unsigned int tmp, pending;
-
- tmp = readb(rtc->regbase + RCR2);
- pending = tmp & RCR2_PEF;
- tmp &= ~RCR2_PEF;
- writeb(tmp, rtc->regbase + RCR2);
-
- if (!pending)
- return 0;
-
- /* Half period enabled than one skipped and the next notified */
- if ((rtc->periodic_freq & PF_HP) && (rtc->periodic_freq & PF_COUNT))
- rtc->periodic_freq &= ~PF_COUNT;
- else {
- if (rtc->periodic_freq & PF_HP)
- rtc->periodic_freq |= PF_COUNT;
- rtc_update_irq(rtc->rtc_dev, 1, RTC_PF | RTC_IRQF);
- }
-
- return pending;
-}
-
-static irqreturn_t sh_rtc_interrupt(int irq, void *dev_id)
-{
- struct sh_rtc *rtc = dev_id;
- int ret;
-
- spin_lock(&rtc->lock);
- ret = __sh_rtc_interrupt(rtc);
- spin_unlock(&rtc->lock);
-
- return IRQ_RETVAL(ret);
-}
-
-static irqreturn_t sh_rtc_alarm(int irq, void *dev_id)
-{
- struct sh_rtc *rtc = dev_id;
- int ret;
-
- spin_lock(&rtc->lock);
- ret = __sh_rtc_alarm(rtc);
- spin_unlock(&rtc->lock);
-
- return IRQ_RETVAL(ret);
-}
-
-static irqreturn_t sh_rtc_periodic(int irq, void *dev_id)
-{
- struct sh_rtc *rtc = dev_id;
- int ret;
-
- spin_lock(&rtc->lock);
- ret = __sh_rtc_periodic(rtc);
- spin_unlock(&rtc->lock);
-
- return IRQ_RETVAL(ret);
-}
-
-static irqreturn_t sh_rtc_shared(int irq, void *dev_id)
-{
- struct sh_rtc *rtc = dev_id;
- int ret;
-
- spin_lock(&rtc->lock);
- ret = __sh_rtc_interrupt(rtc);
- ret |= __sh_rtc_alarm(rtc);
- ret |= __sh_rtc_periodic(rtc);
spin_unlock(&rtc->lock);
- return IRQ_RETVAL(ret);
+ return IRQ_RETVAL(pending);
}
-static inline void sh_rtc_setaie(struct device *dev, unsigned int enable)
+static int sh_rtc_alarm_irq_enable(struct device *dev, unsigned int enable)
{
struct sh_rtc *rtc = dev_get_drvdata(dev);
unsigned int tmp;
@@ -229,45 +131,7 @@ static inline void sh_rtc_setaie(struct device *dev, unsigned int enable)
writeb(tmp, rtc->regbase + RCR1);
spin_unlock_irq(&rtc->lock);
-}
-
-static int sh_rtc_proc(struct device *dev, struct seq_file *seq)
-{
- struct sh_rtc *rtc = dev_get_drvdata(dev);
- unsigned int tmp;
-
- tmp = readb(rtc->regbase + RCR1);
- seq_printf(seq, "carry_IRQ\t: %s\n", (tmp & RCR1_CIE) ? "yes" : "no");
-
- tmp = readb(rtc->regbase + RCR2);
- seq_printf(seq, "periodic_IRQ\t: %s\n",
- (tmp & RCR2_PESMASK) ? "yes" : "no");
-
- return 0;
-}
-
-static inline void sh_rtc_setcie(struct device *dev, unsigned int enable)
-{
- struct sh_rtc *rtc = dev_get_drvdata(dev);
- unsigned int tmp;
-
- spin_lock_irq(&rtc->lock);
-
- tmp = readb(rtc->regbase + RCR1);
-
- if (!enable)
- tmp &= ~RCR1_CIE;
- else
- tmp |= RCR1_CIE;
-
- writeb(tmp, rtc->regbase + RCR1);
-
- spin_unlock_irq(&rtc->lock);
-}
-static int sh_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
-{
- sh_rtc_setaie(dev, enabled);
return 0;
}
@@ -320,14 +184,8 @@ static int sh_rtc_read_time(struct device *dev, struct rtc_time *tm)
tm->tm_sec--;
#endif
- /* only keep the carry interrupt enabled if UIE is on */
- if (!(rtc->periodic_freq & PF_OXS))
- sh_rtc_setcie(dev, 0);
-
- dev_dbg(dev, "%s: tm is secs=%d, mins=%d, hours=%d, "
- "mday=%d, mon=%d, year=%d, wday=%d\n",
- __func__,
- tm->tm_sec, tm->tm_min, tm->tm_hour,
+ dev_dbg(dev, "%s: tm is secs=%d, mins=%d, hours=%d, mday=%d, mon=%d, year=%d, wday=%d\n",
+ __func__, tm->tm_sec, tm->tm_min, tm->tm_hour,
tm->tm_mday, tm->tm_mon + 1, tm->tm_year, tm->tm_wday);
return 0;
@@ -461,16 +319,17 @@ static const struct rtc_class_ops sh_rtc_ops = {
.set_time = sh_rtc_set_time,
.read_alarm = sh_rtc_read_alarm,
.set_alarm = sh_rtc_set_alarm,
- .proc = sh_rtc_proc,
.alarm_irq_enable = sh_rtc_alarm_irq_enable,
};
static int __init sh_rtc_probe(struct platform_device *pdev)
{
struct sh_rtc *rtc;
- struct resource *res;
+ struct resource *res, *req_res;
char clk_name[14];
int clk_id, ret;
+ unsigned int tmp;
+ resource_size_t regsize;
rtc = devm_kzalloc(&pdev->dev, sizeof(*rtc), GFP_KERNEL);
if (unlikely(!rtc))
@@ -478,34 +337,32 @@ static int __init sh_rtc_probe(struct platform_device *pdev)
spin_lock_init(&rtc->lock);
- /* get periodic/carry/alarm irqs */
ret = platform_get_irq(pdev, 0);
if (unlikely(ret <= 0)) {
dev_err(&pdev->dev, "No IRQ resource\n");
return -ENOENT;
}
- rtc->periodic_irq = ret;
- rtc->carry_irq = platform_get_irq(pdev, 1);
- rtc->alarm_irq = platform_get_irq(pdev, 2);
+ if (!pdev->dev.of_node)
+ rtc->alarm_irq = platform_get_irq(pdev, 2);
+ else
+ rtc->alarm_irq = ret;
res = platform_get_resource(pdev, IORESOURCE_IO, 0);
if (!res)
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (unlikely(res == NULL)) {
+ if (!res) {
dev_err(&pdev->dev, "No IO resource\n");
return -ENOENT;
}
- rtc->regsize = resource_size(res);
-
- rtc->res = devm_request_mem_region(&pdev->dev, res->start,
- rtc->regsize, pdev->name);
- if (unlikely(!rtc->res))
+ regsize = resource_size(res);
+ req_res = devm_request_mem_region(&pdev->dev, res->start, regsize, pdev->name);
+ if (!req_res)
return -EBUSY;
- rtc->regbase = devm_ioremap(&pdev->dev, rtc->res->start, rtc->regsize);
- if (unlikely(!rtc->regbase))
+ rtc->regbase = devm_ioremap(&pdev->dev, req_res->start, regsize);
+ if (!rtc->regbase)
return -EINVAL;
if (!pdev->dev.of_node) {
@@ -515,8 +372,9 @@ static int __init sh_rtc_probe(struct platform_device *pdev)
clk_id = 0;
snprintf(clk_name, sizeof(clk_name), "rtc%d", clk_id);
- } else
+ } else {
snprintf(clk_name, sizeof(clk_name), "fck");
+ }
rtc->clk = devm_clk_get(&pdev->dev, clk_name);
if (IS_ERR(rtc->clk)) {
@@ -550,51 +408,19 @@ static int __init sh_rtc_probe(struct platform_device *pdev)
}
#endif
- if (rtc->carry_irq <= 0) {
- /* register shared periodic/carry/alarm irq */
- ret = devm_request_irq(&pdev->dev, rtc->periodic_irq,
- sh_rtc_shared, 0, "sh-rtc", rtc);
- if (unlikely(ret)) {
- dev_err(&pdev->dev,
- "request IRQ failed with %d, IRQ %d\n", ret,
- rtc->periodic_irq);
- goto err_unmap;
- }
- } else {
- /* register periodic/carry/alarm irqs */
- ret = devm_request_irq(&pdev->dev, rtc->periodic_irq,
- sh_rtc_periodic, 0, "sh-rtc period", rtc);
- if (unlikely(ret)) {
- dev_err(&pdev->dev,
- "request period IRQ failed with %d, IRQ %d\n",
- ret, rtc->periodic_irq);
- goto err_unmap;
- }
-
- ret = devm_request_irq(&pdev->dev, rtc->carry_irq,
- sh_rtc_interrupt, 0, "sh-rtc carry", rtc);
- if (unlikely(ret)) {
- dev_err(&pdev->dev,
- "request carry IRQ failed with %d, IRQ %d\n",
- ret, rtc->carry_irq);
- goto err_unmap;
- }
-
- ret = devm_request_irq(&pdev->dev, rtc->alarm_irq,
- sh_rtc_alarm, 0, "sh-rtc alarm", rtc);
- if (unlikely(ret)) {
- dev_err(&pdev->dev,
- "request alarm IRQ failed with %d, IRQ %d\n",
- ret, rtc->alarm_irq);
- goto err_unmap;
- }
+ ret = devm_request_irq(&pdev->dev, rtc->alarm_irq, sh_rtc_alarm, 0, "sh-rtc", rtc);
+ if (ret) {
+ dev_err(&pdev->dev, "request alarm IRQ failed with %d, IRQ %d\n",
+ ret, rtc->alarm_irq);
+ goto err_unmap;
}
platform_set_drvdata(pdev, rtc);
/* everything disabled by default */
- sh_rtc_setaie(&pdev->dev, 0);
- sh_rtc_setcie(&pdev->dev, 0);
+ tmp = readb(rtc->regbase + RCR1);
+ tmp &= ~(RCR1_CIE | RCR1_AIE);
+ writeb(tmp, rtc->regbase + RCR1);
rtc->rtc_dev->ops = &sh_rtc_ops;
rtc->rtc_dev->max_user_freq = 256;
@@ -624,36 +450,27 @@ static void __exit sh_rtc_remove(struct platform_device *pdev)
{
struct sh_rtc *rtc = platform_get_drvdata(pdev);
- sh_rtc_setaie(&pdev->dev, 0);
- sh_rtc_setcie(&pdev->dev, 0);
+ sh_rtc_alarm_irq_enable(&pdev->dev, 0);
clk_disable(rtc->clk);
}
-static void sh_rtc_set_irq_wake(struct device *dev, int enabled)
+static int __maybe_unused sh_rtc_suspend(struct device *dev)
{
struct sh_rtc *rtc = dev_get_drvdata(dev);
- irq_set_irq_wake(rtc->periodic_irq, enabled);
-
- if (rtc->carry_irq > 0) {
- irq_set_irq_wake(rtc->carry_irq, enabled);
- irq_set_irq_wake(rtc->alarm_irq, enabled);
- }
-}
-
-static int __maybe_unused sh_rtc_suspend(struct device *dev)
-{
if (device_may_wakeup(dev))
- sh_rtc_set_irq_wake(dev, 1);
+ irq_set_irq_wake(rtc->alarm_irq, 1);
return 0;
}
static int __maybe_unused sh_rtc_resume(struct device *dev)
{
+ struct sh_rtc *rtc = dev_get_drvdata(dev);
+
if (device_may_wakeup(dev))
- sh_rtc_set_irq_wake(dev, 0);
+ irq_set_irq_wake(rtc->alarm_irq, 0);
return 0;
}
@@ -684,8 +501,8 @@ static struct platform_driver sh_rtc_platform_driver __refdata = {
module_platform_driver_probe(sh_rtc_platform_driver, sh_rtc_probe);
MODULE_DESCRIPTION("SuperH on-chip RTC driver");
-MODULE_AUTHOR("Paul Mundt <lethal@linux-sh.org>, "
- "Jamie Lenehan <lenehan@twibble.org>, "
- "Angelo Castello <angelo.castello@st.com>");
+MODULE_AUTHOR("Paul Mundt <lethal@linux-sh.org>");
+MODULE_AUTHOR("Jamie Lenehan <lenehan@twibble.org>");
+MODULE_AUTHOR("Angelo Castello <angelo.castello@st.com>");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS("platform:" DRV_NAME);
diff --git a/drivers/rtc/rtc-stm32.c b/drivers/rtc/rtc-stm32.c
index 1b715db47160..ef8fb88aab48 100644
--- a/drivers/rtc/rtc-stm32.c
+++ b/drivers/rtc/rtc-stm32.c
@@ -1283,7 +1283,6 @@ static struct platform_driver stm32_rtc_driver = {
module_platform_driver(stm32_rtc_driver);
-MODULE_ALIAS("platform:" DRIVER_NAME);
MODULE_AUTHOR("Amelie Delaunay <amelie.delaunay@st.com>");
MODULE_DESCRIPTION("STMicroelectronics STM32 Real Time Clock driver");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/soundwire/bus.c b/drivers/soundwire/bus.c
index 39aecd34c641..68db4b67a86f 100644
--- a/drivers/soundwire/bus.c
+++ b/drivers/soundwire/bus.c
@@ -56,6 +56,8 @@ int sdw_bus_master_add(struct sdw_bus *bus, struct device *parent,
return ret;
}
+ ida_init(&bus->slave_ida);
+
ret = sdw_master_device_add(bus, parent, fwnode);
if (ret < 0) {
dev_err(parent, "Failed to add master device at link %d\n",
@@ -751,41 +753,36 @@ err:
static int sdw_assign_device_num(struct sdw_slave *slave)
{
struct sdw_bus *bus = slave->bus;
- int ret, dev_num;
- bool new_device = false;
+ struct device *dev = bus->dev;
+ int ret;
/* check first if device number is assigned, if so reuse that */
if (!slave->dev_num) {
if (!slave->dev_num_sticky) {
+ int dev_num;
+
mutex_lock(&slave->bus->bus_lock);
dev_num = sdw_get_device_num(slave);
mutex_unlock(&slave->bus->bus_lock);
if (dev_num < 0) {
- dev_err(bus->dev, "Get dev_num failed: %d\n",
- dev_num);
+ dev_err(dev, "Get dev_num failed: %d\n", dev_num);
return dev_num;
}
- slave->dev_num = dev_num;
+
slave->dev_num_sticky = dev_num;
- new_device = true;
} else {
- slave->dev_num = slave->dev_num_sticky;
+ dev_dbg(dev, "Slave already registered, reusing dev_num: %d\n",
+ slave->dev_num_sticky);
}
}
- if (!new_device)
- dev_dbg(bus->dev,
- "Slave already registered, reusing dev_num:%d\n",
- slave->dev_num);
-
/* Clear the slave->dev_num to transfer message on device 0 */
- dev_num = slave->dev_num;
slave->dev_num = 0;
- ret = sdw_write_no_pm(slave, SDW_SCP_DEVNUMBER, dev_num);
+ ret = sdw_write_no_pm(slave, SDW_SCP_DEVNUMBER, slave->dev_num_sticky);
if (ret < 0) {
- dev_err(bus->dev, "Program device_num %d failed: %d\n",
- dev_num, ret);
+ dev_err(dev, "Program device_num %d failed: %d\n",
+ slave->dev_num_sticky, ret);
return ret;
}
@@ -793,7 +790,7 @@ static int sdw_assign_device_num(struct sdw_slave *slave)
slave->dev_num = slave->dev_num_sticky;
if (bus->ops && bus->ops->new_peripheral_assigned)
- bus->ops->new_peripheral_assigned(bus, slave, dev_num);
+ bus->ops->new_peripheral_assigned(bus, slave, slave->dev_num);
return 0;
}
diff --git a/drivers/soundwire/bus_type.c b/drivers/soundwire/bus_type.c
index e98d5db81b1c..75d6f16efced 100644
--- a/drivers/soundwire/bus_type.c
+++ b/drivers/soundwire/bus_type.c
@@ -105,9 +105,17 @@ static int sdw_drv_probe(struct device *dev)
if (ret)
return ret;
+ ret = ida_alloc_max(&slave->bus->slave_ida, SDW_FW_MAX_DEVICES, GFP_KERNEL);
+ if (ret < 0) {
+ dev_err(dev, "Failed to allocated ID: %d\n", ret);
+ return ret;
+ }
+ slave->index = ret;
+
ret = drv->probe(slave, id);
if (ret) {
dev_pm_domain_detach(dev, false);
+ ida_free(&slave->bus->slave_ida, slave->index);
return ret;
}
@@ -174,6 +182,8 @@ static int sdw_drv_remove(struct device *dev)
dev_pm_domain_detach(dev, false);
+ ida_free(&slave->bus->slave_ida, slave->index);
+
return ret;
}
diff --git a/drivers/soundwire/generic_bandwidth_allocation.c b/drivers/soundwire/generic_bandwidth_allocation.c
index 1cfaccf43eac..c18f0c16f929 100644
--- a/drivers/soundwire/generic_bandwidth_allocation.c
+++ b/drivers/soundwire/generic_bandwidth_allocation.c
@@ -204,6 +204,13 @@ static void _sdw_compute_port_params(struct sdw_bus *bus,
port_bo = 1;
list_for_each_entry(m_rt, &bus->m_rt_list, bus_node) {
+ /*
+ * Only runtimes with CONFIGURED, PREPARED, ENABLED, and DISABLED
+ * states should be included in the bandwidth calculation.
+ */
+ if (m_rt->stream->state > SDW_STREAM_DISABLED ||
+ m_rt->stream->state < SDW_STREAM_CONFIGURED)
+ continue;
sdw_compute_master_ports(m_rt, &params[i], &port_bo, hstop);
}
diff --git a/drivers/soundwire/intel.h b/drivers/soundwire/intel.h
index d44e70d3c4e3..86abc465260f 100644
--- a/drivers/soundwire/intel.h
+++ b/drivers/soundwire/intel.h
@@ -22,6 +22,7 @@ struct hdac_bus;
* @shim_lock: mutex to handle access to shared SHIM registers
* @shim_mask: global pointer to check SHIM register initialization
* @clock_stop_quirks: mask defining requested behavior on pm_suspend
+ * @mic_privacy: ACE version supports microphone privacy
* @link_mask: global mask needed for power-up/down sequences
* @cdns: Cadence master descriptor
* @list: used to walk-through all masters exposed by the same controller
@@ -42,6 +43,7 @@ struct sdw_intel_link_res {
struct mutex *shim_lock; /* protect shared registers */
u32 *shim_mask;
u32 clock_stop_quirks;
+ bool mic_privacy;
u32 link_mask;
struct sdw_cdns *cdns;
struct list_head list;
diff --git a/drivers/soundwire/intel_ace2x_debugfs.c b/drivers/soundwire/intel_ace2x_debugfs.c
index 206a8d511ebd..fda8f0daaa96 100644
--- a/drivers/soundwire/intel_ace2x_debugfs.c
+++ b/drivers/soundwire/intel_ace2x_debugfs.c
@@ -76,6 +76,12 @@ static int intel_reg_show(struct seq_file *s_file, void *data)
ret += intel_sprintf(vs_s, false, buf, ret, SDW_SHIM2_INTEL_VS_IOCTL);
ret += intel_sprintf(vs_s, false, buf, ret, SDW_SHIM2_INTEL_VS_ACTMCTL);
+ if (sdw->link_res->mic_privacy) {
+ ret += scnprintf(buf + ret, RD_BUF - ret, "\nVS PVCCS\n");
+ ret += intel_sprintf(vs_s, false, buf, ret,
+ SDW_SHIM2_INTEL_VS_PVCCS);
+ }
+
seq_printf(s_file, "%s", buf);
kfree(buf);
diff --git a/drivers/soundwire/intel_init.c b/drivers/soundwire/intel_init.c
index 5f53666514a4..4ffdabaf9693 100644
--- a/drivers/soundwire/intel_init.c
+++ b/drivers/soundwire/intel_init.c
@@ -77,6 +77,7 @@ static struct sdw_intel_link_dev *intel_link_dev_register(struct sdw_intel_res *
link->shim = res->mmio_base + SDW_SHIM2_GENERIC_BASE(link_id);
link->shim_vs = res->mmio_base + SDW_SHIM2_VS_BASE(link_id);
link->shim_lock = res->eml_lock;
+ link->mic_privacy = res->mic_privacy;
}
link->ops = res->ops;
diff --git a/drivers/soundwire/irq.c b/drivers/soundwire/irq.c
index c237e6d0766b..f18be37efef8 100644
--- a/drivers/soundwire/irq.c
+++ b/drivers/soundwire/irq.c
@@ -31,7 +31,7 @@ int sdw_irq_create(struct sdw_bus *bus,
{
bus->irq_chip.name = dev_name(bus->dev);
- bus->domain = irq_domain_create_linear(fwnode, SDW_MAX_DEVICES,
+ bus->domain = irq_domain_create_linear(fwnode, SDW_FW_MAX_DEVICES,
&sdw_domain_ops, bus);
if (!bus->domain) {
dev_err(bus->dev, "Failed to add IRQ domain\n");
@@ -50,12 +50,12 @@ static void sdw_irq_dispose_mapping(void *data)
{
struct sdw_slave *slave = data;
- irq_dispose_mapping(irq_find_mapping(slave->bus->domain, slave->dev_num));
+ irq_dispose_mapping(slave->irq);
}
void sdw_irq_create_mapping(struct sdw_slave *slave)
{
- slave->irq = irq_create_mapping(slave->bus->domain, slave->dev_num);
+ slave->irq = irq_create_mapping(slave->bus->domain, slave->index);
if (!slave->irq)
dev_warn(&slave->dev, "Failed to map IRQ\n");
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index 173e81c2bbcb..b228a5a64479 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -21,7 +21,6 @@
#include "error.h"
#include "lru.h"
#include "recovery.h"
-#include "trace.h"
#include "varint.h"
#include <linux/kthread.h>
@@ -337,11 +336,10 @@ void bch2_alloc_v4_swab(struct bkey_s k)
a->stripe_sectors = swab32(a->stripe_sectors);
}
-void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
+static inline void __bch2_alloc_v4_to_text(struct printbuf *out, struct bch_fs *c,
+ unsigned dev, const struct bch_alloc_v4 *a)
{
- struct bch_alloc_v4 _a;
- const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &_a);
- struct bch_dev *ca = c ? bch2_dev_bucket_tryget_noerror(c, k.k->p) : NULL;
+ struct bch_dev *ca = c ? bch2_dev_tryget_noerror(c, dev) : NULL;
prt_newline(out);
printbuf_indent_add(out, 2);
@@ -369,6 +367,19 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c
bch2_dev_put(ca);
}
+void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
+{
+ struct bch_alloc_v4 _a;
+ const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &_a);
+
+ __bch2_alloc_v4_to_text(out, c, k.k->p.inode, a);
+}
+
+void bch2_alloc_v4_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
+{
+ __bch2_alloc_v4_to_text(out, c, k.k->p.inode, bkey_s_c_to_alloc_v4(k).v);
+}
+
void __bch2_alloc_to_v4(struct bkey_s_c k, struct bch_alloc_v4 *out)
{
if (k.k->type == KEY_TYPE_alloc_v4) {
@@ -697,8 +708,8 @@ static int __need_discard_or_freespace_err(struct btree_trans *trans,
set ? "" : "un",
bch2_btree_id_str(btree),
buf.buf);
- if (ret == -BCH_ERR_fsck_ignore ||
- ret == -BCH_ERR_fsck_errors_not_fixed)
+ if (bch2_err_matches(ret, BCH_ERR_fsck_ignore) ||
+ bch2_err_matches(ret, BCH_ERR_fsck_errors_not_fixed))
ret = 0;
printbuf_exit(&buf);
@@ -854,7 +865,7 @@ int bch2_trigger_alloc(struct btree_trans *trans,
struct bch_dev *ca = bch2_dev_bucket_tryget(c, new.k->p);
if (!ca)
- return -BCH_ERR_trigger_alloc;
+ return bch_err_throw(c, trigger_alloc);
struct bch_alloc_v4 old_a_convert;
const struct bch_alloc_v4 *old_a = bch2_alloc_to_v4(old, &old_a_convert);
@@ -988,14 +999,11 @@ int bch2_trigger_alloc(struct btree_trans *trans,
}
if (new_a->gen != old_a->gen) {
- rcu_read_lock();
+ guard(rcu)();
u8 *gen = bucket_gen(ca, new.k->p.offset);
- if (unlikely(!gen)) {
- rcu_read_unlock();
+ if (unlikely(!gen))
goto invalid_bucket;
- }
*gen = new_a->gen;
- rcu_read_unlock();
}
#define eval_state(_a, expr) ({ const struct bch_alloc_v4 *a = _a; expr; })
@@ -1021,15 +1029,12 @@ int bch2_trigger_alloc(struct btree_trans *trans,
}
if ((flags & BTREE_TRIGGER_gc) && (flags & BTREE_TRIGGER_insert)) {
- rcu_read_lock();
+ guard(rcu)();
struct bucket *g = gc_bucket(ca, new.k->p.offset);
- if (unlikely(!g)) {
- rcu_read_unlock();
+ if (unlikely(!g))
goto invalid_bucket;
- }
g->gen_valid = 1;
g->gen = new_a->gen;
- rcu_read_unlock();
}
err:
fsck_err:
@@ -1039,7 +1044,7 @@ fsck_err:
invalid_bucket:
bch2_fs_inconsistent(c, "reference to invalid bucket\n%s",
(bch2_bkey_val_to_text(&buf, c, new.s_c), buf.buf));
- ret = -BCH_ERR_trigger_alloc;
+ ret = bch_err_throw(c, trigger_alloc);
goto err;
}
@@ -1105,13 +1110,12 @@ static bool next_bucket(struct bch_fs *c, struct bch_dev **ca, struct bpos *buck
bucket->offset = 0;
}
- rcu_read_lock();
+ guard(rcu)();
*ca = __bch2_next_dev_idx(c, bucket->inode, NULL);
if (*ca) {
*bucket = POS((*ca)->dev_idx, (*ca)->mi.first_bucket);
bch2_dev_get(*ca);
}
- rcu_read_unlock();
return *ca != NULL;
}
@@ -1454,7 +1458,7 @@ delete:
ret = bch2_btree_bit_mod_iter(trans, iter, false) ?:
bch2_trans_commit(trans, NULL, NULL,
BCH_TRANS_COMMIT_no_enospc) ?:
- -BCH_ERR_transaction_restart_commit;
+ bch_err_throw(c, transaction_restart_commit);
goto out;
} else {
/*
@@ -1777,14 +1781,16 @@ int bch2_check_alloc_to_lru_refs(struct bch_fs *c)
static int discard_in_flight_add(struct bch_dev *ca, u64 bucket, bool in_progress)
{
+ struct bch_fs *c = ca->fs;
int ret;
mutex_lock(&ca->discard_buckets_in_flight_lock);
- darray_for_each(ca->discard_buckets_in_flight, i)
- if (i->bucket == bucket) {
- ret = -BCH_ERR_EEXIST_discard_in_flight_add;
- goto out;
- }
+ struct discard_in_flight *i =
+ darray_find_p(ca->discard_buckets_in_flight, i, i->bucket == bucket);
+ if (i) {
+ ret = bch_err_throw(c, EEXIST_discard_in_flight_add);
+ goto out;
+ }
ret = darray_push(&ca->discard_buckets_in_flight, ((struct discard_in_flight) {
.in_progress = in_progress,
@@ -1798,14 +1804,11 @@ out:
static void discard_in_flight_remove(struct bch_dev *ca, u64 bucket)
{
mutex_lock(&ca->discard_buckets_in_flight_lock);
- darray_for_each(ca->discard_buckets_in_flight, i)
- if (i->bucket == bucket) {
- BUG_ON(!i->in_progress);
- darray_remove_item(&ca->discard_buckets_in_flight, i);
- goto found;
- }
- BUG();
-found:
+ struct discard_in_flight *i =
+ darray_find_p(ca->discard_buckets_in_flight, i, i->bucket == bucket);
+ BUG_ON(!i || !i->in_progress);
+
+ darray_remove_item(&ca->discard_buckets_in_flight, i);
mutex_unlock(&ca->discard_buckets_in_flight_lock);
}
@@ -2504,7 +2507,7 @@ void bch2_recalc_capacity(struct bch_fs *c)
lockdep_assert_held(&c->state_lock);
- rcu_read_lock();
+ guard(rcu)();
for_each_member_device_rcu(c, ca, NULL) {
struct block_device *bdev = READ_ONCE(ca->disk_sb.bdev);
if (bdev)
@@ -2549,7 +2552,6 @@ void bch2_recalc_capacity(struct bch_fs *c)
bucket_size_max = max_t(unsigned, bucket_size_max,
ca->mi.bucket_size);
}
- rcu_read_unlock();
bch2_set_ra_pages(c, ra_pages);
@@ -2574,10 +2576,9 @@ u64 bch2_min_rw_member_capacity(struct bch_fs *c)
{
u64 ret = U64_MAX;
- rcu_read_lock();
+ guard(rcu)();
for_each_rw_member_rcu(c, ca)
ret = min(ret, ca->mi.nbuckets * ca->mi.bucket_size);
- rcu_read_unlock();
return ret;
}
diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h
index 4f94c6a661bf..0cc5adc55b6f 100644
--- a/fs/bcachefs/alloc_background.h
+++ b/fs/bcachefs/alloc_background.h
@@ -13,11 +13,9 @@
static inline bool bch2_dev_bucket_exists(struct bch_fs *c, struct bpos pos)
{
- rcu_read_lock();
+ guard(rcu)();
struct bch_dev *ca = bch2_dev_rcu_noerror(c, pos.inode);
- bool ret = ca && bucket_valid(ca, pos.offset);
- rcu_read_unlock();
- return ret;
+ return ca && bucket_valid(ca, pos.offset);
}
static inline u64 bucket_to_u64(struct bpos bucket)
@@ -253,6 +251,7 @@ int bch2_alloc_v4_validate(struct bch_fs *, struct bkey_s_c,
struct bkey_validate_context);
void bch2_alloc_v4_swab(struct bkey_s);
void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
+void bch2_alloc_v4_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
#define bch2_bkey_ops_alloc ((struct bkey_ops) { \
.key_validate = bch2_alloc_v1_validate, \
@@ -277,7 +276,7 @@ void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
#define bch2_bkey_ops_alloc_v4 ((struct bkey_ops) { \
.key_validate = bch2_alloc_v4_validate, \
- .val_to_text = bch2_alloc_to_text, \
+ .val_to_text = bch2_alloc_v4_to_text, \
.swab = bch2_alloc_v4_swab, \
.trigger = bch2_trigger_alloc, \
.min_val_size = 48, \
diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c
index 1a52c12c51ae..b375ad610acd 100644
--- a/fs/bcachefs/alloc_foreground.c
+++ b/fs/bcachefs/alloc_foreground.c
@@ -69,10 +69,9 @@ const char * const bch2_watermarks[] = {
void bch2_reset_alloc_cursors(struct bch_fs *c)
{
- rcu_read_lock();
+ guard(rcu)();
for_each_member_device_rcu(c, ca, NULL)
memset(ca->alloc_cursor, 0, sizeof(ca->alloc_cursor));
- rcu_read_unlock();
}
static void bch2_open_bucket_hash_add(struct bch_fs *c, struct open_bucket *ob)
@@ -166,9 +165,8 @@ static void open_bucket_free_unused(struct bch_fs *c, struct open_bucket *ob)
ARRAY_SIZE(c->open_buckets_partial));
spin_lock(&c->freelist_lock);
- rcu_read_lock();
- bch2_dev_rcu(c, ob->dev)->nr_partial_buckets++;
- rcu_read_unlock();
+ scoped_guard(rcu)
+ bch2_dev_rcu(c, ob->dev)->nr_partial_buckets++;
ob->on_partial_list = true;
c->open_buckets_partial[c->open_buckets_partial_nr++] =
@@ -229,7 +227,7 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c,
track_event_change(&c->times[BCH_TIME_blocked_allocate_open_bucket], true);
spin_unlock(&c->freelist_lock);
- return ERR_PTR(-BCH_ERR_open_buckets_empty);
+ return ERR_PTR(bch_err_throw(c, open_buckets_empty));
}
/* Recheck under lock: */
@@ -535,7 +533,7 @@ again:
track_event_change(&c->times[BCH_TIME_blocked_allocate], true);
- ob = ERR_PTR(-BCH_ERR_freelist_empty);
+ ob = ERR_PTR(bch_err_throw(c, freelist_empty));
goto err;
}
@@ -560,7 +558,7 @@ alloc:
}
err:
if (!ob)
- ob = ERR_PTR(-BCH_ERR_no_buckets_found);
+ ob = ERR_PTR(bch_err_throw(c, no_buckets_found));
if (!IS_ERR(ob))
ob->data_type = req->data_type;
@@ -603,18 +601,18 @@ static int __dev_stripe_cmp(struct dev_stripe_state *stripe,
#define dev_stripe_cmp(l, r) __dev_stripe_cmp(stripe, l, r)
-struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *c,
- struct dev_stripe_state *stripe,
- struct bch_devs_mask *devs)
+void bch2_dev_alloc_list(struct bch_fs *c,
+ struct dev_stripe_state *stripe,
+ struct bch_devs_mask *devs,
+ struct dev_alloc_list *ret)
{
- struct dev_alloc_list ret = { .nr = 0 };
- unsigned i;
+ ret->nr = 0;
+ unsigned i;
for_each_set_bit(i, devs->d, BCH_SB_MEMBERS_MAX)
- ret.data[ret.nr++] = i;
+ ret->data[ret->nr++] = i;
- bubble_sort(ret.data, ret.nr, dev_stripe_cmp);
- return ret;
+ bubble_sort(ret->data, ret->nr, dev_stripe_cmp);
}
static const u64 stripe_clock_hand_rescale = 1ULL << 62; /* trigger rescale at */
@@ -705,18 +703,19 @@ static int add_new_bucket(struct bch_fs *c,
return 0;
}
-int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
- struct alloc_request *req,
- struct dev_stripe_state *stripe,
- struct closure *cl)
+inline int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
+ struct alloc_request *req,
+ struct dev_stripe_state *stripe,
+ struct closure *cl)
{
struct bch_fs *c = trans->c;
- int ret = -BCH_ERR_insufficient_devices;
+ int ret = 0;
BUG_ON(req->nr_effective >= req->nr_replicas);
- struct dev_alloc_list devs_sorted = bch2_dev_alloc_list(c, stripe, &req->devs_may_alloc);
- darray_for_each(devs_sorted, i) {
+ bch2_dev_alloc_list(c, stripe, &req->devs_may_alloc, &req->devs_sorted);
+
+ darray_for_each(req->devs_sorted, i) {
req->ca = bch2_dev_tryget_noerror(c, *i);
if (!req->ca)
continue;
@@ -739,13 +738,16 @@ int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
continue;
}
- if (add_new_bucket(c, req, ob)) {
- ret = 0;
+ ret = add_new_bucket(c, req, ob);
+ if (ret)
break;
- }
}
- return ret;
+ if (ret == 1)
+ return 0;
+ if (ret)
+ return ret;
+ return bch_err_throw(c, insufficient_devices);
}
/* Allocate from stripes: */
@@ -776,9 +778,9 @@ static int bucket_alloc_from_stripe(struct btree_trans *trans,
if (!h)
return 0;
- struct dev_alloc_list devs_sorted =
- bch2_dev_alloc_list(c, &req->wp->stripe, &req->devs_may_alloc);
- darray_for_each(devs_sorted, i)
+ bch2_dev_alloc_list(c, &req->wp->stripe, &req->devs_may_alloc, &req->devs_sorted);
+
+ darray_for_each(req->devs_sorted, i)
for (unsigned ec_idx = 0; ec_idx < h->s->nr_data; ec_idx++) {
if (!h->s->blocks[ec_idx])
continue;
@@ -872,9 +874,8 @@ static int bucket_alloc_set_partial(struct bch_fs *c,
i);
ob->on_partial_list = false;
- rcu_read_lock();
- bch2_dev_rcu(c, ob->dev)->nr_partial_buckets--;
- rcu_read_unlock();
+ scoped_guard(rcu)
+ bch2_dev_rcu(c, ob->dev)->nr_partial_buckets--;
ret = add_new_bucket(c, req, ob);
if (ret)
@@ -1056,9 +1057,8 @@ void bch2_open_buckets_stop(struct bch_fs *c, struct bch_dev *ca,
ob->on_partial_list = false;
- rcu_read_lock();
- bch2_dev_rcu(c, ob->dev)->nr_partial_buckets--;
- rcu_read_unlock();
+ scoped_guard(rcu)
+ bch2_dev_rcu(c, ob->dev)->nr_partial_buckets--;
spin_unlock(&c->freelist_lock);
bch2_open_bucket_put(c, ob);
@@ -1086,14 +1086,11 @@ static struct write_point *__writepoint_find(struct hlist_head *head,
{
struct write_point *wp;
- rcu_read_lock();
+ guard(rcu)();
hlist_for_each_entry_rcu(wp, head, node)
if (wp->write_point == write_point)
- goto out;
- wp = NULL;
-out:
- rcu_read_unlock();
- return wp;
+ return wp;
+ return NULL;
}
static inline bool too_many_writepoints(struct bch_fs *c, unsigned factor)
@@ -1104,7 +1101,7 @@ static inline bool too_many_writepoints(struct bch_fs *c, unsigned factor)
return stranded * factor > free;
}
-static bool try_increase_writepoints(struct bch_fs *c)
+static noinline bool try_increase_writepoints(struct bch_fs *c)
{
struct write_point *wp;
@@ -1117,7 +1114,7 @@ static bool try_increase_writepoints(struct bch_fs *c)
return true;
}
-static bool try_decrease_writepoints(struct btree_trans *trans, unsigned old_nr)
+static noinline bool try_decrease_writepoints(struct btree_trans *trans, unsigned old_nr)
{
struct bch_fs *c = trans->c;
struct write_point *wp;
@@ -1379,11 +1376,11 @@ err:
goto retry;
if (cl && bch2_err_matches(ret, BCH_ERR_open_buckets_empty))
- ret = -BCH_ERR_bucket_alloc_blocked;
+ ret = bch_err_throw(c, bucket_alloc_blocked);
if (cl && !(flags & BCH_WRITE_alloc_nowait) &&
bch2_err_matches(ret, BCH_ERR_freelist_empty))
- ret = -BCH_ERR_bucket_alloc_blocked;
+ ret = bch_err_throw(c, bucket_alloc_blocked);
return ret;
}
@@ -1637,19 +1634,16 @@ static noinline void bch2_print_allocator_stuck(struct bch_fs *c)
bch2_printbuf_make_room(&buf, 4096);
- rcu_read_lock();
buf.atomic++;
-
- for_each_online_member_rcu(c, ca) {
- prt_printf(&buf, "Dev %u:\n", ca->dev_idx);
- printbuf_indent_add(&buf, 2);
- bch2_dev_alloc_debug_to_text(&buf, ca);
- printbuf_indent_sub(&buf, 2);
- prt_newline(&buf);
- }
-
+ scoped_guard(rcu)
+ for_each_online_member_rcu(c, ca) {
+ prt_printf(&buf, "Dev %u:\n", ca->dev_idx);
+ printbuf_indent_add(&buf, 2);
+ bch2_dev_alloc_debug_to_text(&buf, ca);
+ printbuf_indent_sub(&buf, 2);
+ prt_newline(&buf);
+ }
--buf.atomic;
- rcu_read_unlock();
prt_printf(&buf, "Copygc debug:\n");
printbuf_indent_add(&buf, 2);
diff --git a/fs/bcachefs/alloc_foreground.h b/fs/bcachefs/alloc_foreground.h
index 2e01c7b61ed1..1b3fc8460096 100644
--- a/fs/bcachefs/alloc_foreground.h
+++ b/fs/bcachefs/alloc_foreground.h
@@ -42,6 +42,7 @@ struct alloc_request {
struct bch_devs_mask devs_may_alloc;
/* bch2_bucket_alloc_set_trans(): */
+ struct dev_alloc_list devs_sorted;
struct bch_dev_usage usage;
/* bch2_bucket_alloc_trans(): */
@@ -71,9 +72,10 @@ struct alloc_request {
struct bch_devs_mask scratch_devs_may_alloc;
};
-struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *,
- struct dev_stripe_state *,
- struct bch_devs_mask *);
+void bch2_dev_alloc_list(struct bch_fs *,
+ struct dev_stripe_state *,
+ struct bch_devs_mask *,
+ struct dev_alloc_list *);
void bch2_dev_stripe_increment(struct bch_dev *, struct dev_stripe_state *);
static inline struct bch_dev *ob_dev(struct bch_fs *c, struct open_bucket *ob)
diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c
index cde7dd115267..e76809e71858 100644
--- a/fs/bcachefs/backpointers.c
+++ b/fs/bcachefs/backpointers.c
@@ -48,17 +48,19 @@ void bch2_backpointer_to_text(struct printbuf *out, struct bch_fs *c, struct bke
{
struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);
- rcu_read_lock();
- struct bch_dev *ca = bch2_dev_rcu_noerror(c, bp.k->p.inode);
- if (ca) {
- u32 bucket_offset;
- struct bpos bucket = bp_pos_to_bucket_and_offset(ca, bp.k->p, &bucket_offset);
- rcu_read_unlock();
+ struct bch_dev *ca;
+ u32 bucket_offset;
+ struct bpos bucket;
+ scoped_guard(rcu) {
+ ca = bch2_dev_rcu_noerror(c, bp.k->p.inode);
+ if (ca)
+ bucket = bp_pos_to_bucket_and_offset(ca, bp.k->p, &bucket_offset);
+ }
+
+ if (ca)
prt_printf(out, "bucket=%llu:%llu:%u ", bucket.inode, bucket.offset, bucket_offset);
- } else {
- rcu_read_unlock();
+ else
prt_printf(out, "sector=%llu:%llu ", bp.k->p.inode, bp.k->p.offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT);
- }
bch2_btree_id_level_to_text(out, bp.v->btree_id, bp.v->level);
prt_str(out, " data_type=");
@@ -140,7 +142,7 @@ static noinline int backpointer_mod_err(struct btree_trans *trans,
}
if (!will_check && __bch2_inconsistent_error(c, &buf))
- ret = -BCH_ERR_erofs_unfixed_errors;
+ ret = bch_err_throw(c, erofs_unfixed_errors);
bch_err(c, "%s", buf.buf);
printbuf_exit(&buf);
@@ -293,7 +295,7 @@ static struct btree *__bch2_backpointer_get_node(struct btree_trans *trans,
return b;
if (btree_node_will_make_reachable(b)) {
- b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node);
+ b = ERR_PTR(bch_err_throw(c, backpointer_to_overwritten_btree_node));
} else {
int ret = backpointer_target_not_found(trans, bp, bkey_i_to_s_c(&b->key),
last_flushed, commit);
@@ -351,7 +353,7 @@ static struct bkey_s_c __bch2_backpointer_get_key(struct btree_trans *trans,
return ret ? bkey_s_c_err(ret) : bkey_s_c_null;
} else {
struct btree *b = __bch2_backpointer_get_node(trans, bp, iter, last_flushed, commit);
- if (b == ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node))
+ if (b == ERR_PTR(bch_err_throw(c, backpointer_to_overwritten_btree_node)))
return bkey_s_c_null;
if (IS_ERR_OR_NULL(b))
return ((struct bkey_s_c) { .k = ERR_CAST(b) });
@@ -591,6 +593,7 @@ check_existing_bp:
bkey_for_each_ptr(other_extent_ptrs, ptr)
if (ptr->dev == bp->k.p.inode &&
dev_ptr_stale_rcu(ca, ptr)) {
+ rcu_read_unlock();
ret = drop_dev_and_update(trans, other_bp.v->btree_id,
other_extent, bp->k.p.inode);
if (ret)
@@ -648,7 +651,7 @@ check_existing_bp:
prt_newline(&buf);
bch2_bkey_val_to_text(&buf, c, other_extent);
bch_err(c, "%s", buf.buf);
- ret = -BCH_ERR_fsck_repair_unimplemented;
+ ret = bch_err_throw(c, fsck_repair_unimplemented);
goto err;
missing:
printbuf_reset(&buf);
@@ -679,26 +682,23 @@ static int check_extent_to_backpointers(struct btree_trans *trans,
if (p.ptr.dev == BCH_SB_MEMBER_INVALID)
continue;
- rcu_read_lock();
- struct bch_dev *ca = bch2_dev_rcu_noerror(c, p.ptr.dev);
- if (!ca) {
- rcu_read_unlock();
- continue;
- }
+ bool empty;
+ {
+ /* scoped_guard() is a loop, so it breaks continue */
+ guard(rcu)();
+ struct bch_dev *ca = bch2_dev_rcu_noerror(c, p.ptr.dev);
+ if (!ca)
+ continue;
- if (p.ptr.cached && dev_ptr_stale_rcu(ca, &p.ptr)) {
- rcu_read_unlock();
- continue;
- }
+ if (p.ptr.cached && dev_ptr_stale_rcu(ca, &p.ptr))
+ continue;
- u64 b = PTR_BUCKET_NR(ca, &p.ptr);
- if (!bch2_bucket_bitmap_test(&ca->bucket_backpointer_mismatch, b)) {
- rcu_read_unlock();
- continue;
- }
+ u64 b = PTR_BUCKET_NR(ca, &p.ptr);
+ if (!bch2_bucket_bitmap_test(&ca->bucket_backpointer_mismatch, b))
+ continue;
- bool empty = bch2_bucket_bitmap_test(&ca->bucket_backpointer_empty, b);
- rcu_read_unlock();
+ empty = bch2_bucket_bitmap_test(&ca->bucket_backpointer_empty, b);
+ }
struct bkey_i_backpointer bp;
bch2_extent_ptr_to_bp(c, btree, level, k, p, entry, &bp);
@@ -953,7 +953,7 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b
sectors[ALLOC_cached] > a->cached_sectors ||
sectors[ALLOC_stripe] > a->stripe_sectors) {
ret = check_bucket_backpointers_to_extents(trans, ca, alloc_k.k->p) ?:
- -BCH_ERR_transaction_restart_nested;
+ bch_err_throw(c, transaction_restart_nested);
goto err;
}
@@ -981,7 +981,7 @@ static bool backpointer_node_has_missing(struct bch_fs *c, struct bkey_s_c k)
case KEY_TYPE_btree_ptr_v2: {
bool ret = false;
- rcu_read_lock();
+ guard(rcu)();
struct bpos pos = bkey_s_c_to_btree_ptr_v2(k).v->min_key;
while (pos.inode <= k.k->p.inode) {
if (pos.inode >= c->sb.nr_devices)
@@ -1009,7 +1009,6 @@ static bool backpointer_node_has_missing(struct bch_fs *c, struct bkey_s_c k)
next:
pos = SPOS(pos.inode + 1, 0, 0);
}
- rcu_read_unlock();
return ret;
}
@@ -1352,7 +1351,7 @@ static int bch2_bucket_bitmap_set(struct bch_dev *ca, struct bucket_bitmap *b, u
b->buckets = kvcalloc(BITS_TO_LONGS(ca->mi.nbuckets),
sizeof(unsigned long), GFP_KERNEL);
if (!b->buckets)
- return -BCH_ERR_ENOMEM_backpointer_mismatches_bitmap;
+ return bch_err_throw(ca->fs, ENOMEM_backpointer_mismatches_bitmap);
}
b->nr += !__test_and_set_bit(bit, b->buckets);
@@ -1361,7 +1360,8 @@ static int bch2_bucket_bitmap_set(struct bch_dev *ca, struct bucket_bitmap *b, u
return 0;
}
-int bch2_bucket_bitmap_resize(struct bucket_bitmap *b, u64 old_size, u64 new_size)
+int bch2_bucket_bitmap_resize(struct bch_dev *ca, struct bucket_bitmap *b,
+ u64 old_size, u64 new_size)
{
scoped_guard(mutex, &b->lock) {
if (!b->buckets)
@@ -1370,7 +1370,7 @@ int bch2_bucket_bitmap_resize(struct bucket_bitmap *b, u64 old_size, u64 new_siz
unsigned long *n = kvcalloc(BITS_TO_LONGS(new_size),
sizeof(unsigned long), GFP_KERNEL);
if (!n)
- return -BCH_ERR_ENOMEM_backpointer_mismatches_bitmap;
+ return bch_err_throw(ca->fs, ENOMEM_backpointer_mismatches_bitmap);
memcpy(n, b->buckets,
BITS_TO_LONGS(min(old_size, new_size)) * sizeof(unsigned long));
diff --git a/fs/bcachefs/backpointers.h b/fs/bcachefs/backpointers.h
index 6840561084ce..7e71afee1ac0 100644
--- a/fs/bcachefs/backpointers.h
+++ b/fs/bcachefs/backpointers.h
@@ -53,11 +53,10 @@ static inline struct bpos bp_pos_to_bucket_and_offset(const struct bch_dev *ca,
static inline bool bp_pos_to_bucket_nodev_noerror(struct bch_fs *c, struct bpos bp_pos, struct bpos *bucket)
{
- rcu_read_lock();
+ guard(rcu)();
struct bch_dev *ca = bch2_dev_rcu_noerror(c, bp_pos.inode);
if (ca)
*bucket = bp_pos_to_bucket(ca, bp_pos);
- rcu_read_unlock();
return ca != NULL;
}
@@ -195,7 +194,7 @@ static inline bool bch2_bucket_bitmap_test(struct bucket_bitmap *b, u64 i)
return bitmap && test_bit(i, bitmap);
}
-int bch2_bucket_bitmap_resize(struct bucket_bitmap *, u64, u64);
+int bch2_bucket_bitmap_resize(struct bch_dev *, struct bucket_bitmap *, u64, u64);
void bch2_bucket_bitmap_free(struct bucket_bitmap *);
#endif /* _BCACHEFS_BACKPOINTERS_BACKGROUND_H */
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 7824da2af9d0..3651a296d506 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -183,6 +183,16 @@
#define pr_fmt(fmt) "%s() " fmt "\n", __func__
#endif
+#ifdef CONFIG_BCACHEFS_DEBUG
+#define ENUMERATED_REF_DEBUG
+#endif
+
+#ifndef dynamic_fault
+#define dynamic_fault(...) 0
+#endif
+
+#define race_fault(...) dynamic_fault("bcachefs:race")
+
#include <linux/backing-dev-defs.h>
#include <linux/bug.h>
#include <linux/bio.h>
@@ -219,15 +229,30 @@
#include "time_stats.h"
#include "util.h"
-#ifdef CONFIG_BCACHEFS_DEBUG
-#define ENUMERATED_REF_DEBUG
-#endif
-
-#ifndef dynamic_fault
-#define dynamic_fault(...) 0
-#endif
+#include "alloc_types.h"
+#include "async_objs_types.h"
+#include "btree_gc_types.h"
+#include "btree_types.h"
+#include "btree_node_scan_types.h"
+#include "btree_write_buffer_types.h"
+#include "buckets_types.h"
+#include "buckets_waiting_for_journal_types.h"
+#include "clock_types.h"
+#include "disk_groups_types.h"
+#include "ec_types.h"
+#include "enumerated_ref_types.h"
+#include "journal_types.h"
+#include "keylist_types.h"
+#include "quota_types.h"
+#include "rebalance_types.h"
+#include "recovery_passes_types.h"
+#include "replicas_types.h"
+#include "sb-members_types.h"
+#include "subvolume_types.h"
+#include "super_types.h"
+#include "thread_with_file_types.h"
-#define race_fault(...) dynamic_fault("bcachefs:race")
+#include "trace.h"
#define count_event(_c, _name) this_cpu_inc((_c)->counters[BCH_COUNTER_##_name])
@@ -380,6 +405,14 @@ do { \
pr_info(fmt, ##__VA_ARGS__); \
} while (0)
+static inline int __bch2_err_trace(struct bch_fs *c, int err)
+{
+ trace_error_throw(c, err, _THIS_IP_);
+ return err;
+}
+
+#define bch_err_throw(_c, _err) __bch2_err_trace(_c, -BCH_ERR_##_err)
+
/* Parameters that are useful for debugging, but should always be compiled in: */
#define BCH_DEBUG_PARAMS_ALWAYS() \
BCH_DEBUG_PARAM(key_merging_disabled, \
@@ -486,29 +519,6 @@ enum bch_time_stats {
BCH_TIME_STAT_NR
};
-#include "alloc_types.h"
-#include "async_objs_types.h"
-#include "btree_gc_types.h"
-#include "btree_types.h"
-#include "btree_node_scan_types.h"
-#include "btree_write_buffer_types.h"
-#include "buckets_types.h"
-#include "buckets_waiting_for_journal_types.h"
-#include "clock_types.h"
-#include "disk_groups_types.h"
-#include "ec_types.h"
-#include "enumerated_ref_types.h"
-#include "journal_types.h"
-#include "keylist_types.h"
-#include "quota_types.h"
-#include "rebalance_types.h"
-#include "recovery_passes_types.h"
-#include "replicas_types.h"
-#include "sb-members_types.h"
-#include "subvolume_types.h"
-#include "super_types.h"
-#include "thread_with_file_types.h"
-
/* Number of nodes btree coalesce will try to coalesce at once */
#define GC_MERGE_NODES 4U
diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c
index 8557cbd3d818..91e0aa796e6b 100644
--- a/fs/bcachefs/btree_cache.c
+++ b/fs/bcachefs/btree_cache.c
@@ -149,7 +149,7 @@ static int btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp)
b->data = kvmalloc(btree_buf_bytes(b), gfp);
if (!b->data)
- return -BCH_ERR_ENOMEM_btree_node_mem_alloc;
+ return bch_err_throw(c, ENOMEM_btree_node_mem_alloc);
#ifdef __KERNEL__
b->aux_data = kvmalloc(btree_aux_data_bytes(b), gfp);
#else
@@ -162,7 +162,7 @@ static int btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp)
if (!b->aux_data) {
kvfree(b->data);
b->data = NULL;
- return -BCH_ERR_ENOMEM_btree_node_mem_alloc;
+ return bch_err_throw(c, ENOMEM_btree_node_mem_alloc);
}
return 0;
@@ -353,21 +353,21 @@ static int __btree_node_reclaim_checks(struct bch_fs *c, struct btree *b,
if (btree_node_noevict(b)) {
bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_noevict]++;
- return -BCH_ERR_ENOMEM_btree_node_reclaim;
+ return bch_err_throw(c, ENOMEM_btree_node_reclaim);
}
if (btree_node_write_blocked(b)) {
bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_write_blocked]++;
- return -BCH_ERR_ENOMEM_btree_node_reclaim;
+ return bch_err_throw(c, ENOMEM_btree_node_reclaim);
}
if (btree_node_will_make_reachable(b)) {
bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_will_make_reachable]++;
- return -BCH_ERR_ENOMEM_btree_node_reclaim;
+ return bch_err_throw(c, ENOMEM_btree_node_reclaim);
}
if (btree_node_dirty(b)) {
if (!flush) {
bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_dirty]++;
- return -BCH_ERR_ENOMEM_btree_node_reclaim;
+ return bch_err_throw(c, ENOMEM_btree_node_reclaim);
}
if (locked) {
@@ -393,7 +393,7 @@ static int __btree_node_reclaim_checks(struct bch_fs *c, struct btree *b,
bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_read_in_flight]++;
else if (btree_node_write_in_flight(b))
bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_write_in_flight]++;
- return -BCH_ERR_ENOMEM_btree_node_reclaim;
+ return bch_err_throw(c, ENOMEM_btree_node_reclaim);
}
if (locked)
@@ -424,13 +424,13 @@ retry_unlocked:
if (!six_trylock_intent(&b->c.lock)) {
bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_lock_intent]++;
- return -BCH_ERR_ENOMEM_btree_node_reclaim;
+ return bch_err_throw(c, ENOMEM_btree_node_reclaim);
}
if (!six_trylock_write(&b->c.lock)) {
bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_lock_write]++;
six_unlock_intent(&b->c.lock);
- return -BCH_ERR_ENOMEM_btree_node_reclaim;
+ return bch_err_throw(c, ENOMEM_btree_node_reclaim);
}
/* recheck under lock */
@@ -682,7 +682,7 @@ int bch2_fs_btree_cache_init(struct bch_fs *c)
return 0;
err:
- return -BCH_ERR_ENOMEM_fs_btree_cache_init;
+ return bch_err_throw(c, ENOMEM_fs_btree_cache_init);
}
void bch2_fs_btree_cache_init_early(struct btree_cache *bc)
@@ -727,7 +727,7 @@ int bch2_btree_cache_cannibalize_lock(struct btree_trans *trans, struct closure
if (!cl) {
trace_and_count(c, btree_cache_cannibalize_lock_fail, trans);
- return -BCH_ERR_ENOMEM_btree_cache_cannibalize_lock;
+ return bch_err_throw(c, ENOMEM_btree_cache_cannibalize_lock);
}
closure_wait(&bc->alloc_wait, cl);
@@ -741,7 +741,7 @@ int bch2_btree_cache_cannibalize_lock(struct btree_trans *trans, struct closure
}
trace_and_count(c, btree_cache_cannibalize_lock_fail, trans);
- return -BCH_ERR_btree_cache_cannibalize_lock_blocked;
+ return bch_err_throw(c, btree_cache_cannibalize_lock_blocked);
success:
trace_and_count(c, btree_cache_cannibalize_lock, trans);
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 91b6395421df..9ddcbe1bda78 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -150,7 +150,7 @@ static int set_node_min(struct bch_fs *c, struct btree *b, struct bpos new_min)
new = kmalloc_array(BKEY_BTREE_PTR_U64s_MAX, sizeof(u64), GFP_KERNEL);
if (!new)
- return -BCH_ERR_ENOMEM_gc_repair_key;
+ return bch_err_throw(c, ENOMEM_gc_repair_key);
btree_ptr_to_v2(b, new);
b->data->min_key = new_min;
@@ -190,7 +190,7 @@ static int set_node_max(struct bch_fs *c, struct btree *b, struct bpos new_max)
new = kmalloc_array(BKEY_BTREE_PTR_U64s_MAX, sizeof(u64), GFP_KERNEL);
if (!new)
- return -BCH_ERR_ENOMEM_gc_repair_key;
+ return bch_err_throw(c, ENOMEM_gc_repair_key);
btree_ptr_to_v2(b, new);
b->data->max_key = new_max;
@@ -935,7 +935,7 @@ static int bch2_gc_alloc_start(struct bch_fs *c)
ret = genradix_prealloc(&ca->buckets_gc, ca->mi.nbuckets, GFP_KERNEL);
if (ret) {
bch2_dev_put(ca);
- ret = -BCH_ERR_ENOMEM_gc_alloc_start;
+ ret = bch_err_throw(c, ENOMEM_gc_alloc_start);
break;
}
}
@@ -1093,42 +1093,41 @@ static int gc_btree_gens_key(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
- struct bkey_i *u;
- int ret;
if (unlikely(test_bit(BCH_FS_going_ro, &c->flags)))
return -EROFS;
- rcu_read_lock();
- bkey_for_each_ptr(ptrs, ptr) {
- struct bch_dev *ca = bch2_dev_rcu(c, ptr->dev);
- if (!ca)
- continue;
+ bool too_stale = false;
+ scoped_guard(rcu) {
+ bkey_for_each_ptr(ptrs, ptr) {
+ struct bch_dev *ca = bch2_dev_rcu(c, ptr->dev);
+ if (!ca)
+ continue;
- if (dev_ptr_stale(ca, ptr) > 16) {
- rcu_read_unlock();
- goto update;
+ too_stale |= dev_ptr_stale(ca, ptr) > 16;
}
+
+ if (!too_stale)
+ bkey_for_each_ptr(ptrs, ptr) {
+ struct bch_dev *ca = bch2_dev_rcu(c, ptr->dev);
+ if (!ca)
+ continue;
+
+ u8 *gen = &ca->oldest_gen[PTR_BUCKET_NR(ca, ptr)];
+ if (gen_after(*gen, ptr->gen))
+ *gen = ptr->gen;
+ }
}
- bkey_for_each_ptr(ptrs, ptr) {
- struct bch_dev *ca = bch2_dev_rcu(c, ptr->dev);
- if (!ca)
- continue;
+ if (too_stale) {
+ struct bkey_i *u = bch2_bkey_make_mut(trans, iter, &k, 0);
+ int ret = PTR_ERR_OR_ZERO(u);
+ if (ret)
+ return ret;
- u8 *gen = &ca->oldest_gen[PTR_BUCKET_NR(ca, ptr)];
- if (gen_after(*gen, ptr->gen))
- *gen = ptr->gen;
+ bch2_extent_normalize(c, bkey_i_to_s(u));
}
- rcu_read_unlock();
- return 0;
-update:
- u = bch2_bkey_make_mut(trans, iter, &k, 0);
- ret = PTR_ERR_OR_ZERO(u);
- if (ret)
- return ret;
- bch2_extent_normalize(c, bkey_i_to_s(u));
return 0;
}
@@ -1181,7 +1180,7 @@ int bch2_gc_gens(struct bch_fs *c)
ca->oldest_gen = kvmalloc(gens->nbuckets, GFP_KERNEL);
if (!ca->oldest_gen) {
bch2_dev_put(ca);
- ret = -BCH_ERR_ENOMEM_gc_gens;
+ ret = bch_err_throw(c, ENOMEM_gc_gens);
goto err;
}
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index 34018296053a..57eff3012a7b 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -557,7 +557,7 @@ static int __btree_err(int ret,
const char *fmt, ...)
{
if (c->recovery.curr_pass == BCH_RECOVERY_PASS_scan_for_btree_nodes)
- return -BCH_ERR_fsck_fix;
+ return bch_err_throw(c, fsck_fix);
bool have_retry = false;
int ret2;
@@ -572,9 +572,9 @@ static int __btree_err(int ret,
}
if (!have_retry && ret == -BCH_ERR_btree_node_read_err_want_retry)
- ret = -BCH_ERR_btree_node_read_err_fixable;
+ ret = bch_err_throw(c, btree_node_read_err_fixable);
if (!have_retry && ret == -BCH_ERR_btree_node_read_err_must_retry)
- ret = -BCH_ERR_btree_node_read_err_bad_node;
+ ret = bch_err_throw(c, btree_node_read_err_bad_node);
bch2_sb_error_count(c, err_type);
@@ -602,14 +602,14 @@ static int __btree_err(int ret,
switch (ret) {
case -BCH_ERR_btree_node_read_err_fixable:
ret2 = bch2_fsck_err_opt(c, FSCK_CAN_FIX, err_type);
- if (ret2 != -BCH_ERR_fsck_fix &&
- ret2 != -BCH_ERR_fsck_ignore) {
+ if (!bch2_err_matches(ret2, BCH_ERR_fsck_fix) &&
+ !bch2_err_matches(ret2, BCH_ERR_fsck_ignore)) {
ret = ret2;
goto fsck_err;
}
if (!have_retry)
- ret = -BCH_ERR_fsck_fix;
+ ret = bch_err_throw(c, fsck_fix);
goto out;
case -BCH_ERR_btree_node_read_err_bad_node:
prt_str(&out, ", ");
@@ -631,14 +631,14 @@ static int __btree_err(int ret,
switch (ret) {
case -BCH_ERR_btree_node_read_err_fixable:
ret2 = __bch2_fsck_err(c, NULL, FSCK_CAN_FIX, err_type, "%s", out.buf);
- if (ret2 != -BCH_ERR_fsck_fix &&
- ret2 != -BCH_ERR_fsck_ignore) {
+ if (!bch2_err_matches(ret2, BCH_ERR_fsck_fix) &&
+ !bch2_err_matches(ret2, BCH_ERR_fsck_ignore)) {
ret = ret2;
goto fsck_err;
}
if (!have_retry)
- ret = -BCH_ERR_fsck_fix;
+ ret = bch_err_throw(c, fsck_fix);
goto out;
case -BCH_ERR_btree_node_read_err_bad_node:
prt_str(&out, ", ");
@@ -660,7 +660,7 @@ fsck_err:
failed, err_msg, \
msg, ##__VA_ARGS__); \
\
- if (_ret != -BCH_ERR_fsck_fix) { \
+ if (!bch2_err_matches(_ret, BCH_ERR_fsck_fix)) { \
ret = _ret; \
goto fsck_err; \
} \
@@ -1325,14 +1325,13 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
btree_node_reset_sib_u64s(b);
- rcu_read_lock();
- bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&b->key)), ptr) {
- struct bch_dev *ca2 = bch2_dev_rcu(c, ptr->dev);
+ scoped_guard(rcu)
+ bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&b->key)), ptr) {
+ struct bch_dev *ca2 = bch2_dev_rcu(c, ptr->dev);
- if (!ca2 || ca2->mi.state != BCH_MEMBER_STATE_rw)
- set_btree_node_need_rewrite(b);
- }
- rcu_read_unlock();
+ if (!ca2 || ca2->mi.state != BCH_MEMBER_STATE_rw)
+ set_btree_node_need_rewrite(b);
+ }
if (!ptr_written)
set_btree_node_need_rewrite(b);
@@ -1688,7 +1687,7 @@ static int btree_node_read_all_replicas(struct bch_fs *c, struct btree *b, bool
ra = kzalloc(sizeof(*ra), GFP_NOFS);
if (!ra)
- return -BCH_ERR_ENOMEM_btree_node_read_all_replicas;
+ return bch_err_throw(c, ENOMEM_btree_node_read_all_replicas);
closure_init(&ra->cl, NULL);
ra->c = c;
@@ -1870,7 +1869,7 @@ static int __bch2_btree_root_read(struct btree_trans *trans, enum btree_id id,
bch2_btree_node_hash_remove(&c->btree_cache, b);
mutex_unlock(&c->btree_cache.lock);
- ret = -BCH_ERR_btree_node_read_error;
+ ret = bch_err_throw(c, btree_node_read_error);
goto err;
}
@@ -2020,7 +2019,7 @@ int bch2_btree_node_scrub(struct btree_trans *trans,
struct bch_fs *c = trans->c;
if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_btree_node_scrub))
- return -BCH_ERR_erofs_no_writes;
+ return bch_err_throw(c, erofs_no_writes);
struct extent_ptr_decoded pick;
int ret = bch2_bkey_pick_read_device(c, k, NULL, &pick, dev);
@@ -2030,7 +2029,7 @@ int bch2_btree_node_scrub(struct btree_trans *trans,
struct bch_dev *ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ,
BCH_DEV_READ_REF_btree_node_scrub);
if (!ca) {
- ret = -BCH_ERR_device_offline;
+ ret = bch_err_throw(c, device_offline);
goto err;
}
@@ -2167,7 +2166,7 @@ static void btree_node_write_work(struct work_struct *work)
bch2_dev_list_has_dev(wbio->wbio.failed, ptr->dev));
if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(&wbio->key))) {
- ret = -BCH_ERR_btree_node_write_all_failed;
+ ret = bch_err_throw(c, btree_node_write_all_failed);
goto err;
}
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index b4bf4217a3fa..b78403376c07 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -890,8 +890,7 @@ static noinline void btree_node_mem_ptr_set(struct btree_trans *trans,
static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans,
struct btree_path *path,
- unsigned flags,
- struct bkey_buf *out)
+ unsigned flags)
{
struct bch_fs *c = trans->c;
struct btree_path_level *l = path_l(path);
@@ -915,7 +914,7 @@ static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans,
goto err;
}
- bch2_bkey_buf_reassemble(out, c, k);
+ bkey_reassemble(&trans->btree_path_down, k);
if ((flags & BTREE_ITER_prefetch) &&
c->opts.btree_node_prefetch)
@@ -926,6 +925,22 @@ err:
return ret;
}
+static noinline_for_stack int btree_node_missing_err(struct btree_trans *trans,
+ struct btree_path *path)
+{
+ struct bch_fs *c = trans->c;
+ struct printbuf buf = PRINTBUF;
+
+ prt_str(&buf, "node not found at pos ");
+ bch2_bpos_to_text(&buf, path->pos);
+ prt_str(&buf, " within parent node ");
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&path_l(path)->b->key));
+
+ bch2_fs_fatal_error(c, "%s", buf.buf);
+ printbuf_exit(&buf);
+ return bch_err_throw(c, btree_need_topology_repair);
+}
+
static __always_inline int btree_path_down(struct btree_trans *trans,
struct btree_path *path,
unsigned flags,
@@ -936,51 +951,38 @@ static __always_inline int btree_path_down(struct btree_trans *trans,
struct btree *b;
unsigned level = path->level - 1;
enum six_lock_type lock_type = __btree_lock_want(path, level);
- struct bkey_buf tmp;
int ret;
EBUG_ON(!btree_node_locked(path, path->level));
- bch2_bkey_buf_init(&tmp);
-
if (unlikely(trans->journal_replay_not_finished)) {
- ret = btree_node_iter_and_journal_peek(trans, path, flags, &tmp);
+ ret = btree_node_iter_and_journal_peek(trans, path, flags);
if (ret)
- goto err;
+ return ret;
} else {
struct bkey_packed *k = bch2_btree_node_iter_peek(&l->iter, l->b);
- if (!k) {
- struct printbuf buf = PRINTBUF;
-
- prt_str(&buf, "node not found at pos ");
- bch2_bpos_to_text(&buf, path->pos);
- prt_str(&buf, " within parent node ");
- bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&l->b->key));
+ if (unlikely(!k))
+ return btree_node_missing_err(trans, path);
- bch2_fs_fatal_error(c, "%s", buf.buf);
- printbuf_exit(&buf);
- ret = -BCH_ERR_btree_need_topology_repair;
- goto err;
- }
-
- bch2_bkey_buf_unpack(&tmp, c, l->b, k);
+ bch2_bkey_unpack(l->b, &trans->btree_path_down, k);
- if ((flags & BTREE_ITER_prefetch) &&
+ if (unlikely((flags & BTREE_ITER_prefetch)) &&
c->opts.btree_node_prefetch) {
ret = btree_path_prefetch(trans, path);
if (ret)
- goto err;
+ return ret;
}
}
- b = bch2_btree_node_get(trans, path, tmp.k, level, lock_type, trace_ip);
+ b = bch2_btree_node_get(trans, path, &trans->btree_path_down,
+ level, lock_type, trace_ip);
ret = PTR_ERR_OR_ZERO(b);
if (unlikely(ret))
- goto err;
+ return ret;
- if (likely(!trans->journal_replay_not_finished &&
- tmp.k->k.type == KEY_TYPE_btree_ptr_v2) &&
- unlikely(b != btree_node_mem_ptr(tmp.k)))
+ if (unlikely(b != btree_node_mem_ptr(&trans->btree_path_down)) &&
+ likely(!trans->journal_replay_not_finished &&
+ trans->btree_path_down.k.type == KEY_TYPE_btree_ptr_v2))
btree_node_mem_ptr_set(trans, path, level + 1, b);
if (btree_node_read_locked(path, level + 1))
@@ -992,9 +994,7 @@ static __always_inline int btree_path_down(struct btree_trans *trans,
bch2_btree_path_level_init(trans, path, b);
bch2_btree_path_verify_locks(trans, path);
-err:
- bch2_bkey_buf_exit(&tmp, c);
- return ret;
+ return 0;
}
static int bch2_btree_path_traverse_all(struct btree_trans *trans)
@@ -1006,7 +1006,7 @@ static int bch2_btree_path_traverse_all(struct btree_trans *trans)
int ret = 0;
if (trans->in_traverse_all)
- return -BCH_ERR_transaction_restart_in_traverse_all;
+ return bch_err_throw(c, transaction_restart_in_traverse_all);
trans->in_traverse_all = true;
retry_all:
@@ -3568,13 +3568,12 @@ bch2_btree_bkey_cached_common_to_text(struct printbuf *out,
struct btree_bkey_cached_common *b)
{
struct six_lock_count c = six_lock_counts(&b->lock);
- struct task_struct *owner;
pid_t pid;
- rcu_read_lock();
- owner = READ_ONCE(b->lock.owner);
- pid = owner ? owner->pid : 0;
- rcu_read_unlock();
+ scoped_guard(rcu) {
+ struct task_struct *owner = READ_ONCE(b->lock.owner);
+ pid = owner ? owner->pid : 0;
+ }
prt_printf(out, "\t%px %c ", b, b->cached ? 'c' : 'b');
bch2_btree_id_to_text(out, b->btree_id);
@@ -3603,7 +3602,7 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans)
prt_printf(out, "%i %s\n", task ? task->pid : 0, trans->fn);
/* trans->paths is rcu protected vs. freeing */
- rcu_read_lock();
+ guard(rcu)();
out->atomic++;
struct btree_path *paths = rcu_dereference(trans->paths);
@@ -3646,7 +3645,6 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans)
}
out:
--out->atomic;
- rcu_read_unlock();
}
void bch2_fs_btree_iter_exit(struct bch_fs *c)
diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h
index 2cabb5f0f484..09dd3e52622e 100644
--- a/fs/bcachefs/btree_iter.h
+++ b/fs/bcachefs/btree_iter.h
@@ -963,16 +963,6 @@ struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_trans *,
_p; \
})
-#define bch2_trans_run(_c, _do) \
-({ \
- struct btree_trans *trans = bch2_trans_get(_c); \
- int _ret = (_do); \
- bch2_trans_put(trans); \
- _ret; \
-})
-
-#define bch2_trans_do(_c, _do) bch2_trans_run(_c, lockrestart_do(trans, _do))
-
struct btree_trans *__bch2_trans_get(struct bch_fs *, unsigned);
void bch2_trans_put(struct btree_trans *);
@@ -990,6 +980,27 @@ unsigned bch2_trans_get_fn_idx(const char *);
__bch2_trans_get(_c, trans_fn_idx); \
})
+/*
+ * We don't use DEFINE_CLASS() because using a function for the constructor
+ * breaks bch2_trans_get()'s use of __func__
+ */
+typedef struct btree_trans * class_btree_trans_t;
+static inline void class_btree_trans_destructor(struct btree_trans **p)
+{
+ struct btree_trans *trans = *p;
+ bch2_trans_put(trans);
+}
+
+#define class_btree_trans_constructor(_c) bch2_trans_get(_c)
+
+#define bch2_trans_run(_c, _do) \
+({ \
+ CLASS(btree_trans, trans)(_c); \
+ (_do); \
+})
+
+#define bch2_trans_do(_c, _do) bch2_trans_run(_c, lockrestart_do(trans, _do))
+
void bch2_btree_trans_to_text(struct printbuf *, struct btree_trans *);
void bch2_fs_btree_iter_exit(struct bch_fs *);
diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c
index ade3b5addd75..cf7398751644 100644
--- a/fs/bcachefs/btree_journal_iter.c
+++ b/fs/bcachefs/btree_journal_iter.c
@@ -292,7 +292,7 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id,
if (!new_keys.data) {
bch_err(c, "%s: error allocating new key array (size %zu)",
__func__, new_keys.size);
- return -BCH_ERR_ENOMEM_journal_key_insert;
+ return bch_err_throw(c, ENOMEM_journal_key_insert);
}
/* Since @keys was full, there was no gap: */
@@ -331,7 +331,7 @@ int bch2_journal_key_insert(struct bch_fs *c, enum btree_id id,
n = kmalloc(bkey_bytes(&k->k), GFP_KERNEL);
if (!n)
- return -BCH_ERR_ENOMEM_journal_key_insert;
+ return bch_err_throw(c, ENOMEM_journal_key_insert);
bkey_copy(n, k);
ret = bch2_journal_key_insert_take(c, id, level, n);
@@ -457,11 +457,9 @@ static void bch2_journal_iter_advance(struct journal_iter *iter)
static struct bkey_s_c bch2_journal_iter_peek(struct journal_iter *iter)
{
- struct bkey_s_c ret = bkey_s_c_null;
-
journal_iter_verify(iter);
- rcu_read_lock();
+ guard(rcu)();
while (iter->idx < iter->keys->size) {
struct journal_key *k = iter->keys->data + iter->idx;
@@ -470,19 +468,16 @@ static struct bkey_s_c bch2_journal_iter_peek(struct journal_iter *iter)
break;
BUG_ON(cmp);
- if (!k->overwritten) {
- ret = bkey_i_to_s_c(k->k);
- break;
- }
+ if (!k->overwritten)
+ return bkey_i_to_s_c(k->k);
if (k->overwritten_range)
iter->idx = idx_to_pos(iter->keys, rcu_dereference(k->overwritten_range)->end);
else
bch2_journal_iter_advance(iter);
}
- rcu_read_unlock();
- return ret;
+ return bkey_s_c_null;
}
static void bch2_journal_iter_exit(struct journal_iter *iter)
@@ -741,7 +736,7 @@ int bch2_journal_keys_sort(struct bch_fs *c)
if (keys->nr * 8 > keys->size * 7) {
bch_err(c, "Too many journal keys for slowpath; have %zu compacted, buf size %zu, processed %zu keys at seq %llu",
keys->nr, keys->size, nr_read, le64_to_cpu(i->j.seq));
- return -BCH_ERR_ENOMEM_journal_keys_sort;
+ return bch_err_throw(c, ENOMEM_journal_keys_sort);
}
BUG_ON(darray_push(keys, n));
diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c
index 9da950e7eb7d..d96188b92db2 100644
--- a/fs/bcachefs/btree_key_cache.c
+++ b/fs/bcachefs/btree_key_cache.c
@@ -187,27 +187,23 @@ lock:
static struct bkey_cached *
bkey_cached_reuse(struct btree_key_cache *c)
{
- struct bucket_table *tbl;
+
+ guard(rcu)();
+ struct bucket_table *tbl = rht_dereference_rcu(c->table.tbl, &c->table);
struct rhash_head *pos;
struct bkey_cached *ck;
- unsigned i;
- rcu_read_lock();
- tbl = rht_dereference_rcu(c->table.tbl, &c->table);
- for (i = 0; i < tbl->size; i++)
+ for (unsigned i = 0; i < tbl->size; i++)
rht_for_each_entry_rcu(ck, pos, tbl, i, hash) {
if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags) &&
bkey_cached_lock_for_evict(ck)) {
if (bkey_cached_evict(c, ck))
- goto out;
+ return ck;
six_unlock_write(&ck->c.lock);
six_unlock_intent(&ck->c.lock);
}
}
- ck = NULL;
-out:
- rcu_read_unlock();
- return ck;
+ return NULL;
}
static int btree_key_cache_create(struct btree_trans *trans,
@@ -242,7 +238,7 @@ static int btree_key_cache_create(struct btree_trans *trans,
if (unlikely(!ck)) {
bch_err(c, "error allocating memory for key cache item, btree %s",
bch2_btree_id_str(ck_path->btree_id));
- return -BCH_ERR_ENOMEM_btree_key_cache_create;
+ return bch_err_throw(c, ENOMEM_btree_key_cache_create);
}
}
@@ -260,7 +256,7 @@ static int btree_key_cache_create(struct btree_trans *trans,
if (unlikely(!new_k)) {
bch_err(trans->c, "error allocating memory for key cache key, btree %s u64s %u",
bch2_btree_id_str(ck->key.btree_id), key_u64s);
- ret = -BCH_ERR_ENOMEM_btree_key_cache_fill;
+ ret = bch_err_throw(c, ENOMEM_btree_key_cache_fill);
} else if (ret) {
kfree(new_k);
goto err;
@@ -826,20 +822,20 @@ int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc)
bc->nr_pending = alloc_percpu(size_t);
if (!bc->nr_pending)
- return -BCH_ERR_ENOMEM_fs_btree_cache_init;
+ return bch_err_throw(c, ENOMEM_fs_btree_cache_init);
if (rcu_pending_init(&bc->pending[0], &c->btree_trans_barrier, __bkey_cached_free) ||
rcu_pending_init(&bc->pending[1], &c->btree_trans_barrier, __bkey_cached_free))
- return -BCH_ERR_ENOMEM_fs_btree_cache_init;
+ return bch_err_throw(c, ENOMEM_fs_btree_cache_init);
if (rhashtable_init(&bc->table, &bch2_btree_key_cache_params))
- return -BCH_ERR_ENOMEM_fs_btree_cache_init;
+ return bch_err_throw(c, ENOMEM_fs_btree_cache_init);
bc->table_init_done = true;
shrink = shrinker_alloc(0, "%s-btree_key_cache", c->name);
if (!shrink)
- return -BCH_ERR_ENOMEM_fs_btree_cache_init;
+ return bch_err_throw(c, ENOMEM_fs_btree_cache_init);
bc->shrink = shrink;
shrink->count_objects = bch2_btree_key_cache_count;
shrink->scan_objects = bch2_btree_key_cache_scan;
diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c
index 2f2aed0c9916..47035aae232e 100644
--- a/fs/bcachefs/btree_locking.c
+++ b/fs/bcachefs/btree_locking.c
@@ -194,6 +194,30 @@ static int btree_trans_abort_preference(struct btree_trans *trans)
return 3;
}
+static noinline __noreturn void break_cycle_fail(struct lock_graph *g)
+{
+ struct printbuf buf = PRINTBUF;
+ buf.atomic++;
+
+ prt_printf(&buf, bch2_fmt(g->g->trans->c, "cycle of nofail locks"));
+
+ for (struct trans_waiting_for_lock *i = g->g; i < g->g + g->nr; i++) {
+ struct btree_trans *trans = i->trans;
+
+ bch2_btree_trans_to_text(&buf, trans);
+
+ prt_printf(&buf, "backtrace:\n");
+ printbuf_indent_add(&buf, 2);
+ bch2_prt_task_backtrace(&buf, trans->locking_wait.task, 2, GFP_NOWAIT);
+ printbuf_indent_sub(&buf, 2);
+ prt_newline(&buf);
+ }
+
+ bch2_print_str_nonblocking(g->g->trans->c, KERN_ERR, buf.buf);
+ printbuf_exit(&buf);
+ BUG();
+}
+
static noinline int break_cycle(struct lock_graph *g, struct printbuf *cycle,
struct trans_waiting_for_lock *from)
{
@@ -219,28 +243,8 @@ static noinline int break_cycle(struct lock_graph *g, struct printbuf *cycle,
}
}
- if (unlikely(!best)) {
- struct printbuf buf = PRINTBUF;
- buf.atomic++;
-
- prt_printf(&buf, bch2_fmt(g->g->trans->c, "cycle of nofail locks"));
-
- for (i = g->g; i < g->g + g->nr; i++) {
- struct btree_trans *trans = i->trans;
-
- bch2_btree_trans_to_text(&buf, trans);
-
- prt_printf(&buf, "backtrace:\n");
- printbuf_indent_add(&buf, 2);
- bch2_prt_task_backtrace(&buf, trans->locking_wait.task, 2, GFP_NOWAIT);
- printbuf_indent_sub(&buf, 2);
- prt_newline(&buf);
- }
-
- bch2_print_str_nonblocking(g->g->trans->c, KERN_ERR, buf.buf);
- printbuf_exit(&buf);
- BUG();
- }
+ if (unlikely(!best))
+ break_cycle_fail(g);
ret = abort_lock(g, abort);
out:
@@ -255,15 +259,14 @@ static int lock_graph_descend(struct lock_graph *g, struct btree_trans *trans,
struct printbuf *cycle)
{
struct btree_trans *orig_trans = g->g->trans;
- struct trans_waiting_for_lock *i;
- for (i = g->g; i < g->g + g->nr; i++)
+ for (struct trans_waiting_for_lock *i = g->g; i < g->g + g->nr; i++)
if (i->trans == trans) {
closure_put(&trans->ref);
return break_cycle(g, cycle, i);
}
- if (g->nr == ARRAY_SIZE(g->g)) {
+ if (unlikely(g->nr == ARRAY_SIZE(g->g))) {
closure_put(&trans->ref);
if (orig_trans->lock_may_not_fail)
@@ -308,7 +311,7 @@ int bch2_check_for_deadlock(struct btree_trans *trans, struct printbuf *cycle)
lock_graph_down(&g, trans);
/* trans->paths is rcu protected vs. freeing */
- rcu_read_lock();
+ guard(rcu)();
if (cycle)
cycle->atomic++;
next:
@@ -406,7 +409,6 @@ up:
out:
if (cycle)
--cycle->atomic;
- rcu_read_unlock();
return ret;
}
diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c
index 5a97a6b8a757..a35847734a60 100644
--- a/fs/bcachefs/btree_node_scan.c
+++ b/fs/bcachefs/btree_node_scan.c
@@ -363,6 +363,8 @@ static int handle_overwrites(struct bch_fs *c,
min_heap_sift_down(nodes_heap, 0, &found_btree_node_heap_cbs, NULL);
}
}
+
+ cond_resched();
}
return 0;
diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c
index 1c03c965d836..d9710801e3ee 100644
--- a/fs/bcachefs/btree_trans_commit.c
+++ b/fs/bcachefs/btree_trans_commit.c
@@ -376,7 +376,7 @@ static inline int btree_key_can_insert(struct btree_trans *trans,
struct btree *b, unsigned u64s)
{
if (!bch2_btree_node_insert_fits(b, u64s))
- return -BCH_ERR_btree_insert_btree_node_full;
+ return bch_err_throw(trans->c, btree_insert_btree_node_full);
return 0;
}
@@ -394,9 +394,10 @@ btree_key_can_insert_cached_slowpath(struct btree_trans *trans, unsigned flags,
new_k = kmalloc(new_u64s * sizeof(u64), GFP_KERNEL);
if (!new_k) {
- bch_err(trans->c, "error allocating memory for key cache key, btree %s u64s %u",
+ struct bch_fs *c = trans->c;
+ bch_err(c, "error allocating memory for key cache key, btree %s u64s %u",
bch2_btree_id_str(path->btree_id), new_u64s);
- return -BCH_ERR_ENOMEM_btree_key_cache_insert;
+ return bch_err_throw(c, ENOMEM_btree_key_cache_insert);
}
ret = bch2_trans_relock(trans) ?:
@@ -432,7 +433,7 @@ static int btree_key_can_insert_cached(struct btree_trans *trans, unsigned flags
if (watermark < BCH_WATERMARK_reclaim &&
!test_bit(BKEY_CACHED_DIRTY, &ck->flags) &&
bch2_btree_key_cache_must_wait(c))
- return -BCH_ERR_btree_insert_need_journal_reclaim;
+ return bch_err_throw(c, btree_insert_need_journal_reclaim);
/*
* bch2_varint_decode can read past the end of the buffer by at most 7
@@ -894,7 +895,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags,
*/
if ((flags & BCH_TRANS_COMMIT_journal_reclaim) &&
watermark < BCH_WATERMARK_reclaim) {
- ret = -BCH_ERR_journal_reclaim_would_deadlock;
+ ret = bch_err_throw(c, journal_reclaim_would_deadlock);
goto out;
}
@@ -966,14 +967,27 @@ do_bch2_trans_commit_to_journal_replay(struct btree_trans *trans)
for (struct jset_entry *i = btree_trans_journal_entries_start(trans);
i != btree_trans_journal_entries_top(trans);
- i = vstruct_next(i))
+ i = vstruct_next(i)) {
if (i->type == BCH_JSET_ENTRY_btree_keys ||
i->type == BCH_JSET_ENTRY_write_buffer_keys) {
- int ret = bch2_journal_key_insert(c, i->btree_id, i->level, i->start);
- if (ret)
- return ret;
+ jset_entry_for_each_key(i, k) {
+ int ret = bch2_journal_key_insert(c, i->btree_id, i->level, k);
+ if (ret)
+ return ret;
+ }
}
+ if (i->type == BCH_JSET_ENTRY_btree_root) {
+ guard(mutex)(&c->btree_root_lock);
+
+ struct btree_root *r = bch2_btree_id_root(c, i->btree_id);
+
+ bkey_copy(&r->key, i->start);
+ r->level = i->level;
+ r->alive = true;
+ }
+ }
+
for (struct bkey_i *i = btree_trans_subbuf_base(trans, &trans->accounting);
i != btree_trans_subbuf_top(trans, &trans->accounting);
i = bkey_next(i)) {
@@ -1011,7 +1025,7 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
if (unlikely(!test_bit(BCH_FS_may_go_rw, &c->flags)))
ret = do_bch2_trans_commit_to_journal_replay(trans);
else
- ret = -BCH_ERR_erofs_trans_commit;
+ ret = bch_err_throw(c, erofs_trans_commit);
goto out_reset;
}
@@ -1093,7 +1107,7 @@ err:
* restart:
*/
if (flags & BCH_TRANS_COMMIT_no_journal_res) {
- ret = -BCH_ERR_transaction_restart_nested;
+ ret = bch_err_throw(c, transaction_restart_nested);
goto out;
}
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index 9d641bf9d2a2..c61c4171ae50 100644
--- a/fs/bcachefs/btree_types.h
+++ b/fs/bcachefs/btree_types.h
@@ -555,6 +555,8 @@ struct btree_trans {
unsigned journal_u64s;
unsigned extra_disk_res; /* XXX kill */
+ __BKEY_PADDED(btree_path_down, BKEY_BTREE_PTR_VAL_U64s_MAX);
+
#ifdef CONFIG_DEBUG_LOCK_ALLOC
struct lockdep_map dep_map;
#endif
diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c
index 5dac09c98026..e97e78c10f49 100644
--- a/fs/bcachefs/btree_update.c
+++ b/fs/bcachefs/btree_update.c
@@ -123,65 +123,44 @@ static int need_whiteout_for_snapshot(struct btree_trans *trans,
}
int __bch2_insert_snapshot_whiteouts(struct btree_trans *trans,
- enum btree_id id,
- struct bpos old_pos,
- struct bpos new_pos)
+ enum btree_id btree, struct bpos pos,
+ snapshot_id_list *s)
{
- struct bch_fs *c = trans->c;
- struct btree_iter old_iter, new_iter = {};
- struct bkey_s_c old_k, new_k;
- snapshot_id_list s;
- struct bkey_i *update;
int ret = 0;
- if (!bch2_snapshot_has_children(c, old_pos.snapshot))
- return 0;
-
- darray_init(&s);
+ darray_for_each(*s, id) {
+ pos.snapshot = *id;
- bch2_trans_iter_init(trans, &old_iter, id, old_pos,
- BTREE_ITER_not_extents|
- BTREE_ITER_all_snapshots);
- while ((old_k = bch2_btree_iter_prev(trans, &old_iter)).k &&
- !(ret = bkey_err(old_k)) &&
- bkey_eq(old_pos, old_k.k->p)) {
- struct bpos whiteout_pos =
- SPOS(new_pos.inode, new_pos.offset, old_k.k->p.snapshot);
-
- if (!bch2_snapshot_is_ancestor(c, old_k.k->p.snapshot, old_pos.snapshot) ||
- snapshot_list_has_ancestor(c, &s, old_k.k->p.snapshot))
- continue;
-
- new_k = bch2_bkey_get_iter(trans, &new_iter, id, whiteout_pos,
- BTREE_ITER_not_extents|
- BTREE_ITER_intent);
- ret = bkey_err(new_k);
+ struct btree_iter iter;
+ struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, btree, pos,
+ BTREE_ITER_not_extents|
+ BTREE_ITER_intent);
+ ret = bkey_err(k);
if (ret)
break;
- if (new_k.k->type == KEY_TYPE_deleted) {
- update = bch2_trans_kmalloc(trans, sizeof(struct bkey_i));
+ if (k.k->type == KEY_TYPE_deleted) {
+ struct bkey_i *update = bch2_trans_kmalloc(trans, sizeof(struct bkey_i));
ret = PTR_ERR_OR_ZERO(update);
- if (ret)
+ if (ret) {
+ bch2_trans_iter_exit(trans, &iter);
break;
+ }
bkey_init(&update->k);
- update->k.p = whiteout_pos;
+ update->k.p = pos;
update->k.type = KEY_TYPE_whiteout;
- ret = bch2_trans_update(trans, &new_iter, update,
+ ret = bch2_trans_update(trans, &iter, update,
BTREE_UPDATE_internal_snapshot_node);
}
- bch2_trans_iter_exit(trans, &new_iter);
+ bch2_trans_iter_exit(trans, &iter);
- ret = snapshot_list_add(c, &s, old_k.k->p.snapshot);
if (ret)
break;
}
- bch2_trans_iter_exit(trans, &new_iter);
- bch2_trans_iter_exit(trans, &old_iter);
- darray_exit(&s);
+ darray_exit(s);
return ret;
}
@@ -608,7 +587,7 @@ int bch2_bkey_get_empty_slot(struct btree_trans *trans, struct btree_iter *iter,
BUG_ON(k.k->type != KEY_TYPE_deleted);
if (bkey_gt(k.k->p, end)) {
- ret = -BCH_ERR_ENOSPC_btree_slot;
+ ret = bch_err_throw(trans->c, ENOSPC_btree_slot);
goto err;
}
diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h
index f907eaa8b185..9feef1dc4de5 100644
--- a/fs/bcachefs/btree_update.h
+++ b/fs/bcachefs/btree_update.h
@@ -4,6 +4,7 @@
#include "btree_iter.h"
#include "journal.h"
+#include "snapshot.h"
struct bch_fs;
struct btree;
@@ -74,7 +75,7 @@ static inline int bch2_btree_delete_at_buffered(struct btree_trans *trans,
}
int __bch2_insert_snapshot_whiteouts(struct btree_trans *, enum btree_id,
- struct bpos, struct bpos);
+ struct bpos, snapshot_id_list *);
/*
* For use when splitting extents in existing snapshots:
@@ -88,11 +89,20 @@ static inline int bch2_insert_snapshot_whiteouts(struct btree_trans *trans,
struct bpos old_pos,
struct bpos new_pos)
{
+ BUG_ON(old_pos.snapshot != new_pos.snapshot);
+
if (!btree_type_has_snapshots(btree) ||
bkey_eq(old_pos, new_pos))
return 0;
- return __bch2_insert_snapshot_whiteouts(trans, btree, old_pos, new_pos);
+ snapshot_id_list s;
+ int ret = bch2_get_snapshot_overwrites(trans, btree, old_pos, &s);
+ if (ret)
+ return ret;
+
+ return s.nr
+ ? __bch2_insert_snapshot_whiteouts(trans, btree, new_pos, &s)
+ : 0;
}
int bch2_trans_update_extent_overwrite(struct btree_trans *, struct btree_iter *,
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 74e65714fecd..d2ecb782919b 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -57,8 +57,6 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b)
struct bkey_buf prev;
int ret = 0;
- printbuf_indent_add_nextline(&buf, 2);
-
BUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
!bpos_eq(bkey_i_to_btree_ptr_v2(&b->key)->v.min_key,
b->data->min_key));
@@ -69,20 +67,23 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b)
if (b == btree_node_root(c, b)) {
if (!bpos_eq(b->data->min_key, POS_MIN)) {
- ret = __bch2_topology_error(c, &buf);
-
+ bch2_log_msg_start(c, &buf);
+ prt_printf(&buf, "btree root with incorrect min_key: ");
bch2_bpos_to_text(&buf, b->data->min_key);
- log_fsck_err(trans, btree_root_bad_min_key,
- "btree root with incorrect min_key: %s", buf.buf);
- goto out;
+ prt_newline(&buf);
+
+ bch2_count_fsck_err(c, btree_root_bad_min_key, &buf);
+ goto err;
}
if (!bpos_eq(b->data->max_key, SPOS_MAX)) {
- ret = __bch2_topology_error(c, &buf);
+ bch2_log_msg_start(c, &buf);
+ prt_printf(&buf, "btree root with incorrect max_key: ");
bch2_bpos_to_text(&buf, b->data->max_key);
- log_fsck_err(trans, btree_root_bad_max_key,
- "btree root with incorrect max_key: %s", buf.buf);
- goto out;
+ prt_newline(&buf);
+
+ bch2_count_fsck_err(c, btree_root_bad_max_key, &buf);
+ goto err;
}
}
@@ -100,19 +101,15 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b)
: bpos_successor(prev.k->k.p);
if (!bpos_eq(expected_min, bp.v->min_key)) {
- ret = __bch2_topology_error(c, &buf);
-
- prt_str(&buf, "end of prev node doesn't match start of next node\nin ");
- bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level);
- prt_str(&buf, " node ");
- bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
+ prt_str(&buf, "end of prev node doesn't match start of next node");
prt_str(&buf, "\nprev ");
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(prev.k));
prt_str(&buf, "\nnext ");
bch2_bkey_val_to_text(&buf, c, k);
+ prt_newline(&buf);
- log_fsck_err(trans, btree_node_topology_bad_min_key, "%s", buf.buf);
- goto out;
+ bch2_count_fsck_err(c, btree_node_topology_bad_min_key, &buf);
+ goto err;
}
bch2_bkey_buf_reassemble(&prev, c, k);
@@ -120,32 +117,34 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b)
}
if (bkey_deleted(&prev.k->k)) {
- ret = __bch2_topology_error(c, &buf);
-
- prt_str(&buf, "empty interior node\nin ");
- bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level);
- prt_str(&buf, " node ");
- bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
-
- log_fsck_err(trans, btree_node_topology_empty_interior_node, "%s", buf.buf);
- } else if (!bpos_eq(prev.k->k.p, b->key.k.p)) {
- ret = __bch2_topology_error(c, &buf);
+ prt_printf(&buf, "empty interior node\n");
+ bch2_count_fsck_err(c, btree_node_topology_empty_interior_node, &buf);
+ goto err;
+ }
- prt_str(&buf, "last child node doesn't end at end of parent node\nin ");
- bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level);
- prt_str(&buf, " node ");
- bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
- prt_str(&buf, "\nlast key ");
+ if (!bpos_eq(prev.k->k.p, b->key.k.p)) {
+ prt_str(&buf, "last child node doesn't end at end of parent node\nchild: ");
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(prev.k));
+ prt_newline(&buf);
- log_fsck_err(trans, btree_node_topology_bad_max_key, "%s", buf.buf);
+ bch2_count_fsck_err(c, btree_node_topology_bad_max_key, &buf);
+ goto err;
}
out:
-fsck_err:
bch2_btree_and_journal_iter_exit(&iter);
bch2_bkey_buf_exit(&prev, c);
printbuf_exit(&buf);
return ret;
+err:
+ bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level);
+ prt_char(&buf, ' ');
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
+ prt_newline(&buf);
+
+ ret = __bch2_topology_error(c, &buf);
+ bch2_print_str(c, KERN_ERR, buf.buf);
+ BUG_ON(!ret);
+ goto out;
}
/* Calculate ideal packed bkey format for new btree nodes: */
@@ -685,12 +684,31 @@ static void btree_update_nodes_written(struct btree_update *as)
/*
* Wait for any in flight writes to finish before we free the old nodes
- * on disk:
+ * on disk. But we haven't pinned those old nodes in the btree cache,
+ * they might have already been evicted.
+ *
+ * The update we're completing deleted references to those nodes from the
+ * btree, so we know if they've been evicted they can't be pulled back in.
+ * We just have to check if the nodes we have pointers to are still those
+ * old nodes, and haven't been reused.
+ *
+ * This can't be done locklessly because the data buffer might have been
+ * vmalloc allocated, and they're not RCU freed. We also need the
+ * __no_kmsan_checks annotation because even with the btree node read
+ * lock, nothing tells us that the data buffer has been initialized (if
+ * the btree node has been reused for a different node, and the data
+ * buffer swapped for a new data buffer).
*/
for (i = 0; i < as->nr_old_nodes; i++) {
b = as->old_nodes[i];
- if (btree_node_seq_matches(b, as->old_nodes_seq[i]))
+ bch2_trans_begin(trans);
+ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read);
+ bool seq_matches = btree_node_seq_matches(b, as->old_nodes_seq[i]);
+ six_unlock_read(&b->c.lock);
+ bch2_trans_unlock_long(trans);
+
+ if (seq_matches)
wait_on_bit_io(&b->flags, BTREE_NODE_write_in_flight_inner,
TASK_UNINTERRUPTIBLE);
}
@@ -1245,7 +1263,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
if (bch2_err_matches(ret, ENOSPC) &&
(flags & BCH_TRANS_COMMIT_journal_reclaim) &&
watermark < BCH_WATERMARK_reclaim) {
- ret = -BCH_ERR_journal_reclaim_would_deadlock;
+ ret = bch_err_throw(c, journal_reclaim_would_deadlock);
goto err;
}
@@ -2178,7 +2196,7 @@ static int get_iter_to_node(struct btree_trans *trans, struct btree_iter *iter,
if (btree_iter_path(trans, iter)->l[b->c.level].b != b) {
/* node has been freed: */
BUG_ON(!btree_node_dying(b));
- ret = -BCH_ERR_btree_node_dying;
+ ret = bch_err_throw(trans->c, btree_node_dying);
goto err;
}
@@ -2792,16 +2810,16 @@ int bch2_fs_btree_interior_update_init(struct bch_fs *c)
c->btree_interior_update_worker =
alloc_workqueue("btree_update", WQ_UNBOUND|WQ_MEM_RECLAIM, 8);
if (!c->btree_interior_update_worker)
- return -BCH_ERR_ENOMEM_btree_interior_update_worker_init;
+ return bch_err_throw(c, ENOMEM_btree_interior_update_worker_init);
c->btree_node_rewrite_worker =
alloc_ordered_workqueue("btree_node_rewrite", WQ_UNBOUND);
if (!c->btree_node_rewrite_worker)
- return -BCH_ERR_ENOMEM_btree_interior_update_worker_init;
+ return bch_err_throw(c, ENOMEM_btree_interior_update_worker_init);
if (mempool_init_kmalloc_pool(&c->btree_interior_update_pool, 1,
sizeof(struct btree_update)))
- return -BCH_ERR_ENOMEM_btree_interior_update_pool_init;
+ return bch_err_throw(c, ENOMEM_btree_interior_update_pool_init);
return 0;
}
diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c
index efb0c64d0aac..90b21e61d2b6 100644
--- a/fs/bcachefs/btree_write_buffer.c
+++ b/fs/bcachefs/btree_write_buffer.c
@@ -394,7 +394,7 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
bool accounting_accumulated = false;
do {
if (race_fault()) {
- ret = -BCH_ERR_journal_reclaim_would_deadlock;
+ ret = bch_err_throw(c, journal_reclaim_would_deadlock);
break;
}
@@ -633,7 +633,7 @@ int bch2_btree_write_buffer_tryflush(struct btree_trans *trans)
struct bch_fs *c = trans->c;
if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_btree_write_buffer))
- return -BCH_ERR_erofs_no_writes;
+ return bch_err_throw(c, erofs_no_writes);
int ret = bch2_btree_write_buffer_flush_nocheck_rw(trans);
enumerated_ref_put(&c->writes, BCH_WRITE_REF_btree_write_buffer);
@@ -676,7 +676,7 @@ int bch2_btree_write_buffer_maybe_flush(struct btree_trans *trans,
goto err;
bch2_bkey_buf_copy(last_flushed, c, tmp.k);
- ret = -BCH_ERR_transaction_restart_write_buffer_flush;
+ ret = bch_err_throw(c, transaction_restart_write_buffer_flush);
}
err:
bch2_bkey_buf_exit(&tmp, c);
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index 09eb5a543ae4..f25903c10e8a 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -221,6 +221,20 @@ static int bch2_check_fix_ptr(struct btree_trans *trans,
bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
if (!p.ptr.cached &&
data_type == BCH_DATA_btree) {
+ switch (g->data_type) {
+ case BCH_DATA_sb:
+ bch_err(c, "btree and superblock in the same bucket - cannot repair");
+ ret = bch_err_throw(c, fsck_repair_unimplemented);
+ goto out;
+ case BCH_DATA_journal:
+ ret = bch2_dev_journal_bucket_delete(ca, PTR_BUCKET_NR(ca, &p.ptr));
+ bch_err_msg(c, ret, "error deleting journal bucket %zu",
+ PTR_BUCKET_NR(ca, &p.ptr));
+ if (ret)
+ goto out;
+ break;
+ }
+
g->data_type = data_type;
g->stripe_sectors = 0;
g->dirty_sectors = 0;
@@ -270,6 +284,9 @@ int bch2_check_fix_ptrs(struct btree_trans *trans,
struct printbuf buf = PRINTBUF;
int ret = 0;
+ /* We don't yet do btree key updates correctly for when we're RW */
+ BUG_ON(test_bit(BCH_FS_rw, &c->flags));
+
bkey_for_each_ptr_decode(k.k, ptrs_c, p, entry_c) {
ret = bch2_check_fix_ptr(trans, k, p, entry_c, &do_update);
if (ret)
@@ -277,20 +294,13 @@ int bch2_check_fix_ptrs(struct btree_trans *trans,
}
if (do_update) {
- if (flags & BTREE_TRIGGER_is_root) {
- bch_err(c, "cannot update btree roots yet");
- ret = -EINVAL;
- goto err;
- }
-
struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k);
ret = PTR_ERR_OR_ZERO(new);
if (ret)
goto err;
- rcu_read_lock();
- bch2_bkey_drop_ptrs(bkey_i_to_s(new), ptr, !bch2_dev_exists(c, ptr->dev));
- rcu_read_unlock();
+ scoped_guard(rcu)
+ bch2_bkey_drop_ptrs(bkey_i_to_s(new), ptr, !bch2_dev_exists(c, ptr->dev));
if (level) {
/*
@@ -299,14 +309,11 @@ int bch2_check_fix_ptrs(struct btree_trans *trans,
* sort it out:
*/
struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(new));
- rcu_read_lock();
- bkey_for_each_ptr(ptrs, ptr) {
- struct bch_dev *ca = bch2_dev_rcu(c, ptr->dev);
- struct bucket *g = PTR_GC_BUCKET(ca, ptr);
-
- ptr->gen = g->gen;
- }
- rcu_read_unlock();
+ scoped_guard(rcu)
+ bkey_for_each_ptr(ptrs, ptr) {
+ struct bch_dev *ca = bch2_dev_rcu(c, ptr->dev);
+ ptr->gen = PTR_GC_BUCKET(ca, ptr)->gen;
+ }
} else {
struct bkey_ptrs ptrs;
union bch_extent_entry *entry;
@@ -370,19 +377,41 @@ found:
bch_info(c, "new key %s", buf.buf);
}
- struct btree_iter iter;
- bch2_trans_node_iter_init(trans, &iter, btree, new->k.p, 0, level,
- BTREE_ITER_intent|BTREE_ITER_all_snapshots);
- ret = bch2_btree_iter_traverse(trans, &iter) ?:
- bch2_trans_update(trans, &iter, new,
- BTREE_UPDATE_internal_snapshot_node|
- BTREE_TRIGGER_norun);
- bch2_trans_iter_exit(trans, &iter);
- if (ret)
- goto err;
+ if (!(flags & BTREE_TRIGGER_is_root)) {
+ struct btree_iter iter;
+ bch2_trans_node_iter_init(trans, &iter, btree, new->k.p, 0, level,
+ BTREE_ITER_intent|BTREE_ITER_all_snapshots);
+ ret = bch2_btree_iter_traverse(trans, &iter) ?:
+ bch2_trans_update(trans, &iter, new,
+ BTREE_UPDATE_internal_snapshot_node|
+ BTREE_TRIGGER_norun);
+ bch2_trans_iter_exit(trans, &iter);
+ if (ret)
+ goto err;
+
+ if (level)
+ bch2_btree_node_update_key_early(trans, btree, level - 1, k, new);
+ } else {
+ struct jset_entry *e = bch2_trans_jset_entry_alloc(trans,
+ jset_u64s(new->k.u64s));
+ ret = PTR_ERR_OR_ZERO(e);
+ if (ret)
+ goto err;
+
+ journal_entry_set(e,
+ BCH_JSET_ENTRY_btree_root,
+ btree, level - 1,
+ new, new->k.u64s);
- if (level)
- bch2_btree_node_update_key_early(trans, btree, level - 1, k, new);
+ /*
+ * no locking, we're single threaded and not rw yet, see
+ * the big assertino above that we repeat here:
+ */
+ BUG_ON(test_bit(BCH_FS_rw, &c->flags));
+
+ struct btree *b = bch2_btree_id_root(c, btree)->b;
+ bkey_copy(&b->key, new);
+ }
}
err:
printbuf_exit(&buf);
@@ -406,7 +435,15 @@ static int bucket_ref_update_err(struct btree_trans *trans, struct printbuf *buf
if (insert) {
bch2_trans_updates_to_text(buf, trans);
__bch2_inconsistent_error(c, buf);
- ret = -BCH_ERR_bucket_ref_update;
+ /*
+ * If we're in recovery, run_explicit_recovery_pass might give
+ * us an error code for rewinding recovery
+ */
+ if (!ret)
+ ret = bch_err_throw(c, bucket_ref_update);
+ } else {
+ /* Always ignore overwrite errors, so that deletion works */
+ ret = 0;
}
if (print || insert)
@@ -595,7 +632,7 @@ static int bch2_trigger_pointer(struct btree_trans *trans,
struct bch_dev *ca = bch2_dev_tryget(c, p.ptr.dev);
if (unlikely(!ca)) {
if (insert && p.ptr.dev != BCH_SB_MEMBER_INVALID)
- ret = -BCH_ERR_trigger_pointer;
+ ret = bch_err_throw(c, trigger_pointer);
goto err;
}
@@ -603,7 +640,7 @@ static int bch2_trigger_pointer(struct btree_trans *trans,
if (!bucket_valid(ca, bucket.offset)) {
if (insert) {
bch2_dev_bucket_missing(ca, bucket.offset);
- ret = -BCH_ERR_trigger_pointer;
+ ret = bch_err_throw(c, trigger_pointer);
}
goto err;
}
@@ -625,7 +662,7 @@ static int bch2_trigger_pointer(struct btree_trans *trans,
if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u\n %s",
p.ptr.dev,
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
- ret = -BCH_ERR_trigger_pointer;
+ ret = bch_err_throw(c, trigger_pointer);
goto err;
}
@@ -651,6 +688,8 @@ static int bch2_trigger_stripe_ptr(struct btree_trans *trans,
s64 sectors,
enum btree_iter_update_trigger_flags flags)
{
+ struct bch_fs *c = trans->c;
+
if (flags & BTREE_TRIGGER_transactional) {
struct btree_iter iter;
struct bkey_i_stripe *s = bch2_bkey_get_mut_typed(trans, &iter,
@@ -668,7 +707,7 @@ static int bch2_trigger_stripe_ptr(struct btree_trans *trans,
bch2_trans_inconsistent(trans,
"stripe pointer doesn't match stripe %llu",
(u64) p.ec.idx);
- ret = -BCH_ERR_trigger_stripe_pointer;
+ ret = bch_err_throw(c, trigger_stripe_pointer);
goto err;
}
@@ -688,13 +727,11 @@ err:
}
if (flags & BTREE_TRIGGER_gc) {
- struct bch_fs *c = trans->c;
-
struct gc_stripe *m = genradix_ptr_alloc(&c->gc_stripes, p.ec.idx, GFP_KERNEL);
if (!m) {
bch_err(c, "error allocating memory for gc_stripes, idx %llu",
(u64) p.ec.idx);
- return -BCH_ERR_ENOMEM_mark_stripe_ptr;
+ return bch_err_throw(c, ENOMEM_mark_stripe_ptr);
}
gc_stripe_lock(m);
@@ -709,7 +746,7 @@ err:
__bch2_inconsistent_error(c, &buf);
bch2_print_str(c, KERN_ERR, buf.buf);
printbuf_exit(&buf);
- return -BCH_ERR_trigger_stripe_pointer;
+ return bch_err_throw(c, trigger_stripe_pointer);
}
m->block_sectors[p.ec.block] += sectors;
@@ -732,8 +769,7 @@ err:
static int __trigger_extent(struct btree_trans *trans,
enum btree_id btree_id, unsigned level,
struct bkey_s_c k,
- enum btree_iter_update_trigger_flags flags,
- s64 *replicas_sectors)
+ enum btree_iter_update_trigger_flags flags)
{
bool gc = flags & BTREE_TRIGGER_gc;
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
@@ -744,6 +780,8 @@ static int __trigger_extent(struct btree_trans *trans,
: BCH_DATA_user;
int ret = 0;
+ s64 replicas_sectors = 0;
+
struct disk_accounting_pos acc_replicas_key;
memset(&acc_replicas_key, 0, sizeof(acc_replicas_key));
acc_replicas_key.type = BCH_DISK_ACCOUNTING_replicas;
@@ -770,7 +808,7 @@ static int __trigger_extent(struct btree_trans *trans,
if (ret)
return ret;
} else if (!p.has_ec) {
- *replicas_sectors += disk_sectors;
+ replicas_sectors += disk_sectors;
replicas_entry_add_dev(&acc_replicas_key.replicas, p.ptr.dev);
} else {
ret = bch2_trigger_stripe_ptr(trans, k, p, data_type, disk_sectors, flags);
@@ -808,13 +846,13 @@ static int __trigger_extent(struct btree_trans *trans,
}
if (acc_replicas_key.replicas.nr_devs) {
- ret = bch2_disk_accounting_mod(trans, &acc_replicas_key, replicas_sectors, 1, gc);
+ ret = bch2_disk_accounting_mod(trans, &acc_replicas_key, &replicas_sectors, 1, gc);
if (ret)
return ret;
}
if (acc_replicas_key.replicas.nr_devs && !level && k.k->p.snapshot) {
- ret = bch2_disk_accounting_mod2_nr(trans, gc, replicas_sectors, 1, snapshot, k.k->p.snapshot);
+ ret = bch2_disk_accounting_mod2_nr(trans, gc, &replicas_sectors, 1, snapshot, k.k->p.snapshot);
if (ret)
return ret;
}
@@ -830,7 +868,7 @@ static int __trigger_extent(struct btree_trans *trans,
}
if (level) {
- ret = bch2_disk_accounting_mod2_nr(trans, gc, replicas_sectors, 1, btree, btree_id);
+ ret = bch2_disk_accounting_mod2_nr(trans, gc, &replicas_sectors, 1, btree, btree_id);
if (ret)
return ret;
} else {
@@ -839,7 +877,7 @@ static int __trigger_extent(struct btree_trans *trans,
s64 v[3] = {
insert ? 1 : -1,
insert ? k.k->size : -((s64) k.k->size),
- *replicas_sectors,
+ replicas_sectors,
};
ret = bch2_disk_accounting_mod2(trans, gc, v, inum, k.k->p.inode);
if (ret)
@@ -871,20 +909,16 @@ int bch2_trigger_extent(struct btree_trans *trans,
return 0;
if (flags & (BTREE_TRIGGER_transactional|BTREE_TRIGGER_gc)) {
- s64 old_replicas_sectors = 0, new_replicas_sectors = 0;
-
if (old.k->type) {
int ret = __trigger_extent(trans, btree, level, old,
- flags & ~BTREE_TRIGGER_insert,
- &old_replicas_sectors);
+ flags & ~BTREE_TRIGGER_insert);
if (ret)
return ret;
}
if (new.k->type) {
int ret = __trigger_extent(trans, btree, level, new.s_c,
- flags & ~BTREE_TRIGGER_overwrite,
- &new_replicas_sectors);
+ flags & ~BTREE_TRIGGER_overwrite);
if (ret)
return ret;
}
@@ -971,15 +1005,16 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
bch2_data_type_str(type),
bch2_data_type_str(type));
- bool print = bch2_count_fsck_err(c, bucket_metadata_type_mismatch, &buf);
+ bch2_count_fsck_err(c, bucket_metadata_type_mismatch, &buf);
- bch2_run_explicit_recovery_pass(c, &buf,
+ ret = bch2_run_explicit_recovery_pass(c, &buf,
BCH_RECOVERY_PASS_check_allocations, 0);
- if (print)
- bch2_print_str(c, KERN_ERR, buf.buf);
+ /* Always print, this is always fatal */
+ bch2_print_str(c, KERN_ERR, buf.buf);
printbuf_exit(&buf);
- ret = -BCH_ERR_metadata_bucket_inconsistency;
+ if (!ret)
+ ret = bch_err_throw(c, metadata_bucket_inconsistency);
goto err;
}
@@ -1032,7 +1067,7 @@ static int bch2_mark_metadata_bucket(struct btree_trans *trans, struct bch_dev *
err_unlock:
bucket_unlock(g);
err:
- return -BCH_ERR_metadata_bucket_inconsistency;
+ return bch_err_throw(c, metadata_bucket_inconsistency);
}
int bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
@@ -1247,7 +1282,7 @@ recalculate:
ret = 0;
} else {
atomic64_set(&c->sectors_available, sectors_available);
- ret = -BCH_ERR_ENOSPC_disk_reservation;
+ ret = bch_err_throw(c, ENOSPC_disk_reservation);
}
mutex_unlock(&c->sectors_available_lock);
@@ -1276,7 +1311,7 @@ int bch2_buckets_nouse_alloc(struct bch_fs *c)
GFP_KERNEL|__GFP_ZERO);
if (!ca->buckets_nouse) {
bch2_dev_put(ca);
- return -BCH_ERR_ENOMEM_buckets_nouse;
+ return bch_err_throw(c, ENOMEM_buckets_nouse);
}
}
@@ -1301,12 +1336,12 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
lockdep_assert_held(&c->state_lock);
if (resize && ca->buckets_nouse)
- return -BCH_ERR_no_resize_with_buckets_nouse;
+ return bch_err_throw(c, no_resize_with_buckets_nouse);
bucket_gens = bch2_kvmalloc(struct_size(bucket_gens, b, nbuckets),
GFP_KERNEL|__GFP_ZERO);
if (!bucket_gens) {
- ret = -BCH_ERR_ENOMEM_bucket_gens;
+ ret = bch_err_throw(c, ENOMEM_bucket_gens);
goto err;
}
@@ -1325,9 +1360,9 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
sizeof(bucket_gens->b[0]) * copy);
}
- ret = bch2_bucket_bitmap_resize(&ca->bucket_backpointer_mismatch,
+ ret = bch2_bucket_bitmap_resize(ca, &ca->bucket_backpointer_mismatch,
ca->mi.nbuckets, nbuckets) ?:
- bch2_bucket_bitmap_resize(&ca->bucket_backpointer_empty,
+ bch2_bucket_bitmap_resize(ca, &ca->bucket_backpointer_empty,
ca->mi.nbuckets, nbuckets);
rcu_assign_pointer(ca->bucket_gens, bucket_gens);
@@ -1354,7 +1389,7 @@ int bch2_dev_buckets_alloc(struct bch_fs *c, struct bch_dev *ca)
{
ca->usage = alloc_percpu(struct bch_dev_usage_full);
if (!ca->usage)
- return -BCH_ERR_ENOMEM_usage_init;
+ return bch_err_throw(c, ENOMEM_usage_init);
return bch2_dev_buckets_resize(c, ca, ca->mi.nbuckets);
}
diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h
index af1532de4a37..49a3807a5eab 100644
--- a/fs/bcachefs/buckets.h
+++ b/fs/bcachefs/buckets.h
@@ -84,10 +84,8 @@ static inline int bucket_gen_get_rcu(struct bch_dev *ca, size_t b)
static inline int bucket_gen_get(struct bch_dev *ca, size_t b)
{
- rcu_read_lock();
- int ret = bucket_gen_get_rcu(ca, b);
- rcu_read_unlock();
- return ret;
+ guard(rcu)();
+ return bucket_gen_get_rcu(ca, b);
}
static inline size_t PTR_BUCKET_NR(const struct bch_dev *ca,
@@ -156,10 +154,8 @@ static inline int dev_ptr_stale_rcu(struct bch_dev *ca, const struct bch_extent_
*/
static inline int dev_ptr_stale(struct bch_dev *ca, const struct bch_extent_ptr *ptr)
{
- rcu_read_lock();
- int ret = dev_ptr_stale_rcu(ca, ptr);
- rcu_read_unlock();
- return ret;
+ guard(rcu)();
+ return dev_ptr_stale_rcu(ca, ptr);
}
/* Device usage: */
diff --git a/fs/bcachefs/buckets_waiting_for_journal.c b/fs/bcachefs/buckets_waiting_for_journal.c
index c8a488e6b7b8..832eff93acb6 100644
--- a/fs/bcachefs/buckets_waiting_for_journal.c
+++ b/fs/bcachefs/buckets_waiting_for_journal.c
@@ -108,7 +108,8 @@ int bch2_set_bucket_needs_journal_commit(struct buckets_waiting_for_journal *b,
realloc:
n = kvmalloc(sizeof(*n) + (sizeof(n->d[0]) << new_bits), GFP_KERNEL);
if (!n) {
- ret = -BCH_ERR_ENOMEM_buckets_waiting_for_journal_set;
+ struct bch_fs *c = container_of(b, struct bch_fs, buckets_waiting_for_journal);
+ ret = bch_err_throw(c, ENOMEM_buckets_waiting_for_journal_set);
goto out;
}
diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c
index 4066946b26bc..2d38466eddfd 100644
--- a/fs/bcachefs/chardev.c
+++ b/fs/bcachefs/chardev.c
@@ -613,15 +613,12 @@ static long bch2_ioctl_disk_get_idx(struct bch_fs *c,
if (!dev)
return -EINVAL;
- rcu_read_lock();
+ guard(rcu)();
for_each_online_member_rcu(c, ca)
- if (ca->dev == dev) {
- rcu_read_unlock();
+ if (ca->dev == dev)
return ca->dev_idx;
- }
- rcu_read_unlock();
- return -BCH_ERR_ENOENT_dev_idx_not_found;
+ return bch_err_throw(c, ENOENT_dev_idx_not_found);
}
static long bch2_ioctl_disk_resize(struct bch_fs *c,
diff --git a/fs/bcachefs/checksum.c b/fs/bcachefs/checksum.c
index d3e2e4f776c6..a6795e73f0b9 100644
--- a/fs/bcachefs/checksum.c
+++ b/fs/bcachefs/checksum.c
@@ -173,7 +173,7 @@ int bch2_encrypt(struct bch_fs *c, unsigned type,
if (bch2_fs_inconsistent_on(!c->chacha20_key_set,
c, "attempting to encrypt without encryption key"))
- return -BCH_ERR_no_encryption_key;
+ return bch_err_throw(c, no_encryption_key);
bch2_chacha20(&c->chacha20_key, nonce, data, len);
return 0;
@@ -262,7 +262,7 @@ int __bch2_encrypt_bio(struct bch_fs *c, unsigned type,
if (bch2_fs_inconsistent_on(!c->chacha20_key_set,
c, "attempting to encrypt without encryption key"))
- return -BCH_ERR_no_encryption_key;
+ return bch_err_throw(c, no_encryption_key);
bch2_chacha20_init(&chacha_state, &c->chacha20_key, nonce);
@@ -375,7 +375,7 @@ int bch2_rechecksum_bio(struct bch_fs *c, struct bio *bio,
prt_str(&buf, ")");
WARN_RATELIMIT(1, "%s", buf.buf);
printbuf_exit(&buf);
- return -BCH_ERR_recompute_checksum;
+ return bch_err_throw(c, recompute_checksum);
}
for (i = splits; i < splits + ARRAY_SIZE(splits); i++) {
@@ -659,7 +659,7 @@ int bch2_enable_encryption(struct bch_fs *c, bool keyed)
crypt = bch2_sb_field_resize(&c->disk_sb, crypt,
sizeof(*crypt) / sizeof(u64));
if (!crypt) {
- ret = -BCH_ERR_ENOSPC_sb_crypt;
+ ret = bch_err_throw(c, ENOSPC_sb_crypt);
goto err;
}
diff --git a/fs/bcachefs/clock.c b/fs/bcachefs/clock.c
index f57f9f4774e6..8e9264b5a84e 100644
--- a/fs/bcachefs/clock.c
+++ b/fs/bcachefs/clock.c
@@ -53,7 +53,6 @@ void bch2_io_timer_del(struct io_clock *clock, struct io_timer *timer)
struct io_clock_wait {
struct io_timer io_timer;
- struct timer_list cpu_timer;
struct task_struct *task;
int expired;
};
@@ -67,15 +66,6 @@ static void io_clock_wait_fn(struct io_timer *timer)
wake_up_process(wait->task);
}
-static void io_clock_cpu_timeout(struct timer_list *timer)
-{
- struct io_clock_wait *wait = container_of(timer,
- struct io_clock_wait, cpu_timer);
-
- wait->expired = 1;
- wake_up_process(wait->task);
-}
-
void bch2_io_clock_schedule_timeout(struct io_clock *clock, u64 until)
{
struct io_clock_wait wait = {
@@ -90,8 +80,8 @@ void bch2_io_clock_schedule_timeout(struct io_clock *clock, u64 until)
bch2_io_timer_del(clock, &wait.io_timer);
}
-void bch2_kthread_io_clock_wait(struct io_clock *clock,
- u64 io_until, unsigned long cpu_timeout)
+unsigned long bch2_kthread_io_clock_wait_once(struct io_clock *clock,
+ u64 io_until, unsigned long cpu_timeout)
{
bool kthread = (current->flags & PF_KTHREAD) != 0;
struct io_clock_wait wait = {
@@ -103,27 +93,26 @@ void bch2_kthread_io_clock_wait(struct io_clock *clock,
bch2_io_timer_add(clock, &wait.io_timer);
- timer_setup_on_stack(&wait.cpu_timer, io_clock_cpu_timeout, 0);
-
- if (cpu_timeout != MAX_SCHEDULE_TIMEOUT)
- mod_timer(&wait.cpu_timer, cpu_timeout + jiffies);
-
- do {
- set_current_state(TASK_INTERRUPTIBLE);
- if (kthread && kthread_should_stop())
- break;
-
- if (wait.expired)
- break;
-
- schedule();
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (!(kthread && kthread_should_stop())) {
+ cpu_timeout = schedule_timeout(cpu_timeout);
try_to_freeze();
- } while (0);
+ }
__set_current_state(TASK_RUNNING);
- timer_delete_sync(&wait.cpu_timer);
- timer_destroy_on_stack(&wait.cpu_timer);
bch2_io_timer_del(clock, &wait.io_timer);
+ return cpu_timeout;
+}
+
+void bch2_kthread_io_clock_wait(struct io_clock *clock,
+ u64 io_until, unsigned long cpu_timeout)
+{
+ bool kthread = (current->flags & PF_KTHREAD) != 0;
+
+ while (!(kthread && kthread_should_stop()) &&
+ cpu_timeout &&
+ atomic64_read(&clock->now) < io_until)
+ cpu_timeout = bch2_kthread_io_clock_wait_once(clock, io_until, cpu_timeout);
}
static struct io_timer *get_expired_timer(struct io_clock *clock, u64 now)
diff --git a/fs/bcachefs/clock.h b/fs/bcachefs/clock.h
index 82c79c8baf92..8769be2aa21e 100644
--- a/fs/bcachefs/clock.h
+++ b/fs/bcachefs/clock.h
@@ -4,6 +4,7 @@
void bch2_io_timer_add(struct io_clock *, struct io_timer *);
void bch2_io_timer_del(struct io_clock *, struct io_timer *);
+unsigned long bch2_kthread_io_clock_wait_once(struct io_clock *, u64, unsigned long);
void bch2_kthread_io_clock_wait(struct io_clock *, u64, unsigned long);
void __bch2_increment_clock(struct io_clock *, u64);
diff --git a/fs/bcachefs/compress.c b/fs/bcachefs/compress.c
index 1bca61d17092..b37b1f325f0a 100644
--- a/fs/bcachefs/compress.c
+++ b/fs/bcachefs/compress.c
@@ -187,7 +187,7 @@ static int __bio_uncompress(struct bch_fs *c, struct bio *src,
__bch2_compression_types[crc.compression_type]))
ret = bch2_check_set_has_compressed_data(c, opt);
else
- ret = -BCH_ERR_compression_workspace_not_initialized;
+ ret = bch_err_throw(c, compression_workspace_not_initialized);
if (ret)
goto err;
}
@@ -200,7 +200,7 @@ static int __bio_uncompress(struct bch_fs *c, struct bio *src,
ret2 = LZ4_decompress_safe_partial(src_data.b, dst_data,
src_len, dst_len, dst_len);
if (ret2 != dst_len)
- ret = -BCH_ERR_decompress_lz4;
+ ret = bch_err_throw(c, decompress_lz4);
break;
case BCH_COMPRESSION_TYPE_gzip: {
z_stream strm = {
@@ -219,7 +219,7 @@ static int __bio_uncompress(struct bch_fs *c, struct bio *src,
mempool_free(workspace, workspace_pool);
if (ret2 != Z_STREAM_END)
- ret = -BCH_ERR_decompress_gzip;
+ ret = bch_err_throw(c, decompress_gzip);
break;
}
case BCH_COMPRESSION_TYPE_zstd: {
@@ -227,7 +227,7 @@ static int __bio_uncompress(struct bch_fs *c, struct bio *src,
size_t real_src_len = le32_to_cpup(src_data.b);
if (real_src_len > src_len - 4) {
- ret = -BCH_ERR_decompress_zstd_src_len_bad;
+ ret = bch_err_throw(c, decompress_zstd_src_len_bad);
goto err;
}
@@ -241,7 +241,7 @@ static int __bio_uncompress(struct bch_fs *c, struct bio *src,
mempool_free(workspace, workspace_pool);
if (ret2 != dst_len)
- ret = -BCH_ERR_decompress_zstd;
+ ret = bch_err_throw(c, decompress_zstd);
break;
}
default:
@@ -270,7 +270,7 @@ int bch2_bio_uncompress_inplace(struct bch_write_op *op,
bch2_write_op_error(op, op->pos.offset,
"extent too big to decompress (%u > %u)",
crc->uncompressed_size << 9, c->opts.encoded_extent_max);
- return -BCH_ERR_decompress_exceeded_max_encoded_extent;
+ return bch_err_throw(c, decompress_exceeded_max_encoded_extent);
}
data = __bounce_alloc(c, dst_len, WRITE);
@@ -314,7 +314,7 @@ int bch2_bio_uncompress(struct bch_fs *c, struct bio *src,
if (crc.uncompressed_size << 9 > c->opts.encoded_extent_max ||
crc.compressed_size << 9 > c->opts.encoded_extent_max)
- return -BCH_ERR_decompress_exceeded_max_encoded_extent;
+ return bch_err_throw(c, decompress_exceeded_max_encoded_extent);
dst_data = dst_len == dst_iter.bi_size
? __bio_map_or_bounce(c, dst, dst_iter, WRITE)
@@ -656,12 +656,12 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
if (!mempool_initialized(&c->compression_bounce[READ]) &&
mempool_init_kvmalloc_pool(&c->compression_bounce[READ],
1, c->opts.encoded_extent_max))
- return -BCH_ERR_ENOMEM_compression_bounce_read_init;
+ return bch_err_throw(c, ENOMEM_compression_bounce_read_init);
if (!mempool_initialized(&c->compression_bounce[WRITE]) &&
mempool_init_kvmalloc_pool(&c->compression_bounce[WRITE],
1, c->opts.encoded_extent_max))
- return -BCH_ERR_ENOMEM_compression_bounce_write_init;
+ return bch_err_throw(c, ENOMEM_compression_bounce_write_init);
for (i = compression_types;
i < compression_types + ARRAY_SIZE(compression_types);
@@ -675,7 +675,7 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
if (mempool_init_kvmalloc_pool(
&c->compress_workspace[i->type],
1, i->compress_workspace))
- return -BCH_ERR_ENOMEM_compression_workspace_init;
+ return bch_err_throw(c, ENOMEM_compression_workspace_init);
}
return 0;
diff --git a/fs/bcachefs/darray.h b/fs/bcachefs/darray.h
index 50ec3decfe8c..4080ee99aadd 100644
--- a/fs/bcachefs/darray.h
+++ b/fs/bcachefs/darray.h
@@ -8,6 +8,7 @@
* Inspired by CCAN's darray
*/
+#include <linux/cleanup.h>
#include <linux/slab.h>
#define DARRAY_PREALLOCATED(_type, _nr) \
@@ -87,7 +88,23 @@ int __bch2_darray_resize_noprof(darray_char *, size_t, size_t, gfp_t);
#define darray_remove_item(_d, _pos) \
array_remove_item((_d)->data, (_d)->nr, (_pos) - (_d)->data)
-#define __darray_for_each(_d, _i) \
+#define darray_find_p(_d, _i, cond) \
+({ \
+ typeof((_d).data) _ret = NULL; \
+ \
+ darray_for_each(_d, _i) \
+ if (cond) { \
+ _ret = _i; \
+ break; \
+ } \
+ _ret; \
+})
+
+#define darray_find(_d, _item) darray_find_p(_d, _i, *_i == _item)
+
+/* Iteration: */
+
+#define __darray_for_each(_d, _i) \
for ((_i) = (_d).data; _i < (_d).data + (_d).nr; _i++)
#define darray_for_each(_d, _i) \
@@ -96,6 +113,8 @@ int __bch2_darray_resize_noprof(darray_char *, size_t, size_t, gfp_t);
#define darray_for_each_reverse(_d, _i) \
for (typeof(&(_d).data[0]) _i = (_d).data + (_d).nr - 1; _i >= (_d).data && (_d).nr; --_i)
+/* Init/exit */
+
#define darray_init(_d) \
do { \
(_d)->nr = 0; \
@@ -111,4 +130,29 @@ do { \
darray_init(_d); \
} while (0)
+#define DEFINE_DARRAY_CLASS(_type) \
+DEFINE_CLASS(_type, _type, darray_exit(&(_T)), (_type) {}, void)
+
+#define DEFINE_DARRAY(_type) \
+typedef DARRAY(_type) darray_##_type; \
+DEFINE_DARRAY_CLASS(darray_##_type)
+
+#define DEFINE_DARRAY_NAMED(_name, _type) \
+typedef DARRAY(_type) _name; \
+DEFINE_DARRAY_CLASS(_name)
+
+DEFINE_DARRAY_CLASS(darray_char);
+DEFINE_DARRAY_CLASS(darray_str)
+DEFINE_DARRAY_CLASS(darray_const_str)
+
+DEFINE_DARRAY_CLASS(darray_u8)
+DEFINE_DARRAY_CLASS(darray_u16)
+DEFINE_DARRAY_CLASS(darray_u32)
+DEFINE_DARRAY_CLASS(darray_u64)
+
+DEFINE_DARRAY_CLASS(darray_s8)
+DEFINE_DARRAY_CLASS(darray_s16)
+DEFINE_DARRAY_CLASS(darray_s32)
+DEFINE_DARRAY_CLASS(darray_s64)
+
#endif /* _BCACHEFS_DARRAY_H */
diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c
index c34e5b88ba9d..5f1174348974 100644
--- a/fs/bcachefs/data_update.c
+++ b/fs/bcachefs/data_update.c
@@ -66,37 +66,46 @@ static void bkey_nocow_unlock(struct bch_fs *c, struct bkey_s_c k)
}
}
-static bool bkey_nocow_lock(struct bch_fs *c, struct moving_context *ctxt, struct bkey_s_c k)
+static noinline_for_stack
+bool __bkey_nocow_lock(struct bch_fs *c, struct moving_context *ctxt, struct bkey_ptrs_c ptrs,
+ const struct bch_extent_ptr *start)
{
- struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ if (!ctxt) {
+ bkey_for_each_ptr(ptrs, ptr) {
+ if (ptr == start)
+ break;
+
+ struct bch_dev *ca = bch2_dev_have_ref(c, ptr->dev);
+ struct bpos bucket = PTR_BUCKET_POS(ca, ptr);
+ bch2_bucket_nocow_unlock(&c->nocow_locks, bucket, 0);
+ }
+ return false;
+ }
- bkey_for_each_ptr(ptrs, ptr) {
+ __bkey_for_each_ptr(start, ptrs.end, ptr) {
struct bch_dev *ca = bch2_dev_have_ref(c, ptr->dev);
struct bpos bucket = PTR_BUCKET_POS(ca, ptr);
- if (ctxt) {
- bool locked;
-
- move_ctxt_wait_event(ctxt,
- (locked = bch2_bucket_nocow_trylock(&c->nocow_locks, bucket, 0)) ||
- list_empty(&ctxt->ios));
+ bool locked;
+ move_ctxt_wait_event(ctxt,
+ (locked = bch2_bucket_nocow_trylock(&c->nocow_locks, bucket, 0)) ||
+ list_empty(&ctxt->ios));
+ if (!locked)
+ bch2_bucket_nocow_lock(&c->nocow_locks, bucket, 0);
+ }
+ return true;
+}
- if (!locked)
- bch2_bucket_nocow_lock(&c->nocow_locks, bucket, 0);
- } else {
- if (!bch2_bucket_nocow_trylock(&c->nocow_locks, bucket, 0)) {
- bkey_for_each_ptr(ptrs, ptr2) {
- if (ptr2 == ptr)
- break;
+static bool bkey_nocow_lock(struct bch_fs *c, struct moving_context *ctxt, struct bkey_ptrs_c ptrs)
+{
+ bkey_for_each_ptr(ptrs, ptr) {
+ struct bch_dev *ca = bch2_dev_have_ref(c, ptr->dev);
+ struct bpos bucket = PTR_BUCKET_POS(ca, ptr);
- ca = bch2_dev_have_ref(c, ptr2->dev);
- bucket = PTR_BUCKET_POS(ca, ptr2);
- bch2_bucket_nocow_unlock(&c->nocow_locks, bucket, 0);
- }
- return false;
- }
- }
+ if (!bch2_bucket_nocow_trylock(&c->nocow_locks, bucket, 0))
+ return __bkey_nocow_lock(c, ctxt, ptrs, ptr);
}
+
return true;
}
@@ -246,7 +255,7 @@ static int data_update_invalid_bkey(struct data_update *m,
bch2_print_str(c, KERN_ERR, buf.buf);
printbuf_exit(&buf);
- return -BCH_ERR_invalid_bkey;
+ return bch_err_throw(c, invalid_bkey);
}
static int __bch2_data_update_index_update(struct btree_trans *trans,
@@ -367,21 +376,21 @@ restart_drop_conflicting_replicas:
bch2_bkey_durability(c, bkey_i_to_s_c(&new->k_i));
/* Now, drop excess replicas: */
- rcu_read_lock();
+ scoped_guard(rcu) {
restart_drop_extra_replicas:
- bkey_for_each_ptr_decode(old.k, bch2_bkey_ptrs(bkey_i_to_s(insert)), p, entry) {
- unsigned ptr_durability = bch2_extent_ptr_durability(c, &p);
+ bkey_for_each_ptr_decode(old.k, bch2_bkey_ptrs(bkey_i_to_s(insert)), p, entry) {
+ unsigned ptr_durability = bch2_extent_ptr_durability(c, &p);
- if (!p.ptr.cached &&
- durability - ptr_durability >= m->op.opts.data_replicas) {
- durability -= ptr_durability;
+ if (!p.ptr.cached &&
+ durability - ptr_durability >= m->op.opts.data_replicas) {
+ durability -= ptr_durability;
- bch2_extent_ptr_set_cached(c, &m->op.opts,
- bkey_i_to_s(insert), &entry->ptr);
- goto restart_drop_extra_replicas;
+ bch2_extent_ptr_set_cached(c, &m->op.opts,
+ bkey_i_to_s(insert), &entry->ptr);
+ goto restart_drop_extra_replicas;
+ }
}
}
- rcu_read_unlock();
/* Finally, add the pointers we just wrote: */
extent_for_each_ptr_decode(extent_i_to_s(new), p, entry)
@@ -523,8 +532,9 @@ void bch2_data_update_exit(struct data_update *update)
bch2_bkey_buf_exit(&update->k, c);
}
-static int bch2_update_unwritten_extent(struct btree_trans *trans,
- struct data_update *update)
+static noinline_for_stack
+int bch2_update_unwritten_extent(struct btree_trans *trans,
+ struct data_update *update)
{
struct bch_fs *c = update->op.c;
struct bkey_i_extent *e;
@@ -716,18 +726,10 @@ int bch2_extent_drop_ptrs(struct btree_trans *trans,
bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
}
-int bch2_data_update_bios_init(struct data_update *m, struct bch_fs *c,
- struct bch_io_opts *io_opts)
+static int __bch2_data_update_bios_init(struct data_update *m, struct bch_fs *c,
+ struct bch_io_opts *io_opts,
+ unsigned buf_bytes)
{
- struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(m->k.k));
- const union bch_extent_entry *entry;
- struct extent_ptr_decoded p;
-
- /* write path might have to decompress data: */
- unsigned buf_bytes = 0;
- bkey_for_each_ptr_decode(&m->k.k->k, ptrs, p, entry)
- buf_bytes = max_t(unsigned, buf_bytes, p.crc.uncompressed_size << 9);
-
unsigned nr_vecs = DIV_ROUND_UP(buf_bytes, PAGE_SIZE);
m->bvecs = kmalloc_array(nr_vecs, sizeof*(m->bvecs), GFP_KERNEL);
@@ -751,11 +753,26 @@ int bch2_data_update_bios_init(struct data_update *m, struct bch_fs *c,
return 0;
}
+int bch2_data_update_bios_init(struct data_update *m, struct bch_fs *c,
+ struct bch_io_opts *io_opts)
+{
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(m->k.k));
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
+
+ /* write path might have to decompress data: */
+ unsigned buf_bytes = 0;
+ bkey_for_each_ptr_decode(&m->k.k->k, ptrs, p, entry)
+ buf_bytes = max_t(unsigned, buf_bytes, p.crc.uncompressed_size << 9);
+
+ return __bch2_data_update_bios_init(m, c, io_opts, buf_bytes);
+}
+
static int can_write_extent(struct bch_fs *c, struct data_update *m)
{
if ((m->op.flags & BCH_WRITE_alloc_nowait) &&
unlikely(c->open_buckets_nr_free <= bch2_open_buckets_reserved(m->op.watermark)))
- return -BCH_ERR_data_update_done_would_block;
+ return bch_err_throw(c, data_update_done_would_block);
unsigned target = m->op.flags & BCH_WRITE_only_specified_devs
? m->op.target
@@ -765,7 +782,8 @@ static int can_write_extent(struct bch_fs *c, struct data_update *m)
darray_for_each(m->op.devs_have, i)
__clear_bit(*i, devs.d);
- rcu_read_lock();
+ guard(rcu)();
+
unsigned nr_replicas = 0, i;
for_each_set_bit(i, devs.d, BCH_SB_MEMBERS_MAX) {
struct bch_dev *ca = bch2_dev_rcu_noerror(c, i);
@@ -782,12 +800,11 @@ static int can_write_extent(struct bch_fs *c, struct data_update *m)
if (nr_replicas >= m->op.nr_replicas)
break;
}
- rcu_read_unlock();
if (!nr_replicas)
- return -BCH_ERR_data_update_done_no_rw_devs;
+ return bch_err_throw(c, data_update_done_no_rw_devs);
if (nr_replicas < m->op.nr_replicas)
- return -BCH_ERR_insufficient_devices;
+ return bch_err_throw(c, insufficient_devices);
return 0;
}
@@ -802,19 +819,21 @@ int bch2_data_update_init(struct btree_trans *trans,
struct bkey_s_c k)
{
struct bch_fs *c = trans->c;
- struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
- const union bch_extent_entry *entry;
- struct extent_ptr_decoded p;
- unsigned reserve_sectors = k.k->size * data_opts.extra_replicas;
int ret = 0;
- /*
- * fs is corrupt we have a key for a snapshot node that doesn't exist,
- * and we have to check for this because we go rw before repairing the
- * snapshots table - just skip it, we can move it later.
- */
- if (unlikely(k.k->p.snapshot && !bch2_snapshot_exists(c, k.k->p.snapshot)))
- return -BCH_ERR_data_update_done_no_snapshot;
+ if (k.k->p.snapshot) {
+ ret = bch2_check_key_has_snapshot(trans, iter, k);
+ if (bch2_err_matches(ret, BCH_ERR_recovery_will_run)) {
+ /* Can't repair yet, waiting on other recovery passes */
+ return bch_err_throw(c, data_update_done_no_snapshot);
+ }
+ if (ret < 0)
+ return ret;
+ if (ret) /* key was deleted */
+ return bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?:
+ bch_err_throw(c, data_update_done_no_snapshot);
+ ret = 0;
+ }
bch2_bkey_buf_init(&m->k);
bch2_bkey_buf_reassemble(&m->k, c, k);
@@ -842,10 +861,17 @@ int bch2_data_update_init(struct btree_trans *trans,
unsigned durability_have = 0, durability_removing = 0;
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(m->k.k));
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
+ unsigned reserve_sectors = k.k->size * data_opts.extra_replicas;
+ unsigned buf_bytes = 0;
+ bool unwritten = false;
+
unsigned ptr_bit = 1;
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
if (!p.ptr.cached) {
- rcu_read_lock();
+ guard(rcu)();
if (ptr_bit & m->data_opts.rewrite_ptrs) {
if (crc_is_compressed(p.crc))
reserve_sectors += k.k->size;
@@ -856,7 +882,6 @@ int bch2_data_update_init(struct btree_trans *trans,
bch2_dev_list_add_dev(&m->op.devs_have, p.ptr.dev);
durability_have += bch2_extent_ptr_durability(c, &p);
}
- rcu_read_unlock();
}
/*
@@ -872,6 +897,9 @@ int bch2_data_update_init(struct btree_trans *trans,
if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible)
m->op.incompressible = true;
+ buf_bytes = max_t(unsigned, buf_bytes, p.crc.uncompressed_size << 9);
+ unwritten |= p.ptr.unwritten;
+
ptr_bit <<= 1;
}
@@ -910,7 +938,7 @@ int bch2_data_update_init(struct btree_trans *trans,
if (iter)
ret = bch2_extent_drop_ptrs(trans, iter, k, io_opts, &m->data_opts);
if (!ret)
- ret = -BCH_ERR_data_update_done_no_writes_needed;
+ ret = bch_err_throw(c, data_update_done_no_writes_needed);
goto out_bkey_buf_exit;
}
@@ -941,23 +969,25 @@ int bch2_data_update_init(struct btree_trans *trans,
}
if (!bkey_get_dev_refs(c, k)) {
- ret = -BCH_ERR_data_update_done_no_dev_refs;
+ ret = bch_err_throw(c, data_update_done_no_dev_refs);
goto out_put_disk_res;
}
if (c->opts.nocow_enabled &&
- !bkey_nocow_lock(c, ctxt, k)) {
- ret = -BCH_ERR_nocow_lock_blocked;
+ !bkey_nocow_lock(c, ctxt, ptrs)) {
+ ret = bch_err_throw(c, nocow_lock_blocked);
goto out_put_dev_refs;
}
- if (bkey_extent_is_unwritten(k)) {
+ if (unwritten) {
ret = bch2_update_unwritten_extent(trans, m) ?:
- -BCH_ERR_data_update_done_unwritten;
+ bch_err_throw(c, data_update_done_unwritten);
goto out_nocow_unlock;
}
- ret = bch2_data_update_bios_init(m, c, io_opts);
+ bch2_trans_unlock(trans);
+
+ ret = __bch2_data_update_bios_init(m, c, io_opts, buf_bytes);
if (ret)
goto out_nocow_unlock;
diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c
index 4fa70634c90e..901f643ead83 100644
--- a/fs/bcachefs/debug.c
+++ b/fs/bcachefs/debug.c
@@ -492,6 +492,8 @@ static void bch2_cached_btree_node_to_text(struct printbuf *out, struct bch_fs *
prt_printf(out, "journal pin %px:\t%llu\n",
&b->writes[1].journal, b->writes[1].journal.seq);
+ prt_printf(out, "ob:\t%u\n", b->ob.nr);
+
printbuf_indent_sub(out, 2);
}
@@ -508,27 +510,27 @@ static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf,
i->ret = 0;
do {
- struct bucket_table *tbl;
- struct rhash_head *pos;
- struct btree *b;
-
ret = bch2_debugfs_flush_buf(i);
if (ret)
return ret;
- rcu_read_lock();
i->buf.atomic++;
- tbl = rht_dereference_rcu(c->btree_cache.table.tbl,
- &c->btree_cache.table);
- if (i->iter < tbl->size) {
- rht_for_each_entry_rcu(b, pos, tbl, i->iter, hash)
- bch2_cached_btree_node_to_text(&i->buf, c, b);
- i->iter++;
- } else {
- done = true;
+ scoped_guard(rcu) {
+ struct bucket_table *tbl =
+ rht_dereference_rcu(c->btree_cache.table.tbl,
+ &c->btree_cache.table);
+ if (i->iter < tbl->size) {
+ struct rhash_head *pos;
+ struct btree *b;
+
+ rht_for_each_entry_rcu(b, pos, tbl, i->iter, hash)
+ bch2_cached_btree_node_to_text(&i->buf, c, b);
+ i->iter++;
+ } else {
+ done = true;
+ }
}
--i->buf.atomic;
- rcu_read_unlock();
} while (!done);
if (i->buf.allocation_failure)
diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c
index d198001838f3..300f7cc8abdf 100644
--- a/fs/bcachefs/dirent.c
+++ b/fs/bcachefs/dirent.c
@@ -231,70 +231,64 @@ void bch2_dirent_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c
prt_printf(out, " type %s", bch2_d_type_str(d.v->d_type));
}
-static struct bkey_i_dirent *dirent_alloc_key(struct btree_trans *trans,
- subvol_inum dir,
- u8 type,
- int name_len, int cf_name_len,
- u64 dst)
+int bch2_dirent_init_name(struct bkey_i_dirent *dirent,
+ const struct bch_hash_info *hash_info,
+ const struct qstr *name,
+ const struct qstr *cf_name)
{
- struct bkey_i_dirent *dirent;
- unsigned u64s = BKEY_U64s + dirent_val_u64s(name_len, cf_name_len);
+ EBUG_ON(hash_info->cf_encoding == NULL && cf_name);
+ int cf_len = 0;
- BUG_ON(u64s > U8_MAX);
-
- dirent = bch2_trans_kmalloc(trans, u64s * sizeof(u64));
- if (IS_ERR(dirent))
- return dirent;
+ if (name->len > BCH_NAME_MAX)
+ return -ENAMETOOLONG;
- bkey_dirent_init(&dirent->k_i);
- dirent->k.u64s = u64s;
+ dirent->v.d_casefold = hash_info->cf_encoding != NULL;
- if (type != DT_SUBVOL) {
- dirent->v.d_inum = cpu_to_le64(dst);
+ if (!dirent->v.d_casefold) {
+ memcpy(&dirent->v.d_name[0], name->name, name->len);
+ memset(&dirent->v.d_name[name->len], 0,
+ bkey_val_bytes(&dirent->k) -
+ offsetof(struct bch_dirent, d_name) -
+ name->len);
} else {
- dirent->v.d_parent_subvol = cpu_to_le32(dir.subvol);
- dirent->v.d_child_subvol = cpu_to_le32(dst);
- }
+#ifdef CONFIG_UNICODE
+ memcpy(&dirent->v.d_cf_name_block.d_names[0], name->name, name->len);
- dirent->v.d_type = type;
- dirent->v.d_unused = 0;
- dirent->v.d_casefold = cf_name_len ? 1 : 0;
+ char *cf_out = &dirent->v.d_cf_name_block.d_names[name->len];
- return dirent;
-}
+ if (cf_name) {
+ cf_len = cf_name->len;
-static void dirent_init_regular_name(struct bkey_i_dirent *dirent,
- const struct qstr *name)
-{
- EBUG_ON(dirent->v.d_casefold);
+ memcpy(cf_out, cf_name->name, cf_name->len);
+ } else {
+ cf_len = utf8_casefold(hash_info->cf_encoding, name,
+ cf_out,
+ bkey_val_end(bkey_i_to_s(&dirent->k_i)) - (void *) cf_out);
+ if (cf_len <= 0)
+ return cf_len;
+ }
- memcpy(&dirent->v.d_name[0], name->name, name->len);
- memset(&dirent->v.d_name[name->len], 0,
- bkey_val_bytes(&dirent->k) -
- offsetof(struct bch_dirent, d_name) -
- name->len);
-}
+ memset(&dirent->v.d_cf_name_block.d_names[name->len + cf_len], 0,
+ bkey_val_bytes(&dirent->k) -
+ offsetof(struct bch_dirent, d_cf_name_block.d_names) -
+ name->len + cf_len);
-static void dirent_init_casefolded_name(struct bkey_i_dirent *dirent,
- const struct qstr *name,
- const struct qstr *cf_name)
-{
- EBUG_ON(!dirent->v.d_casefold);
- EBUG_ON(!cf_name->len);
-
- dirent->v.d_cf_name_block.d_name_len = cpu_to_le16(name->len);
- dirent->v.d_cf_name_block.d_cf_name_len = cpu_to_le16(cf_name->len);
- memcpy(&dirent->v.d_cf_name_block.d_names[0], name->name, name->len);
- memcpy(&dirent->v.d_cf_name_block.d_names[name->len], cf_name->name, cf_name->len);
- memset(&dirent->v.d_cf_name_block.d_names[name->len + cf_name->len], 0,
- bkey_val_bytes(&dirent->k) -
- offsetof(struct bch_dirent, d_cf_name_block.d_names) -
- name->len + cf_name->len);
-
- EBUG_ON(bch2_dirent_get_casefold_name(dirent_i_to_s_c(dirent)).len != cf_name->len);
+ dirent->v.d_cf_name_block.d_name_len = cpu_to_le16(name->len);
+ dirent->v.d_cf_name_block.d_cf_name_len = cpu_to_le16(cf_len);
+
+ EBUG_ON(bch2_dirent_get_casefold_name(dirent_i_to_s_c(dirent)).len != cf_len);
+#else
+ return -EOPNOTSUPP;
+#endif
+ }
+
+ unsigned u64s = dirent_val_u64s(name->len, cf_len);
+ BUG_ON(u64s > bkey_val_u64s(&dirent->k));
+ set_bkey_val_u64s(&dirent->k, u64s);
+ return 0;
}
-static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans,
+struct bkey_i_dirent *bch2_dirent_create_key(struct btree_trans *trans,
const struct bch_hash_info *hash_info,
subvol_inum dir,
u8 type,
@@ -302,31 +296,28 @@ static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans,
const struct qstr *cf_name,
u64 dst)
{
- struct bkey_i_dirent *dirent;
- struct qstr _cf_name;
-
- if (name->len > BCH_NAME_MAX)
- return ERR_PTR(-ENAMETOOLONG);
+ struct bkey_i_dirent *dirent = bch2_trans_kmalloc(trans, BKEY_U64s_MAX * sizeof(u64));
+ if (IS_ERR(dirent))
+ return dirent;
- if (hash_info->cf_encoding && !cf_name) {
- int ret = bch2_casefold(trans, hash_info, name, &_cf_name);
- if (ret)
- return ERR_PTR(ret);
+ bkey_dirent_init(&dirent->k_i);
+ dirent->k.u64s = BKEY_U64s_MAX;
- cf_name = &_cf_name;
+ if (type != DT_SUBVOL) {
+ dirent->v.d_inum = cpu_to_le64(dst);
+ } else {
+ dirent->v.d_parent_subvol = cpu_to_le32(dir.subvol);
+ dirent->v.d_child_subvol = cpu_to_le32(dst);
}
- dirent = dirent_alloc_key(trans, dir, type, name->len, cf_name ? cf_name->len : 0, dst);
- if (IS_ERR(dirent))
- return dirent;
+ dirent->v.d_type = type;
+ dirent->v.d_unused = 0;
- if (cf_name)
- dirent_init_casefolded_name(dirent, name, cf_name);
- else
- dirent_init_regular_name(dirent, name);
+ int ret = bch2_dirent_init_name(dirent, hash_info, name, cf_name);
+ if (ret)
+ return ERR_PTR(ret);
EBUG_ON(bch2_dirent_get_name(dirent_i_to_s_c(dirent)).len != name->len);
-
return dirent;
}
@@ -341,7 +332,7 @@ int bch2_dirent_create_snapshot(struct btree_trans *trans,
struct bkey_i_dirent *dirent;
int ret;
- dirent = dirent_create_key(trans, hash_info, dir_inum, type, name, NULL, dst_inum);
+ dirent = bch2_dirent_create_key(trans, hash_info, dir_inum, type, name, NULL, dst_inum);
ret = PTR_ERR_OR_ZERO(dirent);
if (ret)
return ret;
@@ -365,7 +356,7 @@ int bch2_dirent_create(struct btree_trans *trans, subvol_inum dir,
struct bkey_i_dirent *dirent;
int ret;
- dirent = dirent_create_key(trans, hash_info, dir, type, name, NULL, dst_inum);
+ dirent = bch2_dirent_create_key(trans, hash_info, dir, type, name, NULL, dst_inum);
ret = PTR_ERR_OR_ZERO(dirent);
if (ret)
return ret;
@@ -402,8 +393,8 @@ int bch2_dirent_read_target(struct btree_trans *trans, subvol_inum dir,
}
int bch2_dirent_rename(struct btree_trans *trans,
- subvol_inum src_dir, struct bch_hash_info *src_hash, u64 *src_dir_i_size,
- subvol_inum dst_dir, struct bch_hash_info *dst_hash, u64 *dst_dir_i_size,
+ subvol_inum src_dir, struct bch_hash_info *src_hash,
+ subvol_inum dst_dir, struct bch_hash_info *dst_hash,
const struct qstr *src_name, subvol_inum *src_inum, u64 *src_offset,
const struct qstr *dst_name, subvol_inum *dst_inum, u64 *dst_offset,
enum bch_rename_mode mode)
@@ -470,8 +461,8 @@ int bch2_dirent_rename(struct btree_trans *trans,
*src_offset = dst_iter.pos.offset;
/* Create new dst key: */
- new_dst = dirent_create_key(trans, dst_hash, dst_dir, 0, dst_name,
- dst_hash->cf_encoding ? &dst_name_lookup : NULL, 0);
+ new_dst = bch2_dirent_create_key(trans, dst_hash, dst_dir, 0, dst_name,
+ dst_hash->cf_encoding ? &dst_name_lookup : NULL, 0);
ret = PTR_ERR_OR_ZERO(new_dst);
if (ret)
goto out;
@@ -481,8 +472,8 @@ int bch2_dirent_rename(struct btree_trans *trans,
/* Create new src key: */
if (mode == BCH_RENAME_EXCHANGE) {
- new_src = dirent_create_key(trans, src_hash, src_dir, 0, src_name,
- src_hash->cf_encoding ? &src_name_lookup : NULL, 0);
+ new_src = bch2_dirent_create_key(trans, src_hash, src_dir, 0, src_name,
+ src_hash->cf_encoding ? &src_name_lookup : NULL, 0);
ret = PTR_ERR_OR_ZERO(new_src);
if (ret)
goto out;
@@ -542,14 +533,6 @@ int bch2_dirent_rename(struct btree_trans *trans,
new_src->v.d_type == DT_SUBVOL)
new_src->v.d_parent_subvol = cpu_to_le32(src_dir.subvol);
- if (old_dst.k)
- *dst_dir_i_size -= bkey_bytes(old_dst.k);
- *src_dir_i_size -= bkey_bytes(old_src.k);
-
- if (mode == BCH_RENAME_EXCHANGE)
- *src_dir_i_size += bkey_bytes(&new_src->k);
- *dst_dir_i_size += bkey_bytes(&new_dst->k);
-
ret = bch2_trans_update(trans, &dst_iter, &new_dst->k_i, 0);
if (ret)
goto out;
@@ -656,7 +639,7 @@ int bch2_empty_dir_snapshot(struct btree_trans *trans, u64 dir, u32 subvol, u32
struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
if (d.v->d_type == DT_SUBVOL && le32_to_cpu(d.v->d_parent_subvol) != subvol)
continue;
- ret = -BCH_ERR_ENOTEMPTY_dir_not_empty;
+ ret = bch_err_throw(trans->c, ENOTEMPTY_dir_not_empty);
break;
}
bch2_trans_iter_exit(trans, &iter);
@@ -692,7 +675,9 @@ static int bch2_dir_emit(struct dir_context *ctx, struct bkey_s_c_dirent d, subv
return !ret;
}
-int bch2_readdir(struct bch_fs *c, subvol_inum inum, struct dir_context *ctx)
+int bch2_readdir(struct bch_fs *c, subvol_inum inum,
+ struct bch_hash_info *hash_info,
+ struct dir_context *ctx)
{
struct bkey_buf sk;
bch2_bkey_buf_init(&sk);
@@ -710,7 +695,11 @@ int bch2_readdir(struct bch_fs *c, subvol_inum inum, struct dir_context *ctx)
struct bkey_s_c_dirent dirent = bkey_i_to_s_c_dirent(sk.k);
subvol_inum target;
- int ret2 = bch2_dirent_read_target(trans, inum, dirent, &target);
+
+ bool need_second_pass = false;
+ int ret2 = bch2_str_hash_check_key(trans, NULL, &bch2_dirent_hash_desc,
+ hash_info, &iter, k, &need_second_pass) ?:
+ bch2_dirent_read_target(trans, inum, dirent, &target);
if (ret2 > 0)
continue;
@@ -740,7 +729,7 @@ static int lookup_first_inode(struct btree_trans *trans, u64 inode_nr,
ret = bch2_inode_unpack(k, inode);
goto found;
}
- ret = -BCH_ERR_ENOENT_inode;
+ ret = bch_err_throw(trans->c, ENOENT_inode);
found:
bch_err_msg(trans->c, ret, "fetching inode %llu", inode_nr);
bch2_trans_iter_exit(trans, &iter);
diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h
index d3e7ae669575..70fb0b581221 100644
--- a/fs/bcachefs/dirent.h
+++ b/fs/bcachefs/dirent.h
@@ -38,7 +38,7 @@ static inline int bch2_maybe_casefold(struct btree_trans *trans,
}
}
-struct qstr bch2_dirent_get_name(struct bkey_s_c_dirent d);
+struct qstr bch2_dirent_get_name(struct bkey_s_c_dirent);
static inline unsigned dirent_val_u64s(unsigned len, unsigned cf_len)
{
@@ -59,6 +59,14 @@ static inline void dirent_copy_target(struct bkey_i_dirent *dst,
dst->v.d_type = src.v->d_type;
}
+int bch2_dirent_init_name(struct bkey_i_dirent *,
+ const struct bch_hash_info *,
+ const struct qstr *,
+ const struct qstr *);
+struct bkey_i_dirent *bch2_dirent_create_key(struct btree_trans *,
+ const struct bch_hash_info *, subvol_inum, u8,
+ const struct qstr *, const struct qstr *, u64);
+
int bch2_dirent_create_snapshot(struct btree_trans *, u32, u64, u32,
const struct bch_hash_info *, u8,
const struct qstr *, u64, u64 *,
@@ -80,8 +88,8 @@ enum bch_rename_mode {
};
int bch2_dirent_rename(struct btree_trans *,
- subvol_inum, struct bch_hash_info *, u64 *,
- subvol_inum, struct bch_hash_info *, u64 *,
+ subvol_inum, struct bch_hash_info *,
+ subvol_inum, struct bch_hash_info *,
const struct qstr *, subvol_inum *, u64 *,
const struct qstr *, subvol_inum *, u64 *,
enum bch_rename_mode);
@@ -95,7 +103,7 @@ u64 bch2_dirent_lookup(struct bch_fs *, subvol_inum,
int bch2_empty_dir_snapshot(struct btree_trans *, u64, u32, u32);
int bch2_empty_dir_trans(struct btree_trans *, subvol_inum);
-int bch2_readdir(struct bch_fs *, subvol_inum, struct dir_context *);
+int bch2_readdir(struct bch_fs *, subvol_inum, struct bch_hash_info *, struct dir_context *);
int bch2_fsck_remove_dirent(struct btree_trans *, struct bpos);
diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c
index b3840ff7c407..3d59a57a5256 100644
--- a/fs/bcachefs/disk_accounting.c
+++ b/fs/bcachefs/disk_accounting.c
@@ -390,7 +390,7 @@ static int __bch2_accounting_mem_insert(struct bch_fs *c, struct bkey_s_c_accoun
err:
free_percpu(n.v[1]);
free_percpu(n.v[0]);
- return -BCH_ERR_ENOMEM_disk_accounting;
+ return bch_err_throw(c, ENOMEM_disk_accounting);
}
int bch2_accounting_mem_insert(struct bch_fs *c, struct bkey_s_c_accounting a,
@@ -401,7 +401,7 @@ int bch2_accounting_mem_insert(struct bch_fs *c, struct bkey_s_c_accounting a,
if (mode != BCH_ACCOUNTING_read &&
accounting_to_replicas(&r.e, a.k->p) &&
!bch2_replicas_marked_locked(c, &r.e))
- return -BCH_ERR_btree_insert_need_mark_replicas;
+ return bch_err_throw(c, btree_insert_need_mark_replicas);
percpu_up_read(&c->mark_lock);
percpu_down_write(&c->mark_lock);
@@ -419,7 +419,7 @@ int bch2_accounting_mem_insert_locked(struct bch_fs *c, struct bkey_s_c_accounti
if (mode != BCH_ACCOUNTING_read &&
accounting_to_replicas(&r.e, a.k->p) &&
!bch2_replicas_marked_locked(c, &r.e))
- return -BCH_ERR_btree_insert_need_mark_replicas;
+ return bch_err_throw(c, btree_insert_need_mark_replicas);
return __bch2_accounting_mem_insert(c, a);
}
@@ -559,7 +559,7 @@ int bch2_gc_accounting_start(struct bch_fs *c)
sizeof(u64), GFP_KERNEL);
if (!e->v[1]) {
bch2_accounting_free_counters(acc, true);
- ret = -BCH_ERR_ENOMEM_disk_accounting;
+ ret = bch_err_throw(c, ENOMEM_disk_accounting);
break;
}
}
@@ -737,7 +737,7 @@ invalid_device:
bch2_disk_accounting_mod(trans, acc, v, nr, false)) ?:
-BCH_ERR_remove_disk_accounting_entry;
} else {
- ret = -BCH_ERR_remove_disk_accounting_entry;
+ ret = bch_err_throw(c, remove_disk_accounting_entry);
}
goto fsck_err;
}
@@ -897,8 +897,8 @@ int bch2_accounting_read(struct bch_fs *c)
case BCH_DISK_ACCOUNTING_replicas:
fs_usage_data_type_to_base(usage, k.replicas.data_type, v[0]);
break;
- case BCH_DISK_ACCOUNTING_dev_data_type:
- rcu_read_lock();
+ case BCH_DISK_ACCOUNTING_dev_data_type: {
+ guard(rcu)();
struct bch_dev *ca = bch2_dev_rcu_noerror(c, k.dev_data_type.dev);
if (ca) {
struct bch_dev_usage_type __percpu *d = &ca->usage->d[k.dev_data_type.data_type];
@@ -910,9 +910,9 @@ int bch2_accounting_read(struct bch_fs *c)
k.dev_data_type.data_type == BCH_DATA_journal)
usage->hidden += v[0] * ca->mi.bucket_size;
}
- rcu_read_unlock();
break;
}
+ }
}
preempt_enable();
fsck_err:
@@ -1006,19 +1006,18 @@ void bch2_verify_accounting_clean(struct bch_fs *c)
case BCH_DISK_ACCOUNTING_replicas:
fs_usage_data_type_to_base(&base, acc_k.replicas.data_type, a.v->d[0]);
break;
- case BCH_DISK_ACCOUNTING_dev_data_type: {
- rcu_read_lock();
- struct bch_dev *ca = bch2_dev_rcu_noerror(c, acc_k.dev_data_type.dev);
- if (!ca) {
- rcu_read_unlock();
- continue;
+ case BCH_DISK_ACCOUNTING_dev_data_type:
+ {
+ guard(rcu)(); /* scoped guard is a loop, and doesn't play nicely with continue */
+ struct bch_dev *ca = bch2_dev_rcu_noerror(c, acc_k.dev_data_type.dev);
+ if (!ca)
+ continue;
+
+ v[0] = percpu_u64_get(&ca->usage->d[acc_k.dev_data_type.data_type].buckets);
+ v[1] = percpu_u64_get(&ca->usage->d[acc_k.dev_data_type.data_type].sectors);
+ v[2] = percpu_u64_get(&ca->usage->d[acc_k.dev_data_type.data_type].fragmented);
}
- v[0] = percpu_u64_get(&ca->usage->d[acc_k.dev_data_type.data_type].buckets);
- v[1] = percpu_u64_get(&ca->usage->d[acc_k.dev_data_type.data_type].sectors);
- v[2] = percpu_u64_get(&ca->usage->d[acc_k.dev_data_type.data_type].fragmented);
- rcu_read_unlock();
-
if (memcmp(a.v->d, v, 3 * sizeof(u64))) {
struct printbuf buf = PRINTBUF;
@@ -1032,7 +1031,6 @@ void bch2_verify_accounting_clean(struct bch_fs *c)
mismatch = true;
}
}
- }
0;
})));
diff --git a/fs/bcachefs/disk_accounting.h b/fs/bcachefs/disk_accounting.h
index f6098e33ab30..d61abebf3e0b 100644
--- a/fs/bcachefs/disk_accounting.h
+++ b/fs/bcachefs/disk_accounting.h
@@ -174,17 +174,17 @@ static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans,
case BCH_DISK_ACCOUNTING_replicas:
fs_usage_data_type_to_base(&trans->fs_usage_delta, acc_k.replicas.data_type, a.v->d[0]);
break;
- case BCH_DISK_ACCOUNTING_dev_data_type:
- rcu_read_lock();
+ case BCH_DISK_ACCOUNTING_dev_data_type: {
+ guard(rcu)();
struct bch_dev *ca = bch2_dev_rcu_noerror(c, acc_k.dev_data_type.dev);
if (ca) {
this_cpu_add(ca->usage->d[acc_k.dev_data_type.data_type].buckets, a.v->d[0]);
this_cpu_add(ca->usage->d[acc_k.dev_data_type.data_type].sectors, a.v->d[1]);
this_cpu_add(ca->usage->d[acc_k.dev_data_type.data_type].fragmented, a.v->d[2]);
}
- rcu_read_unlock();
break;
}
+ }
}
unsigned idx;
diff --git a/fs/bcachefs/disk_groups.c b/fs/bcachefs/disk_groups.c
index c20ecf5e5381..cde842ac1886 100644
--- a/fs/bcachefs/disk_groups.c
+++ b/fs/bcachefs/disk_groups.c
@@ -130,7 +130,7 @@ int bch2_sb_disk_groups_to_cpu(struct bch_fs *c)
cpu_g = kzalloc(struct_size(cpu_g, entries, nr_groups), GFP_KERNEL);
if (!cpu_g)
- return -BCH_ERR_ENOMEM_disk_groups_to_cpu;
+ return bch_err_throw(c, ENOMEM_disk_groups_to_cpu);
cpu_g->nr = nr_groups;
@@ -170,36 +170,28 @@ int bch2_sb_disk_groups_to_cpu(struct bch_fs *c)
const struct bch_devs_mask *bch2_target_to_mask(struct bch_fs *c, unsigned target)
{
struct target t = target_decode(target);
- struct bch_devs_mask *devs;
- rcu_read_lock();
+ guard(rcu)();
switch (t.type) {
case TARGET_NULL:
- devs = NULL;
- break;
+ return NULL;
case TARGET_DEV: {
struct bch_dev *ca = t.dev < c->sb.nr_devices
? rcu_dereference(c->devs[t.dev])
: NULL;
- devs = ca ? &ca->self : NULL;
- break;
+ return ca ? &ca->self : NULL;
}
case TARGET_GROUP: {
struct bch_disk_groups_cpu *g = rcu_dereference(c->disk_groups);
- devs = g && t.group < g->nr && !g->entries[t.group].deleted
+ return g && t.group < g->nr && !g->entries[t.group].deleted
? &g->entries[t.group].devs
: NULL;
- break;
}
default:
BUG();
}
-
- rcu_read_unlock();
-
- return devs;
}
bool bch2_dev_in_target(struct bch_fs *c, unsigned dev, unsigned target)
@@ -384,7 +376,7 @@ void bch2_disk_groups_to_text(struct printbuf *out, struct bch_fs *c)
bch2_printbuf_make_room(out, 4096);
out->atomic++;
- rcu_read_lock();
+ guard(rcu)();
struct bch_disk_groups_cpu *g = rcu_dereference(c->disk_groups);
for (unsigned i = 0; i < (g ? g->nr : 0); i++) {
@@ -405,16 +397,14 @@ next:
prt_newline(out);
}
- rcu_read_unlock();
out->atomic--;
}
void bch2_disk_path_to_text(struct printbuf *out, struct bch_fs *c, unsigned v)
{
out->atomic++;
- rcu_read_lock();
+ guard(rcu)();
__bch2_disk_path_to_text(out, rcu_dereference(c->disk_groups), v),
- rcu_read_unlock();
--out->atomic;
}
@@ -535,13 +525,11 @@ void bch2_target_to_text(struct printbuf *out, struct bch_fs *c, unsigned v)
switch (t.type) {
case TARGET_NULL:
prt_printf(out, "none");
- break;
+ return;
case TARGET_DEV: {
- struct bch_dev *ca;
-
out->atomic++;
- rcu_read_lock();
- ca = t.dev < c->sb.nr_devices
+ guard(rcu)();
+ struct bch_dev *ca = t.dev < c->sb.nr_devices
? rcu_dereference(c->devs[t.dev])
: NULL;
@@ -552,13 +540,12 @@ void bch2_target_to_text(struct printbuf *out, struct bch_fs *c, unsigned v)
else
prt_printf(out, "invalid device %u", t.dev);
- rcu_read_unlock();
out->atomic--;
- break;
+ return;
}
case TARGET_GROUP:
bch2_disk_path_to_text(out, c, t.group);
- break;
+ return;
default:
BUG();
}
diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c
index c581426e3894..543dbba9b14f 100644
--- a/fs/bcachefs/ec.c
+++ b/fs/bcachefs/ec.c
@@ -213,7 +213,7 @@ static int __mark_stripe_bucket(struct btree_trans *trans,
a->dirty_sectors,
a->stripe, s.k->p.offset,
(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
- ret = -BCH_ERR_mark_stripe;
+ ret = bch_err_throw(c, mark_stripe);
goto err;
}
@@ -224,7 +224,7 @@ static int __mark_stripe_bucket(struct btree_trans *trans,
a->dirty_sectors,
a->cached_sectors,
(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
- ret = -BCH_ERR_mark_stripe;
+ ret = bch_err_throw(c, mark_stripe);
goto err;
}
} else {
@@ -234,7 +234,7 @@ static int __mark_stripe_bucket(struct btree_trans *trans,
bucket.inode, bucket.offset, a->gen,
a->stripe,
(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
- ret = -BCH_ERR_mark_stripe;
+ ret = bch_err_throw(c, mark_stripe);
goto err;
}
@@ -244,7 +244,7 @@ static int __mark_stripe_bucket(struct btree_trans *trans,
bch2_data_type_str(a->data_type),
bch2_data_type_str(data_type),
(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
- ret = -BCH_ERR_mark_stripe;
+ ret = bch_err_throw(c, mark_stripe);
goto err;
}
@@ -256,7 +256,7 @@ static int __mark_stripe_bucket(struct btree_trans *trans,
a->dirty_sectors,
a->cached_sectors,
(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
- ret = -BCH_ERR_mark_stripe;
+ ret = bch_err_throw(c, mark_stripe);
goto err;
}
}
@@ -295,7 +295,7 @@ static int mark_stripe_bucket(struct btree_trans *trans,
struct bch_dev *ca = bch2_dev_tryget(c, ptr->dev);
if (unlikely(!ca)) {
if (ptr->dev != BCH_SB_MEMBER_INVALID && !(flags & BTREE_TRIGGER_overwrite))
- ret = -BCH_ERR_mark_stripe;
+ ret = bch_err_throw(c, mark_stripe);
goto err;
}
@@ -325,7 +325,7 @@ static int mark_stripe_bucket(struct btree_trans *trans,
if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u\n%s",
ptr->dev,
(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
- ret = -BCH_ERR_mark_stripe;
+ ret = bch_err_throw(c, mark_stripe);
goto err;
}
@@ -428,7 +428,7 @@ int bch2_trigger_stripe(struct btree_trans *trans,
gc = genradix_ptr_alloc(&c->gc_stripes, idx, GFP_KERNEL);
if (!gc) {
bch_err(c, "error allocating memory for gc_stripes, idx %llu", idx);
- return -BCH_ERR_ENOMEM_mark_stripe;
+ return bch_err_throw(c, ENOMEM_mark_stripe);
}
/*
@@ -536,7 +536,8 @@ static void ec_stripe_buf_exit(struct ec_stripe_buf *buf)
}
/* XXX: this is a non-mempoolified memory allocation: */
-static int ec_stripe_buf_init(struct ec_stripe_buf *buf,
+static int ec_stripe_buf_init(struct bch_fs *c,
+ struct ec_stripe_buf *buf,
unsigned offset, unsigned size)
{
struct bch_stripe *v = &bkey_i_to_stripe(&buf->key)->v;
@@ -564,7 +565,7 @@ static int ec_stripe_buf_init(struct ec_stripe_buf *buf,
return 0;
err:
ec_stripe_buf_exit(buf);
- return -BCH_ERR_ENOMEM_stripe_buf;
+ return bch_err_throw(c, ENOMEM_stripe_buf);
}
/* Checksumming: */
@@ -840,7 +841,7 @@ int bch2_ec_read_extent(struct btree_trans *trans, struct bch_read_bio *rbio,
buf = kzalloc(sizeof(*buf), GFP_NOFS);
if (!buf)
- return -BCH_ERR_ENOMEM_ec_read_extent;
+ return bch_err_throw(c, ENOMEM_ec_read_extent);
ret = lockrestart_do(trans, get_stripe_key_trans(trans, rbio->pick.ec.idx, buf));
if (ret) {
@@ -861,7 +862,7 @@ int bch2_ec_read_extent(struct btree_trans *trans, struct bch_read_bio *rbio,
goto err;
}
- ret = ec_stripe_buf_init(buf, offset, bio_sectors(&rbio->bio));
+ ret = ec_stripe_buf_init(c, buf, offset, bio_sectors(&rbio->bio));
if (ret) {
msg = "-ENOMEM";
goto err;
@@ -894,7 +895,7 @@ err:
bch_err_ratelimited(c,
"error doing reconstruct read: %s\n %s", msg, msgbuf.buf);
printbuf_exit(&msgbuf);
- ret = -BCH_ERR_stripe_reconstruct;
+ ret = bch_err_throw(c, stripe_reconstruct);
goto out;
}
@@ -904,7 +905,7 @@ static int __ec_stripe_mem_alloc(struct bch_fs *c, size_t idx, gfp_t gfp)
{
if (c->gc_pos.phase != GC_PHASE_not_running &&
!genradix_ptr_alloc(&c->gc_stripes, idx, gfp))
- return -BCH_ERR_ENOMEM_ec_stripe_mem_alloc;
+ return bch_err_throw(c, ENOMEM_ec_stripe_mem_alloc);
return 0;
}
@@ -1129,7 +1130,7 @@ static int ec_stripe_update_extent(struct btree_trans *trans,
bch2_fs_inconsistent(c, "%s", buf.buf);
printbuf_exit(&buf);
- return -BCH_ERR_erasure_coding_found_btree_node;
+ return bch_err_throw(c, erasure_coding_found_btree_node);
}
k = bch2_backpointer_get_key(trans, bp, &iter, BTREE_ITER_intent, last_flushed);
@@ -1195,7 +1196,7 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_b
struct bch_dev *ca = bch2_dev_tryget(c, ptr.dev);
if (!ca)
- return -BCH_ERR_ENOENT_dev_not_found;
+ return bch_err_throw(c, ENOENT_dev_not_found);
struct bpos bucket_pos = PTR_BUCKET_POS(ca, &ptr);
@@ -1256,7 +1257,7 @@ static void zero_out_rest_of_ec_bucket(struct bch_fs *c,
struct bch_dev *ca = bch2_dev_get_ioref(c, ob->dev, WRITE,
BCH_DEV_WRITE_REF_ec_bucket_zero);
if (!ca) {
- s->err = -BCH_ERR_erofs_no_writes;
+ s->err = bch_err_throw(c, erofs_no_writes);
return;
}
@@ -1320,7 +1321,7 @@ static void ec_stripe_create(struct ec_stripe_new *s)
if (ec_do_recov(c, &s->existing_stripe)) {
bch_err(c, "error creating stripe: error reading existing stripe");
- ret = -BCH_ERR_ec_block_read;
+ ret = bch_err_throw(c, ec_block_read);
goto err;
}
@@ -1346,7 +1347,7 @@ static void ec_stripe_create(struct ec_stripe_new *s)
if (ec_nr_failed(&s->new_stripe)) {
bch_err(c, "error creating stripe: error writing redundancy buckets");
- ret = -BCH_ERR_ec_block_write;
+ ret = bch_err_throw(c, ec_block_write);
goto err;
}
@@ -1578,26 +1579,26 @@ static struct ec_stripe_new *ec_new_stripe_alloc(struct bch_fs *c, struct ec_str
static void ec_stripe_head_devs_update(struct bch_fs *c, struct ec_stripe_head *h)
{
struct bch_devs_mask devs = h->devs;
+ unsigned nr_devs, nr_devs_with_durability;
- rcu_read_lock();
- h->devs = target_rw_devs(c, BCH_DATA_user, h->disk_label
- ? group_to_target(h->disk_label - 1)
- : 0);
- unsigned nr_devs = dev_mask_nr(&h->devs);
+ scoped_guard(rcu) {
+ h->devs = target_rw_devs(c, BCH_DATA_user, h->disk_label
+ ? group_to_target(h->disk_label - 1)
+ : 0);
+ nr_devs = dev_mask_nr(&h->devs);
- for_each_member_device_rcu(c, ca, &h->devs)
- if (!ca->mi.durability)
- __clear_bit(ca->dev_idx, h->devs.d);
- unsigned nr_devs_with_durability = dev_mask_nr(&h->devs);
+ for_each_member_device_rcu(c, ca, &h->devs)
+ if (!ca->mi.durability)
+ __clear_bit(ca->dev_idx, h->devs.d);
+ nr_devs_with_durability = dev_mask_nr(&h->devs);
- h->blocksize = pick_blocksize(c, &h->devs);
+ h->blocksize = pick_blocksize(c, &h->devs);
- h->nr_active_devs = 0;
- for_each_member_device_rcu(c, ca, &h->devs)
- if (ca->mi.bucket_size == h->blocksize)
- h->nr_active_devs++;
-
- rcu_read_unlock();
+ h->nr_active_devs = 0;
+ for_each_member_device_rcu(c, ca, &h->devs)
+ if (ca->mi.bucket_size == h->blocksize)
+ h->nr_active_devs++;
+ }
/*
* If we only have redundancy + 1 devices, we're better off with just
@@ -1865,7 +1866,7 @@ static int init_new_stripe_from_existing(struct bch_fs *c, struct ec_stripe_new
s->nr_data = existing_v->nr_blocks -
existing_v->nr_redundant;
- int ret = ec_stripe_buf_init(&s->existing_stripe, 0, le16_to_cpu(existing_v->sectors));
+ int ret = ec_stripe_buf_init(c, &s->existing_stripe, 0, le16_to_cpu(existing_v->sectors));
if (ret) {
bch2_stripe_close(c, s);
return ret;
@@ -1925,7 +1926,7 @@ static int __bch2_ec_stripe_head_reuse(struct btree_trans *trans, struct ec_stri
}
bch2_trans_iter_exit(trans, &lru_iter);
if (!ret)
- ret = -BCH_ERR_stripe_alloc_blocked;
+ ret = bch_err_throw(c, stripe_alloc_blocked);
if (ret == 1)
ret = 0;
if (ret)
@@ -1966,7 +1967,7 @@ static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_st
continue;
}
- ret = -BCH_ERR_ENOSPC_stripe_create;
+ ret = bch_err_throw(c, ENOSPC_stripe_create);
break;
}
@@ -2024,7 +2025,7 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans,
if (!h->s) {
h->s = ec_new_stripe_alloc(c, h);
if (!h->s) {
- ret = -BCH_ERR_ENOMEM_ec_new_stripe_alloc;
+ ret = bch_err_throw(c, ENOMEM_ec_new_stripe_alloc);
bch_err(c, "failed to allocate new stripe");
goto err;
}
@@ -2089,7 +2090,7 @@ alloc_existing:
goto err;
allocate_buf:
- ret = ec_stripe_buf_init(&s->new_stripe, 0, h->blocksize);
+ ret = ec_stripe_buf_init(c, &s->new_stripe, 0, h->blocksize);
if (ret)
goto err;
@@ -2115,6 +2116,7 @@ int bch2_invalidate_stripe_to_dev(struct btree_trans *trans,
if (k.k->type != KEY_TYPE_stripe)
return 0;
+ struct bch_fs *c = trans->c;
struct bkey_i_stripe *s =
bch2_bkey_make_mut_typed(trans, iter, &k, 0, stripe);
int ret = PTR_ERR_OR_ZERO(s);
@@ -2141,23 +2143,22 @@ int bch2_invalidate_stripe_to_dev(struct btree_trans *trans,
unsigned nr_good = 0;
- rcu_read_lock();
- bkey_for_each_ptr(ptrs, ptr) {
- if (ptr->dev == dev_idx)
- ptr->dev = BCH_SB_MEMBER_INVALID;
+ scoped_guard(rcu)
+ bkey_for_each_ptr(ptrs, ptr) {
+ if (ptr->dev == dev_idx)
+ ptr->dev = BCH_SB_MEMBER_INVALID;
- struct bch_dev *ca = bch2_dev_rcu(trans->c, ptr->dev);
- nr_good += ca && ca->mi.state != BCH_MEMBER_STATE_failed;
- }
- rcu_read_unlock();
+ struct bch_dev *ca = bch2_dev_rcu(c, ptr->dev);
+ nr_good += ca && ca->mi.state != BCH_MEMBER_STATE_failed;
+ }
if (nr_good < s->v.nr_blocks && !(flags & BCH_FORCE_IF_DATA_DEGRADED))
- return -BCH_ERR_remove_would_lose_data;
+ return bch_err_throw(c, remove_would_lose_data);
unsigned nr_data = s->v.nr_blocks - s->v.nr_redundant;
if (nr_good < nr_data && !(flags & BCH_FORCE_IF_DATA_LOST))
- return -BCH_ERR_remove_would_lose_data;
+ return bch_err_throw(c, remove_would_lose_data);
sectors = -sectors;
@@ -2178,14 +2179,15 @@ static int bch2_invalidate_stripe_to_dev_from_alloc(struct btree_trans *trans, s
return 0;
if (a->stripe_sectors) {
- bch_err(trans->c, "trying to invalidate device in stripe when bucket has stripe data");
- return -BCH_ERR_invalidate_stripe_to_dev;
+ struct bch_fs *c = trans->c;
+ bch_err(c, "trying to invalidate device in stripe when bucket has stripe data");
+ return bch_err_throw(c, invalidate_stripe_to_dev);
}
struct btree_iter iter;
struct bkey_s_c_stripe s =
bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_stripes, POS(0, a->stripe),
- BTREE_ITER_slots, stripe);
+ BTREE_ITER_slots, stripe);
int ret = bkey_err(s);
if (ret)
return ret;
diff --git a/fs/bcachefs/errcode.c b/fs/bcachefs/errcode.c
index 43557bebd0f8..c39cf304c681 100644
--- a/fs/bcachefs/errcode.c
+++ b/fs/bcachefs/errcode.c
@@ -13,12 +13,13 @@ static const char * const bch2_errcode_strs[] = {
NULL
};
-static unsigned bch2_errcode_parents[] = {
+static const unsigned bch2_errcode_parents[] = {
#define x(class, err) [BCH_ERR_##err - BCH_ERR_START] = class,
BCH_ERRCODES()
#undef x
};
+__attribute__((const))
const char *bch2_err_str(int err)
{
const char *errstr;
@@ -36,6 +37,7 @@ const char *bch2_err_str(int err)
return errstr ?: "(Invalid error)";
}
+__attribute__((const))
bool __bch2_err_matches(int err, int class)
{
err = abs(err);
diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h
index 62843e772b2c..ac3264134a15 100644
--- a/fs/bcachefs/errcode.h
+++ b/fs/bcachefs/errcode.h
@@ -182,9 +182,12 @@
x(BCH_ERR_fsck, fsck_errors_not_fixed) \
x(BCH_ERR_fsck, fsck_repair_unimplemented) \
x(BCH_ERR_fsck, fsck_repair_impossible) \
- x(EINVAL, restart_recovery) \
- x(EINVAL, cannot_rewind_recovery) \
+ x(EINVAL, recovery_will_run) \
+ x(BCH_ERR_recovery_will_run, restart_recovery) \
+ x(BCH_ERR_recovery_will_run, cannot_rewind_recovery) \
+ x(BCH_ERR_recovery_will_run, recovery_pass_will_run) \
x(0, data_update_done) \
+ x(0, bkey_was_deleted) \
x(BCH_ERR_data_update_done, data_update_done_would_block) \
x(BCH_ERR_data_update_done, data_update_done_unwritten) \
x(BCH_ERR_data_update_done, data_update_done_no_writes_needed) \
@@ -211,6 +214,8 @@
x(EINVAL, remove_would_lose_data) \
x(EINVAL, no_resize_with_buckets_nouse) \
x(EINVAL, inode_unpack_error) \
+ x(EINVAL, inode_not_unlinked) \
+ x(EINVAL, inode_has_child_snapshot) \
x(EINVAL, varint_decode_error) \
x(EINVAL, erasure_coding_found_btree_node) \
x(EINVAL, option_negative) \
@@ -357,9 +362,11 @@ enum bch_errcode {
BCH_ERR_MAX
};
-const char *bch2_err_str(int);
-bool __bch2_err_matches(int, int);
+__attribute__((const)) const char *bch2_err_str(int);
+__attribute__((const)) bool __bch2_err_matches(int, int);
+
+__attribute__((const))
static inline bool _bch2_err_matches(int err, int class)
{
return err < 0 && __bch2_err_matches(err, class);
diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c
index c2cad28635bf..63951e293c47 100644
--- a/fs/bcachefs/error.c
+++ b/fs/bcachefs/error.c
@@ -100,10 +100,10 @@ int __bch2_topology_error(struct bch_fs *c, struct printbuf *out)
set_bit(BCH_FS_topology_error, &c->flags);
if (!test_bit(BCH_FS_in_recovery, &c->flags)) {
__bch2_inconsistent_error(c, out);
- return -BCH_ERR_btree_need_topology_repair;
+ return bch_err_throw(c, btree_need_topology_repair);
} else {
return bch2_run_explicit_recovery_pass(c, out, BCH_RECOVERY_PASS_check_topology, 0) ?:
- -BCH_ERR_btree_node_read_validate_error;
+ bch_err_throw(c, btree_node_read_validate_error);
}
}
@@ -403,23 +403,23 @@ int bch2_fsck_err_opt(struct bch_fs *c,
if (test_bit(BCH_FS_in_fsck, &c->flags)) {
if (!(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE)))
- return -BCH_ERR_fsck_repair_unimplemented;
+ return bch_err_throw(c, fsck_repair_unimplemented);
switch (c->opts.fix_errors) {
case FSCK_FIX_exit:
- return -BCH_ERR_fsck_errors_not_fixed;
+ return bch_err_throw(c, fsck_errors_not_fixed);
case FSCK_FIX_yes:
if (flags & FSCK_CAN_FIX)
- return -BCH_ERR_fsck_fix;
+ return bch_err_throw(c, fsck_fix);
fallthrough;
case FSCK_FIX_no:
if (flags & FSCK_CAN_IGNORE)
- return -BCH_ERR_fsck_ignore;
- return -BCH_ERR_fsck_errors_not_fixed;
+ return bch_err_throw(c, fsck_ignore);
+ return bch_err_throw(c, fsck_errors_not_fixed);
case FSCK_FIX_ask:
if (flags & FSCK_AUTOFIX)
- return -BCH_ERR_fsck_fix;
- return -BCH_ERR_fsck_ask;
+ return bch_err_throw(c, fsck_fix);
+ return bch_err_throw(c, fsck_ask);
default:
BUG();
}
@@ -427,12 +427,12 @@ int bch2_fsck_err_opt(struct bch_fs *c,
if ((flags & FSCK_AUTOFIX) &&
(c->opts.errors == BCH_ON_ERROR_continue ||
c->opts.errors == BCH_ON_ERROR_fix_safe))
- return -BCH_ERR_fsck_fix;
+ return bch_err_throw(c, fsck_fix);
if (c->opts.errors == BCH_ON_ERROR_continue &&
(flags & FSCK_CAN_IGNORE))
- return -BCH_ERR_fsck_ignore;
- return -BCH_ERR_fsck_errors_not_fixed;
+ return bch_err_throw(c, fsck_ignore);
+ return bch_err_throw(c, fsck_errors_not_fixed);
}
}
@@ -444,7 +444,7 @@ int __bch2_fsck_err(struct bch_fs *c,
{
va_list args;
struct printbuf buf = PRINTBUF, *out = &buf;
- int ret = -BCH_ERR_fsck_ignore;
+ int ret = 0;
const char *action_orig = "fix?", *action = action_orig;
might_sleep();
@@ -474,8 +474,8 @@ int __bch2_fsck_err(struct bch_fs *c,
if (test_bit(err, c->sb.errors_silent))
return flags & FSCK_CAN_FIX
- ? -BCH_ERR_fsck_fix
- : -BCH_ERR_fsck_ignore;
+ ? bch_err_throw(c, fsck_fix)
+ : bch_err_throw(c, fsck_ignore);
printbuf_indent_add_nextline(out, 2);
@@ -517,10 +517,10 @@ int __bch2_fsck_err(struct bch_fs *c,
prt_str(out, ", ");
if (flags & FSCK_CAN_FIX) {
prt_actioning(out, action);
- ret = -BCH_ERR_fsck_fix;
+ ret = bch_err_throw(c, fsck_fix);
} else {
prt_str(out, ", continuing");
- ret = -BCH_ERR_fsck_ignore;
+ ret = bch_err_throw(c, fsck_ignore);
}
goto print;
@@ -532,18 +532,18 @@ int __bch2_fsck_err(struct bch_fs *c,
"run fsck, and forward to devs so error can be marked for self-healing");
inconsistent = true;
print = true;
- ret = -BCH_ERR_fsck_errors_not_fixed;
+ ret = bch_err_throw(c, fsck_errors_not_fixed);
} else if (flags & FSCK_CAN_FIX) {
prt_str(out, ", ");
prt_actioning(out, action);
- ret = -BCH_ERR_fsck_fix;
+ ret = bch_err_throw(c, fsck_fix);
} else {
prt_str(out, ", continuing");
- ret = -BCH_ERR_fsck_ignore;
+ ret = bch_err_throw(c, fsck_ignore);
}
} else if (c->opts.fix_errors == FSCK_FIX_exit) {
prt_str(out, ", exiting");
- ret = -BCH_ERR_fsck_errors_not_fixed;
+ ret = bch_err_throw(c, fsck_errors_not_fixed);
} else if (flags & FSCK_CAN_FIX) {
int fix = s && s->fix
? s->fix
@@ -562,30 +562,37 @@ int __bch2_fsck_err(struct bch_fs *c,
: FSCK_FIX_yes;
ret = ret & 1
- ? -BCH_ERR_fsck_fix
- : -BCH_ERR_fsck_ignore;
+ ? bch_err_throw(c, fsck_fix)
+ : bch_err_throw(c, fsck_ignore);
} else if (fix == FSCK_FIX_yes ||
(c->opts.nochanges &&
!(flags & FSCK_CAN_IGNORE))) {
prt_str(out, ", ");
prt_actioning(out, action);
- ret = -BCH_ERR_fsck_fix;
+ ret = bch_err_throw(c, fsck_fix);
} else {
prt_str(out, ", not ");
prt_actioning(out, action);
+ ret = bch_err_throw(c, fsck_ignore);
+ }
+ } else {
+ if (flags & FSCK_CAN_IGNORE) {
+ prt_str(out, ", continuing");
+ ret = bch_err_throw(c, fsck_ignore);
+ } else {
+ prt_str(out, " (repair unimplemented)");
+ ret = bch_err_throw(c, fsck_repair_unimplemented);
}
- } else if (!(flags & FSCK_CAN_IGNORE)) {
- prt_str(out, " (repair unimplemented)");
}
- if (ret == -BCH_ERR_fsck_ignore &&
+ if (bch2_err_matches(ret, BCH_ERR_fsck_ignore) &&
(c->opts.fix_errors == FSCK_FIX_exit ||
!(flags & FSCK_CAN_IGNORE)))
- ret = -BCH_ERR_fsck_errors_not_fixed;
+ ret = bch_err_throw(c, fsck_errors_not_fixed);
if (test_bit(BCH_FS_in_fsck, &c->flags) &&
- (ret != -BCH_ERR_fsck_fix &&
- ret != -BCH_ERR_fsck_ignore)) {
+ (!bch2_err_matches(ret, BCH_ERR_fsck_fix) &&
+ !bch2_err_matches(ret, BCH_ERR_fsck_ignore))) {
exiting = true;
print = true;
}
@@ -613,26 +620,26 @@ print:
if (s)
s->ret = ret;
-
+err_unlock:
+ mutex_unlock(&c->fsck_error_msgs_lock);
+err:
/*
* We don't yet track whether the filesystem currently has errors, for
* log_fsck_err()s: that would require us to track for every error type
* which recovery pass corrects it, to get the fsck exit status correct:
*/
- if (flags & FSCK_CAN_FIX) {
- if (ret == -BCH_ERR_fsck_fix) {
- set_bit(BCH_FS_errors_fixed, &c->flags);
- } else {
- set_bit(BCH_FS_errors_not_fixed, &c->flags);
- set_bit(BCH_FS_error, &c->flags);
- }
+ if (bch2_err_matches(ret, BCH_ERR_fsck_fix)) {
+ set_bit(BCH_FS_errors_fixed, &c->flags);
+ } else {
+ set_bit(BCH_FS_errors_not_fixed, &c->flags);
+ set_bit(BCH_FS_error, &c->flags);
}
-err_unlock:
- mutex_unlock(&c->fsck_error_msgs_lock);
-err:
+
if (action != action_orig)
kfree(action);
printbuf_exit(&buf);
+
+ BUG_ON(!ret);
return ret;
}
@@ -650,12 +657,12 @@ int __bch2_bkey_fsck_err(struct bch_fs *c,
const char *fmt, ...)
{
if (from.flags & BCH_VALIDATE_silent)
- return -BCH_ERR_fsck_delete_bkey;
+ return bch_err_throw(c, fsck_delete_bkey);
unsigned fsck_flags = 0;
if (!(from.flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit))) {
if (test_bit(err, c->sb.errors_silent))
- return -BCH_ERR_fsck_delete_bkey;
+ return bch_err_throw(c, fsck_delete_bkey);
fsck_flags |= FSCK_AUTOFIX|FSCK_CAN_FIX;
}
diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h
index 5123d4c86770..0c3c3a24fc6f 100644
--- a/fs/bcachefs/error.h
+++ b/fs/bcachefs/error.h
@@ -105,13 +105,13 @@ void bch2_free_fsck_errs(struct bch_fs *);
#define fsck_err_wrap(_do) \
({ \
int _ret = _do; \
- if (_ret != -BCH_ERR_fsck_fix && \
- _ret != -BCH_ERR_fsck_ignore) { \
+ if (!bch2_err_matches(_ret, BCH_ERR_fsck_fix) && \
+ !bch2_err_matches(_ret, BCH_ERR_fsck_ignore)) { \
ret = _ret; \
goto fsck_err; \
} \
\
- _ret == -BCH_ERR_fsck_fix; \
+ bch2_err_matches(_ret, BCH_ERR_fsck_fix); \
})
#define __fsck_err(...) fsck_err_wrap(bch2_fsck_err(__VA_ARGS__))
@@ -170,10 +170,10 @@ do { \
int _ret = __bch2_bkey_fsck_err(c, k, from, \
BCH_FSCK_ERR_##_err_type, \
_err_msg, ##__VA_ARGS__); \
- if (_ret != -BCH_ERR_fsck_fix && \
- _ret != -BCH_ERR_fsck_ignore) \
+ if (!bch2_err_matches(_ret, BCH_ERR_fsck_fix) && \
+ !bch2_err_matches(_ret, BCH_ERR_fsck_ignore)) \
ret = _ret; \
- ret = -BCH_ERR_fsck_delete_bkey; \
+ ret = bch_err_throw(c, fsck_delete_bkey); \
goto fsck_err; \
} while (0)
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index 1ac9897f189d..036e4ad95987 100644
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -65,15 +65,15 @@ void bch2_io_failures_to_text(struct printbuf *out,
continue;
bch2_printbuf_make_room(out, 1024);
- rcu_read_lock();
out->atomic++;
- struct bch_dev *ca = bch2_dev_rcu_noerror(c, f->dev);
- if (ca)
- prt_str(out, ca->name);
- else
- prt_printf(out, "(invalid device %u)", f->dev);
+ scoped_guard(rcu) {
+ struct bch_dev *ca = bch2_dev_rcu_noerror(c, f->dev);
+ if (ca)
+ prt_str(out, ca->name);
+ else
+ prt_printf(out, "(invalid device %u)", f->dev);
+ }
--out->atomic;
- rcu_read_unlock();
prt_char(out, ' ');
@@ -193,7 +193,7 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k,
bool have_dirty_ptrs = false, have_pick = false;
if (k.k->type == KEY_TYPE_error)
- return -BCH_ERR_key_type_error;
+ return bch_err_throw(c, key_type_error);
rcu_read_lock();
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
@@ -286,17 +286,17 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k,
if (!have_dirty_ptrs)
return 0;
if (have_missing_devs)
- return -BCH_ERR_no_device_to_read_from;
+ return bch_err_throw(c, no_device_to_read_from);
if (have_csum_errors)
- return -BCH_ERR_data_read_csum_err;
+ return bch_err_throw(c, data_read_csum_err);
if (have_io_errors)
- return -BCH_ERR_data_read_io_err;
+ return bch_err_throw(c, data_read_io_err);
/*
* If we get here, we have pointers (bkey_ptrs_validate() ensures that),
* but they don't point to valid devices:
*/
- return -BCH_ERR_no_devices_valid;
+ return bch_err_throw(c, no_devices_valid);
}
/* KEY_TYPE_btree_ptr: */
@@ -407,6 +407,8 @@ bool bch2_extent_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r)
lp.crc = bch2_extent_crc_unpack(l.k, NULL);
rp.crc = bch2_extent_crc_unpack(r.k, NULL);
+ guard(rcu)();
+
while (__bkey_ptr_next_decode(l.k, l_ptrs.end, lp, en_l) &&
__bkey_ptr_next_decode(r.k, r_ptrs.end, rp, en_r)) {
if (lp.ptr.offset + lp.crc.offset + lp.crc.live_size !=
@@ -418,10 +420,8 @@ bool bch2_extent_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r)
return false;
/* Extents may not straddle buckets: */
- rcu_read_lock();
struct bch_dev *ca = bch2_dev_rcu(c, lp.ptr.dev);
bool same_bucket = ca && PTR_BUCKET_NR(ca, &lp.ptr) == PTR_BUCKET_NR(ca, &rp.ptr);
- rcu_read_unlock();
if (!same_bucket)
return false;
@@ -838,11 +838,9 @@ unsigned bch2_bkey_durability(struct bch_fs *c, struct bkey_s_c k)
struct extent_ptr_decoded p;
unsigned durability = 0;
- rcu_read_lock();
+ guard(rcu)();
bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
durability += bch2_extent_ptr_durability(c, &p);
- rcu_read_unlock();
-
return durability;
}
@@ -853,12 +851,10 @@ static unsigned bch2_bkey_durability_safe(struct bch_fs *c, struct bkey_s_c k)
struct extent_ptr_decoded p;
unsigned durability = 0;
- rcu_read_lock();
+ guard(rcu)();
bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
if (p.ptr.dev < c->sb.nr_devices && c->devs[p.ptr.dev])
durability += bch2_extent_ptr_durability(c, &p);
- rcu_read_unlock();
-
return durability;
}
@@ -1015,20 +1011,16 @@ bool bch2_bkey_has_target(struct bch_fs *c, struct bkey_s_c k, unsigned target)
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
struct bch_dev *ca;
- bool ret = false;
- rcu_read_lock();
+ guard(rcu)();
bkey_for_each_ptr(ptrs, ptr)
if (bch2_dev_in_target(c, ptr->dev, target) &&
(ca = bch2_dev_rcu(c, ptr->dev)) &&
(!ptr->cached ||
- !dev_ptr_stale_rcu(ca, ptr))) {
- ret = true;
- break;
- }
- rcu_read_unlock();
+ !dev_ptr_stale_rcu(ca, ptr)))
+ return true;
- return ret;
+ return false;
}
bool bch2_bkey_matches_ptr(struct bch_fs *c, struct bkey_s_c k,
@@ -1142,7 +1134,7 @@ void bch2_extent_ptr_set_cached(struct bch_fs *c,
bool have_cached_ptr;
unsigned drop_dev = ptr->dev;
- rcu_read_lock();
+ guard(rcu)();
restart_drop_ptrs:
ptrs = bch2_bkey_ptrs(k);
have_cached_ptr = false;
@@ -1175,10 +1167,8 @@ restart_drop_ptrs:
goto drop;
ptr->cached = true;
- rcu_read_unlock();
return;
drop:
- rcu_read_unlock();
bch2_bkey_drop_ptr_noerror(k, ptr);
}
@@ -1194,12 +1184,11 @@ bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k)
{
struct bch_dev *ca;
- rcu_read_lock();
+ guard(rcu)();
bch2_bkey_drop_ptrs(k, ptr,
ptr->cached &&
(!(ca = bch2_dev_rcu(c, ptr->dev)) ||
dev_ptr_stale_rcu(ca, ptr) > 0));
- rcu_read_unlock();
return bkey_deleted(k.k);
}
@@ -1217,7 +1206,7 @@ bool bch2_extent_normalize_by_opts(struct bch_fs *c,
struct bkey_ptrs ptrs;
bool have_cached_ptr;
- rcu_read_lock();
+ guard(rcu)();
restart_drop_ptrs:
ptrs = bch2_bkey_ptrs(k);
have_cached_ptr = false;
@@ -1230,7 +1219,6 @@ restart_drop_ptrs:
}
have_cached_ptr = true;
}
- rcu_read_unlock();
return bkey_deleted(k.k);
}
@@ -1238,7 +1226,7 @@ restart_drop_ptrs:
void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *c, const struct bch_extent_ptr *ptr)
{
out->atomic++;
- rcu_read_lock();
+ guard(rcu)();
struct bch_dev *ca = bch2_dev_rcu_noerror(c, ptr->dev);
if (!ca) {
prt_printf(out, "ptr: %u:%llu gen %u%s", ptr->dev,
@@ -1262,7 +1250,6 @@ void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *c, const struc
else if (stale)
prt_printf(out, " invalid");
}
- rcu_read_unlock();
--out->atomic;
}
@@ -1528,7 +1515,7 @@ int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k,
struct bch_compression_opt opt = __bch2_compression_decode(r->compression);
prt_printf(err, "invalid compression opt %u:%u",
opt.type, opt.level);
- return -BCH_ERR_invalid_bkey;
+ return bch_err_throw(c, invalid_bkey);
}
#endif
break;
diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c
index e3a75dcca60c..66bacdd49f78 100644
--- a/fs/bcachefs/fs-io-buffered.c
+++ b/fs/bcachefs/fs-io-buffered.c
@@ -394,17 +394,9 @@ struct bch_writepage_state {
struct bch_io_opts opts;
struct bch_folio_sector *tmp;
unsigned tmp_sectors;
+ struct blk_plug plug;
};
-static inline struct bch_writepage_state bch_writepage_state_init(struct bch_fs *c,
- struct bch_inode_info *inode)
-{
- struct bch_writepage_state ret = { 0 };
-
- bch2_inode_opts_get(&ret.opts, c, &inode->ei_inode);
- return ret;
-}
-
/*
* Determine when a writepage io is full. We have to limit writepage bios to a
* single page per bvec (i.e. 1MB with 4k pages) because that is the limit to
@@ -666,17 +658,17 @@ do_io:
int bch2_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
struct bch_fs *c = mapping->host->i_sb->s_fs_info;
- struct bch_writepage_state w =
- bch_writepage_state_init(c, to_bch_ei(mapping->host));
- struct blk_plug plug;
- int ret;
+ struct bch_writepage_state *w = kzalloc(sizeof(*w), GFP_NOFS|__GFP_NOFAIL);
- blk_start_plug(&plug);
- ret = write_cache_pages(mapping, wbc, __bch2_writepage, &w);
- if (w.io)
- bch2_writepage_do_io(&w);
- blk_finish_plug(&plug);
- kfree(w.tmp);
+ bch2_inode_opts_get(&w->opts, c, &to_bch_ei(mapping->host)->ei_inode);
+
+ blk_start_plug(&w->plug);
+ int ret = write_cache_pages(mapping, wbc, __bch2_writepage, w);
+ if (w->io)
+ bch2_writepage_do_io(w);
+ blk_finish_plug(&w->plug);
+ kfree(w->tmp);
+ kfree(w);
return bch2_err_class(ret);
}
diff --git a/fs/bcachefs/fs-io-pagecache.c b/fs/bcachefs/fs-io-pagecache.c
index fbae9c1de746..c2cc405822f2 100644
--- a/fs/bcachefs/fs-io-pagecache.c
+++ b/fs/bcachefs/fs-io-pagecache.c
@@ -447,7 +447,7 @@ static int __bch2_folio_reservation_get(struct bch_fs *c,
if (!reserved) {
bch2_disk_reservation_put(c, &disk_res);
- return -BCH_ERR_ENOSPC_disk_reservation;
+ return bch_err_throw(c, ENOSPC_disk_reservation);
}
break;
}
diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c
index b1e9ee28fc0f..a233f45875e9 100644
--- a/fs/bcachefs/fs-io.c
+++ b/fs/bcachefs/fs-io.c
@@ -71,12 +71,12 @@ void bch2_inode_flush_nocow_writes_async(struct bch_fs *c,
memset(&inode->ei_devs_need_flush, 0, sizeof(inode->ei_devs_need_flush));
for_each_set_bit(dev, devs.d, BCH_SB_MEMBERS_MAX) {
- rcu_read_lock();
- ca = rcu_dereference(c->devs[dev]);
- if (ca && !enumerated_ref_tryget(&ca->io_ref[WRITE],
- BCH_DEV_WRITE_REF_nocow_flush))
- ca = NULL;
- rcu_read_unlock();
+ scoped_guard(rcu) {
+ ca = rcu_dereference(c->devs[dev]);
+ if (ca && !enumerated_ref_tryget(&ca->io_ref[WRITE],
+ BCH_DEV_WRITE_REF_nocow_flush))
+ ca = NULL;
+ }
if (!ca)
continue;
diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c
index 05361a793206..4e72e654da96 100644
--- a/fs/bcachefs/fs-ioctl.c
+++ b/fs/bcachefs/fs-ioctl.c
@@ -268,13 +268,13 @@ static long bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp,
}
if (dst_dentry->d_inode) {
- error = -BCH_ERR_EEXIST_subvolume_create;
+ error = bch_err_throw(c, EEXIST_subvolume_create);
goto err3;
}
dir = dst_path.dentry->d_inode;
if (IS_DEADDIR(dir)) {
- error = -BCH_ERR_ENOENT_directory_dead;
+ error = bch_err_throw(c, ENOENT_directory_dead);
goto err3;
}
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index ddfe89d84966..85d13f800165 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -124,8 +124,9 @@ retry:
goto err;
struct bch_extent_rebalance new_r = bch2_inode_rebalance_opts_get(c, &inode_u);
+ bool rebalance_changed = memcmp(&old_r, &new_r, sizeof(new_r));
- if (memcmp(&old_r, &new_r, sizeof(new_r))) {
+ if (rebalance_changed) {
ret = bch2_set_rebalance_needs_scan_trans(trans, inode_u.bi_inum);
if (ret)
goto err;
@@ -146,6 +147,9 @@ err:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
+ if (rebalance_changed)
+ bch2_rebalance_wakeup(c);
+
bch2_fs_fatal_err_on(bch2_err_matches(ret, ENOENT), c,
"%s: inode %llu:%llu not found when updating",
bch2_err_str(ret),
@@ -1569,11 +1573,12 @@ static int bch2_vfs_readdir(struct file *file, struct dir_context *ctx)
{
struct bch_inode_info *inode = file_bch_inode(file);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
+ struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode);
if (!dir_emit_dots(file, ctx))
return 0;
- int ret = bch2_readdir(c, inode_inum(inode), ctx);
+ int ret = bch2_readdir(c, inode_inum(inode), &hash, ctx);
bch_err_fn(c, ret);
return bch2_err_class(ret);
@@ -2002,14 +2007,14 @@ retry:
goto err;
if (k.k->type != KEY_TYPE_dirent) {
- ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode;
+ ret = bch_err_throw(c, ENOENT_dirent_doesnt_match_inode);
goto err;
}
d = bkey_s_c_to_dirent(k);
ret = bch2_dirent_read_target(trans, inode_inum(dir), d, &target);
if (ret > 0)
- ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode;
+ ret = bch_err_throw(c, ENOENT_dirent_doesnt_match_inode);
if (ret)
goto err;
@@ -2175,7 +2180,13 @@ static void bch2_evict_inode(struct inode *vinode)
KEY_TYPE_QUOTA_WARN);
bch2_quota_acct(c, inode->ei_qid, Q_INO, -1,
KEY_TYPE_QUOTA_WARN);
- bch2_inode_rm(c, inode_inum(inode));
+ int ret = bch2_inode_rm(c, inode_inum(inode));
+ if (ret && !bch2_err_matches(ret, EROFS)) {
+ bch_err_msg(c, ret, "VFS incorrectly tried to delete inode %llu:%llu",
+ inode->ei_inum.subvol,
+ inode->ei_inum.inum);
+ bch2_sb_error_count(c, BCH_FSCK_ERR_vfs_bad_inode_rm);
+ }
/*
* If we are deleting, we need it present in the vfs hash table
@@ -2322,14 +2333,13 @@ static int bch2_show_devname(struct seq_file *seq, struct dentry *root)
struct bch_fs *c = root->d_sb->s_fs_info;
bool first = true;
- rcu_read_lock();
+ guard(rcu)();
for_each_online_member_rcu(c, ca) {
if (!first)
seq_putc(seq, ':');
first = false;
seq_puts(seq, ca->disk_sb.sb_name);
}
- rcu_read_unlock();
return 0;
}
@@ -2526,16 +2536,16 @@ got_sb:
sb->s_bdi->ra_pages = VM_READAHEAD_PAGES;
- rcu_read_lock();
- for_each_online_member_rcu(c, ca) {
- struct block_device *bdev = ca->disk_sb.bdev;
+ scoped_guard(rcu) {
+ for_each_online_member_rcu(c, ca) {
+ struct block_device *bdev = ca->disk_sb.bdev;
- /* XXX: create an anonymous device for multi device filesystems */
- sb->s_bdev = bdev;
- sb->s_dev = bdev->bd_dev;
- break;
+ /* XXX: create an anonymous device for multi device filesystems */
+ sb->s_bdev = bdev;
+ sb->s_dev = bdev->bd_dev;
+ break;
+ }
}
- rcu_read_unlock();
c->dev = sb->s_dev;
diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c
index 49f46df8340e..68ed69a255e1 100644
--- a/fs/bcachefs/fsck.c
+++ b/fs/bcachefs/fsck.c
@@ -23,14 +23,15 @@
#include <linux/bsearch.h>
#include <linux/dcache.h> /* struct qstr */
-static int dirent_points_to_inode_nowarn(struct bkey_s_c_dirent d,
+static int dirent_points_to_inode_nowarn(struct bch_fs *c,
+ struct bkey_s_c_dirent d,
struct bch_inode_unpacked *inode)
{
if (d.v->d_type == DT_SUBVOL
? le32_to_cpu(d.v->d_child_subvol) == inode->bi_subvol
: le64_to_cpu(d.v->d_inum) == inode->bi_inum)
return 0;
- return -BCH_ERR_ENOENT_dirent_doesnt_match_inode;
+ return bch_err_throw(c, ENOENT_dirent_doesnt_match_inode);
}
static void dirent_inode_mismatch_msg(struct printbuf *out,
@@ -49,7 +50,7 @@ static int dirent_points_to_inode(struct bch_fs *c,
struct bkey_s_c_dirent dirent,
struct bch_inode_unpacked *inode)
{
- int ret = dirent_points_to_inode_nowarn(dirent, inode);
+ int ret = dirent_points_to_inode_nowarn(c, dirent, inode);
if (ret) {
struct printbuf buf = PRINTBUF;
dirent_inode_mismatch_msg(&buf, c, dirent, inode);
@@ -152,7 +153,7 @@ static int find_snapshot_tree_subvol(struct btree_trans *trans,
goto found;
}
}
- ret = -BCH_ERR_ENOENT_no_snapshot_tree_subvol;
+ ret = bch_err_throw(trans->c, ENOENT_no_snapshot_tree_subvol);
found:
bch2_trans_iter_exit(trans, &iter);
return ret;
@@ -229,7 +230,7 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot,
if (d_type != DT_DIR) {
bch_err(c, "error looking up lost+found: not a directory");
- return -BCH_ERR_ENOENT_not_directory;
+ return bch_err_throw(c, ENOENT_not_directory);
}
/*
@@ -531,7 +532,7 @@ static int reconstruct_subvol(struct btree_trans *trans, u32 snapshotid, u32 sub
if (!bch2_snapshot_is_leaf(c, snapshotid)) {
bch_err(c, "need to reconstruct subvol, but have interior node snapshot");
- return -BCH_ERR_fsck_repair_unimplemented;
+ return bch_err_throw(c, fsck_repair_unimplemented);
}
/*
@@ -643,11 +644,6 @@ static int reconstruct_inode(struct btree_trans *trans, enum btree_id btree, u32
return __bch2_fsck_write_inode(trans, &new_inode);
}
-struct snapshots_seen {
- struct bpos pos;
- snapshot_id_list ids;
-};
-
static inline void snapshots_seen_exit(struct snapshots_seen *s)
{
darray_exit(&s->ids);
@@ -890,14 +886,11 @@ lookup_inode_for_snapshot(struct btree_trans *trans, struct inode_walker *w, str
{
struct bch_fs *c = trans->c;
- struct inode_walker_entry *i;
- __darray_for_each(w->inodes, i)
- if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, i->inode.bi_snapshot))
- goto found;
+ struct inode_walker_entry *i = darray_find_p(w->inodes, i,
+ bch2_snapshot_is_ancestor(c, k.k->p.snapshot, i->inode.bi_snapshot));
- return NULL;
-found:
- BUG_ON(k.k->p.snapshot > i->inode.bi_snapshot);
+ if (!i)
+ return NULL;
struct printbuf buf = PRINTBUF;
int ret = 0;
@@ -947,7 +940,7 @@ found:
if (ret)
goto fsck_err;
- ret = -BCH_ERR_transaction_restart_nested;
+ ret = bch_err_throw(c, transaction_restart_nested);
goto fsck_err;
}
@@ -992,7 +985,8 @@ int bch2_fsck_update_backpointers(struct btree_trans *trans,
int ret = 0;
if (d->v.d_type == DT_SUBVOL) {
- BUG();
+ bch_err(trans->c, "%s does not support DT_SUBVOL", __func__);
+ ret = -BCH_ERR_fsck_repair_unimplemented;
} else {
ret = get_visible_inodes(trans, &target, s, le64_to_cpu(d->v.d_inum));
if (ret)
@@ -1048,7 +1042,7 @@ static int check_inode_dirent_inode(struct btree_trans *trans,
if (ret && !bch2_err_matches(ret, ENOENT))
return ret;
- if ((ret || dirent_points_to_inode_nowarn(d, inode)) &&
+ if ((ret || dirent_points_to_inode_nowarn(c, d, inode)) &&
inode->bi_subvol &&
(inode->bi_flags & BCH_INODE_has_child_snapshot)) {
/* Older version of a renamed subvolume root: we won't have a
@@ -1069,7 +1063,7 @@ static int check_inode_dirent_inode(struct btree_trans *trans,
trans, inode_points_to_missing_dirent,
"inode points to missing dirent\n%s",
(bch2_inode_unpacked_to_text(&buf, inode), buf.buf)) ||
- fsck_err_on(!ret && dirent_points_to_inode_nowarn(d, inode),
+ fsck_err_on(!ret && dirent_points_to_inode_nowarn(c, d, inode),
trans, inode_points_to_wrong_dirent,
"%s",
(printbuf_reset(&buf),
@@ -1174,6 +1168,14 @@ static int check_inode(struct btree_trans *trans,
ret = 0;
}
+ if (fsck_err_on(S_ISDIR(u.bi_mode) && u.bi_size,
+ trans, inode_dir_has_nonzero_i_size,
+ "directory %llu:%u with nonzero i_size %lli",
+ u.bi_inum, u.bi_snapshot, u.bi_size)) {
+ u.bi_size = 0;
+ do_update = true;
+ }
+
ret = bch2_inode_has_child_snapshots(trans, k.k->p);
if (ret < 0)
goto err;
@@ -1452,7 +1454,7 @@ static int check_key_has_inode(struct btree_trans *trans,
goto err;
inode->last_pos.inode--;
- ret = -BCH_ERR_transaction_restart_nested;
+ ret = bch_err_throw(c, transaction_restart_nested);
goto err;
}
@@ -1569,7 +1571,7 @@ static int extent_ends_at(struct bch_fs *c,
sizeof(seen->ids.data[0]) * seen->ids.size,
GFP_KERNEL);
if (!n.seen.ids.data)
- return -BCH_ERR_ENOMEM_fsck_extent_ends_at;
+ return bch_err_throw(c, ENOMEM_fsck_extent_ends_at);
__darray_for_each(extent_ends->e, i) {
if (i->snapshot == k.k->p.snapshot) {
@@ -1619,7 +1621,7 @@ static int overlapping_extents_found(struct btree_trans *trans,
bch_err(c, "%s: error finding first overlapping extent when repairing, got%s",
__func__, buf.buf);
- ret = -BCH_ERR_internal_fsck_err;
+ ret = bch_err_throw(c, internal_fsck_err);
goto err;
}
@@ -1644,7 +1646,7 @@ static int overlapping_extents_found(struct btree_trans *trans,
pos2.size != k2.k->size) {
bch_err(c, "%s: error finding seconding overlapping extent when repairing%s",
__func__, buf.buf);
- ret = -BCH_ERR_internal_fsck_err;
+ ret = bch_err_throw(c, internal_fsck_err);
goto err;
}
@@ -1692,7 +1694,7 @@ static int overlapping_extents_found(struct btree_trans *trans,
* We overwrote the second extent - restart
* check_extent() from the top:
*/
- ret = -BCH_ERR_transaction_restart_nested;
+ ret = bch_err_throw(c, transaction_restart_nested);
}
}
fsck_err:
@@ -2045,7 +2047,7 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *
(bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) {
if (!new_parent_subvol) {
bch_err(c, "could not find a subvol for snapshot %u", d.k->p.snapshot);
- return -BCH_ERR_fsck_repair_unimplemented;
+ return bch_err_throw(c, fsck_repair_unimplemented);
}
struct bkey_i_dirent *new_dirent = bch2_bkey_make_mut_typed(trans, iter, &d.s_c, 0, dirent);
@@ -2107,7 +2109,7 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *
if (ret) {
bch_err(c, "subvol %u points to missing inode root %llu", target_subvol, target_inum);
- ret = -BCH_ERR_fsck_repair_unimplemented;
+ ret = bch_err_throw(c, fsck_repair_unimplemented);
goto err;
}
@@ -2139,7 +2141,8 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
struct bch_hash_info *hash_info,
struct inode_walker *dir,
struct inode_walker *target,
- struct snapshots_seen *s)
+ struct snapshots_seen *s,
+ bool *need_second_pass)
{
struct bch_fs *c = trans->c;
struct inode_walker_entry *i;
@@ -2181,7 +2184,12 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
*hash_info = bch2_hash_info_init(c, &i->inode);
dir->first_this_inode = false;
- ret = bch2_str_hash_check_key(trans, s, &bch2_dirent_hash_desc, hash_info, iter, k);
+#ifdef CONFIG_UNICODE
+ hash_info->cf_encoding = bch2_inode_casefold(c, &i->inode) ? c->cf_encoding : NULL;
+#endif
+
+ ret = bch2_str_hash_check_key(trans, s, &bch2_dirent_hash_desc, hash_info,
+ iter, k, need_second_pass);
if (ret < 0)
goto err;
if (ret) {
@@ -2202,31 +2210,34 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, k),
buf.buf))) {
- struct qstr name = bch2_dirent_get_name(d);
- u32 subvol = d.v->d_type == DT_SUBVOL
- ? le32_to_cpu(d.v->d_parent_subvol)
- : 0;
+ subvol_inum dir_inum = { .subvol = d.v->d_type == DT_SUBVOL
+ ? le32_to_cpu(d.v->d_parent_subvol)
+ : 0,
+ };
u64 target = d.v->d_type == DT_SUBVOL
? le32_to_cpu(d.v->d_child_subvol)
: le64_to_cpu(d.v->d_inum);
- u64 dir_offset;
+ struct qstr name = bch2_dirent_get_name(d);
+
+ struct bkey_i_dirent *new_d =
+ bch2_dirent_create_key(trans, hash_info, dir_inum,
+ d.v->d_type, &name, NULL, target);
+ ret = PTR_ERR_OR_ZERO(new_d);
+ if (ret)
+ goto out;
- ret = bch2_hash_delete_at(trans,
+ new_d->k.p.inode = d.k->p.inode;
+ new_d->k.p.snapshot = d.k->p.snapshot;
+
+ struct btree_iter dup_iter = {};
+ ret = bch2_hash_delete_at(trans,
bch2_dirent_hash_desc, hash_info, iter,
BTREE_UPDATE_internal_snapshot_node) ?:
- bch2_dirent_create_snapshot(trans, subvol,
- d.k->p.inode, d.k->p.snapshot,
- hash_info,
- d.v->d_type,
- &name,
- target,
- &dir_offset,
- BTREE_ITER_with_updates|
- BTREE_UPDATE_internal_snapshot_node|
- STR_HASH_must_create) ?:
- bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
-
- /* might need another check_dirents pass */
+ bch2_str_hash_repair_key(trans, s,
+ &bch2_dirent_hash_desc, hash_info,
+ iter, bkey_i_to_s_c(&new_d->k_i),
+ &dup_iter, bkey_s_c_null,
+ need_second_pass);
goto out;
}
@@ -2294,7 +2305,6 @@ out:
err:
fsck_err:
printbuf_exit(&buf);
- bch_err_fn(c, ret);
return ret;
}
@@ -2308,16 +2318,31 @@ int bch2_check_dirents(struct bch_fs *c)
struct inode_walker target = inode_walker_init();
struct snapshots_seen s;
struct bch_hash_info hash_info;
+ bool need_second_pass = false, did_second_pass = false;
+ int ret;
snapshots_seen_init(&s);
-
- int ret = bch2_trans_run(c,
- for_each_btree_key(trans, iter, BTREE_ID_dirents,
+again:
+ ret = bch2_trans_run(c,
+ for_each_btree_key_commit(trans, iter, BTREE_ID_dirents,
POS(BCACHEFS_ROOT_INO, 0),
BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
- check_dirent(trans, &iter, k, &hash_info, &dir, &target, &s)) ?:
+ NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+ check_dirent(trans, &iter, k, &hash_info, &dir, &target, &s,
+ &need_second_pass)) ?:
check_subdir_count_notnested(trans, &dir));
+ if (!ret && need_second_pass && !did_second_pass) {
+ bch_info(c, "check_dirents requires second pass");
+ swap(did_second_pass, need_second_pass);
+ goto again;
+ }
+
+ if (!ret && need_second_pass) {
+ bch_err(c, "dirents not repairing");
+ ret = -EINVAL;
+ }
+
snapshots_seen_exit(&s);
inode_walker_exit(&dir);
inode_walker_exit(&target);
@@ -2331,16 +2356,14 @@ static int check_xattr(struct btree_trans *trans, struct btree_iter *iter,
struct inode_walker *inode)
{
struct bch_fs *c = trans->c;
- struct inode_walker_entry *i;
- int ret;
- ret = bch2_check_key_has_snapshot(trans, iter, k);
+ int ret = bch2_check_key_has_snapshot(trans, iter, k);
if (ret < 0)
return ret;
if (ret)
return 0;
- i = walk_inode(trans, inode, k);
+ struct inode_walker_entry *i = walk_inode(trans, inode, k);
ret = PTR_ERR_OR_ZERO(i);
if (ret)
return ret;
@@ -2356,9 +2379,9 @@ static int check_xattr(struct btree_trans *trans, struct btree_iter *iter,
*hash_info = bch2_hash_info_init(c, &i->inode);
inode->first_this_inode = false;
- ret = bch2_str_hash_check_key(trans, NULL, &bch2_xattr_hash_desc, hash_info, iter, k);
- bch_err_fn(c, ret);
- return ret;
+ bool need_second_pass = false;
+ return bch2_str_hash_check_key(trans, NULL, &bch2_xattr_hash_desc, hash_info,
+ iter, k, &need_second_pass);
}
/*
@@ -2747,7 +2770,7 @@ static int add_nlink(struct bch_fs *c, struct nlink_table *t,
if (!d) {
bch_err(c, "fsck: error allocating memory for nlink_table, size %zu",
new_size);
- return -BCH_ERR_ENOMEM_fsck_add_nlink;
+ return bch_err_throw(c, ENOMEM_fsck_add_nlink);
}
if (t->d)
diff --git a/fs/bcachefs/fsck.h b/fs/bcachefs/fsck.h
index 574948278cd4..e5fe7cf7b251 100644
--- a/fs/bcachefs/fsck.h
+++ b/fs/bcachefs/fsck.h
@@ -4,6 +4,12 @@
#include "str_hash.h"
+/* recoverds snapshot IDs of overwrites at @pos */
+struct snapshots_seen {
+ struct bpos pos;
+ snapshot_id_list ids;
+};
+
int bch2_fsck_update_backpointers(struct btree_trans *,
struct snapshots_seen *,
const struct bch_hash_desc,
diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c
index 5cf70108ae2f..53e5dc1f6ac1 100644
--- a/fs/bcachefs/inode.c
+++ b/fs/bcachefs/inode.c
@@ -38,6 +38,7 @@ static const char * const bch2_inode_flag_strs[] = {
#undef x
static int delete_ancestor_snapshot_inodes(struct btree_trans *, struct bpos);
+static int may_delete_deleted_inum(struct btree_trans *, subvol_inum);
static const u8 byte_table[8] = { 1, 2, 3, 4, 6, 8, 10, 13 };
@@ -1041,7 +1042,7 @@ again:
goto found_slot;
if (!ret && start == min)
- ret = -BCH_ERR_ENOSPC_inode_create;
+ ret = bch_err_throw(trans->c, ENOSPC_inode_create);
if (ret) {
bch2_trans_iter_exit(trans, iter);
@@ -1130,19 +1131,23 @@ int bch2_inode_rm(struct bch_fs *c, subvol_inum inum)
u32 snapshot;
int ret;
+ ret = lockrestart_do(trans, may_delete_deleted_inum(trans, inum));
+ if (ret)
+ goto err2;
+
/*
* If this was a directory, there shouldn't be any real dirents left -
* but there could be whiteouts (from hash collisions) that we should
* delete:
*
- * XXX: the dirent could ideally would delete whiteouts when they're no
+ * XXX: the dirent code ideally would delete whiteouts when they're no
* longer needed
*/
ret = bch2_inode_delete_keys(trans, inum, BTREE_ID_extents) ?:
bch2_inode_delete_keys(trans, inum, BTREE_ID_xattrs) ?:
bch2_inode_delete_keys(trans, inum, BTREE_ID_dirents);
if (ret)
- goto err;
+ goto err2;
retry:
bch2_trans_begin(trans);
@@ -1161,7 +1166,7 @@ retry:
bch2_fs_inconsistent(c,
"inode %llu:%u not found when deleting",
inum.inum, snapshot);
- ret = -BCH_ERR_ENOENT_inode;
+ ret = bch_err_throw(c, ENOENT_inode);
goto err;
}
@@ -1328,7 +1333,7 @@ retry:
bch2_fs_inconsistent(c,
"inode %llu:%u not found when deleting",
inum, snapshot);
- ret = -BCH_ERR_ENOENT_inode;
+ ret = bch_err_throw(c, ENOENT_inode);
goto err;
}
@@ -1392,10 +1397,8 @@ int bch2_inode_rm_snapshot(struct btree_trans *trans, u64 inum, u32 snapshot)
delete_ancestor_snapshot_inodes(trans, SPOS(0, inum, snapshot));
}
-static int may_delete_deleted_inode(struct btree_trans *trans,
- struct btree_iter *iter,
- struct bpos pos,
- bool *need_another_pass)
+static int may_delete_deleted_inode(struct btree_trans *trans, struct bpos pos,
+ bool from_deleted_inodes)
{
struct bch_fs *c = trans->c;
struct btree_iter inode_iter;
@@ -1409,12 +1412,14 @@ static int may_delete_deleted_inode(struct btree_trans *trans,
if (ret)
return ret;
- ret = bkey_is_inode(k.k) ? 0 : -BCH_ERR_ENOENT_inode;
- if (fsck_err_on(!bkey_is_inode(k.k),
+ ret = bkey_is_inode(k.k) ? 0 : bch_err_throw(c, ENOENT_inode);
+ if (fsck_err_on(from_deleted_inodes && ret,
trans, deleted_inode_missing,
"nonexistent inode %llu:%u in deleted_inodes btree",
pos.offset, pos.snapshot))
goto delete;
+ if (ret)
+ goto out;
ret = bch2_inode_unpack(k, &inode);
if (ret)
@@ -1422,7 +1427,8 @@ static int may_delete_deleted_inode(struct btree_trans *trans,
if (S_ISDIR(inode.bi_mode)) {
ret = bch2_empty_dir_snapshot(trans, pos.offset, 0, pos.snapshot);
- if (fsck_err_on(bch2_err_matches(ret, ENOTEMPTY),
+ if (fsck_err_on(from_deleted_inodes &&
+ bch2_err_matches(ret, ENOTEMPTY),
trans, deleted_inode_is_dir,
"non empty directory %llu:%u in deleted_inodes btree",
pos.offset, pos.snapshot))
@@ -1431,17 +1437,25 @@ static int may_delete_deleted_inode(struct btree_trans *trans,
goto out;
}
- if (fsck_err_on(!(inode.bi_flags & BCH_INODE_unlinked),
+ ret = inode.bi_flags & BCH_INODE_unlinked ? 0 : bch_err_throw(c, inode_not_unlinked);
+ if (fsck_err_on(from_deleted_inodes && ret,
trans, deleted_inode_not_unlinked,
"non-deleted inode %llu:%u in deleted_inodes btree",
pos.offset, pos.snapshot))
goto delete;
+ if (ret)
+ goto out;
- if (fsck_err_on(inode.bi_flags & BCH_INODE_has_child_snapshot,
+ ret = !(inode.bi_flags & BCH_INODE_has_child_snapshot)
+ ? 0 : bch_err_throw(c, inode_has_child_snapshot);
+
+ if (fsck_err_on(from_deleted_inodes && ret,
trans, deleted_inode_has_child_snapshots,
"inode with child snapshots %llu:%u in deleted_inodes btree",
pos.offset, pos.snapshot))
goto delete;
+ if (ret)
+ goto out;
ret = bch2_inode_has_child_snapshots(trans, k.k->p);
if (ret < 0)
@@ -1458,19 +1472,28 @@ static int may_delete_deleted_inode(struct btree_trans *trans,
if (ret)
goto out;
}
+
+ if (!from_deleted_inodes) {
+ ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?:
+ bch_err_throw(c, inode_has_child_snapshot);
+ goto out;
+ }
+
goto delete;
}
- if (test_bit(BCH_FS_clean_recovery, &c->flags) &&
- !fsck_err(trans, deleted_inode_but_clean,
- "filesystem marked as clean but have deleted inode %llu:%u",
- pos.offset, pos.snapshot)) {
- ret = 0;
- goto out;
- }
+ if (from_deleted_inodes) {
+ if (test_bit(BCH_FS_clean_recovery, &c->flags) &&
+ !fsck_err(trans, deleted_inode_but_clean,
+ "filesystem marked as clean but have deleted inode %llu:%u",
+ pos.offset, pos.snapshot)) {
+ ret = 0;
+ goto out;
+ }
- ret = 1;
+ ret = 1;
+ }
out:
fsck_err:
bch2_trans_iter_exit(trans, &inode_iter);
@@ -1481,12 +1504,19 @@ delete:
goto out;
}
+static int may_delete_deleted_inum(struct btree_trans *trans, subvol_inum inum)
+{
+ u32 snapshot;
+
+ return bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot) ?:
+ may_delete_deleted_inode(trans, SPOS(0, inum.inum, snapshot), false);
+}
+
int bch2_delete_dead_inodes(struct bch_fs *c)
{
struct btree_trans *trans = bch2_trans_get(c);
- bool need_another_pass;
int ret;
-again:
+
/*
* if we ran check_inodes() unlinked inodes will have already been
* cleaned up but the write buffer will be out of sync; therefore we
@@ -1496,8 +1526,6 @@ again:
if (ret)
goto err;
- need_another_pass = false;
-
/*
* Weird transaction restart handling here because on successful delete,
* bch2_inode_rm_snapshot() will return a nested transaction restart,
@@ -1507,7 +1535,7 @@ again:
ret = for_each_btree_key_commit(trans, iter, BTREE_ID_deleted_inodes, POS_MIN,
BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({
- ret = may_delete_deleted_inode(trans, &iter, k.k->p, &need_another_pass);
+ ret = may_delete_deleted_inode(trans, k.k->p, true);
if (ret > 0) {
bch_verbose_ratelimited(c, "deleting unlinked inode %llu:%u",
k.k->p.offset, k.k->p.snapshot);
@@ -1528,10 +1556,8 @@ again:
ret;
}));
-
- if (!ret && need_another_pass)
- goto again;
err:
bch2_trans_put(trans);
+ bch_err_fn(c, ret);
return ret;
}
diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h
index 77ad2d549541..82cec2836cbd 100644
--- a/fs/bcachefs/inode.h
+++ b/fs/bcachefs/inode.h
@@ -283,15 +283,6 @@ static inline void bch2_inode_nlink_set(struct bch_inode_unpacked *bi,
int bch2_inode_nlink_inc(struct bch_inode_unpacked *);
void bch2_inode_nlink_dec(struct btree_trans *, struct bch_inode_unpacked *);
-static inline bool bch2_inode_should_have_single_bp(struct bch_inode_unpacked *inode)
-{
- bool inode_has_bp = inode->bi_dir || inode->bi_dir_offset;
-
- return S_ISDIR(inode->bi_mode) ||
- inode->bi_subvol ||
- (!inode->bi_nlink && inode_has_bp);
-}
-
struct bch_opts bch2_inode_opts_to_opts(struct bch_inode_unpacked *);
void bch2_inode_opts_get(struct bch_io_opts *, struct bch_fs *,
struct bch_inode_unpacked *);
diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c
index cc07729a4b62..bf72b1d2e2cb 100644
--- a/fs/bcachefs/io_misc.c
+++ b/fs/bcachefs/io_misc.c
@@ -91,7 +91,7 @@ int bch2_extent_fallocate(struct btree_trans *trans,
opts.data_replicas,
BCH_WATERMARK_normal, 0, &cl, &wp);
if (bch2_err_matches(ret, BCH_ERR_operation_blocked))
- ret = -BCH_ERR_transaction_restart_nested;
+ ret = bch_err_throw(c, transaction_restart_nested);
if (ret)
goto err;
diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c
index cc708d46557e..a77779afad01 100644
--- a/fs/bcachefs/io_read.c
+++ b/fs/bcachefs/io_read.c
@@ -56,7 +56,7 @@ static bool bch2_target_congested(struct bch_fs *c, u16 target)
if (!target)
return false;
- rcu_read_lock();
+ guard(rcu)();
devs = bch2_target_to_mask(c, target) ?:
&c->rw_devs[BCH_DATA_user];
@@ -73,7 +73,6 @@ static bool bch2_target_congested(struct bch_fs *c, u16 target)
total += max(congested, 0LL);
nr++;
}
- rcu_read_unlock();
return get_random_u32_below(nr * CONGESTED_MAX) < total;
}
@@ -138,21 +137,21 @@ static inline int should_promote(struct bch_fs *c, struct bkey_s_c k,
BUG_ON(!opts.promote_target);
if (!(flags & BCH_READ_may_promote))
- return -BCH_ERR_nopromote_may_not;
+ return bch_err_throw(c, nopromote_may_not);
if (bch2_bkey_has_target(c, k, opts.promote_target))
- return -BCH_ERR_nopromote_already_promoted;
+ return bch_err_throw(c, nopromote_already_promoted);
if (bkey_extent_is_unwritten(k))
- return -BCH_ERR_nopromote_unwritten;
+ return bch_err_throw(c, nopromote_unwritten);
if (bch2_target_congested(c, opts.promote_target))
- return -BCH_ERR_nopromote_congested;
+ return bch_err_throw(c, nopromote_congested);
}
if (rhashtable_lookup_fast(&c->promote_table, &pos,
bch_promote_params))
- return -BCH_ERR_nopromote_in_flight;
+ return bch_err_throw(c, nopromote_in_flight);
return 0;
}
@@ -240,7 +239,7 @@ static struct bch_read_bio *__promote_alloc(struct btree_trans *trans,
struct promote_op *op = kzalloc(sizeof(*op), GFP_KERNEL);
if (!op) {
- ret = -BCH_ERR_nopromote_enomem;
+ ret = bch_err_throw(c, nopromote_enomem);
goto err_put;
}
@@ -249,7 +248,7 @@ static struct bch_read_bio *__promote_alloc(struct btree_trans *trans,
if (rhashtable_lookup_insert_fast(&c->promote_table, &op->hash,
bch_promote_params)) {
- ret = -BCH_ERR_nopromote_in_flight;
+ ret = bch_err_throw(c, nopromote_in_flight);
goto err;
}
@@ -545,7 +544,7 @@ retry:
if (!bkey_and_val_eq(k, bkey_i_to_s_c(u->k.k))) {
/* extent we wanted to read no longer exists: */
- rbio->ret = -BCH_ERR_data_read_key_overwritten;
+ rbio->ret = bch_err_throw(trans->c, data_read_key_overwritten);
goto err;
}
@@ -1036,7 +1035,7 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
if ((bch2_bkey_extent_flags(k) & BIT_ULL(BCH_EXTENT_FLAG_poisoned)) &&
!orig->data_update)
- return -BCH_ERR_extent_poisoned;
+ return bch_err_throw(c, extent_poisoned);
retry_pick:
ret = bch2_bkey_pick_read_device(c, k, failed, &pick, dev);
@@ -1074,7 +1073,7 @@ retry_pick:
bch_err_ratelimited(c, "%s", buf.buf);
printbuf_exit(&buf);
- ret = -BCH_ERR_data_read_no_encryption_key;
+ ret = bch_err_throw(c, data_read_no_encryption_key);
goto err;
}
@@ -1128,7 +1127,7 @@ retry_pick:
if (ca)
enumerated_ref_put(&ca->io_ref[READ],
BCH_DEV_READ_REF_io_read);
- rbio->ret = -BCH_ERR_data_read_buffer_too_small;
+ rbio->ret = bch_err_throw(c, data_read_buffer_too_small);
goto out_read_done;
}
@@ -1333,7 +1332,7 @@ hole:
* have to signal that:
*/
if (u)
- orig->ret = -BCH_ERR_data_read_key_overwritten;
+ orig->ret = bch_err_throw(c, data_read_key_overwritten);
zero_fill_bio_iter(&orig->bio, iter);
out_read_done:
@@ -1510,18 +1509,18 @@ int bch2_fs_io_read_init(struct bch_fs *c)
c->opts.btree_node_size,
c->opts.encoded_extent_max) /
PAGE_SIZE, 0))
- return -BCH_ERR_ENOMEM_bio_bounce_pages_init;
+ return bch_err_throw(c, ENOMEM_bio_bounce_pages_init);
if (bioset_init(&c->bio_read, 1, offsetof(struct bch_read_bio, bio),
BIOSET_NEED_BVECS))
- return -BCH_ERR_ENOMEM_bio_read_init;
+ return bch_err_throw(c, ENOMEM_bio_read_init);
if (bioset_init(&c->bio_read_split, 1, offsetof(struct bch_read_bio, bio),
BIOSET_NEED_BVECS))
- return -BCH_ERR_ENOMEM_bio_read_split_init;
+ return bch_err_throw(c, ENOMEM_bio_read_split_init);
if (rhashtable_init(&c->promote_table, &bch_promote_params))
- return -BCH_ERR_ENOMEM_promote_table_init;
+ return bch_err_throw(c, ENOMEM_promote_table_init);
return 0;
}
diff --git a/fs/bcachefs/io_read.h b/fs/bcachefs/io_read.h
index c08b9c047b3e..45c959018919 100644
--- a/fs/bcachefs/io_read.h
+++ b/fs/bcachefs/io_read.h
@@ -91,6 +91,8 @@ static inline int bch2_read_indirect_extent(struct btree_trans *trans,
return 0;
*data_btree = BTREE_ID_reflink;
+
+ struct bch_fs *c = trans->c;
struct btree_iter iter;
struct bkey_s_c k = bch2_lookup_indirect_extent(trans, &iter,
offset_into_extent,
@@ -102,10 +104,10 @@ static inline int bch2_read_indirect_extent(struct btree_trans *trans,
if (bkey_deleted(k.k)) {
bch2_trans_iter_exit(trans, &iter);
- return -BCH_ERR_missing_indirect_extent;
+ return bch_err_throw(c, missing_indirect_extent);
}
- bch2_bkey_buf_reassemble(extent, trans->c, k);
+ bch2_bkey_buf_reassemble(extent, c, k);
bch2_trans_iter_exit(trans, &iter);
return 0;
}
diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c
index 52a60982a66b..88b1eec8eff3 100644
--- a/fs/bcachefs/io_write.c
+++ b/fs/bcachefs/io_write.c
@@ -558,6 +558,7 @@ static void bch2_write_done(struct closure *cl)
static noinline int bch2_write_drop_io_error_ptrs(struct bch_write_op *op)
{
+ struct bch_fs *c = op->c;
struct keylist *keys = &op->insert_keys;
struct bkey_i *src, *dst = keys->keys, *n;
@@ -569,7 +570,7 @@ static noinline int bch2_write_drop_io_error_ptrs(struct bch_write_op *op)
test_bit(ptr->dev, op->failed.d));
if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(src)))
- return -BCH_ERR_data_write_io;
+ return bch_err_throw(c, data_write_io);
}
if (dst != src)
@@ -976,7 +977,7 @@ csum_err:
op->crc.csum_type < BCH_CSUM_NR
? __bch2_csum_types[op->crc.csum_type]
: "(unknown)");
- return -BCH_ERR_data_write_csum;
+ return bch_err_throw(c, data_write_csum);
}
static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp,
@@ -1208,16 +1209,13 @@ static bool bch2_extent_is_writeable(struct bch_write_op *op,
e = bkey_s_c_to_extent(k);
- rcu_read_lock();
+ guard(rcu)();
extent_for_each_ptr_decode(e, p, entry) {
- if (crc_is_encoded(p.crc) || p.has_ec) {
- rcu_read_unlock();
+ if (crc_is_encoded(p.crc) || p.has_ec)
return false;
- }
replicas += bch2_extent_ptr_durability(c, &p);
}
- rcu_read_unlock();
return replicas >= op->opts.data_replicas;
}
@@ -1290,7 +1288,7 @@ static void bch2_nocow_write_convert_unwritten(struct bch_write_op *op)
static void __bch2_nocow_write_done(struct bch_write_op *op)
{
if (unlikely(op->flags & BCH_WRITE_io_error)) {
- op->error = -BCH_ERR_data_write_io;
+ op->error = bch_err_throw(op->c, data_write_io);
} else if (unlikely(op->flags & BCH_WRITE_convert_unwritten))
bch2_nocow_write_convert_unwritten(op);
}
@@ -1483,10 +1481,10 @@ err_bucket_stale:
"pointer to invalid bucket in nocow path on device %llu\n %s",
stale_at->b.inode,
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
- ret = -BCH_ERR_data_write_invalid_ptr;
+ ret = bch_err_throw(c, data_write_invalid_ptr);
} else {
/* We can retry this: */
- ret = -BCH_ERR_transaction_restart;
+ ret = bch_err_throw(c, transaction_restart);
}
printbuf_exit(&buf);
@@ -1693,18 +1691,18 @@ CLOSURE_CALLBACK(bch2_write)
if (unlikely(bio->bi_iter.bi_size & (c->opts.block_size - 1))) {
bch2_write_op_error(op, op->pos.offset, "misaligned write");
- op->error = -BCH_ERR_data_write_misaligned;
+ op->error = bch_err_throw(c, data_write_misaligned);
goto err;
}
if (c->opts.nochanges) {
- op->error = -BCH_ERR_erofs_no_writes;
+ op->error = bch_err_throw(c, erofs_no_writes);
goto err;
}
if (!(op->flags & BCH_WRITE_move) &&
!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_write)) {
- op->error = -BCH_ERR_erofs_no_writes;
+ op->error = bch_err_throw(c, erofs_no_writes);
goto err;
}
@@ -1776,7 +1774,7 @@ int bch2_fs_io_write_init(struct bch_fs *c)
{
if (bioset_init(&c->bio_write, 1, offsetof(struct bch_write_bio, bio), BIOSET_NEED_BVECS) ||
bioset_init(&c->replica_set, 4, offsetof(struct bch_write_bio, bio), 0))
- return -BCH_ERR_ENOMEM_bio_write_init;
+ return bch_err_throw(c, ENOMEM_bio_write_init);
return 0;
}
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index 09b70fd140a1..dda802a656cf 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -397,7 +397,7 @@ static int journal_entry_open(struct journal *j)
BUG_ON(BCH_SB_CLEAN(c->disk_sb.sb));
if (j->blocked)
- return -BCH_ERR_journal_blocked;
+ return bch_err_throw(c, journal_blocked);
if (j->cur_entry_error)
return j->cur_entry_error;
@@ -407,23 +407,23 @@ static int journal_entry_open(struct journal *j)
return ret;
if (!fifo_free(&j->pin))
- return -BCH_ERR_journal_pin_full;
+ return bch_err_throw(c, journal_pin_full);
if (nr_unwritten_journal_entries(j) == ARRAY_SIZE(j->buf))
- return -BCH_ERR_journal_max_in_flight;
+ return bch_err_throw(c, journal_max_in_flight);
if (atomic64_read(&j->seq) - j->seq_write_started == JOURNAL_STATE_BUF_NR)
- return -BCH_ERR_journal_max_open;
+ return bch_err_throw(c, journal_max_open);
if (unlikely(journal_cur_seq(j) >= JOURNAL_SEQ_MAX)) {
bch_err(c, "cannot start: journal seq overflow");
if (bch2_fs_emergency_read_only_locked(c))
bch_err(c, "fatal error - emergency read only");
- return -BCH_ERR_journal_shutdown;
+ return bch_err_throw(c, journal_shutdown);
}
if (!j->free_buf && !buf->data)
- return -BCH_ERR_journal_buf_enomem; /* will retry after write completion frees up a buf */
+ return bch_err_throw(c, journal_buf_enomem); /* will retry after write completion frees up a buf */
BUG_ON(!j->cur_entry_sectors);
@@ -447,7 +447,7 @@ static int journal_entry_open(struct journal *j)
u64s = clamp_t(int, u64s, 0, JOURNAL_ENTRY_CLOSED_VAL - 1);
if (u64s <= (ssize_t) j->early_journal_entries.nr)
- return -BCH_ERR_journal_full;
+ return bch_err_throw(c, journal_full);
if (fifo_empty(&j->pin) && j->reclaim_thread)
wake_up_process(j->reclaim_thread);
@@ -464,7 +464,7 @@ static int journal_entry_open(struct journal *j)
journal_cur_seq(j));
if (bch2_fs_emergency_read_only_locked(c))
bch_err(c, "fatal error - emergency read only");
- return -BCH_ERR_journal_shutdown;
+ return bch_err_throw(c, journal_shutdown);
}
BUG_ON(j->pin.back - 1 != atomic64_read(&j->seq));
@@ -597,16 +597,16 @@ retry:
return ret;
if (j->blocked)
- return -BCH_ERR_journal_blocked;
+ return bch_err_throw(c, journal_blocked);
if ((flags & BCH_WATERMARK_MASK) < j->watermark) {
- ret = -BCH_ERR_journal_full;
+ ret = bch_err_throw(c, journal_full);
can_discard = j->can_discard;
goto out;
}
if (nr_unwritten_journal_entries(j) == ARRAY_SIZE(j->buf) && !journal_entry_is_open(j)) {
- ret = -BCH_ERR_journal_max_in_flight;
+ ret = bch_err_throw(c, journal_max_in_flight);
goto out;
}
@@ -647,7 +647,7 @@ out:
goto retry;
if (journal_error_check_stuck(j, ret, flags))
- ret = -BCH_ERR_journal_stuck;
+ ret = bch_err_throw(c, journal_stuck);
if (ret == -BCH_ERR_journal_max_in_flight &&
track_event_change(&c->times[BCH_TIME_blocked_journal_max_in_flight], true) &&
@@ -708,10 +708,9 @@ static unsigned max_dev_latency(struct bch_fs *c)
{
u64 nsecs = 0;
- rcu_read_lock();
+ guard(rcu)();
for_each_rw_member_rcu(c, ca)
nsecs = max(nsecs, ca->io_latency[WRITE].stats.max_duration);
- rcu_read_unlock();
return nsecs_to_jiffies(nsecs);
}
@@ -813,6 +812,7 @@ out:
int bch2_journal_flush_seq_async(struct journal *j, u64 seq,
struct closure *parent)
{
+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct journal_buf *buf;
int ret = 0;
@@ -828,7 +828,7 @@ int bch2_journal_flush_seq_async(struct journal *j, u64 seq,
/* Recheck under lock: */
if (j->err_seq && seq >= j->err_seq) {
- ret = -BCH_ERR_journal_flush_err;
+ ret = bch_err_throw(c, journal_flush_err);
goto out;
}
@@ -999,7 +999,7 @@ int bch2_journal_meta(struct journal *j)
struct bch_fs *c = container_of(j, struct bch_fs, journal);
if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_journal))
- return -BCH_ERR_erofs_no_writes;
+ return bch_err_throw(c, erofs_no_writes);
int ret = __bch2_journal_meta(j);
enumerated_ref_put(&c->writes, BCH_WRITE_REF_journal);
@@ -1132,7 +1132,7 @@ static int bch2_set_nr_journal_buckets_iter(struct bch_dev *ca, unsigned nr,
new_buckets = kcalloc(nr, sizeof(u64), GFP_KERNEL);
new_bucket_seq = kcalloc(nr, sizeof(u64), GFP_KERNEL);
if (!bu || !ob || !new_buckets || !new_bucket_seq) {
- ret = -BCH_ERR_ENOMEM_set_nr_journal_buckets;
+ ret = bch_err_throw(c, ENOMEM_set_nr_journal_buckets);
goto err_free;
}
@@ -1304,6 +1304,66 @@ int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca,
return ret;
}
+int bch2_dev_journal_bucket_delete(struct bch_dev *ca, u64 b)
+{
+ struct bch_fs *c = ca->fs;
+ struct journal *j = &c->journal;
+ struct journal_device *ja = &ca->journal;
+
+ guard(mutex)(&c->sb_lock);
+ unsigned pos;
+ for (pos = 0; pos < ja->nr; pos++)
+ if (ja->buckets[pos] == b)
+ break;
+
+ if (pos == ja->nr) {
+ bch_err(ca, "journal bucket %llu not found when deleting", b);
+ return -EINVAL;
+ }
+
+ u64 *new_buckets = kcalloc(ja->nr, sizeof(u64), GFP_KERNEL);;
+ if (!new_buckets)
+ return bch_err_throw(c, ENOMEM_set_nr_journal_buckets);
+
+ memcpy(new_buckets, ja->buckets, ja->nr * sizeof(u64));
+ memmove(&new_buckets[pos],
+ &new_buckets[pos + 1],
+ (ja->nr - 1 - pos) * sizeof(new_buckets[0]));
+
+ int ret = bch2_journal_buckets_to_sb(c, ca, ja->buckets, ja->nr - 1) ?:
+ bch2_write_super(c);
+ if (ret) {
+ kfree(new_buckets);
+ return ret;
+ }
+
+ scoped_guard(spinlock, &j->lock) {
+ if (pos < ja->discard_idx)
+ --ja->discard_idx;
+ if (pos < ja->dirty_idx_ondisk)
+ --ja->dirty_idx_ondisk;
+ if (pos < ja->dirty_idx)
+ --ja->dirty_idx;
+ if (pos < ja->cur_idx)
+ --ja->cur_idx;
+
+ ja->nr--;
+
+ memmove(&ja->buckets[pos],
+ &ja->buckets[pos + 1],
+ (ja->nr - pos) * sizeof(ja->buckets[0]));
+
+ memmove(&ja->bucket_seq[pos],
+ &ja->bucket_seq[pos + 1],
+ (ja->nr - pos) * sizeof(ja->bucket_seq[0]));
+
+ bch2_journal_space_available(j);
+ }
+
+ kfree(new_buckets);
+ return 0;
+}
+
int bch2_dev_journal_alloc(struct bch_dev *ca, bool new_fs)
{
struct bch_fs *c = ca->fs;
@@ -1313,14 +1373,14 @@ int bch2_dev_journal_alloc(struct bch_dev *ca, bool new_fs)
if (c->sb.features & BIT_ULL(BCH_FEATURE_small_image)) {
bch_err(c, "cannot allocate journal, filesystem is an unresized image file");
- return -BCH_ERR_erofs_filesystem_full;
+ return bch_err_throw(c, erofs_filesystem_full);
}
unsigned nr;
int ret;
if (dynamic_fault("bcachefs:add:journal_alloc")) {
- ret = -BCH_ERR_ENOMEM_set_nr_journal_buckets;
+ ret = bch_err_throw(c, ENOMEM_set_nr_journal_buckets);
goto err;
}
@@ -1459,7 +1519,7 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq)
init_fifo(&j->pin, roundup_pow_of_two(nr), GFP_KERNEL);
if (!j->pin.data) {
bch_err(c, "error reallocating journal fifo (%llu open entries)", nr);
- return -BCH_ERR_ENOMEM_journal_pin_fifo;
+ return bch_err_throw(c, ENOMEM_journal_pin_fifo);
}
j->replay_journal_seq = last_seq;
@@ -1547,6 +1607,7 @@ void bch2_dev_journal_exit(struct bch_dev *ca)
int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb)
{
+ struct bch_fs *c = ca->fs;
struct journal_device *ja = &ca->journal;
struct bch_sb_field_journal *journal_buckets =
bch2_sb_field_get(sb, journal);
@@ -1566,7 +1627,7 @@ int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb)
ja->bucket_seq = kcalloc(ja->nr, sizeof(u64), GFP_KERNEL);
if (!ja->bucket_seq)
- return -BCH_ERR_ENOMEM_dev_journal_init;
+ return bch_err_throw(c, ENOMEM_dev_journal_init);
unsigned nr_bvecs = DIV_ROUND_UP(JOURNAL_ENTRY_SIZE_MAX, PAGE_SIZE);
@@ -1574,7 +1635,7 @@ int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb)
ja->bio[i] = kzalloc(struct_size(ja->bio[i], bio.bi_inline_vecs,
nr_bvecs), GFP_KERNEL);
if (!ja->bio[i])
- return -BCH_ERR_ENOMEM_dev_journal_init;
+ return bch_err_throw(c, ENOMEM_dev_journal_init);
ja->bio[i]->ca = ca;
ja->bio[i]->buf_idx = i;
@@ -1583,7 +1644,7 @@ int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb)
ja->buckets = kcalloc(ja->nr, sizeof(u64), GFP_KERNEL);
if (!ja->buckets)
- return -BCH_ERR_ENOMEM_dev_journal_init;
+ return bch_err_throw(c, ENOMEM_dev_journal_init);
if (journal_buckets_v2) {
unsigned nr = bch2_sb_field_journal_v2_nr_entries(journal_buckets_v2);
@@ -1637,10 +1698,12 @@ void bch2_fs_journal_init_early(struct journal *j)
int bch2_fs_journal_init(struct journal *j)
{
+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
+
j->free_buf_size = j->buf_size_want = JOURNAL_ENTRY_SIZE_MIN;
j->free_buf = kvmalloc(j->free_buf_size, GFP_KERNEL);
if (!j->free_buf)
- return -BCH_ERR_ENOMEM_journal_buf;
+ return bch_err_throw(c, ENOMEM_journal_buf);
for (unsigned i = 0; i < ARRAY_SIZE(j->buf); i++)
j->buf[i].idx = i;
@@ -1648,7 +1711,7 @@ int bch2_fs_journal_init(struct journal *j)
j->wq = alloc_workqueue("bcachefs_journal",
WQ_HIGHPRI|WQ_FREEZABLE|WQ_UNBOUND|WQ_MEM_RECLAIM, 512);
if (!j->wq)
- return -BCH_ERR_ENOMEM_fs_other_alloc;
+ return bch_err_throw(c, ENOMEM_fs_other_alloc);
return 0;
}
@@ -1672,7 +1735,7 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
printbuf_tabstop_push(out, 28);
out->atomic++;
- rcu_read_lock();
+ guard(rcu)();
s = READ_ONCE(j->reservations);
prt_printf(out, "flags:\t");
@@ -1763,8 +1826,6 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
prt_printf(out, "replicas want %u need %u\n", c->opts.metadata_replicas, c->opts.metadata_replicas_required);
- rcu_read_unlock();
-
--out->atomic;
}
diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h
index 8ff00a0ec778..83734fe4331f 100644
--- a/fs/bcachefs/journal.h
+++ b/fs/bcachefs/journal.h
@@ -444,8 +444,9 @@ struct journal_buf *bch2_next_write_buffer_flush_journal_buf(struct journal *, u
void __bch2_journal_debug_to_text(struct printbuf *, struct journal *);
void bch2_journal_debug_to_text(struct printbuf *, struct journal *);
-int bch2_set_nr_journal_buckets(struct bch_fs *, struct bch_dev *,
- unsigned nr);
+int bch2_set_nr_journal_buckets(struct bch_fs *, struct bch_dev *, unsigned);
+int bch2_dev_journal_bucket_delete(struct bch_dev *, u64);
+
int bch2_dev_journal_alloc(struct bch_dev *, bool);
int bch2_fs_journal_alloc(struct bch_fs *);
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index 63bb207208b2..0b15d71a8d2d 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -49,25 +49,27 @@ void bch2_journal_pos_from_member_info_resume(struct bch_fs *c)
mutex_unlock(&c->sb_lock);
}
-void bch2_journal_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
- struct journal_replay *j)
+static void bch2_journal_ptr_to_text(struct printbuf *out, struct bch_fs *c, struct journal_ptr *p)
+{
+ struct bch_dev *ca = bch2_dev_tryget_noerror(c, p->dev);
+ prt_printf(out, "%s %u:%u:%u (sector %llu)",
+ ca ? ca->name : "(invalid dev)",
+ p->dev, p->bucket, p->bucket_offset, p->sector);
+ bch2_dev_put(ca);
+}
+
+void bch2_journal_ptrs_to_text(struct printbuf *out, struct bch_fs *c, struct journal_replay *j)
{
darray_for_each(j->ptrs, i) {
if (i != j->ptrs.data)
prt_printf(out, " ");
- prt_printf(out, "%u:%u:%u (sector %llu)",
- i->dev, i->bucket, i->bucket_offset, i->sector);
+ bch2_journal_ptr_to_text(out, c, i);
}
}
-static void bch2_journal_replay_to_text(struct printbuf *out, struct bch_fs *c,
- struct journal_replay *j)
+static void bch2_journal_datetime_to_text(struct printbuf *out, struct jset *j)
{
- prt_printf(out, "seq %llu ", le64_to_cpu(j->j.seq));
-
- bch2_journal_ptrs_to_text(out, c, j);
-
- for_each_jset_entry_type(entry, &j->j, BCH_JSET_ENTRY_datetime) {
+ for_each_jset_entry_type(entry, j, BCH_JSET_ENTRY_datetime) {
struct jset_entry_datetime *datetime =
container_of(entry, struct jset_entry_datetime, entry);
bch2_prt_datetime(out, le64_to_cpu(datetime->seconds));
@@ -75,6 +77,15 @@ static void bch2_journal_replay_to_text(struct printbuf *out, struct bch_fs *c,
}
}
+static void bch2_journal_replay_to_text(struct printbuf *out, struct bch_fs *c,
+ struct journal_replay *j)
+{
+ prt_printf(out, "seq %llu ", le64_to_cpu(j->j.seq));
+ bch2_journal_datetime_to_text(out, &j->j);
+ prt_char(out, ' ');
+ bch2_journal_ptrs_to_text(out, c, j);
+}
+
static struct nonce journal_nonce(const struct jset *jset)
{
return (struct nonce) {{
@@ -188,7 +199,7 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca,
journal_entry_radix_idx(c, le64_to_cpu(j->seq)),
GFP_KERNEL);
if (!_i)
- return -BCH_ERR_ENOMEM_journal_entry_add;
+ return bch_err_throw(c, ENOMEM_journal_entry_add);
/*
* Duplicate journal entries? If so we want the one that didn't have a
@@ -231,7 +242,7 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca,
replace:
i = kvmalloc(offsetof(struct journal_replay, j) + bytes, GFP_KERNEL);
if (!i)
- return -BCH_ERR_ENOMEM_journal_entry_add;
+ return bch_err_throw(c, ENOMEM_journal_entry_add);
darray_init(&i->ptrs);
i->csum_good = entry_ptr.csum_good;
@@ -311,7 +322,7 @@ static void journal_entry_err_msg(struct printbuf *out,
bch2_sb_error_count(c, BCH_FSCK_ERR_##_err); \
if (bch2_fs_inconsistent(c, \
"corrupt metadata before write: %s\n", _buf.buf)) {\
- ret = -BCH_ERR_fsck_errors_not_fixed; \
+ ret = bch_err_throw(c, fsck_errors_not_fixed); \
goto fsck_err; \
} \
break; \
@@ -418,6 +429,10 @@ static void journal_entry_btree_keys_to_text(struct printbuf *out, struct bch_fs
bool first = true;
jset_entry_for_each_key(entry, k) {
+ /* We may be called on entries that haven't been validated: */
+ if (!k->k.u64s)
+ break;
+
if (!first) {
prt_newline(out);
bch2_prt_jset_entry_type(out, entry->type);
@@ -1005,19 +1020,19 @@ struct journal_read_buf {
size_t size;
};
-static int journal_read_buf_realloc(struct journal_read_buf *b,
+static int journal_read_buf_realloc(struct bch_fs *c, struct journal_read_buf *b,
size_t new_size)
{
void *n;
/* the bios are sized for this many pages, max: */
if (new_size > JOURNAL_ENTRY_SIZE_MAX)
- return -BCH_ERR_ENOMEM_journal_read_buf_realloc;
+ return bch_err_throw(c, ENOMEM_journal_read_buf_realloc);
new_size = roundup_pow_of_two(new_size);
n = kvmalloc(new_size, GFP_KERNEL);
if (!n)
- return -BCH_ERR_ENOMEM_journal_read_buf_realloc;
+ return bch_err_throw(c, ENOMEM_journal_read_buf_realloc);
kvfree(b->data);
b->data = n;
@@ -1037,7 +1052,6 @@ static int journal_read_bucket(struct bch_dev *ca,
u64 offset = bucket_to_sector(ca, ja->buckets[bucket]),
end = offset + ca->mi.bucket_size;
bool saw_bad = false, csum_good;
- struct printbuf err = PRINTBUF;
int ret = 0;
pr_debug("reading %u", bucket);
@@ -1053,7 +1067,7 @@ reread:
bio = bio_kmalloc(nr_bvecs, GFP_KERNEL);
if (!bio)
- return -BCH_ERR_ENOMEM_journal_read_bucket;
+ return bch_err_throw(c, ENOMEM_journal_read_bucket);
bio_init(bio, ca->disk_sb.bdev, bio->bi_inline_vecs, nr_bvecs, REQ_OP_READ);
bio->bi_iter.bi_sector = offset;
@@ -1064,7 +1078,7 @@ reread:
kfree(bio);
if (!ret && bch2_meta_read_fault("journal"))
- ret = -BCH_ERR_EIO_fault_injected;
+ ret = bch_err_throw(c, EIO_fault_injected);
bch2_account_io_completion(ca, BCH_MEMBER_ERROR_read,
submit_time, !ret);
@@ -1078,7 +1092,7 @@ reread:
* found on a different device, and missing or
* no journal entries will be handled later
*/
- goto out;
+ return 0;
}
j = buf->data;
@@ -1092,15 +1106,15 @@ reread:
break;
case JOURNAL_ENTRY_REREAD:
if (vstruct_bytes(j) > buf->size) {
- ret = journal_read_buf_realloc(buf,
+ ret = journal_read_buf_realloc(c, buf,
vstruct_bytes(j));
if (ret)
- goto err;
+ return ret;
}
goto reread;
case JOURNAL_ENTRY_NONE:
if (!saw_bad)
- goto out;
+ return 0;
/*
* On checksum error we don't really trust the size
* field of the journal entry we read, so try reading
@@ -1109,7 +1123,7 @@ reread:
sectors = block_sectors(c);
goto next_block;
default:
- goto err;
+ return ret;
}
if (le64_to_cpu(j->seq) > ja->highest_seq_found) {
@@ -1126,22 +1140,20 @@ reread:
* bucket:
*/
if (le64_to_cpu(j->seq) < ja->bucket_seq[bucket])
- goto out;
+ return 0;
ja->bucket_seq[bucket] = le64_to_cpu(j->seq);
- enum bch_csum_type csum_type = JSET_CSUM_TYPE(j);
struct bch_csum csum;
csum_good = jset_csum_good(c, j, &csum);
bch2_account_io_completion(ca, BCH_MEMBER_ERROR_checksum, 0, csum_good);
if (!csum_good) {
- bch_err_dev_ratelimited(ca, "%s",
- (printbuf_reset(&err),
- prt_str(&err, "journal "),
- bch2_csum_err_msg(&err, csum_type, j->csum, csum),
- err.buf));
+ /*
+ * Don't print an error here, we'll print the error
+ * later if we need this journal entry
+ */
saw_bad = true;
}
@@ -1153,6 +1165,7 @@ reread:
mutex_lock(&jlist->lock);
ret = journal_entry_add(c, ca, (struct journal_ptr) {
.csum_good = csum_good,
+ .csum = csum,
.dev = ca->dev_idx,
.bucket = bucket,
.bucket_offset = offset -
@@ -1167,7 +1180,7 @@ reread:
case JOURNAL_ENTRY_ADD_OUT_OF_RANGE:
break;
default:
- goto err;
+ return ret;
}
next_block:
pr_debug("next");
@@ -1176,11 +1189,7 @@ next_block:
j = ((void *) j) + (sectors << 9);
}
-out:
- ret = 0;
-err:
- printbuf_exit(&err);
- return ret;
+ return 0;
}
static CLOSURE_CALLBACK(bch2_journal_read_device)
@@ -1197,7 +1206,7 @@ static CLOSURE_CALLBACK(bch2_journal_read_device)
if (!ja->nr)
goto out;
- ret = journal_read_buf_realloc(&buf, PAGE_SIZE);
+ ret = journal_read_buf_realloc(c, &buf, PAGE_SIZE);
if (ret)
goto err;
@@ -1229,13 +1238,105 @@ err:
goto out;
}
+noinline_for_stack
+static void bch2_journal_print_checksum_error(struct bch_fs *c, struct journal_replay *j)
+{
+ struct printbuf buf = PRINTBUF;
+ enum bch_csum_type csum_type = JSET_CSUM_TYPE(&j->j);
+ bool have_good = false;
+
+ prt_printf(&buf, "invalid journal checksum(s) at seq %llu ", le64_to_cpu(j->j.seq));
+ bch2_journal_datetime_to_text(&buf, &j->j);
+ prt_newline(&buf);
+
+ darray_for_each(j->ptrs, ptr)
+ if (!ptr->csum_good) {
+ bch2_journal_ptr_to_text(&buf, c, ptr);
+ prt_char(&buf, ' ');
+ bch2_csum_to_text(&buf, csum_type, ptr->csum);
+ prt_newline(&buf);
+ } else {
+ have_good = true;
+ }
+
+ prt_printf(&buf, "should be ");
+ bch2_csum_to_text(&buf, csum_type, j->j.csum);
+
+ if (have_good)
+ prt_printf(&buf, "\n(had good copy on another device)");
+
+ bch2_print_str(c, KERN_ERR, buf.buf);
+ printbuf_exit(&buf);
+}
+
+noinline_for_stack
+static int bch2_journal_check_for_missing(struct bch_fs *c, u64 start_seq, u64 end_seq)
+{
+ struct printbuf buf = PRINTBUF;
+ int ret = 0;
+
+ struct genradix_iter radix_iter;
+ struct journal_replay *i, **_i, *prev = NULL;
+ u64 seq = start_seq;
+
+ genradix_for_each(&c->journal_entries, radix_iter, _i) {
+ i = *_i;
+
+ if (journal_replay_ignore(i))
+ continue;
+
+ BUG_ON(seq > le64_to_cpu(i->j.seq));
+
+ while (seq < le64_to_cpu(i->j.seq)) {
+ while (seq < le64_to_cpu(i->j.seq) &&
+ bch2_journal_seq_is_blacklisted(c, seq, false))
+ seq++;
+
+ if (seq == le64_to_cpu(i->j.seq))
+ break;
+
+ u64 missing_start = seq;
+
+ while (seq < le64_to_cpu(i->j.seq) &&
+ !bch2_journal_seq_is_blacklisted(c, seq, false))
+ seq++;
+
+ u64 missing_end = seq - 1;
+
+ printbuf_reset(&buf);
+ prt_printf(&buf, "journal entries %llu-%llu missing! (replaying %llu-%llu)",
+ missing_start, missing_end,
+ start_seq, end_seq);
+
+ prt_printf(&buf, "\nprev at ");
+ if (prev) {
+ bch2_journal_ptrs_to_text(&buf, c, prev);
+ prt_printf(&buf, " size %zu", vstruct_sectors(&prev->j, c->block_bits));
+ } else
+ prt_printf(&buf, "(none)");
+
+ prt_printf(&buf, "\nnext at ");
+ bch2_journal_ptrs_to_text(&buf, c, i);
+ prt_printf(&buf, ", continue?");
+
+ fsck_err(c, journal_entries_missing, "%s", buf.buf);
+ }
+
+ prev = i;
+ seq++;
+ }
+fsck_err:
+ printbuf_exit(&buf);
+ return ret;
+}
+
int bch2_journal_read(struct bch_fs *c,
u64 *last_seq,
u64 *blacklist_seq,
u64 *start_seq)
{
struct journal_list jlist;
- struct journal_replay *i, **_i, *prev = NULL;
+ struct journal_replay *i, **_i;
struct genradix_iter radix_iter;
struct printbuf buf = PRINTBUF;
bool degraded = false, last_write_torn = false;
@@ -1326,12 +1427,12 @@ int bch2_journal_read(struct bch_fs *c,
return 0;
}
- bch_info(c, "journal read done, replaying entries %llu-%llu",
- *last_seq, *blacklist_seq - 1);
-
+ printbuf_reset(&buf);
+ prt_printf(&buf, "journal read done, replaying entries %llu-%llu",
+ *last_seq, *blacklist_seq - 1);
if (*start_seq != *blacklist_seq)
- bch_info(c, "dropped unflushed entries %llu-%llu",
- *blacklist_seq, *start_seq - 1);
+ prt_printf(&buf, " (unflushed %llu-%llu)", *blacklist_seq, *start_seq - 1);
+ bch_info(c, "%s", buf.buf);
/* Drop blacklisted entries and entries older than last_seq: */
genradix_for_each(&c->journal_entries, radix_iter, _i) {
@@ -1354,56 +1455,9 @@ int bch2_journal_read(struct bch_fs *c,
}
}
- /* Check for missing entries: */
- seq = *last_seq;
- genradix_for_each(&c->journal_entries, radix_iter, _i) {
- i = *_i;
-
- if (journal_replay_ignore(i))
- continue;
-
- BUG_ON(seq > le64_to_cpu(i->j.seq));
-
- while (seq < le64_to_cpu(i->j.seq)) {
- u64 missing_start, missing_end;
- struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF;
-
- while (seq < le64_to_cpu(i->j.seq) &&
- bch2_journal_seq_is_blacklisted(c, seq, false))
- seq++;
-
- if (seq == le64_to_cpu(i->j.seq))
- break;
-
- missing_start = seq;
-
- while (seq < le64_to_cpu(i->j.seq) &&
- !bch2_journal_seq_is_blacklisted(c, seq, false))
- seq++;
-
- if (prev) {
- bch2_journal_ptrs_to_text(&buf1, c, prev);
- prt_printf(&buf1, " size %zu", vstruct_sectors(&prev->j, c->block_bits));
- } else
- prt_printf(&buf1, "(none)");
- bch2_journal_ptrs_to_text(&buf2, c, i);
-
- missing_end = seq - 1;
- fsck_err(c, journal_entries_missing,
- "journal entries %llu-%llu missing! (replaying %llu-%llu)\n"
- "prev at %s\n"
- "next at %s, continue?",
- missing_start, missing_end,
- *last_seq, *blacklist_seq - 1,
- buf1.buf, buf2.buf);
-
- printbuf_exit(&buf1);
- printbuf_exit(&buf2);
- }
-
- prev = i;
- seq++;
- }
+ ret = bch2_journal_check_for_missing(c, *last_seq, *blacklist_seq - 1);
+ if (ret)
+ goto err;
genradix_for_each(&c->journal_entries, radix_iter, _i) {
union bch_replicas_padded replicas = {
@@ -1416,15 +1470,15 @@ int bch2_journal_read(struct bch_fs *c,
if (journal_replay_ignore(i))
continue;
- darray_for_each(i->ptrs, ptr) {
- struct bch_dev *ca = bch2_dev_have_ref(c, ptr->dev);
-
- if (!ptr->csum_good)
- bch_err_dev_offset(ca, ptr->sector,
- "invalid journal checksum, seq %llu%s",
- le64_to_cpu(i->j.seq),
- i->csum_good ? " (had good copy on another device)" : "");
- }
+ /*
+ * Don't print checksum errors until we know we're going to use
+ * a given journal entry:
+ */
+ darray_for_each(i->ptrs, ptr)
+ if (!ptr->csum_good) {
+ bch2_journal_print_checksum_error(c, i);
+ break;
+ }
ret = jset_validate(c,
bch2_dev_have_ref(c, i->ptrs.data[0].dev),
@@ -1467,7 +1521,7 @@ static void journal_advance_devs_to_next_bucket(struct journal *j,
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
- rcu_read_lock();
+ guard(rcu)();
darray_for_each(*devs, i) {
struct bch_dev *ca = rcu_dereference(c->devs[*i]);
if (!ca)
@@ -1489,7 +1543,6 @@ static void journal_advance_devs_to_next_bucket(struct journal *j,
ja->bucket_seq[ja->cur_idx] = le64_to_cpu(seq);
}
}
- rcu_read_unlock();
}
static void __journal_write_alloc(struct journal *j,
@@ -1559,7 +1612,7 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w,
retry_target:
devs = target_rw_devs(c, BCH_DATA_journal, target);
- devs_sorted = bch2_dev_alloc_list(c, &j->wp.stripe, &devs);
+ bch2_dev_alloc_list(c, &j->wp.stripe, &devs, &devs_sorted);
retry_alloc:
__journal_write_alloc(j, w, &devs_sorted, sectors, replicas, replicas_want);
@@ -1581,6 +1634,16 @@ retry_alloc:
done:
BUG_ON(bkey_val_u64s(&w->key.k) > BCH_REPLICAS_MAX);
+#if 0
+ /*
+ * XXX: we need a way to alert the user when we go degraded for any
+ * reason
+ */
+ if (*replicas < min(replicas_want,
+ dev_mask_nr(&c->rw_devs[BCH_DATA_free]))) {
+ }
+#endif
+
return *replicas >= replicas_need ? 0 : -BCH_ERR_insufficient_journal_devices;
}
@@ -1628,7 +1691,7 @@ static CLOSURE_CALLBACK(journal_write_done)
: j->noflush_write_time, j->write_start_time);
if (!w->devs_written.nr) {
- err = -BCH_ERR_journal_write_err;
+ err = bch_err_throw(c, journal_write_err);
} else {
bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal,
w->devs_written);
@@ -2058,7 +2121,7 @@ CLOSURE_CALLBACK(bch2_journal_write)
struct journal *j = container_of(w, struct journal, buf[w->idx]);
struct bch_fs *c = container_of(j, struct bch_fs, journal);
union bch_replicas_padded replicas;
- unsigned nr_rw_members = dev_mask_nr(&c->rw_devs[BCH_DATA_journal]);
+ unsigned nr_rw_members = dev_mask_nr(&c->rw_devs[BCH_DATA_free]);
int ret;
BUG_ON(BCH_SB_CLEAN(c->disk_sb.sb));
diff --git a/fs/bcachefs/journal_io.h b/fs/bcachefs/journal_io.h
index 12b39fcb4424..6fa82c4050fe 100644
--- a/fs/bcachefs/journal_io.h
+++ b/fs/bcachefs/journal_io.h
@@ -9,6 +9,7 @@ void bch2_journal_pos_from_member_info_resume(struct bch_fs *);
struct journal_ptr {
bool csum_good;
+ struct bch_csum csum;
u8 dev;
u32 bucket;
u32 bucket_offset;
diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c
index 70f36f6bc482..cd6201741c59 100644
--- a/fs/bcachefs/journal_reclaim.c
+++ b/fs/bcachefs/journal_reclaim.c
@@ -83,18 +83,20 @@ static struct journal_space
journal_dev_space_available(struct journal *j, struct bch_dev *ca,
enum journal_space_from from)
{
+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct journal_device *ja = &ca->journal;
unsigned sectors, buckets, unwritten;
+ unsigned bucket_size_aligned = round_down(ca->mi.bucket_size, block_sectors(c));
u64 seq;
if (from == journal_space_total)
return (struct journal_space) {
- .next_entry = ca->mi.bucket_size,
- .total = ca->mi.bucket_size * ja->nr,
+ .next_entry = bucket_size_aligned,
+ .total = bucket_size_aligned * ja->nr,
};
buckets = bch2_journal_dev_buckets_available(j, ja, from);
- sectors = ja->sectors_free;
+ sectors = round_down(ja->sectors_free, block_sectors(c));
/*
* We that we don't allocate the space for a journal entry
@@ -109,7 +111,7 @@ journal_dev_space_available(struct journal *j, struct bch_dev *ca,
continue;
/* entry won't fit on this device, skip: */
- if (unwritten > ca->mi.bucket_size)
+ if (unwritten > bucket_size_aligned)
continue;
if (unwritten >= sectors) {
@@ -119,7 +121,7 @@ journal_dev_space_available(struct journal *j, struct bch_dev *ca,
}
buckets--;
- sectors = ca->mi.bucket_size;
+ sectors = bucket_size_aligned;
}
sectors -= unwritten;
@@ -127,12 +129,12 @@ journal_dev_space_available(struct journal *j, struct bch_dev *ca,
if (sectors < ca->mi.bucket_size && buckets) {
buckets--;
- sectors = ca->mi.bucket_size;
+ sectors = bucket_size_aligned;
}
return (struct journal_space) {
.next_entry = sectors,
- .total = sectors + buckets * ca->mi.bucket_size,
+ .total = sectors + buckets * bucket_size_aligned,
};
}
@@ -146,7 +148,6 @@ static struct journal_space __journal_space_available(struct journal *j, unsigne
BUG_ON(nr_devs_want > ARRAY_SIZE(dev_space));
- rcu_read_lock();
for_each_member_device_rcu(c, ca, &c->rw_devs[BCH_DATA_journal]) {
if (!ca->journal.nr ||
!ca->mi.durability)
@@ -164,7 +165,6 @@ static struct journal_space __journal_space_available(struct journal *j, unsigne
array_insert_item(dev_space, nr_devs, pos, space);
}
- rcu_read_unlock();
if (nr_devs < nr_devs_want)
return (struct journal_space) { 0, 0 };
@@ -189,8 +189,8 @@ void bch2_journal_space_available(struct journal *j)
int ret = 0;
lockdep_assert_held(&j->lock);
+ guard(rcu)();
- rcu_read_lock();
for_each_member_device_rcu(c, ca, &c->rw_devs[BCH_DATA_journal]) {
struct journal_device *ja = &ca->journal;
@@ -210,7 +210,6 @@ void bch2_journal_space_available(struct journal *j)
max_entry_size = min_t(unsigned, max_entry_size, ca->mi.bucket_size);
nr_online++;
}
- rcu_read_unlock();
j->can_discard = can_discard;
@@ -221,15 +220,13 @@ void bch2_journal_space_available(struct journal *j)
prt_printf(&buf, "insufficient writeable journal devices available: have %u, need %u\n"
"rw journal devs:", nr_online, metadata_replicas_required(c));
- rcu_read_lock();
for_each_member_device_rcu(c, ca, &c->rw_devs[BCH_DATA_journal])
prt_printf(&buf, " %s", ca->name);
- rcu_read_unlock();
bch_err(c, "%s", buf.buf);
printbuf_exit(&buf);
}
- ret = -BCH_ERR_insufficient_journal_devices;
+ ret = bch_err_throw(c, insufficient_journal_devices);
goto out;
}
@@ -243,7 +240,7 @@ void bch2_journal_space_available(struct journal *j)
total = j->space[journal_space_total].total;
if (!j->space[journal_space_discarded].next_entry)
- ret = -BCH_ERR_journal_full;
+ ret = bch_err_throw(c, journal_full);
if ((j->space[journal_space_clean_ondisk].next_entry <
j->space[journal_space_clean_ondisk].total) &&
@@ -256,8 +253,7 @@ void bch2_journal_space_available(struct journal *j)
bch2_journal_set_watermark(j);
out:
j->cur_entry_sectors = !ret
- ? round_down(j->space[journal_space_discarded].next_entry,
- block_sectors(c))
+ ? j->space[journal_space_discarded].next_entry
: 0;
j->cur_entry_error = ret;
@@ -625,9 +621,9 @@ static u64 journal_seq_to_flush(struct journal *j)
struct bch_fs *c = container_of(j, struct bch_fs, journal);
u64 seq_to_flush = 0;
- spin_lock(&j->lock);
+ guard(spinlock)(&j->lock);
+ guard(rcu)();
- rcu_read_lock();
for_each_rw_member_rcu(c, ca) {
struct journal_device *ja = &ca->journal;
unsigned nr_buckets, bucket_to_flush;
@@ -642,15 +638,11 @@ static u64 journal_seq_to_flush(struct journal *j)
seq_to_flush = max(seq_to_flush,
ja->bucket_seq[bucket_to_flush]);
}
- rcu_read_unlock();
/* Also flush if the pin fifo is more than half full */
- seq_to_flush = max_t(s64, seq_to_flush,
- (s64) journal_cur_seq(j) -
- (j->pin.size >> 1));
- spin_unlock(&j->lock);
-
- return seq_to_flush;
+ return max_t(s64, seq_to_flush,
+ (s64) journal_cur_seq(j) -
+ (j->pin.size >> 1));
}
/**
diff --git a/fs/bcachefs/journal_sb.c b/fs/bcachefs/journal_sb.c
index 62b910f2fb27..0cb9b93f13e7 100644
--- a/fs/bcachefs/journal_sb.c
+++ b/fs/bcachefs/journal_sb.c
@@ -210,7 +210,7 @@ int bch2_journal_buckets_to_sb(struct bch_fs *c, struct bch_dev *ca,
j = bch2_sb_field_resize(&ca->disk_sb, journal_v2,
(sizeof(*j) + sizeof(j->d[0]) * nr_compacted) / sizeof(u64));
if (!j)
- return -BCH_ERR_ENOSPC_sb_journal;
+ return bch_err_throw(c, ENOSPC_sb_journal);
bch2_sb_field_delete(&ca->disk_sb, BCH_SB_FIELD_journal);
diff --git a/fs/bcachefs/journal_seq_blacklist.c b/fs/bcachefs/journal_seq_blacklist.c
index c5a7d800a0f5..af4fe416d9ec 100644
--- a/fs/bcachefs/journal_seq_blacklist.c
+++ b/fs/bcachefs/journal_seq_blacklist.c
@@ -78,7 +78,7 @@ int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64 start, u64 end)
bl = bch2_sb_field_resize(&c->disk_sb, journal_seq_blacklist,
sb_blacklist_u64s(nr + 1));
if (!bl) {
- ret = -BCH_ERR_ENOSPC_sb_journal_seq_blacklist;
+ ret = bch_err_throw(c, ENOSPC_sb_journal_seq_blacklist);
goto out;
}
@@ -152,7 +152,7 @@ int bch2_blacklist_table_initialize(struct bch_fs *c)
t = kzalloc(struct_size(t, entries, nr), GFP_KERNEL);
if (!t)
- return -BCH_ERR_ENOMEM_blacklist_table_init;
+ return bch_err_throw(c, ENOMEM_blacklist_table_init);
t->nr = nr;
diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c
index 2f63fc6d456f..57b5b3263b08 100644
--- a/fs/bcachefs/lru.c
+++ b/fs/bcachefs/lru.c
@@ -145,13 +145,11 @@ static u64 bkey_lru_type_idx(struct bch_fs *c,
case BCH_LRU_fragmentation: {
a = bch2_alloc_to_v4(k, &a_convert);
- rcu_read_lock();
+ guard(rcu)();
struct bch_dev *ca = bch2_dev_rcu_noerror(c, k.k->p.inode);
- u64 idx = ca
+ return ca
? alloc_lru_idx_fragmentation(*a, ca)
: 0;
- rcu_read_unlock();
- return idx;
}
case BCH_LRU_stripes:
return k.k->type == KEY_TYPE_stripe
diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c
index bb7a92270c09..f296cce95338 100644
--- a/fs/bcachefs/migrate.c
+++ b/fs/bcachefs/migrate.c
@@ -35,7 +35,7 @@ static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s k,
nr_good = bch2_bkey_durability(c, k.s_c);
if ((!nr_good && !(flags & lost)) ||
(nr_good < replicas && !(flags & degraded)))
- return -BCH_ERR_remove_would_lose_data;
+ return bch_err_throw(c, remove_would_lose_data);
return 0;
}
@@ -156,7 +156,7 @@ static int bch2_dev_metadata_drop(struct bch_fs *c,
/* don't handle this yet: */
if (flags & BCH_FORCE_IF_METADATA_LOST)
- return -BCH_ERR_remove_with_metadata_missing_unimplemented;
+ return bch_err_throw(c, remove_with_metadata_missing_unimplemented);
trans = bch2_trans_get(c);
bch2_bkey_buf_init(&k);
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index 79f4722621d5..eec591e947bd 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -38,30 +38,74 @@ const char * const bch2_data_ops_strs[] = {
NULL
};
-static void trace_io_move2(struct bch_fs *c, struct bkey_s_c k,
- struct bch_io_opts *io_opts,
- struct data_update_opts *data_opts)
+struct evacuate_bucket_arg {
+ struct bpos bucket;
+ int gen;
+ struct data_update_opts data_opts;
+};
+
+static bool evacuate_bucket_pred(struct bch_fs *, void *,
+ enum btree_id, struct bkey_s_c,
+ struct bch_io_opts *,
+ struct data_update_opts *);
+
+static noinline void
+trace_io_move2(struct bch_fs *c, struct bkey_s_c k,
+ struct bch_io_opts *io_opts,
+ struct data_update_opts *data_opts)
{
- if (trace_io_move_enabled()) {
- struct printbuf buf = PRINTBUF;
+ struct printbuf buf = PRINTBUF;
- bch2_bkey_val_to_text(&buf, c, k);
- prt_newline(&buf);
- bch2_data_update_opts_to_text(&buf, c, io_opts, data_opts);
- trace_io_move(c, buf.buf);
- printbuf_exit(&buf);
- }
+ bch2_bkey_val_to_text(&buf, c, k);
+ prt_newline(&buf);
+ bch2_data_update_opts_to_text(&buf, c, io_opts, data_opts);
+ trace_io_move(c, buf.buf);
+ printbuf_exit(&buf);
}
-static void trace_io_move_read2(struct bch_fs *c, struct bkey_s_c k)
+static noinline void trace_io_move_read2(struct bch_fs *c, struct bkey_s_c k)
{
- if (trace_io_move_read_enabled()) {
- struct printbuf buf = PRINTBUF;
+ struct printbuf buf = PRINTBUF;
- bch2_bkey_val_to_text(&buf, c, k);
- trace_io_move_read(c, buf.buf);
- printbuf_exit(&buf);
+ bch2_bkey_val_to_text(&buf, c, k);
+ trace_io_move_read(c, buf.buf);
+ printbuf_exit(&buf);
+}
+
+static noinline void
+trace_io_move_pred2(struct bch_fs *c, struct bkey_s_c k,
+ struct bch_io_opts *io_opts,
+ struct data_update_opts *data_opts,
+ move_pred_fn pred, void *_arg, bool p)
+{
+ struct printbuf buf = PRINTBUF;
+
+ prt_printf(&buf, "%ps: %u", pred, p);
+
+ if (pred == evacuate_bucket_pred) {
+ struct evacuate_bucket_arg *arg = _arg;
+ prt_printf(&buf, " gen=%u", arg->gen);
}
+
+ prt_newline(&buf);
+ bch2_bkey_val_to_text(&buf, c, k);
+ prt_newline(&buf);
+ bch2_data_update_opts_to_text(&buf, c, io_opts, data_opts);
+ trace_io_move_pred(c, buf.buf);
+ printbuf_exit(&buf);
+}
+
+static noinline void
+trace_io_move_evacuate_bucket2(struct bch_fs *c, struct bpos bucket, int gen)
+{
+ struct printbuf buf = PRINTBUF;
+
+ prt_printf(&buf, "bucket: ");
+ bch2_bpos_to_text(&buf, bucket);
+ prt_printf(&buf, " gen: %i\n", gen);
+
+ trace_io_move_evacuate_bucket(c, buf.buf);
+ printbuf_exit(&buf);
}
struct moving_io {
@@ -298,7 +342,8 @@ int bch2_move_extent(struct moving_context *ctxt,
struct bch_fs *c = trans->c;
int ret = -ENOMEM;
- trace_io_move2(c, k, &io_opts, &data_opts);
+ if (trace_io_move_enabled())
+ trace_io_move2(c, k, &io_opts, &data_opts);
this_cpu_add(c->counters[BCH_COUNTER_io_move], k.k->size);
if (ctxt->stats)
@@ -314,16 +359,14 @@ int bch2_move_extent(struct moving_context *ctxt,
return 0;
}
- /*
- * Before memory allocations & taking nocow locks in
- * bch2_data_update_init():
- */
- bch2_trans_unlock(trans);
-
- struct moving_io *io = kzalloc(sizeof(struct moving_io), GFP_KERNEL);
+ struct moving_io *io = allocate_dropping_locks(trans, ret,
+ kzalloc(sizeof(struct moving_io), _gfp));
if (!io)
goto err;
+ if (ret)
+ goto err_free;
+
INIT_LIST_HEAD(&io->io_list);
io->write.ctxt = ctxt;
io->read_sectors = k.k->size;
@@ -343,6 +386,8 @@ int bch2_move_extent(struct moving_context *ctxt,
io->write.op.c = c;
io->write.data_opts = data_opts;
+ bch2_trans_unlock(trans);
+
ret = bch2_data_update_bios_init(&io->write, c, &io_opts);
if (ret)
goto err_free;
@@ -364,7 +409,8 @@ int bch2_move_extent(struct moving_context *ctxt,
atomic_inc(&io->b->count);
}
- trace_io_move_read2(c, k);
+ if (trace_io_move_read_enabled())
+ trace_io_move_read2(c, k);
mutex_lock(&ctxt->lock);
atomic_add(io->read_sectors, &ctxt->read_sectors);
@@ -390,9 +436,6 @@ int bch2_move_extent(struct moving_context *ctxt,
err_free:
kfree(io);
err:
- if (bch2_err_matches(ret, BCH_ERR_data_update_done))
- return 0;
-
if (bch2_err_matches(ret, EROFS) ||
bch2_err_matches(ret, BCH_ERR_transaction_restart))
return ret;
@@ -408,6 +451,9 @@ err:
trace_io_move_start_fail(c, buf.buf);
printbuf_exit(&buf);
}
+
+ if (bch2_err_matches(ret, BCH_ERR_data_update_done))
+ return 0;
return ret;
}
@@ -496,6 +542,7 @@ int bch2_move_get_io_opts_one(struct btree_trans *trans,
bch2_inode_opts_get(io_opts, c, &inode);
}
bch2_trans_iter_exit(trans, &inode_iter);
+ /* seem to be spinning here? */
out:
return bch2_get_update_rebalance_opts(trans, io_opts, extent_iter, extent_k);
}
@@ -910,7 +957,13 @@ static int __bch2_move_data_phys(struct moving_context *ctxt,
}
struct data_update_opts data_opts = {};
- if (!pred(c, arg, bp.v->btree_id, k, &io_opts, &data_opts)) {
+ bool p = pred(c, arg, bp.v->btree_id, k, &io_opts, &data_opts);
+
+ if (trace_io_move_pred_enabled())
+ trace_io_move_pred2(c, k, &io_opts, &data_opts,
+ pred, arg, p);
+
+ if (!p) {
bch2_trans_iter_exit(trans, &iter);
goto next;
}
@@ -918,7 +971,7 @@ static int __bch2_move_data_phys(struct moving_context *ctxt,
if (data_opts.scrub &&
!bch2_dev_idx_is_online(c, data_opts.read_dev)) {
bch2_trans_iter_exit(trans, &iter);
- ret = -BCH_ERR_device_offline;
+ ret = bch_err_throw(c, device_offline);
break;
}
@@ -993,12 +1046,6 @@ int bch2_move_data_phys(struct bch_fs *c,
return ret;
}
-struct evacuate_bucket_arg {
- struct bpos bucket;
- int gen;
- struct data_update_opts data_opts;
-};
-
static bool evacuate_bucket_pred(struct bch_fs *c, void *_arg,
enum btree_id btree, struct bkey_s_c k,
struct bch_io_opts *io_opts,
@@ -1025,8 +1072,13 @@ int bch2_evacuate_bucket(struct moving_context *ctxt,
struct bpos bucket, int gen,
struct data_update_opts data_opts)
{
+ struct bch_fs *c = ctxt->trans->c;
struct evacuate_bucket_arg arg = { bucket, gen, data_opts, };
+ count_event(c, io_move_evacuate_bucket);
+ if (trace_io_move_evacuate_bucket_enabled())
+ trace_io_move_evacuate_bucket2(c, bucket, gen);
+
return __bch2_move_data_phys(ctxt, bucket_in_flight,
bucket.inode,
bucket.offset,
@@ -1124,7 +1176,7 @@ static bool rereplicate_pred(struct bch_fs *c, void *arg,
? c->opts.metadata_replicas
: io_opts->data_replicas;
- rcu_read_lock();
+ guard(rcu)();
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
unsigned i = 0;
bkey_for_each_ptr(ptrs, ptr) {
@@ -1134,7 +1186,6 @@ static bool rereplicate_pred(struct bch_fs *c, void *arg,
data_opts->kill_ptrs |= BIT(i);
i++;
}
- rcu_read_unlock();
if (!data_opts->kill_ptrs &&
(!nr_good || nr_good >= replicas))
@@ -1242,7 +1293,7 @@ static bool drop_extra_replicas_pred(struct bch_fs *c, void *arg,
struct extent_ptr_decoded p;
unsigned i = 0;
- rcu_read_lock();
+ guard(rcu)();
bkey_for_each_ptr_decode(k.k, bch2_bkey_ptrs_c(k), p, entry) {
unsigned d = bch2_extent_ptr_durability(c, &p);
@@ -1253,7 +1304,6 @@ static bool drop_extra_replicas_pred(struct bch_fs *c, void *arg,
i++;
}
- rcu_read_unlock();
return data_opts->kill_ptrs != 0;
}
diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c
index e7a2a13554d7..6d7b1d5f7697 100644
--- a/fs/bcachefs/movinggc.c
+++ b/fs/bcachefs/movinggc.c
@@ -293,11 +293,9 @@ u64 bch2_copygc_wait_amount(struct bch_fs *c)
{
u64 wait = U64_MAX;
- rcu_read_lock();
+ guard(rcu)();
for_each_rw_member_rcu(c, ca)
wait = min(wait, bch2_copygc_dev_wait_amount(ca));
- rcu_read_unlock();
-
return wait;
}
@@ -321,21 +319,21 @@ void bch2_copygc_wait_to_text(struct printbuf *out, struct bch_fs *c)
bch2_printbuf_make_room(out, 4096);
- rcu_read_lock();
+ struct task_struct *t;
out->atomic++;
+ scoped_guard(rcu) {
+ prt_printf(out, "Currently calculated wait:\n");
+ for_each_rw_member_rcu(c, ca) {
+ prt_printf(out, " %s:\t", ca->name);
+ prt_human_readable_u64(out, bch2_copygc_dev_wait_amount(ca));
+ prt_newline(out);
+ }
- prt_printf(out, "Currently calculated wait:\n");
- for_each_rw_member_rcu(c, ca) {
- prt_printf(out, " %s:\t", ca->name);
- prt_human_readable_u64(out, bch2_copygc_dev_wait_amount(ca));
- prt_newline(out);
+ t = rcu_dereference(c->copygc_thread);
+ if (t)
+ get_task_struct(t);
}
-
- struct task_struct *t = rcu_dereference(c->copygc_thread);
- if (t)
- get_task_struct(t);
--out->atomic;
- rcu_read_unlock();
if (t) {
bch2_prt_task_backtrace(out, t, 0, GFP_KERNEL);
diff --git a/fs/bcachefs/movinggc.h b/fs/bcachefs/movinggc.h
index b9683d22bab0..f615910d6f98 100644
--- a/fs/bcachefs/movinggc.h
+++ b/fs/bcachefs/movinggc.h
@@ -7,11 +7,10 @@ void bch2_copygc_wait_to_text(struct printbuf *, struct bch_fs *);
static inline void bch2_copygc_wakeup(struct bch_fs *c)
{
- rcu_read_lock();
+ guard(rcu)();
struct task_struct *p = rcu_dereference(c->copygc_thread);
if (p)
wake_up_process(p);
- rcu_read_unlock();
}
void bch2_copygc_stop(struct bch_fs *);
diff --git a/fs/bcachefs/namei.c b/fs/bcachefs/namei.c
index a84b69d6caef..24120037c031 100644
--- a/fs/bcachefs/namei.c
+++ b/fs/bcachefs/namei.c
@@ -287,7 +287,7 @@ int bch2_unlink_trans(struct btree_trans *trans,
}
if (deleting_subvol && !inode_u->bi_subvol) {
- ret = -BCH_ERR_ENOENT_not_subvol;
+ ret = bch_err_throw(c, ENOENT_not_subvol);
goto err;
}
@@ -425,8 +425,8 @@ int bch2_rename_trans(struct btree_trans *trans,
}
ret = bch2_dirent_rename(trans,
- src_dir, &src_hash, &src_dir_u->bi_size,
- dst_dir, &dst_hash, &dst_dir_u->bi_size,
+ src_dir, &src_hash,
+ dst_dir, &dst_hash,
src_name, &src_inum, &src_offset,
dst_name, &dst_inum, &dst_offset,
mode);
@@ -633,7 +633,7 @@ static int __bch2_inum_to_path(struct btree_trans *trans,
break;
if (!inode.bi_dir && !inode.bi_dir_offset) {
- ret = -BCH_ERR_ENOENT_inode_no_backpointer;
+ ret = bch_err_throw(trans->c, ENOENT_inode_no_backpointer);
goto disconnected;
}
@@ -733,15 +733,6 @@ static int bch2_check_dirent_inode_dirent(struct btree_trans *trans,
return __bch2_fsck_write_inode(trans, target);
}
- if (bch2_inode_should_have_single_bp(target) &&
- !fsck_err(trans, inode_wrong_backpointer,
- "dirent points to inode that does not point back:\n%s",
- (bch2_bkey_val_to_text(&buf, c, d.s_c),
- prt_newline(&buf),
- bch2_inode_unpacked_to_text(&buf, target),
- buf.buf)))
- goto err;
-
struct bkey_s_c_dirent bp_dirent =
bch2_bkey_get_iter_typed(trans, &bp_iter, BTREE_ID_dirents,
SPOS(target->bi_dir, target->bi_dir_offset, target->bi_snapshot),
@@ -768,6 +759,7 @@ static int bch2_check_dirent_inode_dirent(struct btree_trans *trans,
ret = __bch2_fsck_write_inode(trans, target);
}
} else {
+ printbuf_reset(&buf);
bch2_bkey_val_to_text(&buf, c, d.s_c);
prt_newline(&buf);
bch2_bkey_val_to_text(&buf, c, bp_dirent.s_c);
@@ -857,7 +849,8 @@ int __bch2_check_dirent_target(struct btree_trans *trans,
n->v.d_inum = cpu_to_le64(target->bi_inum);
}
- ret = bch2_trans_update(trans, dirent_iter, &n->k_i, 0);
+ ret = bch2_trans_update(trans, dirent_iter, &n->k_i,
+ BTREE_UPDATE_internal_snapshot_node);
if (ret)
goto err;
}
diff --git a/fs/bcachefs/printbuf.h b/fs/bcachefs/printbuf.h
index 1ca476adbf6f..8f4e28d440ac 100644
--- a/fs/bcachefs/printbuf.h
+++ b/fs/bcachefs/printbuf.h
@@ -140,6 +140,14 @@ void bch2_prt_bitflags_vector(struct printbuf *, const char * const[],
.size = _size, \
})
+static inline struct printbuf bch2_printbuf_init(void)
+{
+ return PRINTBUF;
+}
+
+DEFINE_CLASS(printbuf, struct printbuf,
+ bch2_printbuf_exit(&_T), bch2_printbuf_init(), void)
+
/*
* Returns size remaining of output buffer:
*/
diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c
index 3d4755d73af7..f241efb1fb50 100644
--- a/fs/bcachefs/quota.c
+++ b/fs/bcachefs/quota.c
@@ -527,7 +527,7 @@ int bch2_fs_quota_read(struct bch_fs *c)
struct bch_sb_field_quota *sb_quota = bch2_sb_get_or_create_quota(&c->disk_sb);
if (!sb_quota) {
mutex_unlock(&c->sb_lock);
- return -BCH_ERR_ENOSPC_sb_quota;
+ return bch_err_throw(c, ENOSPC_sb_quota);
}
bch2_sb_quota_read(c);
@@ -572,7 +572,7 @@ static int bch2_quota_enable(struct super_block *sb, unsigned uflags)
mutex_lock(&c->sb_lock);
sb_quota = bch2_sb_get_or_create_quota(&c->disk_sb);
if (!sb_quota) {
- ret = -BCH_ERR_ENOSPC_sb_quota;
+ ret = bch_err_throw(c, ENOSPC_sb_quota);
goto unlock;
}
@@ -726,7 +726,7 @@ static int bch2_quota_set_info(struct super_block *sb, int type,
mutex_lock(&c->sb_lock);
sb_quota = bch2_sb_get_or_create_quota(&c->disk_sb);
if (!sb_quota) {
- ret = -BCH_ERR_ENOSPC_sb_quota;
+ ret = bch_err_throw(c, ENOSPC_sb_quota);
goto unlock;
}
diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c
index de1ec9e0caa0..1c345b86b1c0 100644
--- a/fs/bcachefs/rebalance.c
+++ b/fs/bcachefs/rebalance.c
@@ -80,13 +80,12 @@ static inline unsigned bch2_bkey_ptrs_need_move(struct bch_fs *c,
unsigned ptr_bit = 1;
unsigned rewrite_ptrs = 0;
- rcu_read_lock();
+ guard(rcu)();
bkey_for_each_ptr(ptrs, ptr) {
if (!ptr->cached && !bch2_dev_in_target(c, ptr->dev, opts->background_target))
rewrite_ptrs |= ptr_bit;
ptr_bit <<= 1;
}
- rcu_read_unlock();
return rewrite_ptrs;
}
@@ -135,12 +134,11 @@ u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k)
}
incompressible:
if (opts->background_target) {
- rcu_read_lock();
+ guard(rcu)();
bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
if (!p.ptr.cached &&
!bch2_dev_in_target(c, p.ptr.dev, opts->background_target))
sectors += p.crc.compressed_size;
- rcu_read_unlock();
}
return sectors;
@@ -445,7 +443,7 @@ static int do_rebalance_extent(struct moving_context *ctxt,
if (bch2_err_matches(ret, ENOMEM)) {
/* memory allocation failure, wait for some IO to finish */
bch2_move_ctxt_wait_for_io(ctxt);
- ret = -BCH_ERR_transaction_restart_nested;
+ ret = bch_err_throw(c, transaction_restart_nested);
}
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
@@ -527,7 +525,7 @@ static void rebalance_wait(struct bch_fs *c)
r->state = BCH_REBALANCE_waiting;
}
- bch2_kthread_io_clock_wait(clock, r->wait_iotime_end, MAX_SCHEDULE_TIMEOUT);
+ bch2_kthread_io_clock_wait_once(clock, r->wait_iotime_end, MAX_SCHEDULE_TIMEOUT);
}
static bool bch2_rebalance_enabled(struct bch_fs *c)
@@ -544,6 +542,7 @@ static int do_rebalance(struct moving_context *ctxt)
struct bch_fs_rebalance *r = &c->rebalance;
struct btree_iter rebalance_work_iter, extent_iter = {};
struct bkey_s_c k;
+ u32 kick = r->kick;
int ret = 0;
bch2_trans_begin(trans);
@@ -593,7 +592,8 @@ static int do_rebalance(struct moving_context *ctxt)
if (!ret &&
!kthread_should_stop() &&
!atomic64_read(&r->work_stats.sectors_seen) &&
- !atomic64_read(&r->scan_stats.sectors_seen)) {
+ !atomic64_read(&r->scan_stats.sectors_seen) &&
+ kick == r->kick) {
bch2_moving_ctxt_flush_all(ctxt);
bch2_trans_unlock_long(trans);
rebalance_wait(c);
@@ -677,11 +677,12 @@ void bch2_rebalance_status_to_text(struct printbuf *out, struct bch_fs *c)
}
prt_newline(out);
- rcu_read_lock();
- struct task_struct *t = rcu_dereference(c->rebalance.thread);
- if (t)
- get_task_struct(t);
- rcu_read_unlock();
+ struct task_struct *t;
+ scoped_guard(rcu) {
+ t = rcu_dereference(c->rebalance.thread);
+ if (t)
+ get_task_struct(t);
+ }
if (t) {
bch2_prt_task_backtrace(out, t, 0, GFP_KERNEL);
@@ -794,7 +795,7 @@ static int check_rebalance_work_one(struct btree_trans *trans,
BTREE_ID_extents, POS_MIN,
BTREE_ITER_prefetch|
BTREE_ITER_all_snapshots);
- return -BCH_ERR_transaction_restart_nested;
+ return bch_err_throw(c, transaction_restart_nested);
}
if (!extent_k.k && !rebalance_k.k)
diff --git a/fs/bcachefs/rebalance.h b/fs/bcachefs/rebalance.h
index 5d9214fe1a22..7a565ea7dbfc 100644
--- a/fs/bcachefs/rebalance.h
+++ b/fs/bcachefs/rebalance.h
@@ -39,13 +39,11 @@ int bch2_set_fs_needs_rebalance(struct bch_fs *);
static inline void bch2_rebalance_wakeup(struct bch_fs *c)
{
- struct task_struct *p;
-
- rcu_read_lock();
- p = rcu_dereference(c->rebalance.thread);
+ c->rebalance.kick++;
+ guard(rcu)();
+ struct task_struct *p = rcu_dereference(c->rebalance.thread);
if (p)
wake_up_process(p);
- rcu_read_unlock();
}
void bch2_rebalance_status_to_text(struct printbuf *, struct bch_fs *);
diff --git a/fs/bcachefs/rebalance_types.h b/fs/bcachefs/rebalance_types.h
index 33d77286f1d5..c659da149fa3 100644
--- a/fs/bcachefs/rebalance_types.h
+++ b/fs/bcachefs/rebalance_types.h
@@ -18,6 +18,7 @@ enum bch_rebalance_states {
struct bch_fs_rebalance {
struct task_struct __rcu *thread;
+ u32 kick;
struct bch_pd_controller pd;
enum bch_rebalance_states state;
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 4fca57575565..1e68e61f08e8 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -879,7 +879,7 @@ int bch2_fs_recovery(struct bch_fs *c)
use_clean:
if (!clean) {
bch_err(c, "no superblock clean section found");
- ret = -BCH_ERR_fsck_repair_impossible;
+ ret = bch_err_throw(c, fsck_repair_impossible);
goto err;
}
@@ -1093,10 +1093,6 @@ use_clean:
out:
bch2_flush_fsck_errs(c);
- if (!c->opts.retain_recovery_info) {
- bch2_journal_keys_put_initial(c);
- bch2_find_btree_nodes_exit(&c->found_btree_nodes);
- }
if (!IS_ERR(clean))
kfree(clean);
diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c
index dabb29b08ad0..605588e33fb3 100644
--- a/fs/bcachefs/recovery_passes.c
+++ b/fs/bcachefs/recovery_passes.c
@@ -103,20 +103,20 @@ static void bch2_sb_recovery_passes_to_text(struct printbuf *out,
prt_tab(out);
bch2_pr_time_units(out, le32_to_cpu(i->last_runtime) * NSEC_PER_SEC);
+
+ if (BCH_RECOVERY_PASS_NO_RATELIMIT(i))
+ prt_str(out, " (no ratelimit)");
+
prt_newline(out);
}
}
-static void bch2_sb_recovery_pass_complete(struct bch_fs *c,
- enum bch_recovery_pass pass,
- s64 start_time)
+static struct recovery_pass_entry *bch2_sb_recovery_pass_entry(struct bch_fs *c,
+ enum bch_recovery_pass pass)
{
enum bch_recovery_pass_stable stable = bch2_recovery_pass_to_stable(pass);
- s64 end_time = ktime_get_real_seconds();
- mutex_lock(&c->sb_lock);
- struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
- __clear_bit_le64(stable, ext->recovery_passes_required);
+ lockdep_assert_held(&c->sb_lock);
struct bch_sb_field_recovery_passes *r =
bch2_sb_field_get(c->disk_sb.sb, recovery_passes);
@@ -127,15 +127,43 @@ static void bch2_sb_recovery_pass_complete(struct bch_fs *c,
r = bch2_sb_field_resize(&c->disk_sb, recovery_passes, u64s);
if (!r) {
bch_err(c, "error creating recovery_passes sb section");
- goto out;
+ return NULL;
}
}
- r->start[stable].last_run = cpu_to_le64(end_time);
- r->start[stable].last_runtime = cpu_to_le32(max(0, end_time - start_time));
-out:
+ return r->start + stable;
+}
+
+static void bch2_sb_recovery_pass_complete(struct bch_fs *c,
+ enum bch_recovery_pass pass,
+ s64 start_time)
+{
+ guard(mutex)(&c->sb_lock);
+ struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
+ __clear_bit_le64(bch2_recovery_pass_to_stable(pass),
+ ext->recovery_passes_required);
+
+ struct recovery_pass_entry *e = bch2_sb_recovery_pass_entry(c, pass);
+ if (e) {
+ s64 end_time = ktime_get_real_seconds();
+ e->last_run = cpu_to_le64(end_time);
+ e->last_runtime = cpu_to_le32(max(0, end_time - start_time));
+ SET_BCH_RECOVERY_PASS_NO_RATELIMIT(e, false);
+ }
+
bch2_write_super(c);
- mutex_unlock(&c->sb_lock);
+}
+
+void bch2_recovery_pass_set_no_ratelimit(struct bch_fs *c,
+ enum bch_recovery_pass pass)
+{
+ guard(mutex)(&c->sb_lock);
+
+ struct recovery_pass_entry *e = bch2_sb_recovery_pass_entry(c, pass);
+ if (e && !BCH_RECOVERY_PASS_NO_RATELIMIT(e)) {
+ SET_BCH_RECOVERY_PASS_NO_RATELIMIT(e, false);
+ bch2_write_super(c);
+ }
}
static bool bch2_recovery_pass_want_ratelimit(struct bch_fs *c, enum bch_recovery_pass pass)
@@ -157,6 +185,9 @@ static bool bch2_recovery_pass_want_ratelimit(struct bch_fs *c, enum bch_recover
*/
ret = (u64) le32_to_cpu(i->last_runtime) * 100 >
ktime_get_real_seconds() - le64_to_cpu(i->last_run);
+
+ if (BCH_RECOVERY_PASS_NO_RATELIMIT(i))
+ ret = false;
}
return ret;
@@ -315,7 +346,9 @@ int __bch2_run_explicit_recovery_pass(struct bch_fs *c,
goto out;
bool in_recovery = test_bit(BCH_FS_in_recovery, &c->flags);
- bool rewind = in_recovery && r->curr_pass > pass;
+ bool rewind = in_recovery &&
+ r->curr_pass > pass &&
+ !(r->passes_complete & BIT_ULL(pass));
bool ratelimit = flags & RUN_RECOVERY_PASS_ratelimit;
if (!(in_recovery && (flags & RUN_RECOVERY_PASS_nopersistent))) {
@@ -327,7 +360,7 @@ int __bch2_run_explicit_recovery_pass(struct bch_fs *c,
(!in_recovery || r->curr_pass >= BCH_RECOVERY_PASS_set_may_go_rw)) {
prt_printf(out, "need recovery pass %s (%u), but already rw\n",
bch2_recovery_passes[pass], pass);
- ret = -BCH_ERR_cannot_rewind_recovery;
+ ret = bch_err_throw(c, cannot_rewind_recovery);
goto out;
}
@@ -347,7 +380,7 @@ int __bch2_run_explicit_recovery_pass(struct bch_fs *c,
if (rewind) {
r->next_pass = pass;
r->passes_complete &= (1ULL << pass) >> 1;
- ret = -BCH_ERR_restart_recovery;
+ ret = bch_err_throw(c, restart_recovery);
}
} else {
prt_printf(out, "scheduling recovery pass %s (%u)%s\n",
@@ -382,6 +415,35 @@ int bch2_run_explicit_recovery_pass(struct bch_fs *c,
return ret;
}
+/*
+ * Returns 0 if @pass has run recently, otherwise one of
+ * -BCH_ERR_restart_recovery
+ * -BCH_ERR_recovery_pass_will_run
+ */
+int bch2_require_recovery_pass(struct bch_fs *c,
+ struct printbuf *out,
+ enum bch_recovery_pass pass)
+{
+ if (test_bit(BCH_FS_in_recovery, &c->flags) &&
+ c->recovery.passes_complete & BIT_ULL(pass))
+ return 0;
+
+ guard(mutex)(&c->sb_lock);
+
+ if (bch2_recovery_pass_want_ratelimit(c, pass))
+ return 0;
+
+ enum bch_run_recovery_pass_flags flags = 0;
+ int ret = 0;
+
+ if (recovery_pass_needs_set(c, pass, &flags)) {
+ ret = __bch2_run_explicit_recovery_pass(c, out, pass, flags);
+ bch2_write_super(c);
+ }
+
+ return ret ?: bch_err_throw(c, recovery_pass_will_run);
+}
+
int bch2_run_print_explicit_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
{
enum bch_run_recovery_pass_flags flags = RUN_RECOVERY_PASS_nopersistent;
diff --git a/fs/bcachefs/recovery_passes.h b/fs/bcachefs/recovery_passes.h
index dc0d2014ff9b..260571c7105e 100644
--- a/fs/bcachefs/recovery_passes.h
+++ b/fs/bcachefs/recovery_passes.h
@@ -10,6 +10,8 @@ u64 bch2_recovery_passes_from_stable(u64 v);
u64 bch2_fsck_recovery_passes(void);
+void bch2_recovery_pass_set_no_ratelimit(struct bch_fs *, enum bch_recovery_pass);
+
enum bch_run_recovery_pass_flags {
RUN_RECOVERY_PASS_nopersistent = BIT(0),
RUN_RECOVERY_PASS_ratelimit = BIT(1),
@@ -24,6 +26,9 @@ int bch2_run_explicit_recovery_pass(struct bch_fs *, struct printbuf *,
enum bch_recovery_pass,
enum bch_run_recovery_pass_flags);
+int bch2_require_recovery_pass(struct bch_fs *, struct printbuf *,
+ enum bch_recovery_pass);
+
int bch2_run_online_recovery_passes(struct bch_fs *, u64);
int bch2_run_recovery_passes(struct bch_fs *, enum bch_recovery_pass);
diff --git a/fs/bcachefs/recovery_passes_format.h b/fs/bcachefs/recovery_passes_format.h
index c434eafbca19..b63c20558d3d 100644
--- a/fs/bcachefs/recovery_passes_format.h
+++ b/fs/bcachefs/recovery_passes_format.h
@@ -87,6 +87,8 @@ struct recovery_pass_entry {
__le32 flags;
};
+LE32_BITMASK(BCH_RECOVERY_PASS_NO_RATELIMIT, struct recovery_pass_entry, flags, 0, 1)
+
struct bch_sb_field_recovery_passes {
struct bch_sb_field field;
struct recovery_pass_entry start[];
diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c
index 3a13dbcab6ba..a535abd44df3 100644
--- a/fs/bcachefs/reflink.c
+++ b/fs/bcachefs/reflink.c
@@ -312,7 +312,7 @@ static int trans_trigger_reflink_p_segment(struct btree_trans *trans,
if (!bkey_refcount_c(k)) {
if (!(flags & BTREE_TRIGGER_overwrite))
- ret = -BCH_ERR_missing_indirect_extent;
+ ret = bch_err_throw(c, missing_indirect_extent);
goto next;
}
@@ -612,7 +612,7 @@ s64 bch2_remap_range(struct bch_fs *c,
int ret = 0, ret2 = 0;
if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_reflink))
- return -BCH_ERR_erofs_no_writes;
+ return bch_err_throw(c, erofs_no_writes);
bch2_check_set_feature(c, BCH_FEATURE_reflink);
@@ -711,7 +711,8 @@ s64 bch2_remap_range(struct bch_fs *c,
SET_REFLINK_P_IDX(&dst_p->v, offset);
if (reflink_p_may_update_opts_field &&
- may_change_src_io_path_opts)
+ may_change_src_io_path_opts &&
+ REFLINK_P_MAY_UPDATE_OPTIONS(src_p.v))
SET_REFLINK_P_MAY_UPDATE_OPTIONS(&dst_p->v, true);
} else {
BUG();
@@ -847,7 +848,7 @@ int bch2_gc_reflink_start(struct bch_fs *c)
struct reflink_gc *r = genradix_ptr_alloc(&c->reflink_gc_table,
c->reflink_gc_nr++, GFP_KERNEL);
if (!r) {
- ret = -BCH_ERR_ENOMEM_gc_reflink_start;
+ ret = bch_err_throw(c, ENOMEM_gc_reflink_start);
break;
}
diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c
index 477ef0997949..8383bd7fdb3f 100644
--- a/fs/bcachefs/replicas.c
+++ b/fs/bcachefs/replicas.c
@@ -119,7 +119,7 @@ int bch2_replicas_entry_validate(struct bch_replicas_entry_v1 *r,
return 0;
bad:
bch2_replicas_entry_to_text(err, r);
- return -BCH_ERR_invalid_replicas_entry;
+ return bch_err_throw(c, invalid_replicas_entry);
}
void bch2_cpu_replicas_to_text(struct printbuf *out,
@@ -311,7 +311,7 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c,
!__replicas_has_entry(&c->replicas_gc, new_entry)) {
new_gc = cpu_replicas_add_entry(c, &c->replicas_gc, new_entry);
if (!new_gc.entries) {
- ret = -BCH_ERR_ENOMEM_cpu_replicas;
+ ret = bch_err_throw(c, ENOMEM_cpu_replicas);
goto err;
}
}
@@ -319,7 +319,7 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c,
if (!__replicas_has_entry(&c->replicas, new_entry)) {
new_r = cpu_replicas_add_entry(c, &c->replicas, new_entry);
if (!new_r.entries) {
- ret = -BCH_ERR_ENOMEM_cpu_replicas;
+ ret = bch_err_throw(c, ENOMEM_cpu_replicas);
goto err;
}
@@ -422,7 +422,7 @@ int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask)
if (!c->replicas_gc.entries) {
mutex_unlock(&c->sb_lock);
bch_err(c, "error allocating c->replicas_gc");
- return -BCH_ERR_ENOMEM_replicas_gc;
+ return bch_err_throw(c, ENOMEM_replicas_gc);
}
for_each_cpu_replicas_entry(&c->replicas, e)
@@ -458,7 +458,7 @@ retry:
new.entries = kcalloc(nr, new.entry_size, GFP_KERNEL);
if (!new.entries) {
bch_err(c, "error allocating c->replicas_gc");
- return -BCH_ERR_ENOMEM_replicas_gc;
+ return bch_err_throw(c, ENOMEM_replicas_gc);
}
mutex_lock(&c->sb_lock);
@@ -622,7 +622,7 @@ static int bch2_cpu_replicas_to_sb_replicas_v0(struct bch_fs *c,
sb_r = bch2_sb_field_resize(&c->disk_sb, replicas_v0,
DIV_ROUND_UP(bytes, sizeof(u64)));
if (!sb_r)
- return -BCH_ERR_ENOSPC_sb_replicas;
+ return bch_err_throw(c, ENOSPC_sb_replicas);
bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas);
sb_r = bch2_sb_field_get(c->disk_sb.sb, replicas_v0);
@@ -667,7 +667,7 @@ static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *c,
sb_r = bch2_sb_field_resize(&c->disk_sb, replicas,
DIV_ROUND_UP(bytes, sizeof(u64)));
if (!sb_r)
- return -BCH_ERR_ENOSPC_sb_replicas;
+ return bch_err_throw(c, ENOSPC_sb_replicas);
bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas_v0);
sb_r = bch2_sb_field_get(c->disk_sb.sb, replicas);
@@ -819,19 +819,18 @@ bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs,
if (e->data_type == BCH_DATA_cached)
continue;
- rcu_read_lock();
- for (unsigned i = 0; i < e->nr_devs; i++) {
- if (e->devs[i] == BCH_SB_MEMBER_INVALID) {
- nr_failed++;
- continue;
- }
+ scoped_guard(rcu)
+ for (unsigned i = 0; i < e->nr_devs; i++) {
+ if (e->devs[i] == BCH_SB_MEMBER_INVALID) {
+ nr_failed++;
+ continue;
+ }
- nr_online += test_bit(e->devs[i], devs.d);
+ nr_online += test_bit(e->devs[i], devs.d);
- struct bch_dev *ca = bch2_dev_rcu_noerror(c, e->devs[i]);
- nr_failed += !ca || ca->mi.state == BCH_MEMBER_STATE_failed;
- }
- rcu_read_unlock();
+ struct bch_dev *ca = bch2_dev_rcu_noerror(c, e->devs[i]);
+ nr_failed += !ca || ca->mi.state == BCH_MEMBER_STATE_failed;
+ }
if (nr_online + nr_failed == e->nr_devs)
continue;
diff --git a/fs/bcachefs/sb-counters_format.h b/fs/bcachefs/sb-counters_format.h
index 7c0c9c842b4e..b868702a431a 100644
--- a/fs/bcachefs/sb-counters_format.h
+++ b/fs/bcachefs/sb-counters_format.h
@@ -26,6 +26,7 @@ enum counters_flags {
x(io_move_write_fail, 82, TYPE_COUNTER) \
x(io_move_start_fail, 39, TYPE_COUNTER) \
x(io_move_created_rebalance, 83, TYPE_COUNTER) \
+ x(io_move_evacuate_bucket, 84, TYPE_COUNTER) \
x(bucket_invalidate, 3, TYPE_COUNTER) \
x(bucket_discard, 4, TYPE_COUNTER) \
x(bucket_discard_fast, 79, TYPE_COUNTER) \
diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c
index 861fce1630f0..b61f88450a6d 100644
--- a/fs/bcachefs/sb-downgrade.c
+++ b/fs/bcachefs/sb-downgrade.c
@@ -417,7 +417,7 @@ int bch2_sb_downgrade_update(struct bch_fs *c)
d = bch2_sb_field_resize(&c->disk_sb, downgrade, sb_u64s);
if (!d) {
- ret = -BCH_ERR_ENOSPC_sb_downgrade;
+ ret = bch_err_throw(c, ENOSPC_sb_downgrade);
goto out;
}
diff --git a/fs/bcachefs/sb-errors.c b/fs/bcachefs/sb-errors.c
index 013a96883b4e..48853efdc105 100644
--- a/fs/bcachefs/sb-errors.c
+++ b/fs/bcachefs/sb-errors.c
@@ -78,6 +78,28 @@ const struct bch_sb_field_ops bch_sb_field_ops_errors = {
.to_text = bch2_sb_errors_to_text,
};
+void bch2_fs_errors_to_text(struct printbuf *out, struct bch_fs *c)
+{
+ if (out->nr_tabstops < 1)
+ printbuf_tabstop_push(out, 48);
+ if (out->nr_tabstops < 2)
+ printbuf_tabstop_push(out, 8);
+ if (out->nr_tabstops < 3)
+ printbuf_tabstop_push(out, 16);
+
+ guard(mutex)(&c->fsck_error_counts_lock);
+
+ bch_sb_errors_cpu *e = &c->fsck_error_counts;
+ darray_for_each(*e, i) {
+ bch2_sb_error_id_to_text(out, i->id);
+ prt_tab(out);
+ prt_u64(out, i->nr);
+ prt_tab(out);
+ bch2_prt_datetime(out, i->last_error_time);
+ prt_newline(out);
+ }
+}
+
void bch2_sb_error_count(struct bch_fs *c, enum bch_sb_error_id err)
{
bch_sb_errors_cpu *e = &c->fsck_error_counts;
diff --git a/fs/bcachefs/sb-errors.h b/fs/bcachefs/sb-errors.h
index b2357b8e6107..e86267264692 100644
--- a/fs/bcachefs/sb-errors.h
+++ b/fs/bcachefs/sb-errors.h
@@ -7,6 +7,7 @@
extern const char * const bch2_sb_error_strs[];
void bch2_sb_error_id_to_text(struct printbuf *, enum bch_sb_error_id);
+void bch2_fs_errors_to_text(struct printbuf *, struct bch_fs *);
extern const struct bch_sb_field_ops bch_sb_field_ops_errors;
diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h
index 0bfb151da9cf..6fdbf265e4c0 100644
--- a/fs/bcachefs/sb-errors_format.h
+++ b/fs/bcachefs/sb-errors_format.h
@@ -232,6 +232,7 @@ enum bch_fsck_flags {
x(inode_dir_multiple_links, 206, FSCK_AUTOFIX) \
x(inode_dir_missing_backpointer, 284, FSCK_AUTOFIX) \
x(inode_dir_unlinked_but_not_empty, 286, FSCK_AUTOFIX) \
+ x(inode_dir_has_nonzero_i_size, 319, FSCK_AUTOFIX) \
x(inode_multiple_links_but_nlink_0, 207, FSCK_AUTOFIX) \
x(inode_wrong_backpointer, 208, FSCK_AUTOFIX) \
x(inode_wrong_nlink, 209, FSCK_AUTOFIX) \
@@ -243,6 +244,7 @@ enum bch_fsck_flags {
x(inode_parent_has_case_insensitive_not_set, 317, FSCK_AUTOFIX) \
x(vfs_inode_i_blocks_underflow, 311, FSCK_AUTOFIX) \
x(vfs_inode_i_blocks_not_zero_at_truncate, 313, FSCK_AUTOFIX) \
+ x(vfs_bad_inode_rm, 320, 0) \
x(deleted_inode_but_clean, 211, FSCK_AUTOFIX) \
x(deleted_inode_missing, 212, FSCK_AUTOFIX) \
x(deleted_inode_is_dir, 213, FSCK_AUTOFIX) \
@@ -328,7 +330,7 @@ enum bch_fsck_flags {
x(dirent_stray_data_after_cf_name, 305, 0) \
x(rebalance_work_incorrectly_set, 309, FSCK_AUTOFIX) \
x(rebalance_work_incorrectly_unset, 310, FSCK_AUTOFIX) \
- x(MAX, 319, 0)
+ x(MAX, 321, 0)
enum bch_sb_error_id {
#define x(t, n, ...) BCH_FSCK_ERR_##t = n,
diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c
index 3398906660a5..363eb0c6eb7c 100644
--- a/fs/bcachefs/sb-members.c
+++ b/fs/bcachefs/sb-members.c
@@ -101,7 +101,7 @@ static int sb_members_v2_resize_entries(struct bch_fs *c)
mi = bch2_sb_field_resize(&c->disk_sb, members_v2, u64s);
if (!mi)
- return -BCH_ERR_ENOSPC_sb_members_v2;
+ return bch_err_throw(c, ENOSPC_sb_members_v2);
for (int i = c->disk_sb.sb->nr_devices - 1; i >= 0; --i) {
void *dst = (void *) mi->_members + (i * sizeof(struct bch_member));
@@ -378,14 +378,13 @@ void bch2_sb_members_from_cpu(struct bch_fs *c)
{
struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2);
- rcu_read_lock();
+ guard(rcu)();
for_each_member_device_rcu(c, ca, NULL) {
struct bch_member *m = __bch2_members_v2_get_mut(mi, ca->dev_idx);
for (unsigned e = 0; e < BCH_MEMBER_ERROR_NR; e++)
m->errors[e] = cpu_to_le64(atomic64_read(&ca->errors[e]));
}
- rcu_read_unlock();
}
void bch2_dev_io_errors_to_text(struct printbuf *out, struct bch_dev *ca)
@@ -443,20 +442,14 @@ void bch2_dev_errors_reset(struct bch_dev *ca)
bool bch2_dev_btree_bitmap_marked(struct bch_fs *c, struct bkey_s_c k)
{
- bool ret = true;
- rcu_read_lock();
+ guard(rcu)();
bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) {
struct bch_dev *ca = bch2_dev_rcu(c, ptr->dev);
- if (!ca)
- continue;
-
- if (!bch2_dev_btree_bitmap_marked_sectors(ca, ptr->offset, btree_sectors(c))) {
- ret = false;
- break;
- }
+ if (ca &&
+ !bch2_dev_btree_bitmap_marked_sectors(ca, ptr->offset, btree_sectors(c)))
+ return false;
}
- rcu_read_unlock();
- return ret;
+ return true;
}
static void __bch2_dev_btree_bitmap_mark(struct bch_sb_field_members_v2 *mi, unsigned dev,
diff --git a/fs/bcachefs/sb-members.h b/fs/bcachefs/sb-members.h
index 6bd9b86aee5b..8d8a8a857648 100644
--- a/fs/bcachefs/sb-members.h
+++ b/fs/bcachefs/sb-members.h
@@ -28,12 +28,9 @@ static inline struct bch_dev *bch2_dev_rcu(struct bch_fs *, unsigned);
static inline bool bch2_dev_idx_is_online(struct bch_fs *c, unsigned dev)
{
- rcu_read_lock();
+ guard(rcu)();
struct bch_dev *ca = bch2_dev_rcu(c, dev);
- bool ret = ca && bch2_dev_is_online(ca);
- rcu_read_unlock();
-
- return ret;
+ return ca && bch2_dev_is_online(ca);
}
static inline bool bch2_dev_is_healthy(struct bch_dev *ca)
@@ -142,12 +139,10 @@ static inline void bch2_dev_put(struct bch_dev *ca)
static inline struct bch_dev *bch2_get_next_dev(struct bch_fs *c, struct bch_dev *ca)
{
- rcu_read_lock();
+ guard(rcu)();
bch2_dev_put(ca);
if ((ca = __bch2_next_dev(c, ca, NULL)))
bch2_dev_get(ca);
- rcu_read_unlock();
-
return ca;
}
@@ -166,7 +161,7 @@ static inline struct bch_dev *bch2_get_next_online_dev(struct bch_fs *c,
unsigned state_mask,
int rw, unsigned ref_idx)
{
- rcu_read_lock();
+ guard(rcu)();
if (ca)
enumerated_ref_put(&ca->io_ref[rw], ref_idx);
@@ -174,7 +169,6 @@ static inline struct bch_dev *bch2_get_next_online_dev(struct bch_fs *c,
(!((1 << ca->mi.state) & state_mask) ||
!enumerated_ref_tryget(&ca->io_ref[rw], ref_idx)))
;
- rcu_read_unlock();
return ca;
}
@@ -239,11 +233,10 @@ static inline struct bch_dev *bch2_dev_rcu(struct bch_fs *c, unsigned dev)
static inline struct bch_dev *bch2_dev_tryget_noerror(struct bch_fs *c, unsigned dev)
{
- rcu_read_lock();
+ guard(rcu)();
struct bch_dev *ca = bch2_dev_rcu_noerror(c, dev);
if (ca)
bch2_dev_get(ca);
- rcu_read_unlock();
return ca;
}
@@ -299,19 +292,16 @@ static inline struct bch_dev *bch2_dev_get_ioref(struct bch_fs *c, unsigned dev,
{
might_sleep();
- rcu_read_lock();
+ guard(rcu)();
struct bch_dev *ca = bch2_dev_rcu(c, dev);
- if (ca && !enumerated_ref_tryget(&ca->io_ref[rw], ref_idx))
- ca = NULL;
- rcu_read_unlock();
+ if (!ca || !enumerated_ref_tryget(&ca->io_ref[rw], ref_idx))
+ return NULL;
- if (ca &&
- (ca->mi.state == BCH_MEMBER_STATE_rw ||
- (ca->mi.state == BCH_MEMBER_STATE_ro && rw == READ)))
+ if (ca->mi.state == BCH_MEMBER_STATE_rw ||
+ (ca->mi.state == BCH_MEMBER_STATE_ro && rw == READ))
return ca;
- if (ca)
- enumerated_ref_put(&ca->io_ref[rw], ref_idx);
+ enumerated_ref_put(&ca->io_ref[rw], ref_idx);
return NULL;
}
diff --git a/fs/bcachefs/six.c b/fs/bcachefs/six.c
index 7c403427fbdb..538c324f4765 100644
--- a/fs/bcachefs/six.c
+++ b/fs/bcachefs/six.c
@@ -339,12 +339,9 @@ static inline bool six_owner_running(struct six_lock *lock)
* acquiring the lock and setting the owner field. If we're an RT task
* that will live-lock because we won't let the owner complete.
*/
- rcu_read_lock();
+ guard(rcu)();
struct task_struct *owner = READ_ONCE(lock->owner);
- bool ret = owner ? owner_on_cpu(owner) : !rt_or_dl_task(current);
- rcu_read_unlock();
-
- return ret;
+ return owner ? owner_on_cpu(owner) : !rt_or_dl_task(current);
}
static inline bool six_optimistic_spin(struct six_lock *lock,
diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c
index 00d62d1190ef..23a332d76b32 100644
--- a/fs/bcachefs/snapshot.c
+++ b/fs/bcachefs/snapshot.c
@@ -54,7 +54,7 @@ int bch2_snapshot_tree_lookup(struct btree_trans *trans, u32 id,
BTREE_ITER_with_updates, snapshot_tree, s);
if (bch2_err_matches(ret, ENOENT))
- ret = -BCH_ERR_ENOENT_snapshot_tree;
+ ret = bch_err_throw(trans->c, ENOENT_snapshot_tree);
return ret;
}
@@ -67,7 +67,7 @@ __bch2_snapshot_tree_create(struct btree_trans *trans)
struct bkey_i_snapshot_tree *s_t;
if (ret == -BCH_ERR_ENOSPC_btree_slot)
- ret = -BCH_ERR_ENOSPC_snapshot_tree;
+ ret = bch_err_throw(trans->c, ENOSPC_snapshot_tree);
if (ret)
return ERR_PTR(ret);
@@ -105,11 +105,8 @@ static bool __bch2_snapshot_is_ancestor_early(struct snapshot_table *t, u32 id,
static bool bch2_snapshot_is_ancestor_early(struct bch_fs *c, u32 id, u32 ancestor)
{
- rcu_read_lock();
- bool ret = __bch2_snapshot_is_ancestor_early(rcu_dereference(c->snapshots), id, ancestor);
- rcu_read_unlock();
-
- return ret;
+ guard(rcu)();
+ return __bch2_snapshot_is_ancestor_early(rcu_dereference(c->snapshots), id, ancestor);
}
static inline u32 get_ancestor_below(struct snapshot_table *t, u32 id, u32 ancestor)
@@ -140,13 +137,11 @@ bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
{
bool ret;
- rcu_read_lock();
+ guard(rcu)();
struct snapshot_table *t = rcu_dereference(c->snapshots);
- if (unlikely(c->recovery.pass_done < BCH_RECOVERY_PASS_check_snapshots)) {
- ret = __bch2_snapshot_is_ancestor_early(t, id, ancestor);
- goto out;
- }
+ if (unlikely(c->recovery.pass_done < BCH_RECOVERY_PASS_check_snapshots))
+ return __bch2_snapshot_is_ancestor_early(t, id, ancestor);
if (likely(ancestor >= IS_ANCESTOR_BITMAP))
while (id && id < ancestor - IS_ANCESTOR_BITMAP)
@@ -157,9 +152,6 @@ bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
: id == ancestor;
EBUG_ON(ret != __bch2_snapshot_is_ancestor_early(t, id, ancestor));
-out:
- rcu_read_unlock();
-
return ret;
}
@@ -293,7 +285,7 @@ static int bch2_snapshot_table_make_room(struct bch_fs *c, u32 id)
mutex_lock(&c->snapshot_table_lock);
int ret = snapshot_t_mut(c, id)
? 0
- : -BCH_ERR_ENOMEM_mark_snapshot;
+ : bch_err_throw(c, ENOMEM_mark_snapshot);
mutex_unlock(&c->snapshot_table_lock);
return ret;
}
@@ -312,7 +304,7 @@ static int __bch2_mark_snapshot(struct btree_trans *trans,
t = snapshot_t_mut(c, id);
if (!t) {
- ret = -BCH_ERR_ENOMEM_mark_snapshot;
+ ret = bch_err_throw(c, ENOMEM_mark_snapshot);
goto err;
}
@@ -412,10 +404,10 @@ static u32 bch2_snapshot_tree_next(struct bch_fs *c, u32 id)
u32 bch2_snapshot_oldest_subvol(struct bch_fs *c, u32 snapshot_root,
snapshot_id_list *skip)
{
+ guard(rcu)();
u32 id, subvol = 0, s;
retry:
id = snapshot_root;
- rcu_read_lock();
while (id && bch2_snapshot_exists(c, id)) {
if (!(skip && snapshot_list_has_id(skip, id))) {
s = snapshot_t(c, id)->subvol;
@@ -427,7 +419,6 @@ retry:
if (id == snapshot_root)
break;
}
- rcu_read_unlock();
if (!subvol && skip) {
skip = NULL;
@@ -617,18 +608,14 @@ static int snapshot_tree_ptr_good(struct btree_trans *trans,
u32 bch2_snapshot_skiplist_get(struct bch_fs *c, u32 id)
{
- const struct snapshot_t *s;
-
if (!id)
return 0;
- rcu_read_lock();
- s = snapshot_t(c, id);
- if (s->parent)
- id = bch2_snapshot_nth_parent(c, id, get_random_u32_below(s->depth));
- rcu_read_unlock();
-
- return id;
+ guard(rcu)();
+ const struct snapshot_t *s = snapshot_t(c, id);
+ return s->parent
+ ? bch2_snapshot_nth_parent(c, id, get_random_u32_below(s->depth))
+ : id;
}
static int snapshot_skiplist_good(struct btree_trans *trans, u32 id, struct bch_snapshot s)
@@ -947,10 +934,7 @@ static inline bool same_snapshot(struct snapshot_tree_reconstruct *r, struct bpo
static inline bool snapshot_id_lists_have_common(snapshot_id_list *l, snapshot_id_list *r)
{
- darray_for_each(*l, i)
- if (snapshot_list_has_id(r, *i))
- return true;
- return false;
+ return darray_find_p(*l, i, snapshot_list_has_id(r, *i)) != NULL;
}
static void snapshot_id_list_to_text(struct printbuf *out, snapshot_id_list *s)
@@ -1022,7 +1006,7 @@ int bch2_reconstruct_snapshots(struct bch_fs *c)
"snapshot node %u from tree %s missing, recreate?", *id, buf.buf)) {
if (t->nr > 1) {
bch_err(c, "cannot reconstruct snapshot trees with multiple nodes");
- ret = -BCH_ERR_fsck_repair_unimplemented;
+ ret = bch_err_throw(c, fsck_repair_unimplemented);
goto err;
}
@@ -1061,24 +1045,73 @@ int __bch2_check_key_has_snapshot(struct btree_trans *trans,
ret = bch2_btree_delete_at(trans, iter,
BTREE_UPDATE_internal_snapshot_node) ?: 1;
- /*
- * Snapshot missing: we should have caught this with btree_lost_data and
- * kicked off reconstruct_snapshots, so if we end up here we have no
- * idea what happened:
- */
- if (fsck_err_on(state == SNAPSHOT_ID_empty,
- trans, bkey_in_missing_snapshot,
- "key in missing snapshot %s, delete?",
- (bch2_btree_id_to_text(&buf, iter->btree_id),
- prt_char(&buf, ' '),
- bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
- ret = bch2_btree_delete_at(trans, iter,
- BTREE_UPDATE_internal_snapshot_node) ?: 1;
+ if (state == SNAPSHOT_ID_empty) {
+ /*
+ * Snapshot missing: we should have caught this with btree_lost_data and
+ * kicked off reconstruct_snapshots, so if we end up here we have no
+ * idea what happened.
+ *
+ * Do not delete unless we know that subvolumes and snapshots
+ * are consistent:
+ *
+ * XXX:
+ *
+ * We could be smarter here, and instead of using the generic
+ * recovery pass ratelimiting, track if there have been any
+ * changes to the snapshots or inodes btrees since those passes
+ * last ran.
+ */
+ ret = bch2_require_recovery_pass(c, &buf, BCH_RECOVERY_PASS_check_snapshots) ?: ret;
+ ret = bch2_require_recovery_pass(c, &buf, BCH_RECOVERY_PASS_check_subvols) ?: ret;
+
+ if (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_snapshots))
+ ret = bch2_require_recovery_pass(c, &buf, BCH_RECOVERY_PASS_reconstruct_snapshots) ?: ret;
+
+ unsigned repair_flags = FSCK_CAN_IGNORE | (!ret ? FSCK_CAN_FIX : 0);
+
+ if (__fsck_err(trans, repair_flags, bkey_in_missing_snapshot,
+ "key in missing snapshot %s, delete?",
+ (bch2_btree_id_to_text(&buf, iter->btree_id),
+ prt_char(&buf, ' '),
+ bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
+ ret = bch2_btree_delete_at(trans, iter,
+ BTREE_UPDATE_internal_snapshot_node) ?: 1;
+ }
+ }
fsck_err:
printbuf_exit(&buf);
return ret;
}
+int __bch2_get_snapshot_overwrites(struct btree_trans *trans,
+ enum btree_id btree, struct bpos pos,
+ snapshot_id_list *s)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ int ret = 0;
+
+ for_each_btree_key_reverse_norestart(trans, iter, btree, bpos_predecessor(pos),
+ BTREE_ITER_all_snapshots, k, ret) {
+ if (!bkey_eq(k.k->p, pos))
+ break;
+
+ if (!bch2_snapshot_is_ancestor(c, k.k->p.snapshot, pos.snapshot) ||
+ snapshot_list_has_ancestor(c, s, k.k->p.snapshot))
+ continue;
+
+ ret = snapshot_list_add(c, s, k.k->p.snapshot);
+ if (ret)
+ break;
+ }
+ bch2_trans_iter_exit(trans, &iter);
+ if (ret)
+ darray_exit(s);
+
+ return ret;
+}
+
/*
* Mark a snapshot as deleted, for future cleanup:
*/
@@ -1263,7 +1296,7 @@ static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree,
goto err;
if (!k.k || !k.k->p.offset) {
- ret = -BCH_ERR_ENOSPC_snapshot_create;
+ ret = bch_err_throw(c, ENOSPC_snapshot_create);
goto err;
}
@@ -1399,10 +1432,8 @@ int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent,
static inline u32 interior_delete_has_id(interior_delete_list *l, u32 id)
{
- darray_for_each(*l, i)
- if (i->id == id)
- return i->live_child;
- return 0;
+ struct snapshot_interior_delete *i = darray_find_p(*l, i, i->id == id);
+ return i ? i->live_child : 0;
}
static unsigned __live_child(struct snapshot_table *t, u32 id,
@@ -1434,11 +1465,9 @@ static unsigned live_child(struct bch_fs *c, u32 id)
{
struct snapshot_delete *d = &c->snapshot_delete;
- rcu_read_lock();
- u32 ret = __live_child(rcu_dereference(c->snapshots), id,
- &d->delete_leaves, &d->delete_interior);
- rcu_read_unlock();
- return ret;
+ guard(rcu)();
+ return __live_child(rcu_dereference(c->snapshots), id,
+ &d->delete_leaves, &d->delete_interior);
}
static bool snapshot_id_dying(struct snapshot_delete *d, unsigned id)
@@ -1695,7 +1724,7 @@ static int check_should_delete_snapshot(struct btree_trans *trans, struct bkey_s
static inline u32 bch2_snapshot_nth_parent_skip(struct bch_fs *c, u32 id, u32 n,
interior_delete_list *skip)
{
- rcu_read_lock();
+ guard(rcu)();
while (interior_delete_has_id(skip, id))
id = __bch2_snapshot_parent(c, id);
@@ -1704,7 +1733,6 @@ static inline u32 bch2_snapshot_nth_parent_skip(struct bch_fs *c, u32 id, u32 n,
id = __bch2_snapshot_parent(c, id);
} while (interior_delete_has_id(skip, id));
}
- rcu_read_unlock();
return id;
}
@@ -1870,6 +1898,8 @@ err:
d->running = false;
mutex_unlock(&d->progress_lock);
bch2_trans_put(trans);
+
+ bch2_recovery_pass_set_no_ratelimit(c, BCH_RECOVERY_PASS_check_snapshots);
out_unlock:
mutex_unlock(&d->lock);
if (!bch2_err_matches(ret, EROFS))
@@ -1905,7 +1935,7 @@ void bch2_delete_dead_snapshots_async(struct bch_fs *c)
BUG_ON(!test_bit(BCH_FS_may_go_rw, &c->flags));
- if (!queue_work(c->write_ref_wq, &c->snapshot_delete.work))
+ if (!queue_work(system_long_wq, &c->snapshot_delete.work))
enumerated_ref_put(&c->writes, BCH_WRITE_REF_delete_dead_snapshots);
}
diff --git a/fs/bcachefs/snapshot.h b/fs/bcachefs/snapshot.h
index 382a171f5413..6766bf673ed9 100644
--- a/fs/bcachefs/snapshot.h
+++ b/fs/bcachefs/snapshot.h
@@ -46,12 +46,9 @@ static inline const struct snapshot_t *snapshot_t(struct bch_fs *c, u32 id)
static inline u32 bch2_snapshot_tree(struct bch_fs *c, u32 id)
{
- rcu_read_lock();
+ guard(rcu)();
const struct snapshot_t *s = snapshot_t(c, id);
- id = s ? s->tree : 0;
- rcu_read_unlock();
-
- return id;
+ return s ? s->tree : 0;
}
static inline u32 __bch2_snapshot_parent_early(struct bch_fs *c, u32 id)
@@ -62,11 +59,8 @@ static inline u32 __bch2_snapshot_parent_early(struct bch_fs *c, u32 id)
static inline u32 bch2_snapshot_parent_early(struct bch_fs *c, u32 id)
{
- rcu_read_lock();
- id = __bch2_snapshot_parent_early(c, id);
- rcu_read_unlock();
-
- return id;
+ guard(rcu)();
+ return __bch2_snapshot_parent_early(c, id);
}
static inline u32 __bch2_snapshot_parent(struct bch_fs *c, u32 id)
@@ -88,20 +82,15 @@ static inline u32 __bch2_snapshot_parent(struct bch_fs *c, u32 id)
static inline u32 bch2_snapshot_parent(struct bch_fs *c, u32 id)
{
- rcu_read_lock();
- id = __bch2_snapshot_parent(c, id);
- rcu_read_unlock();
-
- return id;
+ guard(rcu)();
+ return __bch2_snapshot_parent(c, id);
}
static inline u32 bch2_snapshot_nth_parent(struct bch_fs *c, u32 id, u32 n)
{
- rcu_read_lock();
+ guard(rcu)();
while (n--)
id = __bch2_snapshot_parent(c, id);
- rcu_read_unlock();
-
return id;
}
@@ -110,13 +99,11 @@ u32 bch2_snapshot_skiplist_get(struct bch_fs *, u32);
static inline u32 bch2_snapshot_root(struct bch_fs *c, u32 id)
{
- u32 parent;
+ guard(rcu)();
- rcu_read_lock();
+ u32 parent;
while ((parent = __bch2_snapshot_parent(c, id)))
id = parent;
- rcu_read_unlock();
-
return id;
}
@@ -128,11 +115,8 @@ static inline enum snapshot_id_state __bch2_snapshot_id_state(struct bch_fs *c,
static inline enum snapshot_id_state bch2_snapshot_id_state(struct bch_fs *c, u32 id)
{
- rcu_read_lock();
- enum snapshot_id_state ret = __bch2_snapshot_id_state(c, id);
- rcu_read_unlock();
-
- return ret;
+ guard(rcu)();
+ return __bch2_snapshot_id_state(c, id);
}
static inline bool bch2_snapshot_exists(struct bch_fs *c, u32 id)
@@ -142,12 +126,9 @@ static inline bool bch2_snapshot_exists(struct bch_fs *c, u32 id)
static inline int bch2_snapshot_is_internal_node(struct bch_fs *c, u32 id)
{
- rcu_read_lock();
+ guard(rcu)();
const struct snapshot_t *s = snapshot_t(c, id);
- int ret = s ? s->children[0] : -BCH_ERR_invalid_snapshot_node;
- rcu_read_unlock();
-
- return ret;
+ return s ? s->children[0] : -BCH_ERR_invalid_snapshot_node;
}
static inline int bch2_snapshot_is_leaf(struct bch_fs *c, u32 id)
@@ -160,13 +141,8 @@ static inline int bch2_snapshot_is_leaf(struct bch_fs *c, u32 id)
static inline u32 bch2_snapshot_depth(struct bch_fs *c, u32 parent)
{
- u32 depth;
-
- rcu_read_lock();
- depth = parent ? snapshot_t(c, parent)->depth + 1 : 0;
- rcu_read_unlock();
-
- return depth;
+ guard(rcu)();
+ return parent ? snapshot_t(c, parent)->depth + 1 : 0;
}
bool __bch2_snapshot_is_ancestor(struct bch_fs *, u32, u32);
@@ -180,20 +156,14 @@ static inline bool bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ances
static inline bool bch2_snapshot_has_children(struct bch_fs *c, u32 id)
{
- rcu_read_lock();
+ guard(rcu)();
const struct snapshot_t *t = snapshot_t(c, id);
- bool ret = t && (t->children[0]|t->children[1]) != 0;
- rcu_read_unlock();
-
- return ret;
+ return t && (t->children[0]|t->children[1]) != 0;
}
static inline bool snapshot_list_has_id(snapshot_id_list *s, u32 id)
{
- darray_for_each(*s, i)
- if (*i == id)
- return true;
- return false;
+ return darray_find(*s, id) != NULL;
}
static inline bool snapshot_list_has_ancestor(struct bch_fs *c, snapshot_id_list *s, u32 id)
@@ -258,6 +228,25 @@ static inline int bch2_check_key_has_snapshot(struct btree_trans *trans,
: __bch2_check_key_has_snapshot(trans, iter, k);
}
+int __bch2_get_snapshot_overwrites(struct btree_trans *,
+ enum btree_id, struct bpos,
+ snapshot_id_list *);
+
+/*
+ * Get a list of snapshot IDs that have overwritten a given key:
+ */
+static inline int bch2_get_snapshot_overwrites(struct btree_trans *trans,
+ enum btree_id btree, struct bpos pos,
+ snapshot_id_list *s)
+{
+ darray_init(s);
+
+ return bch2_snapshot_has_children(trans->c, pos.snapshot)
+ ? __bch2_get_snapshot_overwrites(trans, btree, pos, s)
+ : 0;
+
+}
+
int bch2_snapshot_node_set_deleted(struct btree_trans *, u32);
int __bch2_key_has_snapshot_overwrites(struct btree_trans *, enum btree_id, struct bpos);
diff --git a/fs/bcachefs/str_hash.c b/fs/bcachefs/str_hash.c
index 0cbf5508a32c..71b735a85026 100644
--- a/fs/bcachefs/str_hash.c
+++ b/fs/bcachefs/str_hash.c
@@ -31,14 +31,15 @@ static int bch2_dirent_has_target(struct btree_trans *trans, struct bkey_s_c_dir
}
}
-static noinline int fsck_rename_dirent(struct btree_trans *trans,
- struct snapshots_seen *s,
- const struct bch_hash_desc desc,
- struct bch_hash_info *hash_info,
- struct bkey_s_c_dirent old)
+static int bch2_fsck_rename_dirent(struct btree_trans *trans,
+ struct snapshots_seen *s,
+ const struct bch_hash_desc desc,
+ struct bch_hash_info *hash_info,
+ struct bkey_s_c_dirent old,
+ bool *updated_before_k_pos)
{
struct qstr old_name = bch2_dirent_get_name(old);
- struct bkey_i_dirent *new = bch2_trans_kmalloc(trans, bkey_bytes(old.k) + 32);
+ struct bkey_i_dirent *new = bch2_trans_kmalloc(trans, BKEY_U64s_MAX * sizeof(u64));
int ret = PTR_ERR_OR_ZERO(new);
if (ret)
return ret;
@@ -47,28 +48,39 @@ static noinline int fsck_rename_dirent(struct btree_trans *trans,
dirent_copy_target(new, old);
new->k.p = old.k->p;
+ char *renamed_buf = bch2_trans_kmalloc(trans, old_name.len + 20);
+ ret = PTR_ERR_OR_ZERO(renamed_buf);
+ if (ret)
+ return ret;
+
for (unsigned i = 0; i < 1000; i++) {
- unsigned len = sprintf(new->v.d_name, "%.*s.fsck_renamed-%u",
- old_name.len, old_name.name, i);
- unsigned u64s = BKEY_U64s + dirent_val_u64s(len, 0);
+ new->k.u64s = BKEY_U64s_MAX;
- if (u64s > U8_MAX)
- return -EINVAL;
+ struct qstr renamed_name = (struct qstr) QSTR_INIT(renamed_buf,
+ sprintf(renamed_buf, "%.*s.fsck_renamed-%u",
+ old_name.len, old_name.name, i));
- new->k.u64s = u64s;
+ ret = bch2_dirent_init_name(new, hash_info, &renamed_name, NULL);
+ if (ret)
+ return ret;
ret = bch2_hash_set_in_snapshot(trans, bch2_dirent_hash_desc, hash_info,
(subvol_inum) { 0, old.k->p.inode },
old.k->p.snapshot, &new->k_i,
- BTREE_UPDATE_internal_snapshot_node);
- if (!bch2_err_matches(ret, EEXIST))
+ BTREE_UPDATE_internal_snapshot_node|
+ STR_HASH_must_create);
+ if (ret && !bch2_err_matches(ret, EEXIST))
+ break;
+ if (!ret) {
+ if (bpos_lt(new->k.p, old.k->p))
+ *updated_before_k_pos = true;
break;
+ }
}
- if (ret)
- return ret;
-
- return bch2_fsck_update_backpointers(trans, s, desc, hash_info, &new->k_i);
+ ret = ret ?: bch2_fsck_update_backpointers(trans, s, desc, hash_info, &new->k_i);
+ bch_err_fn(trans->c, ret);
+ return ret;
}
static noinline int hash_pick_winner(struct btree_trans *trans,
@@ -186,7 +198,7 @@ int bch2_repair_inode_hash_info(struct btree_trans *trans,
#endif
bch2_print_str(c, KERN_ERR, buf.buf);
printbuf_exit(&buf);
- ret = -BCH_ERR_fsck_repair_unimplemented;
+ ret = bch_err_throw(c, fsck_repair_unimplemented);
goto err;
}
@@ -221,11 +233,115 @@ static noinline int check_inode_hash_info_matches_root(struct btree_trans *trans
return ret;
}
+/* Put a str_hash key in its proper location, checking for duplicates */
+int bch2_str_hash_repair_key(struct btree_trans *trans,
+ struct snapshots_seen *s,
+ const struct bch_hash_desc *desc,
+ struct bch_hash_info *hash_info,
+ struct btree_iter *k_iter, struct bkey_s_c k,
+ struct btree_iter *dup_iter, struct bkey_s_c dup_k,
+ bool *updated_before_k_pos)
+{
+ struct bch_fs *c = trans->c;
+ struct printbuf buf = PRINTBUF;
+ bool free_snapshots_seen = false;
+ int ret = 0;
+
+ if (!s) {
+ s = bch2_trans_kmalloc(trans, sizeof(*s));
+ ret = PTR_ERR_OR_ZERO(s);
+ if (ret)
+ goto out;
+
+ s->pos = k_iter->pos;
+ darray_init(&s->ids);
+
+ ret = bch2_get_snapshot_overwrites(trans, desc->btree_id, k_iter->pos, &s->ids);
+ if (ret)
+ goto out;
+
+ free_snapshots_seen = true;
+ }
+
+ if (!dup_k.k) {
+ struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k);
+ ret = PTR_ERR_OR_ZERO(new);
+ if (ret)
+ goto out;
+
+ dup_k = bch2_hash_set_or_get_in_snapshot(trans, dup_iter, *desc, hash_info,
+ (subvol_inum) { 0, new->k.p.inode },
+ new->k.p.snapshot, new,
+ STR_HASH_must_create|
+ BTREE_ITER_with_updates|
+ BTREE_UPDATE_internal_snapshot_node);
+ ret = bkey_err(dup_k);
+ if (ret)
+ goto out;
+ if (dup_k.k)
+ goto duplicate_entries;
+
+ if (bpos_lt(new->k.p, k.k->p))
+ *updated_before_k_pos = true;
+
+ ret = bch2_insert_snapshot_whiteouts(trans, desc->btree_id,
+ k_iter->pos, new->k.p) ?:
+ bch2_hash_delete_at(trans, *desc, hash_info, k_iter,
+ BTREE_ITER_with_updates|
+ BTREE_UPDATE_internal_snapshot_node) ?:
+ bch2_fsck_update_backpointers(trans, s, *desc, hash_info, new) ?:
+ bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?:
+ -BCH_ERR_transaction_restart_commit;
+ } else {
+duplicate_entries:
+ ret = hash_pick_winner(trans, *desc, hash_info, k, dup_k);
+ if (ret < 0)
+ goto out;
+
+ if (!fsck_err(trans, hash_table_key_duplicate,
+ "duplicate hash table keys%s:\n%s",
+ ret != 2 ? "" : ", both point to valid inodes",
+ (printbuf_reset(&buf),
+ bch2_bkey_val_to_text(&buf, c, k),
+ prt_newline(&buf),
+ bch2_bkey_val_to_text(&buf, c, dup_k),
+ buf.buf)))
+ goto out;
+
+ switch (ret) {
+ case 0:
+ ret = bch2_hash_delete_at(trans, *desc, hash_info, k_iter, 0);
+ break;
+ case 1:
+ ret = bch2_hash_delete_at(trans, *desc, hash_info, dup_iter, 0);
+ break;
+ case 2:
+ ret = bch2_fsck_rename_dirent(trans, s, *desc, hash_info,
+ bkey_s_c_to_dirent(k),
+ updated_before_k_pos) ?:
+ bch2_hash_delete_at(trans, *desc, hash_info, k_iter,
+ BTREE_ITER_with_updates);
+ goto out;
+ }
+
+ ret = bch2_trans_commit(trans, NULL, NULL, 0) ?:
+ -BCH_ERR_transaction_restart_commit;
+ }
+out:
+fsck_err:
+ bch2_trans_iter_exit(trans, dup_iter);
+ printbuf_exit(&buf);
+ if (free_snapshots_seen)
+ darray_exit(&s->ids);
+ return ret;
+}
+
int __bch2_str_hash_check_key(struct btree_trans *trans,
struct snapshots_seen *s,
const struct bch_hash_desc *desc,
struct bch_hash_info *hash_info,
- struct btree_iter *k_iter, struct bkey_s_c hash_k)
+ struct btree_iter *k_iter, struct bkey_s_c hash_k,
+ bool *updated_before_k_pos)
{
struct bch_fs *c = trans->c;
struct btree_iter iter = {};
@@ -239,24 +355,31 @@ int __bch2_str_hash_check_key(struct btree_trans *trans,
for_each_btree_key_norestart(trans, iter, desc->btree_id,
SPOS(hash_k.k->p.inode, hash, hash_k.k->p.snapshot),
- BTREE_ITER_slots, k, ret) {
+ BTREE_ITER_slots|
+ BTREE_ITER_with_updates, k, ret) {
if (bkey_eq(k.k->p, hash_k.k->p))
break;
if (k.k->type == desc->key_type &&
- !desc->cmp_bkey(k, hash_k))
- goto duplicate_entries;
+ !desc->cmp_bkey(k, hash_k)) {
+ ret = check_inode_hash_info_matches_root(trans, hash_k.k->p.inode,
+ hash_info) ?:
+ bch2_str_hash_repair_key(trans, s, desc, hash_info,
+ k_iter, hash_k,
+ &iter, k, updated_before_k_pos);
+ break;
+ }
- if (bkey_deleted(k.k)) {
- bch2_trans_iter_exit(trans, &iter);
+ if (bkey_deleted(k.k))
goto bad_hash;
- }
}
-out:
bch2_trans_iter_exit(trans, &iter);
+out:
+fsck_err:
printbuf_exit(&buf);
return ret;
bad_hash:
+ bch2_trans_iter_exit(trans, &iter);
/*
* Before doing any repair, check hash_info itself:
*/
@@ -265,64 +388,12 @@ bad_hash:
goto out;
if (fsck_err(trans, hash_table_key_wrong_offset,
- "hash table key at wrong offset: btree %s inode %llu offset %llu, hashed to %llu\n%s",
- bch2_btree_id_str(desc->btree_id), hash_k.k->p.inode, hash_k.k->p.offset, hash,
- (printbuf_reset(&buf),
- bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf))) {
- struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, hash_k);
- if (IS_ERR(new))
- return PTR_ERR(new);
-
- k = bch2_hash_set_or_get_in_snapshot(trans, &iter, *desc, hash_info,
- (subvol_inum) { 0, hash_k.k->p.inode },
- hash_k.k->p.snapshot, new,
- STR_HASH_must_create|
- BTREE_ITER_with_updates|
- BTREE_UPDATE_internal_snapshot_node);
- ret = bkey_err(k);
- if (ret)
- goto out;
- if (k.k)
- goto duplicate_entries;
-
- ret = bch2_hash_delete_at(trans, *desc, hash_info, k_iter,
- BTREE_UPDATE_internal_snapshot_node) ?:
- bch2_fsck_update_backpointers(trans, s, *desc, hash_info, new) ?:
- bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?:
- -BCH_ERR_transaction_restart_nested;
- goto out;
- }
-fsck_err:
- goto out;
-duplicate_entries:
- ret = hash_pick_winner(trans, *desc, hash_info, hash_k, k);
- if (ret < 0)
- goto out;
-
- if (!fsck_err(trans, hash_table_key_duplicate,
- "duplicate hash table keys%s:\n%s",
- ret != 2 ? "" : ", both point to valid inodes",
- (printbuf_reset(&buf),
- bch2_bkey_val_to_text(&buf, c, hash_k),
- prt_newline(&buf),
- bch2_bkey_val_to_text(&buf, c, k),
- buf.buf)))
- goto out;
-
- switch (ret) {
- case 0:
- ret = bch2_hash_delete_at(trans, *desc, hash_info, k_iter, 0);
- break;
- case 1:
- ret = bch2_hash_delete_at(trans, *desc, hash_info, &iter, 0);
- break;
- case 2:
- ret = fsck_rename_dirent(trans, s, *desc, hash_info, bkey_s_c_to_dirent(hash_k)) ?:
- bch2_hash_delete_at(trans, *desc, hash_info, k_iter, 0);
- goto out;
- }
-
- ret = bch2_trans_commit(trans, NULL, NULL, 0) ?:
- -BCH_ERR_transaction_restart_nested;
+ "hash table key at wrong offset: should be at %llu\n%s",
+ hash,
+ (bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf)))
+ ret = bch2_str_hash_repair_key(trans, s, desc, hash_info,
+ k_iter, hash_k,
+ &iter, bkey_s_c_null,
+ updated_before_k_pos);
goto out;
}
diff --git a/fs/bcachefs/str_hash.h b/fs/bcachefs/str_hash.h
index 6762b3627e1b..79d51aef70aa 100644
--- a/fs/bcachefs/str_hash.h
+++ b/fs/bcachefs/str_hash.h
@@ -261,6 +261,7 @@ struct bkey_s_c bch2_hash_set_or_get_in_snapshot(struct btree_trans *trans,
struct bkey_i *insert,
enum btree_iter_update_trigger_flags flags)
{
+ struct bch_fs *c = trans->c;
struct btree_iter slot = {};
struct bkey_s_c k;
bool found = false;
@@ -288,7 +289,7 @@ struct bkey_s_c bch2_hash_set_or_get_in_snapshot(struct btree_trans *trans,
}
if (!ret)
- ret = -BCH_ERR_ENOSPC_str_hash_create;
+ ret = bch_err_throw(c, ENOSPC_str_hash_create);
out:
bch2_trans_iter_exit(trans, &slot);
bch2_trans_iter_exit(trans, iter);
@@ -300,7 +301,7 @@ not_found:
bch2_trans_iter_exit(trans, &slot);
return k;
} else if (!found && (flags & STR_HASH_must_replace)) {
- ret = -BCH_ERR_ENOENT_str_hash_set_must_replace;
+ ret = bch_err_throw(c, ENOENT_str_hash_set_must_replace);
} else {
if (!found && slot.path)
swap(*iter, slot);
@@ -328,7 +329,7 @@ int bch2_hash_set_in_snapshot(struct btree_trans *trans,
return ret;
if (k.k) {
bch2_trans_iter_exit(trans, &iter);
- return -BCH_ERR_EEXIST_str_hash_set;
+ return bch_err_throw(trans->c, EEXIST_str_hash_set);
}
return 0;
@@ -397,17 +398,27 @@ int bch2_hash_delete(struct btree_trans *trans,
int bch2_repair_inode_hash_info(struct btree_trans *, struct bch_inode_unpacked *);
struct snapshots_seen;
+int bch2_str_hash_repair_key(struct btree_trans *,
+ struct snapshots_seen *,
+ const struct bch_hash_desc *,
+ struct bch_hash_info *,
+ struct btree_iter *, struct bkey_s_c,
+ struct btree_iter *, struct bkey_s_c,
+ bool *);
+
int __bch2_str_hash_check_key(struct btree_trans *,
struct snapshots_seen *,
const struct bch_hash_desc *,
struct bch_hash_info *,
- struct btree_iter *, struct bkey_s_c);
+ struct btree_iter *, struct bkey_s_c,
+ bool *);
static inline int bch2_str_hash_check_key(struct btree_trans *trans,
struct snapshots_seen *s,
const struct bch_hash_desc *desc,
struct bch_hash_info *hash_info,
- struct btree_iter *k_iter, struct bkey_s_c hash_k)
+ struct btree_iter *k_iter, struct bkey_s_c hash_k,
+ bool *updated_before_k_pos)
{
if (hash_k.k->type != desc->key_type)
return 0;
@@ -415,7 +426,8 @@ static inline int bch2_str_hash_check_key(struct btree_trans *trans,
if (likely(desc->hash_bkey(hash_info, hash_k) == hash_k.k->p.offset))
return 0;
- return __bch2_str_hash_check_key(trans, s, desc, hash_info, k_iter, hash_k);
+ return __bch2_str_hash_check_key(trans, s, desc, hash_info, k_iter, hash_k,
+ updated_before_k_pos);
}
#endif /* _BCACHEFS_STR_HASH_H */
diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c
index 35c9f86a73c1..020587449123 100644
--- a/fs/bcachefs/subvolume.c
+++ b/fs/bcachefs/subvolume.c
@@ -130,10 +130,20 @@ static int check_subvol(struct btree_trans *trans,
"subvolume %llu points to missing subvolume root %llu:%u",
k.k->p.offset, le64_to_cpu(subvol.v->inode),
le32_to_cpu(subvol.v->snapshot))) {
- ret = bch2_subvolume_delete(trans, iter->pos.offset);
- bch_err_msg(c, ret, "deleting subvolume %llu", iter->pos.offset);
- ret = ret ?: -BCH_ERR_transaction_restart_nested;
- goto err;
+ /*
+ * Recreate - any contents that are still disconnected
+ * will then get reattached under lost+found
+ */
+ bch2_inode_init_early(c, &inode);
+ bch2_inode_init_late(c, &inode, bch2_current_time(c),
+ 0, 0, S_IFDIR|0700, 0, NULL);
+ inode.bi_inum = le64_to_cpu(subvol.v->inode);
+ inode.bi_snapshot = le32_to_cpu(subvol.v->snapshot);
+ inode.bi_subvol = k.k->p.offset;
+ inode.bi_parent_subvol = le32_to_cpu(subvol.v->fs_path_parent);
+ ret = __bch2_fsck_write_inode(trans, &inode);
+ if (ret)
+ goto err;
}
} else {
goto err;
@@ -141,13 +151,9 @@ static int check_subvol(struct btree_trans *trans,
if (!BCH_SUBVOLUME_SNAP(subvol.v)) {
u32 snapshot_root = bch2_snapshot_root(c, le32_to_cpu(subvol.v->snapshot));
- u32 snapshot_tree;
- struct bch_snapshot_tree st;
-
- rcu_read_lock();
- snapshot_tree = snapshot_t(c, snapshot_root)->tree;
- rcu_read_unlock();
+ u32 snapshot_tree = bch2_snapshot_tree(c, snapshot_root);
+ struct bch_snapshot_tree st;
ret = bch2_snapshot_tree_lookup(trans, snapshot_tree, &st);
bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
@@ -259,6 +265,13 @@ void bch2_subvolume_to_text(struct printbuf *out, struct bch_fs *c,
prt_printf(out, " creation_parent %u", le32_to_cpu(s.v->creation_parent));
prt_printf(out, " fs_parent %u", le32_to_cpu(s.v->fs_path_parent));
}
+
+ if (BCH_SUBVOLUME_RO(s.v))
+ prt_printf(out, " ro");
+ if (BCH_SUBVOLUME_SNAP(s.v))
+ prt_printf(out, " snapshot");
+ if (BCH_SUBVOLUME_UNLINKED(s.v))
+ prt_printf(out, " unlinked");
}
static int subvolume_children_mod(struct btree_trans *trans, struct bpos pos, bool set)
@@ -486,9 +499,12 @@ err:
static int bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid)
{
- return bch2_subvolumes_reparent(trans, subvolid) ?:
+ int ret = bch2_subvolumes_reparent(trans, subvolid) ?:
commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
__bch2_subvolume_delete(trans, subvolid));
+
+ bch2_recovery_pass_set_no_ratelimit(trans->c, BCH_RECOVERY_PASS_check_subvols);
+ return ret;
}
static void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *work)
@@ -597,7 +613,7 @@ int bch2_subvolume_create(struct btree_trans *trans, u64 inode,
ret = bch2_bkey_get_empty_slot(trans, &dst_iter,
BTREE_ID_subvolumes, POS(0, U32_MAX));
if (ret == -BCH_ERR_ENOSPC_btree_slot)
- ret = -BCH_ERR_ENOSPC_subvolume_create;
+ ret = bch_err_throw(c, ENOSPC_subvolume_create);
if (ret)
return ret;
@@ -703,8 +719,9 @@ static int __bch2_fs_upgrade_for_subvolumes(struct btree_trans *trans)
return ret;
if (!bkey_is_inode(k.k)) {
- bch_err(trans->c, "root inode not found");
- ret = -BCH_ERR_ENOENT_inode;
+ struct bch_fs *c = trans->c;
+ bch_err(c, "root inode not found");
+ ret = bch_err_throw(c, ENOENT_inode);
goto err;
}
diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c
index 6687b9235d3c..6c2e1d647403 100644
--- a/fs/bcachefs/super-io.c
+++ b/fs/bcachefs/super-io.c
@@ -1112,7 +1112,7 @@ int bch2_write_super(struct bch_fs *c)
prt_str(&buf, ")");
bch2_fs_fatal_error(c, ": %s", buf.buf);
printbuf_exit(&buf);
- ret = -BCH_ERR_sb_not_downgraded;
+ ret = bch_err_throw(c, sb_not_downgraded);
goto out;
}
@@ -1142,7 +1142,7 @@ int bch2_write_super(struct bch_fs *c)
if (c->opts.errors != BCH_ON_ERROR_continue &&
c->opts.errors != BCH_ON_ERROR_fix_safe) {
- ret = -BCH_ERR_erofs_sb_err;
+ ret = bch_err_throw(c, erofs_sb_err);
bch2_fs_fatal_error(c, "%s", buf.buf);
} else {
bch_err(c, "%s", buf.buf);
@@ -1161,7 +1161,7 @@ int bch2_write_super(struct bch_fs *c)
ca->disk_sb.seq);
bch2_fs_fatal_error(c, "%s", buf.buf);
printbuf_exit(&buf);
- ret = -BCH_ERR_erofs_sb_err;
+ ret = bch_err_throw(c, erofs_sb_err);
}
}
@@ -1215,7 +1215,7 @@ int bch2_write_super(struct bch_fs *c)
!can_mount_with_written), c,
": Unable to write superblock to sufficient devices (from %ps)",
(void *) _RET_IP_))
- ret = -BCH_ERR_erofs_sb_err;
+ ret = bch_err_throw(c, erofs_sb_err);
out:
/* Make new options visible after they're persistent: */
bch2_sb_update(c);
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 11579b74c640..397a69da5a75 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -219,23 +219,17 @@ static int bch2_fs_init_rw(struct bch_fs *);
struct bch_fs *bch2_dev_to_fs(dev_t dev)
{
- struct bch_fs *c;
-
- mutex_lock(&bch_fs_list_lock);
- rcu_read_lock();
+ guard(mutex)(&bch_fs_list_lock);
+ guard(rcu)();
+ struct bch_fs *c;
list_for_each_entry(c, &bch_fs_list, list)
for_each_member_device_rcu(c, ca, NULL)
if (ca->disk_sb.bdev && ca->disk_sb.bdev->bd_dev == dev) {
closure_get(&c->cl);
- goto found;
+ return c;
}
- c = NULL;
-found:
- rcu_read_unlock();
- mutex_unlock(&bch_fs_list_lock);
-
- return c;
+ return NULL;
}
static struct bch_fs *__bch2_uuid_to_fs(__uuid_t uuid)
@@ -480,16 +474,16 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
BUG_ON(!test_bit(BCH_FS_may_go_rw, &c->flags));
if (WARN_ON(c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info)))
- return -BCH_ERR_erofs_no_alloc_info;
+ return bch_err_throw(c, erofs_no_alloc_info);
if (test_bit(BCH_FS_initial_gc_unfixed, &c->flags)) {
bch_err(c, "cannot go rw, unfixed btree errors");
- return -BCH_ERR_erofs_unfixed_errors;
+ return bch_err_throw(c, erofs_unfixed_errors);
}
if (c->sb.features & BIT_ULL(BCH_FEATURE_small_image)) {
bch_err(c, "cannot go rw, filesystem is an unresized image file");
- return -BCH_ERR_erofs_filesystem_full;
+ return bch_err_throw(c, erofs_filesystem_full);
}
if (test_bit(BCH_FS_rw, &c->flags))
@@ -507,13 +501,12 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
clear_bit(BCH_FS_clean_shutdown, &c->flags);
- rcu_read_lock();
- for_each_online_member_rcu(c, ca)
- if (ca->mi.state == BCH_MEMBER_STATE_rw) {
- bch2_dev_allocator_add(c, ca);
- enumerated_ref_start(&ca->io_ref[WRITE]);
- }
- rcu_read_unlock();
+ scoped_guard(rcu)
+ for_each_online_member_rcu(c, ca)
+ if (ca->mi.state == BCH_MEMBER_STATE_rw) {
+ bch2_dev_allocator_add(c, ca);
+ enumerated_ref_start(&ca->io_ref[WRITE]);
+ }
bch2_recalc_capacity(c);
@@ -571,13 +564,13 @@ int bch2_fs_read_write(struct bch_fs *c)
{
if (c->opts.recovery_pass_last &&
c->opts.recovery_pass_last < BCH_RECOVERY_PASS_journal_replay)
- return -BCH_ERR_erofs_norecovery;
+ return bch_err_throw(c, erofs_norecovery);
if (c->opts.nochanges)
- return -BCH_ERR_erofs_nochanges;
+ return bch_err_throw(c, erofs_nochanges);
if (c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info))
- return -BCH_ERR_erofs_no_alloc_info;
+ return bch_err_throw(c, erofs_no_alloc_info);
return __bch2_fs_read_write(c, false);
}
@@ -762,7 +755,7 @@ static int bch2_fs_online(struct bch_fs *c)
if (c->sb.multi_device &&
__bch2_uuid_to_fs(c->sb.uuid)) {
bch_err(c, "filesystem UUID already open");
- return -BCH_ERR_filesystem_uuid_already_open;
+ return bch_err_throw(c, filesystem_uuid_already_open);
}
ret = bch2_fs_chardev_init(c);
@@ -821,7 +814,7 @@ static int bch2_fs_init_rw(struct bch_fs *c)
WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM, 1)) ||
!(c->write_ref_wq = alloc_workqueue("bcachefs_write_ref",
WQ_FREEZABLE, 0)))
- return -BCH_ERR_ENOMEM_fs_other_alloc;
+ return bch_err_throw(c, ENOMEM_fs_other_alloc);
int ret = bch2_fs_btree_interior_update_init(c) ?:
bch2_fs_btree_write_buffer_init(c) ?:
@@ -1002,7 +995,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts,
mempool_init_kvmalloc_pool(&c->btree_bounce_pool, 1,
c->opts.btree_node_size) ||
mempool_init_kmalloc_pool(&c->large_bkey_pool, 1, 2048)) {
- ret = -BCH_ERR_ENOMEM_fs_other_alloc;
+ ret = bch_err_throw(c, ENOMEM_fs_other_alloc);
goto err;
}
@@ -1038,10 +1031,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts,
ret = -EINVAL;
goto err;
}
- bch_info(c, "Using encoding defined by superblock: utf8-%u.%u.%u",
- unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING),
- unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING),
- unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING));
#else
if (c->sb.features & BIT_ULL(BCH_FEATURE_casefolding)) {
printk(KERN_ERR "Cannot mount a filesystem with casefolding on a kernel without CONFIG_UNICODE\n");
@@ -1159,8 +1148,15 @@ int bch2_fs_start(struct bch_fs *c)
print_mount_opts(c);
+#ifdef CONFIG_UNICODE
+ bch_info(c, "Using encoding defined by superblock: utf8-%u.%u.%u",
+ unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING),
+ unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING),
+ unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING));
+#endif
+
if (!bch2_fs_may_start(c))
- return -BCH_ERR_insufficient_devices_to_start;
+ return bch_err_throw(c, insufficient_devices_to_start);
down_write(&c->state_lock);
mutex_lock(&c->sb_lock);
@@ -1171,7 +1167,7 @@ int bch2_fs_start(struct bch_fs *c)
sizeof(struct bch_sb_field_ext) / sizeof(u64))) {
mutex_unlock(&c->sb_lock);
up_write(&c->state_lock);
- ret = -BCH_ERR_ENOSPC_sb;
+ ret = bch_err_throw(c, ENOSPC_sb);
goto err;
}
@@ -1182,22 +1178,20 @@ int bch2_fs_start(struct bch_fs *c)
goto err;
}
- rcu_read_lock();
- for_each_online_member_rcu(c, ca)
- bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount =
- cpu_to_le64(now);
- rcu_read_unlock();
+ scoped_guard(rcu)
+ for_each_online_member_rcu(c, ca)
+ bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount =
+ cpu_to_le64(now);
/*
* Dno't write superblock yet: recovery might have to downgrade
*/
mutex_unlock(&c->sb_lock);
- rcu_read_lock();
- for_each_online_member_rcu(c, ca)
- if (ca->mi.state == BCH_MEMBER_STATE_rw)
- bch2_dev_allocator_add(c, ca);
- rcu_read_unlock();
+ scoped_guard(rcu)
+ for_each_online_member_rcu(c, ca)
+ if (ca->mi.state == BCH_MEMBER_STATE_rw)
+ bch2_dev_allocator_add(c, ca);
bch2_recalc_capacity(c);
up_write(&c->state_lock);
@@ -1215,7 +1209,7 @@ int bch2_fs_start(struct bch_fs *c)
goto err;
if (bch2_fs_init_fault("fs_start")) {
- ret = -BCH_ERR_injected_fs_start;
+ ret = bch_err_throw(c, injected_fs_start);
goto err;
}
@@ -1242,11 +1236,11 @@ static int bch2_dev_may_add(struct bch_sb *sb, struct bch_fs *c)
struct bch_member m = bch2_sb_member_get(sb, sb->dev_idx);
if (le16_to_cpu(sb->block_size) != block_sectors(c))
- return -BCH_ERR_mismatched_block_size;
+ return bch_err_throw(c, mismatched_block_size);
if (le16_to_cpu(m.bucket_size) <
BCH_SB_BTREE_NODE_SIZE(c->disk_sb.sb))
- return -BCH_ERR_bucket_size_too_small;
+ return bch_err_throw(c, bucket_size_too_small);
return 0;
}
@@ -1557,7 +1551,7 @@ static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx)
bch2_dev_attach(c, ca, dev_idx);
return 0;
err:
- return -BCH_ERR_ENOMEM_dev_alloc;
+ return bch_err_throw(c, ENOMEM_dev_alloc);
}
static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb)
@@ -1567,13 +1561,13 @@ static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb)
if (bch2_dev_is_online(ca)) {
bch_err(ca, "already have device online in slot %u",
sb->sb->dev_idx);
- return -BCH_ERR_device_already_online;
+ return bch_err_throw(ca->fs, device_already_online);
}
if (get_capacity(sb->bdev->bd_disk) <
ca->mi.bucket_size * ca->mi.nbuckets) {
bch_err(ca, "cannot online: device too small");
- return -BCH_ERR_device_size_too_small;
+ return bch_err_throw(ca->fs, device_size_too_small);
}
BUG_ON(!enumerated_ref_is_zero(&ca->io_ref[READ]));
@@ -1725,7 +1719,7 @@ int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca,
return 0;
if (!bch2_dev_state_allowed(c, ca, new_state, flags))
- return -BCH_ERR_device_state_not_allowed;
+ return bch_err_throw(c, device_state_not_allowed);
if (new_state != BCH_MEMBER_STATE_rw)
__bch2_dev_read_only(c, ca);
@@ -1778,7 +1772,7 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
if (!bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_failed, flags)) {
bch_err(ca, "Cannot remove without losing data");
- ret = -BCH_ERR_device_state_not_allowed;
+ ret = bch_err_throw(c, device_state_not_allowed);
goto err;
}
@@ -1914,7 +1908,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
if (list_empty(&c->list)) {
mutex_lock(&bch_fs_list_lock);
if (__bch2_uuid_to_fs(c->sb.uuid))
- ret = -BCH_ERR_filesystem_uuid_already_open;
+ ret = bch_err_throw(c, filesystem_uuid_already_open);
else
list_add(&c->list, &bch_fs_list);
mutex_unlock(&bch_fs_list_lock);
@@ -2101,7 +2095,7 @@ int bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca, int flags)
if (!bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_failed, flags)) {
bch_err(ca, "Cannot offline required disk");
up_write(&c->state_lock);
- return -BCH_ERR_device_state_not_allowed;
+ return bch_err_throw(c, device_state_not_allowed);
}
__bch2_dev_offline(c, ca);
@@ -2140,7 +2134,7 @@ int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
if (nbuckets > BCH_MEMBER_NBUCKETS_MAX) {
bch_err(ca, "New device size too big (%llu greater than max %u)",
nbuckets, BCH_MEMBER_NBUCKETS_MAX);
- ret = -BCH_ERR_device_size_too_big;
+ ret = bch_err_throw(c, device_size_too_big);
goto err;
}
@@ -2148,7 +2142,7 @@ int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
get_capacity(ca->disk_sb.bdev->bd_disk) <
ca->mi.bucket_size * nbuckets) {
bch_err(ca, "New size larger than device");
- ret = -BCH_ERR_device_size_too_small;
+ ret = bch_err_throw(c, device_size_too_small);
goto err;
}
@@ -2383,7 +2377,7 @@ struct bch_fs *bch2_fs_open(darray_const_str *devices,
}
if (opts->nochanges && !opts->read_only) {
- ret = -BCH_ERR_erofs_nochanges;
+ ret = bch_err_throw(c, erofs_nochanges);
goto err_print;
}
diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c
index 1a55196d69f1..05848375cea2 100644
--- a/fs/bcachefs/sysfs.c
+++ b/fs/bcachefs/sysfs.c
@@ -26,6 +26,7 @@
#include "disk_groups.h"
#include "ec.h"
#include "enumerated_ref.h"
+#include "error.h"
#include "inode.h"
#include "journal.h"
#include "journal_reclaim.h"
@@ -37,6 +38,7 @@
#include "rebalance.h"
#include "recovery_passes.h"
#include "replicas.h"
+#include "sb-errors.h"
#include "super-io.h"
#include "tests.h"
@@ -143,6 +145,7 @@ do { \
write_attribute(trigger_gc);
write_attribute(trigger_discards);
write_attribute(trigger_invalidates);
+write_attribute(trigger_journal_commit);
write_attribute(trigger_journal_flush);
write_attribute(trigger_journal_writes);
write_attribute(trigger_btree_cache_shrink);
@@ -151,6 +154,7 @@ write_attribute(trigger_btree_updates);
write_attribute(trigger_freelist_wakeup);
write_attribute(trigger_recalc_capacity);
write_attribute(trigger_delete_dead_snapshots);
+write_attribute(trigger_emergency_read_only);
read_attribute(gc_gens_pos);
read_attribute(uuid);
@@ -172,6 +176,7 @@ read_attribute(btree_write_stats);
read_attribute(btree_cache_size);
read_attribute(compression_stats);
+read_attribute(errors);
read_attribute(journal_debug);
read_attribute(btree_cache);
read_attribute(btree_key_cache);
@@ -353,6 +358,9 @@ SHOW(bch2_fs)
if (attr == &sysfs_compression_stats)
bch2_compression_stats_to_text(out, c);
+ if (attr == &sysfs_errors)
+ bch2_fs_errors_to_text(out, c);
+
if (attr == &sysfs_new_stripes)
bch2_new_stripes_to_text(out, c);
@@ -428,6 +436,9 @@ STORE(bch2_fs)
if (attr == &sysfs_trigger_invalidates)
bch2_do_invalidates(c);
+ if (attr == &sysfs_trigger_journal_commit)
+ bch2_journal_flush(&c->journal);
+
if (attr == &sysfs_trigger_journal_flush) {
bch2_journal_flush_all_pins(&c->journal);
bch2_journal_meta(&c->journal);
@@ -448,6 +459,16 @@ STORE(bch2_fs)
if (attr == &sysfs_trigger_delete_dead_snapshots)
__bch2_delete_dead_snapshots(c);
+ if (attr == &sysfs_trigger_emergency_read_only) {
+ struct printbuf buf = PRINTBUF;
+ bch2_log_msg_start(c, &buf);
+
+ prt_printf(&buf, "shutdown by sysfs\n");
+ bch2_fs_emergency_read_only2(c, &buf);
+ bch2_print_str(c, KERN_ERR, buf.buf);
+ printbuf_exit(&buf);
+ }
+
#ifdef CONFIG_BCACHEFS_TESTS
if (attr == &sysfs_perf_test) {
char *tmp = kstrdup(buf, GFP_KERNEL), *p = tmp;
@@ -483,6 +504,7 @@ struct attribute *bch2_fs_files[] = {
&sysfs_recovery_status,
&sysfs_compression_stats,
+ &sysfs_errors,
#ifdef CONFIG_BCACHEFS_TESTS
&sysfs_perf_test,
@@ -571,6 +593,7 @@ struct attribute *bch2_fs_internal_files[] = {
&sysfs_trigger_gc,
&sysfs_trigger_discards,
&sysfs_trigger_invalidates,
+ &sysfs_trigger_journal_commit,
&sysfs_trigger_journal_flush,
&sysfs_trigger_journal_writes,
&sysfs_trigger_btree_cache_shrink,
@@ -579,6 +602,7 @@ struct attribute *bch2_fs_internal_files[] = {
&sysfs_trigger_freelist_wakeup,
&sysfs_trigger_recalc_capacity,
&sysfs_trigger_delete_dead_snapshots,
+ &sysfs_trigger_emergency_read_only,
&sysfs_gc_gens_pos,
diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h
index 8cb5b40704fd..dc09532796af 100644
--- a/fs/bcachefs/trace.h
+++ b/fs/bcachefs/trace.h
@@ -199,6 +199,50 @@ DECLARE_EVENT_CLASS(bio,
(unsigned long long)__entry->sector, __entry->nr_sector)
);
+/* errors */
+
+TRACE_EVENT(error_throw,
+ TP_PROTO(struct bch_fs *c, int bch_err, unsigned long ip),
+ TP_ARGS(c, bch_err, ip),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev )
+ __field(int, err )
+ __array(char, err_str, 32 )
+ __array(char, ip, 32 )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = c->dev;
+ __entry->err = bch_err;
+ strscpy(__entry->err_str, bch2_err_str(bch_err), sizeof(__entry->err_str));
+ snprintf(__entry->ip, sizeof(__entry->ip), "%ps", (void *) ip);
+ ),
+
+ TP_printk("%d,%d %s ret %s", MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ip, __entry->err_str)
+);
+
+TRACE_EVENT(error_downcast,
+ TP_PROTO(int bch_err, int std_err, unsigned long ip),
+ TP_ARGS(bch_err, std_err, ip),
+
+ TP_STRUCT__entry(
+ __array(char, bch_err, 32 )
+ __array(char, std_err, 32 )
+ __array(char, ip, 32 )
+ ),
+
+ TP_fast_assign(
+ strscpy(__entry->bch_err, bch2_err_str(bch_err), sizeof(__entry->bch_err));
+ strscpy(__entry->std_err, bch2_err_str(std_err), sizeof(__entry->std_err));
+ snprintf(__entry->ip, sizeof(__entry->ip), "%ps", (void *) ip);
+ ),
+
+ TP_printk("%s ret %s -> %s %s", __entry->ip,
+ __entry->bch_err, __entry->std_err, __entry->ip)
+);
+
/* disk_accounting.c */
TRACE_EVENT(accounting_mem_insert,
@@ -1431,28 +1475,19 @@ DEFINE_EVENT(fs_str, data_update,
TP_ARGS(c, str)
);
-DEFINE_EVENT(fs_str, io_move_created_rebalance,
+DEFINE_EVENT(fs_str, io_move_pred,
TP_PROTO(struct bch_fs *c, const char *str),
TP_ARGS(c, str)
);
-TRACE_EVENT(error_downcast,
- TP_PROTO(int bch_err, int std_err, unsigned long ip),
- TP_ARGS(bch_err, std_err, ip),
-
- TP_STRUCT__entry(
- __array(char, bch_err, 32 )
- __array(char, std_err, 32 )
- __array(char, ip, 32 )
- ),
-
- TP_fast_assign(
- strscpy(__entry->bch_err, bch2_err_str(bch_err), sizeof(__entry->bch_err));
- strscpy(__entry->std_err, bch2_err_str(std_err), sizeof(__entry->std_err));
- snprintf(__entry->ip, sizeof(__entry->ip), "%ps", (void *) ip);
- ),
+DEFINE_EVENT(fs_str, io_move_created_rebalance,
+ TP_PROTO(struct bch_fs *c, const char *str),
+ TP_ARGS(c, str)
+);
- TP_printk("%s -> %s %s", __entry->bch_err, __entry->std_err, __entry->ip)
+DEFINE_EVENT(fs_str, io_move_evacuate_bucket,
+ TP_PROTO(struct bch_fs *c, const char *str),
+ TP_ARGS(c, str)
);
#ifdef CONFIG_BCACHEFS_PATH_TRACEPOINTS
diff --git a/fs/smb/server/Kconfig b/fs/smb/server/Kconfig
index cf70e96ad4de..4a23a5e7e8fe 100644
--- a/fs/smb/server/Kconfig
+++ b/fs/smb/server/Kconfig
@@ -11,6 +11,7 @@ config SMB_SERVER
select CRYPTO_HMAC
select CRYPTO_ECB
select CRYPTO_LIB_DES
+ select CRYPTO_LIB_SHA256
select CRYPTO_SHA256
select CRYPTO_CMAC
select CRYPTO_SHA512
diff --git a/fs/smb/server/auth.c b/fs/smb/server/auth.c
index b3d121052408..d99871c21451 100644
--- a/fs/smb/server/auth.c
+++ b/fs/smb/server/auth.c
@@ -979,40 +979,6 @@ out:
return rc;
}
-int ksmbd_gen_sd_hash(struct ksmbd_conn *conn, char *sd_buf, int len,
- __u8 *pi_hash)
-{
- int rc;
- struct ksmbd_crypto_ctx *ctx = NULL;
-
- ctx = ksmbd_crypto_ctx_find_sha256();
- if (!ctx) {
- ksmbd_debug(AUTH, "could not alloc sha256\n");
- return -ENOMEM;
- }
-
- rc = crypto_shash_init(CRYPTO_SHA256(ctx));
- if (rc) {
- ksmbd_debug(AUTH, "could not init shashn");
- goto out;
- }
-
- rc = crypto_shash_update(CRYPTO_SHA256(ctx), sd_buf, len);
- if (rc) {
- ksmbd_debug(AUTH, "could not update with n\n");
- goto out;
- }
-
- rc = crypto_shash_final(CRYPTO_SHA256(ctx), pi_hash);
- if (rc) {
- ksmbd_debug(AUTH, "Could not generate hash err : %d\n", rc);
- goto out;
- }
-out:
- ksmbd_release_crypto_ctx(ctx);
- return rc;
-}
-
static int ksmbd_get_encryption_key(struct ksmbd_work *work, __u64 ses_id,
int enc, u8 *key)
{
diff --git a/fs/smb/server/auth.h b/fs/smb/server/auth.h
index 362b6159a6cf..6879a1bd1b91 100644
--- a/fs/smb/server/auth.h
+++ b/fs/smb/server/auth.h
@@ -66,6 +66,4 @@ int ksmbd_gen_smb311_encryptionkey(struct ksmbd_conn *conn,
struct ksmbd_session *sess);
int ksmbd_gen_preauth_integrity_hash(struct ksmbd_conn *conn, char *buf,
__u8 *pi_hash);
-int ksmbd_gen_sd_hash(struct ksmbd_conn *conn, char *sd_buf, int len,
- __u8 *pi_hash);
#endif
diff --git a/fs/smb/server/connection.h b/fs/smb/server/connection.h
index 14620e147dda..6efed923bd68 100644
--- a/fs/smb/server/connection.h
+++ b/fs/smb/server/connection.h
@@ -108,6 +108,7 @@ struct ksmbd_conn {
__le16 signing_algorithm;
bool binding;
atomic_t refcnt;
+ bool is_aapl;
};
struct ksmbd_conn_ops {
diff --git a/fs/smb/server/crypto_ctx.c b/fs/smb/server/crypto_ctx.c
index ce733dc9a4a3..80bd68c8635e 100644
--- a/fs/smb/server/crypto_ctx.c
+++ b/fs/smb/server/crypto_ctx.c
@@ -75,9 +75,6 @@ static struct shash_desc *alloc_shash_desc(int id)
case CRYPTO_SHASH_CMACAES:
tfm = crypto_alloc_shash("cmac(aes)", 0, 0);
break;
- case CRYPTO_SHASH_SHA256:
- tfm = crypto_alloc_shash("sha256", 0, 0);
- break;
case CRYPTO_SHASH_SHA512:
tfm = crypto_alloc_shash("sha512", 0, 0);
break;
@@ -198,11 +195,6 @@ struct ksmbd_crypto_ctx *ksmbd_crypto_ctx_find_cmacaes(void)
return ____crypto_shash_ctx_find(CRYPTO_SHASH_CMACAES);
}
-struct ksmbd_crypto_ctx *ksmbd_crypto_ctx_find_sha256(void)
-{
- return ____crypto_shash_ctx_find(CRYPTO_SHASH_SHA256);
-}
-
struct ksmbd_crypto_ctx *ksmbd_crypto_ctx_find_sha512(void)
{
return ____crypto_shash_ctx_find(CRYPTO_SHASH_SHA512);
diff --git a/fs/smb/server/crypto_ctx.h b/fs/smb/server/crypto_ctx.h
index 4a367c62f653..ac64801d52d3 100644
--- a/fs/smb/server/crypto_ctx.h
+++ b/fs/smb/server/crypto_ctx.h
@@ -13,7 +13,6 @@ enum {
CRYPTO_SHASH_HMACMD5 = 0,
CRYPTO_SHASH_HMACSHA256,
CRYPTO_SHASH_CMACAES,
- CRYPTO_SHASH_SHA256,
CRYPTO_SHASH_SHA512,
CRYPTO_SHASH_MAX,
};
@@ -39,14 +38,12 @@ struct ksmbd_crypto_ctx {
#define CRYPTO_HMACMD5(c) ((c)->desc[CRYPTO_SHASH_HMACMD5])
#define CRYPTO_HMACSHA256(c) ((c)->desc[CRYPTO_SHASH_HMACSHA256])
#define CRYPTO_CMACAES(c) ((c)->desc[CRYPTO_SHASH_CMACAES])
-#define CRYPTO_SHA256(c) ((c)->desc[CRYPTO_SHASH_SHA256])
#define CRYPTO_SHA512(c) ((c)->desc[CRYPTO_SHASH_SHA512])
#define CRYPTO_HMACMD5_TFM(c) ((c)->desc[CRYPTO_SHASH_HMACMD5]->tfm)
#define CRYPTO_HMACSHA256_TFM(c)\
((c)->desc[CRYPTO_SHASH_HMACSHA256]->tfm)
#define CRYPTO_CMACAES_TFM(c) ((c)->desc[CRYPTO_SHASH_CMACAES]->tfm)
-#define CRYPTO_SHA256_TFM(c) ((c)->desc[CRYPTO_SHASH_SHA256]->tfm)
#define CRYPTO_SHA512_TFM(c) ((c)->desc[CRYPTO_SHASH_SHA512]->tfm)
#define CRYPTO_GCM(c) ((c)->ccmaes[CRYPTO_AEAD_AES_GCM])
@@ -57,7 +54,6 @@ struct ksmbd_crypto_ctx *ksmbd_crypto_ctx_find_hmacmd5(void);
struct ksmbd_crypto_ctx *ksmbd_crypto_ctx_find_hmacsha256(void);
struct ksmbd_crypto_ctx *ksmbd_crypto_ctx_find_cmacaes(void);
struct ksmbd_crypto_ctx *ksmbd_crypto_ctx_find_sha512(void);
-struct ksmbd_crypto_ctx *ksmbd_crypto_ctx_find_sha256(void);
struct ksmbd_crypto_ctx *ksmbd_crypto_ctx_find_gcm(void);
struct ksmbd_crypto_ctx *ksmbd_crypto_ctx_find_ccm(void);
void ksmbd_crypto_destroy(void);
diff --git a/fs/smb/server/server.c b/fs/smb/server/server.c
index ab533c602987..8c9c49c3a0a4 100644
--- a/fs/smb/server/server.c
+++ b/fs/smb/server/server.c
@@ -631,6 +631,5 @@ MODULE_SOFTDEP("pre: sha512");
MODULE_SOFTDEP("pre: aead2");
MODULE_SOFTDEP("pre: ccm");
MODULE_SOFTDEP("pre: gcm");
-MODULE_SOFTDEP("pre: crc32");
module_init(ksmbd_server_init)
module_exit(ksmbd_server_exit)
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index 8d414239b3fe..1a308171b599 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -2874,7 +2874,7 @@ int smb2_open(struct ksmbd_work *work)
int req_op_level = 0, open_flags = 0, may_flags = 0, file_info = 0;
int rc = 0;
int contxt_cnt = 0, query_disk_id = 0;
- int maximal_access_ctxt = 0, posix_ctxt = 0;
+ bool maximal_access_ctxt = false, posix_ctxt = false;
int s_type = 0;
int next_off = 0;
char *name = NULL;
@@ -2903,6 +2903,27 @@ int smb2_open(struct ksmbd_work *work)
return create_smb2_pipe(work);
}
+ if (req->CreateContextsOffset && tcon->posix_extensions) {
+ context = smb2_find_context_vals(req, SMB2_CREATE_TAG_POSIX, 16);
+ if (IS_ERR(context)) {
+ rc = PTR_ERR(context);
+ goto err_out2;
+ } else if (context) {
+ struct create_posix *posix = (struct create_posix *)context;
+
+ if (le16_to_cpu(context->DataOffset) +
+ le32_to_cpu(context->DataLength) <
+ sizeof(struct create_posix) - 4) {
+ rc = -EINVAL;
+ goto err_out2;
+ }
+ ksmbd_debug(SMB, "get posix context\n");
+
+ posix_mode = le32_to_cpu(posix->Mode);
+ posix_ctxt = true;
+ }
+ }
+
if (req->NameLength) {
name = smb2_get_name((char *)req + le16_to_cpu(req->NameOffset),
le16_to_cpu(req->NameLength),
@@ -2925,9 +2946,11 @@ int smb2_open(struct ksmbd_work *work)
goto err_out2;
}
- rc = ksmbd_validate_filename(name);
- if (rc < 0)
- goto err_out2;
+ if (posix_ctxt == false) {
+ rc = ksmbd_validate_filename(name);
+ if (rc < 0)
+ goto err_out2;
+ }
if (ksmbd_share_veto_filename(share, name)) {
rc = -ENOENT;
@@ -3085,28 +3108,6 @@ int smb2_open(struct ksmbd_work *work)
rc = -EBADF;
goto err_out2;
}
-
- if (tcon->posix_extensions) {
- context = smb2_find_context_vals(req,
- SMB2_CREATE_TAG_POSIX, 16);
- if (IS_ERR(context)) {
- rc = PTR_ERR(context);
- goto err_out2;
- } else if (context) {
- struct create_posix *posix =
- (struct create_posix *)context;
- if (le16_to_cpu(context->DataOffset) +
- le32_to_cpu(context->DataLength) <
- sizeof(struct create_posix) - 4) {
- rc = -EINVAL;
- goto err_out2;
- }
- ksmbd_debug(SMB, "get posix context\n");
-
- posix_mode = le32_to_cpu(posix->Mode);
- posix_ctxt = 1;
- }
- }
}
if (ksmbd_override_fsids(work)) {
@@ -3539,6 +3540,15 @@ int smb2_open(struct ksmbd_work *work)
ksmbd_debug(SMB, "get query on disk id context\n");
query_disk_id = 1;
}
+
+ if (conn->is_aapl == false) {
+ context = smb2_find_context_vals(req, SMB2_CREATE_AAPL, 4);
+ if (IS_ERR(context)) {
+ rc = PTR_ERR(context);
+ goto err_out1;
+ } else if (context)
+ conn->is_aapl = true;
+ }
}
rc = ksmbd_vfs_getattr(&path, &stat);
@@ -3978,7 +3988,10 @@ static int smb2_populate_readdir_entry(struct ksmbd_conn *conn, int info_level,
if (dinfo->EaSize)
dinfo->ExtFileAttributes = FILE_ATTRIBUTE_REPARSE_POINT_LE;
dinfo->Reserved = 0;
- dinfo->UniqueId = cpu_to_le64(ksmbd_kstat->kstat->ino);
+ if (conn->is_aapl)
+ dinfo->UniqueId = 0;
+ else
+ dinfo->UniqueId = cpu_to_le64(ksmbd_kstat->kstat->ino);
if (d_info->hide_dot_file && d_info->name[0] == '.')
dinfo->ExtFileAttributes |= FILE_ATTRIBUTE_HIDDEN_LE;
memcpy(dinfo->FileName, conv_name, conv_len);
@@ -3995,7 +4008,10 @@ static int smb2_populate_readdir_entry(struct ksmbd_conn *conn, int info_level,
smb2_get_reparse_tag_special_file(ksmbd_kstat->kstat->mode);
if (fibdinfo->EaSize)
fibdinfo->ExtFileAttributes = FILE_ATTRIBUTE_REPARSE_POINT_LE;
- fibdinfo->UniqueId = cpu_to_le64(ksmbd_kstat->kstat->ino);
+ if (conn->is_aapl)
+ fibdinfo->UniqueId = 0;
+ else
+ fibdinfo->UniqueId = cpu_to_le64(ksmbd_kstat->kstat->ino);
fibdinfo->ShortNameLength = 0;
fibdinfo->Reserved = 0;
fibdinfo->Reserved2 = cpu_to_le16(0);
diff --git a/fs/smb/server/smb2pdu.h b/fs/smb/server/smb2pdu.h
index 17a0b18a8406..16ae8a10490b 100644
--- a/fs/smb/server/smb2pdu.h
+++ b/fs/smb/server/smb2pdu.h
@@ -63,6 +63,9 @@ struct preauth_integrity_info {
#define SMB2_SESSION_TIMEOUT (10 * HZ)
+/* Apple Defined Contexts */
+#define SMB2_CREATE_AAPL "AAPL"
+
struct create_durable_req_v2 {
struct create_context_hdr ccontext;
__u8 Name[8];
diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c
index baf0d3031a44..ba45e809555a 100644
--- a/fs/smb/server/vfs.c
+++ b/fs/smb/server/vfs.c
@@ -4,6 +4,7 @@
* Copyright (C) 2018 Samsung Electronics Co., Ltd.
*/
+#include <crypto/sha2.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/filelock.h>
@@ -1476,11 +1477,7 @@ int ksmbd_vfs_set_sd_xattr(struct ksmbd_conn *conn,
acl.sd_buf = (char *)pntsd;
acl.sd_size = len;
- rc = ksmbd_gen_sd_hash(conn, acl.sd_buf, acl.sd_size, acl.hash);
- if (rc) {
- pr_err("failed to generate hash for ndr acl\n");
- return rc;
- }
+ sha256(acl.sd_buf, acl.sd_size, acl.hash);
smb_acl = ksmbd_vfs_make_xattr_posix_acl(idmap, inode,
ACL_TYPE_ACCESS);
@@ -1495,12 +1492,7 @@ int ksmbd_vfs_set_sd_xattr(struct ksmbd_conn *conn,
goto out;
}
- rc = ksmbd_gen_sd_hash(conn, acl_ndr.data, acl_ndr.offset,
- acl.posix_acl_hash);
- if (rc) {
- pr_err("failed to generate hash for ndr acl\n");
- goto out;
- }
+ sha256(acl_ndr.data, acl_ndr.offset, acl.posix_acl_hash);
rc = ndr_encode_v4_ntacl(&sd_ndr, &acl);
if (rc) {
@@ -1557,11 +1549,7 @@ int ksmbd_vfs_get_sd_xattr(struct ksmbd_conn *conn,
goto out_free;
}
- rc = ksmbd_gen_sd_hash(conn, acl_ndr.data, acl_ndr.offset, cmp_hash);
- if (rc) {
- pr_err("failed to generate hash for ndr acl\n");
- goto out_free;
- }
+ sha256(acl_ndr.data, acl_ndr.offset, cmp_hash);
if (memcmp(cmp_hash, acl.posix_acl_hash, XATTR_SD_HASH_SIZE)) {
pr_err("hash value diff\n");
diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 420c7f9aa6ee..ce377f7fb912 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -111,6 +111,8 @@
/* bits unique to S1G beacon */
#define IEEE80211_S1G_BCN_NEXT_TBTT 0x100
+#define IEEE80211_S1G_BCN_CSSID 0x200
+#define IEEE80211_S1G_BCN_ANO 0x400
/* see 802.11ah-2016 9.9 NDP CMAC frames */
#define IEEE80211_S1G_1MHZ_NDP_BITS 25
@@ -153,9 +155,6 @@
#define IEEE80211_ANO_NETTYPE_WILD 15
-/* bits unique to S1G beacon */
-#define IEEE80211_S1G_BCN_NEXT_TBTT 0x100
-
/* control extension - for IEEE80211_FTYPE_CTL | IEEE80211_STYPE_CTL_EXT */
#define IEEE80211_CTL_EXT_POLL 0x2000
#define IEEE80211_CTL_EXT_SPR 0x3000
@@ -628,6 +627,42 @@ static inline bool ieee80211_is_s1g_beacon(__le16 fc)
}
/**
+ * ieee80211_s1g_has_next_tbtt - check if IEEE80211_S1G_BCN_NEXT_TBTT
+ * @fc: frame control bytes in little-endian byteorder
+ * Return: whether or not the frame contains the variable-length
+ * next TBTT field
+ */
+static inline bool ieee80211_s1g_has_next_tbtt(__le16 fc)
+{
+ return ieee80211_is_s1g_beacon(fc) &&
+ (fc & cpu_to_le16(IEEE80211_S1G_BCN_NEXT_TBTT));
+}
+
+/**
+ * ieee80211_s1g_has_ano - check if IEEE80211_S1G_BCN_ANO
+ * @fc: frame control bytes in little-endian byteorder
+ * Return: whether or not the frame contains the variable-length
+ * ANO field
+ */
+static inline bool ieee80211_s1g_has_ano(__le16 fc)
+{
+ return ieee80211_is_s1g_beacon(fc) &&
+ (fc & cpu_to_le16(IEEE80211_S1G_BCN_ANO));
+}
+
+/**
+ * ieee80211_s1g_has_cssid - check if IEEE80211_S1G_BCN_CSSID
+ * @fc: frame control bytes in little-endian byteorder
+ * Return: whether or not the frame contains the variable-length
+ * compressed SSID field
+ */
+static inline bool ieee80211_s1g_has_cssid(__le16 fc)
+{
+ return ieee80211_is_s1g_beacon(fc) &&
+ (fc & cpu_to_le16(IEEE80211_S1G_BCN_CSSID));
+}
+
+/**
* ieee80211_is_s1g_short_beacon - check if frame is an S1G short beacon
* @fc: frame control bytes in little-endian byteorder
* Return: whether or not the frame is an S1G short beacon,
@@ -1245,16 +1280,40 @@ struct ieee80211_ext {
u8 change_seq;
u8 variable[0];
} __packed s1g_beacon;
- struct {
- u8 sa[ETH_ALEN];
- __le32 timestamp;
- u8 change_seq;
- u8 next_tbtt[3];
- u8 variable[0];
- } __packed s1g_short_beacon;
} u;
} __packed __aligned(2);
+/**
+ * ieee80211_s1g_optional_len - determine length of optional S1G beacon fields
+ * @fc: frame control bytes in little-endian byteorder
+ * Return: total length in bytes of the optional fixed-length fields
+ *
+ * S1G beacons may contain up to three optional fixed-length fields that
+ * precede the variable-length elements. Whether these fields are present
+ * is indicated by flags in the frame control field.
+ *
+ * From IEEE 802.11-2024 section 9.3.4.3:
+ * - Next TBTT field may be 0 or 3 bytes
+ * - Short SSID field may be 0 or 4 bytes
+ * - Access Network Options (ANO) field may be 0 or 1 byte
+ */
+static inline size_t
+ieee80211_s1g_optional_len(__le16 fc)
+{
+ size_t len = 0;
+
+ if (ieee80211_s1g_has_next_tbtt(fc))
+ len += 3;
+
+ if (ieee80211_s1g_has_cssid(fc))
+ len += 4;
+
+ if (ieee80211_s1g_has_ano(fc))
+ len += 1;
+
+ return len;
+}
+
#define IEEE80211_TWT_CONTROL_NDP BIT(0)
#define IEEE80211_TWT_CONTROL_RESP_MODE BIT(1)
#define IEEE80211_TWT_CONTROL_NEG_TYPE_BROADCAST BIT(3)
diff --git a/include/linux/irqchip/irq-renesas-rzv2h.h b/include/linux/irqchip/irq-renesas-rzv2h.h
new file mode 100644
index 000000000000..618a60d2eac0
--- /dev/null
+++ b/include/linux/irqchip/irq-renesas-rzv2h.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Renesas RZ/V2H(P) Interrupt Control Unit (ICU)
+ *
+ * Copyright (C) 2025 Renesas Electronics Corporation.
+ */
+
+#ifndef __LINUX_IRQ_RENESAS_RZV2H
+#define __LINUX_IRQ_RENESAS_RZV2H
+
+#include <linux/platform_device.h>
+
+#define RZV2H_ICU_DMAC_REQ_NO_DEFAULT 0x3ff
+
+#ifdef CONFIG_RENESAS_RZV2H_ICU
+void rzv2h_icu_register_dma_req(struct platform_device *icu_dev, u8 dmac_index, u8 dmac_channel,
+ u16 req_no);
+#else
+static inline void rzv2h_icu_register_dma_req(struct platform_device *icu_dev, u8 dmac_index,
+ u8 dmac_channel, u16 req_no) { }
+#endif
+
+#endif /* __LINUX_IRQ_RENESAS_RZV2H */
diff --git a/include/linux/phy/phy-hdmi.h b/include/linux/phy/phy-hdmi.h
new file mode 100644
index 000000000000..f0ec963c6e84
--- /dev/null
+++ b/include/linux/phy/phy-hdmi.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2022,2024 NXP
+ */
+
+#ifndef __PHY_HDMI_H_
+#define __PHY_HDMI_H_
+
+/**
+ * struct phy_configure_opts_hdmi - HDMI configuration set
+ * @tmds_char_rate: HDMI TMDS Character Rate in Hertz.
+ * @bpc: Bits per color channel.
+ *
+ * This structure is used to represent the configuration state of a HDMI phy.
+ */
+struct phy_configure_opts_hdmi {
+ unsigned long long tmds_char_rate;
+ unsigned int bpc;
+};
+
+#endif /* __PHY_HDMI_H_ */
diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h
index e63e6e70e860..437769e061b7 100644
--- a/include/linux/phy/phy.h
+++ b/include/linux/phy/phy.h
@@ -17,6 +17,7 @@
#include <linux/regulator/consumer.h>
#include <linux/phy/phy-dp.h>
+#include <linux/phy/phy-hdmi.h>
#include <linux/phy/phy-lvds.h>
#include <linux/phy/phy-mipi-dphy.h>
@@ -42,7 +43,8 @@ enum phy_mode {
PHY_MODE_MIPI_DPHY,
PHY_MODE_SATA,
PHY_MODE_LVDS,
- PHY_MODE_DP
+ PHY_MODE_DP,
+ PHY_MODE_HDMI,
};
enum phy_media {
@@ -60,11 +62,14 @@ enum phy_media {
* the DisplayPort protocol.
* @lvds: Configuration set applicable for phys supporting
* the LVDS phy mode.
+ * @hdmi: Configuration set applicable for phys supporting
+ * the HDMI phy mode.
*/
union phy_configure_opts {
struct phy_configure_opts_mipi_dphy mipi_dphy;
struct phy_configure_opts_dp dp;
struct phy_configure_opts_lvds lvds;
+ struct phy_configure_opts_hdmi hdmi;
};
/**
diff --git a/include/linux/soc/samsung/exynos-regs-pmu.h b/include/linux/soc/samsung/exynos-regs-pmu.h
index 0d5a17ea8fb8..1a2c0e0838f9 100644
--- a/include/linux/soc/samsung/exynos-regs-pmu.h
+++ b/include/linux/soc/samsung/exynos-regs-pmu.h
@@ -55,6 +55,8 @@
#define EXYNOS4_MIPI_PHY_SRESETN (1 << 1)
#define EXYNOS4_MIPI_PHY_MRESETN (1 << 2)
#define EXYNOS4_MIPI_PHY_RESET_MASK (3 << 1)
+/* USB PHY enable bit, valid for Exynos7870 */
+#define EXYNOS7870_USB2PHY_ENABLE (1 << 1)
#define S5P_INFORM0 0x0800
#define S5P_INFORM1 0x0804
@@ -185,6 +187,9 @@
/* Only for S5Pv210 */
#define S5PV210_EINT_WAKEUP_MASK 0xC004
+/* Only for Exynos2200 */
+#define EXYNOS2200_PHY_CTRL_USB20 0x72C
+
/* Only for Exynos4210 */
#define S5P_CMU_CLKSTOP_LCD1_LOWPWR 0x1154
#define S5P_CMU_RESET_LCD1_LOWPWR 0x1174
diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h
index 2362f621d94c..0832776262ac 100644
--- a/include/linux/soundwire/sdw.h
+++ b/include/linux/soundwire/sdw.h
@@ -8,6 +8,7 @@
#include <linux/bug.h>
#include <linux/completion.h>
#include <linux/device.h>
+#include <linux/idr.h>
#include <linux/irq.h>
#include <linux/irqdomain.h>
#include <linux/lockdep_types.h>
@@ -50,6 +51,7 @@ struct sdw_slave;
#define SDW_FRAME_CTRL_BITS 48
#define SDW_MAX_DEVICES 11
+#define SDW_FW_MAX_DEVICES 16
#define SDW_MAX_PORTS 15
#define SDW_VALID_PORT_RANGE(n) ((n) < SDW_MAX_PORTS && (n) >= 1)
@@ -630,6 +632,7 @@ struct sdw_slave_ops {
* struct sdw_slave - SoundWire Slave
* @id: MIPI device ID
* @dev: Linux device
+ * @index: internal ID for this slave
* @irq: IRQ number
* @status: Status reported by the Slave
* @bus: Bus handle
@@ -661,6 +664,7 @@ struct sdw_slave_ops {
struct sdw_slave {
struct sdw_slave_id id;
struct device dev;
+ int index;
int irq;
enum sdw_slave_status status;
struct sdw_bus *bus;
@@ -968,6 +972,7 @@ struct sdw_stream_runtime {
* @md: Master device
* @bus_lock_key: bus lock key associated to @bus_lock
* @bus_lock: bus lock
+ * @slave_ida: IDA for allocating internal slave IDs
* @slaves: list of Slaves on this bus
* @msg_lock_key: message lock key associated to @msg_lock
* @msg_lock: message lock
@@ -1010,6 +1015,7 @@ struct sdw_bus {
struct sdw_master_device *md;
struct lock_class_key bus_lock_key;
struct mutex bus_lock;
+ struct ida slave_ida;
struct list_head slaves;
struct lock_class_key msg_lock_key;
struct mutex msg_lock;
diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h
index dc6ebaee3d18..9c9435009537 100644
--- a/include/linux/soundwire/sdw_intel.h
+++ b/include/linux/soundwire/sdw_intel.h
@@ -189,6 +189,9 @@
#define SDW_SHIM3_INTEL_VS_ACTMCTL_DOAISE2 BIT(14)
#define SDW_SHIM3_INTEL_VS_ACTMCTL_CLDE BIT(15)
+/* ACE3+ Mic privacy control and status register */
+#define SDW_SHIM2_INTEL_VS_PVCCS 0x10
+
/**
* struct sdw_intel_stream_params_data: configuration passed during
* the @params_stream callback, e.g. for interaction with DSP
@@ -331,6 +334,7 @@ struct sdw_intel_ctx {
* @shim_base: sdw shim base.
* @alh_base: sdw alh base.
* @ext: extended HDaudio link support
+ * @mic_privacy: ACE version supports microphone privacy
* @hbus: hdac_bus pointer, needed for power management
* @eml_lock: mutex protecting shared registers in the HDaudio multi-link
* space
@@ -349,6 +353,7 @@ struct sdw_intel_res {
u32 shim_base;
u32 alh_base;
bool ext;
+ bool mic_privacy;
struct hdac_bus *hbus;
struct mutex *eml_lock;
};
diff --git a/include/net/checksum.h b/include/net/checksum.h
index e57986b173f8..3cbab35de5ab 100644
--- a/include/net/checksum.h
+++ b/include/net/checksum.h
@@ -152,7 +152,7 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb,
const __be32 *from, const __be32 *to,
bool pseudohdr);
void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb,
- __wsum diff, bool pseudohdr);
+ __wsum diff, bool pseudohdr, bool ipv6);
static __always_inline
void inet_proto_csum_replace2(__sum16 *sum, struct sk_buff *skb,
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 85180e4aaa5a..0b4a2f124d11 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2056,6 +2056,7 @@ union bpf_attr {
* for updates resulting in a null checksum the value is set to
* **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates
* that the modified header field is part of the pseudo-header.
+ * Flag **BPF_F_IPV6** should be set for IPv6 packets.
*
* This helper works in combination with **bpf_csum_diff**\ (),
* which does not update the checksum in-place, but offers more
@@ -6072,6 +6073,7 @@ enum {
BPF_F_PSEUDO_HDR = (1ULL << 4),
BPF_F_MARK_MANGLED_0 = (1ULL << 5),
BPF_F_MARK_ENFORCE = (1ULL << 6),
+ BPF_F_IPV6 = (1ULL << 7),
};
/* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
diff --git a/init/Kconfig b/init/Kconfig
index ab83abe0fd9d..af4c2f085455 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -136,6 +136,9 @@ config LD_CAN_USE_KEEP_IN_OVERLAY
config RUSTC_HAS_COERCE_POINTEE
def_bool RUSTC_VERSION >= 108400
+config RUSTC_HAS_SPAN_FILE
+ def_bool RUSTC_VERSION >= 108800
+
config RUSTC_HAS_UNNECESSARY_TRANSMUTES
def_bool RUSTC_VERSION >= 108800
diff --git a/kernel/sched/ext_idle.c b/kernel/sched/ext_idle.c
index 66da03cc0b33..6d29d3cbc670 100644
--- a/kernel/sched/ext_idle.c
+++ b/kernel/sched/ext_idle.c
@@ -138,6 +138,7 @@ found:
goto retry;
}
+#ifdef CONFIG_NUMA
/*
* Tracks nodes that have not yet been visited when searching for an idle
* CPU across all available nodes.
@@ -186,6 +187,13 @@ static s32 pick_idle_cpu_from_online_nodes(const struct cpumask *cpus_allowed, i
return cpu;
}
+#else
+static inline s32
+pick_idle_cpu_from_online_nodes(const struct cpumask *cpus_allowed, int node, u64 flags)
+{
+ return -EBUSY;
+}
+#endif
/*
* Find an idle CPU in the system, starting from @node.
@@ -447,11 +455,18 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
const struct cpumask *llc_cpus = NULL, *numa_cpus = NULL;
const struct cpumask *allowed = cpus_allowed ?: p->cpus_ptr;
int node = scx_cpu_node_if_enabled(prev_cpu);
+ bool is_prev_allowed;
s32 cpu;
preempt_disable();
/*
+ * Check whether @prev_cpu is still within the allowed set. If not,
+ * we can still try selecting a nearby CPU.
+ */
+ is_prev_allowed = cpumask_test_cpu(prev_cpu, allowed);
+
+ /*
* Determine the subset of CPUs usable by @p within @cpus_allowed.
*/
if (allowed != p->cpus_ptr) {
@@ -465,21 +480,6 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
cpu = -EBUSY;
goto out_enable;
}
-
- /*
- * If @prev_cpu is not in the allowed CPUs, skip topology
- * optimizations and try to pick any idle CPU usable by the
- * task.
- *
- * If %SCX_OPS_BUILTIN_IDLE_PER_NODE is enabled, prioritize
- * the current node, as it may optimize some waker->wakee
- * workloads.
- */
- if (!cpumask_test_cpu(prev_cpu, allowed)) {
- node = scx_cpu_node_if_enabled(smp_processor_id());
- cpu = scx_pick_idle_cpu(allowed, node, flags);
- goto out_enable;
- }
}
/*
@@ -525,7 +525,7 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
* then avoid a migration.
*/
cpu = smp_processor_id();
- if (cpus_share_cache(cpu, prev_cpu) &&
+ if (is_prev_allowed && cpus_share_cache(cpu, prev_cpu) &&
scx_idle_test_and_clear_cpu(prev_cpu)) {
cpu = prev_cpu;
goto out_unlock;
@@ -562,7 +562,8 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
/*
* Keep using @prev_cpu if it's part of a fully idle core.
*/
- if (cpumask_test_cpu(prev_cpu, idle_cpumask(node)->smt) &&
+ if (is_prev_allowed &&
+ cpumask_test_cpu(prev_cpu, idle_cpumask(node)->smt) &&
scx_idle_test_and_clear_cpu(prev_cpu)) {
cpu = prev_cpu;
goto out_unlock;
@@ -611,7 +612,7 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
/*
* Use @prev_cpu if it's idle.
*/
- if (scx_idle_test_and_clear_cpu(prev_cpu)) {
+ if (is_prev_allowed && scx_idle_test_and_clear_cpu(prev_cpu)) {
cpu = prev_cpu;
goto out_unlock;
}
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 042d3ac3b4a3..a5bde5db58ef 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -4870,7 +4870,8 @@ static int l2cap_le_connect_req(struct l2cap_conn *conn,
if (!smp_sufficient_security(conn->hcon, pchan->sec_level,
SMP_ALLOW_STK)) {
- result = L2CAP_CR_LE_AUTHENTICATION;
+ result = pchan->sec_level == BT_SECURITY_MEDIUM ?
+ L2CAP_CR_LE_ENCRYPTION : L2CAP_CR_LE_AUTHENTICATION;
chan = NULL;
goto response_unlock;
}
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 261926dccc7e..14a9462fced5 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -2566,7 +2566,8 @@ static int mgmt_hci_cmd_sync(struct sock *sk, struct hci_dev *hdev,
struct mgmt_pending_cmd *cmd;
int err;
- if (len < sizeof(*cp))
+ if (len != (offsetof(struct mgmt_cp_hci_cmd_sync, params) +
+ le16_to_cpu(cp->params_len)))
return mgmt_cmd_status(sk, hdev->id, MGMT_OP_HCI_CMD_SYNC,
MGMT_STATUS_INVALID_PARAMS);
diff --git a/net/core/dev.c b/net/core/dev.c
index 2b514d95c528..be97c440ecd5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -9968,6 +9968,7 @@ int netif_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf)
return dev->netdev_ops->ndo_bpf(dev, bpf);
}
+EXPORT_SYMBOL_GPL(netif_xdp_propagate);
u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode)
{
@@ -10498,7 +10499,7 @@ static void dev_index_release(struct net *net, int ifindex)
static bool from_cleanup_net(void)
{
#ifdef CONFIG_NET_NS
- return current == cleanup_net_task;
+ return current == READ_ONCE(cleanup_net_task);
#else
return false;
#endif
diff --git a/net/core/devmem.h b/net/core/devmem.h
index e7ba77050b8f..0a3b28ba5c13 100644
--- a/net/core/devmem.h
+++ b/net/core/devmem.h
@@ -170,8 +170,9 @@ static inline void __net_devmem_dmabuf_binding_free(struct work_struct *wq)
}
static inline struct net_devmem_dmabuf_binding *
-net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd,
+net_devmem_bind_dmabuf(struct net_device *dev,
enum dma_data_direction direction,
+ unsigned int dmabuf_fd,
struct netdev_nl_sock *priv,
struct netlink_ext_ack *extack)
{
diff --git a/net/core/filter.c b/net/core/filter.c
index ab456bf1056e..327ca73f9cd7 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1968,10 +1968,11 @@ BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset,
bool is_pseudo = flags & BPF_F_PSEUDO_HDR;
bool is_mmzero = flags & BPF_F_MARK_MANGLED_0;
bool do_mforce = flags & BPF_F_MARK_ENFORCE;
+ bool is_ipv6 = flags & BPF_F_IPV6;
__sum16 *ptr;
if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_MARK_ENFORCE |
- BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK)))
+ BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK | BPF_F_IPV6)))
return -EINVAL;
if (unlikely(offset > 0xffff || offset & 1))
return -EFAULT;
@@ -1987,7 +1988,7 @@ BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset,
if (unlikely(from != 0))
return -EINVAL;
- inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo);
+ inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo, is_ipv6);
break;
case 2:
inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 42ee7fce3d95..ae54f26709ca 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -654,7 +654,7 @@ static void cleanup_net(struct work_struct *work)
struct net *net, *tmp, *last;
LIST_HEAD(net_exit_list);
- cleanup_net_task = current;
+ WRITE_ONCE(cleanup_net_task, current);
/* Atomically snapshot the list of namespaces to cleanup */
net_kill_list = llist_del_all(&cleanup_list);
@@ -704,7 +704,7 @@ static void cleanup_net(struct work_struct *work)
put_user_ns(net->user_ns);
net_passive_dec(net);
}
- cleanup_net_task = NULL;
+ WRITE_ONCE(cleanup_net_task, NULL);
}
/**
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 4011eb305cee..ba7cf3e3c32f 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -153,9 +153,9 @@ u64 *page_pool_ethtool_stats_get(u64 *data, const void *stats)
EXPORT_SYMBOL(page_pool_ethtool_stats_get);
#else
-#define alloc_stat_inc(pool, __stat)
-#define recycle_stat_inc(pool, __stat)
-#define recycle_stat_add(pool, __stat, val)
+#define alloc_stat_inc(...) do { } while (0)
+#define recycle_stat_inc(...) do { } while (0)
+#define recycle_stat_add(...) do { } while (0)
#endif
static bool page_pool_producer_lock(struct page_pool *pool)
@@ -741,19 +741,16 @@ void page_pool_return_page(struct page_pool *pool, netmem_ref netmem)
static bool page_pool_recycle_in_ring(struct page_pool *pool, netmem_ref netmem)
{
- int ret;
- /* BH protection not needed if current is softirq */
- if (in_softirq())
- ret = ptr_ring_produce(&pool->ring, (__force void *)netmem);
- else
- ret = ptr_ring_produce_bh(&pool->ring, (__force void *)netmem);
+ bool in_softirq, ret;
- if (!ret) {
+ /* BH protection not needed if current is softirq */
+ in_softirq = page_pool_producer_lock(pool);
+ ret = !__ptr_ring_produce(&pool->ring, (__force void *)netmem);
+ if (ret)
recycle_stat_inc(pool, ring);
- return true;
- }
+ page_pool_producer_unlock(pool, in_softirq);
- return false;
+ return ret;
}
/* Only allow direct recycling in special circumstances, into the
@@ -1150,10 +1147,14 @@ static void page_pool_scrub(struct page_pool *pool)
static int page_pool_release(struct page_pool *pool)
{
+ bool in_softirq;
int inflight;
page_pool_scrub(pool);
inflight = page_pool_inflight(pool, true);
+ /* Acquire producer lock to make sure producers have exited. */
+ in_softirq = page_pool_producer_lock(pool);
+ page_pool_producer_unlock(pool, in_softirq);
if (!inflight)
__page_pool_destroy(pool);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index f9a35bdc58ad..c57692eb8da9 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -3671,7 +3671,7 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname,
if (tb[IFLA_LINKMODE])
dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]);
if (tb[IFLA_GROUP])
- dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP]));
+ netif_set_group(dev, nla_get_u32(tb[IFLA_GROUP]));
if (tb[IFLA_GSO_MAX_SIZE])
netif_set_gso_max_size(dev, nla_get_u32(tb[IFLA_GSO_MAX_SIZE]));
if (tb[IFLA_GSO_MAX_SEGS])
diff --git a/net/core/sock.c b/net/core/sock.c
index 341979874459..3b409bc8ef6d 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -3284,16 +3284,16 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
{
struct mem_cgroup *memcg = mem_cgroup_sockets_enabled ? sk->sk_memcg : NULL;
struct proto *prot = sk->sk_prot;
- bool charged = false;
+ bool charged = true;
long allocated;
sk_memory_allocated_add(sk, amt);
allocated = sk_memory_allocated(sk);
if (memcg) {
- if (!mem_cgroup_charge_skmem(memcg, amt, gfp_memcg_charge()))
+ charged = mem_cgroup_charge_skmem(memcg, amt, gfp_memcg_charge());
+ if (!charged)
goto suppress_allocation;
- charged = true;
}
/* Under limit. */
@@ -3378,7 +3378,7 @@ suppress_allocation:
sk_memory_allocated_sub(sk, amt);
- if (charged)
+ if (memcg && charged)
mem_cgroup_uncharge_skmem(memcg, amt);
return 0;
diff --git a/net/core/utils.c b/net/core/utils.c
index e47feeaa5a49..5e63b0ea21f3 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -473,11 +473,11 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb,
EXPORT_SYMBOL(inet_proto_csum_replace16);
void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb,
- __wsum diff, bool pseudohdr)
+ __wsum diff, bool pseudohdr, bool ipv6)
{
if (skb->ip_summed != CHECKSUM_PARTIAL) {
csum_replace_by_diff(sum, diff);
- if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr)
+ if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr && !ipv6)
skb->csum = ~csum_sub(diff, skb->csum);
} else if (pseudohdr) {
*sum = ~csum_fold(csum_add(diff, csum_unfold(*sum)));
diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
index 8c3c068728e5..fe75821623a4 100644
--- a/net/dsa/tag_brcm.c
+++ b/net/dsa/tag_brcm.c
@@ -257,7 +257,7 @@ static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb,
int source_port;
u8 *brcm_tag;
- if (unlikely(!pskb_may_pull(skb, BRCM_LEG_PORT_ID)))
+ if (unlikely(!pskb_may_pull(skb, BRCM_LEG_TAG_LEN + VLAN_HLEN)))
return NULL;
brcm_tag = dsa_etype_header_pos_rx(skb);
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 9c775f8aa438..85b5aa82d7d7 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -495,6 +495,7 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
bool copy_dtor;
__sum16 check;
__be16 newlen;
+ int ret = 0;
mss = skb_shinfo(gso_skb)->gso_size;
if (gso_skb->len <= sizeof(*uh) + mss)
@@ -523,6 +524,10 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
if (skb_pagelen(gso_skb) - sizeof(*uh) == skb_shinfo(gso_skb)->gso_size)
return __udp_gso_segment_list(gso_skb, features, is_ipv6);
+ ret = __skb_linearize(gso_skb);
+ if (ret)
+ return ERR_PTR(ret);
+
/* Setup csum, as fraglist skips this in udp4_gro_receive. */
gso_skb->csum_start = skb_transport_header(gso_skb) - gso_skb->head;
gso_skb->csum_offset = offsetof(struct udphdr, check);
diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c
index 95e9146918cc..b8d43ed4689d 100644
--- a/net/ipv6/ila/ila_common.c
+++ b/net/ipv6/ila/ila_common.c
@@ -86,7 +86,7 @@ static void ila_csum_adjust_transport(struct sk_buff *skb,
diff = get_csum_diff(ip6h, p);
inet_proto_csum_replace_by_diff(&th->check, skb,
- diff, true);
+ diff, true, true);
}
break;
case NEXTHDR_UDP:
@@ -97,7 +97,7 @@ static void ila_csum_adjust_transport(struct sk_buff *skb,
if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
diff = get_csum_diff(ip6h, p);
inet_proto_csum_replace_by_diff(&uh->check, skb,
- diff, true);
+ diff, true, true);
if (!uh->check)
uh->check = CSUM_MANGLED_0;
}
@@ -111,7 +111,7 @@ static void ila_csum_adjust_transport(struct sk_buff *skb,
diff = get_csum_diff(ip6h, p);
inet_proto_csum_replace_by_diff(&ih->icmp6_cksum, skb,
- diff, true);
+ diff, true, true);
}
break;
}
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index ac1dbd492c22..a11a02b4ba95 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -1644,10 +1644,8 @@ static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = {
[SEG6_LOCAL_SRH] = { .type = NLA_BINARY },
[SEG6_LOCAL_TABLE] = { .type = NLA_U32 },
[SEG6_LOCAL_VRFTABLE] = { .type = NLA_U32 },
- [SEG6_LOCAL_NH4] = { .type = NLA_BINARY,
- .len = sizeof(struct in_addr) },
- [SEG6_LOCAL_NH6] = { .type = NLA_BINARY,
- .len = sizeof(struct in6_addr) },
+ [SEG6_LOCAL_NH4] = NLA_POLICY_EXACT_LEN(sizeof(struct in_addr)),
+ [SEG6_LOCAL_NH6] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
[SEG6_LOCAL_IIF] = { .type = NLA_U32 },
[SEG6_LOCAL_OIF] = { .type = NLA_U32 },
[SEG6_LOCAL_BPF] = { .type = NLA_NESTED },
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index b84150dbfe8c..948909a242d6 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -7220,11 +7220,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link,
bssid = ieee80211_get_bssid(hdr, len, sdata->vif.type);
if (ieee80211_is_s1g_beacon(mgmt->frame_control)) {
struct ieee80211_ext *ext = (void *) mgmt;
-
- if (ieee80211_is_s1g_short_beacon(ext->frame_control))
- variable = ext->u.s1g_short_beacon.variable;
- else
- variable = ext->u.s1g_beacon.variable;
+ variable = ext->u.s1g_beacon.variable +
+ ieee80211_s1g_optional_len(ext->frame_control);
}
baselen = (u8 *) variable - (u8 *) mgmt;
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 7b8da40a912d..cd8385ecafd9 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -276,6 +276,7 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
struct ieee80211_mgmt *mgmt = (void *)skb->data;
struct ieee80211_bss *bss;
struct ieee80211_channel *channel;
+ struct ieee80211_ext *ext;
size_t min_hdr_len = offsetof(struct ieee80211_mgmt,
u.probe_resp.variable);
@@ -285,12 +286,10 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
return;
if (ieee80211_is_s1g_beacon(mgmt->frame_control)) {
- if (ieee80211_is_s1g_short_beacon(mgmt->frame_control))
- min_hdr_len = offsetof(struct ieee80211_ext,
- u.s1g_short_beacon.variable);
- else
- min_hdr_len = offsetof(struct ieee80211_ext,
- u.s1g_beacon);
+ ext = (struct ieee80211_ext *)mgmt;
+ min_hdr_len =
+ offsetof(struct ieee80211_ext, u.s1g_beacon.variable) +
+ ieee80211_s1g_optional_len(ext->frame_control);
}
if (skb->len < min_hdr_len)
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index aad84aabd7f1..f391cd267922 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -248,7 +248,7 @@ static noinline bool
nf_nat_used_tuple_new(const struct nf_conntrack_tuple *tuple,
const struct nf_conn *ignored_ct)
{
- static const unsigned long uses_nat = IPS_NAT_MASK | IPS_SEQ_ADJUST_BIT;
+ static const unsigned long uses_nat = IPS_NAT_MASK | IPS_SEQ_ADJUST;
const struct nf_conntrack_tuple_hash *thash;
const struct nf_conntrack_zone *zone;
struct nf_conn *ct;
@@ -287,8 +287,14 @@ nf_nat_used_tuple_new(const struct nf_conntrack_tuple *tuple,
zone = nf_ct_zone(ignored_ct);
thash = nf_conntrack_find_get(net, zone, tuple);
- if (unlikely(!thash)) /* clashing entry went away */
- return false;
+ if (unlikely(!thash)) {
+ struct nf_conntrack_tuple reply;
+
+ nf_ct_invert_tuple(&reply, tuple);
+ thash = nf_conntrack_find_get(net, zone, &reply);
+ if (!thash) /* clashing entry went away */
+ return false;
+ }
ct = nf_ct_tuplehash_to_ctrack(thash);
diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c
index c15db28c5ebc..be7c16c79f71 100644
--- a/net/netfilter/nft_set_pipapo_avx2.c
+++ b/net/netfilter/nft_set_pipapo_avx2.c
@@ -1114,6 +1114,25 @@ bool nft_pipapo_avx2_estimate(const struct nft_set_desc *desc, u32 features,
}
/**
+ * pipapo_resmap_init_avx2() - Initialise result map before first use
+ * @m: Matching data, including mapping table
+ * @res_map: Result map
+ *
+ * Like pipapo_resmap_init() but do not set start map bits covered by the first field.
+ */
+static inline void pipapo_resmap_init_avx2(const struct nft_pipapo_match *m, unsigned long *res_map)
+{
+ const struct nft_pipapo_field *f = m->f;
+ int i;
+
+ /* Starting map doesn't need to be set to all-ones for this implementation,
+ * but we do need to zero the remaining bits, if any.
+ */
+ for (i = f->bsize; i < m->bsize_max; i++)
+ res_map[i] = 0ul;
+}
+
+/**
* nft_pipapo_avx2_lookup() - Lookup function for AVX2 implementation
* @net: Network namespace
* @set: nftables API set representation
@@ -1171,7 +1190,7 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
res = scratch->map + (map_index ? m->bsize_max : 0);
fill = scratch->map + (map_index ? 0 : m->bsize_max);
- /* Starting map doesn't need to be set for this implementation */
+ pipapo_resmap_init_avx2(m, res);
nft_pipapo_avx2_prepare();
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 6ea16138582c..33b77084a4e5 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -1165,8 +1165,10 @@ int netlbl_conn_setattr(struct sock *sk,
break;
#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
- if (sk->sk_family != AF_INET6)
- return -EAFNOSUPPORT;
+ if (sk->sk_family != AF_INET6) {
+ ret_val = -EAFNOSUPPORT;
+ goto conn_setattr_return;
+ }
addr6 = (struct sockaddr_in6 *)addr;
entry = netlbl_domhsh_getentry_af6(secattr->domain,
diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c
index 1f7c136d6d0e..0a260df45d25 100644
--- a/net/rxrpc/insecure.c
+++ b/net/rxrpc/insecure.c
@@ -45,8 +45,9 @@ static void none_free_call_crypto(struct rxrpc_call *call)
static bool none_validate_challenge(struct rxrpc_connection *conn,
struct sk_buff *skb)
{
- return rxrpc_abort_conn(conn, skb, RX_PROTOCOL_ERROR, -EPROTO,
- rxrpc_eproto_rxnull_challenge);
+ rxrpc_abort_conn(conn, skb, RX_PROTOCOL_ERROR, -EPROTO,
+ rxrpc_eproto_rxnull_challenge);
+ return true;
}
static int none_sendmsg_respond_to_challenge(struct sk_buff *challenge,
diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c
index f4cfe88670f5..ea5bb131ebd0 100644
--- a/net/tipc/crypto.c
+++ b/net/tipc/crypto.c
@@ -818,7 +818,11 @@ static int tipc_aead_encrypt(struct tipc_aead *aead, struct sk_buff *skb,
}
/* Get net to avoid freed tipc_crypto when delete namespace */
- get_net(aead->crypto->net);
+ if (!maybe_get_net(aead->crypto->net)) {
+ tipc_bearer_put(b);
+ rc = -ENODEV;
+ goto exit;
+ }
/* Now, do encrypt */
rc = crypto_aead_encrypt(req);
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index ddd3a97f6609..e8a4fe44ec2d 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -3250,6 +3250,7 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy,
const u8 *ie;
size_t ielen;
u64 tsf;
+ size_t s1g_optional_len;
if (WARN_ON(!mgmt))
return NULL;
@@ -3264,12 +3265,11 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy,
if (ieee80211_is_s1g_beacon(mgmt->frame_control)) {
ext = (void *) mgmt;
- if (ieee80211_is_s1g_short_beacon(mgmt->frame_control))
- min_hdr_len = offsetof(struct ieee80211_ext,
- u.s1g_short_beacon.variable);
- else
- min_hdr_len = offsetof(struct ieee80211_ext,
- u.s1g_beacon.variable);
+ s1g_optional_len =
+ ieee80211_s1g_optional_len(ext->frame_control);
+ min_hdr_len =
+ offsetof(struct ieee80211_ext, u.s1g_beacon.variable) +
+ s1g_optional_len;
} else {
/* same for beacons */
min_hdr_len = offsetof(struct ieee80211_mgmt,
@@ -3285,11 +3285,7 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy,
const struct ieee80211_s1g_bcn_compat_ie *compat;
const struct element *elem;
- if (ieee80211_is_s1g_short_beacon(mgmt->frame_control))
- ie = ext->u.s1g_short_beacon.variable;
- else
- ie = ext->u.s1g_beacon.variable;
-
+ ie = ext->u.s1g_beacon.variable + s1g_optional_len;
elem = cfg80211_find_elem(WLAN_EID_S1G_BCN_COMPAT, ie, ielen);
if (!elem)
return NULL;
diff --git a/rust/Makefile b/rust/Makefile
index 80c84749d734..27dec7904c3a 100644
--- a/rust/Makefile
+++ b/rust/Makefile
@@ -60,6 +60,8 @@ endif
core-cfgs = \
--cfg no_fp_fmt_parse
+core-edition := $(if $(call rustc-min-version,108700),2024,2021)
+
# `rustc` recognizes `--remap-path-prefix` since 1.26.0, but `rustdoc` only
# since Rust 1.81.0. Moreover, `rustdoc` ICEs on out-of-tree builds since Rust
# 1.82.0 (https://github.com/rust-lang/rust/issues/138520). Thus workaround both
@@ -106,8 +108,8 @@ rustdoc-macros: $(src)/macros/lib.rs FORCE
# Starting with Rust 1.82.0, skipping `-Wrustdoc::unescaped_backticks` should
# not be needed -- see https://github.com/rust-lang/rust/pull/128307.
-rustdoc-core: private skip_flags = -Wrustdoc::unescaped_backticks
-rustdoc-core: private rustc_target_flags = $(core-cfgs)
+rustdoc-core: private skip_flags = --edition=2021 -Wrustdoc::unescaped_backticks
+rustdoc-core: private rustc_target_flags = --edition=$(core-edition) $(core-cfgs)
rustdoc-core: $(RUST_LIB_SRC)/core/src/lib.rs FORCE
+$(call if_changed,rustdoc)
@@ -273,7 +275,7 @@ bindgen_skip_c_flags := -mno-fp-ret-in-387 -mpreferred-stack-boundary=% \
-fzero-call-used-regs=% -fno-stack-clash-protection \
-fno-inline-functions-called-once -fsanitize=bounds-strict \
-fstrict-flex-arrays=% -fmin-function-alignment=% \
- -fzero-init-padding-bits=% \
+ -fzero-init-padding-bits=% -mno-fdpic \
--param=% --param asan-%
# Derived from `scripts/Makefile.clang`.
@@ -402,7 +404,8 @@ quiet_cmd_rustc_procmacro = $(RUSTC_OR_CLIPPY_QUIET) P $@
-Clink-args='$(call escsq,$(KBUILD_PROCMACROLDFLAGS))' \
--emit=dep-info=$(depfile) --emit=link=$@ --extern proc_macro \
--crate-type proc-macro \
- --crate-name $(patsubst lib%.$(libmacros_extension),%,$(notdir $@)) $<
+ --crate-name $(patsubst lib%.$(libmacros_extension),%,$(notdir $@)) \
+ @$(objtree)/include/generated/rustc_cfg $<
# Procedural macros can only be used with the `rustc` that compiled it.
$(obj)/$(libmacros_name): $(src)/macros/lib.rs FORCE
@@ -416,7 +419,7 @@ quiet_cmd_rustc_library = $(if $(skip_clippy),RUSTC,$(RUSTC_OR_CLIPPY_QUIET)) L
cmd_rustc_library = \
OBJTREE=$(abspath $(objtree)) \
$(if $(skip_clippy),$(RUSTC),$(RUSTC_OR_CLIPPY)) \
- $(filter-out $(skip_flags),$(rust_flags) $(rustc_target_flags)) \
+ $(filter-out $(skip_flags),$(rust_flags)) $(rustc_target_flags) \
--emit=dep-info=$(depfile) --emit=obj=$@ \
--emit=metadata=$(dir $@)$(patsubst %.o,lib%.rmeta,$(notdir $@)) \
--crate-type rlib -L$(objtree)/$(obj) \
@@ -427,7 +430,7 @@ quiet_cmd_rustc_library = $(if $(skip_clippy),RUSTC,$(RUSTC_OR_CLIPPY_QUIET)) L
rust-analyzer:
$(Q)MAKEFLAGS= $(srctree)/scripts/generate_rust_analyzer.py \
- --cfgs='core=$(core-cfgs)' \
+ --cfgs='core=$(core-cfgs)' $(core-edition) \
$(realpath $(srctree)) $(realpath $(objtree)) \
$(rustc_sysroot) $(RUST_LIB_SRC) $(if $(KBUILD_EXTMOD),$(srcroot)) \
> rust-project.json
@@ -483,9 +486,9 @@ $(obj)/helpers/helpers.o: $(src)/helpers/helpers.c $(recordmcount_source) FORCE
$(obj)/exports.o: private skip_gendwarfksyms = 1
$(obj)/core.o: private skip_clippy = 1
-$(obj)/core.o: private skip_flags = -Wunreachable_pub
+$(obj)/core.o: private skip_flags = --edition=2021 -Wunreachable_pub
$(obj)/core.o: private rustc_objcopy = $(foreach sym,$(redirect-intrinsics),--redefine-sym $(sym)=__rust$(sym))
-$(obj)/core.o: private rustc_target_flags = $(core-cfgs)
+$(obj)/core.o: private rustc_target_flags = --edition=$(core-edition) $(core-cfgs)
$(obj)/core.o: $(RUST_LIB_SRC)/core/src/lib.rs \
$(wildcard $(objtree)/include/config/RUSTC_VERSION_TEXT) FORCE
+$(call if_changed_rule,rustc_library)
diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h
index a5a6fb45d405..bc494745f67b 100644
--- a/rust/bindings/bindings_helper.h
+++ b/rust/bindings/bindings_helper.h
@@ -6,6 +6,28 @@
* Sorted alphabetically.
*/
+/*
+ * First, avoid forward references to `enum` types.
+ *
+ * This workarounds a `bindgen` issue with them:
+ * <https://github.com/rust-lang/rust-bindgen/issues/3179>.
+ *
+ * Without this, the generated Rust type may be the wrong one (`i32`) or
+ * the proper one (typically `c_uint`) depending on how the headers are
+ * included, which in turn may depend on the particular kernel configuration
+ * or the architecture.
+ *
+ * The alternative would be to use casts and likely an
+ * `#[allow(clippy::unnecessary_cast)]` in the Rust source files. Instead,
+ * this approach allows us to keep the correct code in the source files and
+ * simply remove this section when the issue is fixed upstream and we bump
+ * the minimum `bindgen` version.
+ *
+ * This workaround may not be possible in some cases, depending on how the C
+ * headers are set up.
+ */
+#include <linux/hrtimer_types.h>
+
#include <drm/drm_device.h>
#include <drm/drm_drv.h>
#include <drm/drm_file.h>
@@ -48,6 +70,7 @@
#include <linux/tracepoint.h>
#include <linux/wait.h>
#include <linux/workqueue.h>
+#include <linux/xarray.h>
#include <trace/events/rust_sample.h>
#if defined(CONFIG_DRM_PANIC_SCREEN_QR_CODE)
@@ -67,3 +90,8 @@ const gfp_t RUST_CONST_HELPER___GFP_HIGHMEM = ___GFP_HIGHMEM;
const gfp_t RUST_CONST_HELPER___GFP_NOWARN = ___GFP_NOWARN;
const blk_features_t RUST_CONST_HELPER_BLK_FEAT_ROTATIONAL = BLK_FEAT_ROTATIONAL;
const fop_flags_t RUST_CONST_HELPER_FOP_UNSIGNED_OFFSET = FOP_UNSIGNED_OFFSET;
+
+const xa_mark_t RUST_CONST_HELPER_XA_PRESENT = XA_PRESENT;
+
+const gfp_t RUST_CONST_HELPER_XA_FLAGS_ALLOC = XA_FLAGS_ALLOC;
+const gfp_t RUST_CONST_HELPER_XA_FLAGS_ALLOC1 = XA_FLAGS_ALLOC1;
diff --git a/rust/ffi.rs b/rust/ffi.rs
index 584f75b49862..d60aad792af4 100644
--- a/rust/ffi.rs
+++ b/rust/ffi.rs
@@ -17,7 +17,7 @@ macro_rules! alias {
// Check size compatibility with `core`.
const _: () = assert!(
- core::mem::size_of::<$name>() == core::mem::size_of::<core::ffi::$name>()
+ ::core::mem::size_of::<$name>() == ::core::mem::size_of::<::core::ffi::$name>()
);
)*}
}
diff --git a/rust/helpers/helpers.c b/rust/helpers/helpers.c
index 805307018f0e..0f1b5d115985 100644
--- a/rust/helpers/helpers.c
+++ b/rust/helpers/helpers.c
@@ -43,3 +43,4 @@
#include "vmalloc.c"
#include "wait.c"
#include "workqueue.c"
+#include "xarray.c"
diff --git a/rust/helpers/xarray.c b/rust/helpers/xarray.c
new file mode 100644
index 000000000000..60b299f11451
--- /dev/null
+++ b/rust/helpers/xarray.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/xarray.h>
+
+int rust_helper_xa_err(void *entry)
+{
+ return xa_err(entry);
+}
+
+void rust_helper_xa_init_flags(struct xarray *xa, gfp_t flags)
+{
+ return xa_init_flags(xa, flags);
+}
+
+int rust_helper_xa_trylock(struct xarray *xa)
+{
+ return xa_trylock(xa);
+}
+
+void rust_helper_xa_lock(struct xarray *xa)
+{
+ return xa_lock(xa);
+}
+
+void rust_helper_xa_unlock(struct xarray *xa)
+{
+ return xa_unlock(xa);
+}
diff --git a/rust/kernel/alloc.rs b/rust/kernel/alloc.rs
index fc9c9c41cd79..a2c49e5494d3 100644
--- a/rust/kernel/alloc.rs
+++ b/rust/kernel/alloc.rs
@@ -94,10 +94,10 @@ pub mod flags {
///
/// A lower watermark is applied to allow access to "atomic reserves". The current
/// implementation doesn't support NMI and few other strict non-preemptive contexts (e.g.
- /// raw_spin_lock). The same applies to [`GFP_NOWAIT`].
+ /// `raw_spin_lock`). The same applies to [`GFP_NOWAIT`].
pub const GFP_ATOMIC: Flags = Flags(bindings::GFP_ATOMIC);
- /// Typical for kernel-internal allocations. The caller requires ZONE_NORMAL or a lower zone
+ /// Typical for kernel-internal allocations. The caller requires `ZONE_NORMAL` or a lower zone
/// for direct access but can direct reclaim.
pub const GFP_KERNEL: Flags = Flags(bindings::GFP_KERNEL);
diff --git a/rust/kernel/alloc/allocator_test.rs b/rust/kernel/alloc/allocator_test.rs
index c37d4c0c64e9..d19c06ef0498 100644
--- a/rust/kernel/alloc/allocator_test.rs
+++ b/rust/kernel/alloc/allocator_test.rs
@@ -4,7 +4,7 @@
//! of those types (e.g. `CString`) use kernel allocators for instantiation.
//!
//! In order to allow userspace test cases to make use of such types as well, implement the
-//! `Cmalloc` allocator within the allocator_test module and type alias all kernel allocators to
+//! `Cmalloc` allocator within the `allocator_test` module and type alias all kernel allocators to
//! `Cmalloc`. The `Cmalloc` allocator uses libc's `realloc()` function as allocator backend.
#![allow(missing_docs)]
diff --git a/rust/kernel/alloc/kbox.rs b/rust/kernel/alloc/kbox.rs
index b77d32f3a58b..c386ff771d50 100644
--- a/rust/kernel/alloc/kbox.rs
+++ b/rust/kernel/alloc/kbox.rs
@@ -57,12 +57,50 @@ use pin_init::{InPlaceWrite, Init, PinInit, ZeroableOption};
/// assert!(KVBox::<Huge>::new_uninit(GFP_KERNEL).is_ok());
/// ```
///
+/// [`Box`]es can also be used to store trait objects by coercing their type:
+///
+/// ```
+/// trait FooTrait {}
+///
+/// struct FooStruct;
+/// impl FooTrait for FooStruct {}
+///
+/// let _ = KBox::new(FooStruct, GFP_KERNEL)? as KBox<dyn FooTrait>;
+/// # Ok::<(), Error>(())
+/// ```
+///
/// # Invariants
///
/// `self.0` is always properly aligned and either points to memory allocated with `A` or, for
/// zero-sized types, is a dangling, well aligned pointer.
#[repr(transparent)]
-pub struct Box<T: ?Sized, A: Allocator>(NonNull<T>, PhantomData<A>);
+#[cfg_attr(CONFIG_RUSTC_HAS_COERCE_POINTEE, derive(core::marker::CoercePointee))]
+pub struct Box<#[cfg_attr(CONFIG_RUSTC_HAS_COERCE_POINTEE, pointee)] T: ?Sized, A: Allocator>(
+ NonNull<T>,
+ PhantomData<A>,
+);
+
+// This is to allow coercion from `Box<T, A>` to `Box<U, A>` if `T` can be converted to the
+// dynamically-sized type (DST) `U`.
+#[cfg(not(CONFIG_RUSTC_HAS_COERCE_POINTEE))]
+impl<T, U, A> core::ops::CoerceUnsized<Box<U, A>> for Box<T, A>
+where
+ T: ?Sized + core::marker::Unsize<U>,
+ U: ?Sized,
+ A: Allocator,
+{
+}
+
+// This is to allow `Box<U, A>` to be dispatched on when `Box<T, A>` can be coerced into `Box<U,
+// A>`.
+#[cfg(not(CONFIG_RUSTC_HAS_COERCE_POINTEE))]
+impl<T, U, A> core::ops::DispatchFromDyn<Box<U, A>> for Box<T, A>
+where
+ T: ?Sized + core::marker::Unsize<U>,
+ U: ?Sized,
+ A: Allocator,
+{
+}
/// Type alias for [`Box`] with a [`Kmalloc`] allocator.
///
@@ -101,7 +139,7 @@ pub type VBox<T> = Box<T, super::allocator::Vmalloc>;
pub type KVBox<T> = Box<T, super::allocator::KVmalloc>;
// SAFETY: All zeros is equivalent to `None` (option layout optimization guarantee:
-// https://doc.rust-lang.org/stable/std/option/index.html#representation).
+// <https://doc.rust-lang.org/stable/std/option/index.html#representation>).
unsafe impl<T, A: Allocator> ZeroableOption for Box<T, A> {}
// SAFETY: `Box` is `Send` if `T` is `Send` because the `Box` owns a `T`.
@@ -360,68 +398,70 @@ where
}
}
-impl<T: 'static, A> ForeignOwnable for Box<T, A>
+// SAFETY: The `into_foreign` function returns a pointer that is well-aligned.
+unsafe impl<T: 'static, A> ForeignOwnable for Box<T, A>
where
A: Allocator,
{
+ type PointedTo = T;
type Borrowed<'a> = &'a T;
type BorrowedMut<'a> = &'a mut T;
- fn into_foreign(self) -> *mut crate::ffi::c_void {
- Box::into_raw(self).cast()
+ fn into_foreign(self) -> *mut Self::PointedTo {
+ Box::into_raw(self)
}
- unsafe fn from_foreign(ptr: *mut crate::ffi::c_void) -> Self {
+ unsafe fn from_foreign(ptr: *mut Self::PointedTo) -> Self {
// SAFETY: The safety requirements of this function ensure that `ptr` comes from a previous
// call to `Self::into_foreign`.
- unsafe { Box::from_raw(ptr.cast()) }
+ unsafe { Box::from_raw(ptr) }
}
- unsafe fn borrow<'a>(ptr: *mut crate::ffi::c_void) -> &'a T {
+ unsafe fn borrow<'a>(ptr: *mut Self::PointedTo) -> &'a T {
// SAFETY: The safety requirements of this method ensure that the object remains alive and
// immutable for the duration of 'a.
- unsafe { &*ptr.cast() }
+ unsafe { &*ptr }
}
- unsafe fn borrow_mut<'a>(ptr: *mut crate::ffi::c_void) -> &'a mut T {
- let ptr = ptr.cast();
+ unsafe fn borrow_mut<'a>(ptr: *mut Self::PointedTo) -> &'a mut T {
// SAFETY: The safety requirements of this method ensure that the pointer is valid and that
// nothing else will access the value for the duration of 'a.
unsafe { &mut *ptr }
}
}
-impl<T: 'static, A> ForeignOwnable for Pin<Box<T, A>>
+// SAFETY: The `into_foreign` function returns a pointer that is well-aligned.
+unsafe impl<T: 'static, A> ForeignOwnable for Pin<Box<T, A>>
where
A: Allocator,
{
+ type PointedTo = T;
type Borrowed<'a> = Pin<&'a T>;
type BorrowedMut<'a> = Pin<&'a mut T>;
- fn into_foreign(self) -> *mut crate::ffi::c_void {
+ fn into_foreign(self) -> *mut Self::PointedTo {
// SAFETY: We are still treating the box as pinned.
- Box::into_raw(unsafe { Pin::into_inner_unchecked(self) }).cast()
+ Box::into_raw(unsafe { Pin::into_inner_unchecked(self) })
}
- unsafe fn from_foreign(ptr: *mut crate::ffi::c_void) -> Self {
+ unsafe fn from_foreign(ptr: *mut Self::PointedTo) -> Self {
// SAFETY: The safety requirements of this function ensure that `ptr` comes from a previous
// call to `Self::into_foreign`.
- unsafe { Pin::new_unchecked(Box::from_raw(ptr.cast())) }
+ unsafe { Pin::new_unchecked(Box::from_raw(ptr)) }
}
- unsafe fn borrow<'a>(ptr: *mut crate::ffi::c_void) -> Pin<&'a T> {
+ unsafe fn borrow<'a>(ptr: *mut Self::PointedTo) -> Pin<&'a T> {
// SAFETY: The safety requirements for this function ensure that the object is still alive,
// so it is safe to dereference the raw pointer.
// The safety requirements of `from_foreign` also ensure that the object remains alive for
// the lifetime of the returned value.
- let r = unsafe { &*ptr.cast() };
+ let r = unsafe { &*ptr };
// SAFETY: This pointer originates from a `Pin<Box<T>>`.
unsafe { Pin::new_unchecked(r) }
}
- unsafe fn borrow_mut<'a>(ptr: *mut crate::ffi::c_void) -> Pin<&'a mut T> {
- let ptr = ptr.cast();
+ unsafe fn borrow_mut<'a>(ptr: *mut Self::PointedTo) -> Pin<&'a mut T> {
// SAFETY: The safety requirements for this function ensure that the object is still alive,
// so it is safe to dereference the raw pointer.
// The safety requirements of `from_foreign` also ensure that the object remains alive for
diff --git a/rust/kernel/alloc/kvec.rs b/rust/kernel/alloc/kvec.rs
index 87a71fd40c3c..1a0dd852a468 100644
--- a/rust/kernel/alloc/kvec.rs
+++ b/rust/kernel/alloc/kvec.rs
@@ -2,9 +2,6 @@
//! Implementation of [`Vec`].
-// May not be needed in Rust 1.87.0 (pending beta backport).
-#![allow(clippy::ptr_eq)]
-
use super::{
allocator::{KVmalloc, Kmalloc, Vmalloc},
layout::ArrayLayout,
@@ -24,6 +21,9 @@ use core::{
slice::SliceIndex,
};
+mod errors;
+pub use self::errors::{InsertError, PushError, RemoveError};
+
/// Create a [`KVec`] containing the arguments.
///
/// New memory is allocated with `GFP_KERNEL`.
@@ -93,6 +93,8 @@ macro_rules! kvec {
/// without re-allocation. For ZSTs `self.layout`'s capacity is zero. However, it is legal for the
/// backing buffer to be larger than `layout`.
///
+/// - `self.len()` is always less than or equal to `self.capacity()`.
+///
/// - The `Allocator` type `A` of the vector is the exact same `Allocator` type the backing buffer
/// was allocated with (and must be freed with).
pub struct Vec<T, A: Allocator> {
@@ -186,17 +188,38 @@ where
self.len
}
- /// Forcefully sets `self.len` to `new_len`.
+ /// Increments `self.len` by `additional`.
///
/// # Safety
///
- /// - `new_len` must be less than or equal to [`Self::capacity`].
- /// - If `new_len` is greater than `self.len`, all elements within the interval
- /// [`self.len`,`new_len`) must be initialized.
+ /// - `additional` must be less than or equal to `self.capacity - self.len`.
+ /// - All elements within the interval [`self.len`,`self.len + additional`) must be initialized.
#[inline]
- pub unsafe fn set_len(&mut self, new_len: usize) {
- debug_assert!(new_len <= self.capacity());
- self.len = new_len;
+ pub unsafe fn inc_len(&mut self, additional: usize) {
+ // Guaranteed by the type invariant to never underflow.
+ debug_assert!(additional <= self.capacity() - self.len());
+ // INVARIANT: By the safety requirements of this method this represents the exact number of
+ // elements stored within `self`.
+ self.len += additional;
+ }
+
+ /// Decreases `self.len` by `count`.
+ ///
+ /// Returns a mutable slice to the elements forgotten by the vector. It is the caller's
+ /// responsibility to drop these elements if necessary.
+ ///
+ /// # Safety
+ ///
+ /// - `count` must be less than or equal to `self.len`.
+ unsafe fn dec_len(&mut self, count: usize) -> &mut [T] {
+ debug_assert!(count <= self.len());
+ // INVARIANT: We relinquish ownership of the elements within the range `[self.len - count,
+ // self.len)`, hence the updated value of `set.len` represents the exact number of elements
+ // stored within `self`.
+ self.len -= count;
+ // SAFETY: The memory after `self.len()` is guaranteed to contain `count` initialized
+ // elements of type `T`.
+ unsafe { slice::from_raw_parts_mut(self.as_mut_ptr().add(self.len), count) }
}
/// Returns a slice of the entire vector.
@@ -262,8 +285,8 @@ where
/// Returns a slice of `MaybeUninit<T>` for the remaining spare capacity of the vector.
pub fn spare_capacity_mut(&mut self) -> &mut [MaybeUninit<T>] {
// SAFETY:
- // - `self.len` is smaller than `self.capacity` and hence, the resulting pointer is
- // guaranteed to be part of the same allocated object.
+ // - `self.len` is smaller than `self.capacity` by the type invariant and hence, the
+ // resulting pointer is guaranteed to be part of the same allocated object.
// - `self.len` can not overflow `isize`.
let ptr = unsafe { self.as_mut_ptr().add(self.len) } as *mut MaybeUninit<T>;
@@ -287,24 +310,170 @@ where
/// ```
pub fn push(&mut self, v: T, flags: Flags) -> Result<(), AllocError> {
self.reserve(1, flags)?;
+ // SAFETY: The call to `reserve` was successful, so the capacity is at least one greater
+ // than the length.
+ unsafe { self.push_within_capacity_unchecked(v) };
+ Ok(())
+ }
- // SAFETY:
- // - `self.len` is smaller than `self.capacity` and hence, the resulting pointer is
- // guaranteed to be part of the same allocated object.
- // - `self.len` can not overflow `isize`.
- let ptr = unsafe { self.as_mut_ptr().add(self.len) };
+ /// Appends an element to the back of the [`Vec`] instance without reallocating.
+ ///
+ /// Fails if the vector does not have capacity for the new element.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// let mut v = KVec::with_capacity(10, GFP_KERNEL)?;
+ /// for i in 0..10 {
+ /// v.push_within_capacity(i)?;
+ /// }
+ ///
+ /// assert!(v.push_within_capacity(10).is_err());
+ /// # Ok::<(), Error>(())
+ /// ```
+ pub fn push_within_capacity(&mut self, v: T) -> Result<(), PushError<T>> {
+ if self.len() < self.capacity() {
+ // SAFETY: The length is less than the capacity.
+ unsafe { self.push_within_capacity_unchecked(v) };
+ Ok(())
+ } else {
+ Err(PushError(v))
+ }
+ }
- // SAFETY:
- // - `ptr` is properly aligned and valid for writes.
- unsafe { core::ptr::write(ptr, v) };
+ /// Appends an element to the back of the [`Vec`] instance without reallocating.
+ ///
+ /// # Safety
+ ///
+ /// The length must be less than the capacity.
+ unsafe fn push_within_capacity_unchecked(&mut self, v: T) {
+ let spare = self.spare_capacity_mut();
+
+ // SAFETY: By the safety requirements, `spare` is non-empty.
+ unsafe { spare.get_unchecked_mut(0) }.write(v);
// SAFETY: We just initialised the first spare entry, so it is safe to increase the length
- // by 1. We also know that the new length is <= capacity because of the previous call to
- // `reserve` above.
- unsafe { self.set_len(self.len() + 1) };
+ // by 1. We also know that the new length is <= capacity because the caller guarantees that
+ // the length is less than the capacity at the beginning of this function.
+ unsafe { self.inc_len(1) };
+ }
+
+ /// Inserts an element at the given index in the [`Vec`] instance.
+ ///
+ /// Fails if the vector does not have capacity for the new element. Panics if the index is out
+ /// of bounds.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use kernel::alloc::kvec::InsertError;
+ ///
+ /// let mut v = KVec::with_capacity(5, GFP_KERNEL)?;
+ /// for i in 0..5 {
+ /// v.insert_within_capacity(0, i)?;
+ /// }
+ ///
+ /// assert!(matches!(v.insert_within_capacity(0, 5), Err(InsertError::OutOfCapacity(_))));
+ /// assert!(matches!(v.insert_within_capacity(1000, 5), Err(InsertError::IndexOutOfBounds(_))));
+ /// assert_eq!(v, [4, 3, 2, 1, 0]);
+ /// # Ok::<(), Error>(())
+ /// ```
+ pub fn insert_within_capacity(
+ &mut self,
+ index: usize,
+ element: T,
+ ) -> Result<(), InsertError<T>> {
+ let len = self.len();
+ if index > len {
+ return Err(InsertError::IndexOutOfBounds(element));
+ }
+
+ if len >= self.capacity() {
+ return Err(InsertError::OutOfCapacity(element));
+ }
+
+ // SAFETY: This is in bounds since `index <= len < capacity`.
+ let p = unsafe { self.as_mut_ptr().add(index) };
+ // INVARIANT: This breaks the Vec invariants by making `index` contain an invalid element,
+ // but we restore the invariants below.
+ // SAFETY: Both the src and dst ranges end no later than one element after the length.
+ // Since the length is less than the capacity, both ranges are in bounds of the allocation.
+ unsafe { ptr::copy(p, p.add(1), len - index) };
+ // INVARIANT: This restores the Vec invariants.
+ // SAFETY: The pointer is in-bounds of the allocation.
+ unsafe { ptr::write(p, element) };
+ // SAFETY: Index `len` contains a valid element due to the above copy and write.
+ unsafe { self.inc_len(1) };
Ok(())
}
+ /// Removes the last element from a vector and returns it, or `None` if it is empty.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// let mut v = KVec::new();
+ /// v.push(1, GFP_KERNEL)?;
+ /// v.push(2, GFP_KERNEL)?;
+ /// assert_eq!(&v, &[1, 2]);
+ ///
+ /// assert_eq!(v.pop(), Some(2));
+ /// assert_eq!(v.pop(), Some(1));
+ /// assert_eq!(v.pop(), None);
+ /// # Ok::<(), Error>(())
+ /// ```
+ pub fn pop(&mut self) -> Option<T> {
+ if self.is_empty() {
+ return None;
+ }
+
+ let removed: *mut T = {
+ // SAFETY: We just checked that the length is at least one.
+ let slice = unsafe { self.dec_len(1) };
+ // SAFETY: The argument to `dec_len` was 1 so this returns a slice of length 1.
+ unsafe { slice.get_unchecked_mut(0) }
+ };
+
+ // SAFETY: The guarantees of `dec_len` allow us to take ownership of this value.
+ Some(unsafe { removed.read() })
+ }
+
+ /// Removes the element at the given index.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// let mut v = kernel::kvec![1, 2, 3]?;
+ /// assert_eq!(v.remove(1)?, 2);
+ /// assert_eq!(v, [1, 3]);
+ /// # Ok::<(), Error>(())
+ /// ```
+ pub fn remove(&mut self, i: usize) -> Result<T, RemoveError> {
+ let value = {
+ let value_ref = self.get(i).ok_or(RemoveError)?;
+ // INVARIANT: This breaks the invariants by invalidating the value at index `i`, but we
+ // restore the invariants below.
+ // SAFETY: The value at index `i` is valid, because otherwise we would have already
+ // failed with `RemoveError`.
+ unsafe { ptr::read(value_ref) }
+ };
+
+ // SAFETY: We checked that `i` is in-bounds.
+ let p = unsafe { self.as_mut_ptr().add(i) };
+
+ // INVARIANT: After this call, the invalid value is at the last slot, so the Vec invariants
+ // are restored after the below call to `dec_len(1)`.
+ // SAFETY: `p.add(1).add(self.len - i - 1)` is `i+1+len-i-1 == len` elements after the
+ // beginning of the vector, so this is in-bounds of the vector's allocation.
+ unsafe { ptr::copy(p.add(1), p, self.len - i - 1) };
+
+ // SAFETY: Since the check at the beginning of this call did not fail with `RemoveError`,
+ // the length is at least one.
+ unsafe { self.dec_len(1) };
+
+ Ok(value)
+ }
+
/// Creates a new [`Vec`] instance with at least the given capacity.
///
/// # Examples
@@ -398,6 +567,26 @@ where
(ptr, len, capacity)
}
+ /// Clears the vector, removing all values.
+ ///
+ /// Note that this method has no effect on the allocated capacity
+ /// of the vector.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// let mut v = kernel::kvec![1, 2, 3]?;
+ ///
+ /// v.clear();
+ ///
+ /// assert!(v.is_empty());
+ /// # Ok::<(), Error>(())
+ /// ```
+ #[inline]
+ pub fn clear(&mut self) {
+ self.truncate(0);
+ }
+
/// Ensures that the capacity exceeds the length by at least `additional` elements.
///
/// # Examples
@@ -455,6 +644,80 @@ where
Ok(())
}
+
+ /// Shortens the vector, setting the length to `len` and drops the removed values.
+ /// If `len` is greater than or equal to the current length, this does nothing.
+ ///
+ /// This has no effect on the capacity and will not allocate.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// let mut v = kernel::kvec![1, 2, 3]?;
+ /// v.truncate(1);
+ /// assert_eq!(v.len(), 1);
+ /// assert_eq!(&v, &[1]);
+ ///
+ /// # Ok::<(), Error>(())
+ /// ```
+ pub fn truncate(&mut self, len: usize) {
+ if let Some(count) = self.len().checked_sub(len) {
+ // SAFETY: `count` is `self.len() - len` so it is guaranteed to be less than or
+ // equal to `self.len()`.
+ let ptr: *mut [T] = unsafe { self.dec_len(count) };
+
+ // SAFETY: the contract of `dec_len` guarantees that the elements in `ptr` are
+ // valid elements whose ownership has been transferred to the caller.
+ unsafe { ptr::drop_in_place(ptr) };
+ }
+ }
+
+ /// Takes ownership of all items in this vector without consuming the allocation.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// let mut v = kernel::kvec![0, 1, 2, 3]?;
+ ///
+ /// for (i, j) in v.drain_all().enumerate() {
+ /// assert_eq!(i, j);
+ /// }
+ ///
+ /// assert!(v.capacity() >= 4);
+ /// # Ok::<(), Error>(())
+ /// ```
+ pub fn drain_all(&mut self) -> DrainAll<'_, T> {
+ // SAFETY: This does not underflow the length.
+ let elems = unsafe { self.dec_len(self.len()) };
+ // INVARIANT: The first `len` elements of the spare capacity are valid values, and as we
+ // just set the length to zero, we may transfer ownership to the `DrainAll` object.
+ DrainAll {
+ elements: elems.iter_mut(),
+ }
+ }
+
+ /// Removes all elements that don't match the provided closure.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// let mut v = kernel::kvec![1, 2, 3, 4]?;
+ /// v.retain(|i| *i % 2 == 0);
+ /// assert_eq!(v, [2, 4]);
+ /// # Ok::<(), Error>(())
+ /// ```
+ pub fn retain(&mut self, mut f: impl FnMut(&mut T) -> bool) {
+ let mut num_kept = 0;
+ let mut next_to_check = 0;
+ while let Some(to_check) = self.get_mut(next_to_check) {
+ if f(to_check) {
+ self.swap(num_kept, next_to_check);
+ num_kept += 1;
+ }
+ next_to_check += 1;
+ }
+ self.truncate(num_kept);
+ }
}
impl<T: Clone, A: Allocator> Vec<T, A> {
@@ -478,7 +741,7 @@ impl<T: Clone, A: Allocator> Vec<T, A> {
// SAFETY:
// - `self.len() + n < self.capacity()` due to the call to reserve above,
// - the loop and the line above initialized the next `n` elements.
- unsafe { self.set_len(self.len() + n) };
+ unsafe { self.inc_len(n) };
Ok(())
}
@@ -509,7 +772,7 @@ impl<T: Clone, A: Allocator> Vec<T, A> {
// the length by the same number.
// - `self.len() + other.len() <= self.capacity()` is guaranteed by the preceding `reserve`
// call.
- unsafe { self.set_len(self.len() + other.len()) };
+ unsafe { self.inc_len(other.len()) };
Ok(())
}
@@ -521,6 +784,33 @@ impl<T: Clone, A: Allocator> Vec<T, A> {
Ok(v)
}
+
+ /// Resizes the [`Vec`] so that `len` is equal to `new_len`.
+ ///
+ /// If `new_len` is smaller than `len`, the `Vec` is [`Vec::truncate`]d.
+ /// If `new_len` is larger, each new slot is filled with clones of `value`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// let mut v = kernel::kvec![1, 2, 3]?;
+ /// v.resize(1, 42, GFP_KERNEL)?;
+ /// assert_eq!(&v, &[1]);
+ ///
+ /// v.resize(3, 42, GFP_KERNEL)?;
+ /// assert_eq!(&v, &[1, 42, 42]);
+ ///
+ /// # Ok::<(), Error>(())
+ /// ```
+ pub fn resize(&mut self, new_len: usize, value: T, flags: Flags) -> Result<(), AllocError> {
+ match new_len.checked_sub(self.len()) {
+ Some(n) => self.extend_with(n, value, flags),
+ None => {
+ self.truncate(new_len);
+ Ok(())
+ }
+ }
+ }
}
impl<T, A> Drop for Vec<T, A>
@@ -760,12 +1050,13 @@ where
unsafe { ptr::copy(ptr, buf.as_ptr(), len) };
ptr = buf.as_ptr();
- // SAFETY: `len` is guaranteed to be smaller than `self.layout.len()`.
+ // SAFETY: `len` is guaranteed to be smaller than `self.layout.len()` by the type
+ // invariant.
let layout = unsafe { ArrayLayout::<T>::new_unchecked(len) };
- // SAFETY: `buf` points to the start of the backing buffer and `len` is guaranteed to be
- // smaller than `cap`. Depending on `alloc` this operation may shrink the buffer or leaves
- // it as it is.
+ // SAFETY: `buf` points to the start of the backing buffer and `len` is guaranteed by
+ // the type invariant to be smaller than `cap`. Depending on `realloc` this operation
+ // may shrink the buffer or leave it as it is.
ptr = match unsafe {
A::realloc(Some(buf.cast()), layout.into(), old_layout.into(), flags)
} {
@@ -914,3 +1205,87 @@ where
}
}
}
+
+/// An iterator that owns all items in a vector, but does not own its allocation.
+///
+/// # Invariants
+///
+/// Every `&mut T` returned by the iterator references a `T` that the iterator may take ownership
+/// of.
+pub struct DrainAll<'vec, T> {
+ elements: slice::IterMut<'vec, T>,
+}
+
+impl<'vec, T> Iterator for DrainAll<'vec, T> {
+ type Item = T;
+
+ fn next(&mut self) -> Option<T> {
+ let elem: *mut T = self.elements.next()?;
+ // SAFETY: By the type invariants, we may take ownership of this value.
+ Some(unsafe { elem.read() })
+ }
+
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ self.elements.size_hint()
+ }
+}
+
+impl<'vec, T> Drop for DrainAll<'vec, T> {
+ fn drop(&mut self) {
+ if core::mem::needs_drop::<T>() {
+ let iter = core::mem::take(&mut self.elements);
+ let ptr: *mut [T] = iter.into_slice();
+ // SAFETY: By the type invariants, we own these values so we may destroy them.
+ unsafe { ptr::drop_in_place(ptr) };
+ }
+ }
+}
+
+#[macros::kunit_tests(rust_kvec_kunit)]
+mod tests {
+ use super::*;
+ use crate::prelude::*;
+
+ #[test]
+ fn test_kvec_retain() {
+ /// Verify correctness for one specific function.
+ #[expect(clippy::needless_range_loop)]
+ fn verify(c: &[bool]) {
+ let mut vec1: KVec<usize> = KVec::with_capacity(c.len(), GFP_KERNEL).unwrap();
+ let mut vec2: KVec<usize> = KVec::with_capacity(c.len(), GFP_KERNEL).unwrap();
+
+ for i in 0..c.len() {
+ vec1.push_within_capacity(i).unwrap();
+ if c[i] {
+ vec2.push_within_capacity(i).unwrap();
+ }
+ }
+
+ vec1.retain(|i| c[*i]);
+
+ assert_eq!(vec1, vec2);
+ }
+
+ /// Add one to a binary integer represented as a boolean array.
+ fn add(value: &mut [bool]) {
+ let mut carry = true;
+ for v in value {
+ let new_v = carry != *v;
+ carry = carry && *v;
+ *v = new_v;
+ }
+ }
+
+ // This boolean array represents a function from index to boolean. We check that `retain`
+ // behaves correctly for all possible boolean arrays of every possible length less than
+ // ten.
+ let mut func = KVec::with_capacity(10, GFP_KERNEL).unwrap();
+ for len in 0..10 {
+ for _ in 0u32..1u32 << len {
+ verify(&func);
+ add(&mut func);
+ }
+ func.push_within_capacity(false).unwrap();
+ }
+ }
+}
diff --git a/rust/kernel/alloc/kvec/errors.rs b/rust/kernel/alloc/kvec/errors.rs
new file mode 100644
index 000000000000..348b8d27e102
--- /dev/null
+++ b/rust/kernel/alloc/kvec/errors.rs
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Errors for the [`Vec`] type.
+
+use core::fmt::{self, Debug, Formatter};
+use kernel::prelude::*;
+
+/// Error type for [`Vec::push_within_capacity`].
+pub struct PushError<T>(pub T);
+
+impl<T> Debug for PushError<T> {
+ fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+ write!(f, "Not enough capacity")
+ }
+}
+
+impl<T> From<PushError<T>> for Error {
+ fn from(_: PushError<T>) -> Error {
+ // Returning ENOMEM isn't appropriate because the system is not out of memory. The vector
+ // is just full and we are refusing to resize it.
+ EINVAL
+ }
+}
+
+/// Error type for [`Vec::remove`].
+pub struct RemoveError;
+
+impl Debug for RemoveError {
+ fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+ write!(f, "Index out of bounds")
+ }
+}
+
+impl From<RemoveError> for Error {
+ fn from(_: RemoveError) -> Error {
+ EINVAL
+ }
+}
+
+/// Error type for [`Vec::insert_within_capacity`].
+pub enum InsertError<T> {
+ /// The value could not be inserted because the index is out of bounds.
+ IndexOutOfBounds(T),
+ /// The value could not be inserted because the vector is out of capacity.
+ OutOfCapacity(T),
+}
+
+impl<T> Debug for InsertError<T> {
+ fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+ match self {
+ InsertError::IndexOutOfBounds(_) => write!(f, "Index out of bounds"),
+ InsertError::OutOfCapacity(_) => write!(f, "Not enough capacity"),
+ }
+ }
+}
+
+impl<T> From<InsertError<T>> for Error {
+ fn from(_: InsertError<T>) -> Error {
+ EINVAL
+ }
+}
diff --git a/rust/kernel/auxiliary.rs b/rust/kernel/auxiliary.rs
index 5c072960dee0..d2cfe1eeefb6 100644
--- a/rust/kernel/auxiliary.rs
+++ b/rust/kernel/auxiliary.rs
@@ -73,7 +73,9 @@ impl<T: Driver + 'static> Adapter<T> {
// Let the `struct auxiliary_device` own a reference of the driver's private data.
// SAFETY: By the type invariant `adev.as_raw` returns a valid pointer to a
// `struct auxiliary_device`.
- unsafe { bindings::auxiliary_set_drvdata(adev.as_raw(), data.into_foreign()) };
+ unsafe {
+ bindings::auxiliary_set_drvdata(adev.as_raw(), data.into_foreign().cast())
+ };
}
Err(err) => return Error::to_errno(err),
}
@@ -89,7 +91,7 @@ impl<T: Driver + 'static> Adapter<T> {
// SAFETY: `remove_callback` is only ever called after a successful call to
// `probe_callback`, hence it's guaranteed that `ptr` points to a valid and initialized
// `KBox<T>` pointer created through `KBox::into_foreign`.
- drop(unsafe { KBox::<T>::from_foreign(ptr) });
+ drop(unsafe { KBox::<T>::from_foreign(ptr.cast()) });
}
}
@@ -234,7 +236,7 @@ impl Device {
extern "C" fn release(dev: *mut bindings::device) {
// SAFETY: By the type invariant `self.0.as_raw` is a pointer to the `struct device`
// embedded in `struct auxiliary_device`.
- let adev = unsafe { container_of!(dev, bindings::auxiliary_device, dev) }.cast_mut();
+ let adev = unsafe { container_of!(dev, bindings::auxiliary_device, dev) };
// SAFETY: `adev` points to the memory that has been allocated in `Registration::new`, via
// `KBox::new(Opaque::<bindings::auxiliary_device>::zeroed(), GFP_KERNEL)`.
diff --git a/rust/kernel/block/mq/gen_disk.rs b/rust/kernel/block/mq/gen_disk.rs
index 14806e1997fd..cd54cd64ea88 100644
--- a/rust/kernel/block/mq/gen_disk.rs
+++ b/rust/kernel/block/mq/gen_disk.rs
@@ -129,7 +129,7 @@ impl GenDiskBuilder {
get_unique_id: None,
// TODO: Set to THIS_MODULE. Waiting for const_refs_to_static feature to
// be merged (unstable in rustc 1.78 which is staged for linux 6.10)
- // https://github.com/rust-lang/rust/issues/119618
+ // <https://github.com/rust-lang/rust/issues/119618>
owner: core::ptr::null_mut(),
pr_ops: core::ptr::null_mut(),
free_disk: None,
diff --git a/rust/kernel/configfs.rs b/rust/kernel/configfs.rs
index b93ac7b0bebc..34d0bea4f9a5 100644
--- a/rust/kernel/configfs.rs
+++ b/rust/kernel/configfs.rs
@@ -554,7 +554,7 @@ where
let c_group: *mut bindings::config_group =
// SAFETY: By function safety requirements, `item` is embedded in a
// `config_group`.
- unsafe { container_of!(item, bindings::config_group, cg_item) }.cast_mut();
+ unsafe { container_of!(item, bindings::config_group, cg_item) };
// SAFETY: The function safety requirements for this function satisfy
// the conditions for this call.
@@ -588,7 +588,7 @@ where
let c_group: *mut bindings::config_group =
// SAFETY: By function safety requirements, `item` is embedded in a
// `config_group`.
- unsafe { container_of!(item, bindings::config_group, cg_item) }.cast_mut();
+ unsafe { container_of!(item, bindings::config_group, cg_item) };
// SAFETY: The function safety requirements for this function satisfy
// the conditions for this call.
diff --git a/rust/kernel/cpufreq.rs b/rust/kernel/cpufreq.rs
index 09b856bb297b..b0a9c6182aec 100644
--- a/rust/kernel/cpufreq.rs
+++ b/rust/kernel/cpufreq.rs
@@ -635,7 +635,7 @@ impl Policy {
None
} else {
// SAFETY: The data is earlier set from [`set_data`].
- Some(unsafe { T::borrow(self.as_ref().driver_data) })
+ Some(unsafe { T::borrow(self.as_ref().driver_data.cast()) })
}
}
@@ -662,7 +662,7 @@ impl Policy {
let data = Some(
// SAFETY: The data is earlier set by us from [`set_data`]. It is safe to take
// back the ownership of the data from the foreign interface.
- unsafe { <T as ForeignOwnable>::from_foreign(self.as_ref().driver_data) },
+ unsafe { <T as ForeignOwnable>::from_foreign(self.as_ref().driver_data.cast()) },
);
self.as_mut_ref().driver_data = ptr::null_mut();
data
diff --git a/rust/kernel/device.rs b/rust/kernel/device.rs
index f08583fa39c9..dea06b79ecb5 100644
--- a/rust/kernel/device.rs
+++ b/rust/kernel/device.rs
@@ -345,7 +345,7 @@ macro_rules! impl_device_context_into_aref {
macro_rules! dev_printk {
($method:ident, $dev:expr, $($f:tt)*) => {
{
- ($dev).$method(core::format_args!($($f)*));
+ ($dev).$method(::core::format_args!($($f)*));
}
}
}
@@ -357,9 +357,10 @@ macro_rules! dev_printk {
/// Equivalent to the kernel's `dev_emerg` macro.
///
/// Mimics the interface of [`std::print!`]. More information about the syntax is available from
-/// [`core::fmt`] and `alloc::format!`.
+/// [`core::fmt`] and [`std::format!`].
///
/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html
+/// [`std::format!`]: https://doc.rust-lang.org/std/macro.format.html
///
/// # Examples
///
@@ -382,9 +383,10 @@ macro_rules! dev_emerg {
/// Equivalent to the kernel's `dev_alert` macro.
///
/// Mimics the interface of [`std::print!`]. More information about the syntax is available from
-/// [`core::fmt`] and `alloc::format!`.
+/// [`core::fmt`] and [`std::format!`].
///
/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html
+/// [`std::format!`]: https://doc.rust-lang.org/std/macro.format.html
///
/// # Examples
///
@@ -407,9 +409,10 @@ macro_rules! dev_alert {
/// Equivalent to the kernel's `dev_crit` macro.
///
/// Mimics the interface of [`std::print!`]. More information about the syntax is available from
-/// [`core::fmt`] and `alloc::format!`.
+/// [`core::fmt`] and [`std::format!`].
///
/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html
+/// [`std::format!`]: https://doc.rust-lang.org/std/macro.format.html
///
/// # Examples
///
@@ -432,9 +435,10 @@ macro_rules! dev_crit {
/// Equivalent to the kernel's `dev_err` macro.
///
/// Mimics the interface of [`std::print!`]. More information about the syntax is available from
-/// [`core::fmt`] and `alloc::format!`.
+/// [`core::fmt`] and [`std::format!`].
///
/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html
+/// [`std::format!`]: https://doc.rust-lang.org/std/macro.format.html
///
/// # Examples
///
@@ -457,9 +461,10 @@ macro_rules! dev_err {
/// Equivalent to the kernel's `dev_warn` macro.
///
/// Mimics the interface of [`std::print!`]. More information about the syntax is available from
-/// [`core::fmt`] and `alloc::format!`.
+/// [`core::fmt`] and [`std::format!`].
///
/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html
+/// [`std::format!`]: https://doc.rust-lang.org/std/macro.format.html
///
/// # Examples
///
@@ -482,9 +487,10 @@ macro_rules! dev_warn {
/// Equivalent to the kernel's `dev_notice` macro.
///
/// Mimics the interface of [`std::print!`]. More information about the syntax is available from
-/// [`core::fmt`] and `alloc::format!`.
+/// [`core::fmt`] and [`std::format!`].
///
/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html
+/// [`std::format!`]: https://doc.rust-lang.org/std/macro.format.html
///
/// # Examples
///
@@ -507,9 +513,10 @@ macro_rules! dev_notice {
/// Equivalent to the kernel's `dev_info` macro.
///
/// Mimics the interface of [`std::print!`]. More information about the syntax is available from
-/// [`core::fmt`] and `alloc::format!`.
+/// [`core::fmt`] and [`std::format!`].
///
/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html
+/// [`std::format!`]: https://doc.rust-lang.org/std/macro.format.html
///
/// # Examples
///
@@ -532,9 +539,10 @@ macro_rules! dev_info {
/// Equivalent to the kernel's `dev_dbg` macro, except that it doesn't support dynamic debug yet.
///
/// Mimics the interface of [`std::print!`]. More information about the syntax is available from
-/// [`core::fmt`] and `alloc::format!`.
+/// [`core::fmt`] and [`std::format!`].
///
/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html
+/// [`std::format!`]: https://doc.rust-lang.org/std/macro.format.html
///
/// # Examples
///
diff --git a/rust/kernel/device_id.rs b/rust/kernel/device_id.rs
index e5859217a579..0a4eb56d98f2 100644
--- a/rust/kernel/device_id.rs
+++ b/rust/kernel/device_id.rs
@@ -159,7 +159,7 @@ macro_rules! module_device_table {
"_", line!(),
"_", stringify!($table_name))
]
- static $module_table_name: [core::mem::MaybeUninit<u8>; $table_name.raw_ids().size()] =
- unsafe { core::mem::transmute_copy($table_name.raw_ids()) };
+ static $module_table_name: [::core::mem::MaybeUninit<u8>; $table_name.raw_ids().size()] =
+ unsafe { ::core::mem::transmute_copy($table_name.raw_ids()) };
};
}
diff --git a/rust/kernel/dma.rs b/rust/kernel/dma.rs
index 605e01e35715..a33261c62e0c 100644
--- a/rust/kernel/dma.rs
+++ b/rust/kernel/dma.rs
@@ -94,7 +94,7 @@ pub mod attrs {
pub const DMA_ATTR_ALLOC_SINGLE_PAGES: Attrs = Attrs(bindings::DMA_ATTR_ALLOC_SINGLE_PAGES);
/// This tells the DMA-mapping subsystem to suppress allocation failure reports (similarly to
- /// __GFP_NOWARN).
+ /// `__GFP_NOWARN`).
pub const DMA_ATTR_NO_WARN: Attrs = Attrs(bindings::DMA_ATTR_NO_WARN);
/// Used to indicate that the buffer is fully accessible at an elevated privilege level (and
diff --git a/rust/kernel/drm/device.rs b/rust/kernel/drm/device.rs
index 74c9a3dd719e..624d7a4c83ea 100644
--- a/rust/kernel/drm/device.rs
+++ b/rust/kernel/drm/device.rs
@@ -135,6 +135,8 @@ impl<T: drm::Driver> Device<T> {
///
/// `ptr` must be a valid pointer to a `struct device` embedded in `Self`.
unsafe fn from_drm_device(ptr: *const bindings::drm_device) -> *mut Self {
+ let ptr: *const Opaque<bindings::drm_device> = ptr.cast();
+
// SAFETY: By the safety requirements of this function `ptr` is a valid pointer to a
// `struct drm_device` embedded in `Self`.
unsafe { crate::container_of!(ptr, Self, dev) }.cast_mut()
diff --git a/rust/kernel/drm/gem/mod.rs b/rust/kernel/drm/gem/mod.rs
index d8765e61c6c2..4cd69fa84318 100644
--- a/rust/kernel/drm/gem/mod.rs
+++ b/rust/kernel/drm/gem/mod.rs
@@ -125,6 +125,8 @@ impl<T: DriverObject> IntoGEMObject for Object<T> {
}
unsafe fn as_ref<'a>(self_ptr: *mut bindings::drm_gem_object) -> &'a Self {
+ let self_ptr: *mut Opaque<bindings::drm_gem_object> = self_ptr.cast();
+
// SAFETY: `obj` is guaranteed to be in an `Object<T>` via the safety contract of this
// function
unsafe { &*crate::container_of!(self_ptr, Object<T>, obj) }
@@ -269,8 +271,10 @@ impl<T: DriverObject> Object<T> {
}
extern "C" fn free_callback(obj: *mut bindings::drm_gem_object) {
+ let ptr: *mut Opaque<bindings::drm_gem_object> = obj.cast();
+
// SAFETY: All of our objects are of type `Object<T>`.
- let this = unsafe { crate::container_of!(obj, Self, obj) }.cast_mut();
+ let this = unsafe { crate::container_of!(ptr, Self, obj) };
// SAFETY: The C code only ever calls this callback with a valid pointer to a `struct
// drm_gem_object`.
diff --git a/rust/kernel/kunit.rs b/rust/kernel/kunit.rs
index 1604fb6a5b1b..4b8cdcb21e77 100644
--- a/rust/kernel/kunit.rs
+++ b/rust/kernel/kunit.rs
@@ -6,6 +6,7 @@
//!
//! Reference: <https://docs.kernel.org/dev-tools/kunit/index.html>
+use crate::prelude::*;
use core::{ffi::c_void, fmt};
/// Prints a KUnit error-level message.
@@ -40,8 +41,6 @@ pub fn info(args: fmt::Arguments<'_>) {
}
}
-use macros::kunit_tests;
-
/// Asserts that a boolean expression is `true` at runtime.
///
/// Public but hidden since it should only be used from generated tests.
@@ -59,7 +58,7 @@ macro_rules! kunit_assert {
}
static FILE: &'static $crate::str::CStr = $crate::c_str!($file);
- static LINE: i32 = core::line!() as i32 - $diff;
+ static LINE: i32 = ::core::line!() as i32 - $diff;
static CONDITION: &'static $crate::str::CStr = $crate::c_str!(stringify!($condition));
// SAFETY: FFI call without safety requirements.
@@ -130,11 +129,11 @@ macro_rules! kunit_assert {
unsafe {
$crate::bindings::__kunit_do_failed_assertion(
kunit_test,
- core::ptr::addr_of!(LOCATION.0),
+ ::core::ptr::addr_of!(LOCATION.0),
$crate::bindings::kunit_assert_type_KUNIT_ASSERTION,
- core::ptr::addr_of!(ASSERTION.0.assert),
+ ::core::ptr::addr_of!(ASSERTION.0.assert),
Some($crate::bindings::kunit_unary_assert_format),
- core::ptr::null(),
+ ::core::ptr::null(),
);
}
@@ -164,6 +163,31 @@ macro_rules! kunit_assert_eq {
}};
}
+trait TestResult {
+ fn is_test_result_ok(&self) -> bool;
+}
+
+impl TestResult for () {
+ fn is_test_result_ok(&self) -> bool {
+ true
+ }
+}
+
+impl<T, E> TestResult for Result<T, E> {
+ fn is_test_result_ok(&self) -> bool {
+ self.is_ok()
+ }
+}
+
+/// Returns whether a test result is to be considered OK.
+///
+/// This will be `assert!`ed from the generated tests.
+#[doc(hidden)]
+#[expect(private_bounds)]
+pub fn is_test_result_ok(t: impl TestResult) -> bool {
+ t.is_test_result_ok()
+}
+
/// Represents an individual test case.
///
/// The [`kunit_unsafe_test_suite!`] macro expects a NULL-terminated list of valid test cases.
@@ -323,7 +347,6 @@ mod tests {
#[test]
fn rust_test_kunit_example_test() {
- #![expect(clippy::eq_op)]
assert_eq!(1 + 1, 2);
}
diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs
index 911c72a0fc21..6b4774b2b1c3 100644
--- a/rust/kernel/lib.rs
+++ b/rust/kernel/lib.rs
@@ -12,20 +12,34 @@
//! do so first instead of bypassing this crate.
#![no_std]
-#![feature(arbitrary_self_types)]
-#![cfg_attr(CONFIG_RUSTC_HAS_COERCE_POINTEE, feature(derive_coerce_pointee))]
-#![cfg_attr(not(CONFIG_RUSTC_HAS_COERCE_POINTEE), feature(coerce_unsized))]
-#![cfg_attr(not(CONFIG_RUSTC_HAS_COERCE_POINTEE), feature(dispatch_from_dyn))]
-#![cfg_attr(not(CONFIG_RUSTC_HAS_COERCE_POINTEE), feature(unsize))]
+//
+// Please see https://github.com/Rust-for-Linux/linux/issues/2 for details on
+// the unstable features in use.
+//
+// Stable since Rust 1.79.0.
#![feature(inline_const)]
+//
+// Stable since Rust 1.81.0.
#![feature(lint_reasons)]
-// Stable in Rust 1.82
+//
+// Stable since Rust 1.82.0.
#![feature(raw_ref_op)]
-// Stable in Rust 1.83
+//
+// Stable since Rust 1.83.0.
#![feature(const_maybe_uninit_as_mut_ptr)]
#![feature(const_mut_refs)]
#![feature(const_ptr_write)]
#![feature(const_refs_to_cell)]
+//
+// Expected to become stable.
+#![feature(arbitrary_self_types)]
+//
+// `feature(derive_coerce_pointee)` is expected to become stable. Before Rust
+// 1.84.0, it did not exist, so enable the predecessor features.
+#![cfg_attr(CONFIG_RUSTC_HAS_COERCE_POINTEE, feature(derive_coerce_pointee))]
+#![cfg_attr(not(CONFIG_RUSTC_HAS_COERCE_POINTEE), feature(coerce_unsized))]
+#![cfg_attr(not(CONFIG_RUSTC_HAS_COERCE_POINTEE), feature(dispatch_from_dyn))]
+#![cfg_attr(not(CONFIG_RUSTC_HAS_COERCE_POINTEE), feature(unsize))]
// Ensure conditional compilation based on the kernel configuration works;
// otherwise we may silently break things like initcall handling.
@@ -102,6 +116,7 @@ pub mod transmute;
pub mod types;
pub mod uaccess;
pub mod workqueue;
+pub mod xarray;
#[doc(hidden)]
pub use bindings;
@@ -204,7 +219,7 @@ fn panic(info: &core::panic::PanicInfo<'_>) -> ! {
/// }
///
/// let test = Test { a: 10, b: 20 };
-/// let b_ptr = &test.b;
+/// let b_ptr: *const _ = &test.b;
/// // SAFETY: The pointer points at the `b` field of a `Test`, so the resulting pointer will be
/// // in-bounds of the same allocation as `b_ptr`.
/// let test_alias = unsafe { container_of!(b_ptr, Test, b) };
@@ -212,13 +227,19 @@ fn panic(info: &core::panic::PanicInfo<'_>) -> ! {
/// ```
#[macro_export]
macro_rules! container_of {
- ($ptr:expr, $type:ty, $($f:tt)*) => {{
- let ptr = $ptr as *const _ as *const u8;
- let offset: usize = ::core::mem::offset_of!($type, $($f)*);
- ptr.sub(offset) as *const $type
+ ($field_ptr:expr, $Container:ty, $($fields:tt)*) => {{
+ let offset: usize = ::core::mem::offset_of!($Container, $($fields)*);
+ let field_ptr = $field_ptr;
+ let container_ptr = field_ptr.byte_sub(offset).cast::<$Container>();
+ $crate::assert_same_type(field_ptr, (&raw const (*container_ptr).$($fields)*).cast_mut());
+ container_ptr
}}
}
+/// Helper for [`container_of!`].
+#[doc(hidden)]
+pub fn assert_same_type<T>(_: T, _: T) {}
+
/// Helper for `.rs.S` files.
#[doc(hidden)]
#[macro_export]
diff --git a/rust/kernel/list.rs b/rust/kernel/list.rs
index 2054682c5724..c391c30b80f8 100644
--- a/rust/kernel/list.rs
+++ b/rust/kernel/list.rs
@@ -4,9 +4,6 @@
//! A linked list implementation.
-// May not be needed in Rust 1.87.0 (pending beta backport).
-#![allow(clippy::ptr_eq)]
-
use crate::sync::ArcBorrow;
use crate::types::Opaque;
use core::iter::{DoubleEndedIterator, FusedIterator};
@@ -38,6 +35,114 @@ pub use self::arc_field::{define_list_arc_field_getter, ListArcField};
/// * All prev/next pointers in `ListLinks` fields of items in the list are valid and form a cycle.
/// * For every item in the list, the list owns the associated [`ListArc`] reference and has
/// exclusive access to the `ListLinks` field.
+///
+/// # Examples
+///
+/// ```
+/// use kernel::list::*;
+///
+/// #[pin_data]
+/// struct BasicItem {
+/// value: i32,
+/// #[pin]
+/// links: ListLinks,
+/// }
+///
+/// impl BasicItem {
+/// fn new(value: i32) -> Result<ListArc<Self>> {
+/// ListArc::pin_init(try_pin_init!(Self {
+/// value,
+/// links <- ListLinks::new(),
+/// }), GFP_KERNEL)
+/// }
+/// }
+///
+/// impl_has_list_links! {
+/// impl HasListLinks<0> for BasicItem { self.links }
+/// }
+/// impl_list_arc_safe! {
+/// impl ListArcSafe<0> for BasicItem { untracked; }
+/// }
+/// impl_list_item! {
+/// impl ListItem<0> for BasicItem { using ListLinks; }
+/// }
+///
+/// // Create a new empty list.
+/// let mut list = List::new();
+/// {
+/// assert!(list.is_empty());
+/// }
+///
+/// // Insert 3 elements using `push_back()`.
+/// list.push_back(BasicItem::new(15)?);
+/// list.push_back(BasicItem::new(10)?);
+/// list.push_back(BasicItem::new(30)?);
+///
+/// // Iterate over the list to verify the nodes were inserted correctly.
+/// // [15, 10, 30]
+/// {
+/// let mut iter = list.iter();
+/// assert_eq!(iter.next().unwrap().value, 15);
+/// assert_eq!(iter.next().unwrap().value, 10);
+/// assert_eq!(iter.next().unwrap().value, 30);
+/// assert!(iter.next().is_none());
+///
+/// // Verify the length of the list.
+/// assert_eq!(list.iter().count(), 3);
+/// }
+///
+/// // Pop the items from the list using `pop_back()` and verify the content.
+/// {
+/// assert_eq!(list.pop_back().unwrap().value, 30);
+/// assert_eq!(list.pop_back().unwrap().value, 10);
+/// assert_eq!(list.pop_back().unwrap().value, 15);
+/// }
+///
+/// // Insert 3 elements using `push_front()`.
+/// list.push_front(BasicItem::new(15)?);
+/// list.push_front(BasicItem::new(10)?);
+/// list.push_front(BasicItem::new(30)?);
+///
+/// // Iterate over the list to verify the nodes were inserted correctly.
+/// // [30, 10, 15]
+/// {
+/// let mut iter = list.iter();
+/// assert_eq!(iter.next().unwrap().value, 30);
+/// assert_eq!(iter.next().unwrap().value, 10);
+/// assert_eq!(iter.next().unwrap().value, 15);
+/// assert!(iter.next().is_none());
+///
+/// // Verify the length of the list.
+/// assert_eq!(list.iter().count(), 3);
+/// }
+///
+/// // Pop the items from the list using `pop_front()` and verify the content.
+/// {
+/// assert_eq!(list.pop_front().unwrap().value, 30);
+/// assert_eq!(list.pop_front().unwrap().value, 10);
+/// }
+///
+/// // Push `list2` to `list` through `push_all_back()`.
+/// // list: [15]
+/// // list2: [25, 35]
+/// {
+/// let mut list2 = List::new();
+/// list2.push_back(BasicItem::new(25)?);
+/// list2.push_back(BasicItem::new(35)?);
+///
+/// list.push_all_back(&mut list2);
+///
+/// // list: [15, 25, 35]
+/// // list2: []
+/// let mut iter = list.iter();
+/// assert_eq!(iter.next().unwrap().value, 15);
+/// assert_eq!(iter.next().unwrap().value, 25);
+/// assert_eq!(iter.next().unwrap().value, 35);
+/// assert!(iter.next().is_none());
+/// assert!(list2.is_empty());
+/// }
+/// # Result::<(), Error>::Ok(())
+/// ```
pub struct List<T: ?Sized + ListItem<ID>, const ID: u64 = 0> {
first: *mut ListLinksFields,
_ty: PhantomData<ListArc<T, ID>>,
@@ -322,7 +427,7 @@ impl<T: ?Sized + ListItem<ID>, const ID: u64> List<T, ID> {
/// Removes the last item from this list.
pub fn pop_back(&mut self) -> Option<ListArc<T, ID>> {
- if self.first.is_null() {
+ if self.is_empty() {
return None;
}
@@ -334,7 +439,7 @@ impl<T: ?Sized + ListItem<ID>, const ID: u64> List<T, ID> {
/// Removes the first item from this list.
pub fn pop_front(&mut self) -> Option<ListArc<T, ID>> {
- if self.first.is_null() {
+ if self.is_empty() {
return None;
}
diff --git a/rust/kernel/list/arc.rs b/rust/kernel/list/arc.rs
index 13c50df37b89..d92bcf665c89 100644
--- a/rust/kernel/list/arc.rs
+++ b/rust/kernel/list/arc.rs
@@ -74,7 +74,7 @@ pub unsafe trait TryNewListArc<const ID: u64 = 0>: ListArcSafe<ID> {
///
/// * The `untracked` strategy does not actually keep track of whether a [`ListArc`] exists. When
/// using this strategy, the only way to create a [`ListArc`] is using a [`UniqueArc`].
-/// * The `tracked_by` strategy defers the tracking to a field of the struct. The user much specify
+/// * The `tracked_by` strategy defers the tracking to a field of the struct. The user must specify
/// which field to defer the tracking to. The field must implement [`ListArcSafe`]. If the field
/// implements [`TryNewListArc`], then the type will also implement [`TryNewListArc`].
///
@@ -96,7 +96,7 @@ macro_rules! impl_list_arc_safe {
} $($rest:tt)*) => {
impl$(<$($generics)*>)? $crate::list::ListArcSafe<$num> for $t {
unsafe fn on_create_list_arc_from_unique(self: ::core::pin::Pin<&mut Self>) {
- $crate::assert_pinned!($t, $field, $fty, inline);
+ ::pin_init::assert_pinned!($t, $field, $fty, inline);
// SAFETY: This field is structurally pinned as per the above assertion.
let field = unsafe {
@@ -464,7 +464,7 @@ where
/// A utility for tracking whether a [`ListArc`] exists using an atomic.
///
-/// # Invariant
+/// # Invariants
///
/// If the boolean is `false`, then there is no [`ListArc`] for this value.
#[repr(transparent)]
diff --git a/rust/kernel/miscdevice.rs b/rust/kernel/miscdevice.rs
index 9d9771247c38..f33c13c3ff97 100644
--- a/rust/kernel/miscdevice.rs
+++ b/rust/kernel/miscdevice.rs
@@ -217,7 +217,7 @@ impl<T: MiscDevice> MiscdeviceVTable<T> {
// type.
//
// SAFETY: The open call of a file can access the private data.
- unsafe { (*raw_file).private_data = ptr.into_foreign() };
+ unsafe { (*raw_file).private_data = ptr.into_foreign().cast() };
0
}
@@ -228,7 +228,7 @@ impl<T: MiscDevice> MiscdeviceVTable<T> {
/// must be associated with a `MiscDeviceRegistration<T>`.
unsafe extern "C" fn release(_inode: *mut bindings::inode, file: *mut bindings::file) -> c_int {
// SAFETY: The release call of a file owns the private data.
- let private = unsafe { (*file).private_data };
+ let private = unsafe { (*file).private_data }.cast();
// SAFETY: The release call of a file owns the private data.
let ptr = unsafe { <T::Ptr as ForeignOwnable>::from_foreign(private) };
@@ -253,7 +253,7 @@ impl<T: MiscDevice> MiscdeviceVTable<T> {
// SAFETY: This is a Rust Miscdevice, so we call `into_foreign` in `open` and
// `from_foreign` in `release`, and `fops_mmap` is guaranteed to be called between those
// two operations.
- let device = unsafe { <T::Ptr as ForeignOwnable>::borrow(private) };
+ let device = unsafe { <T::Ptr as ForeignOwnable>::borrow(private.cast()) };
// SAFETY: The caller provides a vma that is undergoing initial VMA setup.
let area = unsafe { VmaNew::from_raw(vma) };
// SAFETY:
@@ -272,7 +272,7 @@ impl<T: MiscDevice> MiscdeviceVTable<T> {
/// `file` must be a valid file that is associated with a `MiscDeviceRegistration<T>`.
unsafe extern "C" fn ioctl(file: *mut bindings::file, cmd: c_uint, arg: c_ulong) -> c_long {
// SAFETY: The ioctl call of a file can access the private data.
- let private = unsafe { (*file).private_data };
+ let private = unsafe { (*file).private_data }.cast();
// SAFETY: Ioctl calls can borrow the private data of the file.
let device = unsafe { <T::Ptr as ForeignOwnable>::borrow(private) };
@@ -297,7 +297,7 @@ impl<T: MiscDevice> MiscdeviceVTable<T> {
arg: c_ulong,
) -> c_long {
// SAFETY: The compat ioctl call of a file can access the private data.
- let private = unsafe { (*file).private_data };
+ let private = unsafe { (*file).private_data }.cast();
// SAFETY: Ioctl calls can borrow the private data of the file.
let device = unsafe { <T::Ptr as ForeignOwnable>::borrow(private) };
@@ -318,7 +318,7 @@ impl<T: MiscDevice> MiscdeviceVTable<T> {
/// - `seq_file` must be a valid `struct seq_file` that we can write to.
unsafe extern "C" fn show_fdinfo(seq_file: *mut bindings::seq_file, file: *mut bindings::file) {
// SAFETY: The release call of a file owns the private data.
- let private = unsafe { (*file).private_data };
+ let private = unsafe { (*file).private_data }.cast();
// SAFETY: Ioctl calls can borrow the private data of the file.
let device = unsafe { <T::Ptr as ForeignOwnable>::borrow(private) };
// SAFETY:
diff --git a/rust/kernel/page.rs b/rust/kernel/page.rs
index f6126aca33a6..7c1b17246ed5 100644
--- a/rust/kernel/page.rs
+++ b/rust/kernel/page.rs
@@ -69,6 +69,7 @@ impl Page {
/// let page = Page::alloc_page(GFP_KERNEL | __GFP_ZERO)?;
/// # Ok::<(), kernel::alloc::AllocError>(())
/// ```
+ #[inline]
pub fn alloc_page(flags: Flags) -> Result<Self, AllocError> {
// SAFETY: Depending on the value of `gfp_flags`, this call may sleep. Other than that, it
// is always safe to call this method.
@@ -251,6 +252,7 @@ impl Page {
}
impl Drop for Page {
+ #[inline]
fn drop(&mut self) {
// SAFETY: By the type invariants, we have ownership of the page and can free it.
unsafe { bindings::__free_pages(self.page.as_ptr(), 0) };
diff --git a/rust/kernel/pci.rs b/rust/kernel/pci.rs
index 38fc8d5ffbf9..8435f8132e38 100644
--- a/rust/kernel/pci.rs
+++ b/rust/kernel/pci.rs
@@ -89,7 +89,7 @@ impl<T: Driver + 'static> Adapter<T> {
extern "C" fn remove_callback(pdev: *mut bindings::pci_dev) {
// SAFETY: The PCI bus only ever calls the remove callback with a valid pointer to a
// `struct pci_dev`.
- let ptr = unsafe { bindings::pci_get_drvdata(pdev) };
+ let ptr = unsafe { bindings::pci_get_drvdata(pdev) }.cast();
// SAFETY: `remove_callback` is only ever called after a successful call to
// `probe_callback`, hence it's guaranteed that `ptr` points to a valid and initialized
@@ -118,7 +118,9 @@ macro_rules! module_pci_driver {
};
}
-/// Abstraction for bindings::pci_device_id.
+/// Abstraction for the PCI device ID structure ([`struct pci_device_id`]).
+///
+/// [`struct pci_device_id`]: https://docs.kernel.org/PCI/pci.html#c.pci_device_id
#[repr(transparent)]
#[derive(Clone, Copy)]
pub struct DeviceId(bindings::pci_device_id);
@@ -173,7 +175,7 @@ unsafe impl RawDeviceId for DeviceId {
}
}
-/// IdTable type for PCI
+/// `IdTable` type for PCI.
pub type IdTable<T> = &'static dyn kernel::device_id::IdTable<DeviceId, T>;
/// Create a PCI `IdTable` with its alias for modpost.
@@ -224,10 +226,11 @@ macro_rules! pci_device_table {
/// `Adapter` documentation for an example.
pub trait Driver: Send {
/// The type holding information about each device id supported by the driver.
- ///
- /// TODO: Use associated_type_defaults once stabilized:
- ///
- /// type IdInfo: 'static = ();
+ // TODO: Use `associated_type_defaults` once stabilized:
+ //
+ // ```
+ // type IdInfo: 'static = ();
+ // ```
type IdInfo: 'static;
/// The table of device ids supported by the driver.
diff --git a/rust/kernel/platform.rs b/rust/kernel/platform.rs
index 08849d92c074..5b21fa517e55 100644
--- a/rust/kernel/platform.rs
+++ b/rust/kernel/platform.rs
@@ -79,7 +79,7 @@ impl<T: Driver + 'static> Adapter<T> {
extern "C" fn remove_callback(pdev: *mut bindings::platform_device) {
// SAFETY: `pdev` is a valid pointer to a `struct platform_device`.
- let ptr = unsafe { bindings::platform_get_drvdata(pdev) };
+ let ptr = unsafe { bindings::platform_get_drvdata(pdev) }.cast();
// SAFETY: `remove_callback` is only ever called after a successful call to
// `probe_callback`, hence it's guaranteed that `ptr` points to a valid and initialized
@@ -150,10 +150,11 @@ macro_rules! module_platform_driver {
///```
pub trait Driver: Send {
/// The type holding driver private data about each device id supported by the driver.
- ///
- /// TODO: Use associated_type_defaults once stabilized:
- ///
- /// type IdInfo: 'static = ();
+ // TODO: Use associated_type_defaults once stabilized:
+ //
+ // ```
+ // type IdInfo: 'static = ();
+ // ```
type IdInfo: 'static;
/// The table of OF device ids supported by the driver.
diff --git a/rust/kernel/prelude.rs b/rust/kernel/prelude.rs
index baa774a351ce..2f30a398dddd 100644
--- a/rust/kernel/prelude.rs
+++ b/rust/kernel/prelude.rs
@@ -14,10 +14,15 @@
#[doc(no_inline)]
pub use core::pin::Pin;
+pub use ::ffi::{
+ c_char, c_int, c_long, c_longlong, c_schar, c_short, c_uchar, c_uint, c_ulong, c_ulonglong,
+ c_ushort, c_void,
+};
+
pub use crate::alloc::{flags::*, Box, KBox, KVBox, KVVec, KVec, VBox, VVec, Vec};
#[doc(no_inline)]
-pub use macros::{export, module, vtable};
+pub use macros::{export, kunit_tests, module, vtable};
pub use pin_init::{init, pin_data, pin_init, pinned_drop, InPlaceWrite, Init, PinInit, Zeroable};
diff --git a/rust/kernel/print.rs b/rust/kernel/print.rs
index cf4714242e14..9783d960a97a 100644
--- a/rust/kernel/print.rs
+++ b/rust/kernel/print.rs
@@ -198,10 +198,11 @@ macro_rules! print_macro (
/// Equivalent to the kernel's [`pr_emerg`] macro.
///
/// Mimics the interface of [`std::print!`]. See [`core::fmt`] and
-/// `alloc::format!` for information about the formatting syntax.
+/// [`std::format!`] for information about the formatting syntax.
///
/// [`pr_emerg`]: https://docs.kernel.org/core-api/printk-basics.html#c.pr_emerg
/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html
+/// [`std::format!`]: https://doc.rust-lang.org/std/macro.format.html
///
/// # Examples
///
@@ -222,10 +223,11 @@ macro_rules! pr_emerg (
/// Equivalent to the kernel's [`pr_alert`] macro.
///
/// Mimics the interface of [`std::print!`]. See [`core::fmt`] and
-/// `alloc::format!` for information about the formatting syntax.
+/// [`std::format!`] for information about the formatting syntax.
///
/// [`pr_alert`]: https://docs.kernel.org/core-api/printk-basics.html#c.pr_alert
/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html
+/// [`std::format!`]: https://doc.rust-lang.org/std/macro.format.html
///
/// # Examples
///
@@ -246,10 +248,11 @@ macro_rules! pr_alert (
/// Equivalent to the kernel's [`pr_crit`] macro.
///
/// Mimics the interface of [`std::print!`]. See [`core::fmt`] and
-/// `alloc::format!` for information about the formatting syntax.
+/// [`std::format!`] for information about the formatting syntax.
///
/// [`pr_crit`]: https://docs.kernel.org/core-api/printk-basics.html#c.pr_crit
/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html
+/// [`std::format!`]: https://doc.rust-lang.org/std/macro.format.html
///
/// # Examples
///
@@ -270,10 +273,11 @@ macro_rules! pr_crit (
/// Equivalent to the kernel's [`pr_err`] macro.
///
/// Mimics the interface of [`std::print!`]. See [`core::fmt`] and
-/// `alloc::format!` for information about the formatting syntax.
+/// [`std::format!`] for information about the formatting syntax.
///
/// [`pr_err`]: https://docs.kernel.org/core-api/printk-basics.html#c.pr_err
/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html
+/// [`std::format!`]: https://doc.rust-lang.org/std/macro.format.html
///
/// # Examples
///
@@ -294,10 +298,11 @@ macro_rules! pr_err (
/// Equivalent to the kernel's [`pr_warn`] macro.
///
/// Mimics the interface of [`std::print!`]. See [`core::fmt`] and
-/// `alloc::format!` for information about the formatting syntax.
+/// [`std::format!`] for information about the formatting syntax.
///
/// [`pr_warn`]: https://docs.kernel.org/core-api/printk-basics.html#c.pr_warn
/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html
+/// [`std::format!`]: https://doc.rust-lang.org/std/macro.format.html
///
/// # Examples
///
@@ -318,10 +323,11 @@ macro_rules! pr_warn (
/// Equivalent to the kernel's [`pr_notice`] macro.
///
/// Mimics the interface of [`std::print!`]. See [`core::fmt`] and
-/// `alloc::format!` for information about the formatting syntax.
+/// [`std::format!`] for information about the formatting syntax.
///
/// [`pr_notice`]: https://docs.kernel.org/core-api/printk-basics.html#c.pr_notice
/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html
+/// [`std::format!`]: https://doc.rust-lang.org/std/macro.format.html
///
/// # Examples
///
@@ -342,10 +348,11 @@ macro_rules! pr_notice (
/// Equivalent to the kernel's [`pr_info`] macro.
///
/// Mimics the interface of [`std::print!`]. See [`core::fmt`] and
-/// `alloc::format!` for information about the formatting syntax.
+/// [`std::format!`] for information about the formatting syntax.
///
/// [`pr_info`]: https://docs.kernel.org/core-api/printk-basics.html#c.pr_info
/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html
+/// [`std::format!`]: https://doc.rust-lang.org/std/macro.format.html
///
/// # Examples
///
@@ -368,10 +375,11 @@ macro_rules! pr_info (
/// yet.
///
/// Mimics the interface of [`std::print!`]. See [`core::fmt`] and
-/// `alloc::format!` for information about the formatting syntax.
+/// [`std::format!`] for information about the formatting syntax.
///
/// [`pr_debug`]: https://docs.kernel.org/core-api/printk-basics.html#c.pr_debug
/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html
+/// [`std::format!`]: https://doc.rust-lang.org/std/macro.format.html
///
/// # Examples
///
@@ -395,11 +403,12 @@ macro_rules! pr_debug (
/// Equivalent to the kernel's [`pr_cont`] macro.
///
/// Mimics the interface of [`std::print!`]. See [`core::fmt`] and
-/// `alloc::format!` for information about the formatting syntax.
+/// [`std::format!`] for information about the formatting syntax.
///
/// [`pr_info!`]: crate::pr_info!
/// [`pr_cont`]: https://docs.kernel.org/core-api/printk-basics.html#c.pr_cont
/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html
+/// [`std::format!`]: https://doc.rust-lang.org/std/macro.format.html
///
/// # Examples
///
diff --git a/rust/kernel/rbtree.rs b/rust/kernel/rbtree.rs
index 5246b2c8a4ff..8d978c896747 100644
--- a/rust/kernel/rbtree.rs
+++ b/rust/kernel/rbtree.rs
@@ -424,7 +424,7 @@ where
while !node.is_null() {
// SAFETY: By the type invariant of `Self`, all non-null `rb_node` pointers stored in `self`
// point to the links field of `Node<K, V>` objects.
- let this = unsafe { container_of!(node, Node<K, V>, links) }.cast_mut();
+ let this = unsafe { container_of!(node, Node<K, V>, links) };
// SAFETY: `this` is a non-null node so it is valid by the type invariants.
let this_key = unsafe { &(*this).key };
// SAFETY: `node` is a non-null node so it is valid by the type invariants.
@@ -496,7 +496,7 @@ impl<K, V> Drop for RBTree<K, V> {
// but it is not observable. The loop invariant is still maintained.
// SAFETY: `this` is valid per the loop invariant.
- unsafe { drop(KBox::from_raw(this.cast_mut())) };
+ unsafe { drop(KBox::from_raw(this)) };
}
}
}
@@ -761,7 +761,7 @@ impl<'a, K, V> Cursor<'a, K, V> {
let next = self.get_neighbor_raw(Direction::Next);
// SAFETY: By the type invariant of `Self`, all non-null `rb_node` pointers stored in `self`
// point to the links field of `Node<K, V>` objects.
- let this = unsafe { container_of!(self.current.as_ptr(), Node<K, V>, links) }.cast_mut();
+ let this = unsafe { container_of!(self.current.as_ptr(), Node<K, V>, links) };
// SAFETY: `this` is valid by the type invariants as described above.
let node = unsafe { KBox::from_raw(this) };
let node = RBTreeNode { node };
@@ -806,7 +806,7 @@ impl<'a, K, V> Cursor<'a, K, V> {
unsafe { bindings::rb_erase(neighbor, addr_of_mut!(self.tree.root)) };
// SAFETY: By the type invariant of `Self`, all non-null `rb_node` pointers stored in `self`
// point to the links field of `Node<K, V>` objects.
- let this = unsafe { container_of!(neighbor, Node<K, V>, links) }.cast_mut();
+ let this = unsafe { container_of!(neighbor, Node<K, V>, links) };
// SAFETY: `this` is valid by the type invariants as described above.
let node = unsafe { KBox::from_raw(this) };
return Some(RBTreeNode { node });
@@ -912,7 +912,7 @@ impl<'a, K, V> Cursor<'a, K, V> {
unsafe fn to_key_value_raw<'b>(node: NonNull<bindings::rb_node>) -> (&'b K, *mut V) {
// SAFETY: By the type invariant of `Self`, all non-null `rb_node` pointers stored in `self`
// point to the links field of `Node<K, V>` objects.
- let this = unsafe { container_of!(node.as_ptr(), Node<K, V>, links) }.cast_mut();
+ let this = unsafe { container_of!(node.as_ptr(), Node<K, V>, links) };
// SAFETY: The passed `node` is the current node or a non-null neighbor,
// thus `this` is valid by the type invariants.
let k = unsafe { &(*this).key };
@@ -1021,7 +1021,7 @@ impl<K, V> Iterator for IterRaw<K, V> {
// SAFETY: By the type invariant of `IterRaw`, `self.next` is a valid node in an `RBTree`,
// and by the type invariant of `RBTree`, all nodes point to the links field of `Node<K, V>` objects.
- let cur = unsafe { container_of!(self.next, Node<K, V>, links) }.cast_mut();
+ let cur = unsafe { container_of!(self.next, Node<K, V>, links) };
// SAFETY: `self.next` is a valid tree node by the type invariants.
self.next = unsafe { bindings::rb_next(self.next) };
@@ -1216,7 +1216,7 @@ impl<'a, K, V> OccupiedEntry<'a, K, V> {
// SAFETY:
// - `self.node_links` is a valid pointer to a node in the tree.
// - We have exclusive access to the underlying tree, and can thus give out a mutable reference.
- unsafe { &mut (*(container_of!(self.node_links, Node<K, V>, links).cast_mut())).value }
+ unsafe { &mut (*(container_of!(self.node_links, Node<K, V>, links))).value }
}
/// Converts the entry into a mutable reference to its value.
@@ -1226,7 +1226,7 @@ impl<'a, K, V> OccupiedEntry<'a, K, V> {
// SAFETY:
// - `self.node_links` is a valid pointer to a node in the tree.
// - This consumes the `&'a mut RBTree<K, V>`, therefore it can give out a mutable reference that lives for `'a`.
- unsafe { &mut (*(container_of!(self.node_links, Node<K, V>, links).cast_mut())).value }
+ unsafe { &mut (*(container_of!(self.node_links, Node<K, V>, links))).value }
}
/// Remove this entry from the [`RBTree`].
@@ -1239,9 +1239,7 @@ impl<'a, K, V> OccupiedEntry<'a, K, V> {
RBTreeNode {
// SAFETY: The node was a node in the tree, but we removed it, so we can convert it
// back into a box.
- node: unsafe {
- KBox::from_raw(container_of!(self.node_links, Node<K, V>, links).cast_mut())
- },
+ node: unsafe { KBox::from_raw(container_of!(self.node_links, Node<K, V>, links)) },
}
}
@@ -1272,8 +1270,7 @@ impl<'a, K, V> OccupiedEntry<'a, K, V> {
// SAFETY:
// - `self.node_ptr` produces a valid pointer to a node in the tree.
// - Now that we removed this entry from the tree, we can convert the node to a box.
- let old_node =
- unsafe { KBox::from_raw(container_of!(self.node_links, Node<K, V>, links).cast_mut()) };
+ let old_node = unsafe { KBox::from_raw(container_of!(self.node_links, Node<K, V>, links)) };
RBTreeNode { node: old_node }
}
diff --git a/rust/kernel/static_assert.rs b/rust/kernel/static_assert.rs
index 3115ee0ba8e9..a57ba14315a0 100644
--- a/rust/kernel/static_assert.rs
+++ b/rust/kernel/static_assert.rs
@@ -6,6 +6,10 @@
///
/// Similar to C11 [`_Static_assert`] and C++11 [`static_assert`].
///
+/// An optional panic message can be supplied after the expression.
+/// Currently only a string literal without formatting is supported
+/// due to constness limitations of the [`assert!`] macro.
+///
/// The feature may be added to Rust in the future: see [RFC 2790].
///
/// [`_Static_assert`]: https://en.cppreference.com/w/c/language/_Static_assert
@@ -25,10 +29,11 @@
/// x + 2
/// }
/// static_assert!(f(40) == 42);
+/// static_assert!(f(40) == 42, "f(x) must add 2 to the given input.");
/// ```
#[macro_export]
macro_rules! static_assert {
- ($condition:expr) => {
- const _: () = core::assert!($condition);
+ ($condition:expr $(,$arg:literal)?) => {
+ const _: () = ::core::assert!($condition $(,$arg)?);
};
}
diff --git a/rust/kernel/std_vendor.rs b/rust/kernel/std_vendor.rs
index 279bd353687a..abbab5050cc5 100644
--- a/rust/kernel/std_vendor.rs
+++ b/rust/kernel/std_vendor.rs
@@ -148,7 +148,7 @@ macro_rules! dbg {
};
($val:expr $(,)?) => {
// Use of `match` here is intentional because it affects the lifetimes
- // of temporaries - https://stackoverflow.com/a/48732525/1063961
+ // of temporaries - <https://stackoverflow.com/a/48732525/1063961>
match $val {
tmp => {
$crate::pr_info!("[{}:{}:{}] {} = {:#?}\n",
diff --git a/rust/kernel/str.rs b/rust/kernel/str.rs
index fb61ce81ea28..a927db8e079c 100644
--- a/rust/kernel/str.rs
+++ b/rust/kernel/str.rs
@@ -6,7 +6,7 @@ use crate::alloc::{flags::*, AllocError, KVec};
use core::fmt::{self, Write};
use core::ops::{self, Deref, DerefMut, Index};
-use crate::error::{code::*, Error};
+use crate::prelude::*;
/// Byte string without UTF-8 validity guarantee.
#[repr(transparent)]
@@ -572,30 +572,13 @@ macro_rules! c_str {
}};
}
-#[cfg(test)]
-#[expect(clippy::items_after_test_module)]
+#[kunit_tests(rust_kernel_str)]
mod tests {
use super::*;
- struct String(CString);
-
- impl String {
- fn from_fmt(args: fmt::Arguments<'_>) -> Self {
- String(CString::try_from_fmt(args).unwrap())
- }
- }
-
- impl Deref for String {
- type Target = str;
-
- fn deref(&self) -> &str {
- self.0.to_str().unwrap()
- }
- }
-
macro_rules! format {
($($f:tt)*) => ({
- &*String::from_fmt(kernel::fmt!($($f)*))
+ CString::try_from_fmt(::kernel::fmt!($($f)*))?.to_str()?
})
}
@@ -614,67 +597,72 @@ mod tests {
\\xf0\\xf1\\xf2\\xf3\\xf4\\xf5\\xf6\\xf7\\xf8\\xf9\\xfa\\xfb\\xfc\\xfd\\xfe\\xff";
#[test]
- fn test_cstr_to_str() {
+ fn test_cstr_to_str() -> Result {
let good_bytes = b"\xf0\x9f\xa6\x80\0";
- let checked_cstr = CStr::from_bytes_with_nul(good_bytes).unwrap();
- let checked_str = checked_cstr.to_str().unwrap();
+ let checked_cstr = CStr::from_bytes_with_nul(good_bytes)?;
+ let checked_str = checked_cstr.to_str()?;
assert_eq!(checked_str, "🦀");
+ Ok(())
}
#[test]
- #[should_panic]
- fn test_cstr_to_str_panic() {
+ fn test_cstr_to_str_invalid_utf8() -> Result {
let bad_bytes = b"\xc3\x28\0";
- let checked_cstr = CStr::from_bytes_with_nul(bad_bytes).unwrap();
- checked_cstr.to_str().unwrap();
+ let checked_cstr = CStr::from_bytes_with_nul(bad_bytes)?;
+ assert!(checked_cstr.to_str().is_err());
+ Ok(())
}
#[test]
- fn test_cstr_as_str_unchecked() {
+ fn test_cstr_as_str_unchecked() -> Result {
let good_bytes = b"\xf0\x9f\x90\xA7\0";
- let checked_cstr = CStr::from_bytes_with_nul(good_bytes).unwrap();
+ let checked_cstr = CStr::from_bytes_with_nul(good_bytes)?;
// SAFETY: The contents come from a string literal which contains valid UTF-8.
let unchecked_str = unsafe { checked_cstr.as_str_unchecked() };
assert_eq!(unchecked_str, "🐧");
+ Ok(())
}
#[test]
- fn test_cstr_display() {
- let hello_world = CStr::from_bytes_with_nul(b"hello, world!\0").unwrap();
+ fn test_cstr_display() -> Result {
+ let hello_world = CStr::from_bytes_with_nul(b"hello, world!\0")?;
assert_eq!(format!("{hello_world}"), "hello, world!");
- let non_printables = CStr::from_bytes_with_nul(b"\x01\x09\x0a\0").unwrap();
+ let non_printables = CStr::from_bytes_with_nul(b"\x01\x09\x0a\0")?;
assert_eq!(format!("{non_printables}"), "\\x01\\x09\\x0a");
- let non_ascii = CStr::from_bytes_with_nul(b"d\xe9j\xe0 vu\0").unwrap();
+ let non_ascii = CStr::from_bytes_with_nul(b"d\xe9j\xe0 vu\0")?;
assert_eq!(format!("{non_ascii}"), "d\\xe9j\\xe0 vu");
- let good_bytes = CStr::from_bytes_with_nul(b"\xf0\x9f\xa6\x80\0").unwrap();
+ let good_bytes = CStr::from_bytes_with_nul(b"\xf0\x9f\xa6\x80\0")?;
assert_eq!(format!("{good_bytes}"), "\\xf0\\x9f\\xa6\\x80");
+ Ok(())
}
#[test]
- fn test_cstr_display_all_bytes() {
+ fn test_cstr_display_all_bytes() -> Result {
let mut bytes: [u8; 256] = [0; 256];
// fill `bytes` with [1..=255] + [0]
for i in u8::MIN..=u8::MAX {
bytes[i as usize] = i.wrapping_add(1);
}
- let cstr = CStr::from_bytes_with_nul(&bytes).unwrap();
+ let cstr = CStr::from_bytes_with_nul(&bytes)?;
assert_eq!(format!("{cstr}"), ALL_ASCII_CHARS);
+ Ok(())
}
#[test]
- fn test_cstr_debug() {
- let hello_world = CStr::from_bytes_with_nul(b"hello, world!\0").unwrap();
+ fn test_cstr_debug() -> Result {
+ let hello_world = CStr::from_bytes_with_nul(b"hello, world!\0")?;
assert_eq!(format!("{hello_world:?}"), "\"hello, world!\"");
- let non_printables = CStr::from_bytes_with_nul(b"\x01\x09\x0a\0").unwrap();
+ let non_printables = CStr::from_bytes_with_nul(b"\x01\x09\x0a\0")?;
assert_eq!(format!("{non_printables:?}"), "\"\\x01\\x09\\x0a\"");
- let non_ascii = CStr::from_bytes_with_nul(b"d\xe9j\xe0 vu\0").unwrap();
+ let non_ascii = CStr::from_bytes_with_nul(b"d\xe9j\xe0 vu\0")?;
assert_eq!(format!("{non_ascii:?}"), "\"d\\xe9j\\xe0 vu\"");
- let good_bytes = CStr::from_bytes_with_nul(b"\xf0\x9f\xa6\x80\0").unwrap();
+ let good_bytes = CStr::from_bytes_with_nul(b"\xf0\x9f\xa6\x80\0")?;
assert_eq!(format!("{good_bytes:?}"), "\"\\xf0\\x9f\\xa6\\x80\"");
+ Ok(())
}
#[test]
- fn test_bstr_display() {
+ fn test_bstr_display() -> Result {
let hello_world = BStr::from_bytes(b"hello, world!");
assert_eq!(format!("{hello_world}"), "hello, world!");
let escapes = BStr::from_bytes(b"_\t_\n_\r_\\_\'_\"_");
@@ -685,10 +673,11 @@ mod tests {
assert_eq!(format!("{non_ascii}"), "d\\xe9j\\xe0 vu");
let good_bytes = BStr::from_bytes(b"\xf0\x9f\xa6\x80");
assert_eq!(format!("{good_bytes}"), "\\xf0\\x9f\\xa6\\x80");
+ Ok(())
}
#[test]
- fn test_bstr_debug() {
+ fn test_bstr_debug() -> Result {
let hello_world = BStr::from_bytes(b"hello, world!");
assert_eq!(format!("{hello_world:?}"), "\"hello, world!\"");
let escapes = BStr::from_bytes(b"_\t_\n_\r_\\_\'_\"_");
@@ -699,6 +688,7 @@ mod tests {
assert_eq!(format!("{non_ascii:?}"), "\"d\\xe9j\\xe0 vu\"");
let good_bytes = BStr::from_bytes(b"\xf0\x9f\xa6\x80");
assert_eq!(format!("{good_bytes:?}"), "\"\\xf0\\x9f\\xa6\\x80\"");
+ Ok(())
}
}
@@ -752,7 +742,7 @@ impl RawFormatter {
/// for the lifetime of the returned [`RawFormatter`].
pub(crate) unsafe fn from_buffer(buf: *mut u8, len: usize) -> Self {
let pos = buf as usize;
- // INVARIANT: We ensure that `end` is never less then `buf`, and the safety requirements
+ // INVARIANT: We ensure that `end` is never less than `buf`, and the safety requirements
// guarantees that the memory region is valid for writes.
Self {
pos,
@@ -886,7 +876,7 @@ impl CString {
// SAFETY: The number of bytes that can be written to `f` is bounded by `size`, which is
// `buf`'s capacity. The contents of the buffer have been initialised by writes to `f`.
- unsafe { buf.set_len(f.bytes_written()) };
+ unsafe { buf.inc_len(f.bytes_written()) };
// Check that there are no `NUL` bytes before the end.
// SAFETY: The buffer is valid for read because `f.bytes_written()` is bounded by `size`
@@ -944,5 +934,5 @@ impl fmt::Debug for CString {
/// A convenience alias for [`core::format_args`].
#[macro_export]
macro_rules! fmt {
- ($($f:tt)*) => ( core::format_args!($($f)*) )
+ ($($f:tt)*) => ( ::core::format_args!($($f)*) )
}
diff --git a/rust/kernel/sync/arc.rs b/rust/kernel/sync/arc.rs
index 8484c814609a..c7af0aa48a0a 100644
--- a/rust/kernel/sync/arc.rs
+++ b/rust/kernel/sync/arc.rs
@@ -135,14 +135,15 @@ pub struct Arc<T: ?Sized> {
// meaningful with respect to dropck - but this may change in the future so this is left here
// out of an abundance of caution.
//
- // See https://doc.rust-lang.org/nomicon/phantom-data.html#generic-parameters-and-drop-checking
+ // See <https://doc.rust-lang.org/nomicon/phantom-data.html#generic-parameters-and-drop-checking>
// for more detail on the semantics of dropck in the presence of `PhantomData`.
_p: PhantomData<ArcInner<T>>,
}
+#[doc(hidden)]
#[pin_data]
#[repr(C)]
-struct ArcInner<T: ?Sized> {
+pub struct ArcInner<T: ?Sized> {
refcount: Opaque<bindings::refcount_t>,
data: T,
}
@@ -371,18 +372,20 @@ impl<T: ?Sized> Arc<T> {
}
}
-impl<T: 'static> ForeignOwnable for Arc<T> {
+// SAFETY: The `into_foreign` function returns a pointer that is well-aligned.
+unsafe impl<T: 'static> ForeignOwnable for Arc<T> {
+ type PointedTo = ArcInner<T>;
type Borrowed<'a> = ArcBorrow<'a, T>;
type BorrowedMut<'a> = Self::Borrowed<'a>;
- fn into_foreign(self) -> *mut crate::ffi::c_void {
- ManuallyDrop::new(self).ptr.as_ptr().cast()
+ fn into_foreign(self) -> *mut Self::PointedTo {
+ ManuallyDrop::new(self).ptr.as_ptr()
}
- unsafe fn from_foreign(ptr: *mut crate::ffi::c_void) -> Self {
+ unsafe fn from_foreign(ptr: *mut Self::PointedTo) -> Self {
// SAFETY: The safety requirements of this function ensure that `ptr` comes from a previous
// call to `Self::into_foreign`.
- let inner = unsafe { NonNull::new_unchecked(ptr.cast::<ArcInner<T>>()) };
+ let inner = unsafe { NonNull::new_unchecked(ptr) };
// SAFETY: By the safety requirement of this function, we know that `ptr` came from
// a previous call to `Arc::into_foreign`, which guarantees that `ptr` is valid and
@@ -390,17 +393,17 @@ impl<T: 'static> ForeignOwnable for Arc<T> {
unsafe { Self::from_inner(inner) }
}
- unsafe fn borrow<'a>(ptr: *mut crate::ffi::c_void) -> ArcBorrow<'a, T> {
+ unsafe fn borrow<'a>(ptr: *mut Self::PointedTo) -> ArcBorrow<'a, T> {
// SAFETY: The safety requirements of this function ensure that `ptr` comes from a previous
// call to `Self::into_foreign`.
- let inner = unsafe { NonNull::new_unchecked(ptr.cast::<ArcInner<T>>()) };
+ let inner = unsafe { NonNull::new_unchecked(ptr) };
// SAFETY: The safety requirements of `from_foreign` ensure that the object remains alive
// for the lifetime of the returned value.
unsafe { ArcBorrow::new(inner) }
}
- unsafe fn borrow_mut<'a>(ptr: *mut crate::ffi::c_void) -> ArcBorrow<'a, T> {
+ unsafe fn borrow_mut<'a>(ptr: *mut Self::PointedTo) -> ArcBorrow<'a, T> {
// SAFETY: The safety requirements for `borrow_mut` are a superset of the safety
// requirements for `borrow`.
unsafe { Self::borrow(ptr) }
@@ -489,7 +492,7 @@ impl<T: ?Sized> From<Pin<UniqueArc<T>>> for Arc<T> {
/// There are no mutable references to the underlying [`Arc`], and it remains valid for the
/// lifetime of the [`ArcBorrow`] instance.
///
-/// # Example
+/// # Examples
///
/// ```
/// use kernel::sync::{Arc, ArcBorrow};
diff --git a/rust/kernel/time.rs b/rust/kernel/time.rs
index f509cb0eb71e..a8089a98da9e 100644
--- a/rust/kernel/time.rs
+++ b/rust/kernel/time.rs
@@ -5,14 +5,36 @@
//! This module contains the kernel APIs related to time and timers that
//! have been ported or wrapped for usage by Rust code in the kernel.
//!
+//! There are two types in this module:
+//!
+//! - The [`Instant`] type represents a specific point in time.
+//! - The [`Delta`] type represents a span of time.
+//!
+//! Note that the C side uses `ktime_t` type to represent both. However, timestamp
+//! and timedelta are different. To avoid confusion, we use two different types.
+//!
+//! A [`Instant`] object can be created by calling the [`Instant::now()`] function.
+//! It represents a point in time at which the object was created.
+//! By calling the [`Instant::elapsed()`] method, a [`Delta`] object representing
+//! the elapsed time can be created. The [`Delta`] object can also be created
+//! by subtracting two [`Instant`] objects.
+//!
+//! A [`Delta`] type supports methods to retrieve the duration in various units.
+//!
//! C header: [`include/linux/jiffies.h`](srctree/include/linux/jiffies.h).
//! C header: [`include/linux/ktime.h`](srctree/include/linux/ktime.h).
pub mod hrtimer;
+/// The number of nanoseconds per microsecond.
+pub const NSEC_PER_USEC: i64 = bindings::NSEC_PER_USEC as i64;
+
/// The number of nanoseconds per millisecond.
pub const NSEC_PER_MSEC: i64 = bindings::NSEC_PER_MSEC as i64;
+/// The number of nanoseconds per second.
+pub const NSEC_PER_SEC: i64 = bindings::NSEC_PER_SEC as i64;
+
/// The time unit of Linux kernel. One jiffy equals (1/HZ) second.
pub type Jiffies = crate::ffi::c_ulong;
@@ -27,59 +49,44 @@ pub fn msecs_to_jiffies(msecs: Msecs) -> Jiffies {
unsafe { bindings::__msecs_to_jiffies(msecs) }
}
-/// A Rust wrapper around a `ktime_t`.
+/// A specific point in time.
+///
+/// # Invariants
+///
+/// The `inner` value is in the range from 0 to `KTIME_MAX`.
#[repr(transparent)]
-#[derive(Copy, Clone)]
-pub struct Ktime {
+#[derive(Copy, Clone, PartialEq, PartialOrd, Eq, Ord)]
+pub struct Instant {
inner: bindings::ktime_t,
}
-impl Ktime {
- /// Create a `Ktime` from a raw `ktime_t`.
- #[inline]
- pub fn from_raw(inner: bindings::ktime_t) -> Self {
- Self { inner }
- }
-
+impl Instant {
/// Get the current time using `CLOCK_MONOTONIC`.
#[inline]
- pub fn ktime_get() -> Self {
- // SAFETY: It is always safe to call `ktime_get` outside of NMI context.
- Self::from_raw(unsafe { bindings::ktime_get() })
- }
-
- /// Divide the number of nanoseconds by a compile-time constant.
- #[inline]
- fn divns_constant<const DIV: i64>(self) -> i64 {
- self.to_ns() / DIV
- }
-
- /// Returns the number of nanoseconds.
- #[inline]
- pub fn to_ns(self) -> i64 {
- self.inner
+ pub fn now() -> Self {
+ // INVARIANT: The `ktime_get()` function returns a value in the range
+ // from 0 to `KTIME_MAX`.
+ Self {
+ // SAFETY: It is always safe to call `ktime_get()` outside of NMI context.
+ inner: unsafe { bindings::ktime_get() },
+ }
}
- /// Returns the number of milliseconds.
+ /// Return the amount of time elapsed since the [`Instant`].
#[inline]
- pub fn to_ms(self) -> i64 {
- self.divns_constant::<NSEC_PER_MSEC>()
+ pub fn elapsed(&self) -> Delta {
+ Self::now() - *self
}
}
-/// Returns the number of milliseconds between two ktimes.
-#[inline]
-pub fn ktime_ms_delta(later: Ktime, earlier: Ktime) -> i64 {
- (later - earlier).to_ms()
-}
-
-impl core::ops::Sub for Ktime {
- type Output = Ktime;
+impl core::ops::Sub for Instant {
+ type Output = Delta;
+ // By the type invariant, it never overflows.
#[inline]
- fn sub(self, other: Ktime) -> Ktime {
- Self {
- inner: self.inner - other.inner,
+ fn sub(self, other: Instant) -> Delta {
+ Delta {
+ nanos: self.inner - other.inner,
}
}
}
@@ -149,3 +156,85 @@ impl ClockId {
self as bindings::clockid_t
}
}
+
+/// A span of time.
+///
+/// This struct represents a span of time, with its value stored as nanoseconds.
+/// The value can represent any valid i64 value, including negative, zero, and
+/// positive numbers.
+#[derive(Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Debug)]
+pub struct Delta {
+ nanos: i64,
+}
+
+impl Delta {
+ /// A span of time equal to zero.
+ pub const ZERO: Self = Self { nanos: 0 };
+
+ /// Create a new [`Delta`] from a number of microseconds.
+ ///
+ /// The `micros` can range from -9_223_372_036_854_775 to 9_223_372_036_854_775.
+ /// If `micros` is outside this range, `i64::MIN` is used for negative values,
+ /// and `i64::MAX` is used for positive values due to saturation.
+ #[inline]
+ pub const fn from_micros(micros: i64) -> Self {
+ Self {
+ nanos: micros.saturating_mul(NSEC_PER_USEC),
+ }
+ }
+
+ /// Create a new [`Delta`] from a number of milliseconds.
+ ///
+ /// The `millis` can range from -9_223_372_036_854 to 9_223_372_036_854.
+ /// If `millis` is outside this range, `i64::MIN` is used for negative values,
+ /// and `i64::MAX` is used for positive values due to saturation.
+ #[inline]
+ pub const fn from_millis(millis: i64) -> Self {
+ Self {
+ nanos: millis.saturating_mul(NSEC_PER_MSEC),
+ }
+ }
+
+ /// Create a new [`Delta`] from a number of seconds.
+ ///
+ /// The `secs` can range from -9_223_372_036 to 9_223_372_036.
+ /// If `secs` is outside this range, `i64::MIN` is used for negative values,
+ /// and `i64::MAX` is used for positive values due to saturation.
+ #[inline]
+ pub const fn from_secs(secs: i64) -> Self {
+ Self {
+ nanos: secs.saturating_mul(NSEC_PER_SEC),
+ }
+ }
+
+ /// Return `true` if the [`Delta`] spans no time.
+ #[inline]
+ pub fn is_zero(self) -> bool {
+ self.as_nanos() == 0
+ }
+
+ /// Return `true` if the [`Delta`] spans a negative amount of time.
+ #[inline]
+ pub fn is_negative(self) -> bool {
+ self.as_nanos() < 0
+ }
+
+ /// Return the number of nanoseconds in the [`Delta`].
+ #[inline]
+ pub const fn as_nanos(self) -> i64 {
+ self.nanos
+ }
+
+ /// Return the smallest number of microseconds greater than or equal
+ /// to the value in the [`Delta`].
+ #[inline]
+ pub const fn as_micros_ceil(self) -> i64 {
+ self.as_nanos().saturating_add(NSEC_PER_USEC - 1) / NSEC_PER_USEC
+ }
+
+ /// Return the number of milliseconds in the [`Delta`].
+ #[inline]
+ pub const fn as_millis(self) -> i64 {
+ self.as_nanos() / NSEC_PER_MSEC
+ }
+}
diff --git a/rust/kernel/time/hrtimer.rs b/rust/kernel/time/hrtimer.rs
index ce53f8579d18..9df3dcd2fa39 100644
--- a/rust/kernel/time/hrtimer.rs
+++ b/rust/kernel/time/hrtimer.rs
@@ -68,10 +68,26 @@
//! `start` operation.
use super::ClockId;
-use crate::{prelude::*, time::Ktime, types::Opaque};
+use crate::{prelude::*, types::Opaque};
use core::marker::PhantomData;
use pin_init::PinInit;
+/// A Rust wrapper around a `ktime_t`.
+// NOTE: Ktime is going to be removed when hrtimer is converted to Instant/Delta.
+#[repr(transparent)]
+#[derive(Copy, Clone, PartialEq, PartialOrd, Eq, Ord)]
+pub struct Ktime {
+ inner: bindings::ktime_t,
+}
+
+impl Ktime {
+ /// Returns the number of nanoseconds.
+ #[inline]
+ pub fn to_ns(self) -> i64 {
+ self.inner
+ }
+}
+
/// A timer backed by a C `struct hrtimer`.
///
/// # Invariants
@@ -384,11 +400,9 @@ pub unsafe trait HasHrTimer<T> {
#[repr(u32)]
pub enum HrTimerRestart {
/// Timer should not be restarted.
- #[allow(clippy::unnecessary_cast)]
- NoRestart = bindings::hrtimer_restart_HRTIMER_NORESTART as u32,
+ NoRestart = bindings::hrtimer_restart_HRTIMER_NORESTART,
/// Timer should be restarted.
- #[allow(clippy::unnecessary_cast)]
- Restart = bindings::hrtimer_restart_HRTIMER_RESTART as u32,
+ Restart = bindings::hrtimer_restart_HRTIMER_RESTART,
}
impl HrTimerRestart {
diff --git a/rust/kernel/time/hrtimer/arc.rs b/rust/kernel/time/hrtimer/arc.rs
index 4a984d85b4a1..ccf1e66e5b2d 100644
--- a/rust/kernel/time/hrtimer/arc.rs
+++ b/rust/kernel/time/hrtimer/arc.rs
@@ -5,10 +5,10 @@ use super::HrTimer;
use super::HrTimerCallback;
use super::HrTimerHandle;
use super::HrTimerPointer;
+use super::Ktime;
use super::RawHrTimerCallback;
use crate::sync::Arc;
use crate::sync::ArcBorrow;
-use crate::time::Ktime;
/// A handle for an `Arc<HasHrTimer<T>>` returned by a call to
/// [`HrTimerPointer::start`].
diff --git a/rust/kernel/time/hrtimer/pin.rs b/rust/kernel/time/hrtimer/pin.rs
index f760db265c7b..293ca9cf058c 100644
--- a/rust/kernel/time/hrtimer/pin.rs
+++ b/rust/kernel/time/hrtimer/pin.rs
@@ -4,9 +4,9 @@ use super::HasHrTimer;
use super::HrTimer;
use super::HrTimerCallback;
use super::HrTimerHandle;
+use super::Ktime;
use super::RawHrTimerCallback;
use super::UnsafeHrTimerPointer;
-use crate::time::Ktime;
use core::pin::Pin;
/// A handle for a `Pin<&HasHrTimer>`. When the handle exists, the timer might be
diff --git a/rust/kernel/time/hrtimer/pin_mut.rs b/rust/kernel/time/hrtimer/pin_mut.rs
index 90c0351d62e4..6033572d35ad 100644
--- a/rust/kernel/time/hrtimer/pin_mut.rs
+++ b/rust/kernel/time/hrtimer/pin_mut.rs
@@ -1,9 +1,9 @@
// SPDX-License-Identifier: GPL-2.0
use super::{
- HasHrTimer, HrTimer, HrTimerCallback, HrTimerHandle, RawHrTimerCallback, UnsafeHrTimerPointer,
+ HasHrTimer, HrTimer, HrTimerCallback, HrTimerHandle, Ktime, RawHrTimerCallback,
+ UnsafeHrTimerPointer,
};
-use crate::time::Ktime;
use core::{marker::PhantomData, pin::Pin, ptr::NonNull};
/// A handle for a `Pin<&mut HasHrTimer>`. When the handle exists, the timer might
diff --git a/rust/kernel/time/hrtimer/tbox.rs b/rust/kernel/time/hrtimer/tbox.rs
index 2071cae07234..29526a5da203 100644
--- a/rust/kernel/time/hrtimer/tbox.rs
+++ b/rust/kernel/time/hrtimer/tbox.rs
@@ -5,9 +5,9 @@ use super::HrTimer;
use super::HrTimerCallback;
use super::HrTimerHandle;
use super::HrTimerPointer;
+use super::Ktime;
use super::RawHrTimerCallback;
use crate::prelude::*;
-use crate::time::Ktime;
use core::ptr::NonNull;
/// A handle for a [`Box<HasHrTimer<T>>`] returned by a call to
diff --git a/rust/kernel/types.rs b/rust/kernel/types.rs
index eee387727d1a..22985b6f6982 100644
--- a/rust/kernel/types.rs
+++ b/rust/kernel/types.rs
@@ -18,7 +18,19 @@ use pin_init::{PinInit, Zeroable};
///
/// This trait is meant to be used in cases when Rust objects are stored in C objects and
/// eventually "freed" back to Rust.
-pub trait ForeignOwnable: Sized {
+///
+/// # Safety
+///
+/// Implementers must ensure that [`into_foreign`] returns a pointer which meets the alignment
+/// requirements of [`PointedTo`].
+///
+/// [`into_foreign`]: Self::into_foreign
+/// [`PointedTo`]: Self::PointedTo
+pub unsafe trait ForeignOwnable: Sized {
+ /// Type used when the value is foreign-owned. In practical terms only defines the alignment of
+ /// the pointer.
+ type PointedTo;
+
/// Type used to immutably borrow a value that is currently foreign-owned.
type Borrowed<'a>;
@@ -27,16 +39,18 @@ pub trait ForeignOwnable: Sized {
/// Converts a Rust-owned object to a foreign-owned one.
///
- /// The foreign representation is a pointer to void. There are no guarantees for this pointer.
- /// For example, it might be invalid, dangling or pointing to uninitialized memory. Using it in
- /// any way except for [`from_foreign`], [`try_from_foreign`], [`borrow`], or [`borrow_mut`] can
- /// result in undefined behavior.
+ /// # Guarantees
+ ///
+ /// The return value is guaranteed to be well-aligned, but there are no other guarantees for
+ /// this pointer. For example, it might be null, dangling, or point to uninitialized memory.
+ /// Using it in any way except for [`ForeignOwnable::from_foreign`], [`ForeignOwnable::borrow`],
+ /// [`ForeignOwnable::try_from_foreign`] can result in undefined behavior.
///
/// [`from_foreign`]: Self::from_foreign
/// [`try_from_foreign`]: Self::try_from_foreign
/// [`borrow`]: Self::borrow
/// [`borrow_mut`]: Self::borrow_mut
- fn into_foreign(self) -> *mut crate::ffi::c_void;
+ fn into_foreign(self) -> *mut Self::PointedTo;
/// Converts a foreign-owned object back to a Rust-owned one.
///
@@ -46,7 +60,7 @@ pub trait ForeignOwnable: Sized {
/// must not be passed to `from_foreign` more than once.
///
/// [`into_foreign`]: Self::into_foreign
- unsafe fn from_foreign(ptr: *mut crate::ffi::c_void) -> Self;
+ unsafe fn from_foreign(ptr: *mut Self::PointedTo) -> Self;
/// Tries to convert a foreign-owned object back to a Rust-owned one.
///
@@ -58,7 +72,7 @@ pub trait ForeignOwnable: Sized {
/// `ptr` must either be null or satisfy the safety requirements for [`from_foreign`].
///
/// [`from_foreign`]: Self::from_foreign
- unsafe fn try_from_foreign(ptr: *mut crate::ffi::c_void) -> Option<Self> {
+ unsafe fn try_from_foreign(ptr: *mut Self::PointedTo) -> Option<Self> {
if ptr.is_null() {
None
} else {
@@ -81,7 +95,7 @@ pub trait ForeignOwnable: Sized {
///
/// [`into_foreign`]: Self::into_foreign
/// [`from_foreign`]: Self::from_foreign
- unsafe fn borrow<'a>(ptr: *mut crate::ffi::c_void) -> Self::Borrowed<'a>;
+ unsafe fn borrow<'a>(ptr: *mut Self::PointedTo) -> Self::Borrowed<'a>;
/// Borrows a foreign-owned object mutably.
///
@@ -109,21 +123,23 @@ pub trait ForeignOwnable: Sized {
/// [`from_foreign`]: Self::from_foreign
/// [`borrow`]: Self::borrow
/// [`Arc`]: crate::sync::Arc
- unsafe fn borrow_mut<'a>(ptr: *mut crate::ffi::c_void) -> Self::BorrowedMut<'a>;
+ unsafe fn borrow_mut<'a>(ptr: *mut Self::PointedTo) -> Self::BorrowedMut<'a>;
}
-impl ForeignOwnable for () {
+// SAFETY: The `into_foreign` function returns a pointer that is dangling, but well-aligned.
+unsafe impl ForeignOwnable for () {
+ type PointedTo = ();
type Borrowed<'a> = ();
type BorrowedMut<'a> = ();
- fn into_foreign(self) -> *mut crate::ffi::c_void {
+ fn into_foreign(self) -> *mut Self::PointedTo {
core::ptr::NonNull::dangling().as_ptr()
}
- unsafe fn from_foreign(_: *mut crate::ffi::c_void) -> Self {}
+ unsafe fn from_foreign(_: *mut Self::PointedTo) -> Self {}
- unsafe fn borrow<'a>(_: *mut crate::ffi::c_void) -> Self::Borrowed<'a> {}
- unsafe fn borrow_mut<'a>(_: *mut crate::ffi::c_void) -> Self::BorrowedMut<'a> {}
+ unsafe fn borrow<'a>(_: *mut Self::PointedTo) -> Self::Borrowed<'a> {}
+ unsafe fn borrow_mut<'a>(_: *mut Self::PointedTo) -> Self::BorrowedMut<'a> {}
}
/// Runs a cleanup function/closure when dropped.
diff --git a/rust/kernel/uaccess.rs b/rust/kernel/uaccess.rs
index 80a9782b1c6e..6d70edd8086a 100644
--- a/rust/kernel/uaccess.rs
+++ b/rust/kernel/uaccess.rs
@@ -46,10 +46,9 @@ pub type UserPtr = usize;
///
/// ```no_run
/// use kernel::ffi::c_void;
-/// use kernel::error::Result;
/// use kernel::uaccess::{UserPtr, UserSlice};
///
-/// fn bytes_add_one(uptr: UserPtr, len: usize) -> Result<()> {
+/// fn bytes_add_one(uptr: UserPtr, len: usize) -> Result {
/// let (read, mut write) = UserSlice::new(uptr, len).reader_writer();
///
/// let mut buf = KVec::new();
@@ -68,7 +67,6 @@ pub type UserPtr = usize;
///
/// ```no_run
/// use kernel::ffi::c_void;
-/// use kernel::error::{code::EINVAL, Result};
/// use kernel::uaccess::{UserPtr, UserSlice};
///
/// /// Returns whether the data in this region is valid.
@@ -290,7 +288,7 @@ impl UserSliceReader {
// SAFETY: Since the call to `read_raw` was successful, so the next `len` bytes of the
// vector have been initialized.
- unsafe { buf.set_len(buf.len() + len) };
+ unsafe { buf.inc_len(len) };
Ok(())
}
}
diff --git a/rust/kernel/workqueue.rs b/rust/kernel/workqueue.rs
index f98bd02b838f..d092112d843f 100644
--- a/rust/kernel/workqueue.rs
+++ b/rust/kernel/workqueue.rs
@@ -429,51 +429,28 @@ impl<T: ?Sized, const ID: u64> Work<T, ID> {
///
/// # Safety
///
-/// The [`OFFSET`] constant must be the offset of a field in `Self` of type [`Work<T, ID>`]. The
-/// methods on this trait must have exactly the behavior that the definitions given below have.
+/// The methods [`raw_get_work`] and [`work_container_of`] must return valid pointers and must be
+/// true inverses of each other; that is, they must satisfy the following invariants:
+/// - `work_container_of(raw_get_work(ptr)) == ptr` for any `ptr: *mut Self`.
+/// - `raw_get_work(work_container_of(ptr)) == ptr` for any `ptr: *mut Work<T, ID>`.
///
/// [`impl_has_work!`]: crate::impl_has_work
-/// [`OFFSET`]: HasWork::OFFSET
+/// [`raw_get_work`]: HasWork::raw_get_work
+/// [`work_container_of`]: HasWork::work_container_of
pub unsafe trait HasWork<T, const ID: u64 = 0> {
- /// The offset of the [`Work<T, ID>`] field.
- const OFFSET: usize;
-
- /// Returns the offset of the [`Work<T, ID>`] field.
- ///
- /// This method exists because the [`OFFSET`] constant cannot be accessed if the type is not
- /// [`Sized`].
- ///
- /// [`OFFSET`]: HasWork::OFFSET
- #[inline]
- fn get_work_offset(&self) -> usize {
- Self::OFFSET
- }
-
/// Returns a pointer to the [`Work<T, ID>`] field.
///
/// # Safety
///
/// The provided pointer must point at a valid struct of type `Self`.
- #[inline]
- unsafe fn raw_get_work(ptr: *mut Self) -> *mut Work<T, ID> {
- // SAFETY: The caller promises that the pointer is valid.
- unsafe { (ptr as *mut u8).add(Self::OFFSET) as *mut Work<T, ID> }
- }
+ unsafe fn raw_get_work(ptr: *mut Self) -> *mut Work<T, ID>;
/// Returns a pointer to the struct containing the [`Work<T, ID>`] field.
///
/// # Safety
///
/// The pointer must point at a [`Work<T, ID>`] field in a struct of type `Self`.
- #[inline]
- unsafe fn work_container_of(ptr: *mut Work<T, ID>) -> *mut Self
- where
- Self: Sized,
- {
- // SAFETY: The caller promises that the pointer points at a field of the right type in the
- // right kind of struct.
- unsafe { (ptr as *mut u8).sub(Self::OFFSET) as *mut Self }
- }
+ unsafe fn work_container_of(ptr: *mut Work<T, ID>) -> *mut Self;
}
/// Used to safely implement the [`HasWork<T, ID>`] trait.
@@ -504,8 +481,6 @@ macro_rules! impl_has_work {
// SAFETY: The implementation of `raw_get_work` only compiles if the field has the right
// type.
unsafe impl$(<$($generics)+>)? $crate::workqueue::HasWork<$work_type $(, $id)?> for $self {
- const OFFSET: usize = ::core::mem::offset_of!(Self, $field) as usize;
-
#[inline]
unsafe fn raw_get_work(ptr: *mut Self) -> *mut $crate::workqueue::Work<$work_type $(, $id)?> {
// SAFETY: The caller promises that the pointer is not dangling.
@@ -513,6 +488,15 @@ macro_rules! impl_has_work {
::core::ptr::addr_of_mut!((*ptr).$field)
}
}
+
+ #[inline]
+ unsafe fn work_container_of(
+ ptr: *mut $crate::workqueue::Work<$work_type $(, $id)?>,
+ ) -> *mut Self {
+ // SAFETY: The caller promises that the pointer points at a field of the right type
+ // in the right kind of struct.
+ unsafe { $crate::container_of!(ptr, Self, $field) }
+ }
}
)*};
}
diff --git a/rust/kernel/xarray.rs b/rust/kernel/xarray.rs
new file mode 100644
index 000000000000..75719e7bb491
--- /dev/null
+++ b/rust/kernel/xarray.rs
@@ -0,0 +1,275 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! XArray abstraction.
+//!
+//! C header: [`include/linux/xarray.h`](srctree/include/linux/xarray.h)
+
+use crate::{
+ alloc, bindings, build_assert,
+ error::{Error, Result},
+ types::{ForeignOwnable, NotThreadSafe, Opaque},
+};
+use core::{iter, marker::PhantomData, mem, pin::Pin, ptr::NonNull};
+use pin_init::{pin_data, pin_init, pinned_drop, PinInit};
+
+/// An array which efficiently maps sparse integer indices to owned objects.
+///
+/// This is similar to a [`crate::alloc::kvec::Vec<Option<T>>`], but more efficient when there are
+/// holes in the index space, and can be efficiently grown.
+///
+/// # Invariants
+///
+/// `self.xa` is always an initialized and valid [`bindings::xarray`] whose entries are either
+/// `XA_ZERO_ENTRY` or came from `T::into_foreign`.
+///
+/// # Examples
+///
+/// ```rust
+/// use kernel::alloc::KBox;
+/// use kernel::xarray::{AllocKind, XArray};
+///
+/// let xa = KBox::pin_init(XArray::new(AllocKind::Alloc1), GFP_KERNEL)?;
+///
+/// let dead = KBox::new(0xdead, GFP_KERNEL)?;
+/// let beef = KBox::new(0xbeef, GFP_KERNEL)?;
+///
+/// let mut guard = xa.lock();
+///
+/// assert_eq!(guard.get(0), None);
+///
+/// assert_eq!(guard.store(0, dead, GFP_KERNEL)?.as_deref(), None);
+/// assert_eq!(guard.get(0).copied(), Some(0xdead));
+///
+/// *guard.get_mut(0).unwrap() = 0xffff;
+/// assert_eq!(guard.get(0).copied(), Some(0xffff));
+///
+/// assert_eq!(guard.store(0, beef, GFP_KERNEL)?.as_deref().copied(), Some(0xffff));
+/// assert_eq!(guard.get(0).copied(), Some(0xbeef));
+///
+/// guard.remove(0);
+/// assert_eq!(guard.get(0), None);
+///
+/// # Ok::<(), Error>(())
+/// ```
+#[pin_data(PinnedDrop)]
+pub struct XArray<T: ForeignOwnable> {
+ #[pin]
+ xa: Opaque<bindings::xarray>,
+ _p: PhantomData<T>,
+}
+
+#[pinned_drop]
+impl<T: ForeignOwnable> PinnedDrop for XArray<T> {
+ fn drop(self: Pin<&mut Self>) {
+ self.iter().for_each(|ptr| {
+ let ptr = ptr.as_ptr();
+ // SAFETY: `ptr` came from `T::into_foreign`.
+ //
+ // INVARIANT: we own the only reference to the array which is being dropped so the
+ // broken invariant is not observable on function exit.
+ drop(unsafe { T::from_foreign(ptr) })
+ });
+
+ // SAFETY: `self.xa` is always valid by the type invariant.
+ unsafe { bindings::xa_destroy(self.xa.get()) };
+ }
+}
+
+/// Flags passed to [`XArray::new`] to configure the array's allocation tracking behavior.
+pub enum AllocKind {
+ /// Consider the first element to be at index 0.
+ Alloc,
+ /// Consider the first element to be at index 1.
+ Alloc1,
+}
+
+impl<T: ForeignOwnable> XArray<T> {
+ /// Creates a new initializer for this type.
+ pub fn new(kind: AllocKind) -> impl PinInit<Self> {
+ let flags = match kind {
+ AllocKind::Alloc => bindings::XA_FLAGS_ALLOC,
+ AllocKind::Alloc1 => bindings::XA_FLAGS_ALLOC1,
+ };
+ pin_init!(Self {
+ // SAFETY: `xa` is valid while the closure is called.
+ //
+ // INVARIANT: `xa` is initialized here to an empty, valid [`bindings::xarray`].
+ xa <- Opaque::ffi_init(|xa| unsafe {
+ bindings::xa_init_flags(xa, flags)
+ }),
+ _p: PhantomData,
+ })
+ }
+
+ fn iter(&self) -> impl Iterator<Item = NonNull<T::PointedTo>> + '_ {
+ let mut index = 0;
+
+ // SAFETY: `self.xa` is always valid by the type invariant.
+ iter::once(unsafe {
+ bindings::xa_find(self.xa.get(), &mut index, usize::MAX, bindings::XA_PRESENT)
+ })
+ .chain(iter::from_fn(move || {
+ // SAFETY: `self.xa` is always valid by the type invariant.
+ Some(unsafe {
+ bindings::xa_find_after(self.xa.get(), &mut index, usize::MAX, bindings::XA_PRESENT)
+ })
+ }))
+ .map_while(|ptr| NonNull::new(ptr.cast()))
+ }
+
+ /// Attempts to lock the [`XArray`] for exclusive access.
+ pub fn try_lock(&self) -> Option<Guard<'_, T>> {
+ // SAFETY: `self.xa` is always valid by the type invariant.
+ if (unsafe { bindings::xa_trylock(self.xa.get()) } != 0) {
+ Some(Guard {
+ xa: self,
+ _not_send: NotThreadSafe,
+ })
+ } else {
+ None
+ }
+ }
+
+ /// Locks the [`XArray`] for exclusive access.
+ pub fn lock(&self) -> Guard<'_, T> {
+ // SAFETY: `self.xa` is always valid by the type invariant.
+ unsafe { bindings::xa_lock(self.xa.get()) };
+
+ Guard {
+ xa: self,
+ _not_send: NotThreadSafe,
+ }
+ }
+}
+
+/// A lock guard.
+///
+/// The lock is unlocked when the guard goes out of scope.
+#[must_use = "the lock unlocks immediately when the guard is unused"]
+pub struct Guard<'a, T: ForeignOwnable> {
+ xa: &'a XArray<T>,
+ _not_send: NotThreadSafe,
+}
+
+impl<T: ForeignOwnable> Drop for Guard<'_, T> {
+ fn drop(&mut self) {
+ // SAFETY:
+ // - `self.xa.xa` is always valid by the type invariant.
+ // - The caller holds the lock, so it is safe to unlock it.
+ unsafe { bindings::xa_unlock(self.xa.xa.get()) };
+ }
+}
+
+/// The error returned by [`store`](Guard::store).
+///
+/// Contains the underlying error and the value that was not stored.
+pub struct StoreError<T> {
+ /// The error that occurred.
+ pub error: Error,
+ /// The value that was not stored.
+ pub value: T,
+}
+
+impl<T> From<StoreError<T>> for Error {
+ fn from(value: StoreError<T>) -> Self {
+ value.error
+ }
+}
+
+impl<'a, T: ForeignOwnable> Guard<'a, T> {
+ fn load<F, U>(&self, index: usize, f: F) -> Option<U>
+ where
+ F: FnOnce(NonNull<T::PointedTo>) -> U,
+ {
+ // SAFETY: `self.xa.xa` is always valid by the type invariant.
+ let ptr = unsafe { bindings::xa_load(self.xa.xa.get(), index) };
+ let ptr = NonNull::new(ptr.cast())?;
+ Some(f(ptr))
+ }
+
+ /// Provides a reference to the element at the given index.
+ pub fn get(&self, index: usize) -> Option<T::Borrowed<'_>> {
+ self.load(index, |ptr| {
+ // SAFETY: `ptr` came from `T::into_foreign`.
+ unsafe { T::borrow(ptr.as_ptr()) }
+ })
+ }
+
+ /// Provides a mutable reference to the element at the given index.
+ pub fn get_mut(&mut self, index: usize) -> Option<T::BorrowedMut<'_>> {
+ self.load(index, |ptr| {
+ // SAFETY: `ptr` came from `T::into_foreign`.
+ unsafe { T::borrow_mut(ptr.as_ptr()) }
+ })
+ }
+
+ /// Removes and returns the element at the given index.
+ pub fn remove(&mut self, index: usize) -> Option<T> {
+ // SAFETY:
+ // - `self.xa.xa` is always valid by the type invariant.
+ // - The caller holds the lock.
+ let ptr = unsafe { bindings::__xa_erase(self.xa.xa.get(), index) }.cast();
+ // SAFETY:
+ // - `ptr` is either NULL or came from `T::into_foreign`.
+ // - `&mut self` guarantees that the lifetimes of [`T::Borrowed`] and [`T::BorrowedMut`]
+ // borrowed from `self` have ended.
+ unsafe { T::try_from_foreign(ptr) }
+ }
+
+ /// Stores an element at the given index.
+ ///
+ /// May drop the lock if needed to allocate memory, and then reacquire it afterwards.
+ ///
+ /// On success, returns the element which was previously at the given index.
+ ///
+ /// On failure, returns the element which was attempted to be stored.
+ pub fn store(
+ &mut self,
+ index: usize,
+ value: T,
+ gfp: alloc::Flags,
+ ) -> Result<Option<T>, StoreError<T>> {
+ build_assert!(
+ mem::align_of::<T::PointedTo>() >= 4,
+ "pointers stored in XArray must be 4-byte aligned"
+ );
+ let new = value.into_foreign();
+
+ let old = {
+ let new = new.cast();
+ // SAFETY:
+ // - `self.xa.xa` is always valid by the type invariant.
+ // - The caller holds the lock.
+ //
+ // INVARIANT: `new` came from `T::into_foreign`.
+ unsafe { bindings::__xa_store(self.xa.xa.get(), index, new, gfp.as_raw()) }
+ };
+
+ // SAFETY: `__xa_store` returns the old entry at this index on success or `xa_err` if an
+ // error happened.
+ let errno = unsafe { bindings::xa_err(old) };
+ if errno != 0 {
+ // SAFETY: `new` came from `T::into_foreign` and `__xa_store` does not take
+ // ownership of the value on error.
+ let value = unsafe { T::from_foreign(new) };
+ Err(StoreError {
+ value,
+ error: Error::from_errno(errno),
+ })
+ } else {
+ let old = old.cast();
+ // SAFETY: `ptr` is either NULL or came from `T::into_foreign`.
+ //
+ // NB: `XA_ZERO_ENTRY` is never returned by functions belonging to the Normal XArray
+ // API; such entries present as `NULL`.
+ Ok(unsafe { T::try_from_foreign(old) })
+ }
+ }
+}
+
+// SAFETY: `XArray<T>` has no shared mutable state so it is `Send` iff `T` is `Send`.
+unsafe impl<T: ForeignOwnable + Send> Send for XArray<T> {}
+
+// SAFETY: `XArray<T>` serialises the interior mutability it provides so it is `Sync` iff `T` is
+// `Send`.
+unsafe impl<T: ForeignOwnable + Send> Sync for XArray<T> {}
diff --git a/rust/macros/helpers.rs b/rust/macros/helpers.rs
index a3ee27e29a6f..e2602be402c1 100644
--- a/rust/macros/helpers.rs
+++ b/rust/macros/helpers.rs
@@ -86,3 +86,20 @@ pub(crate) fn function_name(input: TokenStream) -> Option<Ident> {
}
None
}
+
+pub(crate) fn file() -> String {
+ #[cfg(not(CONFIG_RUSTC_HAS_SPAN_FILE))]
+ {
+ proc_macro::Span::call_site()
+ .source_file()
+ .path()
+ .to_string_lossy()
+ .into_owned()
+ }
+
+ #[cfg(CONFIG_RUSTC_HAS_SPAN_FILE)]
+ #[allow(clippy::incompatible_msrv)]
+ {
+ proc_macro::Span::call_site().file()
+ }
+}
diff --git a/rust/macros/kunit.rs b/rust/macros/kunit.rs
index 99ccac82edde..81d18149a0cc 100644
--- a/rust/macros/kunit.rs
+++ b/rust/macros/kunit.rs
@@ -57,8 +57,8 @@ pub(crate) fn kunit_tests(attr: TokenStream, ts: TokenStream) -> TokenStream {
}
}
- // Add `#[cfg(CONFIG_KUNIT)]` before the module declaration.
- let config_kunit = "#[cfg(CONFIG_KUNIT)]".to_owned().parse().unwrap();
+ // Add `#[cfg(CONFIG_KUNIT="y")]` before the module declaration.
+ let config_kunit = "#[cfg(CONFIG_KUNIT=\"y\")]".to_owned().parse().unwrap();
tokens.insert(
0,
TokenTree::Group(Group::new(Delimiter::None, config_kunit)),
@@ -85,28 +85,52 @@ pub(crate) fn kunit_tests(attr: TokenStream, ts: TokenStream) -> TokenStream {
// Looks like:
//
// ```
- // unsafe extern "C" fn kunit_rust_wrapper_foo(_test: *mut kernel::bindings::kunit) { foo(); }
- // unsafe extern "C" fn kunit_rust_wrapper_bar(_test: *mut kernel::bindings::kunit) { bar(); }
+ // unsafe extern "C" fn kunit_rust_wrapper_foo(_test: *mut ::kernel::bindings::kunit) { foo(); }
+ // unsafe extern "C" fn kunit_rust_wrapper_bar(_test: *mut ::kernel::bindings::kunit) { bar(); }
//
- // static mut TEST_CASES: [kernel::bindings::kunit_case; 3] = [
- // kernel::kunit::kunit_case(kernel::c_str!("foo"), kunit_rust_wrapper_foo),
- // kernel::kunit::kunit_case(kernel::c_str!("bar"), kunit_rust_wrapper_bar),
- // kernel::kunit::kunit_case_null(),
+ // static mut TEST_CASES: [::kernel::bindings::kunit_case; 3] = [
+ // ::kernel::kunit::kunit_case(::kernel::c_str!("foo"), kunit_rust_wrapper_foo),
+ // ::kernel::kunit::kunit_case(::kernel::c_str!("bar"), kunit_rust_wrapper_bar),
+ // ::kernel::kunit::kunit_case_null(),
// ];
//
- // kernel::kunit_unsafe_test_suite!(kunit_test_suit_name, TEST_CASES);
+ // ::kernel::kunit_unsafe_test_suite!(kunit_test_suit_name, TEST_CASES);
// ```
let mut kunit_macros = "".to_owned();
let mut test_cases = "".to_owned();
+ let mut assert_macros = "".to_owned();
+ let path = crate::helpers::file();
for test in &tests {
let kunit_wrapper_fn_name = format!("kunit_rust_wrapper_{test}");
+ // An extra `use` is used here to reduce the length of the message.
let kunit_wrapper = format!(
- "unsafe extern \"C\" fn {kunit_wrapper_fn_name}(_test: *mut kernel::bindings::kunit) {{ {test}(); }}"
+ "unsafe extern \"C\" fn {kunit_wrapper_fn_name}(_test: *mut ::kernel::bindings::kunit) {{ use ::kernel::kunit::is_test_result_ok; assert!(is_test_result_ok({test}())); }}",
);
writeln!(kunit_macros, "{kunit_wrapper}").unwrap();
writeln!(
test_cases,
- " kernel::kunit::kunit_case(kernel::c_str!(\"{test}\"), {kunit_wrapper_fn_name}),"
+ " ::kernel::kunit::kunit_case(::kernel::c_str!(\"{test}\"), {kunit_wrapper_fn_name}),"
+ )
+ .unwrap();
+ writeln!(
+ assert_macros,
+ r#"
+/// Overrides the usual [`assert!`] macro with one that calls KUnit instead.
+#[allow(unused)]
+macro_rules! assert {{
+ ($cond:expr $(,)?) => {{{{
+ kernel::kunit_assert!("{test}", "{path}", 0, $cond);
+ }}}}
+}}
+
+/// Overrides the usual [`assert_eq!`] macro with one that calls KUnit instead.
+#[allow(unused)]
+macro_rules! assert_eq {{
+ ($left:expr, $right:expr $(,)?) => {{{{
+ kernel::kunit_assert_eq!("{test}", "{path}", 0, $left, $right);
+ }}}}
+}}
+ "#
)
.unwrap();
}
@@ -114,14 +138,14 @@ pub(crate) fn kunit_tests(attr: TokenStream, ts: TokenStream) -> TokenStream {
writeln!(kunit_macros).unwrap();
writeln!(
kunit_macros,
- "static mut TEST_CASES: [kernel::bindings::kunit_case; {}] = [\n{test_cases} kernel::kunit::kunit_case_null(),\n];",
+ "static mut TEST_CASES: [::kernel::bindings::kunit_case; {}] = [\n{test_cases} ::kernel::kunit::kunit_case_null(),\n];",
tests.len() + 1
)
.unwrap();
writeln!(
kunit_macros,
- "kernel::kunit_unsafe_test_suite!({attr}, TEST_CASES);"
+ "::kernel::kunit_unsafe_test_suite!({attr}, TEST_CASES);"
)
.unwrap();
@@ -147,10 +171,12 @@ pub(crate) fn kunit_tests(attr: TokenStream, ts: TokenStream) -> TokenStream {
}
}
- let mut new_body = TokenStream::from_iter(new_body);
- new_body.extend::<TokenStream>(kunit_macros.parse().unwrap());
+ let mut final_body = TokenStream::new();
+ final_body.extend::<TokenStream>(assert_macros.parse().unwrap());
+ final_body.extend(new_body);
+ final_body.extend::<TokenStream>(kunit_macros.parse().unwrap());
- tokens.push(TokenTree::Group(Group::new(Delimiter::Brace, new_body)));
+ tokens.push(TokenTree::Group(Group::new(Delimiter::Brace, final_body)));
tokens.into_iter().collect()
}
diff --git a/rust/macros/lib.rs b/rust/macros/lib.rs
index 9acaa68c974e..fa847cf3a9b5 100644
--- a/rust/macros/lib.rs
+++ b/rust/macros/lib.rs
@@ -6,6 +6,11 @@
// and thus add a dependency on `include/config/RUSTC_VERSION_TEXT`, which is
// touched by Kconfig when the version string from the compiler changes.
+// Stable since Rust 1.88.0 under a different name, `proc_macro_span_file`,
+// which was added in Rust 1.88.0. This is why `cfg_attr` is used here, i.e.
+// to avoid depending on the full `proc_macro_span` on Rust >= 1.88.0.
+#![cfg_attr(not(CONFIG_RUSTC_HAS_SPAN_FILE), feature(proc_macro_span))]
+
#[macro_use]
mod quote;
mod concat_idents;
@@ -263,7 +268,7 @@ pub fn concat_idents(ts: TokenStream) -> TokenStream {
/// literals (lifetimes and documentation strings are not supported). There is a difference in
/// supported modifiers as well.
///
-/// # Example
+/// # Examples
///
/// ```
/// # const binder_driver_return_protocol_BR_OK: u32 = 0;
@@ -283,7 +288,7 @@ pub fn concat_idents(ts: TokenStream) -> TokenStream {
/// # const binder_driver_return_protocol_BR_FAILED_REPLY: u32 = 14;
/// macro_rules! pub_no_prefix {
/// ($prefix:ident, $($newname:ident),+) => {
-/// kernel::macros::paste! {
+/// ::kernel::macros::paste! {
/// $(pub(crate) const $newname: u32 = [<$prefix $newname>];)+
/// }
/// };
@@ -340,7 +345,7 @@ pub fn concat_idents(ts: TokenStream) -> TokenStream {
/// # const binder_driver_return_protocol_BR_FAILED_REPLY: u32 = 14;
/// macro_rules! pub_no_prefix {
/// ($prefix:ident, $($newname:ident),+) => {
-/// kernel::macros::paste! {
+/// ::kernel::macros::paste! {
/// $(pub(crate) const fn [<$newname:lower:span>]() -> u32 { [<$prefix $newname:span>] })+
/// }
/// };
@@ -375,7 +380,7 @@ pub fn concat_idents(ts: TokenStream) -> TokenStream {
/// ```
/// macro_rules! create_numbered_fn {
/// ($name:literal, $val:literal) => {
-/// kernel::macros::paste! {
+/// ::kernel::macros::paste! {
/// fn [<some_ $name _fn $val>]() -> u32 { $val }
/// }
/// };
@@ -402,7 +407,7 @@ pub fn paste(input: TokenStream) -> TokenStream {
/// # Examples
///
/// ```ignore
-/// # use macros::kunit_tests;
+/// # use kernel::prelude::*;
/// #[kunit_tests(kunit_test_suit_name)]
/// mod tests {
/// #[test]
diff --git a/rust/macros/module.rs b/rust/macros/module.rs
index c4afdd69e490..2ddd2eeb2852 100644
--- a/rust/macros/module.rs
+++ b/rust/macros/module.rs
@@ -217,24 +217,24 @@ pub(crate) fn module(ts: TokenStream) -> TokenStream {
// SAFETY: `__this_module` is constructed by the kernel at load time and will not be
// freed until the module is unloaded.
#[cfg(MODULE)]
- static THIS_MODULE: kernel::ThisModule = unsafe {{
+ static THIS_MODULE: ::kernel::ThisModule = unsafe {{
extern \"C\" {{
- static __this_module: kernel::types::Opaque<kernel::bindings::module>;
+ static __this_module: ::kernel::types::Opaque<::kernel::bindings::module>;
}}
- kernel::ThisModule::from_ptr(__this_module.get())
+ ::kernel::ThisModule::from_ptr(__this_module.get())
}};
#[cfg(not(MODULE))]
- static THIS_MODULE: kernel::ThisModule = unsafe {{
- kernel::ThisModule::from_ptr(core::ptr::null_mut())
+ static THIS_MODULE: ::kernel::ThisModule = unsafe {{
+ ::kernel::ThisModule::from_ptr(::core::ptr::null_mut())
}};
/// The `LocalModule` type is the type of the module created by `module!`,
/// `module_pci_driver!`, `module_platform_driver!`, etc.
type LocalModule = {type_};
- impl kernel::ModuleMetadata for {type_} {{
- const NAME: &'static kernel::str::CStr = kernel::c_str!(\"{name}\");
+ impl ::kernel::ModuleMetadata for {type_} {{
+ const NAME: &'static ::kernel::str::CStr = ::kernel::c_str!(\"{name}\");
}}
// Double nested modules, since then nobody can access the public items inside.
@@ -252,8 +252,8 @@ pub(crate) fn module(ts: TokenStream) -> TokenStream {
#[used]
static __IS_RUST_MODULE: () = ();
- static mut __MOD: core::mem::MaybeUninit<{type_}> =
- core::mem::MaybeUninit::uninit();
+ static mut __MOD: ::core::mem::MaybeUninit<{type_}> =
+ ::core::mem::MaybeUninit::uninit();
// Loadable modules need to export the `{{init,cleanup}}_module` identifiers.
/// # Safety
@@ -264,7 +264,7 @@ pub(crate) fn module(ts: TokenStream) -> TokenStream {
#[doc(hidden)]
#[no_mangle]
#[link_section = \".init.text\"]
- pub unsafe extern \"C\" fn init_module() -> kernel::ffi::c_int {{
+ pub unsafe extern \"C\" fn init_module() -> ::kernel::ffi::c_int {{
// SAFETY: This function is inaccessible to the outside due to the double
// module wrapping it. It is called exactly once by the C side via its
// unique name.
@@ -280,6 +280,7 @@ pub(crate) fn module(ts: TokenStream) -> TokenStream {
#[cfg(MODULE)]
#[doc(hidden)]
#[no_mangle]
+ #[link_section = \".exit.text\"]
pub extern \"C\" fn cleanup_module() {{
// SAFETY:
// - This function is inaccessible to the outside due to the double
@@ -304,11 +305,11 @@ pub(crate) fn module(ts: TokenStream) -> TokenStream {
#[link_section = \"{initcall_section}\"]
#[used]
pub static __{ident}_initcall: extern \"C\" fn() ->
- kernel::ffi::c_int = __{ident}_init;
+ ::kernel::ffi::c_int = __{ident}_init;
#[cfg(not(MODULE))]
#[cfg(CONFIG_HAVE_ARCH_PREL32_RELOCATIONS)]
- core::arch::global_asm!(
+ ::core::arch::global_asm!(
r#\".section \"{initcall_section}\", \"a\"
__{ident}_initcall:
.long __{ident}_init - .
@@ -319,7 +320,7 @@ pub(crate) fn module(ts: TokenStream) -> TokenStream {
#[cfg(not(MODULE))]
#[doc(hidden)]
#[no_mangle]
- pub extern \"C\" fn __{ident}_init() -> kernel::ffi::c_int {{
+ pub extern \"C\" fn __{ident}_init() -> ::kernel::ffi::c_int {{
// SAFETY: This function is inaccessible to the outside due to the double
// module wrapping it. It is called exactly once by the C side via its
// placement above in the initcall section.
@@ -342,9 +343,9 @@ pub(crate) fn module(ts: TokenStream) -> TokenStream {
/// # Safety
///
/// This function must only be called once.
- unsafe fn __init() -> kernel::ffi::c_int {{
+ unsafe fn __init() -> ::kernel::ffi::c_int {{
let initer =
- <{type_} as kernel::InPlaceModule>::init(&super::super::THIS_MODULE);
+ <{type_} as ::kernel::InPlaceModule>::init(&super::super::THIS_MODULE);
// SAFETY: No data race, since `__MOD` can only be accessed by this module
// and there only `__init` and `__exit` access it. These functions are only
// called once and `__exit` cannot be called before or during `__init`.
diff --git a/rust/pin-init/README.md b/rust/pin-init/README.md
index 3d04796b212b..2d0cda961d45 100644
--- a/rust/pin-init/README.md
+++ b/rust/pin-init/README.md
@@ -40,6 +40,12 @@ However, using the crate on stable compilers is possible by disabling `alloc`. I
will require the `std` feature, because stable compilers have neither `Box` nor `Arc` in no-std
mode.
+### Nightly needed for `unsafe-pinned` feature
+
+This feature enables the `Wrapper` implementation on the unstable `core::pin::UnsafePinned` type.
+This requires the [`unsafe_pinned` unstable feature](https://github.com/rust-lang/rust/issues/125735)
+and therefore a nightly compiler. Note that this feature is not enabled by default.
+
## Overview
To initialize a `struct` with an in-place constructor you will need two things:
@@ -216,13 +222,15 @@ the `kernel` crate. The [`sync`] module is a good starting point.
[`sync`]: https://rust.docs.kernel.org/kernel/sync/index.html
[pinning]: https://doc.rust-lang.org/std/pin/index.html
-[structurally pinned fields]: https://doc.rust-lang.org/std/pin/index.html#pinning-is-structural-for-field
+[structurally pinned fields]: https://doc.rust-lang.org/std/pin/index.html#projections-and-structural-pinning
[stack]: https://docs.rs/pin-init/latest/pin_init/macro.stack_pin_init.html
-[`Arc<T>`]: https://doc.rust-lang.org/stable/alloc/sync/struct.Arc.html
-[`Box<T>`]: https://doc.rust-lang.org/stable/alloc/boxed/struct.Box.html
[`impl PinInit<Foo>`]: https://docs.rs/pin-init/latest/pin_init/trait.PinInit.html
[`impl PinInit<T, E>`]: https://docs.rs/pin-init/latest/pin_init/trait.PinInit.html
[`impl Init<T, E>`]: https://docs.rs/pin-init/latest/pin_init/trait.Init.html
[Rust-for-Linux]: https://rust-for-linux.com/
<!-- cargo-rdme end -->
+
+<!-- These links are not picked up by cargo-rdme, since they are behind cfgs... -->
+[`Arc<T>`]: https://doc.rust-lang.org/stable/alloc/sync/struct.Arc.html
+[`Box<T>`]: https://doc.rust-lang.org/stable/alloc/boxed/struct.Box.html
diff --git a/rust/pin-init/examples/linked_list.rs b/rust/pin-init/examples/linked_list.rs
index 6d7eb0a0ec0d..0bbc7b8d83a1 100644
--- a/rust/pin-init/examples/linked_list.rs
+++ b/rust/pin-init/examples/linked_list.rs
@@ -2,6 +2,7 @@
#![allow(clippy::undocumented_unsafe_blocks)]
#![cfg_attr(feature = "alloc", feature(allocator_api))]
+#![cfg_attr(not(RUSTC_LINT_REASONS_IS_STABLE), feature(lint_reasons))]
use core::{
cell::Cell,
diff --git a/rust/pin-init/examples/mutex.rs b/rust/pin-init/examples/mutex.rs
index 073bb79341d1..3e3630780c96 100644
--- a/rust/pin-init/examples/mutex.rs
+++ b/rust/pin-init/examples/mutex.rs
@@ -2,6 +2,7 @@
#![allow(clippy::undocumented_unsafe_blocks)]
#![cfg_attr(feature = "alloc", feature(allocator_api))]
+#![cfg_attr(not(RUSTC_LINT_REASONS_IS_STABLE), feature(lint_reasons))]
#![allow(clippy::missing_safety_doc)]
use core::{
diff --git a/rust/pin-init/examples/pthread_mutex.rs b/rust/pin-init/examples/pthread_mutex.rs
index 5ac22f1880d2..5acc5108b954 100644
--- a/rust/pin-init/examples/pthread_mutex.rs
+++ b/rust/pin-init/examples/pthread_mutex.rs
@@ -3,6 +3,8 @@
// inspired by <https://github.com/nbdd0121/pin-init/blob/trunk/examples/pthread_mutex.rs>
#![allow(clippy::undocumented_unsafe_blocks)]
#![cfg_attr(feature = "alloc", feature(allocator_api))]
+#![cfg_attr(not(RUSTC_LINT_REASONS_IS_STABLE), feature(lint_reasons))]
+
#[cfg(not(windows))]
mod pthread_mtx {
#[cfg(feature = "alloc")]
@@ -40,7 +42,7 @@ mod pthread_mtx {
#[derive(Debug)]
pub enum Error {
- #[expect(dead_code)]
+ #[allow(dead_code)]
IO(std::io::Error),
Alloc,
}
diff --git a/rust/pin-init/examples/static_init.rs b/rust/pin-init/examples/static_init.rs
index 3487d761aa26..48531413ab94 100644
--- a/rust/pin-init/examples/static_init.rs
+++ b/rust/pin-init/examples/static_init.rs
@@ -2,6 +2,7 @@
#![allow(clippy::undocumented_unsafe_blocks)]
#![cfg_attr(feature = "alloc", feature(allocator_api))]
+#![cfg_attr(not(RUSTC_LINT_REASONS_IS_STABLE), feature(lint_reasons))]
use core::{
cell::{Cell, UnsafeCell},
diff --git a/rust/pin-init/internal/src/lib.rs b/rust/pin-init/internal/src/lib.rs
index babe5e878550..297b0129a5bf 100644
--- a/rust/pin-init/internal/src/lib.rs
+++ b/rust/pin-init/internal/src/lib.rs
@@ -22,6 +22,7 @@ use proc_macro::TokenStream;
#[cfg(kernel)]
#[path = "../../../macros/quote.rs"]
#[macro_use]
+#[cfg_attr(not(kernel), rustfmt::skip)]
mod quote;
#[cfg(not(kernel))]
#[macro_use]
@@ -46,3 +47,8 @@ pub fn pinned_drop(args: TokenStream, input: TokenStream) -> TokenStream {
pub fn derive_zeroable(input: TokenStream) -> TokenStream {
zeroable::derive(input.into()).into()
}
+
+#[proc_macro_derive(MaybeZeroable)]
+pub fn maybe_derive_zeroable(input: TokenStream) -> TokenStream {
+ zeroable::maybe_derive(input.into()).into()
+}
diff --git a/rust/pin-init/internal/src/zeroable.rs b/rust/pin-init/internal/src/zeroable.rs
index acc94008c152..e0ed3998445c 100644
--- a/rust/pin-init/internal/src/zeroable.rs
+++ b/rust/pin-init/internal/src/zeroable.rs
@@ -6,7 +6,14 @@ use proc_macro2 as proc_macro;
use crate::helpers::{parse_generics, Generics};
use proc_macro::{TokenStream, TokenTree};
-pub(crate) fn derive(input: TokenStream) -> TokenStream {
+pub(crate) fn parse_zeroable_derive_input(
+ input: TokenStream,
+) -> (
+ Vec<TokenTree>,
+ Vec<TokenTree>,
+ Vec<TokenTree>,
+ Option<TokenTree>,
+) {
let (
Generics {
impl_generics,
@@ -64,6 +71,11 @@ pub(crate) fn derive(input: TokenStream) -> TokenStream {
if in_generic && !inserted {
new_impl_generics.extend(quote! { : ::pin_init::Zeroable });
}
+ (rest, new_impl_generics, ty_generics, last)
+}
+
+pub(crate) fn derive(input: TokenStream) -> TokenStream {
+ let (rest, new_impl_generics, ty_generics, last) = parse_zeroable_derive_input(input);
quote! {
::pin_init::__derive_zeroable!(
parse_input:
@@ -74,3 +86,16 @@ pub(crate) fn derive(input: TokenStream) -> TokenStream {
);
}
}
+
+pub(crate) fn maybe_derive(input: TokenStream) -> TokenStream {
+ let (rest, new_impl_generics, ty_generics, last) = parse_zeroable_derive_input(input);
+ quote! {
+ ::pin_init::__maybe_derive_zeroable!(
+ parse_input:
+ @sig(#(#rest)*),
+ @impl_generics(#(#new_impl_generics)*),
+ @ty_generics(#(#ty_generics)*),
+ @body(#last),
+ );
+ }
+}
diff --git a/rust/pin-init/src/lib.rs b/rust/pin-init/src/lib.rs
index 0806c689f693..9ab34036e6bc 100644
--- a/rust/pin-init/src/lib.rs
+++ b/rust/pin-init/src/lib.rs
@@ -32,6 +32,12 @@
//! will require the `std` feature, because stable compilers have neither `Box` nor `Arc` in no-std
//! mode.
//!
+//! ## Nightly needed for `unsafe-pinned` feature
+//!
+//! This feature enables the `Wrapper` implementation on the unstable `core::pin::UnsafePinned` type.
+//! This requires the [`unsafe_pinned` unstable feature](https://github.com/rust-lang/rust/issues/125735)
+//! and therefore a nightly compiler. Note that this feature is not enabled by default.
+//!
//! # Overview
//!
//! To initialize a `struct` with an in-place constructor you will need two things:
@@ -241,7 +247,7 @@
//! [`sync`]: https://rust.docs.kernel.org/kernel/sync/index.html
//! [pinning]: https://doc.rust-lang.org/std/pin/index.html
//! [structurally pinned fields]:
-//! https://doc.rust-lang.org/std/pin/index.html#pinning-is-structural-for-field
+//! https://doc.rust-lang.org/std/pin/index.html#projections-and-structural-pinning
//! [stack]: crate::stack_pin_init
#![cfg_attr(
kernel,
@@ -269,6 +275,10 @@
#![forbid(missing_docs, unsafe_op_in_unsafe_fn)]
#![cfg_attr(not(feature = "std"), no_std)]
#![cfg_attr(feature = "alloc", feature(allocator_api))]
+#![cfg_attr(
+ all(feature = "unsafe-pinned", CONFIG_RUSTC_HAS_UNSAFE_PINNED),
+ feature(unsafe_pinned)
+)]
use core::{
cell::UnsafeCell,
@@ -385,9 +395,10 @@ pub use ::pin_init_internal::pin_data;
/// ```
pub use ::pin_init_internal::pinned_drop;
-/// Derives the [`Zeroable`] trait for the given struct.
+/// Derives the [`Zeroable`] trait for the given `struct` or `union`.
///
-/// This can only be used for structs where every field implements the [`Zeroable`] trait.
+/// This can only be used for `struct`s/`union`s where every field implements the [`Zeroable`]
+/// trait.
///
/// # Examples
///
@@ -396,13 +407,54 @@ pub use ::pin_init_internal::pinned_drop;
///
/// #[derive(Zeroable)]
/// pub struct DriverData {
-/// id: i64,
+/// pub(crate) id: i64,
/// buf_ptr: *mut u8,
/// len: usize,
/// }
/// ```
+///
+/// ```
+/// use pin_init::Zeroable;
+///
+/// #[derive(Zeroable)]
+/// pub union SignCast {
+/// signed: i64,
+/// unsigned: u64,
+/// }
+/// ```
pub use ::pin_init_internal::Zeroable;
+/// Derives the [`Zeroable`] trait for the given `struct` or `union` if all fields implement
+/// [`Zeroable`].
+///
+/// Contrary to the derive macro named [`macro@Zeroable`], this one silently fails when a field
+/// doesn't implement [`Zeroable`].
+///
+/// # Examples
+///
+/// ```
+/// use pin_init::MaybeZeroable;
+///
+/// // implmements `Zeroable`
+/// #[derive(MaybeZeroable)]
+/// pub struct DriverData {
+/// pub(crate) id: i64,
+/// buf_ptr: *mut u8,
+/// len: usize,
+/// }
+///
+/// // does not implmement `Zeroable`
+/// #[derive(MaybeZeroable)]
+/// pub struct DriverData2 {
+/// pub(crate) id: i64,
+/// buf_ptr: *mut u8,
+/// len: usize,
+/// // this field doesn't implement `Zeroable`
+/// other_data: &'static i32,
+/// }
+/// ```
+pub use ::pin_init_internal::MaybeZeroable;
+
/// Initialize and pin a type directly on the stack.
///
/// # Examples
@@ -1216,6 +1268,38 @@ pub const unsafe fn init_from_closure<T: ?Sized, E>(
__internal::InitClosure(f, PhantomData)
}
+/// Changes the to be initialized type.
+///
+/// # Safety
+///
+/// - `*mut U` must be castable to `*mut T` and any value of type `T` written through such a
+/// pointer must result in a valid `U`.
+#[expect(clippy::let_and_return)]
+pub const unsafe fn cast_pin_init<T, U, E>(init: impl PinInit<T, E>) -> impl PinInit<U, E> {
+ // SAFETY: initialization delegated to a valid initializer. Cast is valid by function safety
+ // requirements.
+ let res = unsafe { pin_init_from_closure(|ptr: *mut U| init.__pinned_init(ptr.cast::<T>())) };
+ // FIXME: remove the let statement once the nightly-MSRV allows it (1.78 otherwise encounters a
+ // cycle when computing the type returned by this function)
+ res
+}
+
+/// Changes the to be initialized type.
+///
+/// # Safety
+///
+/// - `*mut U` must be castable to `*mut T` and any value of type `T` written through such a
+/// pointer must result in a valid `U`.
+#[expect(clippy::let_and_return)]
+pub const unsafe fn cast_init<T, U, E>(init: impl Init<T, E>) -> impl Init<U, E> {
+ // SAFETY: initialization delegated to a valid initializer. Cast is valid by function safety
+ // requirements.
+ let res = unsafe { init_from_closure(|ptr: *mut U| init.__init(ptr.cast::<T>())) };
+ // FIXME: remove the let statement once the nightly-MSRV allows it (1.78 otherwise encounters a
+ // cycle when computing the type returned by this function)
+ res
+}
+
/// An initializer that leaves the memory uninitialized.
///
/// The initializer is a no-op. The `slot` memory is not changed.
@@ -1481,3 +1565,55 @@ macro_rules! impl_tuple_zeroable {
}
impl_tuple_zeroable!(A, B, C, D, E, F, G, H, I, J);
+
+/// This trait allows creating an instance of `Self` which contains exactly one
+/// [structurally pinned value](https://doc.rust-lang.org/std/pin/index.html#projections-and-structural-pinning).
+///
+/// This is useful when using wrapper `struct`s like [`UnsafeCell`] or with new-type `struct`s.
+///
+/// # Examples
+///
+/// ```
+/// # use core::cell::UnsafeCell;
+/// # use pin_init::{pin_data, pin_init, Wrapper};
+///
+/// #[pin_data]
+/// struct Foo {}
+///
+/// #[pin_data]
+/// struct Bar {
+/// #[pin]
+/// content: UnsafeCell<Foo>
+/// };
+///
+/// let foo_initializer = pin_init!(Foo{});
+/// let initializer = pin_init!(Bar {
+/// content <- UnsafeCell::pin_init(foo_initializer)
+/// });
+/// ```
+pub trait Wrapper<T> {
+ /// Creates an pin-initializer for a [`Self`] containing `T` from the `value_init` initializer.
+ fn pin_init<E>(value_init: impl PinInit<T, E>) -> impl PinInit<Self, E>;
+}
+
+impl<T> Wrapper<T> for UnsafeCell<T> {
+ fn pin_init<E>(value_init: impl PinInit<T, E>) -> impl PinInit<Self, E> {
+ // SAFETY: `UnsafeCell<T>` has a compatible layout to `T`.
+ unsafe { cast_pin_init(value_init) }
+ }
+}
+
+impl<T> Wrapper<T> for MaybeUninit<T> {
+ fn pin_init<E>(value_init: impl PinInit<T, E>) -> impl PinInit<Self, E> {
+ // SAFETY: `MaybeUninit<T>` has a compatible layout to `T`.
+ unsafe { cast_pin_init(value_init) }
+ }
+}
+
+#[cfg(all(feature = "unsafe-pinned", CONFIG_RUSTC_HAS_UNSAFE_PINNED))]
+impl<T> Wrapper<T> for core::pin::UnsafePinned<T> {
+ fn pin_init<E>(init: impl PinInit<T, E>) -> impl PinInit<Self, E> {
+ // SAFETY: `UnsafePinned<T>` has a compatible layout to `T`.
+ unsafe { cast_pin_init(init) }
+ }
+}
diff --git a/rust/pin-init/src/macros.rs b/rust/pin-init/src/macros.rs
index 361623324d5c..935d77745d1d 100644
--- a/rust/pin-init/src/macros.rs
+++ b/rust/pin-init/src/macros.rs
@@ -1393,7 +1393,7 @@ macro_rules! __derive_zeroable {
@body({
$(
$(#[$($field_attr:tt)*])*
- $field:ident : $field_ty:ty
+ $field_vis:vis $field:ident : $field_ty:ty
),* $(,)?
}),
) => {
@@ -1412,4 +1412,93 @@ macro_rules! __derive_zeroable {
}
};
};
+ (parse_input:
+ @sig(
+ $(#[$($struct_attr:tt)*])*
+ $vis:vis union $name:ident
+ $(where $($whr:tt)*)?
+ ),
+ @impl_generics($($impl_generics:tt)*),
+ @ty_generics($($ty_generics:tt)*),
+ @body({
+ $(
+ $(#[$($field_attr:tt)*])*
+ $field_vis:vis $field:ident : $field_ty:ty
+ ),* $(,)?
+ }),
+ ) => {
+ // SAFETY: Every field type implements `Zeroable` and padding bytes may be zero.
+ #[automatically_derived]
+ unsafe impl<$($impl_generics)*> $crate::Zeroable for $name<$($ty_generics)*>
+ where
+ $($($whr)*)?
+ {}
+ const _: () = {
+ fn assert_zeroable<T: ?::core::marker::Sized + $crate::Zeroable>() {}
+ fn ensure_zeroable<$($impl_generics)*>()
+ where $($($whr)*)?
+ {
+ $(assert_zeroable::<$field_ty>();)*
+ }
+ };
+ };
+}
+
+#[doc(hidden)]
+#[macro_export]
+macro_rules! __maybe_derive_zeroable {
+ (parse_input:
+ @sig(
+ $(#[$($struct_attr:tt)*])*
+ $vis:vis struct $name:ident
+ $(where $($whr:tt)*)?
+ ),
+ @impl_generics($($impl_generics:tt)*),
+ @ty_generics($($ty_generics:tt)*),
+ @body({
+ $(
+ $(#[$($field_attr:tt)*])*
+ $field_vis:vis $field:ident : $field_ty:ty
+ ),* $(,)?
+ }),
+ ) => {
+ // SAFETY: Every field type implements `Zeroable` and padding bytes may be zero.
+ #[automatically_derived]
+ unsafe impl<$($impl_generics)*> $crate::Zeroable for $name<$($ty_generics)*>
+ where
+ $(
+ // the `for<'__dummy>` HRTB makes this not error without the `trivial_bounds`
+ // feature <https://github.com/rust-lang/rust/issues/48214#issuecomment-2557829956>.
+ $field_ty: for<'__dummy> $crate::Zeroable,
+ )*
+ $($($whr)*)?
+ {}
+ };
+ (parse_input:
+ @sig(
+ $(#[$($struct_attr:tt)*])*
+ $vis:vis union $name:ident
+ $(where $($whr:tt)*)?
+ ),
+ @impl_generics($($impl_generics:tt)*),
+ @ty_generics($($ty_generics:tt)*),
+ @body({
+ $(
+ $(#[$($field_attr:tt)*])*
+ $field_vis:vis $field:ident : $field_ty:ty
+ ),* $(,)?
+ }),
+ ) => {
+ // SAFETY: Every field type implements `Zeroable` and padding bytes may be zero.
+ #[automatically_derived]
+ unsafe impl<$($impl_generics)*> $crate::Zeroable for $name<$($ty_generics)*>
+ where
+ $(
+ // the `for<'__dummy>` HRTB makes this not error without the `trivial_bounds`
+ // feature <https://github.com/rust-lang/rust/issues/48214#issuecomment-2557829956>.
+ $field_ty: for<'__dummy> $crate::Zeroable,
+ )*
+ $($($whr)*)?
+ {}
+ };
}
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 884dc86ce04e..557e725ab932 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -222,6 +222,15 @@ $(obj)/%.lst: $(obj)/%.c FORCE
# Compile Rust sources (.rs)
# ---------------------------------------------------------------------------
+# The features in this list are the ones allowed for non-`rust/` code.
+#
+# - Stable since Rust 1.81.0: `feature(lint_reasons)`.
+# - Stable since Rust 1.82.0: `feature(asm_const)`, `feature(raw_ref_op)`.
+# - Stable since Rust 1.87.0: `feature(asm_goto)`.
+# - Expected to become stable: `feature(arbitrary_self_types)`.
+#
+# Please see https://github.com/Rust-for-Linux/linux/issues/2 for details on
+# the unstable features in use.
rust_allowed_features := asm_const,asm_goto,arbitrary_self_types,lint_reasons,raw_ref_op
# `--out-dir` is required to avoid temporaries being created by `rustc` in the
diff --git a/scripts/generate_rust_analyzer.py b/scripts/generate_rust_analyzer.py
index fe663dd0c43b..7c3ea2b55041 100755
--- a/scripts/generate_rust_analyzer.py
+++ b/scripts/generate_rust_analyzer.py
@@ -19,7 +19,7 @@ def args_crates_cfgs(cfgs):
return crates_cfgs
-def generate_crates(srctree, objtree, sysroot_src, external_src, cfgs):
+def generate_crates(srctree, objtree, sysroot_src, external_src, cfgs, core_edition):
# Generate the configuration list.
cfg = []
with open(objtree / "include" / "generated" / "rustc_cfg") as fd:
@@ -35,7 +35,7 @@ def generate_crates(srctree, objtree, sysroot_src, external_src, cfgs):
crates_indexes = {}
crates_cfgs = args_crates_cfgs(cfgs)
- def append_crate(display_name, root_module, deps, cfg=[], is_workspace_member=True, is_proc_macro=False):
+ def append_crate(display_name, root_module, deps, cfg=[], is_workspace_member=True, is_proc_macro=False, edition="2021"):
crate = {
"display_name": display_name,
"root_module": str(root_module),
@@ -43,7 +43,7 @@ def generate_crates(srctree, objtree, sysroot_src, external_src, cfgs):
"is_proc_macro": is_proc_macro,
"deps": [{"crate": crates_indexes[dep], "name": dep} for dep in deps],
"cfg": cfg,
- "edition": "2021",
+ "edition": edition,
"env": {
"RUST_MODFILE": "This is only for rust-analyzer"
}
@@ -61,6 +61,7 @@ def generate_crates(srctree, objtree, sysroot_src, external_src, cfgs):
display_name,
deps,
cfg=[],
+ edition="2021",
):
append_crate(
display_name,
@@ -68,12 +69,13 @@ def generate_crates(srctree, objtree, sysroot_src, external_src, cfgs):
deps,
cfg,
is_workspace_member=False,
+ edition=edition,
)
# NB: sysroot crates reexport items from one another so setting up our transitive dependencies
# here is important for ensuring that rust-analyzer can resolve symbols. The sources of truth
# for this dependency graph are `(sysroot_src / crate / "Cargo.toml" for crate in crates)`.
- append_sysroot_crate("core", [], cfg=crates_cfgs.get("core", []))
+ append_sysroot_crate("core", [], cfg=crates_cfgs.get("core", []), edition=core_edition)
append_sysroot_crate("alloc", ["core"])
append_sysroot_crate("std", ["alloc", "core"])
append_sysroot_crate("proc_macro", ["core", "std"])
@@ -177,6 +179,7 @@ def main():
parser = argparse.ArgumentParser()
parser.add_argument('--verbose', '-v', action='store_true')
parser.add_argument('--cfgs', action='append', default=[])
+ parser.add_argument("core_edition")
parser.add_argument("srctree", type=pathlib.Path)
parser.add_argument("objtree", type=pathlib.Path)
parser.add_argument("sysroot", type=pathlib.Path)
@@ -193,7 +196,7 @@ def main():
assert args.sysroot in args.sysroot_src.parents
rust_project = {
- "crates": generate_crates(args.srctree, args.objtree, args.sysroot_src, args.exttree, args.cfgs),
+ "crates": generate_crates(args.srctree, args.objtree, args.sysroot_src, args.exttree, args.cfgs, args.core_edition),
"sysroot": str(args.sysroot),
}
diff --git a/scripts/generate_rust_target.rs b/scripts/generate_rust_target.rs
index 8667d0ae3c82..39c82908ff3a 100644
--- a/scripts/generate_rust_target.rs
+++ b/scripts/generate_rust_target.rs
@@ -209,7 +209,7 @@ fn main() {
// target feature of the same name plus the other two target features in
// `clang/lib/Driver/ToolChains/Arch/X86.cpp`. These should be eventually enabled via
// `-Ctarget-feature` when `rustc` starts recognizing them (or via a new dedicated
- // flag); see https://github.com/rust-lang/rust/issues/116852.
+ // flag); see <https://github.com/rust-lang/rust/issues/116852>.
features += ",+retpoline-external-thunk";
features += ",+retpoline-indirect-branches";
features += ",+retpoline-indirect-calls";
@@ -218,7 +218,7 @@ fn main() {
// The kernel uses `-mharden-sls=all`, which Clang maps to both these target features in
// `clang/lib/Driver/ToolChains/Arch/X86.cpp`. These should be eventually enabled via
// `-Ctarget-feature` when `rustc` starts recognizing them (or via a new dedicated
- // flag); see https://github.com/rust-lang/rust/issues/116851.
+ // flag); see <https://github.com/rust-lang/rust/issues/116851>.
features += ",+harden-sls-ijmp";
features += ",+harden-sls-ret";
}
diff --git a/scripts/rustdoc_test_builder.rs b/scripts/rustdoc_test_builder.rs
index e5894652f12c..f7540bcf595a 100644
--- a/scripts/rustdoc_test_builder.rs
+++ b/scripts/rustdoc_test_builder.rs
@@ -28,7 +28,7 @@ fn main() {
//
// ```
// fn main() { #[allow(non_snake_case)] fn _doctest_main_rust_kernel_file_rs_28_0() {
- // fn main() { #[allow(non_snake_case)] fn _doctest_main_rust_kernel_file_rs_37_0() -> Result<(), impl core::fmt::Debug> {
+ // fn main() { #[allow(non_snake_case)] fn _doctest_main_rust_kernel_file_rs_37_0() -> Result<(), impl ::core::fmt::Debug> {
// ```
//
// It should be unlikely that doctest code matches such lines (when code is formatted properly).
@@ -49,8 +49,10 @@ fn main() {
// Qualify `Result` to avoid the collision with our own `Result` coming from the prelude.
let body = body.replace(
- &format!("{rustdoc_function_name}() -> Result<(), impl core::fmt::Debug> {{"),
- &format!("{rustdoc_function_name}() -> core::result::Result<(), impl core::fmt::Debug> {{"),
+ &format!("{rustdoc_function_name}() -> Result<(), impl ::core::fmt::Debug> {{"),
+ &format!(
+ "{rustdoc_function_name}() -> ::core::result::Result<(), impl ::core::fmt::Debug> {{"
+ ),
);
// For tests that get generated with `Result`, like above, `rustdoc` generates an `unwrap()` on
diff --git a/scripts/rustdoc_test_gen.rs b/scripts/rustdoc_test_gen.rs
index ec8d70ac888b..1ca253594d38 100644
--- a/scripts/rustdoc_test_gen.rs
+++ b/scripts/rustdoc_test_gen.rs
@@ -167,12 +167,14 @@ fn main() {
rust_tests,
r#"/// Generated `{name}` KUnit test case from a Rust documentation test.
#[no_mangle]
-pub extern "C" fn {kunit_name}(__kunit_test: *mut kernel::bindings::kunit) {{
+pub extern "C" fn {kunit_name}(__kunit_test: *mut ::kernel::bindings::kunit) {{
/// Overrides the usual [`assert!`] macro with one that calls KUnit instead.
#[allow(unused)]
macro_rules! assert {{
($cond:expr $(,)?) => {{{{
- kernel::kunit_assert!("{kunit_name}", "{real_path}", __DOCTEST_ANCHOR - {line}, $cond);
+ ::kernel::kunit_assert!(
+ "{kunit_name}", "{real_path}", __DOCTEST_ANCHOR - {line}, $cond
+ );
}}}}
}}
@@ -180,13 +182,15 @@ pub extern "C" fn {kunit_name}(__kunit_test: *mut kernel::bindings::kunit) {{
#[allow(unused)]
macro_rules! assert_eq {{
($left:expr, $right:expr $(,)?) => {{{{
- kernel::kunit_assert_eq!("{kunit_name}", "{real_path}", __DOCTEST_ANCHOR - {line}, $left, $right);
+ ::kernel::kunit_assert_eq!(
+ "{kunit_name}", "{real_path}", __DOCTEST_ANCHOR - {line}, $left, $right
+ );
}}}}
}}
// Many tests need the prelude, so provide it by default.
#[allow(unused)]
- use kernel::prelude::*;
+ use ::kernel::prelude::*;
// Unconditionally print the location of the original doctest (i.e. rather than the location in
// the generated file) so that developers can easily map the test back to the source code.
@@ -197,11 +201,11 @@ pub extern "C" fn {kunit_name}(__kunit_test: *mut kernel::bindings::kunit) {{
// This follows the syntax for declaring test metadata in the proposed KTAP v2 spec, which may
// be used for the proposed KUnit test attributes API. Thus hopefully this will make migration
// easier later on.
- kernel::kunit::info(format_args!(" # {kunit_name}.location: {real_path}:{line}\n"));
+ ::kernel::kunit::info(format_args!(" # {kunit_name}.location: {real_path}:{line}\n"));
/// The anchor where the test code body starts.
#[allow(unused)]
- static __DOCTEST_ANCHOR: i32 = core::line!() as i32 + {body_offset} + 1;
+ static __DOCTEST_ANCHOR: i32 = ::core::line!() as i32 + {body_offset} + 1;
{{
{body}
main();
diff --git a/sound/soc/sof/intel/hda.c b/sound/soc/sof/intel/hda.c
index 6a3932d90b43..bdfe388da198 100644
--- a/sound/soc/sof/intel/hda.c
+++ b/sound/soc/sof/intel/hda.c
@@ -192,6 +192,9 @@ static int hda_sdw_probe(struct snd_sof_dev *sdev)
res.ext = true;
res.ops = &sdw_ace2x_callback;
+ /* ACE3+ supports microphone privacy */
+ if (chip->hw_ip_version >= SOF_INTEL_ACE_3_0)
+ res.mic_privacy = true;
}
res.irq = sdev->ipc_irq;
res.handle = hdev->info.handle;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 85180e4aaa5a..0b4a2f124d11 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2056,6 +2056,7 @@ union bpf_attr {
* for updates resulting in a null checksum the value is set to
* **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates
* that the modified header field is part of the pseudo-header.
+ * Flag **BPF_F_IPV6** should be set for IPv6 packets.
*
* This helper works in combination with **bpf_csum_diff**\ (),
* which does not update the checksum in-place, but offers more
@@ -6072,6 +6073,7 @@ enum {
BPF_F_PSEUDO_HDR = (1ULL << 4),
BPF_F_MARK_MANGLED_0 = (1ULL << 5),
BPF_F_MARK_ENFORCE = (1ULL << 6),
+ BPF_F_IPV6 = (1ULL << 7),
};
/* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index b21b12ec88d9..f23bdda737aa 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -230,7 +230,8 @@ static bool is_rust_noreturn(const struct symbol *func)
str_ends_with(func->name, "_7___rustc17rust_begin_unwind") ||
strstr(func->name, "_4core9panicking13assert_failed") ||
strstr(func->name, "_4core9panicking11panic_const24panic_const_") ||
- (strstr(func->name, "_4core5slice5index24slice_") &&
+ (strstr(func->name, "_4core5slice5index") &&
+ strstr(func->name, "slice_") &&
str_ends_with(func->name, "_fail"));
}
diff --git a/tools/testing/kunit/configs/all_tests.config b/tools/testing/kunit/configs/all_tests.config
index e70c502a16df..422e186cf3cf 100644
--- a/tools/testing/kunit/configs/all_tests.config
+++ b/tools/testing/kunit/configs/all_tests.config
@@ -10,7 +10,6 @@ CONFIG_KUNIT_EXAMPLE_TEST=y
CONFIG_KUNIT_ALL_TESTS=y
CONFIG_FORTIFY_SOURCE=y
-CONFIG_INIT_STACK_ALL_PATTERN=y
CONFIG_IIO=y
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 6aa11cd3db42..339b31e6a6b5 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -205,7 +205,7 @@ export KHDR_INCLUDES
all:
@ret=1; \
- for TARGET in $(TARGETS); do \
+ for TARGET in $(TARGETS) $(INSTALL_DEP_TARGETS); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
mkdir $$BUILD_TARGET -p; \
$(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET \
diff --git a/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c b/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c
index 69f81cb555ca..d93f68024cc6 100644
--- a/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c
+++ b/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c
@@ -57,15 +57,15 @@ int BPF_PROG(test_percpu_load, struct cgroup *cgrp, const char *path)
SEC("tp_btf/cgroup_mkdir")
int BPF_PROG(test_percpu_helper, struct cgroup *cgrp, const char *path)
{
- struct cgroup_rstat_cpu *rstat;
+ struct css_rstat_cpu *rstat;
__u32 cpu;
cpu = bpf_get_smp_processor_id();
- rstat = (struct cgroup_rstat_cpu *)bpf_per_cpu_ptr(
+ rstat = (struct css_rstat_cpu *)bpf_per_cpu_ptr(
cgrp->self.rstat_cpu, cpu);
if (rstat) {
/* READ_ONCE */
- *(volatile int *)rstat;
+ *(volatile long *)rstat;
}
return 0;
diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
index e6c248e3ae54..e9e918cdf31f 100644
--- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
@@ -385,7 +385,7 @@ int bpf_testmod_fentry_ok;
noinline ssize_t
bpf_testmod_test_read(struct file *file, struct kobject *kobj,
- struct bin_attribute *bin_attr,
+ const struct bin_attribute *bin_attr,
char *buf, loff_t off, size_t len)
{
struct bpf_testmod_test_read_ctx ctx = {
@@ -465,7 +465,7 @@ ALLOW_ERROR_INJECTION(bpf_testmod_test_read, ERRNO);
noinline ssize_t
bpf_testmod_test_write(struct file *file, struct kobject *kobj,
- struct bin_attribute *bin_attr,
+ const struct bin_attribute *bin_attr,
char *buf, loff_t off, size_t len)
{
struct bpf_testmod_test_write_ctx ctx = {
@@ -567,7 +567,7 @@ static void testmod_unregister_uprobe(void)
static ssize_t
bpf_testmod_uprobe_write(struct file *file, struct kobject *kobj,
- struct bin_attribute *bin_attr,
+ const struct bin_attribute *bin_attr,
char *buf, loff_t off, size_t len)
{
unsigned long offset = 0;
diff --git a/tools/testing/selftests/drivers/net/hw/config b/tools/testing/selftests/drivers/net/hw/config
new file mode 100644
index 000000000000..88ae719e6f8f
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/config
@@ -0,0 +1,5 @@
+CONFIG_IPV6=y
+CONFIG_IPV6_GRE=y
+CONFIG_NET_IPGRE=y
+CONFIG_NET_IPGRE_DEMUX=y
+CONFIG_VXLAN=y
diff --git a/tools/testing/selftests/drivers/net/hw/tso.py b/tools/testing/selftests/drivers/net/hw/tso.py
index e1ecb92f79d9..3370827409aa 100755
--- a/tools/testing/selftests/drivers/net/hw/tso.py
+++ b/tools/testing/selftests/drivers/net/hw/tso.py
@@ -39,7 +39,7 @@ def run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso):
port = rand_port()
listen_cmd = f"socat -{ipver} -t 2 -u TCP-LISTEN:{port},reuseport /dev/null,ignoreeof"
- with bkg(listen_cmd, host=cfg.remote) as nc:
+ with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as nc:
wait_port_listen(port, host=cfg.remote)
if ipver == "4":
@@ -216,7 +216,7 @@ def main() -> None:
("", "6", "tx-tcp6-segmentation", None),
("vxlan", "", "tx-udp_tnl-segmentation", ("vxlan", True, "id 100 dstport 4789 noudpcsum")),
("vxlan_csum", "", "tx-udp_tnl-csum-segmentation", ("vxlan", False, "id 100 dstport 4789 udpcsum")),
- ("gre", "4", "tx-gre-segmentation", ("ipgre", False, "")),
+ ("gre", "4", "tx-gre-segmentation", ("gre", False, "")),
("gre", "6", "tx-gre-segmentation", ("ip6gre", False, "")),
)
diff --git a/tools/testing/selftests/net/netfilter/nft_concat_range.sh b/tools/testing/selftests/net/netfilter/nft_concat_range.sh
index efea93cf23d4..cd12b8b5ac0e 100755
--- a/tools/testing/selftests/net/netfilter/nft_concat_range.sh
+++ b/tools/testing/selftests/net/netfilter/nft_concat_range.sh
@@ -378,7 +378,7 @@ display net,port,proto
type_spec ipv4_addr . inet_service . inet_proto
chain_spec ip daddr . udp dport . meta l4proto
dst addr4 port proto
-src
+src
start 1
count 9
src_delta 9
@@ -419,6 +419,7 @@ table inet filter {
set test {
type ${type_spec}
+ counter
flags interval,timeout
}
@@ -1158,9 +1159,18 @@ del() {
fi
}
-# Return packet count from 'test' counter in 'inet filter' table
+# Return packet count for elem $1 from 'test' counter in 'inet filter' table
count_packets() {
found=0
+ for token in $(nft reset element inet filter test "${1}" ); do
+ [ ${found} -eq 1 ] && echo "${token}" && return
+ [ "${token}" = "packets" ] && found=1
+ done
+}
+
+# Return packet count from 'test' counter in 'inet filter' table
+count_packets_nomatch() {
+ found=0
for token in $(nft list counter inet filter test); do
[ ${found} -eq 1 ] && echo "${token}" && return
[ "${token}" = "packets" ] && found=1
@@ -1206,6 +1216,10 @@ perf() {
# Set MAC addresses, send single packet, check that it matches, reset counter
send_match() {
+ local elem="$1"
+
+ shift
+
ip link set veth_a address "$(format_mac "${1}")"
ip -n B link set veth_b address "$(format_mac "${2}")"
@@ -1216,7 +1230,7 @@ send_match() {
eval src_"$f"=\$\(format_\$f "${2}"\)
done
eval send_\$proto
- if [ "$(count_packets)" != "1" ]; then
+ if [ "$(count_packets "$elem")" != "1" ]; then
err "${proto} packet to:"
err " $(for f in ${dst}; do
eval format_\$f "${1}"; printf ' '; done)"
@@ -1242,7 +1256,7 @@ send_nomatch() {
eval src_"$f"=\$\(format_\$f "${2}"\)
done
eval send_\$proto
- if [ "$(count_packets)" != "0" ]; then
+ if [ "$(count_packets_nomatch)" != "0" ]; then
err "${proto} packet to:"
err " $(for f in ${dst}; do
eval format_\$f "${1}"; printf ' '; done)"
@@ -1255,6 +1269,42 @@ send_nomatch() {
fi
}
+maybe_send_nomatch() {
+ local elem="$1"
+ local what="$4"
+
+ [ $((RANDOM%20)) -gt 0 ] && return
+
+ dst_addr4="$2"
+ dst_port="$3"
+ send_udp
+
+ if [ "$(count_packets_nomatch)" != "0" ]; then
+ err "Packet to $dst_addr4:$dst_port did match $what"
+ err "$(nft -a list ruleset)"
+ return 1
+ fi
+}
+
+maybe_send_match() {
+ local elem="$1"
+ local what="$4"
+
+ [ $((RANDOM%20)) -gt 0 ] && return
+
+ dst_addr4="$2"
+ dst_port="$3"
+ send_udp
+
+ if [ "$(count_packets "{ $elem }")" != "1" ]; then
+ err "Packet to $dst_addr4:$dst_port did not match $what"
+ err "$(nft -a list ruleset)"
+ return 1
+ fi
+ nft reset counter inet filter test >/dev/null
+ nft reset element inet filter test "{ $elem }" >/dev/null
+}
+
# Correctness test template:
# - add ranged element, check that packets match it
# - check that packets outside range don't match it
@@ -1262,6 +1312,8 @@ send_nomatch() {
test_correctness_main() {
range_size=1
for i in $(seq "${start}" $((start + count))); do
+ local elem=""
+
end=$((start + range_size))
# Avoid negative or zero-sized port ranges
@@ -1272,15 +1324,16 @@ test_correctness_main() {
srcstart=$((start + src_delta))
srcend=$((end + src_delta))
- add "$(format)" || return 1
+ elem="$(format)"
+ add "$elem" || return 1
for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do
- send_match "${j}" $((j + src_delta)) || return 1
+ send_match "$elem" "${j}" $((j + src_delta)) || return 1
done
send_nomatch $((end + 1)) $((end + 1 + src_delta)) || return 1
# Delete elements now and then
if [ $((i % 3)) -eq 0 ]; then
- del "$(format)" || return 1
+ del "$elem" || return 1
for j in $(seq "$start" \
$((range_size / 2 + 1)) ${end}); do
send_nomatch "${j}" $((j + src_delta)) \
@@ -1572,14 +1625,17 @@ test_timeout() {
range_size=1
for i in $(seq "$start" $((start + count))); do
+ local elem=""
+
end=$((start + range_size))
srcstart=$((start + src_delta))
srcend=$((end + src_delta))
- add "$(format)" || return 1
+ elem="$(format)"
+ add "$elem" || return 1
for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do
- send_match "${j}" $((j + src_delta)) || return 1
+ send_match "$elem" "${j}" $((j + src_delta)) || return 1
done
range_size=$((range_size + 1))
@@ -1737,7 +1793,7 @@ test_bug_reload() {
srcend=$((end + src_delta))
for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do
- send_match "${j}" $((j + src_delta)) || return 1
+ send_match "$(format)" "${j}" $((j + src_delta)) || return 1
done
range_size=$((range_size + 1))
@@ -1756,22 +1812,34 @@ test_bug_net_port_proto_match() {
range_size=1
for i in $(seq 1 10); do
for j in $(seq 1 20) ; do
- elem=$(printf "10.%d.%d.0/24 . %d1-%d0 . 6-17 " ${i} ${j} ${i} "$((i+1))")
+ local dport=$j
+
+ elem=$(printf "10.%d.%d.0/24 . %d-%d0 . 6-17 " ${i} ${j} ${dport} "$((dport+1))")
+
+ # too slow, do not test all addresses
+ maybe_send_nomatch "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d1" $((dport+1))) "before add" || return 1
nft "add element inet filter test { $elem }" || return 1
+
+ maybe_send_match "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d" $dport) "after add" || return 1
+
nft "get element inet filter test { $elem }" | grep -q "$elem"
if [ $? -ne 0 ];then
local got=$(nft "get element inet filter test { $elem }")
err "post-add: should have returned $elem but got $got"
return 1
fi
+
+ maybe_send_nomatch "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d1" $((dport+1))) "out-of-range" || return 1
done
done
# recheck after set was filled
for i in $(seq 1 10); do
for j in $(seq 1 20) ; do
- elem=$(printf "10.%d.%d.0/24 . %d1-%d0 . 6-17 " ${i} ${j} ${i} "$((i+1))")
+ local dport=$j
+
+ elem=$(printf "10.%d.%d.0/24 . %d-%d0 . 6-17 " ${i} ${j} ${dport} "$((dport+1))")
nft "get element inet filter test { $elem }" | grep -q "$elem"
if [ $? -ne 0 ];then
@@ -1779,6 +1847,9 @@ test_bug_net_port_proto_match() {
err "post-fill: should have returned $elem but got $got"
return 1
fi
+
+ maybe_send_match "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d" $dport) "recheck" || return 1
+ maybe_send_nomatch "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d1" $((dport+1))) "recheck out-of-range" || return 1
done
done
@@ -1786,9 +1857,10 @@ test_bug_net_port_proto_match() {
for i in $(seq 1 10); do
for j in $(seq 1 20) ; do
local rnd=$((RANDOM%10))
+ local dport=$j
local got=""
- elem=$(printf "10.%d.%d.0/24 . %d1-%d0 . 6-17 " ${i} ${j} ${i} "$((i+1))")
+ elem=$(printf "10.%d.%d.0/24 . %d-%d0 . 6-17 " ${i} ${j} ${dport} "$((dport+1))")
if [ $rnd -gt 0 ];then
continue
fi
@@ -1799,6 +1871,8 @@ test_bug_net_port_proto_match() {
err "post-delete: query for $elem returned $got instead of error."
return 1
fi
+
+ maybe_send_nomatch "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d" $dport) "match after deletion" || return 1
done
done
@@ -1817,7 +1891,7 @@ test_bug_avx2_mismatch()
dst_addr6="$a2"
send_icmp6
- if [ "$(count_packets)" -gt "0" ]; then
+ if [ "$(count_packets "{ icmpv6 . $a1 }")" -gt "0" ]; then
err "False match for $a2"
return 1
fi
diff --git a/tools/testing/selftests/net/netfilter/nft_nat.sh b/tools/testing/selftests/net/netfilter/nft_nat.sh
index 9e39de26455f..a954754b99b3 100755
--- a/tools/testing/selftests/net/netfilter/nft_nat.sh
+++ b/tools/testing/selftests/net/netfilter/nft_nat.sh
@@ -866,6 +866,24 @@ EOF
ip netns exec "$ns0" nft delete table $family nat
}
+file_cmp()
+{
+ local infile="$1"
+ local outfile="$2"
+
+ if ! cmp "$infile" "$outfile";then
+ echo -n "Infile "
+ ls -l "$infile"
+ echo -n "Outfile "
+ ls -l "$outfile"
+ echo "ERROR: in and output file mismatch when checking $msg" 1>&1
+ ret=1
+ return 1
+ fi
+
+ return 0
+}
+
test_stateless_nat_ip()
{
local lret=0
@@ -966,11 +984,7 @@ EOF
wait
- if ! cmp "$INFILE" "$OUTFILE";then
- ls -l "$INFILE" "$OUTFILE"
- echo "ERROR: in and output file mismatch when checking udp with stateless nat" 1>&2
- lret=1
- fi
+ file_cmp "$INFILE" "$OUTFILE" "udp with stateless nat" || lret=1
:> "$OUTFILE"
@@ -991,6 +1005,62 @@ EOF
return $lret
}
+test_dnat_clash()
+{
+ local lret=0
+
+ if ! socat -h > /dev/null 2>&1;then
+ echo "SKIP: Could not run dnat clash test without socat tool"
+ [ $ret -eq 0 ] && ret=$ksft_skip
+ return $ksft_skip
+ fi
+
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+flush ruleset
+table ip dnat-test {
+ chain prerouting {
+ type nat hook prerouting priority dstnat; policy accept;
+ ip daddr 10.0.2.1 udp dport 1234 counter dnat to 10.0.1.1:1234
+ }
+}
+EOF
+ if [ $? -ne 0 ]; then
+ echo "SKIP: Could not add dnat rules"
+ [ $ret -eq 0 ] && ret=$ksft_skip
+ return $ksft_skip
+ fi
+
+ local udpdaddr="10.0.2.1"
+ for i in 1 2;do
+ echo "PING $udpdaddr" > "$INFILE"
+ echo "PONG 10.0.1.1 step $i" | ip netns exec "$ns0" timeout 3 socat STDIO UDP4-LISTEN:1234,bind=10.0.1.1 > "$OUTFILE" 2>/dev/null &
+ local lpid=$!
+
+ busywait $BUSYWAIT_TIMEOUT listener_ready "$ns0" 1234 "-u"
+
+ result=$(ip netns exec "$ns1" timeout 3 socat STDIO UDP4-SENDTO:"$udpdaddr:1234,sourceport=4321" < "$INFILE")
+ udpdaddr="10.0.1.1"
+
+ if [ "$result" != "PONG 10.0.1.1 step $i" ] ; then
+ echo "ERROR: failed to test udp $ns1 to $ns2 with dnat rule step $i, result: \"$result\"" 1>&2
+ lret=1
+ ret=1
+ fi
+
+ wait
+
+ file_cmp "$INFILE" "$OUTFILE" "udp dnat step $i" || lret=1
+
+ :> "$OUTFILE"
+ done
+
+ test $lret -eq 0 && echo "PASS: IP dnat clash $ns1:$ns2"
+
+ ip netns exec "$ns0" nft flush ruleset
+
+ return $lret
+}
+
# ip netns exec "$ns0" ping -c 1 -q 10.0.$i.99
for i in "$ns0" "$ns1" "$ns2" ;do
ip netns exec "$i" nft -f /dev/stdin <<EOF
@@ -1147,6 +1217,7 @@ $test_inet_nat && test_redirect6 inet
test_port_shadowing
test_stateless_nat_ip
+test_dnat_clash
if [ $ret -ne 0 ];then
echo -n "FAIL: "
diff --git a/tools/testing/selftests/net/ovpn/ovpn-cli.c b/tools/testing/selftests/net/ovpn/ovpn-cli.c
index de9c26f98b2e..9201f2905f2c 100644
--- a/tools/testing/selftests/net/ovpn/ovpn-cli.c
+++ b/tools/testing/selftests/net/ovpn/ovpn-cli.c
@@ -2166,6 +2166,7 @@ static int ovpn_parse_cmd_args(struct ovpn_ctx *ovpn, int argc, char *argv[])
ovpn->peers_file = argv[4];
+ ovpn->sa_family = AF_INET;
if (argc > 5 && !strcmp(argv[5], "ipv6"))
ovpn->sa_family = AF_INET6;
break;
diff --git a/tools/testing/selftests/net/ovpn/test-large-mtu.sh b/tools/testing/selftests/net/ovpn/test-large-mtu.sh
new file mode 100755
index 000000000000..ce2a2cb64f72
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test-large-mtu.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2025 OpenVPN, Inc.
+#
+# Author: Antonio Quartulli <antonio@openvpn.net>
+
+MTU="1500"
+
+source test.sh